v1-0001-Pathify-RHS-unique-ification-for-semijoin-plannin.patch
application/octet-stream
Filename: v1-0001-Pathify-RHS-unique-ification-for-semijoin-plannin.patch
Type: application/octet-stream
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v1-0001
Subject: Pathify RHS unique-ification for semijoin planning
| File | + | − |
|---|---|---|
| src/backend/optimizer/path/joinpath.c | 76 | 205 |
| src/backend/optimizer/path/joinrels.c | 9 | 9 |
| src/backend/optimizer/plan/createplan.c | 15 | 277 |
| src/backend/optimizer/plan/planner.c | 461 | 8 |
| src/backend/optimizer/prep/prepunion.c | 15 | 15 |
| src/backend/optimizer/README | 1 | 2 |
| src/backend/optimizer/util/pathnode.c | 13 | 293 |
| src/backend/optimizer/util/relnode.c | 6 | 4 |
| src/include/nodes/nodes.h | 2 | 2 |
| src/include/nodes/pathnodes.h | 11 | 34 |
| src/include/optimizer/pathnode.h | 5 | 7 |
| src/include/optimizer/planner.h | 3 | 0 |
| src/test/regress/expected/join.out | 7 | 10 |
| src/test/regress/expected/partition_join.out | 49 | 45 |
| src/test/regress/expected/subselect.out | 25 | 26 |
| src/tools/pgindent/typedefs.list | 0 | 2 |
From c525d28b3cb0a1bcc3fc5205f5b5e1ebbdb69a2a Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 21 May 2025 12:32:29 +0900
Subject: [PATCH v1] Pathify RHS unique-ification for semijoin planning
---
src/backend/optimizer/README | 3 +-
src/backend/optimizer/path/joinpath.c | 281 +++--------
src/backend/optimizer/path/joinrels.c | 18 +-
src/backend/optimizer/plan/createplan.c | 292 +-----------
src/backend/optimizer/plan/planner.c | 469 ++++++++++++++++++-
src/backend/optimizer/prep/prepunion.c | 30 +-
src/backend/optimizer/util/pathnode.c | 306 +-----------
src/backend/optimizer/util/relnode.c | 10 +-
src/include/nodes/nodes.h | 4 +-
src/include/nodes/pathnodes.h | 45 +-
src/include/optimizer/pathnode.h | 12 +-
src/include/optimizer/planner.h | 3 +
src/test/regress/expected/join.out | 17 +-
src/test/regress/expected/partition_join.out | 94 ++--
src/test/regress/expected/subselect.out | 51 +-
src/tools/pgindent/typedefs.list | 2 -
16 files changed, 698 insertions(+), 939 deletions(-)
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 9c724ccfabf..843368096fd 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -640,7 +640,6 @@ RelOptInfo - a relation or joined relations
GroupResultPath - childless Result plan node (used for degenerate grouping)
MaterialPath - a Material plan node
MemoizePath - a Memoize plan node for caching tuples from sub-paths
- UniquePath - remove duplicate rows (either by hashing or sorting)
GatherPath - collect the results of parallel workers
GatherMergePath - collect parallel results, preserving their common sort order
ProjectionPath - a Result plan node with child (used for projection)
@@ -648,7 +647,7 @@ RelOptInfo - a relation or joined relations
SortPath - a Sort plan node applied to some sub-path
IncrementalSortPath - an IncrementalSort plan node applied to some sub-path
GroupPath - a Group plan node applied to some sub-path
- UpperUniquePath - a Unique plan node applied to some sub-path
+ UniquePath - a Unique plan node applied to some sub-path
AggPath - an Agg plan node applied to some sub-path
GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 26f0336f1e4..7a91641cbd9 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -113,12 +113,12 @@ static void generate_mergejoin_paths(PlannerInfo *root,
* direction from what's indicated in sjinfo.
*
* Also, this routine and others in this module accept the special JoinTypes
- * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
- * unique-ify the outer or inner relation and then apply a regular inner
- * join. These values are not allowed to propagate outside this module,
- * however. Path cost estimation code may need to recognize that it's
- * dealing with such a case --- the combination of nominal jointype INNER
- * with sjinfo->jointype == JOIN_SEMI indicates that.
+ * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that the outer or inner
+ * relation has been unique-ified and a regular inner join should then be
+ * applied. These values are not allowed to propagate outside this module,
+ * however. Path cost estimation code may need to recognize that it's dealing
+ * with such a case --- the combination of nominal jointype INNER with
+ * sjinfo->jointype == JOIN_SEMI indicates that.
*/
void
add_paths_to_joinrel(PlannerInfo *root,
@@ -161,10 +161,10 @@ add_paths_to_joinrel(PlannerInfo *root,
* (else reduce_unique_semijoins would've simplified it), so there's no
* point in calling innerrel_is_unique. However, if the LHS covers all of
* the semijoin's min_lefthand, then it's appropriate to set inner_unique
- * because the path produced by create_unique_path will be unique relative
- * to the LHS. (If we have an LHS that's only part of the min_lefthand,
- * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
- * letting that value escape this module.
+ * because the unique relation produced by create_unique_paths will be
+ * unique relative to the LHS. (If we have an LHS that's only part of the
+ * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
+ * JOIN_INNER to avoid letting that value escape this module.
*/
switch (jointype)
{
@@ -1376,6 +1376,13 @@ sort_inner_and_outer(PlannerInfo *root,
if (extra->mergeclause_list == NIL)
return;
+ /*
+ * If the outer or inner relation has been unique-ified, handle as a plain
+ * inner join.
+ */
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER;
+
/*
* We only consider the cheapest-total-cost input paths, since we are
* assuming here that a sort is required. We will consider
@@ -1402,25 +1409,6 @@ sort_inner_and_outer(PlannerInfo *root,
PATH_PARAM_BY_REL(inner_path, outerrel))
return;
- /*
- * If unique-ification is requested, do it and then handle as a plain
- * inner join.
- */
- if (jointype == JOIN_UNIQUE_OUTER)
- {
- outer_path = (Path *) create_unique_path(root, outerrel,
- outer_path, extra->sjinfo);
- Assert(outer_path);
- jointype = JOIN_INNER;
- }
- else if (jointype == JOIN_UNIQUE_INNER)
- {
- inner_path = (Path *) create_unique_path(root, innerrel,
- inner_path, extra->sjinfo);
- Assert(inner_path);
- jointype = JOIN_INNER;
- }
-
/*
* If the joinrel is parallel-safe, we may be able to consider a partial
* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
@@ -1441,7 +1429,7 @@ sort_inner_and_outer(PlannerInfo *root,
if (inner_path->parallel_safe)
cheapest_safe_inner = inner_path;
- else if (save_jointype != JOIN_UNIQUE_INNER)
+ else
cheapest_safe_inner =
get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
}
@@ -1580,12 +1568,10 @@ generate_mergejoin_paths(PlannerInfo *root,
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
- JoinType save_jointype = jointype;
int num_sortkeys;
int sortkeycnt;
- if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
- jointype = JOIN_INNER;
+ Assert(jointype != JOIN_UNIQUE_OUTER && jointype != JOIN_UNIQUE_INNER);
/* Look for useful mergeclauses (if any) */
mergeclauses =
@@ -1636,10 +1622,6 @@ generate_mergejoin_paths(PlannerInfo *root,
extra,
is_partial);
- /* Can't do anything else if inner path needs to be unique'd */
- if (save_jointype == JOIN_UNIQUE_INNER)
- return;
-
/*
* Look for presorted inner paths that satisfy the innersortkey list ---
* or any truncation thereof, if we are allowed to build a mergejoin using
@@ -1877,20 +1859,7 @@ match_unsorted_outer(PlannerInfo *root,
if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
inner_cheapest_total = NULL;
- /*
- * If we need to unique-ify the inner path, we will consider only the
- * cheapest-total inner.
- */
- if (save_jointype == JOIN_UNIQUE_INNER)
- {
- /* No way to do this with an inner path parameterized by outer rel */
- if (inner_cheapest_total == NULL)
- return;
- inner_cheapest_total = (Path *)
- create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
- Assert(inner_cheapest_total);
- }
- else if (nestjoinOK)
+ if (nestjoinOK)
{
/*
* Consider materializing the cheapest inner path, unless
@@ -1914,20 +1883,6 @@ match_unsorted_outer(PlannerInfo *root,
if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
- /*
- * If we need to unique-ify the outer path, it's pointless to consider
- * any but the cheapest outer. (XXX we don't consider parameterized
- * outers, nor inners, for unique-ified cases. Should we?)
- */
- if (save_jointype == JOIN_UNIQUE_OUTER)
- {
- if (outerpath != outerrel->cheapest_total_path)
- continue;
- outerpath = (Path *) create_unique_path(root, outerrel,
- outerpath, extra->sjinfo);
- Assert(outerpath);
- }
-
/*
* The result will have this sort order (even if it is implemented as
* a nestloop, and even if some of the mergeclauses are implemented by
@@ -1936,21 +1891,7 @@ match_unsorted_outer(PlannerInfo *root,
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
- if (save_jointype == JOIN_UNIQUE_INNER)
- {
- /*
- * Consider nestloop join, but only with the unique-ified cheapest
- * inner path
- */
- try_nestloop_path(root,
- joinrel,
- outerpath,
- inner_cheapest_total,
- merge_pathkeys,
- jointype,
- extra);
- }
- else if (nestjoinOK)
+ if (nestjoinOK)
{
/*
* Consider nestloop joins using this outer path and various
@@ -2001,17 +1942,13 @@ match_unsorted_outer(PlannerInfo *root,
extra);
}
- /* Can't do anything else if outer path needs to be unique'd */
- if (save_jointype == JOIN_UNIQUE_OUTER)
- continue;
-
/* Can't do anything else if inner rel is parameterized by outer */
if (inner_cheapest_total == NULL)
continue;
/* Generate merge join paths */
generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
- save_jointype, extra, useallclauses,
+ jointype, extra, useallclauses,
inner_cheapest_total, merge_pathkeys,
false);
}
@@ -2035,25 +1972,22 @@ match_unsorted_outer(PlannerInfo *root,
{
if (nestjoinOK)
consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
- save_jointype, extra);
+ jointype, extra);
/*
* If inner_cheapest_total is NULL or non parallel-safe then find the
- * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
- * can't use any alternative inner path.
+ * cheapest total parallel safe path.
*/
if (inner_cheapest_total == NULL ||
!inner_cheapest_total->parallel_safe)
{
- if (save_jointype == JOIN_UNIQUE_INNER)
- return;
-
- inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+ inner_cheapest_total =
+ get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
}
if (inner_cheapest_total)
consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
- save_jointype, extra,
+ jointype, extra,
inner_cheapest_total);
}
}
@@ -2118,24 +2052,17 @@ consider_parallel_nestloop(PlannerInfo *root,
JoinType jointype,
JoinPathExtraData *extra)
{
- JoinType save_jointype = jointype;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
ListCell *lc1;
- if (jointype == JOIN_UNIQUE_INNER)
- jointype = JOIN_INNER;
-
/*
- * Consider materializing the cheapest inner path, unless: 1) we're doing
- * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
- * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
- * path is not parallel-safe, 4) the cheapest inner path is parameterized
- * by the outer rel, or 5) the cheapest inner path materializes its output
- * anyway.
+ * Consider materializing the cheapest inner path, unless: 1)
+ * enable_material is off, 2) the cheapest inner path is not
+ * parallel-safe, 3) the cheapest inner path is parameterized by the outer
+ * rel, or 4) the cheapest inner path materializes its output anyway.
*/
- if (save_jointype != JOIN_UNIQUE_INNER &&
- enable_material && inner_cheapest_total->parallel_safe &&
+ if (enable_material && inner_cheapest_total->parallel_safe &&
!PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
{
@@ -2169,23 +2096,6 @@ consider_parallel_nestloop(PlannerInfo *root,
if (!innerpath->parallel_safe)
continue;
- /*
- * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
- * cheapest_total_path, and we have to unique-ify it. (We might
- * be able to relax this to allow other safe, unparameterized
- * inner paths, but right now create_unique_path is not on board
- * with that.)
- */
- if (save_jointype == JOIN_UNIQUE_INNER)
- {
- if (innerpath != innerrel->cheapest_total_path)
- continue;
- innerpath = (Path *) create_unique_path(root, innerrel,
- innerpath,
- extra->sjinfo);
- Assert(innerpath);
- }
-
try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
pathkeys, jointype, extra);
@@ -2232,6 +2142,13 @@ hash_inner_and_outer(PlannerInfo *root,
List *hashclauses;
ListCell *l;
+ /*
+ * If the outer or inner relation has been unique-ified, handle as a plain
+ * inner join.
+ */
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER;
+
/*
* We need to build only one hashclauses list for any given pair of outer
* and inner relations; all of the hashable clauses will be used as keys.
@@ -2290,6 +2207,8 @@ hash_inner_and_outer(PlannerInfo *root,
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
+ ListCell *lc1;
+ ListCell *lc2;
/*
* If either cheapest-total path is parameterized by the other rel, we
@@ -2301,102 +2220,55 @@ hash_inner_and_outer(PlannerInfo *root,
PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
return;
- /* Unique-ify if need be; we ignore parameterized possibilities */
- if (jointype == JOIN_UNIQUE_OUTER)
- {
- cheapest_total_outer = (Path *)
- create_unique_path(root, outerrel,
- cheapest_total_outer, extra->sjinfo);
- Assert(cheapest_total_outer);
- jointype = JOIN_INNER;
- try_hashjoin_path(root,
- joinrel,
- cheapest_total_outer,
- cheapest_total_inner,
- hashclauses,
- jointype,
- extra);
- /* no possibility of cheap startup here */
- }
- else if (jointype == JOIN_UNIQUE_INNER)
- {
- cheapest_total_inner = (Path *)
- create_unique_path(root, innerrel,
- cheapest_total_inner, extra->sjinfo);
- Assert(cheapest_total_inner);
- jointype = JOIN_INNER;
+ /*
+ * Consider the cheapest startup outer together with the cheapest
+ * total inner, and then consider pairings of cheapest-total paths
+ * including parameterized ones. There is no use in generating
+ * parameterized paths on the basis of possibly cheap startup cost, so
+ * this is sufficient.
+ */
+ if (cheapest_startup_outer != NULL)
try_hashjoin_path(root,
joinrel,
- cheapest_total_outer,
+ cheapest_startup_outer,
cheapest_total_inner,
hashclauses,
jointype,
extra);
- if (cheapest_startup_outer != NULL &&
- cheapest_startup_outer != cheapest_total_outer)
- try_hashjoin_path(root,
- joinrel,
- cheapest_startup_outer,
- cheapest_total_inner,
- hashclauses,
- jointype,
- extra);
- }
- else
+
+ foreach(lc1, outerrel->cheapest_parameterized_paths)
{
+ Path *outerpath = (Path *) lfirst(lc1);
+
/*
- * For other jointypes, we consider the cheapest startup outer
- * together with the cheapest total inner, and then consider
- * pairings of cheapest-total paths including parameterized ones.
- * There is no use in generating parameterized paths on the basis
- * of possibly cheap startup cost, so this is sufficient.
+ * We cannot use an outer path that is parameterized by the inner
+ * rel.
*/
- ListCell *lc1;
- ListCell *lc2;
-
- if (cheapest_startup_outer != NULL)
- try_hashjoin_path(root,
- joinrel,
- cheapest_startup_outer,
- cheapest_total_inner,
- hashclauses,
- jointype,
- extra);
+ if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ continue;
- foreach(lc1, outerrel->cheapest_parameterized_paths)
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
{
- Path *outerpath = (Path *) lfirst(lc1);
+ Path *innerpath = (Path *) lfirst(lc2);
/*
- * We cannot use an outer path that is parameterized by the
- * inner rel.
+ * We cannot use an inner path that is parameterized by the
+ * outer rel, either.
*/
- if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ if (PATH_PARAM_BY_REL(innerpath, outerrel))
continue;
- foreach(lc2, innerrel->cheapest_parameterized_paths)
- {
- Path *innerpath = (Path *) lfirst(lc2);
-
- /*
- * We cannot use an inner path that is parameterized by
- * the outer rel, either.
- */
- if (PATH_PARAM_BY_REL(innerpath, outerrel))
- continue;
-
- if (outerpath == cheapest_startup_outer &&
- innerpath == cheapest_total_inner)
- continue; /* already tried it */
+ if (outerpath == cheapest_startup_outer &&
+ innerpath == cheapest_total_inner)
+ continue; /* already tried it */
- try_hashjoin_path(root,
- joinrel,
- outerpath,
- innerpath,
- hashclauses,
- jointype,
- extra);
- }
+ try_hashjoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ hashclauses,
+ jointype,
+ extra);
}
}
@@ -2441,9 +2313,8 @@ hash_inner_and_outer(PlannerInfo *root,
* Normally, given that the joinrel is parallel-safe, the cheapest
* total inner path will also be parallel-safe, but if not, we'll
* have to search for the cheapest safe, unparameterized inner
- * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
- * inner path. If full, right, right-semi or right-anti join, we
- * can't use parallelism (building the hash table in each backend)
+ * path. If full, right, right-semi or right-anti join, we can't
+ * use parallelism (building the hash table in each backend)
* because no one process has all the match bits.
*/
if (save_jointype == JOIN_FULL ||
@@ -2453,7 +2324,7 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_safe_inner = NULL;
else if (cheapest_total_inner->parallel_safe)
cheapest_safe_inner = cheapest_total_inner;
- else if (save_jointype != JOIN_UNIQUE_INNER)
+ else
cheapest_safe_inner =
get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..fec359c28f6 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -19,6 +19,7 @@
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
+#include "optimizer/planner.h"
#include "partitioning/partbounds.h"
#include "utils/memutils.h"
@@ -444,8 +445,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
}
else if (sjinfo->jointype == JOIN_SEMI &&
bms_equal(sjinfo->syn_righthand, rel2->relids) &&
- create_unique_path(root, rel2, rel2->cheapest_total_path,
- sjinfo) != NULL)
+ create_unique_paths(root, rel2, sjinfo) != NULL)
{
/*----------
* For a semijoin, we can join the RHS to anything else by
@@ -477,8 +477,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
}
else if (sjinfo->jointype == JOIN_SEMI &&
bms_equal(sjinfo->syn_righthand, rel1->relids) &&
- create_unique_path(root, rel1, rel1->cheapest_total_path,
- sjinfo) != NULL)
+ create_unique_paths(root, rel1, sjinfo) != NULL)
{
/* Reversed semijoin case */
if (match_sjinfo)
@@ -895,6 +894,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
RelOptInfo *rel2, RelOptInfo *joinrel,
SpecialJoinInfo *sjinfo, List *restrictlist)
{
+ RelOptInfo *unique_rel2;
+
/*
* Consider paths using each rel as both outer and inner. Depending on
* the join type, a provably empty outer or inner rel might mean the join
@@ -1000,14 +1001,13 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
/*
* If we know how to unique-ify the RHS and one input rel is
* exactly the RHS (not a superset) we can consider unique-ifying
- * it and then doing a regular join. (The create_unique_path
+ * it and then doing a regular join. (The create_unique_paths
* check here is probably redundant with what join_is_legal did,
* but if so the check is cheap because it's cached. So test
* anyway to be sure.)
*/
if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
- create_unique_path(root, rel2, rel2->cheapest_total_path,
- sjinfo) != NULL)
+ (unique_rel2 = create_unique_paths(root, rel2, sjinfo)) != NULL)
{
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist, joinrel, false))
@@ -1015,10 +1015,10 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
mark_dummy_rel(joinrel);
break;
}
- add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ add_paths_to_joinrel(root, joinrel, rel1, unique_rel2,
JOIN_UNIQUE_INNER, sjinfo,
restrictlist);
- add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ add_paths_to_joinrel(root, joinrel, unique_rel2, rel1,
JOIN_UNIQUE_OUTER, sjinfo,
restrictlist);
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..1658b20f17e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -95,8 +95,6 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
int flags);
static Memoize *create_memoize_plan(PlannerInfo *root, MemoizePath *best_path,
int flags);
-static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path,
- int flags);
static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path);
static Plan *create_projection_plan(PlannerInfo *root,
ProjectionPath *best_path,
@@ -106,8 +104,7 @@ static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
static IncrementalSort *create_incrementalsort_plan(PlannerInfo *root,
IncrementalSortPath *best_path, int flags);
static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path);
-static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path,
- int flags);
+static Unique *create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags);
static Agg *create_agg_plan(PlannerInfo *root, AggPath *best_path);
static Plan *create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path);
static Result *create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path);
@@ -293,9 +290,9 @@ static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
static Group *make_group(List *tlist, List *qual, int numGroupCols,
AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
Plan *lefttree);
-static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
static Unique *make_unique_from_pathkeys(Plan *lefttree,
- List *pathkeys, int numCols);
+ List *pathkeys, int numCols,
+ Relids relids);
static Gather *make_gather(List *qptlist, List *qpqual,
int nworkers, int rescan_param, bool single_copy, Plan *subplan);
static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
@@ -467,19 +464,9 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
flags);
break;
case T_Unique:
- if (IsA(best_path, UpperUniquePath))
- {
- plan = (Plan *) create_upper_unique_plan(root,
- (UpperUniquePath *) best_path,
- flags);
- }
- else
- {
- Assert(IsA(best_path, UniquePath));
- plan = create_unique_plan(root,
- (UniquePath *) best_path,
- flags);
- }
+ plan = (Plan *) create_unique_plan(root,
+ (UniquePath *) best_path,
+ flags);
break;
case T_Gather:
plan = (Plan *) create_gather_plan(root,
@@ -1710,207 +1697,6 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
return plan;
}
-/*
- * create_unique_plan
- * Create a Unique plan for 'best_path' and (recursively) plans
- * for its subpaths.
- *
- * Returns a Plan node.
- */
-static Plan *
-create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
-{
- Plan *plan;
- Plan *subplan;
- List *in_operators;
- List *uniq_exprs;
- List *newtlist;
- int nextresno;
- bool newitems;
- int numGroupCols;
- AttrNumber *groupColIdx;
- Oid *groupCollations;
- int groupColPos;
- ListCell *l;
-
- /* Unique doesn't project, so tlist requirements pass through */
- subplan = create_plan_recurse(root, best_path->subpath, flags);
-
- /* Done if we don't need to do any actual unique-ifying */
- if (best_path->umethod == UNIQUE_PATH_NOOP)
- return subplan;
-
- /*
- * As constructed, the subplan has a "flat" tlist containing just the Vars
- * needed here and at upper levels. The values we are supposed to
- * unique-ify may be expressions in these variables. We have to add any
- * such expressions to the subplan's tlist.
- *
- * The subplan may have a "physical" tlist if it is a simple scan plan. If
- * we're going to sort, this should be reduced to the regular tlist, so
- * that we don't sort more data than we need to. For hashing, the tlist
- * should be left as-is if we don't need to add any expressions; but if we
- * do have to add expressions, then a projection step will be needed at
- * runtime anyway, so we may as well remove unneeded items. Therefore
- * newtlist starts from build_path_tlist() not just a copy of the
- * subplan's tlist; and we don't install it into the subplan unless we are
- * sorting or stuff has to be added.
- */
- in_operators = best_path->in_operators;
- uniq_exprs = best_path->uniq_exprs;
-
- /* initialize modified subplan tlist as just the "required" vars */
- newtlist = build_path_tlist(root, &best_path->path);
- nextresno = list_length(newtlist) + 1;
- newitems = false;
-
- foreach(l, uniq_exprs)
- {
- Expr *uniqexpr = lfirst(l);
- TargetEntry *tle;
-
- tle = tlist_member(uniqexpr, newtlist);
- if (!tle)
- {
- tle = makeTargetEntry((Expr *) uniqexpr,
- nextresno,
- NULL,
- false);
- newtlist = lappend(newtlist, tle);
- nextresno++;
- newitems = true;
- }
- }
-
- /* Use change_plan_targetlist in case we need to insert a Result node */
- if (newitems || best_path->umethod == UNIQUE_PATH_SORT)
- subplan = change_plan_targetlist(subplan, newtlist,
- best_path->path.parallel_safe);
-
- /*
- * Build control information showing which subplan output columns are to
- * be examined by the grouping step. Unfortunately we can't merge this
- * with the previous loop, since we didn't then know which version of the
- * subplan tlist we'd end up using.
- */
- newtlist = subplan->targetlist;
- numGroupCols = list_length(uniq_exprs);
- groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
- groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
-
- groupColPos = 0;
- foreach(l, uniq_exprs)
- {
- Expr *uniqexpr = lfirst(l);
- TargetEntry *tle;
-
- tle = tlist_member(uniqexpr, newtlist);
- if (!tle) /* shouldn't happen */
- elog(ERROR, "failed to find unique expression in subplan tlist");
- groupColIdx[groupColPos] = tle->resno;
- groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
- groupColPos++;
- }
-
- if (best_path->umethod == UNIQUE_PATH_HASH)
- {
- Oid *groupOperators;
-
- /*
- * Get the hashable equality operators for the Agg node to use.
- * Normally these are the same as the IN clause operators, but if
- * those are cross-type operators then the equality operators are the
- * ones for the IN clause operators' RHS datatype.
- */
- groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid));
- groupColPos = 0;
- foreach(l, in_operators)
- {
- Oid in_oper = lfirst_oid(l);
- Oid eq_oper;
-
- if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper))
- elog(ERROR, "could not find compatible hash operator for operator %u",
- in_oper);
- groupOperators[groupColPos++] = eq_oper;
- }
-
- /*
- * Since the Agg node is going to project anyway, we can give it the
- * minimum output tlist, without any stuff we might have added to the
- * subplan tlist.
- */
- plan = (Plan *) make_agg(build_path_tlist(root, &best_path->path),
- NIL,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- numGroupCols,
- groupColIdx,
- groupOperators,
- groupCollations,
- NIL,
- NIL,
- best_path->path.rows,
- 0,
- subplan);
- }
- else
- {
- List *sortList = NIL;
- Sort *sort;
-
- /* Create an ORDER BY list to sort the input compatibly */
- groupColPos = 0;
- foreach(l, in_operators)
- {
- Oid in_oper = lfirst_oid(l);
- Oid sortop;
- Oid eqop;
- TargetEntry *tle;
- SortGroupClause *sortcl;
-
- sortop = get_ordering_op_for_equality_op(in_oper, false);
- if (!OidIsValid(sortop)) /* shouldn't happen */
- elog(ERROR, "could not find ordering operator for equality operator %u",
- in_oper);
-
- /*
- * The Unique node will need equality operators. Normally these
- * are the same as the IN clause operators, but if those are
- * cross-type operators then the equality operators are the ones
- * for the IN clause operators' RHS datatype.
- */
- eqop = get_equality_op_for_ordering_op(sortop, NULL);
- if (!OidIsValid(eqop)) /* shouldn't happen */
- elog(ERROR, "could not find equality operator for ordering operator %u",
- sortop);
-
- tle = get_tle_by_resno(subplan->targetlist,
- groupColIdx[groupColPos]);
- Assert(tle != NULL);
-
- sortcl = makeNode(SortGroupClause);
- sortcl->tleSortGroupRef = assignSortGroupRef(tle,
- subplan->targetlist);
- sortcl->eqop = eqop;
- sortcl->sortop = sortop;
- sortcl->reverse_sort = false;
- sortcl->nulls_first = false;
- sortcl->hashable = false; /* no need to make this accurate */
- sortList = lappend(sortList, sortcl);
- groupColPos++;
- }
- sort = make_sort_from_sortclauses(sortList, subplan);
- label_sort_with_costsize(root, sort, -1.0);
- plan = (Plan *) make_unique_from_sortclauses((Plan *) sort, sortList);
- }
-
- /* Copy cost data from Path to Plan */
- copy_generic_path_info(plan, &best_path->path);
-
- return plan;
-}
-
/*
* create_gather_plan
*
@@ -2268,13 +2054,13 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
}
/*
- * create_upper_unique_plan
+ * create_unique_plan
*
* Create a Unique plan for 'best_path' and (recursively) plans
* for its subpaths.
*/
static Unique *
-create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flags)
+create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
{
Unique *plan;
Plan *subplan;
@@ -2288,7 +2074,8 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag
plan = make_unique_from_pathkeys(subplan,
best_path->path.pathkeys,
- best_path->numkeys);
+ best_path->numkeys,
+ best_path->path.parent->relids);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6761,61 +6548,12 @@ make_group(List *tlist,
}
/*
- * distinctList is a list of SortGroupClauses, identifying the targetlist items
- * that should be considered by the Unique filter. The input path must
- * already be sorted accordingly.
- */
-static Unique *
-make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
-{
- Unique *node = makeNode(Unique);
- Plan *plan = &node->plan;
- int numCols = list_length(distinctList);
- int keyno = 0;
- AttrNumber *uniqColIdx;
- Oid *uniqOperators;
- Oid *uniqCollations;
- ListCell *slitem;
-
- plan->targetlist = lefttree->targetlist;
- plan->qual = NIL;
- plan->lefttree = lefttree;
- plan->righttree = NULL;
-
- /*
- * convert SortGroupClause list into arrays of attr indexes and equality
- * operators, as wanted by executor
- */
- Assert(numCols > 0);
- uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
- uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
- uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
-
- foreach(slitem, distinctList)
- {
- SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
- TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
-
- uniqColIdx[keyno] = tle->resno;
- uniqOperators[keyno] = sortcl->eqop;
- uniqCollations[keyno] = exprCollation((Node *) tle->expr);
- Assert(OidIsValid(uniqOperators[keyno]));
- keyno++;
- }
-
- node->numCols = numCols;
- node->uniqColIdx = uniqColIdx;
- node->uniqOperators = uniqOperators;
- node->uniqCollations = uniqCollations;
-
- return node;
-}
-
-/*
- * as above, but use pathkeys to identify the sort columns and semantics
+ * pathkeys is a list of PathKeys, identifying the sort columns and semantics.
+ * The input path must already be sorted accordingly.
*/
static Unique *
-make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
+make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols,
+ Relids relids)
{
Unique *node = makeNode(Unique);
Plan *plan = &node->plan;
@@ -6878,7 +6616,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
foreach(j, plan->targetlist)
{
tle = (TargetEntry *) lfirst(j);
- em = find_ec_member_matching_expr(ec, tle->expr, NULL);
+ em = find_ec_member_matching_expr(ec, tle->expr, relids);
if (em)
{
/* found expr already in tlist */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ff65867eebe..f41be9ca2a0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -267,6 +267,12 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
static int common_prefix_cmp(const void *a, const void *b);
static List *generate_setop_child_grouplist(SetOperationStmt *op,
List *targetlist);
+static void create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ List *sortPathkeys, List *groupClause,
+ SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel);
+static void create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ List *sortPathkeys, List *groupClause,
+ SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel);
/*****************************************************************************
@@ -4917,10 +4923,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
else
{
add_partial_path(partial_distinct_rel, (Path *)
- create_upper_unique_path(root, partial_distinct_rel,
- sorted_path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ create_unique_path(root, partial_distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
}
}
}
@@ -5111,10 +5117,10 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
else
{
add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- sorted_path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ create_unique_path(root, distinct_rel,
+ sorted_path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
}
}
}
@@ -8248,3 +8254,450 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
return grouplist;
}
+
+/*
+ * create_unique_paths
+ * Build a new RelOptInfo containing Paths that represent elimination of
+ * distinct rows from the input data. Distinct-ness is defined according to
+ * the needs of the semijoin represented by sjinfo. If it is not possible
+ * to identify how to make the data unique, NULL is returned.
+ *
+ * If used at all, this is likely to be called repeatedly on the same rel;
+ * So we cache the result.
+ */
+RelOptInfo *
+create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
+{
+ RelOptInfo *unique_rel;
+ List *newtlist;
+ int nextresno;
+ List *sortList = NIL;
+ List *sortPathkeys = NIL;
+ List *groupClause = NIL;
+ MemoryContext oldcontext;
+ ListCell *lc1;
+ ListCell *lc2;
+
+ /* Caller made a mistake if SpecialJoinInfo is the wrong one */
+ Assert(sjinfo->jointype == JOIN_SEMI);
+ Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
+
+ /* If result already cached, return it */
+ if (rel->unique_rel)
+ return (RelOptInfo *) rel->unique_rel;
+
+ /* If it's not possible to unique-ify, return NULL */
+ if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
+ return NULL;
+
+ /*
+ * When called during GEQO join planning, we are in a short-lived memory
+ * context. We must make sure that the unique rel and any subsidiary data
+ * structures created for a baserel survive the GEQO cycle, else the
+ * baserel is trashed for future GEQO cycles. On the other hand, when we
+ * are creating those for a joinrel during GEQO, we don't want them to
+ * clutter the main planning context. Upshot is that the best solution is
+ * to explicitly allocate memory in the same context the given RelOptInfo
+ * is in.
+ */
+ oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+ unique_rel = makeNode(RelOptInfo);
+ memcpy(unique_rel, rel, sizeof(RelOptInfo));
+
+ /*
+ * clear path info
+ */
+ unique_rel->pathlist = NIL;
+ unique_rel->ppilist = NIL;
+ unique_rel->partial_pathlist = NIL;
+ unique_rel->cheapest_startup_path = NULL;
+ unique_rel->cheapest_total_path = NULL;
+ unique_rel->cheapest_parameterized_paths = NIL;
+
+ /* Estimate number of output rows */
+ unique_rel->rows = estimate_num_groups(root,
+ sjinfo->semi_rhs_exprs,
+ rel->rows,
+ NULL,
+ NULL);
+
+ /*
+ * The values we are supposed to unique-ify may be expressions in the
+ * variables of the input rel's targetlist. We have to add any such
+ * expressions to the unique rel's tlist.
+ *
+ * While in the loop, build the lists of SortGroupClause's representing
+ * the ordering for the sort-based implementation and the grouping for
+ * hash-based implementation.
+ */
+ newtlist = make_tlist_from_pathtarget(rel->reltarget);
+ nextresno = list_length(newtlist) + 1;
+
+ forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators)
+ {
+ Expr *uniqexpr = lfirst(lc1);
+ Oid in_oper = lfirst_oid(lc2);
+ Oid sortop = InvalidOid;
+ TargetEntry *tle;
+
+ tle = tlist_member(uniqexpr, newtlist);
+ if (!tle)
+ {
+ tle = makeTargetEntry((Expr *) uniqexpr,
+ nextresno,
+ NULL,
+ false);
+ newtlist = lappend(newtlist, tle);
+ nextresno++;
+ }
+
+ if (sjinfo->semi_can_btree)
+ {
+ /* Create an ORDER BY list to sort the input compatibly */
+ Oid eqop;
+ SortGroupClause *sortcl;
+
+ sortop = get_ordering_op_for_equality_op(in_oper, false);
+ if (!OidIsValid(sortop)) /* shouldn't happen */
+ elog(ERROR, "could not find ordering operator for equality operator %u",
+ in_oper);
+
+ /*
+ * The Unique node will need equality operators. Normally these
+ * are the same as the IN clause operators, but if those are
+ * cross-type operators then the equality operators are the ones
+ * for the IN clause operators' RHS datatype.
+ */
+ eqop = get_equality_op_for_ordering_op(sortop, NULL);
+ if (!OidIsValid(eqop)) /* shouldn't happen */
+ elog(ERROR, "could not find equality operator for ordering operator %u",
+ sortop);
+
+ sortcl = makeNode(SortGroupClause);
+ sortcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist);
+ sortcl->eqop = eqop;
+ sortcl->sortop = sortop;
+ sortcl->reverse_sort = false;
+ sortcl->nulls_first = false;
+ sortcl->hashable = false; /* no need to make this accurate */
+ sortList = lappend(sortList, sortcl);
+ }
+ if (sjinfo->semi_can_hash)
+ {
+ /* Create a GROUP BY list for the Agg node to use */
+ Oid eq_oper;
+ SortGroupClause *groupcl;
+
+ /*
+ * Get the hashable equality operators for the Agg node to use.
+ * Normally these are the same as the IN clause operators, but if
+ * those are cross-type operators then the equality operators are
+ * the ones for the IN clause operators' RHS datatype.
+ */
+ if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper))
+ elog(ERROR, "could not find compatible hash operator for operator %u",
+ in_oper);
+
+ groupcl = makeNode(SortGroupClause);
+ groupcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist);
+ groupcl->eqop = eq_oper;
+ groupcl->sortop = sortop;
+ groupcl->reverse_sort = false;
+ groupcl->nulls_first = false;
+ groupcl->hashable = true;
+ groupClause = lappend(groupClause, groupcl);
+ }
+ }
+
+ unique_rel->reltarget = create_pathtarget(root, newtlist);
+ if (!IS_OTHER_REL(rel))
+ sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, newtlist);
+ else
+ sortPathkeys = rel->top_parent->unique_pathkeys;
+
+ /* build unique paths based on input rel's pathlist */
+ create_final_unique_paths(root, rel, sortPathkeys, groupClause,
+ sjinfo, unique_rel);
+
+ /* build unique paths based on input rel's partial_pathlist */
+ create_partial_unique_paths(root, rel, sortPathkeys, groupClause,
+ sjinfo, unique_rel);
+
+ /* Now choose the best path(s) */
+ set_cheapest(unique_rel);
+
+ /*
+ * There should be no partial paths for the unique relation; otherwise, we
+ * won't be able to properly guarantee uniqueness.
+ */
+ Assert(unique_rel->partial_pathlist == NIL);
+
+ /* Cache the result */
+ rel->unique_rel = unique_rel;
+ rel->unique_pathkeys = sortPathkeys;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return unique_rel;
+}
+
+/*
+ * create_final_unique_paths
+ * Create unique paths in 'unique_rel' based on 'input_rel' pathlist
+ */
+static void
+create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ List *sortPathkeys, List *groupClause,
+ SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel)
+{
+ /* Consider sort-based implementations, if possible. */
+ if (sjinfo->semi_can_btree)
+ {
+ ListCell *lc;
+
+ /*
+ * Use any available suitably-sorted path as input, and also consider
+ * sorting the cheapest-total path and incremental sort on any paths
+ * with presorted keys.
+ */
+ foreach(lc, input_rel->pathlist)
+ {
+ Path *input_path = (Path *) lfirst(lc);
+ Path *path;
+ bool is_sorted;
+ int presorted_keys;
+
+ /*
+ * Make a separate ProjectionPath in case we need a Result node.
+ */
+ path = (Path *) create_projection_path(root,
+ unique_rel,
+ input_path,
+ unique_rel->reltarget);
+
+ is_sorted = pathkeys_count_contained_in(sortPathkeys,
+ path->pathkeys,
+ &presorted_keys);
+
+ if (!is_sorted)
+ {
+ /*
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest input path).
+ */
+ if (input_path != input_rel->cheapest_total_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
+
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ path = (Path *) create_sort_path(root,
+ unique_rel,
+ path,
+ sortPathkeys,
+ -1.0);
+ else
+ path = (Path *) create_incremental_sort_path(root,
+ unique_rel,
+ path,
+ sortPathkeys,
+ presorted_keys,
+ -1.0);
+ }
+
+ path = (Path *) create_unique_path(root, unique_rel, path,
+ list_length(sortPathkeys),
+ unique_rel->rows);
+
+ add_path(unique_rel, path);
+ }
+ }
+
+ /* Consider hash-based implementation, if possible. */
+ if (sjinfo->semi_can_hash)
+ {
+ Path *path;
+
+ /*
+ * Make a separate ProjectionPath in case we need a Result node.
+ */
+ path = (Path *) create_projection_path(root,
+ unique_rel,
+ input_rel->cheapest_total_path,
+ unique_rel->reltarget);
+
+ path = (Path *) create_agg_path(root,
+ unique_rel,
+ path,
+ unique_rel->reltarget,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupClause,
+ NIL,
+ NULL,
+ unique_rel->rows);
+
+ add_path(unique_rel, path);
+
+ }
+}
+
+/*
+ * create_partial_unique_paths
+ * Create unique paths in 'unique_rel' based on 'input_rel' partial_pathlist
+ */
+static void
+create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ List *sortPathkeys, List *groupClause,
+ SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel)
+{
+ RelOptInfo *partial_unique_rel;
+ Path *cheapest_partial_path;
+
+ /* nothing to do when there are no partial paths in the input rel */
+ if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
+ return;
+
+ cheapest_partial_path = linitial(input_rel->partial_pathlist);
+
+ partial_unique_rel = makeNode(RelOptInfo);
+ memcpy(partial_unique_rel, input_rel, sizeof(RelOptInfo));
+
+ /*
+ * clear path info
+ */
+ partial_unique_rel->pathlist = NIL;
+ partial_unique_rel->ppilist = NIL;
+ partial_unique_rel->partial_pathlist = NIL;
+ partial_unique_rel->cheapest_startup_path = NULL;
+ partial_unique_rel->cheapest_total_path = NULL;
+ partial_unique_rel->cheapest_parameterized_paths = NIL;
+
+ /* Estimate number of output rows */
+ partial_unique_rel->rows = estimate_num_groups(root,
+ sjinfo->semi_rhs_exprs,
+ cheapest_partial_path->rows,
+ NULL,
+ NULL);
+ partial_unique_rel->reltarget = unique_rel->reltarget;
+
+ /* Consider sort-based implementations, if possible. */
+ if (sjinfo->semi_can_btree)
+ {
+ ListCell *lc;
+
+ /*
+ * Use any available suitably-sorted path as input, and also consider
+ * sorting the cheapest partial path and incremental sort on any paths
+ * with presorted keys.
+ */
+ foreach(lc, input_rel->partial_pathlist)
+ {
+ Path *input_path = (Path *) lfirst(lc);
+ Path *path;
+ bool is_sorted;
+ int presorted_keys;
+
+ /*
+ * Make a separate ProjectionPath in case we need a Result node.
+ */
+ path = (Path *) create_projection_path(root,
+ partial_unique_rel,
+ input_path,
+ partial_unique_rel->reltarget);
+
+ is_sorted = pathkeys_count_contained_in(sortPathkeys,
+ path->pathkeys,
+ &presorted_keys);
+
+ if (!is_sorted)
+ {
+ /*
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted
+ * keys when incremental sort is disabled unless it's the
+ * cheapest input path).
+ */
+ if (input_path != cheapest_partial_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
+
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ path = (Path *) create_sort_path(root,
+ partial_unique_rel,
+ path,
+ sortPathkeys,
+ -1.0);
+ else
+ path = (Path *) create_incremental_sort_path(root,
+ partial_unique_rel,
+ path,
+ sortPathkeys,
+ presorted_keys,
+ -1.0);
+ }
+
+ path = (Path *) create_unique_path(root, partial_unique_rel, path,
+ list_length(sortPathkeys),
+ partial_unique_rel->rows);
+
+ add_partial_path(partial_unique_rel, path);
+ }
+ }
+
+ /* Consider hash-based implementation, if possible. */
+ if (sjinfo->semi_can_hash)
+ {
+ Path *path;
+
+ /*
+ * Make a separate ProjectionPath in case we need a Result node.
+ */
+ path = (Path *) create_projection_path(root,
+ partial_unique_rel,
+ cheapest_partial_path,
+ partial_unique_rel->reltarget);
+
+ path = (Path *) create_agg_path(root,
+ partial_unique_rel,
+ path,
+ partial_unique_rel->reltarget,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupClause,
+ NIL,
+ NULL,
+ partial_unique_rel->rows);
+
+ add_partial_path(partial_unique_rel, path);
+ }
+
+ if (partial_unique_rel->partial_pathlist != NIL)
+ {
+ generate_useful_gather_paths(root, partial_unique_rel, true);
+ set_cheapest(partial_unique_rel);
+
+ /*
+ * Finally, create paths to unique-ify the final result. This step is
+ * needed to remove any duplicates due to combining rows from parallel
+ * workers.
+ */
+ create_final_unique_paths(root, partial_unique_rel,
+ sortPathkeys, groupClause,
+ sjinfo, unique_rel);
+ }
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index eab44da65b8..28a4ae64440 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -929,11 +929,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
make_pathkeys_for_sortclauses(root, groupList, tlist),
-1.0);
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(path->pathkeys),
- dNumGroups);
+ path = (Path *) create_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
add_path(result_rel, path);
@@ -946,11 +946,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
make_pathkeys_for_sortclauses(root, groupList, tlist),
-1.0);
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(path->pathkeys),
- dNumGroups);
+ path = (Path *) create_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
add_path(result_rel, path);
}
}
@@ -970,11 +970,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
NULL);
/* and make the MergeAppend unique */
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(tlist),
- dNumGroups);
+ path = (Path *) create_unique_path(root,
+ result_rel,
+ path,
+ list_length(tlist),
+ dNumGroups);
add_path(result_rel, path);
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..2ee06dc7317 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -46,7 +46,6 @@ typedef enum
*/
#define STD_FUZZ_FACTOR 1.01
-static List *translate_sub_tlist(List *tlist, int relid);
static int append_total_cost_compare(const ListCell *a, const ListCell *b);
static int append_startup_cost_compare(const ListCell *a, const ListCell *b);
static List *reparameterize_pathlist_by_child(PlannerInfo *root,
@@ -381,7 +380,6 @@ set_cheapest(RelOptInfo *parent_rel)
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
- parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
parent_rel->cheapest_parameterized_paths = parameterized_paths;
}
@@ -1712,246 +1710,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
return pathnode;
}
-/*
- * create_unique_path
- * Creates a path representing elimination of distinct rows from the
- * input data. Distinct-ness is defined according to the needs of the
- * semijoin represented by sjinfo. If it is not possible to identify
- * how to make the data unique, NULL is returned.
- *
- * If used at all, this is likely to be called repeatedly on the same rel;
- * and the input subpath should always be the same (the cheapest_total path
- * for the rel). So we cache the result.
- */
-UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
- SpecialJoinInfo *sjinfo)
-{
- UniquePath *pathnode;
- Path sort_path; /* dummy for result of cost_sort */
- Path agg_path; /* dummy for result of cost_agg */
- MemoryContext oldcontext;
- int numCols;
-
- /* Caller made a mistake if subpath isn't cheapest_total ... */
- Assert(subpath == rel->cheapest_total_path);
- Assert(subpath->parent == rel);
- /* ... or if SpecialJoinInfo is the wrong one */
- Assert(sjinfo->jointype == JOIN_SEMI);
- Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
-
- /* If result already cached, return it */
- if (rel->cheapest_unique_path)
- return (UniquePath *) rel->cheapest_unique_path;
-
- /* If it's not possible to unique-ify, return NULL */
- if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
- return NULL;
-
- /*
- * When called during GEQO join planning, we are in a short-lived memory
- * context. We must make sure that the path and any subsidiary data
- * structures created for a baserel survive the GEQO cycle, else the
- * baserel is trashed for future GEQO cycles. On the other hand, when we
- * are creating those for a joinrel during GEQO, we don't want them to
- * clutter the main planning context. Upshot is that the best solution is
- * to explicitly allocate memory in the same context the given RelOptInfo
- * is in.
- */
- oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
-
- pathnode = makeNode(UniquePath);
-
- pathnode->path.pathtype = T_Unique;
- pathnode->path.parent = rel;
- pathnode->path.pathtarget = rel->reltarget;
- pathnode->path.param_info = subpath->param_info;
- pathnode->path.parallel_aware = false;
- pathnode->path.parallel_safe = rel->consider_parallel &&
- subpath->parallel_safe;
- pathnode->path.parallel_workers = subpath->parallel_workers;
-
- /*
- * Assume the output is unsorted, since we don't necessarily have pathkeys
- * to represent it. (This might get overridden below.)
- */
- pathnode->path.pathkeys = NIL;
-
- pathnode->subpath = subpath;
-
- /*
- * Under GEQO and when planning child joins, the sjinfo might be
- * short-lived, so we'd better make copies of data structures we extract
- * from it.
- */
- pathnode->in_operators = copyObject(sjinfo->semi_operators);
- pathnode->uniq_exprs = copyObject(sjinfo->semi_rhs_exprs);
-
- /*
- * If the input is a relation and it has a unique index that proves the
- * semi_rhs_exprs are unique, then we don't need to do anything. Note
- * that relation_has_unique_index_for automatically considers restriction
- * clauses for the rel, as well.
- */
- if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
- relation_has_unique_index_for(root, rel, NIL,
- sjinfo->semi_rhs_exprs,
- sjinfo->semi_operators))
- {
- pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->path.rows = rel->rows;
- pathnode->path.disabled_nodes = subpath->disabled_nodes;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost;
- pathnode->path.pathkeys = subpath->pathkeys;
-
- rel->cheapest_unique_path = (Path *) pathnode;
-
- MemoryContextSwitchTo(oldcontext);
-
- return pathnode;
- }
-
- /*
- * If the input is a subquery whose output must be unique already, then we
- * don't need to do anything. The test for uniqueness has to consider
- * exactly which columns we are extracting; for example "SELECT DISTINCT
- * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
- * this optimization unless semi_rhs_exprs consists only of simple Vars
- * referencing subquery outputs. (Possibly we could do something with
- * expressions in the subquery outputs, too, but for now keep it simple.)
- */
- if (rel->rtekind == RTE_SUBQUERY)
- {
- RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
-
- if (query_supports_distinctness(rte->subquery))
- {
- List *sub_tlist_colnos;
-
- sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
- rel->relid);
-
- if (sub_tlist_colnos &&
- query_is_distinct_for(rte->subquery,
- sub_tlist_colnos,
- sjinfo->semi_operators))
- {
- pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->path.rows = rel->rows;
- pathnode->path.disabled_nodes = subpath->disabled_nodes;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost;
- pathnode->path.pathkeys = subpath->pathkeys;
-
- rel->cheapest_unique_path = (Path *) pathnode;
-
- MemoryContextSwitchTo(oldcontext);
-
- return pathnode;
- }
- }
- }
-
- /* Estimate number of output rows */
- pathnode->path.rows = estimate_num_groups(root,
- sjinfo->semi_rhs_exprs,
- rel->rows,
- NULL,
- NULL);
- numCols = list_length(sjinfo->semi_rhs_exprs);
-
- if (sjinfo->semi_can_btree)
- {
- /*
- * Estimate cost for sort+unique implementation
- */
- cost_sort(&sort_path, root, NIL,
- subpath->disabled_nodes,
- subpath->total_cost,
- rel->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /*
- * Charge one cpu_operator_cost per comparison per input tuple. We
- * assume all columns get compared at most of the tuples. (XXX
- * probably this is an overestimate.) This should agree with
- * create_upper_unique_path.
- */
- sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
- }
-
- if (sjinfo->semi_can_hash)
- {
- /*
- * Estimate the overhead per hashtable entry at 64 bytes (same as in
- * planner.c).
- */
- int hashentrysize = subpath->pathtarget->width + 64;
-
- if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
- {
- /*
- * We should not try to hash. Hack the SpecialJoinInfo to
- * remember this, in case we come through here again.
- */
- sjinfo->semi_can_hash = false;
- }
- else
- cost_agg(&agg_path, root,
- AGG_HASHED, NULL,
- numCols, pathnode->path.rows,
- NIL,
- subpath->disabled_nodes,
- subpath->startup_cost,
- subpath->total_cost,
- rel->rows,
- subpath->pathtarget->width);
- }
-
- if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
- {
- if (agg_path.disabled_nodes < sort_path.disabled_nodes ||
- (agg_path.disabled_nodes == sort_path.disabled_nodes &&
- agg_path.total_cost < sort_path.total_cost))
- pathnode->umethod = UNIQUE_PATH_HASH;
- else
- pathnode->umethod = UNIQUE_PATH_SORT;
- }
- else if (sjinfo->semi_can_btree)
- pathnode->umethod = UNIQUE_PATH_SORT;
- else if (sjinfo->semi_can_hash)
- pathnode->umethod = UNIQUE_PATH_HASH;
- else
- {
- /* we can get here only if we abandoned hashing above */
- MemoryContextSwitchTo(oldcontext);
- return NULL;
- }
-
- if (pathnode->umethod == UNIQUE_PATH_HASH)
- {
- pathnode->path.disabled_nodes = agg_path.disabled_nodes;
- pathnode->path.startup_cost = agg_path.startup_cost;
- pathnode->path.total_cost = agg_path.total_cost;
- }
- else
- {
- pathnode->path.disabled_nodes = sort_path.disabled_nodes;
- pathnode->path.startup_cost = sort_path.startup_cost;
- pathnode->path.total_cost = sort_path.total_cost;
- }
-
- rel->cheapest_unique_path = (Path *) pathnode;
-
- MemoryContextSwitchTo(oldcontext);
-
- return pathnode;
-}
-
/*
* create_gather_merge_path
*
@@ -2003,36 +1761,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
return pathnode;
}
-/*
- * translate_sub_tlist - get subquery column numbers represented by tlist
- *
- * The given targetlist usually contains only Vars referencing the given relid.
- * Extract their varattnos (ie, the column numbers of the subquery) and return
- * as an integer List.
- *
- * If any of the tlist items is not a simple Var, we cannot determine whether
- * the subquery's uniqueness condition (if any) matches ours, so punt and
- * return NIL.
- */
-static List *
-translate_sub_tlist(List *tlist, int relid)
-{
- List *result = NIL;
- ListCell *l;
-
- foreach(l, tlist)
- {
- Var *var = (Var *) lfirst(l);
-
- if (!var || !IsA(var, Var) ||
- var->varno != relid)
- return NIL; /* punt */
-
- result = lappend_int(result, var->varattno);
- }
- return result;
-}
-
/*
* create_gather_path
* Creates a path corresponding to a gather scan, returning the
@@ -2790,8 +2518,7 @@ create_projection_path(PlannerInfo *root,
pathnode->path.pathtype = T_Result;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
- /* For now, assume we are above any joins, so no parameterization */
- pathnode->path.param_info = NULL;
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe &&
@@ -3046,8 +2773,7 @@ create_incremental_sort_path(PlannerInfo *root,
pathnode->path.parent = rel;
/* Sort doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
- /* For now, assume we are above any joins, so no parameterization */
- pathnode->path.param_info = NULL;
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3094,8 +2820,7 @@ create_sort_path(PlannerInfo *root,
pathnode->path.parent = rel;
/* Sort doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
- /* For now, assume we are above any joins, so no parameterization */
- pathnode->path.param_info = NULL;
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3171,13 +2896,10 @@ create_group_path(PlannerInfo *root,
}
/*
- * create_upper_unique_path
+ * create_unique_path
* Creates a pathnode that represents performing an explicit Unique step
* on presorted input.
*
- * This produces a Unique plan node, but the use-case is so different from
- * create_unique_path that it doesn't seem worth trying to merge the two.
- *
* 'rel' is the parent relation associated with the result
* 'subpath' is the path representing the source of data
* 'numCols' is the number of grouping columns
@@ -3186,21 +2908,20 @@ create_group_path(PlannerInfo *root,
* The input path must be sorted on the grouping columns, plus possibly
* additional columns; so the first numCols pathkeys are the grouping columns
*/
-UpperUniquePath *
-create_upper_unique_path(PlannerInfo *root,
- RelOptInfo *rel,
- Path *subpath,
- int numCols,
- double numGroups)
+UniquePath *
+create_unique_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ int numCols,
+ double numGroups)
{
- UpperUniquePath *pathnode = makeNode(UpperUniquePath);
+ UniquePath *pathnode = makeNode(UniquePath);
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
/* Unique doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
- /* For now, assume we are above any joins, so no parameterization */
- pathnode->path.param_info = NULL;
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3256,8 +2977,7 @@ create_agg_path(PlannerInfo *root,
pathnode->path.pathtype = T_Agg;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
- /* For now, assume we are above any joins, so no parameterization */
- pathnode->path.param_info = NULL;
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ff507331a06..469b98009b9 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -217,7 +217,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->partial_pathlist = NIL;
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
- rel->cheapest_unique_path = NULL;
rel->cheapest_parameterized_paths = NIL;
rel->relid = relid;
rel->rtekind = rte->rtekind;
@@ -269,6 +268,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->fdw_private = NULL;
rel->unique_for_rels = NIL;
rel->non_unique_for_rels = NIL;
+ rel->unique_rel = NULL;
+ rel->unique_pathkeys = NIL;
rel->baserestrictinfo = NIL;
rel->baserestrictcost.startup = 0;
rel->baserestrictcost.per_tuple = 0;
@@ -713,7 +714,6 @@ build_join_rel(PlannerInfo *root,
joinrel->partial_pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
- joinrel->cheapest_unique_path = NULL;
joinrel->cheapest_parameterized_paths = NIL;
/* init direct_lateral_relids from children; we'll finish it up below */
joinrel->direct_lateral_relids =
@@ -748,6 +748,8 @@ build_join_rel(PlannerInfo *root,
joinrel->fdw_private = NULL;
joinrel->unique_for_rels = NIL;
joinrel->non_unique_for_rels = NIL;
+ joinrel->unique_rel = NULL;
+ joinrel->unique_pathkeys = NIL;
joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0;
@@ -906,7 +908,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->partial_pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
- joinrel->cheapest_unique_path = NULL;
joinrel->cheapest_parameterized_paths = NIL;
joinrel->direct_lateral_relids = NULL;
joinrel->lateral_relids = NULL;
@@ -933,6 +934,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->useridiscurrent = false;
joinrel->fdwroutine = NULL;
joinrel->fdw_private = NULL;
+ joinrel->unique_rel = NULL;
+ joinrel->unique_pathkeys = NIL;
joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0;
@@ -1488,7 +1491,6 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
upperrel->pathlist = NIL;
upperrel->cheapest_startup_path = NULL;
upperrel->cheapest_total_path = NULL;
- upperrel->cheapest_unique_path = NULL;
upperrel->cheapest_parameterized_paths = NIL;
root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index fbe333d88fa..e97566b5938 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -319,8 +319,8 @@ typedef enum JoinType
* These codes are used internally in the planner, but are not supported
* by the executor (nor, indeed, by most of the planner).
*/
- JOIN_UNIQUE_OUTER, /* LHS path must be made unique */
- JOIN_UNIQUE_INNER, /* RHS path must be made unique */
+ JOIN_UNIQUE_OUTER, /* LHS has be made unique */
+ JOIN_UNIQUE_INNER, /* RHS has be made unique */
/*
* We might need additional join types someday.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 6567759595d..c7e80113e95 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -924,7 +924,6 @@ typedef struct RelOptInfo
List *partial_pathlist; /* partial Paths */
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
- struct Path *cheapest_unique_path;
List *cheapest_parameterized_paths;
/*
@@ -1002,6 +1001,12 @@ typedef struct RelOptInfo
/* known not unique for these set(s) */
List *non_unique_for_rels;
+ /*
+ * information about unique-ification of this relation
+ */
+ struct RelOptInfo *unique_rel;
+ List *unique_pathkeys;
+
/*
* used by various scans and joins:
*/
@@ -1739,8 +1744,8 @@ typedef struct ParamPathInfo
* and the specified outer rel(s).
*
* "rows" is the same as parent->rows in simple paths, but in parameterized
- * paths and UniquePaths it can be less than parent->rows, reflecting the
- * fact that we've filtered by extra join conditions or removed duplicates.
+ * paths it can be less than parent->rows, reflecting the fact that we've
+ * filtered by extra join conditions.
*
* "pathkeys" is a List of PathKey nodes (see above), describing the sort
* ordering of the path's output rows.
@@ -2137,34 +2142,6 @@ typedef struct MemoizePath
* if unknown */
} MemoizePath;
-/*
- * UniquePath represents elimination of distinct rows from the output of
- * its subpath.
- *
- * This can represent significantly different plans: either hash-based or
- * sort-based implementation, or a no-op if the input path can be proven
- * distinct already. The decision is sufficiently localized that it's not
- * worth having separate Path node types. (Note: in the no-op case, we could
- * eliminate the UniquePath node entirely and just return the subpath; but
- * it's convenient to have a UniquePath in the path tree to signal upper-level
- * routines that the input is known distinct.)
- */
-typedef enum UniquePathMethod
-{
- UNIQUE_PATH_NOOP, /* input is known unique already */
- UNIQUE_PATH_HASH, /* use hashing */
- UNIQUE_PATH_SORT, /* use sorting */
-} UniquePathMethod;
-
-typedef struct UniquePath
-{
- Path path;
- Path *subpath;
- UniquePathMethod umethod;
- List *in_operators; /* equality operators of the IN clause */
- List *uniq_exprs; /* expressions to be made unique */
-} UniquePath;
-
/*
* GatherPath runs several copies of a plan in parallel and collects the
* results. The parallel leader may also execute the plan, unless the
@@ -2371,17 +2348,17 @@ typedef struct GroupPath
} GroupPath;
/*
- * UpperUniquePath represents adjacent-duplicate removal (in presorted input)
+ * UniquePath represents adjacent-duplicate removal (in presorted input)
*
* The columns to be compared are the first numkeys columns of the path's
* pathkeys. The input is presumed already sorted that way.
*/
-typedef struct UpperUniquePath
+typedef struct UniquePath
{
Path path;
Path *subpath; /* path representing input source */
int numkeys; /* number of pathkey columns to compare */
-} UpperUniquePath;
+} UniquePath;
/*
* AggPath represents generic computation of aggregate functions
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 60dcdb77e41..71d2945b175 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -91,8 +91,6 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root,
bool singlerow,
bool binary_mode,
double calls);
-extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
- Path *subpath, SpecialJoinInfo *sjinfo);
extern GatherPath *create_gather_path(PlannerInfo *root,
RelOptInfo *rel, Path *subpath, PathTarget *target,
Relids required_outer, double *rows);
@@ -223,11 +221,11 @@ extern GroupPath *create_group_path(PlannerInfo *root,
List *groupClause,
List *qual,
double numGroups);
-extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
- RelOptInfo *rel,
- Path *subpath,
- int numCols,
- double numGroups);
+extern UniquePath *create_unique_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ int numCols,
+ double numGroups);
extern AggPath *create_agg_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 347c582a789..f220e9a270d 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -59,4 +59,7 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
+extern RelOptInfo *create_unique_paths(PlannerInfo *root, RelOptInfo *rel,
+ SpecialJoinInfo *sjinfo);
+
#endif /* PLANNER_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f35a0b18c37..bb1807b4521 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -9226,23 +9226,20 @@ where exists (select 1 from tenk1 t3
---------------------------------------------------------------------------------
Nested Loop
Output: t1.unique1, t2.hundred
- -> Hash Join
+ -> Nested Loop
Output: t1.unique1, t3.tenthous
- Hash Cond: (t3.thousand = t1.unique1)
- -> HashAggregate
+ -> Index Only Scan using onek_unique1 on public.onek t1
+ Output: t1.unique1
+ Index Cond: (t1.unique1 < 1)
+ -> Unique
Output: t3.thousand, t3.tenthous
- Group Key: t3.thousand, t3.tenthous
-> Index Only Scan using tenk1_thous_tenthous on public.tenk1 t3
Output: t3.thousand, t3.tenthous
- -> Hash
- Output: t1.unique1
- -> Index Only Scan using onek_unique1 on public.onek t1
- Output: t1.unique1
- Index Cond: (t1.unique1 < 1)
+ Index Cond: (t3.thousand = t1.unique1)
-> Index Only Scan using tenk1_hundred on public.tenk1 t2
Output: t2.hundred
Index Cond: (t2.hundred = t3.tenthous)
-(18 rows)
+(15 rows)
-- ... unless it actually is unique
create table j3 as select unique1, tenthous from onek;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index d5368186caa..24e06845f92 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1134,48 +1134,50 @@ EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
QUERY PLAN
---------------------------------------------------------------------------------
- Sort
+ Merge Append
Sort Key: t1.a
- -> Append
- -> Nested Loop
- Join Filter: (t1_2.a = t1_5.b)
- -> HashAggregate
- Group Key: t1_5.b
+ -> Nested Loop
+ Join Filter: (t1_2.a = t1_5.b)
+ -> Unique
+ -> Sort
+ Sort Key: t1_5.b
-> Hash Join
Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_5.b)
-> Seq Scan on prt1_e_p1 t2_1
-> Hash
-> Seq Scan on prt2_p1 t1_5
Filter: (a = 0)
- -> Index Scan using iprt1_p1_a on prt1_p1 t1_2
- Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
- Filter: (b = 0)
- -> Nested Loop
- Join Filter: (t1_3.a = t1_6.b)
- -> HashAggregate
- Group Key: t1_6.b
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_2
+ Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
+ Filter: (b = 0)
+ -> Nested Loop
+ Join Filter: (t1_3.a = t1_6.b)
+ -> Unique
+ -> Sort
+ Sort Key: t1_6.b
-> Hash Join
Hash Cond: (((t2_2.a + t2_2.b) / 2) = t1_6.b)
-> Seq Scan on prt1_e_p2 t2_2
-> Hash
-> Seq Scan on prt2_p2 t1_6
Filter: (a = 0)
- -> Index Scan using iprt1_p2_a on prt1_p2 t1_3
- Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
- Filter: (b = 0)
- -> Nested Loop
- Join Filter: (t1_4.a = t1_7.b)
- -> HashAggregate
- Group Key: t1_7.b
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_3
+ Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
+ Filter: (b = 0)
+ -> Nested Loop
+ Join Filter: (t1_4.a = t1_7.b)
+ -> Unique
+ -> Sort
+ Sort Key: t1_7.b
-> Nested Loop
-> Seq Scan on prt2_p3 t1_7
Filter: (a = 0)
-> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t2_3
Index Cond: (((a + b) / 2) = t1_7.b)
- -> Index Scan using iprt1_p3_a on prt1_p3 t1_4
- Index Cond: (a = ((t2_3.a + t2_3.b) / 2))
- Filter: (b = 0)
-(41 rows)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_4
+ Index Cond: (a = ((t2_3.a + t2_3.b) / 2))
+ Filter: (b = 0)
+(43 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
a | b | c
@@ -1190,46 +1192,48 @@ EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
QUERY PLAN
---------------------------------------------------------------------------
- Sort
+ Merge Append
Sort Key: t1.a
- -> Append
- -> Nested Loop
- -> HashAggregate
- Group Key: t1_6.b
+ -> Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: t1_6.b
-> Hash Semi Join
Hash Cond: (t1_6.b = ((t1_9.a + t1_9.b) / 2))
-> Seq Scan on prt2_p1 t1_6
-> Hash
-> Seq Scan on prt1_e_p1 t1_9
Filter: (c = 0)
- -> Index Scan using iprt1_p1_a on prt1_p1 t1_3
- Index Cond: (a = t1_6.b)
- Filter: (b = 0)
- -> Nested Loop
- -> HashAggregate
- Group Key: t1_7.b
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_3
+ Index Cond: (a = t1_6.b)
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: t1_7.b
-> Hash Semi Join
Hash Cond: (t1_7.b = ((t1_10.a + t1_10.b) / 2))
-> Seq Scan on prt2_p2 t1_7
-> Hash
-> Seq Scan on prt1_e_p2 t1_10
Filter: (c = 0)
- -> Index Scan using iprt1_p2_a on prt1_p2 t1_4
- Index Cond: (a = t1_7.b)
- Filter: (b = 0)
- -> Nested Loop
- -> HashAggregate
- Group Key: t1_8.b
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_4
+ Index Cond: (a = t1_7.b)
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: t1_8.b
-> Hash Semi Join
Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
-> Seq Scan on prt2_p3 t1_8
-> Hash
-> Seq Scan on prt1_e_p3 t1_11
Filter: (c = 0)
- -> Index Scan using iprt1_p3_a on prt1_p3 t1_5
- Index Cond: (a = t1_8.b)
- Filter: (b = 0)
-(39 rows)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_5
+ Index Cond: (a = t1_8.b)
+ Filter: (b = 0)
+(41 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
a | b | c
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 40d8056fcea..f1d91dfa4ca 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2612,21 +2612,23 @@ explain (costs off)
SELECT * FROM tenk1 A INNER JOIN tenk2 B
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd)
WHERE a.thousand < 750;
- QUERY PLAN
--------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------
Hash Join
Hash Cond: (c.odd = b.odd)
- -> Hash Join
- Hash Cond: (a.hundred = c.hundred)
- -> Seq Scan on tenk1 a
- Filter: (thousand < 750)
- -> Hash
- -> HashAggregate
- Group Key: c.odd, c.hundred
- -> Seq Scan on tenk2 c
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: c.odd, c.hundred
+ -> Seq Scan on tenk2 c
+ -> Memoize
+ Cache Key: c.hundred
+ Cache Mode: logical
+ -> Index Scan using tenk1_hundred on tenk1 a
+ Index Cond: (hundred = c.hundred)
+ Filter: (thousand < 750)
-> Hash
-> Seq Scan on tenk2 b
-(12 rows)
+(14 rows)
-- we can pull up the aggregate sublink into RHS of a left join.
explain (costs off)
@@ -2672,18 +2674,17 @@ EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
ORDER BY unique1;
- QUERY PLAN
------------------------------------------------------------------
- Sort
- Sort Key: onek.unique1
- -> Nested Loop
- -> HashAggregate
- Group Key: "*VALUES*".column1, "*VALUES*".column2
+ QUERY PLAN
+----------------------------------------------------------------
+ Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
-> Values Scan on "*VALUES*"
- -> Index Scan using onek_unique1 on onek
- Index Cond: (unique1 = "*VALUES*".column1)
- Filter: ("*VALUES*".column2 = ten)
-(9 rows)
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = "*VALUES*".column1)
+ Filter: ("*VALUES*".column2 = ten)
+(8 rows)
EXPLAIN (COSTS OFF)
SELECT * FROM onek
@@ -2858,12 +2859,10 @@ SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
-> Unique
-> Sort
Sort Key: "*VALUES*".column1
- -> Sort
- Sort Key: "*VALUES*".column1
- -> Values Scan on "*VALUES*"
+ -> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = "*VALUES*".column1)
-(9 rows)
+(7 rows)
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a8346cda633..6b715d456c6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3140,7 +3140,6 @@ UnicodeNormalizationForm
UnicodeNormalizationQC
Unique
UniquePath
-UniquePathMethod
UniqueState
UnlistenStmt
UnresolvedTup
@@ -3155,7 +3154,6 @@ UpgradeTaskSlotState
UpgradeTaskStep
UploadManifestCmd
UpperRelationKind
-UpperUniquePath
UserAuth
UserContext
UserMapping
--
2.43.0