v1-0001-add-preliminary-infrastructure.patch
text/x-diff
Filename: v1-0001-add-preliminary-infrastructure.patch
Type: text/x-diff
Part: 1
Message:
Making Vars outer-join aware
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: unified
Series: patch v1-0001
| File | + | − |
|---|---|---|
| src/backend/nodes/copyfuncs.c | 6 | 0 |
| src/backend/nodes/equalfuncs.c | 7 | 1 |
| src/backend/nodes/makefuncs.c | 6 | 4 |
| src/backend/nodes/nodeFuncs.c | 2 | 1 |
| src/backend/nodes/outfuncs.c | 9 | 2 |
| src/backend/nodes/readfuncs.c | 2 | 0 |
| src/backend/rewrite/rewriteManip.c | 173 | 5 |
| src/backend/utils/misc/queryjumble.c | 5 | 0 |
| src/include/nodes/parsenodes.h | 8 | 0 |
| src/include/nodes/pathnodes.h | 93 | 42 |
| src/include/nodes/plannodes.h | 3 | 1 |
| src/include/nodes/primnodes.h | 9 | 0 |
| src/include/parser/parse_node.h | 8 | 0 |
| src/include/rewrite/rewriteManip.h | 4 | 0 |
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 51d630fa89..a34e7643d7 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -789,6 +789,7 @@ _copyForeignScan(const ForeignScan *from)
COPY_NODE_FIELD(fdw_scan_tlist);
COPY_NODE_FIELD(fdw_recheck_quals);
COPY_BITMAPSET_FIELD(fs_relids);
+ COPY_BITMAPSET_FIELD(fs_base_relids);
COPY_SCALAR_FIELD(fsSystemCol);
return newnode;
@@ -1458,6 +1459,7 @@ _copyVar(const Var *from)
COPY_SCALAR_FIELD(vartype);
COPY_SCALAR_FIELD(vartypmod);
COPY_SCALAR_FIELD(varcollid);
+ COPY_BITMAPSET_FIELD(varnullingrels);
COPY_SCALAR_FIELD(varlevelsup);
COPY_SCALAR_FIELD(varnosyn);
COPY_SCALAR_FIELD(varattnosyn);
@@ -2825,6 +2827,7 @@ _copyRestrictInfo(const RestrictInfo *from)
COPY_SCALAR_FIELD(leakproof);
COPY_SCALAR_FIELD(has_volatile);
COPY_SCALAR_FIELD(security_level);
+ COPY_SCALAR_FIELD(num_base_rels);
COPY_BITMAPSET_FIELD(clause_relids);
COPY_BITMAPSET_FIELD(required_relids);
COPY_BITMAPSET_FIELD(outer_relids);
@@ -2867,6 +2870,7 @@ _copyPlaceHolderVar(const PlaceHolderVar *from)
COPY_NODE_FIELD(phexpr);
COPY_BITMAPSET_FIELD(phrels);
+ COPY_BITMAPSET_FIELD(phnullingrels);
COPY_SCALAR_FIELD(phid);
COPY_SCALAR_FIELD(phlevelsup);
@@ -2886,6 +2890,8 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
COPY_BITMAPSET_FIELD(syn_lefthand);
COPY_BITMAPSET_FIELD(syn_righthand);
COPY_SCALAR_FIELD(jointype);
+ COPY_SCALAR_FIELD(ojrelid);
+ COPY_BITMAPSET_FIELD(strict_relids);
COPY_SCALAR_FIELD(lhs_strict);
COPY_SCALAR_FIELD(delay_upper_joins);
COPY_SCALAR_FIELD(semi_can_btree);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e747e1667d..d8d1d6cbae 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -231,6 +231,7 @@ _equalVar(const Var *a, const Var *b)
COMPARE_SCALAR_FIELD(vartype);
COMPARE_SCALAR_FIELD(vartypmod);
COMPARE_SCALAR_FIELD(varcollid);
+ COMPARE_BITMAPSET_FIELD(varnullingrels);
COMPARE_SCALAR_FIELD(varlevelsup);
/*
@@ -1231,12 +1232,15 @@ _equalPlaceHolderVar(const PlaceHolderVar *a, const PlaceHolderVar *b)
* could get replaced by differently-numbered Params when sublink folding
* is done. (The end result of such a situation would be some
* unreferenced initplans, which is annoying but not really a problem.) On
- * the same reasoning, there is no need to examine phrels.
+ * the same reasoning, there is no need to examine phrels. But we do need
+ * to compare phnullingrels, as that is in some sense external to the
+ * value of the PHV proper.
*
* COMPARE_NODE_FIELD(phexpr);
*
* COMPARE_BITMAPSET_FIELD(phrels);
*/
+ COMPARE_BITMAPSET_FIELD(phnullingrels);
COMPARE_SCALAR_FIELD(phid);
COMPARE_SCALAR_FIELD(phlevelsup);
@@ -1251,6 +1255,8 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
COMPARE_BITMAPSET_FIELD(syn_lefthand);
COMPARE_BITMAPSET_FIELD(syn_righthand);
COMPARE_SCALAR_FIELD(jointype);
+ COMPARE_SCALAR_FIELD(ojrelid);
+ COMPARE_BITMAPSET_FIELD(strict_relids);
COMPARE_SCALAR_FIELD(lhs_strict);
COMPARE_SCALAR_FIELD(delay_upper_joins);
COMPARE_SCALAR_FIELD(semi_can_btree);
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index 28288dcfc1..19606c495f 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -81,11 +81,13 @@ makeVar(int varno,
var->varlevelsup = varlevelsup;
/*
- * Only a few callers need to make Var nodes with varnosyn/varattnosyn
- * different from varno/varattno. We don't provide separate arguments for
- * them, but just initialize them to the given varno/varattno. This
- * reduces code clutter and chance of error for most callers.
+ * Only a few callers need to make Var nodes with non-null varnullingrels,
+ * or with varnosyn/varattnosyn different from varno/varattno. We don't
+ * provide separate arguments for them, but just initialize them to NULL
+ * and the given varno/varattno. This reduces code clutter and chance of
+ * error for most callers.
*/
+ var->varnullingrels = NULL;
var->varnosyn = (Index) varno;
var->varattnosyn = varattno;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 4cb1744da6..ccf63515fa 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2847,6 +2847,7 @@ expression_tree_mutator(Node *node,
Var *newnode;
FLATCOPY(newnode, var, Var);
+ /* Assume we need not copy the varnullingrels bitmapset */
return (Node *) newnode;
}
break;
@@ -3442,7 +3443,7 @@ expression_tree_mutator(Node *node,
FLATCOPY(newnode, phv, PlaceHolderVar);
MUTATE(newnode->phexpr, phv->phexpr, Expr *);
- /* Assume we need not copy the relids bitmapset */
+ /* Assume we need not copy the relids bitmapsets */
return (Node *) newnode;
}
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ce12915592..408d8ace34 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -725,6 +725,7 @@ _outForeignScan(StringInfo str, const ForeignScan *node)
WRITE_NODE_FIELD(fdw_scan_tlist);
WRITE_NODE_FIELD(fdw_recheck_quals);
WRITE_BITMAPSET_FIELD(fs_relids);
+ WRITE_BITMAPSET_FIELD(fs_base_relids);
WRITE_BOOL_FIELD(fsSystemCol);
}
@@ -1146,6 +1147,7 @@ _outVar(StringInfo str, const Var *node)
WRITE_OID_FIELD(vartype);
WRITE_INT_FIELD(vartypmod);
WRITE_OID_FIELD(varcollid);
+ WRITE_BITMAPSET_FIELD(varnullingrels);
WRITE_UINT_FIELD(varlevelsup);
WRITE_UINT_FIELD(varnosyn);
WRITE_INT_FIELD(varattnosyn);
@@ -2459,6 +2461,8 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(plan_params);
WRITE_BITMAPSET_FIELD(outer_params);
WRITE_BITMAPSET_FIELD(all_baserels);
+ WRITE_BITMAPSET_FIELD(outer_join_rels);
+ WRITE_BITMAPSET_FIELD(all_query_rels);
WRITE_BITMAPSET_FIELD(nullable_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
@@ -2470,7 +2474,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(canon_pathkeys);
WRITE_NODE_FIELD(left_join_clauses);
WRITE_NODE_FIELD(right_join_clauses);
- WRITE_NODE_FIELD(full_join_clauses);
+ /* can't dump full_join_clauses because its contents are not Nodes */
WRITE_NODE_FIELD(join_info_list);
WRITE_BITMAPSET_FIELD(all_result_relids);
WRITE_BITMAPSET_FIELD(leaf_result_relids);
@@ -2552,7 +2556,6 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
WRITE_BOOL_FIELD(consider_partitionwise_join);
- WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_BOOL_FIELD(partbounds_merged);
WRITE_BITMAPSET_FIELD(live_parts);
WRITE_BITMAPSET_FIELD(all_partrels);
@@ -2709,6 +2712,7 @@ _outRestrictInfo(StringInfo str, const RestrictInfo *node)
WRITE_BOOL_FIELD(leakproof);
WRITE_ENUM_FIELD(has_volatile, VolatileFunctionStatus);
WRITE_UINT_FIELD(security_level);
+ WRITE_INT_FIELD(num_base_rels);
WRITE_BITMAPSET_FIELD(clause_relids);
WRITE_BITMAPSET_FIELD(required_relids);
WRITE_BITMAPSET_FIELD(outer_relids);
@@ -2749,6 +2753,7 @@ _outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
WRITE_NODE_FIELD(phexpr);
WRITE_BITMAPSET_FIELD(phrels);
+ WRITE_BITMAPSET_FIELD(phnullingrels);
WRITE_UINT_FIELD(phid);
WRITE_UINT_FIELD(phlevelsup);
}
@@ -2763,6 +2768,8 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
WRITE_BITMAPSET_FIELD(syn_lefthand);
WRITE_BITMAPSET_FIELD(syn_righthand);
WRITE_ENUM_FIELD(jointype, JoinType);
+ WRITE_UINT_FIELD(ojrelid);
+ WRITE_BITMAPSET_FIELD(strict_relids);
WRITE_BOOL_FIELD(lhs_strict);
WRITE_BOOL_FIELD(delay_upper_joins);
WRITE_BOOL_FIELD(semi_can_btree);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 6a05b69415..08b8ca78f0 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -622,6 +622,7 @@ _readVar(void)
READ_OID_FIELD(vartype);
READ_INT_FIELD(vartypmod);
READ_OID_FIELD(varcollid);
+ READ_BITMAPSET_FIELD(varnullingrels);
READ_UINT_FIELD(varlevelsup);
READ_UINT_FIELD(varnosyn);
READ_INT_FIELD(varattnosyn);
@@ -2312,6 +2313,7 @@ _readForeignScan(void)
READ_NODE_FIELD(fdw_scan_tlist);
READ_NODE_FIELD(fdw_recheck_quals);
READ_BITMAPSET_FIELD(fs_relids);
+ READ_BITMAPSET_FIELD(fs_base_relids);
READ_BOOL_FIELD(fsSystemCol);
READ_DONE();
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 101c39553a..a0a0026469 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -40,6 +40,13 @@ typedef struct
int win_location;
} locate_windowfunc_context;
+typedef struct
+{
+ Bitmapset *removable_relids; /* RT indexes to delete from nullingrels */
+ Bitmapset *except_relids; /* leave nodes of these rels untouched */
+ int sublevels_up; /* query nesting depth reached so far */
+} remove_nulling_relids_context;
+
static bool contain_aggs_of_level_walker(Node *node,
contain_aggs_of_level_context *context);
static bool locate_agg_of_level_walker(Node *node,
@@ -50,6 +57,9 @@ static bool locate_windowfunc_walker(Node *node,
static bool checkExprHasSubLink_walker(Node *node, void *context);
static Relids offset_relid_set(Relids relids, int offset);
static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
+static bool get_nulling_relids_walker(Node *node, Bitmapset **context);
+static Node *remove_nulling_relids_mutator(Node *node,
+ remove_nulling_relids_context *context);
/*
@@ -348,6 +358,8 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
if (var->varlevelsup == context->sublevels_up)
{
var->varno += context->offset;
+ var->varnullingrels = offset_relid_set(var->varnullingrels,
+ context->offset);
if (var->varnosyn > 0)
var->varnosyn += context->offset;
}
@@ -386,6 +398,8 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
{
phv->phrels = offset_relid_set(phv->phrels,
context->offset);
+ phv->phnullingrels = offset_relid_set(phv->phnullingrels,
+ context->offset);
}
/* fall through to examine children */
}
@@ -510,11 +524,13 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
{
Var *var = (Var *) node;
- if (var->varlevelsup == context->sublevels_up &&
- var->varno == context->rt_index)
+ if (var->varlevelsup == context->sublevels_up)
{
- var->varno = context->new_index;
- /* If the syntactic referent is same RTE, fix it too */
+ if (var->varno == context->rt_index)
+ var->varno = context->new_index;
+ var->varnullingrels = adjust_relid_set(var->varnullingrels,
+ context->rt_index,
+ context->new_index);
if (var->varnosyn == context->rt_index)
var->varnosyn = context->new_index;
}
@@ -557,6 +573,9 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
phv->phrels = adjust_relid_set(phv->phrels,
context->rt_index,
context->new_index);
+ phv->phnullingrels = adjust_relid_set(phv->phnullingrels,
+ context->rt_index,
+ context->new_index);
}
/* fall through to examine children */
}
@@ -833,7 +852,8 @@ rangeTableEntry_used_walker(Node *node,
Var *var = (Var *) node;
if (var->varlevelsup == context->sublevels_up &&
- var->varno == context->rt_index)
+ (var->varno == context->rt_index ||
+ bms_is_member(context->rt_index, var->varnullingrels)))
return true;
return false;
}
@@ -1061,6 +1081,154 @@ AddInvertedQual(Query *parsetree, Node *qual)
}
+/*
+ * get_nulling_relids returns the union of the varnullingrels and
+ * phnullingrels sets of all level-zero Vars and PlaceHolderVars within the
+ * given expression, which must not contain SubLinks or sub-Queries.
+ */
+Bitmapset *
+get_nulling_relids(Node *node)
+{
+ Bitmapset *result = NULL;
+
+ (void) get_nulling_relids_walker(node, &result);
+ return result;
+}
+
+static bool
+get_nulling_relids_walker(Node *node, Bitmapset **context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ if (var->varlevelsup == 0) /* ignore outer-level references */
+ *context = bms_add_members(*context, var->varnullingrels);
+ }
+ else if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ if (phv->phlevelsup == 0) /* ignore outer-level references */
+ *context = bms_add_members(*context, phv->phnullingrels);
+ }
+
+ /*
+ * Currently, this is only used after the planner has converted SubLinks
+ * to SubPlans, so we never need to recurse into sub-Queries, and hence
+ * no sublevels_up counting is needed.
+ */
+ Assert(!IsA(node, SubLink));
+ Assert(!IsA(node, Query));
+ return expression_tree_walker(node, get_nulling_relids_walker, context);
+}
+
+/*
+ * remove_nulling_relids removes mentions of the specified RT index(es)
+ * in Var.varnullingrels and PlaceHolderVar.phnullingrels fields within
+ * the given expression (or Query), except in nodes belonging to rels
+ * listed in except_relids.  A modified copy of the tree is returned.
+ *
+ * XXX consider making this a destructive walker.
+ */
+Node *
+remove_nulling_relids(Node *node, Bitmapset *removable_relids,
+ Bitmapset *except_relids)
+{
+ remove_nulling_relids_context context;
+
+ context.removable_relids = removable_relids;
+ context.except_relids = except_relids;
+ context.sublevels_up = 0;
+ return query_or_expression_tree_mutator(node,
+ remove_nulling_relids_mutator,
+ &context,
+ 0);
+}
+
+static Node *
+remove_nulling_relids_mutator(Node *node,
+ remove_nulling_relids_context *context)
+{
+ if (node == NULL)
+ return NULL;
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ if (var->varlevelsup == context->sublevels_up &&
+ !bms_is_member(var->varno, context->except_relids) &&
+ bms_overlap(var->varnullingrels, context->removable_relids))
+ {
+ Relids newnullingrels = bms_difference(var->varnullingrels,
+ context->removable_relids);
+
+ /* Micro-optimization: ensure nullingrels is NULL if empty */
+ if (bms_is_empty(newnullingrels))
+ newnullingrels = NULL;
+ /* Copy the Var ... */
+ var = copyObject(var);
+ /* ... and replace the copy's varnullingrels field */
+ var->varnullingrels = newnullingrels;
+ return (Node *) var;
+ }
+ /* Otherwise fall through; the Var is copied with nullingrels unchanged */
+ }
+ else if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ if (phv->phlevelsup == context->sublevels_up &&
+ !bms_overlap(phv->phrels, context->except_relids))
+ {
+ Relids newnullingrels = bms_difference(phv->phnullingrels,
+ context->removable_relids);
+
+ /*
+ * Micro-optimization: ensure nullingrels is NULL if empty.
+ *
+ * Note: it might seem desirable to remove the PHV altogether if
+ * phnullingrels goes to empty. Currently we dare not do that
+ * because we use PHVs in some cases to enforce separate identity
+ * of subexpressions; see wrap_non_vars usages in prepjointree.c.
+ */
+ if (bms_is_empty(newnullingrels))
+ newnullingrels = NULL;
+ /* Copy the PlaceHolderVar and mutate what's below ... */
+ phv = (PlaceHolderVar *)
+ expression_tree_mutator(node,
+ remove_nulling_relids_mutator,
+ (void *) context);
+ /* ... and replace the copy's phnullingrels field */
+ phv->phnullingrels = newnullingrels;
+ /* We must also update phrels, if it contains a removable RTI */
+ phv->phrels = bms_difference(phv->phrels,
+ context->removable_relids);
+ Assert(!bms_is_empty(phv->phrels));
+ return (Node *) phv;
+ }
+ /* Otherwise fall through to copy the PHV, still mutating its phexpr */
+ }
+ else if (IsA(node, Query))
+ {
+ /* Recurse into RTE or sublink subquery, one query level further down */
+ Query *newnode;
+
+ context->sublevels_up++;
+ newnode = query_tree_mutator((Query *) node,
+ remove_nulling_relids_mutator,
+ (void *) context,
+ 0);
+ context->sublevels_up--;
+ return (Node *) newnode;
+ }
+ return expression_tree_mutator(node, remove_nulling_relids_mutator,
+ (void *) context);
+}
+
+
/*
* replace_rte_variables() finds all Vars in an expression tree
* that reference a particular RTE, and replaces them with substitute
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index eeaa0b31fe..e517e0363c 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -381,6 +381,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
APP_JUMB(var->varno);
APP_JUMB(var->varattno);
APP_JUMB(var->varlevelsup);
+
+ /*
+ * We can omit varnullingrels, because it's fully determined
+ * by varno/varlevelsup plus the Var's query location.
+ */
}
break;
case T_Const:
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 73f635b455..78e6d93bf5 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1067,6 +1067,14 @@ typedef struct RangeTblEntry
* alias Vars are generated only for merged columns). We keep these
* entries only because they're needed in expandRTE() and similar code.
*
+ * Vars appearing within joinaliasvars are marked with varnullingrels sets
+ * that describe the nulling effects of this join and lower ones. This is
+ * essential for FULL JOIN cases, because the COALESCE expression only
+ * describes the semantics correctly if its inputs have been nulled by the
+ * join. For other cases, it allows expandRTE() to generate a valid
+ * representation of the join's output without consulting additional
+ * parser state.
+ *
* Within a Query loaded from a stored rule, it is possible for non-merged
* joinaliasvars items to be null pointers, which are placeholders for
* (necessarily unreferenced) columns dropped since the rule was made.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a6e5db4eec..b697a00839 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -202,13 +202,26 @@ struct PlannerInfo
struct AppendRelInfo **append_rel_array;
/*
- * all_baserels is a Relids set of all base relids (but not "other"
- * relids) in the query; that is, the Relids identifier of the final join
- * we need to form. This is computed in make_one_rel, just before we
- * start making Paths.
+ * all_baserels is a Relids set of all base relids (but not joins or
+ * "other" relids) in the query. This is computed in make_one_rel, just
+ * before we start making Paths.
*/
Relids all_baserels;
+ /*
+ * outer_join_rels is a Relids set of all outer-join relids in the query.
+ * This is computed in deconstruct_jointree.
+ */
+ Relids outer_join_rels;
+
+ /*
+ * all_query_rels is a Relids set of all base relids and outer join relids
+ * (but not "other" relids) in the query. This is the Relids identifier
+ * of the final join we need to form. This is computed in make_one_rel,
+ * just before we start making Paths.
+ */
+ Relids all_query_rels;
+
/*
* nullable_baserels is a Relids set of base relids that are nullable by
* some outer join in the jointree; these are rels that are potentially
@@ -261,8 +274,8 @@ struct PlannerInfo
* outer join clauses w/nonnullable var on
* right */
- List *full_join_clauses; /* list of RestrictInfos for mergejoinable
- * full join clauses */
+ List *full_join_clauses; /* list of FullJoinClauseInfos for
+ * mergejoinable full join clauses */
List *join_info_list; /* list of SpecialJoinInfos */
@@ -430,9 +443,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
* or the output of a sub-SELECT or function that appears in the range table.
* In either case it is uniquely identified by an RT index. A "joinrel"
* is the joining of two or more base rels. A joinrel is identified by
- * the set of RT indexes for its component baserels. We create RelOptInfo
- * nodes for each baserel and joinrel, and store them in the PlannerInfo's
- * simple_rel_array and join_rel_list respectively.
+ * the set of RT indexes for its component baserels, along with RT indexes
+ * for any outer joins it has computed. We create RelOptInfo nodes for each
+ * baserel and joinrel, and store them in the PlannerInfo's simple_rel_array
+ * and join_rel_list respectively.
*
* Note that there is only one joinrel for any given set of component
* baserels, no matter what order we assemble them in; so an unordered
@@ -471,8 +485,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
* Parts of this data structure are specific to various scan and join
* mechanisms. It didn't seem worth creating new node types for them.
*
- * relids - Set of base-relation identifiers; it is a base relation
- * if there is just one, a join relation if more than one
+ * relids - Set of relation identifiers (RT indexes). This is a base
+ * relation if there is just one, a join relation if more;
+ * in the join case, RT indexes of any outer joins formed
+ * at or below this join are included along with baserels
* rows - estimated number of tuples in the relation after restriction
* clauses have been applied (ie, output rows of a plan for it)
* consider_startup - true if there is any value in keeping plain paths for
@@ -679,7 +695,7 @@ typedef struct RelOptInfo
RelOptKind reloptkind;
/* all relations included in this RelOptInfo */
- Relids relids; /* set of base relids (rangetable indexes) */
+ Relids relids; /* base + OJ relids (rangetable indexes) */
/* size estimates generated by planner */
Cardinality rows; /* estimated number of result tuples */
@@ -754,8 +770,10 @@ typedef struct RelOptInfo
/* used by partitionwise joins: */
bool consider_partitionwise_join; /* consider partitionwise join
* paths? (if partitioned rel) */
- Relids top_parent_relids; /* Relids of topmost parents (if "other"
- * rel) */
+
+ /* inheritance links, if this is an otherrel (otherwise NULL): */
+ struct RelOptInfo *parent; /* immediate parent */
+ struct RelOptInfo *top_parent; /* topmost parent */
/* used for partitioned relations: */
PartitionScheme part_scheme; /* Partitioning scheme */
@@ -1940,17 +1958,17 @@ typedef struct LimitPath
* If a restriction clause references a single base relation, it will appear
* in the baserestrictinfo list of the RelOptInfo for that base rel.
*
- * If a restriction clause references more than one base rel, it will
+ * If a restriction clause references more than one base+OJ relation, it will
* appear in the joininfo list of every RelOptInfo that describes a strict
- * subset of the base rels mentioned in the clause. The joininfo lists are
+ * subset of the relations mentioned in the clause. The joininfo lists are
* used to drive join tree building by selecting plausible join candidates.
* The clause cannot actually be applied until we have built a join rel
- * containing all the base rels it references, however.
+ * containing all the relations it references, however.
*
- * When we construct a join rel that includes all the base rels referenced
+ * When we construct a join rel that includes all the relations referenced
* in a multi-relation restriction clause, we place that clause into the
* joinrestrictinfo lists of paths for the join rel, if neither left nor
- * right sub-path includes all base rels referenced in the clause. The clause
+ * right sub-path includes all relations referenced in the clause. The clause
* will be applied at that join level, and will not propagate any further up
* the join tree. (Note: the "predicate migration" code was once intended to
* push restriction clauses up and down the plan tree based on evaluation
@@ -1971,12 +1989,15 @@ typedef struct LimitPath
* or join to enforce that all members of each EquivalenceClass are in fact
* equal in all rows emitted by the scan or join.
*
- * When dealing with outer joins we have to be very careful about pushing qual
- * clauses up and down the tree. An outer join's own JOIN/ON conditions must
- * be evaluated exactly at that join node, unless they are "degenerate"
- * conditions that reference only Vars from the nullable side of the join.
- * Quals appearing in WHERE or in a JOIN above the outer join cannot be pushed
- * down below the outer join, if they reference any nullable Vars.
+ * The clause_relids field lists the base plus outer-join RT indexes that
+ * actually appear in the clause. required_relids lists the minimum set of
+ * relids needed to evaluate the clause; while this is often equal to
+ * clause_relids, it can be more. We will add relids to required_relids when
+ * we need to force an outer join ON clause to be evaluated exactly at the
+ * level of the outer join, which is true except when it is a "degenerate"
+ * condition that references only Vars from the nullable side of the join.
+ *
+ * XXX rewrite or remove me:
* RestrictInfo nodes contain a flag to indicate whether a qual has been
* pushed down to a lower level than its original syntactic placement in the
* join tree would suggest. If an outer join prevents us from pushing a qual
@@ -2084,12 +2105,14 @@ typedef struct RestrictInfo
bool leakproof; /* true if known to contain no leaked Vars */
- VolatileFunctionStatus has_volatile; /* to indicate if clause contains
- * any volatile functions. */
+ VolatileFunctionStatus has_volatile; /* indicates if clause contains
+ * any volatile functions */
Index security_level; /* see comment above */
- /* The set of relids (varnos) actually referenced in the clause: */
+ int num_base_rels; /* number of base rels in clause_relids */
+
+ /* The relids (varnos+varnullingrels) actually referenced in the clause: */
Relids clause_relids;
/* The set of relids required to evaluate the clause: */
@@ -2147,6 +2170,7 @@ typedef struct RestrictInfo
} RestrictInfo;
/*
+ * XXX this will need work:
* This macro embodies the correct way to test whether a RestrictInfo is
* "pushed down" to a given outer join, that is, should be treated as a filter
* clause rather than a join clause at that outer join. This is certainly so
@@ -2186,10 +2210,15 @@ typedef struct MergeScanSelCache
* of a plan tree. This is used during planning to represent the contained
* expression. At the end of the planning process it is replaced by either
* the contained expression or a Var referring to a lower-level evaluation of
- * the contained expression. Typically the evaluation occurs below an outer
+ * the contained expression. Generally the evaluation occurs below an outer
* join, and Var references above the outer join might thereby yield NULL
* instead of the expression value.
*
+ * phrels and phlevelsup correspond to the varno/varlevelsup fields of a
+ * plain Var, except that phrels has to be a relid set since the evaluation
+ * level of a PlaceHolderVar might be a join rather than a base relation.
+ * Likewise, phnullingrels corresponds to varnullingrels.
+ *
* Although the planner treats this as an expression node type, it is not
* recognized by the parser or executor, so we declare it here rather than
* in primnodes.h.
@@ -2199,7 +2228,8 @@ typedef struct PlaceHolderVar
{
Expr xpr;
Expr *phexpr; /* the represented expression */
- Relids phrels; /* base relids syntactically within expr src */
+ Relids phrels; /* relids syntactically within expr src */
+ Relids phnullingrels; /* RT indexes of joins that can null PHV */
Index phid; /* ID for PHV (unique within planner run) */
Index phlevelsup; /* > 0 if PHV belongs to outer query */
} PlaceHolderVar;
@@ -2220,17 +2250,20 @@ typedef struct PlaceHolderVar
* We make SpecialJoinInfos for FULL JOINs even though there is no flexibility
* of planning for them, because this simplifies make_join_rel()'s API.
*
- * min_lefthand and min_righthand are the sets of base relids that must be
- * available on each side when performing the special join. lhs_strict is
- * true if the special join's condition cannot succeed when the LHS variables
- * are all NULL (this means that an outer join can commute with upper-level
+ * min_lefthand and min_righthand are the sets of base+OJ relids that must be
+ * available on each side when performing the special join.
+ *
+ * strict_relids is the set of base+OJ relids for which the special join's
+ * condition is strict, ie it cannot succeed if any of those rels produce
+ * an all-NULL row. lhs_strict reports whether any LHS rels appear in
+ * strict_relids (this means that an outer join can commute with upper-level
* outer joins even if it appears in their RHS). We don't bother to set
- * lhs_strict for FULL JOINs, however.
+ * strict_relids or lhs_strict for FULL JOINs, however.
*
* It is not valid for either min_lefthand or min_righthand to be empty sets;
* if they were, this would break the logic that enforces join order.
*
- * syn_lefthand and syn_righthand are the sets of base relids that are
+ * syn_lefthand and syn_righthand are the sets of base+OJ relids that are
* syntactically below this special join. (These are needed to help compute
* min_lefthand and min_righthand for higher joins.)
*
@@ -2252,14 +2285,18 @@ typedef struct PlaceHolderVar
* the inputs to make it a LEFT JOIN. So the allowed values of jointype
* in a join_info_list member are only LEFT, FULL, SEMI, or ANTI.
*
+ * ojrelid is the RT index of the join RTE representing this outer join,
+ * if there is one. It is zero when jointype is INNER or SEMI.
+ *
* For purposes of join selectivity estimation, we create transient
* SpecialJoinInfo structures for regular inner joins; so it is possible
* to have jointype == JOIN_INNER in such a structure, even though this is
* not allowed within join_info_list. We also create transient
* SpecialJoinInfos with jointype == JOIN_INNER for outer joins, since for
* cost estimation purposes it is sometimes useful to know the join size under
- * plain innerjoin semantics. Note that lhs_strict, delay_upper_joins, and
- * of course the semi_xxx fields are not set meaningfully within such structs.
+ * plain innerjoin semantics. Note that strict_relids, lhs_strict,
+ * delay_upper_joins, and of course the semi_xxx fields are not set
+ * meaningfully within such structs.
*/
#ifndef HAVE_SPECIALJOININFO_TYPEDEF
typedef struct SpecialJoinInfo SpecialJoinInfo;
@@ -2269,11 +2306,13 @@ typedef struct SpecialJoinInfo SpecialJoinInfo;
struct SpecialJoinInfo
{
NodeTag type;
- Relids min_lefthand; /* base relids in minimum LHS for join */
- Relids min_righthand; /* base relids in minimum RHS for join */
- Relids syn_lefthand; /* base relids syntactically within LHS */
- Relids syn_righthand; /* base relids syntactically within RHS */
+ Relids min_lefthand; /* base+OJ relids in minimum LHS for join */
+ Relids min_righthand; /* base+OJ relids in minimum RHS for join */
+ Relids syn_lefthand; /* base+OJ relids syntactically within LHS */
+ Relids syn_righthand; /* base+OJ relids syntactically within RHS */
JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */
+ Index ojrelid; /* outer join's RT index; 0 if none */
+ Relids strict_relids; /* joinclause is strict for these relids */
bool lhs_strict; /* joinclause is strict for some LHS rel */
bool delay_upper_joins; /* can't commute with upper RHS */
/* Remaining fields are set only for JOIN_SEMI jointype: */
@@ -2283,6 +2322,18 @@ struct SpecialJoinInfo
List *semi_rhs_exprs; /* righthand-side expressions of these ops */
};
+/*
+ * FULL JOIN clause info: the elements of PlannerInfo.full_join_clauses.
+ * This is a plain struct, not a Node (it has no NodeTag).  We set aside
+ * every FULL JOIN ON clause that looks mergejoinable, and process it
+ * specially at the end of qual distribution.
+ */
+typedef struct FullJoinClauseInfo
+{
+ RestrictInfo *rinfo; /* a mergejoinable FULL JOIN clause */
+ SpecialJoinInfo *sjinfo; /* the FULL JOIN's SpecialJoinInfo */
+} FullJoinClauseInfo;
+
/*
* Append-relation info.
*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 0ea9a22dfb..5ca0314c8f 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -652,6 +652,7 @@ typedef struct WorkTableScan
* When the plan node represents a foreign join, scan.scanrelid is zero and
* fs_relids must be consulted to identify the join relation. (fs_relids
* is valid for simple scans as well, but will always match scan.scanrelid.)
+ * fs_relids includes the RT indexes of outer joins; fs_base_relids does not.
*
* If the FDW's PlanDirectModify() callback decides to repurpose a ForeignScan
* node to perform the UPDATE or DELETE operation directly in the remote
@@ -671,7 +672,8 @@ typedef struct ForeignScan
List *fdw_private; /* private data for FDW */
List *fdw_scan_tlist; /* optional tlist describing scan tuple */
List *fdw_recheck_quals; /* original quals not in scan.plan.qual */
- Bitmapset *fs_relids; /* RTIs generated by this scan */
+ Bitmapset *fs_relids; /* base+OJ RTIs generated by this scan */
+ Bitmapset *fs_base_relids; /* base RTIs generated by this scan */
bool fsSystemCol; /* true if any "system column" is needed */
} ForeignScan;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 51505eee85..eba47ecbff 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -171,6 +171,14 @@ typedef struct Expr
* row identity information during UPDATE/DELETE. This value should never
* be seen outside the planner.
*
+ * varnullingrels is the set of RT indexes of outer joins that can force
+ * the Var's value to null (at the point where it appears in the query).
+ * See optimizer/README for discussion of that.
+ *
+ * varlevelsup is greater than zero in Vars that represent outer references.
+ * Note that it affects the meaning of varno, varnullingrels, and varnosyn:
+ * all of them refer to the range table of the referenced outer query level.
+ *
* In the parser, varnosyn and varattnosyn are either identical to
* varno/varattno, or they specify the column's position in an aliased JOIN
* RTE that hides the semantic referent RTE's refname. This is a syntactic
@@ -202,6 +210,7 @@ typedef struct Var
Oid vartype; /* pg_type OID for the type of this var */
int32 vartypmod; /* pg_attribute typmod value */
Oid varcollid; /* OID of collation, or InvalidOid if none */
+ Bitmapset *varnullingrels; /* RT indexes of joins that can null var */
Index varlevelsup; /* for subquery variables referencing outer
* relations; 0 in a normal var, >0 means N
* levels up */
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index cf9c759025..8bef98487d 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -115,6 +115,13 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
* This is one-for-one with p_rtable, but contains NULLs for non-join
* RTEs, and may be shorter than p_rtable if the last RTE(s) aren't joins.
*
+ * p_nullingrels: list of Bitmapsets associated with p_rtable entries, each
+ * containing the set of outer-join RTE indexes that can null that relation
+ * at the current point in the parse tree. This is one-for-one with p_rtable,
+ * but may be shorter than p_rtable, in which case the missing entries are
+ * implicitly empty (NULL). That rule allows us to save work when the query
+ * contains no outer joins.
+ *
* p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that
* will become the fromlist of the query's top-level FromExpr node.
*
@@ -182,6 +189,7 @@ struct ParseState
const char *p_sourcetext; /* source text, or NULL if not available */
List *p_rtable; /* range table so far */
List *p_joinexprs; /* JoinExprs for RTE_JOIN p_rtable entries */
+ List *p_nullingrels; /* Bitmapsets showing nulling outer joins */
List *p_joinlist; /* join items so far (will become FromExpr
* node's fromlist) */
List *p_namespace; /* currently-referenceable RTEs (List of
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h
index 98b9b3a288..a3f902c1bb 100644
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -63,6 +63,10 @@ extern bool contain_windowfuncs(Node *node);
extern int locate_windowfunc(Node *node);
extern bool checkExprHasSubLink(Node *node);
+extern Bitmapset *get_nulling_relids(Node *node);
+extern Node *remove_nulling_relids(Node *node, Bitmapset *removable_relids,
+ Bitmapset *except_relids);
+
extern Node *replace_rte_variables(Node *node,
int target_varno, int sublevels_up,
replace_rte_variables_callback callback,