v6-0008-avoid-using-nullable_relids.patch
text/x-diff
Filename: v6-0008-avoid-using-nullable_relids.patch
Type: text/x-diff
Part: 8
Message:
Re: Making Vars outer-join aware
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: unified
Series: patch v6-0008
| File | + | − |
|---|---|---|
| src/backend/optimizer/plan/initsplan.c | 32 | 0 |
| src/backend/optimizer/util/relnode.c | 9 | 0 |
| src/backend/optimizer/util/restrictinfo.c | 22 | 21 |
| src/include/nodes/pathnodes.h | 3 | 0 |
commit 9ffde4a92a31e9f6d1c04daa8ab64ad89107991d
Author: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat Nov 5 17:37:11 2022 -0400
Don't use RestrictInfo.nullable_relids in join_clause_is_movable_to.
Instead of using per-clause nullable_relids data, compute a
per-baserel set of outer joins that can null each relation, and
check for overlap between that and clause_relids to detect whether
the clause can safely be pushed down to relation scan level.
join_clause_is_movable_into also uses nullable_relids, but it
turns out that that test can just be dropped entirely. Now that
clause_relids includes nulling outer joins, the preceding tests
in the function are sufficient to reject clauses that can't be
pushed down.
This might seem like a net loss given that we have to add a bit
of code to initsplan.c to compute RelOptInfo.nulling_relids.
However, that's not much code at all, and the payoff is this:
we no longer need RestrictInfo.nullable_relids at all.
The next patch, which removes that field and the extensive
infrastructure that maintains it, saves way more code and cycles
than we add here. Also, I think there are likely going to be
other uses for RelOptInfo.nulling_relids.
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 96e8033930..364c26badf 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -57,6 +57,8 @@ static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
static void process_security_barrier_quals(PlannerInfo *root,
int rti, Relids qualscope,
bool below_outer_join);
+static void mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
+ Relids lower_rels);
static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
Relids left_rels, Relids right_rels,
Relids inner_join_rels,
@@ -963,6 +965,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
*qualscope = bms_add_member(*qualscope, j->rtindex);
root->outer_join_rels = bms_add_member(root->outer_join_rels,
j->rtindex);
+ mark_rels_nulled_by_join(root, j->rtindex, rightids);
}
*inner_join_rels = bms_union(left_inners, right_inners);
nonnullable_rels = leftids;
@@ -1005,6 +1008,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
*qualscope = bms_add_member(*qualscope, j->rtindex);
root->outer_join_rels = bms_add_member(root->outer_join_rels,
j->rtindex);
+ mark_rels_nulled_by_join(root, j->rtindex, leftids);
+ mark_rels_nulled_by_join(root, j->rtindex, rightids);
*inner_join_rels = bms_union(left_inners, right_inners);
/* each side is both outer and inner */
nonnullable_rels = *qualscope;
@@ -1221,6 +1226,33 @@ process_security_barrier_quals(PlannerInfo *root,
Assert(security_level <= root->qual_security_level);
}
+/*
+ * mark_rels_nulled_by_join
+ * Fill RelOptInfo.nulling_relids of baserels nulled by this outer join
+ *
+ * Inputs:
+ * ojrelid: RT index of the join RTE (must not be 0)
+ * lower_rels: the base+OJ Relids syntactically below nullable side of join
+ */
+static void
+mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
+ Relids lower_rels)
+{
+ int relid = -1;
+
+ while ((relid = bms_next_member(lower_rels, relid)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[relid];
+
+ if (rel == NULL) /* must be an outer join */
+ {
+ Assert(bms_is_member(relid, root->outer_join_rels));
+ continue;
+ }
+ rel->nulling_relids = bms_add_member(rel->nulling_relids, ojrelid);
+ }
+}
+
/*
* make_outerjoininfo
* Build a SpecialJoinInfo for the current outer join
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index cea298c633..3dd9317320 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -277,6 +277,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->top_parent = parent->top_parent ? parent->top_parent : parent;
rel->top_parent_relids = rel->top_parent->relids;
+ /*
+ * A child rel is below the same outer joins as its parent. (We
+ * presume this info was already calculated for the parent.)
+ */
+ rel->nulling_relids = parent->nulling_relids;
+
/*
* Also propagate lateral-reference information from appendrel parent
* rels to their child rels. We intentionally give each child rel the
@@ -300,6 +306,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->parent = NULL;
rel->top_parent = NULL;
rel->top_parent_relids = NULL;
+ rel->nulling_relids = NULL;
rel->direct_lateral_relids = NULL;
rel->lateral_relids = NULL;
rel->lateral_referencers = NULL;
@@ -680,6 +687,7 @@ build_join_rel(PlannerInfo *root,
joinrel->max_attr = 0;
joinrel->attr_needed = NULL;
joinrel->attr_widths = NULL;
+ joinrel->nulling_relids = NULL;
joinrel->lateral_vars = NIL;
joinrel->lateral_referencers = NULL;
joinrel->indexlist = NIL;
@@ -869,6 +877,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->max_attr = 0;
joinrel->attr_needed = NULL;
joinrel->attr_widths = NULL;
+ joinrel->nulling_relids = NULL;
joinrel->lateral_vars = NIL;
joinrel->lateral_referencers = NULL;
joinrel->indexlist = NIL;
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index bcbee8f943..15f410cf36 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -618,8 +618,17 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
if (bms_is_member(baserel->relid, rinfo->outer_relids))
return false;
- /* Target rel must not be nullable below the clause */
- if (bms_is_member(baserel->relid, rinfo->nullable_relids))
+ /*
+ * Target rel's Vars must not be nulled by any outer join. We can check
+ * this without groveling through the individual Vars by seeing whether
+ * clause_relids (which includes all such Vars' varnullingrels) includes
+ * any outer join that can null the target rel. You might object that
+ * this could reject the clause on the basis of an OJ relid that came from
+ * some other rel's Var. However, that would still mean that the clause
+ * came from above that outer join and shouldn't be pushed down; so there
+ * should be no false positives.
+ */
+ if (bms_overlap(rinfo->clause_relids, baserel->nulling_relids))
return false;
/* Clause must not use any rels with LATERAL references to this rel */
@@ -651,16 +660,17 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
* relation plus the outer rels. We also check that it does reference at
* least one current Var, ensuring that the clause will be pushed down to
* a unique place in a parameterized join tree. And we check that we're
- * not pushing the clause into its outer-join outer side, nor down into
- * a lower outer join's inner side.
- *
- * The check about pushing a clause down into a lower outer join's inner side
- * is only approximate; it sometimes returns "false" when actually it would
- * be safe to use the clause here because we're still above the outer join
- * in question. This is okay as long as the answers at different join levels
- * are consistent: it just means we might sometimes fail to push a clause as
- * far down as it could safely be pushed. It's unclear whether it would be
- * worthwhile to do this more precisely. (But if it's ever fixed to be
+ * not pushing the clause into its outer-join outer side.
+ *
+ * We used to need to check that we're not pushing the clause into a lower
+ * outer join's inner side. However, now that clause_relids includes
+ * references to potentially-nulling outer joins, the other tests handle that
+ * concern. If the clause references any Var coming from the inside of a
+ * lower outer join, its clause_relids will mention that outer join, causing
+ * the evaluability check to fail; while if it references no such Vars, the
+ * references-a-target-rel check will fail.
+ *
+ * XXX not clear if we can do this yet: (But if it's ever fixed to be
* exactly accurate, there's an Assert in get_joinrel_parampathinfo() that
* should be re-enabled.)
*
@@ -704,14 +714,5 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
if (bms_overlap(currentrelids, rinfo->outer_relids))
return false;
- /*
- * Target rel(s) must not be nullable below the clause. This is
- * approximate, in the safe direction, because the current join might be
- * above the join where the nulling would happen, in which case the clause
- * would work correctly here. But we don't have enough info to be sure.
- */
- if (bms_overlap(currentrelids, rinfo->nullable_relids))
- return false;
-
return true;
}
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 9d89b0c9eb..3f89199e64 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -660,6 +660,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
* outer-join relids.
* attr_widths - cache space for per-attribute width estimates;
* zero means not computed yet
+ * nulling_relids - relids of outer joins that can null this rel
* lateral_vars - lateral cross-references of rel, if any (list of
* Vars and PlaceHolderVars)
* lateral_referencers - relids of rels that reference this one laterally
@@ -893,6 +894,8 @@ typedef struct RelOptInfo
Relids *attr_needed pg_node_attr(read_write_ignore);
/* array indexed [min_attr .. max_attr] */
int32 *attr_widths pg_node_attr(read_write_ignore);
+ /* relids of outer joins that can null this baserel */
+ Relids nulling_relids;
/* LATERAL Vars and PHVs referenced by rel */
List *lateral_vars;
/* rels that reference this baserel laterally */