wip-subquery-stats.patch
text/x-patch
Filename: wip-subquery-stats.patch
Type: text/x-patch
Part: 0
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index ef29374fcccae23cb663c04470f12c22321a0e2c..a703f4a910c0e5f521f09cf9564a05c73cf803b8 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 95,100 ****
--- 95,101 ----
#include "access/sysattr.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
+ #include "catalog/pg_inherits_fn.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
*************** static double convert_one_bytea_to_scala
*** 168,173 ****
--- 169,176 ----
int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
+ static void examine_variable_recurse(Query *subquery, AttrNumber resno,
+ VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
*************** examine_variable(PlannerInfo *root, Node
*** 4176,4196 ****
}
else if (rte->rtekind == RTE_RELATION)
{
vardata->statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(rte->relid),
Int16GetDatum(var->varattno),
BoolGetDatum(rte->inh));
vardata->freefunc = ReleaseSysCache;
}
else
{
/*
! * XXX This means the Var comes from a JOIN or sub-SELECT. Later
! * add code to dig down into the join etc and see if we can trace
! * the variable to something with stats. (But beware of
! * sub-SELECTs with DISTINCT/GROUP BY/etc. Perhaps there are no
! * cases where this would really be useful, because we'd have
! * flattened the subselect if it is??)
*/
}
--- 4179,4205 ----
}
else if (rte->rtekind == RTE_RELATION)
{
+ /* plain table, so look up the column in pg_statistic */
vardata->statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(rte->relid),
Int16GetDatum(var->varattno),
BoolGetDatum(rte->inh));
vardata->freefunc = ReleaseSysCache;
}
+ else if (rte->rtekind == RTE_SUBQUERY)
+ {
+ /* recurse into subquery */
+ examine_variable_recurse(rte->subquery, var->varattno,
+ vardata);
+ }
else
{
/*
! * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.
! * (We won't see RTE_JOIN here because join alias Vars have
! * already been flattened.) There's not much we can do with
! * function outputs, but maybe someday try to be smarter about
! * VALUES and/or CTEs.
*/
}
*************** examine_variable(PlannerInfo *root, Node
*** 4335,4340 ****
--- 4344,4435 ----
}
/*
+ * examine_variable_recurse
+ * Handle a Var referring to a subquery result for examine_variable
+ *
+ * If the Var ultimately refers to a column of a table, we can fill in the
+ * statsTuple; none of the other fields of vardata need to be changed.
+ * (In particular, we don't try to set isunique, since even if the underlying
+ * column is unique, the subquery may have joined to other tables in a way
+ * that creates duplicates.)
+ *
+ * XXX For the moment, we ignore the possibility that the subquery may change
+ * the column's statistics so much that we'd be better off not consulting
+ * pg_statistic at all. Possibly we should give up if the subquery uses
+ * GROUP BY or DISTINCT, for instance.
+ *
+ * The subquery we are looking at has not been through planner preprocessing,
+ * so some things have to be done differently here than in examine_variable.
+ * (Perhaps we should rearrange things so that the sub-root data structure
+ * is kept around after we plan a subquery?)
+ */
+ static void
+ examine_variable_recurse(Query *subquery, AttrNumber resno,
+ VariableStatData *vardata)
+ {
+ TargetEntry *ste;
+ Var *var;
+ RangeTblEntry *rte;
+
+ /* Get the subquery output expression referenced by the upper Var */
+ ste = get_tle_by_resno(subquery->targetList, resno);
+ if (ste == NULL || ste->resjunk)
+ elog(ERROR, "subquery does not have attribute %d", resno);
+ var = (Var *) ste->expr;
+
+ restart:
+ /* Can only handle a simple Var of subquery's query level */
+ if (var && IsA(var, Var) &&
+ var->varlevelsup == 0)
+ {
+ /* OK, fetch the RTE referenced by the Var */
+ rte = rt_fetch(var->varno, subquery->rtable);
+
+ /* XXX Can't call get_relation_stats_hook for lack of subroot */
+
+ if (rte->rtekind == RTE_RELATION)
+ {
+ /*
+ * We can't just rely on rte->inh here, because in parser output
+ * that only signifies the absence of ONLY. We have to check
+ * relhassubclass as well. For speed, we don't actually examine
+ * pg_inherits, so we might get fooled by a table that once had
+ * children but no longer does. But that's okay, because ANALYZE
+ * will still produce inheritance-tree statistics in such cases,
+ * so we will still find an applicable pg_statistic entry.
+ */
+ bool inh;
+
+ inh = rte->inh && has_subclass(rte->relid);
+ vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(rte->relid),
+ Int16GetDatum(var->varattno),
+ BoolGetDatum(inh));
+ vardata->freefunc = ReleaseSysCache;
+ }
+ else if (rte->rtekind == RTE_SUBQUERY)
+ {
+ /* recurse some more ... */
+ examine_variable_recurse(rte->subquery, var->varattno,
+ vardata);
+ }
+ else if (rte->rtekind == RTE_JOIN)
+ {
+ /*
+ * Since join alias variables haven't been flattened in the
+ * subquery, we have to be prepared to dereference them.
+ */
+ if (var->varattno == InvalidAttrNumber)
+ return; /* punt on whole-row reference */
+ Assert(var->varattno > 0);
+ var = (Var *) list_nth(rte->joinaliasvars, var->varattno - 1);
+ goto restart;
+ }
+ /* As in examine_variable, do nothing for other RTE kinds */
+ }
+ }
+
+ /*
* get_variable_numdistinct
* Estimate the number of distinct values of a variable.
*