v1-0001-Always-use-two-fractional-digits-for-estimated-rows_SRC.patch
text/x-patch
Filename: v1-0001-Always-use-two-fractional-digits-for-estimated-rows_SRC.patch
Type: text/x-patch
Part: 0
Message:
Re: explain analyze rows=%.0f
From a042ee22dbdae5049f4830116e83a3a72ea5e405 Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdokimov@tantorlabs.com>
Date: Wed, 9 Apr 2025 22:24:01 +0300
Subject: [PATCH v1] Always use two fractional digits for estimated row counts
---
contrib/file_fdw/file_fdw.c | 4 +-
contrib/postgres_fdw/postgres_fdw.c | 4 +-
contrib/tsm_system_rows/tsm_system_rows.c | 4 +-
contrib/tsm_system_time/tsm_system_time.c | 4 +-
src/backend/access/table/tableam.c | 2 +-
src/backend/access/tablesample/bernoulli.c | 2 +-
src/backend/access/tablesample/system.c | 4 +-
src/backend/commands/explain.c | 4 +-
src/backend/optimizer/path/costsize.c | 53 +++++++++++++++-------
src/backend/optimizer/util/pathnode.c | 4 +-
src/backend/utils/adt/selfuncs.c | 14 +++---
src/include/optimizer/optimizer.h | 1 +
12 files changed, 61 insertions(+), 39 deletions(-)
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index a9a5671d95a..682bb86adde 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1097,7 +1097,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
double density;
density = baserel->tuples / (double) baserel->pages;
- ntuples = clamp_row_est(density * (double) pages);
+ ntuples = clamp_tuple_est(density * (double) pages);
}
else
{
@@ -1113,7 +1113,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
tuple_width = MAXALIGN(baserel->reltarget->width) +
MAXALIGN(SizeofHeapTupleHeader);
- ntuples = clamp_row_est((double) stat_buf.st_size /
+ ntuples = clamp_tuple_est((double) stat_buf.st_size /
(double) tuple_width);
}
fdw_private->ntuples = ntuples;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index a7e0cc9f323..5e1c7bb32e1 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3278,7 +3278,7 @@ estimate_path_cost_size(PlannerInfo *root,
* Back into an estimate of the number of retrieved rows. Just in
* case this is nuts, clamp to at most nrows.
*/
- retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
+ retrieved_rows = clamp_tuple_est(rows / fpinfo->local_conds_sel);
retrieved_rows = Min(retrieved_rows, nrows);
/*
@@ -3453,7 +3453,7 @@ estimate_path_cost_size(PlannerInfo *root,
* Back into an estimate of the number of retrieved rows. Just in
* case this is nuts, clamp to at most foreignrel->tuples.
*/
- retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
+ retrieved_rows = clamp_tuple_est(rows / fpinfo->local_conds_sel);
retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
/*
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index f401efa2131..308bb301da1 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -135,7 +135,7 @@ system_rows_samplescangetsamplesize(PlannerInfo *root,
/* Clamp to the estimated relation size */
if (ntuples > baserel->tuples)
ntuples = (int64) baserel->tuples;
- ntuples = clamp_row_est(ntuples);
+ ntuples = clamp_tuple_est(ntuples);
if (baserel->tuples > 0 && baserel->pages > 0)
{
@@ -151,7 +151,7 @@ system_rows_samplescangetsamplesize(PlannerInfo *root,
}
/* Clamp to sane value */
- npages = clamp_row_est(Min((double) baserel->pages, npages));
+ npages = clamp_tuple_est(Min((double) baserel->pages, npages));
*pages = npages;
*tuples = ntuples;
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index c9c71d8c3af..b2d1d7e37d7 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -151,7 +151,7 @@ system_time_samplescangetsamplesize(PlannerInfo *root,
npages = millis; /* even more bogus, but whatcha gonna do? */
/* Clamp to sane value */
- npages = clamp_row_est(Min((double) baserel->pages, npages));
+ npages = clamp_tuple_est(Min((double) baserel->pages, npages));
if (baserel->tuples > 0 && baserel->pages > 0)
{
@@ -167,7 +167,7 @@ system_time_samplescangetsamplesize(PlannerInfo *root,
}
/* Clamp to the estimated relation size */
- ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
+ ntuples = clamp_tuple_est(Min(baserel->tuples, ntuples));
*pages = npages;
*tuples = ntuples;
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index a56c5eceb14..e14a315f319 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -742,7 +742,7 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
/* note: integer division is intentional here */
density = (usable_bytes_per_page * fillfactor / 100) / tuple_width;
/* There's at least one row on the page, even with low fillfactor. */
- density = clamp_row_est(density);
+ density = clamp_tuple_est(density);
}
*tuples = rint(density * (double) curpages);
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
index 5e1c5d2b723..d1993299158 100644
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -117,7 +117,7 @@ bernoulli_samplescangetsamplesize(PlannerInfo *root,
/* We'll visit all pages of the baserel */
*pages = baserel->pages;
- *tuples = clamp_row_est(baserel->tuples * samplefract);
+ *tuples = clamp_tuple_est(baserel->tuples * samplefract);
}
/*
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index 8db813b89fc..8944fe28799 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -117,10 +117,10 @@ system_samplescangetsamplesize(PlannerInfo *root,
}
/* We'll visit a sample of the pages ... */
- *pages = clamp_row_est(baserel->pages * samplefract);
+ *pages = clamp_tuple_est(baserel->pages * samplefract);
/* ... and hopefully get a representative number of tuples from them */
- *tuples = clamp_row_est(baserel->tuples * samplefract);
+ *tuples = clamp_tuple_est(baserel->tuples * samplefract);
}
/*
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ef8aa489af8..9d9da4880a3 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1808,7 +1808,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
{
if (es->format == EXPLAIN_FORMAT_TEXT)
{
- appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)",
+ appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.2f width=%d)",
plan->startup_cost, plan->total_cost,
plan->plan_rows, plan->plan_width);
}
@@ -1819,7 +1819,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainPropertyFloat("Total Cost", NULL, plan->total_cost,
2, es);
ExplainPropertyFloat("Plan Rows", NULL, plan->plan_rows,
- 0, es);
+ 2, es);
ExplainPropertyInteger("Plan Width", NULL, plan->plan_width,
es);
}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 60b0fcfb6be..27f8d70eb2c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -205,6 +205,29 @@ static double page_size(double tuples, int width);
static double get_parallel_divisor(Path *path);
+/*
+ * clamp_tuple_est
+ *		Force a tuple-count estimate to a sane, integral value.
+ *
+ * Unlike clamp_row_est, which may return a fractional estimate, the result
+ * here is always rounded to a whole number.  Use it for quantities that
+ * must be integral, such as counts of tuples or pages fetched.
+ */
+double
+clamp_tuple_est(double ntuples)
+{
+	/*
+	 * Let clamp_row_est do the range clamping so the two functions cannot
+	 * drift apart: NaN and anything above MAXIMUM_ROWCOUNT become
+	 * MAXIMUM_ROWCOUNT, and anything <= 1.0 becomes 1.0 (which also avoids
+	 * divide-by-zero when interpolating costs).
+	 *
+	 * Rounding afterwards leaves both clamp limits unchanged, assuming
+	 * MAXIMUM_ROWCOUNT is itself a whole number (NOTE: confirm in cost.h).
+	 */
+	return rint(clamp_row_est(ntuples));
+}
+
/*
* clamp_row_est
* Force a row-count estimate to a sane value.
@@ -216,14 +239,12 @@ clamp_row_est(double nrows)
* Avoid infinite and NaN row estimates. Costs derived from such values
* are going to be useless. Also force the estimate to be at least one
* row, to make explain output look better and to avoid possible
- * divide-by-zero when interpolating costs. Make it an integer, too.
+ * divide-by-zero when interpolating costs.
*/
if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows))
nrows = MAXIMUM_ROWCOUNT;
else if (nrows <= 1.0)
nrows = 1.0;
- else
- nrows = rint(nrows);
return nrows;
}
@@ -249,7 +270,7 @@ clamp_width_est(int64 tuple_width)
return (int32) MaxAllocSize;
/*
- * Unlike clamp_row_est, we just Assert that the value isn't negative,
+ * Unlike clamp_tuple_est, we just Assert that the value isn't negative,
* rather than masking such errors.
*/
Assert(tuple_width >= 0);
@@ -643,7 +664,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
run_cost += indexTotalCost - indexStartupCost;
/* estimate number of main-table tuples fetched */
- tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+ tuples_fetched = clamp_tuple_est(indexSelectivity * baserel->tuples);
/* fetch estimated page costs for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
@@ -901,7 +922,7 @@ extract_nonindex_conditions(List *qual_clauses, List *indexclauses)
* computed for us by make_one_rel.
*
* Caller is expected to have ensured that tuples_fetched is greater than zero
- * and rounded to integer (see clamp_row_est). The result will likewise be
+ * and rounded to integer (see clamp_tuple_est). The result will likewise be
* greater than zero and integral.
*/
double
@@ -3084,7 +3105,7 @@ get_windowclause_startup_tuples(PlannerInfo *root, WindowClause *wc,
* subnode.
*/
- return clamp_row_est(return_tuples);
+ return clamp_tuple_est(return_tuples);
}
/*
@@ -3298,7 +3319,7 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
*/
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
- if (outer_path_rows > 1)
+ if (outer_path_rows >= 2)
run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
@@ -3323,7 +3344,7 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
{
/* Normal case; we'll scan whole input rel for each outer row */
run_cost += inner_run_cost;
- if (outer_path_rows > 1)
+ if (outer_path_rows >= 2)
run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
}
@@ -3656,8 +3677,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
* Convert selectivities to row counts. We force outer_rows and
* inner_rows to be at least 1, but the skip_rows estimates can be zero.
*/
- outer_skip_rows = rint(outer_path_rows * outerstartsel);
- inner_skip_rows = rint(inner_path_rows * innerstartsel);
+ outer_skip_rows = outer_path_rows * outerstartsel;
+ inner_skip_rows = inner_path_rows * innerstartsel;
outer_rows = clamp_row_est(outer_path_rows * outerendsel);
inner_rows = clamp_row_est(inner_path_rows * innerendsel);
@@ -4415,7 +4436,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
* that way, so it will be unable to drive the batch size below hash_mem
* when this is true.)
*/
- if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
+ if (relation_byte_size(clamp_tuple_est(inner_path_rows * innermcvfreq),
inner_path->pathtarget->width) > get_hash_memory_limit())
startup_cost += disable_cost;
@@ -4449,7 +4470,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
* to clamp inner_scan_frac to at most 1.0; but since match_count is
* at least 1, no such clamp is needed now.)
*/
- outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
+ outer_matched_rows = outer_path_rows * extra->semifactors.outer_match_frac;
inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
startup_cost += hash_qual_cost.startup;
@@ -4573,7 +4594,7 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
if (subplan->subLinkType == EXISTS_SUBLINK)
{
/* we only need to fetch 1 tuple; clamp to avoid zero divide */
- sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows);
+ sp_cost.per_tuple += plan_run_cost / clamp_tuple_est(plan->plan_rows);
}
else if (subplan->subLinkType == ALL_SUBLINK ||
subplan->subLinkType == ANY_SUBLINK)
@@ -6517,7 +6538,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
/*
* Estimate number of main-table pages fetched.
*/
- tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+ tuples_fetched = clamp_tuple_est(indexSelectivity * baserel->tuples);
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
@@ -6583,7 +6604,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
*/
if (lossy_pages > 0)
tuples_fetched =
- clamp_row_est(indexSelectivity *
+ clamp_tuple_est(indexSelectivity *
(exact_pages / heap_pages) * baserel->tuples +
(lossy_pages / heap_pages) * baserel->tuples);
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..0b9ec6b26fc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1687,7 +1687,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
pathnode->param_exprs = param_exprs;
pathnode->singlerow = singlerow;
pathnode->binary_mode = binary_mode;
- pathnode->calls = clamp_row_est(calls);
+ pathnode->calls = clamp_tuple_est(calls);
/*
* For now we set est_entries to 0. cost_memoize_rescan() does all the
@@ -4049,7 +4049,7 @@ adjust_limit_rows_costs(double *rows, /* in/out parameter */
if (offset_est > 0)
offset_rows = (double) offset_est;
else
- offset_rows = clamp_row_est(input_rows * 0.10);
+ offset_rows = clamp_tuple_est(input_rows * 0.10);
if (offset_rows > *rows)
offset_rows = *rows;
if (input_rows > 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 588d991fa57..0c077c09175 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2183,7 +2183,7 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
ATTSTATSSLOT_NUMBERS))
{
if (sslot.nnumbers > 0)
- nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
+ nelem = clamp_tuple_est(sslot.numbers[sslot.nnumbers - 1]);
free_attstatsslot(&sslot);
}
}
@@ -3462,7 +3462,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
* estimate is usually already at least 1, but clamp it just in case it
* isn't.
*/
- input_rows = clamp_row_est(input_rows);
+ input_rows = clamp_tuple_est(input_rows);
/*
* If no grouping columns, there's exactly one group. (This can't happen
@@ -3755,7 +3755,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
(1 - pow((rel->tuples - rel->rows) / rel->tuples,
rel->tuples / reldistinct));
}
- reldistinct = clamp_row_est(reldistinct);
+ reldistinct = clamp_tuple_est(reldistinct);
/*
* Update estimate of total distinct groups.
@@ -4071,7 +4071,7 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
if (vardata.rel && vardata.rel->tuples > 0)
{
ndistinct *= vardata.rel->rows / vardata.rel->tuples;
- ndistinct = clamp_row_est(ndistinct);
+ ndistinct = clamp_tuple_est(ndistinct);
}
/*
@@ -6168,7 +6168,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
* If we had an absolute estimate, use that.
*/
if (stadistinct > 0.0)
- return clamp_row_est(stadistinct);
+ return clamp_tuple_est(stadistinct);
/*
* Otherwise we need to get the relation size; punt if not available.
@@ -6189,7 +6189,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
* If we had a relative estimate, use that.
*/
if (stadistinct < 0.0)
- return clamp_row_est(-stadistinct * ntuples);
+ return clamp_tuple_est(-stadistinct * ntuples);
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
@@ -6197,7 +6197,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
* that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
- return clamp_row_est(ntuples);
+ return clamp_tuple_est(ntuples);
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 546828b54bd..e1be9dc6175 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -90,6 +90,7 @@ extern PGDLLIMPORT double recursive_worktable_factor;
extern PGDLLIMPORT int effective_cache_size;
extern double clamp_row_est(double nrows);
+extern double clamp_tuple_est(double ntuples);
extern int32 clamp_width_est(int64 tuple_width);
extern long clamp_cardinality_to_long(Cardinality x);
--
2.34.1