v1-0001-Always-use-two-fractional-digits-for-estimated-rows_SRC.patch

text/x-patch
Filename: v1-0001-Always-use-two-fractional-digits-for-estimated-rows_SRC.patch
Type: text/x-patch
Part: 0
From a042ee22dbdae5049f4830116e83a3a72ea5e405 Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdokimov@tantorlabs.com>
Date: Wed, 9 Apr 2025 22:24:01 +0300
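Subject: [PATCH v1] Always use two fractional digits for estimated row counts

EXPLAIN now prints the estimated row count with two fractional digits,
both in text format and in the "Plan Rows" property of the structured
formats.

To make that meaningful, clamp_row_est() no longer rounds its result to
an integer; it only clamps NaN/too-large values to MAXIMUM_ROWCOUNT and
values <= 1 to 1.  A new function, clamp_tuple_est(), keeps the previous
clamp-and-round behavior, and call sites that estimate tuples, pages, or
distinct values are switched to it.

In costsize.c, the rint() calls on the merge join skip-row estimates and
the hash join matched-row estimate are removed, and the nested loop
rescan checks now test outer_path_rows >= 2 instead of > 1.
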
---
 contrib/file_fdw/file_fdw.c                |  4 +-
 contrib/postgres_fdw/postgres_fdw.c        |  4 +-
 contrib/tsm_system_rows/tsm_system_rows.c  |  4 +-
 contrib/tsm_system_time/tsm_system_time.c  |  4 +-
 src/backend/access/table/tableam.c         |  2 +-
 src/backend/access/tablesample/bernoulli.c |  2 +-
 src/backend/access/tablesample/system.c    |  4 +-
 src/backend/commands/explain.c             |  4 +-
 src/backend/optimizer/path/costsize.c      | 53 +++++++++++++++-------
 src/backend/optimizer/util/pathnode.c      |  4 +-
 src/backend/utils/adt/selfuncs.c           | 14 +++---
 src/include/optimizer/optimizer.h          |  1 +
 12 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index a9a5671d95a..682bb86adde 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1097,7 +1097,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 		double		density;
 
 		density = baserel->tuples / (double) baserel->pages;
-		ntuples = clamp_row_est(density * (double) pages);
+		ntuples = clamp_tuple_est(density * (double) pages);
 	}
 	else
 	{
@@ -1113,7 +1113,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 
 		tuple_width = MAXALIGN(baserel->reltarget->width) +
 			MAXALIGN(SizeofHeapTupleHeader);
-		ntuples = clamp_row_est((double) stat_buf.st_size /
+		ntuples = clamp_tuple_est((double) stat_buf.st_size /
 								(double) tuple_width);
 	}
 	fdw_private->ntuples = ntuples;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index a7e0cc9f323..5e1c7bb32e1 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3278,7 +3278,7 @@ estimate_path_cost_size(PlannerInfo *root,
 			 * Back into an estimate of the number of retrieved rows.  Just in
 			 * case this is nuts, clamp to at most nrows.
 			 */
-			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
+			retrieved_rows = clamp_tuple_est(rows / fpinfo->local_conds_sel);
 			retrieved_rows = Min(retrieved_rows, nrows);
 
 			/*
@@ -3453,7 +3453,7 @@ estimate_path_cost_size(PlannerInfo *root,
 			 * Back into an estimate of the number of retrieved rows.  Just in
 			 * case this is nuts, clamp to at most foreignrel->tuples.
 			 */
-			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
+			retrieved_rows = clamp_tuple_est(rows / fpinfo->local_conds_sel);
 			retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
 
 			/*
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index f401efa2131..308bb301da1 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -135,7 +135,7 @@ system_rows_samplescangetsamplesize(PlannerInfo *root,
 	/* Clamp to the estimated relation size */
 	if (ntuples > baserel->tuples)
 		ntuples = (int64) baserel->tuples;
-	ntuples = clamp_row_est(ntuples);
+	ntuples = clamp_tuple_est(ntuples);
 
 	if (baserel->tuples > 0 && baserel->pages > 0)
 	{
@@ -151,7 +151,7 @@ system_rows_samplescangetsamplesize(PlannerInfo *root,
 	}
 
 	/* Clamp to sane value */
-	npages = clamp_row_est(Min((double) baserel->pages, npages));
+	npages = clamp_tuple_est(Min((double) baserel->pages, npages));
 
 	*pages = npages;
 	*tuples = ntuples;
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index c9c71d8c3af..b2d1d7e37d7 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -151,7 +151,7 @@ system_time_samplescangetsamplesize(PlannerInfo *root,
 		npages = millis;		/* even more bogus, but whatcha gonna do? */
 
 	/* Clamp to sane value */
-	npages = clamp_row_est(Min((double) baserel->pages, npages));
+	npages = clamp_tuple_est(Min((double) baserel->pages, npages));
 
 	if (baserel->tuples > 0 && baserel->pages > 0)
 	{
@@ -167,7 +167,7 @@ system_time_samplescangetsamplesize(PlannerInfo *root,
 	}
 
 	/* Clamp to the estimated relation size */
-	ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
+	ntuples = clamp_tuple_est(Min(baserel->tuples, ntuples));
 
 	*pages = npages;
 	*tuples = ntuples;
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index a56c5eceb14..e14a315f319 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -742,7 +742,7 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
 		/* note: integer division is intentional here */
 		density = (usable_bytes_per_page * fillfactor / 100) / tuple_width;
 		/* There's at least one row on the page, even with low fillfactor. */
-		density = clamp_row_est(density);
+		density = clamp_tuple_est(density);
 	}
 	*tuples = rint(density * (double) curpages);
 
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
index 5e1c5d2b723..d1993299158 100644
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -117,7 +117,7 @@ bernoulli_samplescangetsamplesize(PlannerInfo *root,
 	/* We'll visit all pages of the baserel */
 	*pages = baserel->pages;
 
-	*tuples = clamp_row_est(baserel->tuples * samplefract);
+	*tuples = clamp_tuple_est(baserel->tuples * samplefract);
 }
 
 /*
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index 8db813b89fc..8944fe28799 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -117,10 +117,10 @@ system_samplescangetsamplesize(PlannerInfo *root,
 	}
 
 	/* We'll visit a sample of the pages ... */
-	*pages = clamp_row_est(baserel->pages * samplefract);
+	*pages = clamp_tuple_est(baserel->pages * samplefract);
 
 	/* ... and hopefully get a representative number of tuples from them */
-	*tuples = clamp_row_est(baserel->tuples * samplefract);
+	*tuples = clamp_tuple_est(baserel->tuples * samplefract);
 }
 
 /*
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ef8aa489af8..9d9da4880a3 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1808,7 +1808,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	{
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			appendStringInfo(es->str, "  (cost=%.2f..%.2f rows=%.0f width=%d)",
+			appendStringInfo(es->str, "  (cost=%.2f..%.2f rows=%.2f width=%d)",
 							 plan->startup_cost, plan->total_cost,
 							 plan->plan_rows, plan->plan_width);
 		}
@@ -1819,7 +1819,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			ExplainPropertyFloat("Total Cost", NULL, plan->total_cost,
 								 2, es);
 			ExplainPropertyFloat("Plan Rows", NULL, plan->plan_rows,
-								 0, es);
+								 2, es);
 			ExplainPropertyInteger("Plan Width", NULL, plan->plan_width,
 								   es);
 		}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 60b0fcfb6be..27f8d70eb2c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -205,6 +205,29 @@ static double page_size(double tuples, int width);
 static double get_parallel_divisor(Path *path);
 
 
+/*
+ * clamp_tuple_est
+ *		Force a tuple-count estimate to a sane value.
+ */
+double
+clamp_tuple_est(double ntuples)
+{
+	/*
+	 * Avoid infinite and NaN tuple estimates; costs derived from such values
+	 * are useless.  Force the estimate to be at least one tuple, to make
+	 * explain output look better and to avoid divide-by-zero when
+	 * interpolating costs.  Unlike clamp_row_est, round to an integer too.
+	 */
+	if (ntuples > MAXIMUM_ROWCOUNT || isnan(ntuples))
+		ntuples = MAXIMUM_ROWCOUNT;
+	else if (ntuples <= 1.0)
+		ntuples = 1.0;
+	else
+		ntuples = rint(ntuples);
+
+	return ntuples;
+}
+
 /*
  * clamp_row_est
  *		Force a row-count estimate to a sane value.
@@ -216,14 +239,12 @@ clamp_row_est(double nrows)
 	 * Avoid infinite and NaN row estimates.  Costs derived from such values
 	 * are going to be useless.  Also force the estimate to be at least one
 	 * row, to make explain output look better and to avoid possible
-	 * divide-by-zero when interpolating costs.  Make it an integer, too.
+	 * divide-by-zero when interpolating costs.
 	 */
 	if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows))
 		nrows = MAXIMUM_ROWCOUNT;
 	else if (nrows <= 1.0)
 		nrows = 1.0;
-	else
-		nrows = rint(nrows);
 
 	return nrows;
 }
@@ -249,7 +270,7 @@ clamp_width_est(int64 tuple_width)
 		return (int32) MaxAllocSize;
 
 	/*
-	 * Unlike clamp_row_est, we just Assert that the value isn't negative,
+	 * Unlike clamp_tuple_est, we just Assert that the value isn't negative,
 	 * rather than masking such errors.
 	 */
 	Assert(tuple_width >= 0);
@@ -643,7 +664,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 	run_cost += indexTotalCost - indexStartupCost;
 
 	/* estimate number of main-table tuples fetched */
-	tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+	tuples_fetched = clamp_tuple_est(indexSelectivity * baserel->tuples);
 
 	/* fetch estimated page costs for tablespace containing table */
 	get_tablespace_page_costs(baserel->reltablespace,
@@ -901,7 +922,7 @@ extract_nonindex_conditions(List *qual_clauses, List *indexclauses)
  * computed for us by make_one_rel.
  *
  * Caller is expected to have ensured that tuples_fetched is greater than zero
- * and rounded to integer (see clamp_row_est).  The result will likewise be
+ * and rounded to integer (see clamp_tuple_est).  The result will likewise be
  * greater than zero and integral.
  */
 double
@@ -3084,7 +3105,7 @@ get_windowclause_startup_tuples(PlannerInfo *root, WindowClause *wc,
 	 * subnode.
 	 */
 
-	return clamp_row_est(return_tuples);
+	return clamp_tuple_est(return_tuples);
 }
 
 /*
@@ -3298,7 +3319,7 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 	 */
 	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 	run_cost += outer_path->total_cost - outer_path->startup_cost;
-	if (outer_path_rows > 1)
+	if (outer_path_rows >= 2)
 		run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
 
 	inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
@@ -3323,7 +3344,7 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 	{
 		/* Normal case; we'll scan whole input rel for each outer row */
 		run_cost += inner_run_cost;
-		if (outer_path_rows > 1)
+		if (outer_path_rows >= 2)
 			run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
 	}
 
@@ -3656,8 +3677,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 	 * Convert selectivities to row counts.  We force outer_rows and
 	 * inner_rows to be at least 1, but the skip_rows estimates can be zero.
 	 */
-	outer_skip_rows = rint(outer_path_rows * outerstartsel);
-	inner_skip_rows = rint(inner_path_rows * innerstartsel);
+	outer_skip_rows = outer_path_rows * outerstartsel;
+	inner_skip_rows = inner_path_rows * innerstartsel;
 	outer_rows = clamp_row_est(outer_path_rows * outerendsel);
 	inner_rows = clamp_row_est(inner_path_rows * innerendsel);
 
@@ -4415,7 +4436,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	 * that way, so it will be unable to drive the batch size below hash_mem
 	 * when this is true.)
 	 */
-	if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
+	if (relation_byte_size(clamp_tuple_est(inner_path_rows * innermcvfreq),
 						   inner_path->pathtarget->width) > get_hash_memory_limit())
 		startup_cost += disable_cost;
 
@@ -4449,7 +4470,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 		 * to clamp inner_scan_frac to at most 1.0; but since match_count is
 		 * at least 1, no such clamp is needed now.)
 		 */
-		outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
+		outer_matched_rows = outer_path_rows * extra->semifactors.outer_match_frac;
 		inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
 
 		startup_cost += hash_qual_cost.startup;
@@ -4573,7 +4594,7 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 		if (subplan->subLinkType == EXISTS_SUBLINK)
 		{
 			/* we only need to fetch 1 tuple; clamp to avoid zero divide */
-			sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows);
+			sp_cost.per_tuple += plan_run_cost / clamp_tuple_est(plan->plan_rows);
 		}
 		else if (subplan->subLinkType == ALL_SUBLINK ||
 				 subplan->subLinkType == ANY_SUBLINK)
@@ -6517,7 +6538,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 	/*
 	 * Estimate number of main-table pages fetched.
 	 */
-	tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+	tuples_fetched = clamp_tuple_est(indexSelectivity * baserel->tuples);
 
 	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 
@@ -6583,7 +6604,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 		 */
 		if (lossy_pages > 0)
 			tuples_fetched =
-				clamp_row_est(indexSelectivity *
+				clamp_tuple_est(indexSelectivity *
 							  (exact_pages / heap_pages) * baserel->tuples +
 							  (lossy_pages / heap_pages) * baserel->tuples);
 	}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..0b9ec6b26fc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1687,7 +1687,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->param_exprs = param_exprs;
 	pathnode->singlerow = singlerow;
 	pathnode->binary_mode = binary_mode;
-	pathnode->calls = clamp_row_est(calls);
+	pathnode->calls = clamp_tuple_est(calls);
 
 	/*
 	 * For now we set est_entries to 0.  cost_memoize_rescan() does all the
@@ -4049,7 +4049,7 @@ adjust_limit_rows_costs(double *rows,	/* in/out parameter */
 		if (offset_est > 0)
 			offset_rows = (double) offset_est;
 		else
-			offset_rows = clamp_row_est(input_rows * 0.10);
+			offset_rows = clamp_tuple_est(input_rows * 0.10);
 		if (offset_rows > *rows)
 			offset_rows = *rows;
 		if (input_rows > 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 588d991fa57..0c077c09175 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2183,7 +2183,7 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
 								 ATTSTATSSLOT_NUMBERS))
 			{
 				if (sslot.nnumbers > 0)
-					nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
+					nelem = clamp_tuple_est(sslot.numbers[sslot.nnumbers - 1]);
 				free_attstatsslot(&sslot);
 			}
 		}
@@ -3462,7 +3462,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 * estimate is usually already at least 1, but clamp it just in case it
 	 * isn't.
 	 */
-	input_rows = clamp_row_est(input_rows);
+	input_rows = clamp_tuple_est(input_rows);
 
 	/*
 	 * If no grouping columns, there's exactly one group.  (This can't happen
@@ -3755,7 +3755,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					(1 - pow((rel->tuples - rel->rows) / rel->tuples,
 							 rel->tuples / reldistinct));
 			}
-			reldistinct = clamp_row_est(reldistinct);
+			reldistinct = clamp_tuple_est(reldistinct);
 
 			/*
 			 * Update estimate of total distinct groups.
@@ -4071,7 +4071,7 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
 	if (vardata.rel && vardata.rel->tuples > 0)
 	{
 		ndistinct *= vardata.rel->rows / vardata.rel->tuples;
-		ndistinct = clamp_row_est(ndistinct);
+		ndistinct = clamp_tuple_est(ndistinct);
 	}
 
 	/*
@@ -6168,7 +6168,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 	 * If we had an absolute estimate, use that.
 	 */
 	if (stadistinct > 0.0)
-		return clamp_row_est(stadistinct);
+		return clamp_tuple_est(stadistinct);
 
 	/*
 	 * Otherwise we need to get the relation size; punt if not available.
@@ -6189,7 +6189,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 	 * If we had a relative estimate, use that.
 	 */
 	if (stadistinct < 0.0)
-		return clamp_row_est(-stadistinct * ntuples);
+		return clamp_tuple_est(-stadistinct * ntuples);
 
 	/*
 	 * With no data, estimate ndistinct = ntuples if the table is small, else
@@ -6197,7 +6197,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 	 * that the behavior isn't discontinuous.
 	 */
 	if (ntuples < DEFAULT_NUM_DISTINCT)
-		return clamp_row_est(ntuples);
+		return clamp_tuple_est(ntuples);
 
 	*isdefault = true;
 	return DEFAULT_NUM_DISTINCT;
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 546828b54bd..e1be9dc6175 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -90,6 +90,7 @@ extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);
+extern double clamp_tuple_est(double ntuples);
 extern int32 clamp_width_est(int64 tuple_width);
 extern long clamp_cardinality_to_long(Cardinality x);
 
-- 
2.34.1