v2-0001-Fix-ANALYZE-crash-on-extended-stats-with-virtual-gen.patch

application/octet-stream

Filename: v2-0001-Fix-ANALYZE-crash-on-extended-stats-with-virtual-gen.patch
Type: application/octet-stream
Part: 0
Message: Re: Infinite Autovacuum loop caused by failing virtual generated column expression
From 6a2705d2084abd9b6b304c460cf17a4c96c1fdad Mon Sep 17 00:00:00 2001
From: Satya Narlapuram <satyanarlapuram@gmail.com>
Date: Sun, 3 May 2026 17:37:24 +0000
Subject: [PATCH] Fix ANALYZE crash on extended stats with virtual generated
 columns

When a table has extended statistics involving expressions on virtual
generated columns, ANALYZE can fail if evaluating those expressions
raises an error (e.g. division by zero). This happens because the
generated column expression is evaluated directly during statistics
computation.

Fix by wrapping expression evaluation in PG_TRY/PG_CATCH blocks:

- In make_build_data() and compute_expr_stats(), add PG_TRY blocks
  that clean up executor resources (slot, estate) before re-throwing,
  preventing resource leaks on error.

- In BuildRelationExtStatistics(), catch errors from expression
  evaluation, issue a WARNING with the statistics object name and
  error details, then continue processing the remaining statistics
  objects.

This ensures ANALYZE completes successfully even when some extended
statistics objects cannot be computed due to expression evaluation
errors.
---
 src/backend/statistics/extended_stats.c | 238 +++++++++++++++---------
 src/test/regress/expected/stats_ext.out |  29 +++
 src/test/regress/sql/stats_ext.sql      |  20 ++
 3 files changed, 195 insertions(+), 92 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 2b83355d..88cea81b 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -194,41 +194,73 @@ BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 		if (stattarget == 0)
 			continue;
 
-		/* evaluate expressions (if the statistics object has any) */
-		data = make_build_data(onerel, stat, numrows, rows, stats, stattarget);
-
-		/* compute statistic of each requested type */
-		foreach(lc2, stat->types)
+		/*
+		 * Wrap expression evaluation and stats computation in PG_TRY so
+		 * that errors from evaluating expressions (e.g. division by zero
+		 * in virtual generated columns) don't cause ANALYZE to fail
+		 * entirely.  Skip the statistics object and issue a WARNING
+		 * instead.
+		 */
+		PG_TRY();
 		{
-			char		t = (char) lfirst_int(lc2);
-
-			if (t == STATS_EXT_NDISTINCT)
-				ndistinct = statext_ndistinct_build(totalrows, data);
-			else if (t == STATS_EXT_DEPENDENCIES)
-				dependencies = statext_dependencies_build(data);
-			else if (t == STATS_EXT_MCV)
-				mcv = statext_mcv_build(data, totalrows, stattarget);
-			else if (t == STATS_EXT_EXPRESSIONS)
+			/* evaluate expressions (if the statistics has any) */
+			data = make_build_data(onerel, stat, numrows, rows,
+								   stats, stattarget);
+
+			/* compute statistic of each requested type */
+			foreach(lc2, stat->types)
 			{
-				AnlExprData *exprdata;
-				int			nexprs;
+				char		t = (char) lfirst_int(lc2);
+
+				if (t == STATS_EXT_NDISTINCT)
+					ndistinct = statext_ndistinct_build(totalrows, data);
+				else if (t == STATS_EXT_DEPENDENCIES)
+					dependencies = statext_dependencies_build(data);
+				else if (t == STATS_EXT_MCV)
+					mcv = statext_mcv_build(data, totalrows, stattarget);
+				else if (t == STATS_EXT_EXPRESSIONS)
+				{
+					AnlExprData *exprdata;
+					int			nexprs;
 
-				/* should not happen, thanks to checks when defining stats */
-				if (!stat->exprs)
-					elog(ERROR, "requested expression stats, but there are no expressions");
+					/* should not happen, thanks to checks when defining stats */
+					if (!stat->exprs)
+						elog(ERROR, "requested expression stats, but there are no expressions");
 
-				exprdata = build_expr_data(stat->exprs, stattarget);
-				nexprs = list_length(stat->exprs);
+					exprdata = build_expr_data(stat->exprs, stattarget);
+					nexprs = list_length(stat->exprs);
 
-				compute_expr_stats(onerel, exprdata, nexprs, rows, numrows);
+					compute_expr_stats(onerel, exprdata, nexprs,
+									   rows, numrows);
 
-				exprstats = serialize_expr_stats(exprdata, nexprs);
+					exprstats = serialize_expr_stats(exprdata, nexprs);
+				}
 			}
-		}
 
-		/* store the statistics in the catalog */
-		statext_store(stat->statOid, inh,
-					  ndistinct, dependencies, mcv, exprstats, stats);
+			/* store the statistics in the catalog */
+			statext_store(stat->statOid, inh,
+						  ndistinct, dependencies, mcv, exprstats, stats);
+		}
+		PG_CATCH();
+		{
+			ErrorData  *edata;
+
+			/* Save the error, issue a WARNING and continue */
+			MemoryContextSwitchTo(cxt);
+			edata = CopyErrorData();
+			FlushErrorState();
+
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("skipping statistics object \"%s.%s\" for relation \"%s.%s\"",
+							stat->schema, stat->name,
+							get_namespace_name(onerel->rd_rel->relnamespace),
+							RelationGetRelationName(onerel)),
+					 errdetail("%s", edata->message)));
+
+			FreeErrorData(edata);
+		}
+		PG_END_TRY();
 
 		/* for reporting progress */
 		pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
@@ -2185,46 +2217,58 @@ compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
 		exprnulls = (bool *) palloc(numrows * sizeof(bool));
 
 		tcnt = 0;
-		for (i = 0; i < numrows; i++)
+		PG_TRY();
 		{
-			Datum		datum;
-			bool		isnull;
+			for (i = 0; i < numrows; i++)
+			{
+				Datum		datum;
+				bool		isnull;
 
-			/*
-			 * Reset the per-tuple context each time, to reclaim any cruft
-			 * left behind by evaluating the statistics expressions.
-			 */
-			ResetExprContext(econtext);
+				/*
+				 * Reset the per-tuple context each time, to reclaim any
+				 * cruft left behind by evaluating the statistics
+				 * expressions.
+				 */
+				ResetExprContext(econtext);
 
-			/* Set up for expression evaluation */
-			ExecStoreHeapTuple(rows[i], slot, false);
+				/* Set up for expression evaluation */
+				ExecStoreHeapTuple(rows[i], slot, false);
 
-			/*
-			 * Evaluate the expression. We do this in the per-tuple context so
-			 * as not to leak memory, and then copy the result into the
-			 * context created at the beginning of this function.
-			 */
-			datum = ExecEvalExprSwitchContext(exprstate,
-											  GetPerTupleExprContext(estate),
-											  &isnull);
-			if (isnull)
-			{
-				exprvals[tcnt] = (Datum) 0;
-				exprnulls[tcnt] = true;
-			}
-			else
-			{
-				/* Make sure we copy the data into the context. */
-				Assert(CurrentMemoryContext == expr_context);
+				/*
+				 * Evaluate the expression. We do this in the per-tuple
+				 * context so as not to leak memory, and then copy the
+				 * result into the context created at the beginning of
+				 * this function.
+				 */
+				datum = ExecEvalExprSwitchContext(exprstate,
+												  GetPerTupleExprContext(estate),
+												  &isnull);
+				if (isnull)
+				{
+					exprvals[tcnt] = (Datum) 0;
+					exprnulls[tcnt] = true;
+				}
+				else
+				{
+					/* Make sure we copy the data into the context. */
+					Assert(CurrentMemoryContext == expr_context);
 
-				exprvals[tcnt] = datumCopy(datum,
-										   stats->attrtype->typbyval,
-										   stats->attrtype->typlen);
-				exprnulls[tcnt] = false;
-			}
+					exprvals[tcnt] = datumCopy(datum,
+											   stats->attrtype->typbyval,
+											   stats->attrtype->typlen);
+					exprnulls[tcnt] = false;
+				}
 
-			tcnt++;
+				tcnt++;
+			}
+		}
+		PG_CATCH();
+		{
+			ExecDropSingleTupleTableSlot(slot);
+			FreeExecutorState(estate);
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
 
 		/*
 		 * Now we can compute the statistics for the expression columns.
@@ -2624,46 +2668,56 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
 	/* Set up expression evaluation state */
 	exprstates = ExecPrepareExprList(stat->exprs, estate);
 
-	for (i = 0; i < numrows; i++)
+	PG_TRY();
 	{
-		/*
-		 * Reset the per-tuple context each time, to reclaim any cruft left
-		 * behind by evaluating the statistics object expressions.
-		 */
-		ResetExprContext(econtext);
-
-		/* Set up for expression evaluation */
-		ExecStoreHeapTuple(rows[i], slot, false);
-
-		idx = bms_num_members(stat->columns);
-		foreach(lc, exprstates)
+		for (i = 0; i < numrows; i++)
 		{
-			Datum		datum;
-			bool		isnull;
-			ExprState  *exprstate = (ExprState *) lfirst(lc);
-
 			/*
-			 * XXX This probably leaks memory. Maybe we should use
-			 * ExecEvalExprSwitchContext but then we need to copy the result
-			 * somewhere else.
+			 * Reset the per-tuple context each time, to reclaim any cruft
+			 * left behind by evaluating the statistics object expressions.
 			 */
-			datum = ExecEvalExpr(exprstate,
-								 GetPerTupleExprContext(estate),
-								 &isnull);
-			if (isnull)
-			{
-				result->values[idx][i] = (Datum) 0;
-				result->nulls[idx][i] = true;
-			}
-			else
+			ResetExprContext(econtext);
+
+			/* Set up for expression evaluation */
+			ExecStoreHeapTuple(rows[i], slot, false);
+
+			idx = bms_num_members(stat->columns);
+			foreach(lc, exprstates)
 			{
-				result->values[idx][i] = datum;
-				result->nulls[idx][i] = false;
-			}
+				Datum		datum;
+				bool		isnull;
+				ExprState  *exprstate = (ExprState *) lfirst(lc);
 
-			idx++;
+				/*
+				 * XXX This probably leaks memory. Maybe we should use
+				 * ExecEvalExprSwitchContext but then we need to copy the
+				 * result somewhere else.
+				 */
+				datum = ExecEvalExpr(exprstate,
+									 GetPerTupleExprContext(estate),
+									 &isnull);
+				if (isnull)
+				{
+					result->values[idx][i] = (Datum) 0;
+					result->nulls[idx][i] = true;
+				}
+				else
+				{
+					result->values[idx][i] = datum;
+					result->nulls[idx][i] = false;
+				}
+
+				idx++;
+			}
 		}
 	}
+	PG_CATCH();
+	{
+		ExecDropSingleTupleTableSlot(slot);
+		FreeExecutorState(estate);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
 
 	ExecDropSingleTupleTableSlot(slot);
 	FreeExecutorState(estate);
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 37070c1a..43892f4f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3207,6 +3207,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE w = 0'
 (1 row)
 
 DROP TABLE virtual_gen_stats;
+-- extended statistics on virtual generated columns whose expressions can error
+CREATE TABLE virtual_gen_err (a int, b int GENERATED ALWAYS AS (a / 0) VIRTUAL);
+INSERT INTO virtual_gen_err VALUES (1), (2), (3);
+CREATE STATISTICS virtual_gen_err_s ON a, b FROM virtual_gen_err;
+ANALYZE virtual_gen_err;  -- should warn, not fail
+WARNING:  skipping statistics object "public.virtual_gen_err_s" for relation "public.virtual_gen_err"
+DETAIL:  division by zero
+DROP TABLE virtual_gen_err;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
@@ -3709,3 +3717,24 @@ SELECT range_length_histogram, range_empty_frac, range_bounds_histogram
 (1 row)
 
 DROP TABLE stats_ext_tbl_range;
+-- Expression statistics with errors (not involving virtual generated columns).
+-- Errors during expression evaluation should produce a WARNING, not a failure,
+-- and non-erroring statistics objects should still be computed.
+CREATE TABLE expr_err (a int);
+INSERT INTO expr_err VALUES (1), (2), (3);
+CREATE STATISTICS expr_err_s1 ON ((a/0)) FROM expr_err;
+CREATE STATISTICS expr_err_s2 ON (a/0),(a+1) FROM expr_err;
+CREATE STATISTICS expr_err_s3 ON ((a+1)) FROM expr_err;
+ANALYZE expr_err;  -- should warn, not fail
+WARNING:  skipping statistics object "public.expr_err_s1" for relation "public.expr_err"
+DETAIL:  division by zero
+WARNING:  skipping statistics object "public.expr_err_s2" for relation "public.expr_err"
+DETAIL:  division by zero
+SELECT statistics_name from pg_stats_ext x
+    WHERE tablename = 'expr_err' ORDER BY ROW(x.*);
+ statistics_name 
+-----------------
+ expr_err_s3
+(1 row)
+
+DROP TABLE expr_err;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 3cc6012b..f78fd902 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1608,6 +1608,13 @@ SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE w = 0'
 
 DROP TABLE virtual_gen_stats;
 
+-- extended statistics on virtual generated columns whose expressions can error
+CREATE TABLE virtual_gen_err (a int, b int GENERATED ALWAYS AS (a / 0) VIRTUAL);
+INSERT INTO virtual_gen_err VALUES (1), (2), (3);
+CREATE STATISTICS virtual_gen_err_s ON a, b FROM virtual_gen_err;
+ANALYZE virtual_gen_err;  -- should warn, not fail
+DROP TABLE virtual_gen_err;
+
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
@@ -1908,3 +1915,16 @@ SELECT range_length_histogram, range_empty_frac, range_bounds_histogram
    FROM pg_stats_ext_exprs
    WHERE statistics_name = 'stats_ext_range';
 DROP TABLE stats_ext_tbl_range;
+
+-- Expression statistics with errors (not involving virtual generated columns).
+-- Errors during expression evaluation should produce a WARNING, not a failure,
+-- and non-erroring statistics objects should still be computed.
+CREATE TABLE expr_err (a int);
+INSERT INTO expr_err VALUES (1), (2), (3);
+CREATE STATISTICS expr_err_s1 ON ((a/0)) FROM expr_err;
+CREATE STATISTICS expr_err_s2 ON (a/0),(a+1) FROM expr_err;
+CREATE STATISTICS expr_err_s3 ON ((a+1)) FROM expr_err;
+ANALYZE expr_err;  -- should warn, not fail
+SELECT statistics_name from pg_stats_ext x
+    WHERE tablename = 'expr_err' ORDER BY ROW(x.*);
+DROP TABLE expr_err;
-- 
2.43.0