vac-analyze-reltuple-changes.patch

text/x-patch

Filename: vac-analyze-reltuple-changes.patch
Type: text/x-patch
Part: 0
Message: Re: [ADMIN] pg_class reltuples/relpages not updated by autovacuum/vacuum

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: unified
File+
src/backend/commands/analyze.c 0 0
src/backend/commands/vacuum.c 0 0
src/backend/commands/vacuumlazy.c 0 0
src/backend/postmaster/pgstat.c 0 0
src/include/commands/vacuum.h 0 0
src/include/pgstat.h 0 0
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 0568a1bcf86281a9b1086d343e7027557295065c..fa84989fc6fa8be90d4eecb9c33e94a232d79880 100644
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
*************** static MemoryContext anl_context = NULL;
*** 84,91 ****
  static BufferAccessStrategy vac_strategy;
  
  
! static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
! 			   bool update_reltuples, bool inh);
  static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
  				  int samplesize);
  static bool BlockSampler_HasMore(BlockSampler bs);
--- 84,90 ----
  static BufferAccessStrategy vac_strategy;
  
  
! static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh);
  static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
  				  int samplesize);
  static bool BlockSampler_HasMore(BlockSampler bs);
*************** static bool std_typanalyze(VacAttrStats 
*** 115,132 ****
  
  /*
   *	analyze_rel() -- analyze one relation
-  *
-  * If update_reltuples is true, we update reltuples and relpages columns
-  * in pg_class.  Caller should pass false if we're part of VACUUM ANALYZE,
-  * and the VACUUM didn't skip any pages.  We only have an approximate count,
-  * so we don't want to overwrite the accurate values already inserted by the
-  * VACUUM in that case.  VACUUM always scans all indexes, however, so the
-  * pg_class entries for indexes are never updated if we're part of VACUUM
-  * ANALYZE.
   */
  void
! analyze_rel(Oid relid, VacuumStmt *vacstmt,
! 			BufferAccessStrategy bstrategy, bool update_reltuples)
  {
  	Relation	onerel;
  
--- 114,122 ----
  
  /*
   *	analyze_rel() -- analyze one relation
   */
  void
! analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
  {
  	Relation	onerel;
  
*************** analyze_rel(Oid relid, VacuumStmt *vacst
*** 238,250 ****
  	/*
  	 * Do the normal non-recursive ANALYZE.
  	 */
! 	do_analyze_rel(onerel, vacstmt, update_reltuples, false);
  
  	/*
  	 * If there are child tables, do recursive ANALYZE.
  	 */
  	if (onerel->rd_rel->relhassubclass)
! 		do_analyze_rel(onerel, vacstmt, false, true);
  
  	/*
  	 * Close source relation now, but keep lock so that no one deletes it
--- 228,240 ----
  	/*
  	 * Do the normal non-recursive ANALYZE.
  	 */
! 	do_analyze_rel(onerel, vacstmt, false);
  
  	/*
  	 * If there are child tables, do recursive ANALYZE.
  	 */
  	if (onerel->rd_rel->relhassubclass)
! 		do_analyze_rel(onerel, vacstmt, true);
  
  	/*
  	 * Close source relation now, but keep lock so that no one deletes it
*************** analyze_rel(Oid relid, VacuumStmt *vacst
*** 267,274 ****
   *	do_analyze_rel() -- analyze one relation, recursively or not
   */
  static void
! do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
! 			   bool update_reltuples, bool inh)
  {
  	int			attr_cnt,
  				tcnt,
--- 257,263 ----
   *	do_analyze_rel() -- analyze one relation, recursively or not
   */
  static void
! do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
  {
  	int			attr_cnt,
  				tcnt,
*************** do_analyze_rel(Relation onerel, VacuumSt
*** 437,445 ****
  	}
  
  	/*
! 	 * Quit if no analyzable columns and no pg_class update needed.
  	 */
! 	if (attr_cnt <= 0 && !analyzableindex && !update_reltuples)
  		goto cleanup;
  
  	/*
--- 426,434 ----
  	}
  
  	/*
! 	 * Quit if no analyzable columns.
  	 */
! 	if (attr_cnt <= 0 && !analyzableindex)
  		goto cleanup;
  
  	/*
*************** do_analyze_rel(Relation onerel, VacuumSt
*** 549,558 ****
  	}
  
  	/*
! 	 * Update pages/tuples stats in pg_class, but not if we're inside a VACUUM
! 	 * that got a more precise number.
  	 */
! 	if (update_reltuples)
  		vac_update_relstats(onerel,
  							RelationGetNumberOfBlocks(onerel),
  							totalrows, hasindex, InvalidTransactionId);
--- 538,547 ----
  	}
  
  	/*
! 	 * Update pages/tuples stats in pg_class ... but not if we're doing
! 	 * inherited stats.
  	 */
! 	if (!inh)
  		vac_update_relstats(onerel,
  							RelationGetNumberOfBlocks(onerel),
  							totalrows, hasindex, InvalidTransactionId);
*************** do_analyze_rel(Relation onerel, VacuumSt
*** 562,568 ****
  	 * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
  	 * VACUUM.
  	 */
! 	if (!(vacstmt->options & VACOPT_VACUUM))
  	{
  		for (ind = 0; ind < nindexes; ind++)
  		{
--- 551,557 ----
  	 * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
  	 * VACUUM.
  	 */
! 	if (!inh && !(vacstmt->options & VACOPT_VACUUM))
  	{
  		for (ind = 0; ind < nindexes; ind++)
  		{
*************** do_analyze_rel(Relation onerel, VacuumSt
*** 577,589 ****
  	}
  
  	/*
! 	 * Report ANALYZE to the stats collector, too; likewise, tell it to adopt
! 	 * these numbers only if we're not inside a VACUUM that got a better
! 	 * number.	However, a call with inh = true shouldn't reset the stats.
  	 */
  	if (!inh)
! 		pgstat_report_analyze(onerel, update_reltuples,
! 							  totalrows, totaldeadrows);
  
  	/* We skip to here if there were no analyzable columns */
  cleanup:
--- 566,577 ----
  	}
  
  	/*
! 	 * Report ANALYZE to the stats collector, too.  However, if doing
! 	 * inherited stats we shouldn't report, because the stats collector only
! 	 * tracks per-table stats.
  	 */
  	if (!inh)
! 		pgstat_report_analyze(onerel, totalrows, totaldeadrows);
  
  	/* We skip to here if there were no analyzable columns */
  cleanup:
*************** acquire_sample_rows(Relation onerel, Hea
*** 1243,1260 ****
  		qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
  	/*
! 	 * Estimate total numbers of rows in relation.
  	 */
  	if (bs.m > 0)
! 	{
! 		*totalrows = floor((liverows * totalblocks) / bs.m + 0.5);
! 		*totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);
! 	}
  	else
- 	{
- 		*totalrows = 0.0;
  		*totaldeadrows = 0.0;
- 	}
  
  	/*
  	 * Emit some interesting relation info
--- 1231,1249 ----
  		qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
  	/*
! 	 * Estimate total numbers of rows in relation.  For live rows, use
! 	 * vac_estimate_reltuples; for dead rows, we have no source of old
! 	 * information, so we have to assume the density is the same in unseen
! 	 * pages as in the pages we scanned.
  	 */
+ 	*totalrows = vac_estimate_reltuples(onerel, true,
+ 										totalblocks,
+ 										bs.m,
+ 										liverows);
  	if (bs.m > 0)
! 		*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
  	else
  		*totaldeadrows = 0.0;
  
  	/*
  	 * Emit some interesting relation info
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 9606569617afafe16c55752183cb2a6de89bcad1..efae31acab7de0f6d4cbe45fb175a301486ce582 100644
*** a/src/backend/commands/vacuum.c
--- b/src/backend/commands/vacuum.c
***************
*** 20,25 ****
--- 20,27 ----
   */
  #include "postgres.h"
  
+ #include <math.h>
+ 
  #include "access/clog.h"
  #include "access/genam.h"
  #include "access/heapam.h"
*************** static BufferAccessStrategy vac_strategy
*** 62,68 ****
  static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  static void vac_truncate_clog(TransactionId frozenXID);
  static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
! 		   bool for_wraparound, bool *scanned_all);
  
  
  /*
--- 64,70 ----
  static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  static void vac_truncate_clog(TransactionId frozenXID);
  static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
! 		   bool for_wraparound);
  
  
  /*
*************** vacuum(VacuumStmt *vacstmt, Oid relid, b
*** 219,230 ****
  		foreach(cur, relations)
  		{
  			Oid			relid = lfirst_oid(cur);
- 			bool		scanned_all = false;
  
  			if (vacstmt->options & VACOPT_VACUUM)
  			{
! 				if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
! 								&scanned_all))
  					continue;
  			}
  
--- 221,230 ----
  		foreach(cur, relations)
  		{
  			Oid			relid = lfirst_oid(cur);
  
  			if (vacstmt->options & VACOPT_VACUUM)
  			{
! 				if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
  					continue;
  			}
  
*************** vacuum(VacuumStmt *vacstmt, Oid relid, b
*** 241,247 ****
  					PushActiveSnapshot(GetTransactionSnapshot());
  				}
  
! 				analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
  
  				if (use_own_xacts)
  				{
--- 241,247 ----
  					PushActiveSnapshot(GetTransactionSnapshot());
  				}
  
! 				analyze_rel(relid, vacstmt, vac_strategy);
  
  				if (use_own_xacts)
  				{
*************** vacuum_set_xid_limits(int freeze_min_age
*** 454,459 ****
--- 454,537 ----
  
  
  /*
+  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+  *
+  *		If we scanned the whole relation then we should just use the count of
+  *		live tuples seen; but if we did not, we should not trust the count
+  *		unreservedly, especially not in VACUUM, which may have scanned a quite
+  *		nonrandom subset of the table.  When we have only partial information,
+  *		we take the old value of pg_class.reltuples as a measurement of the
+  *		tuple density in the unscanned pages.
+  *
+  *		This routine is shared by VACUUM and ANALYZE.
+  */
+ double
+ vac_estimate_reltuples(Relation relation, bool is_analyze,
+ 					   BlockNumber total_pages,
+ 					   BlockNumber scanned_pages,
+ 					   double scanned_tuples)
+ {
+ 	BlockNumber	old_rel_pages = relation->rd_rel->relpages;
+ 	double		old_rel_tuples = relation->rd_rel->reltuples;
+ 	double		old_density;
+ 	double		new_density;
+ 	double		multiplier;
+ 	double		updated_density;
+ 
+ 	/* If we did scan the whole table, just use the count as-is */
+ 	if (scanned_pages >= total_pages)
+ 		return scanned_tuples;
+ 
+ 	/*
+ 	 * If scanned_pages is zero but total_pages isn't, keep the existing
+ 	 * value of reltuples.
+ 	 */
+ 	if (scanned_pages == 0)
+ 		return old_rel_tuples;
+ 
+ 	/*
+ 	 * If old value of relpages is zero, old density is indeterminate; we
+ 	 * can't do much except scale up scanned_tuples to match total_pages.
+ 	 */
+ 	if (old_rel_pages == 0)
+ 		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+ 
+ 	/*
+ 	 * Okay, we've covered the corner cases.  The normal calculation is to
+ 	 * convert the old measurement to a density (tuples per page), then
+ 	 * update the density using an exponential-moving-average approach,
+ 	 * and finally compute reltuples as updated_density * total_pages.
+ 	 *
+ 	 * For ANALYZE, the moving average multiplier is just the fraction of
+ 	 * the table's pages we scanned.  This is equivalent to assuming
+ 	 * that the tuple density in the unscanned pages didn't change.  Of
+ 	 * course, it probably did, if the new density measurement is different.
+ 	 * But over repeated cycles, the value of reltuples will converge towards
+ 	 * the correct value, if repeated measurements show the same new density.
+ 	 *
+ 	 * For VACUUM, the situation is a bit different: we have looked at a
+ 	 * nonrandom sample of pages, but we know for certain that the pages we
+ 	 * didn't look at are precisely the ones that haven't changed lately.
+ 	 * Thus, there is a reasonable argument for doing exactly the same thing
+ 	 * as for the ANALYZE case, that is use the old density measurement as
+ 	 * the value for the unscanned pages.
+ 	 *
+ 	 * This logic could probably use further refinement.
+ 	 */
+ 	old_density = old_rel_tuples / old_rel_pages;
+ 	new_density = scanned_tuples / scanned_pages;
+ 	multiplier = (double) scanned_pages / (double) total_pages;
+ #ifdef NOT_USED
+ 	/* this makes things less accurate, not more so :-( */
+ 	if (!is_analyze && multiplier < 0.5)
+ 		multiplier *= 0.25;		/* apply fudge factor */
+ #endif
+ 	updated_density = old_density + (new_density - old_density) * multiplier;
+ 	return floor(updated_density * total_pages + 0.5);
+ }
+ 
+ 
+ /*
   *	vac_update_relstats() -- update statistics for one relation
   *
   *		Update the whole-relation statistics that are kept in its pg_class
*************** vacuum_set_xid_limits(int freeze_min_age
*** 480,486 ****
   *		somebody vacuuming pg_class might think they could delete a tuple
   *		marked with xmin = our xid.
   *
!  *		This routine is shared by VACUUM and stand-alone ANALYZE.
   */
  void
  vac_update_relstats(Relation relation,
--- 558,564 ----
   *		somebody vacuuming pg_class might think they could delete a tuple
   *		marked with xmin = our xid.
   *
!  *		This routine is shared by VACUUM and ANALYZE.
   */
  void
  vac_update_relstats(Relation relation,
*************** vac_truncate_clog(TransactionId frozenXI
*** 758,771 ****
   *		many small transactions.  Otherwise, two-phase locking would require
   *		us to lock the entire database during one pass of the vacuum cleaner.
   *
-  *		We'll return true in *scanned_all if the vacuum scanned all heap
-  *		pages, and updated pg_class.
-  *
   *		At entry and exit, we are not inside a transaction.
   */
  static bool
! vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
! 		   bool *scanned_all)
  {
  	LOCKMODE	lmode;
  	Relation	onerel;
--- 836,845 ----
   *		many small transactions.  Otherwise, two-phase locking would require
   *		us to lock the entire database during one pass of the vacuum cleaner.
   *
   *		At entry and exit, we are not inside a transaction.
   */
  static bool
! vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
  {
  	LOCKMODE	lmode;
  	Relation	onerel;
*************** vacuum_rel(Oid relid, VacuumStmt *vacstm
*** 775,783 ****
  	int			save_sec_context;
  	int			save_nestlevel;
  
- 	if (scanned_all)
- 		*scanned_all = false;
- 
  	/* Begin a transaction for vacuuming this relation */
  	StartTransactionCommand();
  
--- 849,854 ----
*************** vacuum_rel(Oid relid, VacuumStmt *vacstm
*** 971,977 ****
  					vacstmt->freeze_min_age, vacstmt->freeze_table_age);
  	}
  	else
! 		lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
  
  	/* Roll back any GUC changes executed by index functions */
  	AtEOXact_GUC(false, save_nestlevel);
--- 1042,1048 ----
  					vacstmt->freeze_min_age, vacstmt->freeze_table_age);
  	}
  	else
! 		lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
  
  	/* Roll back any GUC changes executed by index functions */
  	AtEOXact_GUC(false, save_nestlevel);
*************** vacuum_rel(Oid relid, VacuumStmt *vacstm
*** 997,1003 ****
  	 * totally unimportant for toast relations.
  	 */
  	if (toast_relid != InvalidOid)
! 		vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
  
  	/*
  	 * Now release the session-level lock on the master table.
--- 1068,1074 ----
  	 * totally unimportant for toast relations.
  	 */
  	if (toast_relid != InvalidOid)
! 		vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
  
  	/*
  	 * Now release the session-level lock on the master table.
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 9393fa0727aaad7508e1163623322b4066412257..570f1032f3d4eba46ed54de3f6cd39d62e6d09f0 100644
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 77,93 ****
   * Before we consider skipping a page that's marked as clean in
   * visibility map, we must've seen at least this many clean pages.
   */
! #define SKIP_PAGES_THRESHOLD	32
  
  typedef struct LVRelStats
  {
  	/* hasindex = true means two-pass strategy; false means one-pass */
  	bool		hasindex;
- 	bool		scanned_all;	/* have we scanned all pages (this far)? */
  	/* Overall statistics about rel */
! 	BlockNumber rel_pages;
  	double		old_rel_tuples; /* previous value of pg_class.reltuples */
! 	double		rel_tuples;		/* counts only tuples on scanned pages */
  	BlockNumber pages_removed;
  	double		tuples_deleted;
  	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
--- 77,94 ----
   * Before we consider skipping a page that's marked as clean in
   * visibility map, we must've seen at least this many clean pages.
   */
! #define SKIP_PAGES_THRESHOLD	((BlockNumber) 32)
  
  typedef struct LVRelStats
  {
  	/* hasindex = true means two-pass strategy; false means one-pass */
  	bool		hasindex;
  	/* Overall statistics about rel */
! 	BlockNumber rel_pages;		/* total number of pages */
! 	BlockNumber scanned_pages;	/* number of pages we examined */
! 	double		scanned_tuples;	/* counts only tuples on scanned pages */
  	double		old_rel_tuples; /* previous value of pg_class.reltuples */
! 	double		new_rel_tuples; /* new estimated total # of tuples */
  	BlockNumber pages_removed;
  	double		tuples_deleted;
  	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
*************** static int	vac_cmp_itemptr(const void *l
*** 143,149 ****
   */
  void
  lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
! 				BufferAccessStrategy bstrategy, bool *scanned_all)
  {
  	LVRelStats *vacrelstats;
  	Relation   *Irel;
--- 144,150 ----
   */
  void
  lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
! 				BufferAccessStrategy bstrategy)
  {
  	LVRelStats *vacrelstats;
  	Relation   *Irel;
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 175,181 ****
  
  	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
  
- 	vacrelstats->scanned_all = true;	/* will be cleared if we skip a page */
  	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
  	vacrelstats->num_index_scans = 0;
  
--- 176,181 ----
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 205,228 ****
  	FreeSpaceMapVacuum(onerel);
  
  	/*
! 	 * Update statistics in pg_class.  But only if we didn't skip any pages;
! 	 * the tuple count only includes tuples from the pages we've visited, and
! 	 * we haven't frozen tuples in unvisited pages either.  The page count is
! 	 * accurate in any case, but because we use the reltuples / relpages ratio
! 	 * in the planner, it's better to not update relpages either if we can't
! 	 * update reltuples.
  	 */
! 	if (vacrelstats->scanned_all)
! 		vac_update_relstats(onerel,
! 							vacrelstats->rel_pages, vacrelstats->rel_tuples,
! 							vacrelstats->hasindex,
! 							FreezeLimit);
  
  	/* report results to the stats collector, too */
  	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
! 						 vacrelstats->scanned_all,
! 						 vacrelstats->rel_tuples);
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
--- 205,224 ----
  	FreeSpaceMapVacuum(onerel);
  
  	/*
! 	 * Update statistics in pg_class.  But don't change relfrozenxid if we
! 	 * skipped any pages.
  	 */
! 	vac_update_relstats(onerel,
! 						vacrelstats->rel_pages, vacrelstats->new_rel_tuples,
! 						vacrelstats->hasindex,
! 						(vacrelstats->scanned_pages < vacrelstats->rel_pages) ?
! 						InvalidTransactionId :
! 						FreezeLimit);
  
  	/* report results to the stats collector, too */
  	pgstat_report_vacuum(RelationGetRelid(onerel),
  						 onerel->rd_rel->relisshared,
! 						 vacrelstats->new_rel_tuples);
  
  	/* and log the action if appropriate */
  	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
*************** lazy_vacuum_rel(Relation onerel, VacuumS
*** 239,251 ****
  							get_namespace_name(RelationGetNamespace(onerel)),
  							RelationGetRelationName(onerel),
  							vacrelstats->num_index_scans,
! 						  vacrelstats->pages_removed, vacrelstats->rel_pages,
! 						vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
  							pg_rusage_show(&ru0))));
  	}
- 
- 	if (scanned_all)
- 		*scanned_all = vacrelstats->scanned_all;
  }
  
  /*
--- 235,246 ----
  							get_namespace_name(RelationGetNamespace(onerel)),
  							RelationGetRelationName(onerel),
  							vacrelstats->num_index_scans,
! 							vacrelstats->pages_removed,
! 							vacrelstats->rel_pages,
! 							vacrelstats->tuples_deleted,
! 							vacrelstats->new_rel_tuples,
  							pg_rusage_show(&ru0))));
  	}
  }
  
  /*
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 301,307 ****
  	HeapTupleData tuple;
  	char	   *relname;
  	BlockNumber empty_pages,
- 				scanned_pages,
  				vacuumed_pages;
  	double		num_tuples,
  				tups_vacuumed,
--- 296,301 ----
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 321,327 ****
  					get_namespace_name(RelationGetNamespace(onerel)),
  					relname)));
  
! 	empty_pages = vacuumed_pages = scanned_pages = 0;
  	num_tuples = tups_vacuumed = nkeep = nunused = 0;
  
  	indstats = (IndexBulkDeleteResult **)
--- 315,321 ----
  					get_namespace_name(RelationGetNamespace(onerel)),
  					relname)));
  
! 	empty_pages = vacuumed_pages = 0;
  	num_tuples = tups_vacuumed = nkeep = nunused = 0;
  
  	indstats = (IndexBulkDeleteResult **)
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 329,340 ****
--- 323,363 ----
  
  	nblocks = RelationGetNumberOfBlocks(onerel);
  	vacrelstats->rel_pages = nblocks;
+ 	vacrelstats->scanned_pages = 0;
  	vacrelstats->nonempty_pages = 0;
  	vacrelstats->latestRemovedXid = InvalidTransactionId;
  
  	lazy_space_alloc(vacrelstats, nblocks);
  
+ 	/*
+ 	 * We want to skip pages that don't require vacuuming according to the
+ 	 * visibility map, but only if we've seen a streak of at least
+ 	 * SKIP_PAGES_THRESHOLD pages marked as clean.  Since we're reading
+ 	 * sequentially, the OS should be doing readahead for us and there's no
+ 	 * gain in skipping a page now and then.  You need a longer run of
+ 	 * consecutive skipped pages before it's worthwhile.  Also, skipping even
+ 	 * a single page means that we can't update relfrozenxid, so we only want
+ 	 * to do it if there's a good chance to skip a goodly number of pages.
+ 	 *
+ 	 * Before entering the main loop, look ahead to see if there are at least
+ 	 * SKIP_PAGES_THRESHOLD clean pages at the start of the relation; if so,
+ 	 * we are going to skip at least one page, and we might as well skip the
+ 	 * initial pages too.  This extra logic prevents over-weighting these
+ 	 * pages when we estimate the new tuple density in vac_estimate_reltuples.
+ 	 */
  	all_visible_streak = 0;
+ 	if (!scan_all && nblocks >= SKIP_PAGES_THRESHOLD)
+ 	{
+ 		for (blkno = 0; blkno < SKIP_PAGES_THRESHOLD; blkno++)
+ 		{
+ 			if (!visibilitymap_test(onerel, blkno, &vmbuffer))
+ 				break;
+ 			all_visible_streak++;
+ 		}
+ 		if (all_visible_streak < SKIP_PAGES_THRESHOLD)
+ 			all_visible_streak = 0;
+ 	}
+ 
  	for (blkno = 0; blkno < nblocks; blkno++)
  	{
  		Buffer		buf;
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 351,379 ****
  		bool		all_visible;
  		bool		has_dead_tuples;
  
- 		/*
- 		 * Skip pages that don't require vacuuming according to the visibility
- 		 * map. But only if we've seen a streak of at least
- 		 * SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading
- 		 * sequentially, the OS should be doing readahead for us and there's
- 		 * no gain in skipping a page now and then. You need a longer run of
- 		 * consecutive skipped pages before it's worthwhile. Also, skipping
- 		 * even a single page means that we can't update relfrozenxid or
- 		 * reltuples, so we only want to do it if there's a good chance to
- 		 * skip a goodly number of pages.
- 		 */
  		if (!scan_all)
  		{
  			all_visible_according_to_vm =
  				visibilitymap_test(onerel, blkno, &vmbuffer);
  			if (all_visible_according_to_vm)
  			{
  				all_visible_streak++;
  				if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
- 				{
- 					vacrelstats->scanned_all = false;
  					continue;
- 				}
  			}
  			else
  				all_visible_streak = 0;
--- 374,392 ----
  		bool		all_visible;
  		bool		has_dead_tuples;
  
  		if (!scan_all)
  		{
+ 			/*
+ 			 * Skip pages that don't require vacuuming according to the
+ 			 * visibility map, as per comment above.
+ 			 */
  			all_visible_according_to_vm =
  				visibilitymap_test(onerel, blkno, &vmbuffer);
  			if (all_visible_according_to_vm)
  			{
  				all_visible_streak++;
  				if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
  					continue;
  			}
  			else
  				all_visible_streak = 0;
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 381,387 ****
  
  		vacuum_delay_point();
  
! 		scanned_pages++;
  
  		/*
  		 * If we are close to overrunning the available space for dead-tuple
--- 394,400 ----
  
  		vacuum_delay_point();
  
! 		vacrelstats->scanned_pages++;
  
  		/*
  		 * If we are close to overrunning the available space for dead-tuple
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 764,772 ****
  	}
  
  	/* save stats for use later */
! 	vacrelstats->rel_tuples = num_tuples;
  	vacrelstats->tuples_deleted = tups_vacuumed;
  
  	/* If any tuples need to be deleted, perform final vacuum cycle */
  	/* XXX put a threshold on min number of tuples here? */
  	if (vacrelstats->num_dead_tuples > 0)
--- 777,791 ----
  	}
  
  	/* save stats for use later */
! 	vacrelstats->scanned_tuples = num_tuples;
  	vacrelstats->tuples_deleted = tups_vacuumed;
  
+ 	/* now we can compute the new value for pg_class.reltuples */
+ 	vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
+ 														 nblocks,
+ 														 vacrelstats->scanned_pages,
+ 														 num_tuples);
+ 
  	/* If any tuples need to be deleted, perform final vacuum cycle */
  	/* XXX put a threshold on min number of tuples here? */
  	if (vacrelstats->num_dead_tuples > 0)
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 805,811 ****
  	ereport(elevel,
  			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
  					RelationGetRelationName(onerel),
! 					tups_vacuumed, num_tuples, scanned_pages, nblocks),
  			 errdetail("%.0f dead row versions cannot be removed yet.\n"
  					   "There were %.0f unused item pointers.\n"
  					   "%u pages are entirely empty.\n"
--- 824,831 ----
  	ereport(elevel,
  			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
  					RelationGetRelationName(onerel),
! 					tups_vacuumed, num_tuples,
! 					vacrelstats->scanned_pages, nblocks),
  			 errdetail("%.0f dead row versions cannot be removed yet.\n"
  					   "There were %.0f unused item pointers.\n"
  					   "%u pages are entirely empty.\n"
*************** lazy_cleanup_index(Relation indrel,
*** 977,986 ****
  
  	ivinfo.index = indrel;
  	ivinfo.analyze_only = false;
! 	ivinfo.estimated_count = !vacrelstats->scanned_all;
  	ivinfo.message_level = elevel;
! 	/* use rel_tuples only if we scanned all pages, else fall back */
! 	ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
  	ivinfo.strategy = vac_strategy;
  
  	stats = index_vacuum_cleanup(&ivinfo, stats);
--- 997,1005 ----
  
  	ivinfo.index = indrel;
  	ivinfo.analyze_only = false;
! 	ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
  	ivinfo.message_level = elevel;
! 	ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
  	ivinfo.strategy = vac_strategy;
  
  	stats = index_vacuum_cleanup(&ivinfo, stats);
*************** lazy_truncate_heap(Relation onerel, LVRe
*** 1041,1048 ****
  	new_rel_pages = RelationGetNumberOfBlocks(onerel);
  	if (new_rel_pages != old_rel_pages)
  	{
! 		/* might as well use the latest news when we update pg_class stats */
! 		vacrelstats->rel_pages = new_rel_pages;
  		UnlockRelation(onerel, AccessExclusiveLock);
  		return;
  	}
--- 1060,1072 ----
  	new_rel_pages = RelationGetNumberOfBlocks(onerel);
  	if (new_rel_pages != old_rel_pages)
  	{
! 		/*
! 		 * Note: we intentionally don't update vacrelstats->rel_pages with
! 		 * the new rel size here.  If we did, it would amount to assuming that
! 		 * the new pages are empty, which is unlikely.  Leaving the numbers
! 		 * alone amounts to assuming that the new pages have the same tuple
! 		 * density as existing ones, which is less unlikely.
! 		 */
  		UnlockRelation(onerel, AccessExclusiveLock);
  		return;
  	}
*************** lazy_truncate_heap(Relation onerel, LVRe
*** 1076,1082 ****
  	 */
  	UnlockRelation(onerel, AccessExclusiveLock);
  
! 	/* update statistics */
  	vacrelstats->rel_pages = new_rel_pages;
  	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
  
--- 1100,1110 ----
  	 */
  	UnlockRelation(onerel, AccessExclusiveLock);
  
! 	/*
! 	 * Update statistics.  Here, it *is* correct to adjust rel_pages without
! 	 * also touching reltuples, since the tuple count wasn't changed by the
! 	 * truncation.
! 	 */
  	vacrelstats->rel_pages = new_rel_pages;
  	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
  
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 5ed6e8337c11fdd599a0deb219e1cb0285228a88..1d80c311d879d9cf9009621860cda3ab19c6dea9 100644
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
*************** pgstat_report_autovac(Oid dboid)
*** 1246,1253 ****
   * ---------
   */
  void
! pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
! 					 PgStat_Counter tuples)
  {
  	PgStat_MsgVacuum msg;
  
--- 1246,1252 ----
   * ---------
   */
  void
! pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter tuples)
  {
  	PgStat_MsgVacuum msg;
  
*************** pgstat_report_vacuum(Oid tableoid, bool 
*** 1257,1263 ****
  	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
  	msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
  	msg.m_tableoid = tableoid;
- 	msg.m_adopt_counts = adopt_counts;
  	msg.m_autovacuum = IsAutoVacuumWorkerProcess();
  	msg.m_vacuumtime = GetCurrentTimestamp();
  	msg.m_tuples = tuples;
--- 1256,1261 ----
*************** pgstat_report_vacuum(Oid tableoid, bool 
*** 1271,1277 ****
   * --------
   */
  void
! pgstat_report_analyze(Relation rel, bool adopt_counts,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples)
  {
  	PgStat_MsgAnalyze msg;
--- 1269,1275 ----
   * --------
   */
  void
! pgstat_report_analyze(Relation rel,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples)
  {
  	PgStat_MsgAnalyze msg;
*************** pgstat_report_analyze(Relation rel, bool
*** 1308,1314 ****
  	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
  	msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
  	msg.m_tableoid = RelationGetRelid(rel);
- 	msg.m_adopt_counts = adopt_counts;
  	msg.m_autovacuum = IsAutoVacuumWorkerProcess();
  	msg.m_analyzetime = GetCurrentTimestamp();
  	msg.m_live_tuples = livetuples;
--- 1306,1311 ----
*************** pgstat_recv_vacuum(PgStat_MsgVacuum *msg
*** 4197,4204 ****
  
  	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
! 	if (msg->m_adopt_counts)
! 		tabentry->n_live_tuples = msg->m_tuples;
  	/* Resetting dead_tuples to 0 is an approximation ... */
  	tabentry->n_dead_tuples = 0;
  
--- 4194,4200 ----
  
  	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
! 	tabentry->n_live_tuples = msg->m_tuples;
  	/* Resetting dead_tuples to 0 is an approximation ... */
  	tabentry->n_dead_tuples = 0;
  
*************** pgstat_recv_analyze(PgStat_MsgAnalyze *m
*** 4233,4243 ****
  
  	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
! 	if (msg->m_adopt_counts)
! 	{
! 		tabentry->n_live_tuples = msg->m_live_tuples;
! 		tabentry->n_dead_tuples = msg->m_dead_tuples;
! 	}
  
  	/*
  	 * We reset changes_since_analyze to zero, forgetting any changes that
--- 4229,4236 ----
  
  	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
! 	tabentry->n_live_tuples = msg->m_live_tuples;
! 	tabentry->n_dead_tuples = msg->m_dead_tuples;
  
  	/*
  	 * We reset changes_since_analyze to zero, forgetting any changes that
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 79c9f5d90fb674ca8c778a65ca540ef62bded0af..cfbe0c43924029843f636845d87a09996d706af3 100644
*** a/src/include/commands/vacuum.h
--- b/src/include/commands/vacuum.h
*************** extern void vacuum(VacuumStmt *vacstmt, 
*** 142,147 ****
--- 142,151 ----
  extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
  				 int *nindexes, Relation **Irel);
  extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
+ extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
+ 					   BlockNumber total_pages,
+ 					   BlockNumber scanned_pages,
+ 					   double scanned_tuples);
  extern void vac_update_relstats(Relation relation,
  					BlockNumber num_pages,
  					double num_tuples,
*************** extern void vacuum_delay_point(void);
*** 157,166 ****
  
  /* in commands/vacuumlazy.c */
  extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
! 				BufferAccessStrategy bstrategy, bool *scanned_all);
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
! 			BufferAccessStrategy bstrategy, bool update_reltuples);
  
  #endif   /* VACUUM_H */
--- 161,170 ----
  
  /* in commands/vacuumlazy.c */
  extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
! 				BufferAccessStrategy bstrategy);
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
! 			BufferAccessStrategy bstrategy);
  
  #endif   /* VACUUM_H */
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f04be95b459b2053c66495979b95f0b697a2af13..5446fa04409ebefc31a3a1f282e7dd1224a5d734 100644
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
*************** typedef struct PgStat_MsgVacuum
*** 322,328 ****
  	PgStat_MsgHdr m_hdr;
  	Oid			m_databaseid;
  	Oid			m_tableoid;
- 	bool		m_adopt_counts;
  	bool		m_autovacuum;
  	TimestampTz m_vacuumtime;
  	PgStat_Counter m_tuples;
--- 322,327 ----
*************** typedef struct PgStat_MsgAnalyze
*** 339,345 ****
  	PgStat_MsgHdr m_hdr;
  	Oid			m_databaseid;
  	Oid			m_tableoid;
- 	bool		m_adopt_counts;
  	bool		m_autovacuum;
  	TimestampTz m_analyzetime;
  	PgStat_Counter m_live_tuples;
--- 338,343 ----
*************** extern void pgstat_reset_shared_counters
*** 706,714 ****
  extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
  
  extern void pgstat_report_autovac(Oid dboid);
! extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
  					 PgStat_Counter tuples);
! extern void pgstat_report_analyze(Relation rel, bool adopt_counts,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples);
  
  extern void pgstat_report_recovery_conflict(int reason);
--- 704,712 ----
  extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
  
  extern void pgstat_report_autovac(Oid dboid);
! extern void pgstat_report_vacuum(Oid tableoid, bool shared,
  					 PgStat_Counter tuples);
! extern void pgstat_report_analyze(Relation rel,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples);
  
  extern void pgstat_report_recovery_conflict(int reason);