postgresql-analyze-v3.patch

text/plain
Filename: postgresql-analyze-v3.patch
Type: text/plain
Part: 0
Message: Re: WIP: Collecting statistics on CSV file data
Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: context
Series: patch v3
File	+	−
contrib/file_fdw/file_fdw.c	410	0
contrib/file_fdw/input/file_fdw.source	5	0
contrib/file_fdw/output/file_fdw.source	15	0
doc/src/sgml/fdwhandler.sgml	13	0
doc/src/sgml/maintenance.sgml	4	0
doc/src/sgml/ref/alter_foreign_table.sgml	47	0
doc/src/sgml/ref/analyze.sgml	9	0
src/backend/commands/analyze.c	42	0
src/backend/commands/copy.c	0	139
src/backend/commands/tablecmds.c	28	0
src/bin/psql/tab-complete.c	16	0
src/include/commands/copy.h	146	0
src/include/commands/vacuum.h	8	0
src/include/foreign/fdwapi.h	6	0
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 15,30 ****
--- 15,42 ----
  #include <sys/stat.h>
  #include <unistd.h>
  
+ #include "access/htup.h"
  #include "access/reloptions.h"
+ #include "access/transam.h"
  #include "catalog/pg_foreign_table.h"
  #include "commands/copy.h"
+ #include "commands/dbcommands.h"
  #include "commands/defrem.h"
  #include "commands/explain.h"
+ #include "commands/vacuum.h"
  #include "foreign/fdwapi.h"
  #include "foreign/foreign.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #include "optimizer/cost.h"
+ #include "optimizer/plancat.h"
+ #include "parser/parse_relation.h"
+ #include "pgstat.h"
+ #include "utils/attoptcache.h"
+ #include "utils/elog.h"
+ #include "utils/guc.h"
+ #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
  #include "utils/rel.h"
  #include "utils/syscache.h"
  
***************
*** 101,106 **** static void fileBeginForeignScan(ForeignScanState *node, int eflags);
--- 113,119 ----
  static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
  static void fileReScanForeignScan(ForeignScanState *node);
  static void fileEndForeignScan(ForeignScanState *node);
+ static void fileAnalyzeForeignTable(Relation onerel, VacuumStmt *vacstmt, int elevel);
  
  /*
   * Helper functions
***************
*** 112,118 **** static List *get_file_fdw_attribute_options(Oid relid);
  static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  			   const char *filename,
  			   Cost *startup_cost, Cost *total_cost);
! 
  
  /*
   * Foreign-data wrapper handler function: return a struct with pointers
--- 125,132 ----
  static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  			   const char *filename,
  			   Cost *startup_cost, Cost *total_cost);
! static void file_fdw_do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, int elevel);
! static int  file_fdw_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, BlockNumber *totalpages, double *totalrows);
  
  /*
   * Foreign-data wrapper handler function: return a struct with pointers
***************
*** 129,134 **** file_fdw_handler(PG_FUNCTION_ARGS)
--- 143,149 ----
  	fdwroutine->IterateForeignScan = fileIterateForeignScan;
  	fdwroutine->ReScanForeignScan = fileReScanForeignScan;
  	fdwroutine->EndForeignScan = fileEndForeignScan;
+ 	fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;
  
  	PG_RETURN_POINTER(fdwroutine);
  }
***************
*** 575,580 **** fileReScanForeignScan(ForeignScanState *node)
--- 590,605 ----
  }
  
  /*
+  * fileAnalyzeForeignTable
+  *		FDW AnalyzeForeignTable callback; forwards to file_fdw_do_analyze_rel()
+  */
+ static void
+ fileAnalyzeForeignTable(Relation onerel, VacuumStmt *vacstmt, int elevel)
+ {
+ 	file_fdw_do_analyze_rel(onerel, vacstmt, elevel);
+ }
+ 
+ /*
   * Estimate costs of scanning a foreign table.
   */
  static void
***************
*** 584,590 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  {
  	struct stat stat_buf;
  	BlockNumber pages;
! 	int			tuple_width;
  	double		ntuples;
  	double		nrows;
  	Cost		run_cost = 0;
--- 609,616 ----
  {
  	struct stat stat_buf;
  	BlockNumber pages;
! 	BlockNumber	relpages;
! 	double		reltuples;
  	double		ntuples;
  	double		nrows;
  	Cost		run_cost = 0;
***************
*** 604,619 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  	if (pages < 1)
  		pages = 1;
  
! 	/*
! 	 * Estimate the number of tuples in the file.  We back into this estimate
! 	 * using the planner's idea of the relation width; which is bogus if not
! 	 * all columns are being read, not to mention that the text representation
! 	 * of a row probably isn't the same size as its internal representation.
! 	 * FIXME later.
! 	 */
! 	tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
  
! 	ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
  
  	/*
  	 * Now estimate the number of rows returned by the scan after applying the
--- 630,661 ----
  	if (pages < 1)
  		pages = 1;
  
! 	relpages = baserel->pages;
! 	reltuples = baserel->tuples;
! 
! 	if (relpages > 0)
! 	{
! 		double		density;
  
! 		density = reltuples / (double) relpages;
! 
! 		ntuples = clamp_row_est(density * (double) pages);
! 	}
! 	else
! 	{
! 		int			tuple_width;
! 
! 		/*
! 		 * Estimate the number of tuples in the file.  We back into this estimate
! 		 * using the planner's idea of the relation width; which is bogus if not
! 		 * all columns are being read, not to mention that the text representation
! 		 * of a row probably isn't the same size as its internal representation.
! 		 * FIXME later.
! 		 */
! 		tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
! 
! 		ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
! 	}
  
  	/*
  	 * Now estimate the number of rows returned by the scan after applying the
***************
*** 645,647 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
--- 687,1046 ----
  	run_cost += cpu_per_tuple * ntuples;
  	*total_cost = *startup_cost + run_cost;
  }
+ 
+ /*
+  * file_fdw_do_analyze_rel() -- analyze one foreign table: sample its file, store column stats, update pg_class
+  */
+ static void
+ file_fdw_do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, int elevel)
+ {
+ 	int			i,
+ 				attr_cnt,
+ 				tcnt,
+ 				numrows = 0,
+ 				targrows;
+ 	double		totalrows = 0;
+ 	BlockNumber	totalpages = 0;
+ 	HeapTuple  *rows;
+ 	VacAttrStats **vacattrstats;
+ 	MemoryContext anl_context;
+ 	MemoryContext caller_context;
+ 
+ 	/*
+ 	 * Everything below is allocated in a private working context that is
+ 	 * deleted on exit, so nothing needs to be freed piecemeal.
+ 	 */
+ 	anl_context = AllocSetContextCreate(CurrentMemoryContext,
+ 										"Analyze",
+ 										ALLOCSET_DEFAULT_MINSIZE,
+ 										ALLOCSET_DEFAULT_INITSIZE,
+ 										ALLOCSET_DEFAULT_MAXSIZE);
+ 	caller_context = MemoryContextSwitchTo(anl_context);
+ 
+ 	/*
+ 	 * Determine which columns to analyze: those named in the statement, else
+ 	 * all.  Entries for which examine_attribute() returns NULL are dropped,
+ 	 * leaving attr_cnt usable ones.  System attributes are never analyzed.
+ 	 */
+ 	if (vacstmt->va_cols != NIL)
+ 	{
+ 		ListCell	   *le;
+ 
+ 		vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
+ 												sizeof(VacAttrStats *));
+ 		tcnt = 0;
+ 		foreach(le, vacstmt->va_cols)
+ 		{
+ 			char	   *col = strVal(lfirst(le));
+ 
+ 			i = attnameAttNum(onerel, col, false);
+ 			if (i == InvalidAttrNumber)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_UNDEFINED_COLUMN),
+ 						 errmsg("column \"%s\" of relation \"%s\" does not exist",
+ 								col, RelationGetRelationName(onerel))));
+ 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+ 			if (vacattrstats[tcnt] != NULL)
+ 				tcnt++;
+ 		}
+ 		attr_cnt = tcnt;
+ 	}
+ 	else
+ 	{
+ 		attr_cnt = onerel->rd_att->natts;
+ 		vacattrstats = (VacAttrStats **) palloc(attr_cnt * sizeof(VacAttrStats *));
+ 		tcnt = 0;
+ 		for (i = 1; i <= attr_cnt; i++)
+ 		{
+ 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+ 			if (vacattrstats[tcnt] != NULL)
+ 				tcnt++;
+ 		}
+ 		attr_cnt = tcnt;
+ 	}
+ 
+ 	/*
+ 	 * Determine how many rows we need to sample, using the worst case from
+ 	 * all analyzable columns.	We use a lower bound of 100 rows to avoid
+ 	 * possible overflow in Vitter's algorithm.
+ 	 */
+ 	targrows = 100;
+ 	for (i = 0; i < attr_cnt; i++)
+ 	{
+ 		if (targrows < vacattrstats[i]->minrows)
+ 			targrows = vacattrstats[i]->minrows;
+ 	}
+ 
+ 	/*
+ 	 * Acquire the sample rows; this also fills in totalpages and totalrows
+ 	 */
+ 	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+ 	numrows = file_fdw_acquire_sample_rows(onerel, elevel, rows, targrows, &totalpages, &totalrows);
+ 
+ 	/*
+ 	 * Compute the statistics.	Temporary results during the calculations for
+ 	 * each column are stored in a child context.  The calc routines are
+ 	 * responsible to make sure that whatever they store into the VacAttrStats
+ 	 * structure is allocated in anl_context.
+ 	 */
+ 	if (numrows > 0)
+ 	{
+ 		MemoryContext col_context, old_context;
+ 
+ 		col_context = AllocSetContextCreate(anl_context,
+ 											"Analyze Column",
+ 											ALLOCSET_DEFAULT_MINSIZE,
+ 											ALLOCSET_DEFAULT_INITSIZE,
+ 											ALLOCSET_DEFAULT_MAXSIZE);
+ 		old_context = MemoryContextSwitchTo(col_context);
+ 
+ 		for (i = 0; i < attr_cnt; i++)
+ 		{
+ 			VacAttrStats	   *stats = vacattrstats[i];
+ 			AttributeOpts	   *aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
+ 
+ 			stats->rows = rows;
+ 			stats->tupDesc = onerel->rd_att;
+ 			(*stats->compute_stats) (stats,
+ 									 std_fetch_func,
+ 									 numrows,
+ 									 totalrows);
+ 
+ 			/*
+ 			 * If the appropriate flavor of the n_distinct option is
+ 			 * specified, override with the corresponding value.
+ 			 */
+ 			if (aopt != NULL)
+ 			{
+ 				float8		n_distinct = aopt->n_distinct;
+ 
+ 				if (n_distinct != 0.0)
+ 					stats->stadistinct = n_distinct;
+ 			}
+ 
+ 			MemoryContextResetAndDeleteChildren(col_context);
+ 		}
+ 
+ 		MemoryContextSwitchTo(old_context);
+ 		MemoryContextDelete(col_context);
+ 
+ 		/*
+ 		 * Emit the completed stats rows into pg_statistic, replacing any
+ 		 * previous statistics for the target columns.	(If there are stats in
+ 		 * pg_statistic for columns we didn't process, we leave them alone.)
+ 		 */
+ 		update_attstats(onerel->rd_id, false, attr_cnt, vacattrstats);
+ 	}
+ 
+ 	/*
+ 	 * Update pages/tuples stats in pg_class (done even if the sample is empty).
+ 	 */
+ 	vac_update_relstats(onerel, totalpages, totalrows, 0, false, InvalidTransactionId);
+ 
+ 	/*
+ 	 * Report ANALYZE to the stats collector, too.
+ 	 */
+ 	pgstat_report_analyze(onerel, totalrows, 0);
+ 
+ 	/* Restore current context and release memory */
+ 	MemoryContextSwitchTo(caller_context);
+ 	MemoryContextDelete(anl_context);
+ 	anl_context = NULL;
+ }
+ 
+ /*
+  * file_fdw_acquire_sample_rows -- acquire a random sample of rows from the file
+  *
+  * Up to targrows sampled tuples are stored in the caller-allocated array
+  * rows[]; the number actually stored is the function result.  *totalpages
+  * receives the file size in BLCKSZ units (at least 1), and *totalrows the
+  * number of rows read that passed the NOT NULL check below.
+  *
+  * Each sampled tuple's t_self is a synthetic position derived from its row
+  * number; the result is ordered by it, i.e. by position in the file.
+  * (We will rely on this later to derive correlation estimates.)
+  */
+ static int
+ file_fdw_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, BlockNumber *totalpages, double *totalrows)
+ {
+ 	int			numrows = 0;
+ 	double		samplerows = 0;  /* # rows read and counted so far */
+ 	double		rowstoskip = -1; /* -1 means not set yet */
+ 	double		rstate;
+ 	HeapTuple	tuple;
+ 	TupleDesc	tupDesc;
+ 	TupleConstr *constr;
+ 	int			natts;
+ 	int			attrChk;
+ 	Datum	   *values;
+ 	bool	   *nulls;
+ 	bool		found;
+ 	bool		sample_it = false;
+ 	BlockNumber	blknum;
+ 	OffsetNumber offnum;
+ 	char	   *filename;
+ 	struct stat	stat_buf;
+ 	List	   *options;
+ 	CopyState	cstate;
+ 	ErrorContextCallback errcontext;
+ 
+ 	Assert(onerel);
+ 	Assert(targrows > 0);
+ 
+ 	tupDesc = RelationGetDescr(onerel);
+ 	constr = tupDesc->constr;
+ 	natts = tupDesc->natts;
+ 	values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
+ 	nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
+ 
+ 	/* Fetch options of foreign table */
+ 	fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+ 
+ 	/*
+ 	 * Get size of the file.
+ 	 */
+ 	if (stat(filename, &stat_buf) < 0)
+ 		ereport(ERROR,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not stat file \"%s\": %m",
+ 						filename)));
+ 
+ 	/*
+ 	 * Convert size to pages; the caller stores this as the table's page count.
+ 	 */
+ 	*totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
+ 	if (*totalpages < 1)
+ 		*totalpages = 1;
+ 
+ 	/*
+ 	 * Create CopyState from FDW options.  We always acquire all columns, so
+ 	 * that values[]/nulls[] match the tuple descriptor given to heap_form_tuple.
+ 	 */
+ 	cstate = BeginCopyFrom(onerel, filename, NIL, options);
+ 
+ 	/* Prepare for sampling rows */
+ 	rstate = init_selection_state(targrows);
+ 
+ 	/* Set up callback to identify error line number. */
+ 	errcontext.callback = CopyFromErrorCallback;
+ 	errcontext.arg = (void *) cstate;
+ 	errcontext.previous = error_context_stack;
+ 	error_context_stack = &errcontext;
+ 
+ 	for (;;)
+ 	{
+ 		sample_it = true;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		found = NextCopyFrom(cstate, NULL, values, nulls, NULL);
+ 
+ 		if (!found)
+ 			break;
+ 
+ 		tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+ 		if (constr && constr->has_not_null)	/* skip rows violating NOT NULL */
+ 		{
+ 			for (attrChk = 1; attrChk <= natts; attrChk++)
+ 			{
+ 				if (onerel->rd_att->attrs[attrChk - 1]->attnotnull &&
+ 					!(cstate->force_notnull_flags[attrChk - 1]) &&
+ 					heap_attisnull(tuple, attrChk))
+ 				{
+ 					sample_it = false;
+ 					break;
+ 				}
+ 			}
+ 		}
+ 
+ 		if (!sample_it)
+ 		{
+ 			heap_freetuple(tuple);
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * The first targrows rows are simply copied into the reservoir.  After
+ 		 * that we replace tuples in the sample until we reach the end of the
+ 		 * file.  This is Jeff Vitter's reservoir algorithm (no citation appears
+ 		 * in this file; get_next_S() lives in commands/analyze.c).  It works by
+ 		 * repeatedly computing the number of tuples to skip before selecting
+ 		 * a tuple, which replaces a randomly chosen element of the reservoir.
+ 		 * At all times the reservoir is a true random sample of the tuples
+ 		 * we've passed over so far, so when we hit end of file we're done.
+ 		 * NOTE(review): nothing here frees what NextCopyFrom() put in values[];
+ 		 * confirm that per-row allocations cannot pile up on large files.
+ 		 */
+ 		if (numrows < targrows)
+ 		{
+ 			blknum = (BlockNumber) ((uint64) samplerows / MaxOffsetNumber);
+ 			offnum = (OffsetNumber) ((uint64) samplerows % MaxOffsetNumber + 1);
+ 			ItemPointerSet(&tuple->t_self, blknum, offnum);
+ 			rows[numrows++] = heap_copytuple(tuple);
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * t in Vitter's paper is the number of records already
+ 			 * processed.  If we need to compute a new S value, we
+ 			 * must use the not-yet-incremented value of samplerows as
+ 			 * t.
+ 			 */
+ 			if (rowstoskip < 0)
+ 				rowstoskip = get_next_S(samplerows, targrows, &rstate);
+ 
+ 			if (rowstoskip <= 0)
+ 			{
+ 				/*
+ 				 * Found a suitable tuple, so save it, replacing one
+ 				 * old tuple at random
+ 				 */
+ 				int k = (int) (targrows * random_fract());
+ 
+ 				Assert(k >= 0 && k < targrows);
+ 				heap_freetuple(rows[k]);
+ 
+ 				blknum = (BlockNumber) ((uint64) samplerows / MaxOffsetNumber);
+ 				offnum = (OffsetNumber) ((uint64) samplerows % MaxOffsetNumber + 1);
+ 				ItemPointerSet(&tuple->t_self, blknum, offnum);
+ 				rows[k] = heap_copytuple(tuple);
+ 			}
+ 
+ 			rowstoskip -= 1;
+ 		}
+ 
+ 		samplerows += 1;
+ 		heap_freetuple(tuple);
+ 	}
+ 
+ 	/* Remove error callback. */
+ 	error_context_stack = errcontext.previous;
+ 
+ 	/*
+ 	 * If we didn't find as many tuples as we wanted then we're done. No sort
+ 	 * is needed, since they're already in order.
+ 	 *
+ 	 * Otherwise we need to sort the collected tuples by position
+ 	 * (itempointer). It's not worth worrying about corner cases where the
+ 	 * tuples are already sorted.
+ 	 */
+ 	if (numrows == targrows)
+ 		qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
+ 
+ 	*totalrows = samplerows;
+ 
+ 	EndCopyFrom(cstate);
+ 
+ 	pfree(values);
+ 	pfree(nulls);
+ 
+ 	/*
+ 	 * Emit some interesting relation info
+ 	 */
+ 	ereport(elevel,
+ 			(errmsg("\"%s\": scanned, "
+ 					"%d rows in sample, %.0f total rows",
+ 					RelationGetRelationName(onerel), numrows, *totalrows)));
+ 
+ 	return numrows;
+ }
*** a/contrib/file_fdw/input/file_fdw.source
--- b/contrib/file_fdw/input/file_fdw.source
***************
*** 111,116 **** EXECUTE st(100);
--- 111,121 ----
  EXECUTE st(100);
  DEALLOCATE st;
  
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+ 
  -- tableoid
  SELECT tableoid::regclass, b FROM agg_csv;
  
*** a/contrib/file_fdw/output/file_fdw.source
--- b/contrib/file_fdw/output/file_fdw.source
***************
*** 174,179 **** EXECUTE st(100);
--- 174,194 ----
  (1 row)
  
  DEALLOCATE st;
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+  relpages | reltuples 
+ ----------+-----------
+         1 |         3
+ (1 row)
+ 
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+  schemaname | tablename | attname | inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs |    histogram_bounds     | correlation 
+ ------------+-----------+---------+-----------+-----------+-----------+------------+------------------+-------------------+-------------------------+-------------
+  public     | agg_csv   | a       | f         |         0 |         2 |         -1 |                  |                   | {0,42,100}              |        -0.5
+  public     | agg_csv   | b       | f         |         0 |         4 |         -1 |                  |                   | {0.09561,99.097,324.78} |         0.5
+ (2 rows)
+ 
  -- tableoid
  SELECT tableoid::regclass, b FROM agg_csv;
   tableoid |    b    
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 228,233 **** EndForeignScan (ForeignScanState *node);
--- 228,246 ----
      </para>
  
      <para>
+ <programlisting>
+ void
+ AnalyzeForeignTable (Relation onerel,
+                      VacuumStmt *vacstmt, 
+                      int elevel);
+ </programlisting>
+ 
+      Collect statistics on a foreign table and store the results in the
+      <structname>pg_class</> and <structname>pg_statistic</> system catalogs.
+      This is called when the <command>ANALYZE</> command is run on a foreign table.
+     </para>
+ 
+     <para>
       The <structname>FdwRoutine</> and <structname>FdwPlan</> struct types
       are declared in <filename>src/include/foreign/fdwapi.h</>, which see
       for additional details.
*** a/doc/src/sgml/maintenance.sgml
--- b/doc/src/sgml/maintenance.sgml
***************
*** 279,284 ****
--- 279,288 ----
      <command>ANALYZE</> strictly as a function of the number of rows
      inserted or updated; it has no knowledge of whether that will lead
      to meaningful statistical changes.
+     Note that the autovacuum daemon does not issue <command>ANALYZE</>
+     commands on foreign tables.  It is recommended to run
+     <command>ANALYZE</> on them manually as needed, typically on a
+     schedule driven by cron or Task Scheduler scripts.
     </para>
  
     <para>
*** a/doc/src/sgml/ref/alter_foreign_table.sgml
--- b/doc/src/sgml/ref/alter_foreign_table.sgml
***************
*** 36,41 **** ALTER FOREIGN TABLE <replaceable class="PARAMETER">name</replaceable>
--- 36,44 ----
      DROP [ COLUMN ] [ IF EXISTS ] <replaceable class="PARAMETER">column</replaceable> [ RESTRICT | CASCADE ]
      ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> [ SET DATA ] TYPE <replaceable class="PARAMETER">type</replaceable>
      ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> { SET | DROP } NOT NULL
+     ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS <replaceable class="PARAMETER">integer</replaceable>
+     ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET ( <replaceable class="PARAMETER">attribute_option</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] )
+     ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> RESET ( <replaceable class="PARAMETER">attribute_option</replaceable> [, ... ] )
      ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> OPTIONS ( [ ADD | SET | DROP ] <replaceable class="PARAMETER">option</replaceable> ['<replaceable class="PARAMETER">value</replaceable>'] [, ... ])
      OWNER TO <replaceable class="PARAMETER">new_owner</replaceable>
      OPTIONS ( [ ADD | SET | DROP ] <replaceable class="PARAMETER">option</replaceable> ['<replaceable class="PARAMETER">value</replaceable>'] [, ... ])
***************
*** 94,99 **** ALTER FOREIGN TABLE <replaceable class="PARAMETER">name</replaceable>
--- 97,146 ----
     </varlistentry>
  
     <varlistentry>
+     <term><literal>SET STATISTICS</literal></term>
+     <listitem>
+      <para>
+       This form
+       sets the per-column statistics-gathering target for subsequent
+       <xref linkend="sql-analyze"> operations.
+       The target can be set in the range 0 to 10000; alternatively, set it
+       to -1 to revert to using the system default statistics
+       target (<xref linkend="guc-default-statistics-target">).
+      </para>
+     </listitem>
+    </varlistentry>
+ 
+    <varlistentry>
+     <term><literal>SET ( <replaceable class="PARAMETER">attribute_option</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] )</literal></term>
+     <term><literal>RESET ( <replaceable class="PARAMETER">attribute_option</replaceable> [, ... ] )</literal></term>
+     <listitem>
+      <para>
+       This form
+       sets or resets a per-attribute option.  Currently, the only defined
+       per-attribute option is <literal>n_distinct</>, which overrides
+       the number-of-distinct-values estimates made by subsequent
+       <xref linkend="sql-analyze"> operations. 
+       When set to a positive value, <command>ANALYZE</> will assume that
+       the column contains exactly the specified number of distinct nonnull
+       values.
+       When set to a negative value, which must be greater than or equal
+       to -1, <command>ANALYZE</> will assume that the number of distinct
+       nonnull values in the column is linear in the size of the foreign
+       table; the exact count is to be computed by multiplying the estimated
+       foreign table size by the absolute value of the given number.
+       For example,
+       a value of -1 implies that all values in the column are distinct,
+       while a value of -0.5 implies that each value appears twice on the
+       average.
+       This can be useful when the size of the foreign table changes over
+       time, since the multiplication by the number of rows in the foreign
+       table is not performed until query planning time.  Specify a value
+       of 0 to revert to estimating the number of distinct values normally.
+      </para>
+     </listitem>
+    </varlistentry>
+ 
+    <varlistentry>
      <term><literal>OWNER</literal></term>
      <listitem>
       <para>
*** a/doc/src/sgml/ref/analyze.sgml
--- b/doc/src/sgml/ref/analyze.sgml
***************
*** 39,47 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
  
    <para>
     With no parameter, <command>ANALYZE</command> examines every table in the
!    current database.  With a parameter, <command>ANALYZE</command> examines
!    only that table.  It is further possible to give a list of column names,
!    in which case only the statistics for those columns are collected.
    </para>
   </refsect1>
  
--- 39,48 ----
  
    <para>
     With no parameter, <command>ANALYZE</command> examines every table in the
!    current database except for foreign tables.  With a parameter,
!    <command>ANALYZE</command> examines only that table.  It is further
!    possible to give a list of column names, in which case only the
!    statistics for those columns are collected.
    </para>
   </refsect1>
  
***************
*** 63,69 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
      <listitem>
       <para>
        The name (possibly schema-qualified) of a specific table to
!       analyze. Defaults to all tables in the current database.
       </para>
      </listitem>
     </varlistentry>
--- 64,71 ----
      <listitem>
       <para>
        The name (possibly schema-qualified) of a specific table to
!       analyze. Defaults to all tables in the current database except
!       for foreign tables.
       </para>
      </listitem>
     </varlistentry>
***************
*** 137,143 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
     In rare situations, this non-determinism will cause the planner's
     choices of query plans to change after <command>ANALYZE</command> is run.
     To avoid this, raise the amount of statistics collected by
!    <command>ANALYZE</command>, as described below.
    </para>
  
    <para>
--- 139,147 ----
     In rare situations, this non-determinism will cause the planner's
     choices of query plans to change after <command>ANALYZE</command> is run.
     To avoid this, raise the amount of statistics collected by
!    <command>ANALYZE</command>, as described below.  Note that the time
!    needed to analyze foreign tables depends on the implementation of
!    the foreign data wrapper through which such tables are accessed.
    </para>
  
    <para>
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
***************
*** 23,28 ****
--- 23,29 ----
  #include "access/xact.h"
  #include "catalog/index.h"
  #include "catalog/indexing.h"
+ #include "catalog/pg_class.h"
  #include "catalog/pg_collation.h"
  #include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
***************
*** 30,35 ****
--- 31,38 ----
  #include "commands/tablecmds.h"
  #include "commands/vacuum.h"
  #include "executor/executor.h"
+ #include "foreign/foreign.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/nodeFuncs.h"
  #include "parser/parse_oper.h"
***************
*** 94,113 **** static void compute_index_stats(Relation onerel, double totalrows,
  					AnlIndexData *indexdata, int nindexes,
  					HeapTuple *rows, int numrows,
  					MemoryContext col_context);
- static VacAttrStats *examine_attribute(Relation onerel, int attnum,
- 				  Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
  					int targrows, double *totalrows, double *totaldeadrows);
- static double random_fract(void);
- static double init_selection_state(int n);
- static double get_next_S(double t, int n, double *stateptr);
- static int	compare_rows(const void *a, const void *b);
  static int acquire_inherited_sample_rows(Relation onerel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows, double *totaldeadrows);
- static void update_attstats(Oid relid, bool inh,
- 				int natts, VacAttrStats **vacattrstats);
- static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  
  static bool std_typanalyze(VacAttrStats *stats);
--- 97,107 ----
***************
*** 184,193 **** analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
  	}
  
  	/*
! 	 * Check that it's a plain table; we used to do this in get_rel_oids() but
! 	 * seems safer to check after we've locked the relation.
  	 */
! 	if (onerel->rd_rel->relkind != RELKIND_RELATION)
  	{
  		/* No need for a WARNING if we already complained during VACUUM */
  		if (!(vacstmt->options & VACOPT_VACUUM))
--- 178,188 ----
  	}
  
  	/*
! 	 * Check that it's a plain table or foreign table; we used to do this in
! 	 * get_rel_oids() but seems safer to check after we've locked the relation.
  	 */
! 	if (!(onerel->rd_rel->relkind == RELKIND_RELATION ||
! 		onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
  	{
  		/* No need for a WARNING if we already complained during VACUUM */
  		if (!(vacstmt->options & VACOPT_VACUUM))
***************
*** 226,241 **** analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
  	MyProc->vacuumFlags |= PROC_IN_ANALYZE;
  	LWLockRelease(ProcArrayLock);
  
! 	/*
! 	 * Do the normal non-recursive ANALYZE.
! 	 */
! 	do_analyze_rel(onerel, vacstmt, false);
  
! 	/*
! 	 * If there are child tables, do recursive ANALYZE.
! 	 */
! 	if (onerel->rd_rel->relhassubclass)
! 		do_analyze_rel(onerel, vacstmt, true);
  
  	/*
  	 * Close source relation now, but keep lock so that no one deletes it
--- 221,251 ----
  	MyProc->vacuumFlags |= PROC_IN_ANALYZE;
  	LWLockRelease(ProcArrayLock);
  
! 	if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 	{
! 		FdwRoutine		   *fdwroutine;
  
! 		ereport(elevel,
! 				(errmsg("analyzing \"%s.%s\"",
! 						get_namespace_name(RelationGetNamespace(onerel)),
! 						RelationGetRelationName(onerel))));
! 
! 		fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(onerel));
! 		fdwroutine->AnalyzeForeignTable(onerel, vacstmt, elevel);
! 	}
! 	else
! 	{
! 		/*
! 		 * Do the normal non-recursive ANALYZE.
! 		 */
! 		do_analyze_rel(onerel, vacstmt, false);
! 
! 		/*
! 		 * If there are child tables, do recursive ANALYZE.
! 		 */
! 		if (onerel->rd_rel->relhassubclass)
! 			do_analyze_rel(onerel, vacstmt, true);
! 	}
  
  	/*
  	 * Close source relation now, but keep lock so that no one deletes it
***************
*** 343,349 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
  						(errcode(ERRCODE_UNDEFINED_COLUMN),
  					errmsg("column \"%s\" of relation \"%s\" does not exist",
  						   col, RelationGetRelationName(onerel))));
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
--- 353,359 ----
  						(errcode(ERRCODE_UNDEFINED_COLUMN),
  					errmsg("column \"%s\" of relation \"%s\" does not exist",
  						   col, RelationGetRelationName(onerel))));
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
***************
*** 357,363 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
  		tcnt = 0;
  		for (i = 1; i <= attr_cnt; i++)
  		{
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
--- 367,373 ----
  		tcnt = 0;
  		for (i = 1; i <= attr_cnt; i++)
  		{
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
***************
*** 411,417 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
  						indexkey = (Node *) lfirst(indexpr_item);
  						indexpr_item = lnext(indexpr_item);
  						thisdata->vacattrstats[tcnt] =
! 							examine_attribute(Irel[ind], i + 1, indexkey);
  						if (thisdata->vacattrstats[tcnt] != NULL)
  							tcnt++;
  					}
--- 421,427 ----
  						indexkey = (Node *) lfirst(indexpr_item);
  						indexpr_item = lnext(indexpr_item);
  						thisdata->vacattrstats[tcnt] =
! 							examine_attribute(Irel[ind], i + 1, indexkey, anl_context);
  						if (thisdata->vacattrstats[tcnt] != NULL)
  							tcnt++;
  					}
***************
*** 807,814 **** compute_index_stats(Relation onerel, double totalrows,
   * If index_expr isn't NULL, then we're trying to analyze an expression index,
   * and index_expr is the expression tree representing the column's data.
   */
! static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
--- 817,824 ----
   * If index_expr isn't NULL, then we're trying to analyze an expression index,
   * and index_expr is the expression tree representing the column's data.
   */
! VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr, MemoryContext anl_context)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
***************
*** 1254,1260 **** acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
  }
  
  /* Select a random value R uniformly distributed in (0 - 1) */
! static double
  random_fract(void)
  {
  	return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
--- 1264,1270 ----
  }
  
  /* Select a random value R uniformly distributed in (0 - 1) */
! double
  random_fract(void)
  {
  	return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
***************
*** 1274,1287 **** random_fract(void)
   * determines the number of records to skip before the next record is
   * processed.
   */
! static double
  init_selection_state(int n)
  {
  	/* Initial value of W (for use when Algorithm Z is first applied) */
  	return exp(-log(random_fract()) / n);
  }
  
! static double
  get_next_S(double t, int n, double *stateptr)
  {
  	double		S;
--- 1284,1297 ----
   * determines the number of records to skip before the next record is
   * processed.
   */
! double
  init_selection_state(int n)
  {
  	/* Initial value of W (for use when Algorithm Z is first applied) */
  	return exp(-log(random_fract()) / n);
  }
  
! double
  get_next_S(double t, int n, double *stateptr)
  {
  	double		S;
***************
*** 1366,1372 **** get_next_S(double t, int n, double *stateptr)
  /*
   * qsort comparator for sorting rows[] array
   */
! static int
  compare_rows(const void *a, const void *b)
  {
  	HeapTuple	ha = *(const HeapTuple *) a;
--- 1376,1382 ----
  /*
   * qsort comparator for sorting rows[] array
   */
! int
  compare_rows(const void *a, const void *b)
  {
  	HeapTuple	ha = *(const HeapTuple *) a;
***************
*** 1561,1567 **** acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
   *		ANALYZE the same table concurrently.  Presently, we lock that out
   *		by taking a self-exclusive lock on the relation in analyze_rel().
   */
! static void
  update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
  {
  	Relation	sd;
--- 1571,1577 ----
   *		ANALYZE the same table concurrently.  Presently, we lock that out
   *		by taking a self-exclusive lock on the relation in analyze_rel().
   */
! void
  update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
  {
  	Relation	sd;
***************
*** 1698,1704 **** update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
   * This exists to provide some insulation between compute_stats routines
   * and the actual storage of the sample data.
   */
! static Datum
  std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
  	int			attnum = stats->tupattnum;
--- 1708,1714 ----
   * This exists to provide some insulation between compute_stats routines
   * and the actual storage of the sample data.
   */
! Datum
  std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
  	int			attnum = stats->tupattnum;
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 42,192 ****
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
- #include "utils/rel.h"
  #include "utils/snapmgr.h"
  
  
  #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  #define OCTVALUE(c) ((c) - '0')
  
- /*
-  * Represents the different source/dest cases we need to worry about at
-  * the bottom level
-  */
- typedef enum CopyDest
- {
- 	COPY_FILE,					/* to/from file */
- 	COPY_OLD_FE,				/* to/from frontend (2.0 protocol) */
- 	COPY_NEW_FE					/* to/from frontend (3.0 protocol) */
- } CopyDest;
- 
- /*
-  *	Represents the end-of-line terminator type of the input
-  */
- typedef enum EolType
- {
- 	EOL_UNKNOWN,
- 	EOL_NL,
- 	EOL_CR,
- 	EOL_CRNL
- } EolType;
- 
- /*
-  * This struct contains all the state variables used throughout a COPY
-  * operation. For simplicity, we use the same struct for all variants of COPY,
-  * even though some fields are used in only some cases.
-  *
-  * Multi-byte encodings: all supported client-side encodings encode multi-byte
-  * characters by having the first byte's high bit set. Subsequent bytes of the
-  * character can have the high bit not set. When scanning data in such an
-  * encoding to look for a match to a single-byte (ie ASCII) character, we must
-  * use the full pg_encoding_mblen() machinery to skip over multibyte
-  * characters, else we might find a false match to a trailing byte. In
-  * supported server encodings, there is no possibility of a false match, and
-  * it's faster to make useless comparisons to trailing bytes than it is to
-  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
-  * when we have to do it the hard way.
-  */
- typedef struct CopyStateData
- {
- 	/* low-level state data */
- 	CopyDest	copy_dest;		/* type of copy source/destination */
- 	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
- 	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO, only for
- 								 * dest == COPY_NEW_FE in COPY FROM */
- 	bool		fe_eof;			/* true if detected end of copy data */
- 	EolType		eol_type;		/* EOL type of input */
- 	int			file_encoding;	/* file or remote side's character encoding */
- 	bool		need_transcoding;		/* file encoding diff from server? */
- 	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */
- 
- 	/* parameters from the COPY command */
- 	Relation	rel;			/* relation to copy to or from */
- 	QueryDesc  *queryDesc;		/* executable query to copy from */
- 	List	   *attnumlist;		/* integer list of attnums to copy */
- 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
- 	bool		binary;			/* binary format? */
- 	bool		oids;			/* include OIDs? */
- 	bool		csv_mode;		/* Comma Separated Value format? */
- 	bool		header_line;	/* CSV header line? */
- 	char	   *null_print;		/* NULL marker string (server encoding!) */
- 	int			null_print_len; /* length of same */
- 	char	   *null_print_client;		/* same converted to file encoding */
- 	char	   *delim;			/* column delimiter (must be 1 byte) */
- 	char	   *quote;			/* CSV quote char (must be 1 byte) */
- 	char	   *escape;			/* CSV escape char (must be 1 byte) */
- 	List	   *force_quote;	/* list of column names */
- 	bool		force_quote_all;	/* FORCE QUOTE *? */
- 	bool	   *force_quote_flags;		/* per-column CSV FQ flags */
- 	List	   *force_notnull;	/* list of column names */
- 	bool	   *force_notnull_flags;	/* per-column CSV FNN flags */
- 
- 	/* these are just for error messages, see CopyFromErrorCallback */
- 	const char *cur_relname;	/* table name for error messages */
- 	int			cur_lineno;		/* line number for error messages */
- 	const char *cur_attname;	/* current att for error messages */
- 	const char *cur_attval;		/* current att value for error messages */
- 
- 	/*
- 	 * Working state for COPY TO/FROM
- 	 */
- 	MemoryContext copycontext;	/* per-copy execution context */
- 
- 	/*
- 	 * Working state for COPY TO
- 	 */
- 	FmgrInfo   *out_functions;	/* lookup info for output functions */
- 	MemoryContext rowcontext;	/* per-row evaluation context */
- 
- 	/*
- 	 * Working state for COPY FROM
- 	 */
- 	AttrNumber	num_defaults;
- 	bool		file_has_oids;
- 	FmgrInfo	oid_in_function;
- 	Oid			oid_typioparam;
- 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
- 	Oid		   *typioparams;	/* array of element types for in_functions */
- 	int		   *defmap;			/* array of default att numbers */
- 	ExprState **defexprs;		/* array of default att expressions */
- 
- 	/*
- 	 * These variables are used to reduce overhead in textual COPY FROM.
- 	 *
- 	 * attribute_buf holds the separated, de-escaped text for each field of
- 	 * the current line.  The CopyReadAttributes functions return arrays of
- 	 * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
- 	 * the buffer on each cycle.
- 	 */
- 	StringInfoData attribute_buf;
- 
- 	/* field raw data pointers found by COPY FROM */
- 
- 	int			max_fields;
- 	char	  **raw_fields;
- 
- 	/*
- 	 * Similarly, line_buf holds the whole input line being processed. The
- 	 * input cycle is first to read the whole line into line_buf, convert it
- 	 * to server encoding there, and then extract the individual attribute
- 	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
- 	 * can display it in error messages if appropriate.
- 	 */
- 	StringInfoData line_buf;
- 	bool		line_buf_converted;		/* converted to server encoding? */
- 
- 	/*
- 	 * Finally, raw_buf holds raw data read from the data source (file or
- 	 * client connection).	CopyReadLine parses this data sufficiently to
- 	 * locate line boundaries, then transfers the data to line_buf and
- 	 * converts it.  Note: we guarantee that there is a \0 at
- 	 * raw_buf[raw_buf_len].
- 	 */
- #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
- 	char	   *raw_buf;
- 	int			raw_buf_index;	/* next byte to process */
- 	int			raw_buf_len;	/* total # of bytes stored */
- } CopyStateData;
  
  /* DestReceiver for COPY (SELECT) TO */
  typedef struct
--- 42,53 ----
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 311,316 **** static void ATPrepSetStatistics(Relation rel, const char *colName,
--- 311,318 ----
  					Node *newValue, LOCKMODE lockmode);
  static void ATExecSetStatistics(Relation rel, const char *colName,
  					Node *newValue, LOCKMODE lockmode);
+ static void ATPrepSetOptions(Relation rel, const char *colName,
+ 				 Node *options, LOCKMODE lockmode);
  static void ATExecSetOptions(Relation rel, const char *colName,
  				 Node *options, bool isReset, LOCKMODE lockmode);
  static void ATExecSetStorage(Relation rel, const char *colName,
***************
*** 2886,2892 **** ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
  			break;
  		case AT_SetOptions:		/* ALTER COLUMN SET ( options ) */
  		case AT_ResetOptions:	/* ALTER COLUMN RESET ( options ) */
! 			ATSimplePermissions(rel, ATT_TABLE | ATT_INDEX);
  			/* This command never recurses */
  			pass = AT_PASS_MISC;
  			break;
--- 2888,2895 ----
  			break;
  		case AT_SetOptions:		/* ALTER COLUMN SET ( options ) */
  		case AT_ResetOptions:	/* ALTER COLUMN RESET ( options ) */
! 			ATSimplePermissions(rel, ATT_TABLE | ATT_INDEX | ATT_FOREIGN_TABLE);
! 			ATPrepSetOptions(rel, cmd->name, cmd->def, lockmode);
  			/* This command never recurses */
  			pass = AT_PASS_MISC;
  			break;
***************
*** 4822,4831 **** ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
  	 * allowSystemTableMods to be turned on.
  	 */
  	if (rel->rd_rel->relkind != RELKIND_RELATION &&
! 		rel->rd_rel->relkind != RELKIND_INDEX)
  		ereport(ERROR,
  				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 				 errmsg("\"%s\" is not a table or index",
  						RelationGetRelationName(rel))));
  
  	/* Permissions checks */
--- 4825,4835 ----
  	 * allowSystemTableMods to be turned on.
  	 */
  	if (rel->rd_rel->relkind != RELKIND_RELATION &&
! 		rel->rd_rel->relkind != RELKIND_INDEX &&
! 		rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
  		ereport(ERROR,
  				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 				 errmsg("\"%s\" is not a table, index, or foreign table",
  						RelationGetRelationName(rel))));
  
  	/* Permissions checks */
***************
*** 4893,4898 **** ATExecSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
--- 4897,4923 ----
  	heap_close(attrelation, RowExclusiveLock);
  }
  
+ /* Prep for SET/RESET (options): foreign tables reject n_distinct_inherited. */
+ static void
+ ATPrepSetOptions(Relation rel, const char *colName, Node *options,
+ 				 LOCKMODE lockmode)
+ {
+ 	if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 	{
+ 		ListCell   *cell;
+ 
+ 		foreach(cell, (List *) options)
+ 		{
+ 			DefElem    *def = (DefElem *) lfirst(cell);
+ 
+ 			if (pg_strcasecmp(def->defname, "n_distinct_inherited") == 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 						 errmsg("cannot support option \"n_distinct_inherited\" for foreign tables")));
+ 		}
+ 	}
+ }
+ 
  static void
  ATExecSetOptions(Relation rel, const char *colName, Node *options,
  				 bool isReset, LOCKMODE lockmode)
*** a/src/bin/psql/tab-complete.c
--- b/src/bin/psql/tab-complete.c
***************
*** 399,404 **** static const SchemaQuery Query_for_list_of_tsvf = {
--- 399,419 ----
  	NULL
  };
  
+ static const SchemaQuery Query_for_list_of_tf = {
+ 	/* catname */
+ 	"pg_catalog.pg_class c",
+ 	/* selcondition: relkind 'r' or 'f', i.e. tables and foreign tables */
+ 	"c.relkind IN ('r', 'f')",
+ 	/* viscondition */
+ 	"pg_catalog.pg_table_is_visible(c.oid)",
+ 	/* namespace */
+ 	"c.relnamespace",
+ 	/* result */
+ 	"pg_catalog.quote_ident(c.relname)",
+ 	/* qualresult */
+ 	NULL
+ };
+ 
  static const SchemaQuery Query_for_list_of_views = {
  	/* catname */
  	"pg_catalog.pg_class c",
***************
*** 2755,2761 **** psql_completion(char *text, int start, int end)
  /* ANALYZE */
  	/* If the previous word is ANALYZE, produce list of tables */
  	else if (pg_strcasecmp(prev_wd, "ANALYZE") == 0)
! 		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL);
  
  /* WHERE */
  	/* Simple case of the word before the where being the table name */
--- 2770,2776 ----
  /* ANALYZE */
  	/* If the previous word is ANALYZE, produce list of tables */
  	else if (pg_strcasecmp(prev_wd, "ANALYZE") == 0)
! 		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tf, NULL);
  
  /* WHERE */
  	/* Simple case of the word before the where being the table name */
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
***************
*** 14,22 ****
--- 14,168 ----
  #ifndef COPY_H
  #define COPY_H
  
+ #include "access/attnum.h"
+ #include "executor/execdesc.h"
+ #include "fmgr.h"
+ #include "lib/stringinfo.h"
  #include "nodes/execnodes.h"
  #include "nodes/parsenodes.h"
+ #include "nodes/pg_list.h"
  #include "tcop/dest.h"
+ #include "utils/palloc.h"
+ #include "utils/rel.h"
+ 
+ /*
+  * Represents the different source/dest cases we need to worry about at
+  * the bottom level
+  */
+ typedef enum CopyDest
+ {
+ 	COPY_FILE,					/* to/from file */
+ 	COPY_OLD_FE,				/* to/from frontend (2.0 protocol) */
+ 	COPY_NEW_FE					/* to/from frontend (3.0 protocol) */
+ } CopyDest;
+ 
+ /*
+  *	Represents the end-of-line terminator type of the input
+  */
+ typedef enum EolType
+ {
+ 	EOL_UNKNOWN,
+ 	EOL_NL,
+ 	EOL_CR,
+ 	EOL_CRNL
+ } EolType;
+ 
+ /*
+  * This struct contains all the state variables used throughout a COPY
+  * operation. For simplicity, we use the same struct for all variants of COPY,
+  * even though some fields are used in only some cases.
+  *
+  * Multi-byte encodings: all supported client-side encodings encode multi-byte
+  * characters by having the first byte's high bit set. Subsequent bytes of the
+  * character can have the high bit not set. When scanning data in such an
+  * encoding to look for a match to a single-byte (ie ASCII) character, we must
+  * use the full pg_encoding_mblen() machinery to skip over multibyte
+  * characters, else we might find a false match to a trailing byte. In
+  * supported server encodings, there is no possibility of a false match, and
+  * it's faster to make useless comparisons to trailing bytes than it is to
+  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
+  * when we have to do it the hard way.
+  */
+ typedef struct CopyStateData
+ {
+ 	/* low-level state data */
+ 	CopyDest	copy_dest;		/* type of copy source/destination */
+ 	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
+ 	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO, only for
+ 								 * dest == COPY_NEW_FE in COPY FROM */
+ 	bool		fe_eof;			/* true if detected end of copy data */
+ 	EolType		eol_type;		/* EOL type of input */
+ 	int			file_encoding;	/* file or remote side's character encoding */
+ 	bool		need_transcoding;		/* file encoding diff from server? */
+ 	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */
+ 
+ 	/* parameters from the COPY command */
+ 	Relation	rel;			/* relation to copy to or from */
+ 	QueryDesc  *queryDesc;		/* executable query to copy from */
+ 	List	   *attnumlist;		/* integer list of attnums to copy */
+ 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
+ 	bool		binary;			/* binary format? */
+ 	bool		oids;			/* include OIDs? */
+ 	bool		csv_mode;		/* Comma Separated Value format? */
+ 	bool		header_line;	/* CSV header line? */
+ 	char	   *null_print;		/* NULL marker string (server encoding!) */
+ 	int			null_print_len; /* length of same */
+ 	char	   *null_print_client;		/* same converted to file encoding */
+ 	char	   *delim;			/* column delimiter (must be 1 byte) */
+ 	char	   *quote;			/* CSV quote char (must be 1 byte) */
+ 	char	   *escape;			/* CSV escape char (must be 1 byte) */
+ 	List	   *force_quote;	/* list of column names */
+ 	bool		force_quote_all;	/* FORCE QUOTE *? */
+ 	bool	   *force_quote_flags;		/* per-column CSV FQ flags */
+ 	List	   *force_notnull;	/* list of column names */
+ 	bool	   *force_notnull_flags;	/* per-column CSV FNN flags */
+ 
+ 	/* these are just for error messages, see CopyFromErrorCallback */
+ 	const char *cur_relname;	/* table name for error messages */
+ 	int			cur_lineno;		/* line number for error messages */
+ 	const char *cur_attname;	/* current att for error messages */
+ 	const char *cur_attval;		/* current att value for error messages */
+ 
+ 	/*
+ 	 * Working state for COPY TO/FROM
+ 	 */
+ 	MemoryContext copycontext;	/* per-copy execution context */
+ 
+ 	/*
+ 	 * Working state for COPY TO
+ 	 */
+ 	FmgrInfo   *out_functions;	/* lookup info for output functions */
+ 	MemoryContext rowcontext;	/* per-row evaluation context */
+ 
+ 	/*
+ 	 * Working state for COPY FROM
+ 	 */
+ 	AttrNumber	num_defaults;
+ 	bool		file_has_oids;
+ 	FmgrInfo	oid_in_function;
+ 	Oid			oid_typioparam;
+ 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
+ 	Oid		   *typioparams;	/* array of element types for in_functions */
+ 	int		   *defmap;			/* array of default att numbers */
+ 	ExprState **defexprs;		/* array of default att expressions */
+ 
+ 	/*
+ 	 * These variables are used to reduce overhead in textual COPY FROM.
+ 	 *
+ 	 * attribute_buf holds the separated, de-escaped text for each field of
+ 	 * the current line.  The CopyReadAttributes functions return arrays of
+ 	 * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
+ 	 * the buffer on each cycle.
+ 	 */
+ 	StringInfoData attribute_buf;
+ 
+ 	/* field raw data pointers found by COPY FROM */
+ 
+ 	int			max_fields;
+ 	char	  **raw_fields;
+ 
+ 	/*
+ 	 * Similarly, line_buf holds the whole input line being processed. The
+ 	 * input cycle is first to read the whole line into line_buf, convert it
+ 	 * to server encoding there, and then extract the individual attribute
+ 	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
+ 	 * can display it in error messages if appropriate.
+ 	 */
+ 	StringInfoData line_buf;
+ 	bool		line_buf_converted;		/* converted to server encoding? */
+ 
+ 	/*
+ 	 * Finally, raw_buf holds raw data read from the data source (file or
+ 	 * client connection).	CopyReadLine parses this data sufficiently to
+ 	 * locate line boundaries, then transfers the data to line_buf and
+ 	 * converts it.  Note: we guarantee that there is a \0 at
+ 	 * raw_buf[raw_buf_len].
+ 	 */
+ #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
+ 	char	   *raw_buf;
+ 	int			raw_buf_index;	/* next byte to process */
+ 	int			raw_buf_len;	/* total # of bytes stored */
+ } CopyStateData;
  
  /* CopyStateData is private in commands/copy.c */
  typedef struct CopyStateData *CopyState;
*** a/src/include/commands/vacuum.h
--- b/src/include/commands/vacuum.h
***************
*** 167,171 **** extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
--- 167,179 ----
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
  			BufferAccessStrategy bstrategy);
+ extern VacAttrStats * examine_attribute(Relation onerel, int attnum, Node *index_expr,
+ 			MemoryContext anl_context);
+ extern double random_fract(void);
+ extern double init_selection_state(int n);
+ extern double get_next_S(double t, int n, double *stateptr);
+ extern int	compare_rows(const void *a, const void *b);
+ extern void	update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats);
+ extern Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  
  #endif   /* VACUUM_H */
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 12,19 ****
--- 12,21 ----
  #ifndef FDWAPI_H
  #define FDWAPI_H
  
+ #include "foreign/foreign.h"
  #include "nodes/execnodes.h"
  #include "nodes/relation.h"
+ #include "utils/rel.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
  struct ExplainState;
***************
*** 68,73 **** typedef void (*ReScanForeignScan_function) (ForeignScanState *node);
--- 70,78 ----
  
  typedef void (*EndForeignScan_function) (ForeignScanState *node);
  
+ typedef void (*AnalyzeForeignTable_function) (Relation relation,
+ 											  VacuumStmt *vacstmt,
+ 											  int elevel);
  
  /*
   * FdwRoutine is the struct returned by a foreign-data wrapper's handler
***************
*** 88,93 **** typedef struct FdwRoutine
--- 93,99 ----
  	IterateForeignScan_function IterateForeignScan;
  	ReScanForeignScan_function ReScanForeignScan;
  	EndForeignScan_function EndForeignScan;
+ 	AnalyzeForeignTable_function AnalyzeForeignTable;
  } FdwRoutine;