postgresql-analyze-v3.patch
text/plain
Filename: postgresql-analyze-v3.patch
Type: text/plain
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: context
Series: patch v3
| File | + | − |
|---|---|---|
| contrib/file_fdw/file_fdw.c | 410 | 0 |
| contrib/file_fdw/input/file_fdw.source | 5 | 0 |
| contrib/file_fdw/output/file_fdw.source | 15 | 0 |
| doc/src/sgml/fdwhandler.sgml | 13 | 0 |
| doc/src/sgml/maintenance.sgml | 4 | 0 |
| doc/src/sgml/ref/alter_foreign_table.sgml | 47 | 0 |
| doc/src/sgml/ref/analyze.sgml | 9 | 0 |
| src/backend/commands/analyze.c | 42 | 0 |
| src/backend/commands/copy.c | 0 | 139 |
| src/backend/commands/tablecmds.c | 28 | 0 |
| src/bin/psql/tab-complete.c | 16 | 0 |
| src/include/commands/copy.h | 146 | 0 |
| src/include/commands/vacuum.h | 8 | 0 |
| src/include/foreign/fdwapi.h | 6 | 0 |
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 15,30 ****
--- 15,42 ----
#include <sys/stat.h>
#include <unistd.h>
+ #include "access/htup.h"
#include "access/reloptions.h"
+ #include "access/transam.h"
#include "catalog/pg_foreign_table.h"
#include "commands/copy.h"
+ #include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "commands/explain.h"
+ #include "commands/vacuum.h"
#include "foreign/fdwapi.h"
#include "foreign/foreign.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/cost.h"
+ #include "optimizer/plancat.h"
+ #include "parser/parse_relation.h"
+ #include "pgstat.h"
+ #include "utils/attoptcache.h"
+ #include "utils/elog.h"
+ #include "utils/guc.h"
+ #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
***************
*** 101,106 **** static void fileBeginForeignScan(ForeignScanState *node, int eflags);
--- 113,119 ----
static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
static void fileReScanForeignScan(ForeignScanState *node);
static void fileEndForeignScan(ForeignScanState *node);
+ static void fileAnalyzeForeignTable(Relation onerel, VacuumStmt *vacstmt, int elevel);
/*
* Helper functions
***************
*** 112,118 **** static List *get_file_fdw_attribute_options(Oid relid);
static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
const char *filename,
Cost *startup_cost, Cost *total_cost);
!
/*
* Foreign-data wrapper handler function: return a struct with pointers
--- 125,132 ----
static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
const char *filename,
Cost *startup_cost, Cost *total_cost);
! static void file_fdw_do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, int elevel);
! static int file_fdw_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, BlockNumber *totalpages, double *totalrows);
/*
* Foreign-data wrapper handler function: return a struct with pointers
***************
*** 129,134 **** file_fdw_handler(PG_FUNCTION_ARGS)
--- 143,149 ----
fdwroutine->IterateForeignScan = fileIterateForeignScan;
fdwroutine->ReScanForeignScan = fileReScanForeignScan;
fdwroutine->EndForeignScan = fileEndForeignScan;
+ fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;
PG_RETURN_POINTER(fdwroutine);
}
***************
*** 575,580 **** fileReScanForeignScan(ForeignScanState *node)
--- 590,605 ----
}
/*
+ * fileAnalyzeForeignTable
+ * ANALYZE callback installed as AnalyzeForeignTable; hands off to file_fdw_do_analyze_rel()
+ */
+ static void
+ fileAnalyzeForeignTable(Relation onerel, VacuumStmt *vacstmt, int elevel)
+ {
+ file_fdw_do_analyze_rel(onerel, vacstmt, elevel);
+ }
+
+ /*
* Estimate costs of scanning a foreign table.
*/
static void
***************
*** 584,590 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
{
struct stat stat_buf;
BlockNumber pages;
! int tuple_width;
double ntuples;
double nrows;
Cost run_cost = 0;
--- 609,616 ----
{
struct stat stat_buf;
BlockNumber pages;
! BlockNumber relpages;
! double reltuples;
double ntuples;
double nrows;
Cost run_cost = 0;
***************
*** 604,619 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
if (pages < 1)
pages = 1;
! /*
! * Estimate the number of tuples in the file. We back into this estimate
! * using the planner's idea of the relation width; which is bogus if not
! * all columns are being read, not to mention that the text representation
! * of a row probably isn't the same size as its internal representation.
! * FIXME later.
! */
! tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
! ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
/*
* Now estimate the number of rows returned by the scan after applying the
--- 630,661 ----
if (pages < 1)
pages = 1;
! relpages = baserel->pages;
! reltuples = baserel->tuples;
!
! if (relpages > 0)
! {
! double density;
! density = reltuples / (double) relpages;
!
! ntuples = clamp_row_est(density * (double) pages);
! }
! else
! {
! int tuple_width;
!
! /*
! * Estimate the number of tuples in the file. We back into this estimate
! * using the planner's idea of the relation width; which is bogus if not
! * all columns are being read, not to mention that the text representation
! * of a row probably isn't the same size as its internal representation.
! * FIXME later.
! */
! tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
!
! ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
! }
/*
* Now estimate the number of rows returned by the scan after applying the
***************
*** 645,647 **** estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
--- 687,1046 ----
run_cost += cpu_per_tuple * ntuples;
*total_cost = *startup_cost + run_cost;
}
+
+ /*
+ * file_fdw_do_analyze_rel() -- sample one file_fdw table, compute column stats, store them in the catalogs
+ */
+ static void
+ file_fdw_do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, int elevel)
+ {
+ int i,
+ attr_cnt,
+ tcnt,
+ numrows = 0,
+ targrows;
+ double totalrows = 0;
+ BlockNumber totalpages = 0;
+ HeapTuple *rows;
+ VacAttrStats **vacattrstats;
+ MemoryContext anl_context;
+ MemoryContext caller_context;
+
+ /*
+ * Run everything in a private memory context; it is deleted at the end
+ * of this function, which frees whatever junk gets created in between.
+ */
+ anl_context = AllocSetContextCreate(CurrentMemoryContext,
+ "Analyze",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ caller_context = MemoryContextSwitchTo(anl_context);
+
+ /*
+ * Determine which columns to analyze: the ones named in vacstmt->va_cols
+ * if given, else every column. Entries for which examine_attribute()
+ * returns NULL are dropped. System attributes are never analyzed.
+ */
+ if (vacstmt->va_cols != NIL)
+ {
+ ListCell *le;
+
+ vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
+ sizeof(VacAttrStats *));
+ tcnt = 0;
+ foreach(le, vacstmt->va_cols)
+ {
+ char *col = strVal(lfirst(le));
+
+ i = attnameAttNum(onerel, col, false);
+ if (i == InvalidAttrNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ col, RelationGetRelationName(onerel))));
+ vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+ if (vacattrstats[tcnt] != NULL)
+ tcnt++;
+ }
+ attr_cnt = tcnt;
+ }
+ else
+ {
+ attr_cnt = onerel->rd_att->natts;
+ vacattrstats = (VacAttrStats **) palloc(attr_cnt * sizeof(VacAttrStats *));
+ tcnt = 0;
+ for (i = 1; i <= attr_cnt; i++)
+ {
+ vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+ if (vacattrstats[tcnt] != NULL)
+ tcnt++;
+ }
+ attr_cnt = tcnt;
+ }
+
+ /*
+ * Determine how many rows we need to sample, using the worst case from
+ * all analyzable columns. We use a lower bound of 100 rows to avoid
+ * possible overflow in Vitter's algorithm.
+ */
+ targrows = 100;
+ for (i = 0; i < attr_cnt; i++)
+ {
+ if (targrows < vacattrstats[i]->minrows)
+ targrows = vacattrstats[i]->minrows;
+ }
+
+ /*
+ * Acquire the sample rows; this also fills in totalpages and totalrows
+ */
+ rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+ numrows = file_fdw_acquire_sample_rows(onerel, elevel, rows, targrows, &totalpages, &totalrows);
+
+ /*
+ * Compute the statistics. Temporary results during the calculations for
+ * each column are stored in a child context. The calc routines are
+ * responsible to make sure that whatever they store into the VacAttrStats
+ * structure is allocated in anl_context.
+ */
+ if (numrows > 0)
+ {
+ MemoryContext col_context, old_context;
+
+ col_context = AllocSetContextCreate(anl_context,
+ "Analyze Column",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ old_context = MemoryContextSwitchTo(col_context);
+
+ for (i = 0; i < attr_cnt; i++)
+ {
+ VacAttrStats *stats = vacattrstats[i];
+ AttributeOpts *aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
+
+ stats->rows = rows;
+ stats->tupDesc = onerel->rd_att;
+ (*stats->compute_stats) (stats,
+ std_fetch_func,
+ numrows,
+ totalrows);
+
+ /*
+ * If the appropriate flavor of the n_distinct option is
+ * specified, override with the corresponding value.
+ */
+ if (aopt != NULL)
+ {
+ float8 n_distinct = aopt->n_distinct;
+
+ if (n_distinct != 0.0)
+ stats->stadistinct = n_distinct;
+ }
+
+ MemoryContextResetAndDeleteChildren(col_context);
+ }
+
+ MemoryContextSwitchTo(old_context);
+ MemoryContextDelete(col_context);
+
+ /*
+ * Emit the completed stats rows into pg_statistic, replacing any
+ * previous statistics for the target columns. (If there are stats in
+ * pg_statistic for columns we didn't process, we leave them alone.)
+ */
+ update_attstats(onerel->rd_id, false, attr_cnt, vacattrstats);
+ }
+
+ /*
+ * Update pages/tuples stats in pg_class (runs even when no rows were sampled).
+ */
+ vac_update_relstats(onerel, totalpages, totalrows, 0, false, InvalidTransactionId);
+
+ /*
+ * Report ANALYZE to the stats collector, too.
+ */
+ pgstat_report_analyze(onerel, totalrows, 0);
+
+ /* Switch back to the caller's context and drop all analyze-time memory */
+ MemoryContextSwitchTo(caller_context);
+ MemoryContextDelete(anl_context);
+ anl_context = NULL; /* NOTE(review): dead store; anl_context is a local about to go out of scope */
+ }
+
+ /*
+ * file_fdw_acquire_sample_rows -- acquire a random sample of rows from the file
+ *
+ * Selected rows are returned in the caller-allocated array rows[], which
+ * must have at least targrows entries; the number selected is the result.
+ * *totalpages gets the file size in BLCKSZ pages (minimum 1); *totalrows gets
+ * the rows read, not counting rows skipped for a NULL in a NOT NULL column.
+ *
+ * The returned tuples are ordered by t_self, a synthetic position in the file.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+ static int
+ file_fdw_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, BlockNumber *totalpages, double *totalrows)
+ {
+ int numrows = 0;
+ double samplerows = 0; /* total # rows collected */
+ double rowstoskip = -1; /* -1 means not set yet */
+ double rstate;
+ HeapTuple tuple;
+ TupleDesc tupDesc;
+ TupleConstr *constr;
+ int natts;
+ int attrChk;
+ Datum *values;
+ bool *nulls;
+ bool found;
+ bool sample_it = false;
+ BlockNumber blknum;
+ OffsetNumber offnum;
+ char *filename;
+ struct stat stat_buf;
+ List *options;
+ CopyState cstate;
+ ErrorContextCallback errcontext;
+
+ Assert(onerel);
+ Assert(targrows > 0);
+
+ tupDesc = RelationGetDescr(onerel);
+ constr = tupDesc->constr;
+ natts = tupDesc->natts;
+ values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
+ nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
+
+ /* Fetch options of foreign table */
+ fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+
+ /*
+ * Get size of the file.
+ */
+ if (stat(filename, &stat_buf) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ filename)));
+
+ /*
+ * Convert size to pages; the caller records this as the table's page count.
+ */
+ *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
+ if (*totalpages < 1)
+ *totalpages = 1;
+
+ /*
+ * Create CopyState from FDW options. We always acquire all columns, so
+ * that values[]/nulls[] line up with the relation's tuple descriptor.
+ */
+ cstate = BeginCopyFrom(onerel, filename, NIL, options);
+
+ /* Prepare for sampling rows */
+ rstate = init_selection_state(targrows);
+
+ /* Set up callback to identify error line number. */
+ errcontext.callback = CopyFromErrorCallback;
+ errcontext.arg = (void *) cstate;
+ errcontext.previous = error_context_stack;
+ error_context_stack = &errcontext;
+
+ for (;;)
+ {
+ sample_it = true;
+
+ CHECK_FOR_INTERRUPTS();
+
+ found = NextCopyFrom(cstate, NULL, values, nulls, NULL);
+
+ if (!found)
+ break;
+
+ tuple = heap_form_tuple(tupDesc, values, nulls);
+
+ if (constr && constr->has_not_null) /* drop rows that would violate NOT NULL */
+ {
+ for (attrChk = 1; attrChk <= natts; attrChk++)
+ {
+ if (onerel->rd_att->attrs[attrChk - 1]->attnotnull &&
+ !(cstate->force_notnull_flags[attrChk - 1]) &&
+ heap_attisnull(tuple, attrChk))
+ {
+ sample_it = false;
+ break;
+ }
+ }
+ }
+
+ if (!sample_it)
+ {
+ heap_freetuple(tuple);
+ continue;
+ }
+
+ /*
+ * The first targrows sample rows are simply copied into the
+ * reservoir. Then we start replacing tuples in the sample
+ * until we reach the end of the relation. This algorithm is
+ * from Jeff Vitter's paper (see get_next_S() in analyze.c). It
+ * works by repeatedly computing the number of tuples to skip
+ * before selecting a tuple, which replaces a randomly chosen
+ * element of the reservoir (current set of tuples). At all
+ * times the reservoir is a true random sample of the tuples
+ * we've passed over so far, so when we fall off the end of
+ * the relation we're done.
+ */
+ if (numrows < targrows)
+ {
+ blknum = (BlockNumber) (samplerows / MaxOffsetNumber); /* divide as double, then narrow */
+ offnum = (OffsetNumber) (samplerows - (double) blknum * MaxOffsetNumber) + 1;
+ ItemPointerSet(&tuple->t_self, blknum, offnum); /* synthetic TID = row's ordinal in the file */
+ rows[numrows++] = heap_copytuple(tuple);
+ }
+ else
+ {
+ /*
+ * t in Vitter's paper is the number of records already
+ * processed. If we need to compute a new S value, we
+ * must use the not-yet-incremented value of samplerows as
+ * t.
+ */
+ if (rowstoskip < 0)
+ rowstoskip = get_next_S(samplerows, targrows, &rstate);
+
+ if (rowstoskip <= 0)
+ {
+ /*
+ * Found a suitable tuple, so save it, replacing one
+ * old tuple at random
+ */
+ int k = (int) (targrows * random_fract());
+
+ Assert(k >= 0 && k < targrows);
+ heap_freetuple(rows[k]);
+
+ blknum = (BlockNumber) (samplerows / MaxOffsetNumber); /* divide as double, then narrow */
+ offnum = (OffsetNumber) (samplerows - (double) blknum * MaxOffsetNumber) + 1;
+ ItemPointerSet(&tuple->t_self, blknum, offnum); /* synthetic TID = row's ordinal in the file */
+ rows[k] = heap_copytuple(tuple);
+ }
+
+ rowstoskip -= 1;
+ }
+
+ samplerows += 1;
+ heap_freetuple(tuple);
+ }
+
+ /* Remove error callback. */
+ error_context_stack = errcontext.previous;
+
+ /*
+ * If we didn't find as many tuples as we wanted then we're done. No sort
+ * is needed, since they're already in order.
+ *
+ * Otherwise we need to sort the collected tuples by position
+ * (itempointer). It's not worth worrying about corner cases where the
+ * tuples are already sorted.
+ */
+ if (numrows == targrows)
+ qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
+
+ *totalrows = samplerows;
+
+ EndCopyFrom(cstate);
+
+ pfree(values);
+ pfree(nulls);
+
+ /*
+ * Emit some interesting relation info
+ */
+ ereport(elevel,
+ (errmsg("\"%s\": scanned, "
+ "%d rows in sample, %.0f total rows",
+ RelationGetRelationName(onerel), numrows, *totalrows)));
+
+ return numrows;
+ }
*** a/contrib/file_fdw/input/file_fdw.source
--- b/contrib/file_fdw/input/file_fdw.source
***************
*** 111,116 **** EXECUTE st(100);
--- 111,121 ----
EXECUTE st(100);
DEALLOCATE st;
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
*** a/contrib/file_fdw/output/file_fdw.source
--- b/contrib/file_fdw/output/file_fdw.source
***************
*** 174,179 **** EXECUTE st(100);
--- 174,194 ----
(1 row)
DEALLOCATE st;
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+ relpages | reltuples
+ ----------+-----------
+ 1 | 3
+ (1 row)
+
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+ schemaname | tablename | attname | inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation
+ ------------+-----------+---------+-----------+-----------+-----------+------------+------------------+-------------------+-------------------------+-------------
+ public | agg_csv | a | f | 0 | 2 | -1 | | | {0,42,100} | -0.5
+ public | agg_csv | b | f | 0 | 4 | -1 | | | {0.09561,99.097,324.78} | 0.5
+ (2 rows)
+
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
tableoid | b
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 228,233 **** EndForeignScan (ForeignScanState *node);
--- 228,246 ----
</para>
<para>
+ <programlisting>
+ void
+ AnalyzeForeignTable (Relation onerel,
+ VacuumStmt *vacstmt,
+ int elevel);
+ </programlisting>
+
+ Collect statistics on a foreign table and store the results in the
+ pg_class and pg_statistic system catalogs.
+ This is called when the <command>ANALYZE</> command is run.
+ </para>
+
+ <para>
The <structname>FdwRoutine</> and <structname>FdwPlan</> struct types
are declared in <filename>src/include/foreign/fdwapi.h</>, which see
for additional details.
*** a/doc/src/sgml/maintenance.sgml
--- b/doc/src/sgml/maintenance.sgml
***************
*** 279,284 ****
--- 279,288 ----
<command>ANALYZE</> strictly as a function of the number of rows
inserted or updated; it has no knowledge of whether that will lead
to meaningful statistical changes.
+ Note that the autovacuum daemon does not issue <command>ANALYZE</>
+ commands on foreign tables. If a foreign table needs statistics, run
+ <command>ANALYZE</> on it manually as needed, typically on a schedule
+ driven by cron or Task Scheduler scripts.
</para>
<para>
*** a/doc/src/sgml/ref/alter_foreign_table.sgml
--- b/doc/src/sgml/ref/alter_foreign_table.sgml
***************
*** 36,41 **** ALTER FOREIGN TABLE <replaceable class="PARAMETER">name</replaceable>
--- 36,44 ----
DROP [ COLUMN ] [ IF EXISTS ] <replaceable class="PARAMETER">column</replaceable> [ RESTRICT | CASCADE ]
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> [ SET DATA ] TYPE <replaceable class="PARAMETER">type</replaceable>
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> { SET | DROP } NOT NULL
+ ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS <replaceable class="PARAMETER">integer</replaceable>
+ ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET ( <replaceable class="PARAMETER">attribute_option</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] )
+ ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> RESET ( <replaceable class="PARAMETER">attribute_option</replaceable> [, ... ] )
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> OPTIONS ( [ ADD | SET | DROP ] <replaceable class="PARAMETER">option</replaceable> ['<replaceable class="PARAMETER">value</replaceable>'] [, ... ])
OWNER TO <replaceable class="PARAMETER">new_owner</replaceable>
OPTIONS ( [ ADD | SET | DROP ] <replaceable class="PARAMETER">option</replaceable> ['<replaceable class="PARAMETER">value</replaceable>'] [, ... ])
***************
*** 94,99 **** ALTER FOREIGN TABLE <replaceable class="PARAMETER">name</replaceable>
--- 97,146 ----
</varlistentry>
<varlistentry>
+ <term><literal>SET STATISTICS</literal></term>
+ <listitem>
+ <para>
+ This form
+ sets the per-column statistics-gathering target for subsequent
+ <xref linkend="sql-analyze"> operations.
+ The target can be set in the range 0 to 10000; alternatively, set it
+ to -1 to revert to using the system default statistics
+ target (<xref linkend="guc-default-statistics-target">).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>SET ( <replaceable class="PARAMETER">attribute_option</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] )</literal></term>
+ <term><literal>RESET ( <replaceable class="PARAMETER">attribute_option</replaceable> [, ... ] )</literal></term>
+ <listitem>
+ <para>
+ This form
+ sets or resets a per-attribute option. Currently, the only defined
+ per-attribute option is <literal>n_distinct</>, which overrides
+ the number-of-distinct-values estimates made by subsequent
+ <xref linkend="sql-analyze"> operations.
+ When set to a positive value, <command>ANALYZE</> will assume that
+ the column contains exactly the specified number of distinct nonnull
+ values.
+ When set to a negative value, which must be greater than or equal
+ to -1, <command>ANALYZE</> will assume that the number of distinct
+ nonnull values in the column is linear in the size of the foreign
+ table; the exact count is to be computed by multiplying the estimated
+ foreign table size by the absolute value of the given number.
+ For example,
+ a value of -1 implies that all values in the column are distinct,
+ while a value of -0.5 implies that each value appears twice on the
+ average.
+ This can be useful when the size of the foreign table changes over
+ time, since the multiplication by the number of rows in the foreign
+ table is not performed until query planning time. Specify a value
+ of 0 to revert to estimating the number of distinct values normally.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><literal>OWNER</literal></term>
<listitem>
<para>
*** a/doc/src/sgml/ref/analyze.sgml
--- b/doc/src/sgml/ref/analyze.sgml
***************
*** 39,47 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
<para>
With no parameter, <command>ANALYZE</command> examines every table in the
! current database. With a parameter, <command>ANALYZE</command> examines
! only that table. It is further possible to give a list of column names,
! in which case only the statistics for those columns are collected.
</para>
</refsect1>
--- 39,48 ----
<para>
With no parameter, <command>ANALYZE</command> examines every table in the
! current database except for foreign tables. With a parameter,
! <command>ANALYZE</command> examines only that table. It is further
! possible to give a list of column names, in which case only the
! statistics for those columns are collected.
</para>
</refsect1>
***************
*** 63,69 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
<listitem>
<para>
The name (possibly schema-qualified) of a specific table to
! analyze. Defaults to all tables in the current database.
</para>
</listitem>
</varlistentry>
--- 64,71 ----
<listitem>
<para>
The name (possibly schema-qualified) of a specific table to
! analyze. Defaults to all tables in the current database except
! for foreign tables.
</para>
</listitem>
</varlistentry>
***************
*** 137,143 **** ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
In rare situations, this non-determinism will cause the planner's
choices of query plans to change after <command>ANALYZE</command> is run.
To avoid this, raise the amount of statistics collected by
! <command>ANALYZE</command>, as described below.
</para>
<para>
--- 139,147 ----
In rare situations, this non-determinism will cause the planner's
choices of query plans to change after <command>ANALYZE</command> is run.
To avoid this, raise the amount of statistics collected by
! <command>ANALYZE</command>, as described below. Note that the time
! needed to analyze foreign tables depends on the implementation of
! the foreign data wrapper via which such tables are attached.
</para>
<para>
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
***************
*** 23,28 ****
--- 23,29 ----
#include "access/xact.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
+ #include "catalog/pg_class.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
***************
*** 30,35 ****
--- 31,38 ----
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
+ #include "foreign/foreign.h"
+ #include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_oper.h"
***************
*** 94,113 **** static void compute_index_stats(Relation onerel, double totalrows,
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
MemoryContext col_context);
- static VacAttrStats *examine_attribute(Relation onerel, int attnum,
- Node *index_expr);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
- static double random_fract(void);
- static double init_selection_state(int n);
- static double get_next_S(double t, int n, double *stateptr);
- static int compare_rows(const void *a, const void *b);
static int acquire_inherited_sample_rows(Relation onerel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows);
- static void update_attstats(Oid relid, bool inh,
- int natts, VacAttrStats **vacattrstats);
- static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static bool std_typanalyze(VacAttrStats *stats);
--- 97,107 ----
***************
*** 184,193 **** analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
}
/*
! * Check that it's a plain table; we used to do this in get_rel_oids() but
! * seems safer to check after we've locked the relation.
*/
! if (onerel->rd_rel->relkind != RELKIND_RELATION)
{
/* No need for a WARNING if we already complained during VACUUM */
if (!(vacstmt->options & VACOPT_VACUUM))
--- 178,188 ----
}
/*
! * Check that it's a plain table or foreign table; we used to do this in
! * get_rel_oids() but seems safer to check after we've locked the relation.
*/
! if (!(onerel->rd_rel->relkind == RELKIND_RELATION ||
! onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
{
/* No need for a WARNING if we already complained during VACUUM */
if (!(vacstmt->options & VACOPT_VACUUM))
***************
*** 226,241 **** analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
MyProc->vacuumFlags |= PROC_IN_ANALYZE;
LWLockRelease(ProcArrayLock);
! /*
! * Do the normal non-recursive ANALYZE.
! */
! do_analyze_rel(onerel, vacstmt, false);
! /*
! * If there are child tables, do recursive ANALYZE.
! */
! if (onerel->rd_rel->relhassubclass)
! do_analyze_rel(onerel, vacstmt, true);
/*
* Close source relation now, but keep lock so that no one deletes it
--- 221,251 ----
MyProc->vacuumFlags |= PROC_IN_ANALYZE;
LWLockRelease(ProcArrayLock);
! if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! {
! FdwRoutine *fdwroutine;
! ereport(elevel,
! (errmsg("analyzing \"%s.%s\"",
! get_namespace_name(RelationGetNamespace(onerel)),
! RelationGetRelationName(onerel))));
!
! fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(onerel));
! fdwroutine->AnalyzeForeignTable(onerel, vacstmt, elevel);
! }
! else
! {
! /*
! * Do the normal non-recursive ANALYZE.
! */
! do_analyze_rel(onerel, vacstmt, false);
!
! /*
! * If there are child tables, do recursive ANALYZE.
! */
! if (onerel->rd_rel->relhassubclass)
! do_analyze_rel(onerel, vacstmt, true);
! }
/*
* Close source relation now, but keep lock so that no one deletes it
***************
*** 343,349 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
--- 353,359 ----
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
***************
*** 357,363 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
tcnt = 0;
for (i = 1; i <= attr_cnt; i++)
{
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
--- 367,373 ----
tcnt = 0;
for (i = 1; i <= attr_cnt; i++)
{
! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
***************
*** 411,417 **** do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
thisdata->vacattrstats[tcnt] =
! examine_attribute(Irel[ind], i + 1, indexkey);
if (thisdata->vacattrstats[tcnt] != NULL)
tcnt++;
}
--- 421,427 ----
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
thisdata->vacattrstats[tcnt] =
! examine_attribute(Irel[ind], i + 1, indexkey, anl_context);
if (thisdata->vacattrstats[tcnt] != NULL)
tcnt++;
}
***************
*** 807,814 **** compute_index_stats(Relation onerel, double totalrows,
* If index_expr isn't NULL, then we're trying to analyze an expression index,
* and index_expr is the expression tree representing the column's data.
*/
! static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
--- 817,824 ----
* If index_expr isn't NULL, then we're trying to analyze an expression index,
* and index_expr is the expression tree representing the column's data.
*/
! VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr, MemoryContext anl_context)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
***************
*** 1254,1260 **** acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
}
/* Select a random value R uniformly distributed in (0 - 1) */
! static double
random_fract(void)
{
return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
--- 1264,1270 ----
}
/* Select a random value R uniformly distributed in (0 - 1) */
! double
random_fract(void)
{
return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
***************
*** 1274,1287 **** random_fract(void)
* determines the number of records to skip before the next record is
* processed.
*/
! static double
init_selection_state(int n)
{
/* Initial value of W (for use when Algorithm Z is first applied) */
return exp(-log(random_fract()) / n);
}
! static double
get_next_S(double t, int n, double *stateptr)
{
double S;
--- 1284,1297 ----
* determines the number of records to skip before the next record is
* processed.
*/
! double
init_selection_state(int n)
{
/* Initial value of W (for use when Algorithm Z is first applied) */
return exp(-log(random_fract()) / n);
}
! double
get_next_S(double t, int n, double *stateptr)
{
double S;
***************
*** 1366,1372 **** get_next_S(double t, int n, double *stateptr)
/*
* qsort comparator for sorting rows[] array
*/
! static int
compare_rows(const void *a, const void *b)
{
HeapTuple ha = *(const HeapTuple *) a;
--- 1376,1382 ----
/*
* qsort comparator for sorting rows[] array
*/
! int
compare_rows(const void *a, const void *b)
{
HeapTuple ha = *(const HeapTuple *) a;
***************
*** 1561,1567 **** acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
* ANALYZE the same table concurrently. Presently, we lock that out
* by taking a self-exclusive lock on the relation in analyze_rel().
*/
! static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
Relation sd;
--- 1571,1577 ----
* ANALYZE the same table concurrently. Presently, we lock that out
* by taking a self-exclusive lock on the relation in analyze_rel().
*/
! void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
Relation sd;
***************
*** 1698,1704 **** update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
* This exists to provide some insulation between compute_stats routines
* and the actual storage of the sample data.
*/
! static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
int attnum = stats->tupattnum;
--- 1708,1714 ----
* This exists to provide some insulation between compute_stats routines
* and the actual storage of the sample data.
*/
! Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
int attnum = stats->tupattnum;
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 42,192 ****
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
- #include "utils/rel.h"
#include "utils/snapmgr.h"
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
#define OCTVALUE(c) ((c) - '0')
- /*
- * Represents the different source/dest cases we need to worry about at
- * the bottom level
- */
- typedef enum CopyDest
- {
- COPY_FILE, /* to/from file */
- COPY_OLD_FE, /* to/from frontend (2.0 protocol) */
- COPY_NEW_FE /* to/from frontend (3.0 protocol) */
- } CopyDest;
-
- /*
- * Represents the end-of-line terminator type of the input
- */
- typedef enum EolType
- {
- EOL_UNKNOWN,
- EOL_NL,
- EOL_CR,
- EOL_CRNL
- } EolType;
-
- /*
- * This struct contains all the state variables used throughout a COPY
- * operation. For simplicity, we use the same struct for all variants of COPY,
- * even though some fields are used in only some cases.
- *
- * Multi-byte encodings: all supported client-side encodings encode multi-byte
- * characters by having the first byte's high bit set. Subsequent bytes of the
- * character can have the high bit not set. When scanning data in such an
- * encoding to look for a match to a single-byte (ie ASCII) character, we must
- * use the full pg_encoding_mblen() machinery to skip over multibyte
- * characters, else we might find a false match to a trailing byte. In
- * supported server encodings, there is no possibility of a false match, and
- * it's faster to make useless comparisons to trailing bytes than it is to
- * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
- * when we have to do it the hard way.
- */
- typedef struct CopyStateData
- {
- /* low-level state data */
- CopyDest copy_dest; /* type of copy source/destination */
- FILE *copy_file; /* used if copy_dest == COPY_FILE */
- StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for
- * dest == COPY_NEW_FE in COPY FROM */
- bool fe_eof; /* true if detected end of copy data */
- EolType eol_type; /* EOL type of input */
- int file_encoding; /* file or remote side's character encoding */
- bool need_transcoding; /* file encoding diff from server? */
- bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
-
- /* parameters from the COPY command */
- Relation rel; /* relation to copy to or from */
- QueryDesc *queryDesc; /* executable query to copy from */
- List *attnumlist; /* integer list of attnums to copy */
- char *filename; /* filename, or NULL for STDIN/STDOUT */
- bool binary; /* binary format? */
- bool oids; /* include OIDs? */
- bool csv_mode; /* Comma Separated Value format? */
- bool header_line; /* CSV header line? */
- char *null_print; /* NULL marker string (server encoding!) */
- int null_print_len; /* length of same */
- char *null_print_client; /* same converted to file encoding */
- char *delim; /* column delimiter (must be 1 byte) */
- char *quote; /* CSV quote char (must be 1 byte) */
- char *escape; /* CSV escape char (must be 1 byte) */
- List *force_quote; /* list of column names */
- bool force_quote_all; /* FORCE QUOTE *? */
- bool *force_quote_flags; /* per-column CSV FQ flags */
- List *force_notnull; /* list of column names */
- bool *force_notnull_flags; /* per-column CSV FNN flags */
-
- /* these are just for error messages, see CopyFromErrorCallback */
- const char *cur_relname; /* table name for error messages */
- int cur_lineno; /* line number for error messages */
- const char *cur_attname; /* current att for error messages */
- const char *cur_attval; /* current att value for error messages */
-
- /*
- * Working state for COPY TO/FROM
- */
- MemoryContext copycontext; /* per-copy execution context */
-
- /*
- * Working state for COPY TO
- */
- FmgrInfo *out_functions; /* lookup info for output functions */
- MemoryContext rowcontext; /* per-row evaluation context */
-
- /*
- * Working state for COPY FROM
- */
- AttrNumber num_defaults;
- bool file_has_oids;
- FmgrInfo oid_in_function;
- Oid oid_typioparam;
- FmgrInfo *in_functions; /* array of input functions for each attrs */
- Oid *typioparams; /* array of element types for in_functions */
- int *defmap; /* array of default att numbers */
- ExprState **defexprs; /* array of default att expressions */
-
- /*
- * These variables are used to reduce overhead in textual COPY FROM.
- *
- * attribute_buf holds the separated, de-escaped text for each field of
- * the current line. The CopyReadAttributes functions return arrays of
- * pointers into this buffer. We avoid palloc/pfree overhead by re-using
- * the buffer on each cycle.
- */
- StringInfoData attribute_buf;
-
- /* field raw data pointers found by COPY FROM */
-
- int max_fields;
- char **raw_fields;
-
- /*
- * Similarly, line_buf holds the whole input line being processed. The
- * input cycle is first to read the whole line into line_buf, convert it
- * to server encoding there, and then extract the individual attribute
- * fields into attribute_buf. line_buf is preserved unmodified so that we
- * can display it in error messages if appropriate.
- */
- StringInfoData line_buf;
- bool line_buf_converted; /* converted to server encoding? */
-
- /*
- * Finally, raw_buf holds raw data read from the data source (file or
- * client connection). CopyReadLine parses this data sufficiently to
- * locate line boundaries, then transfers the data to line_buf and
- * converts it. Note: we guarantee that there is a \0 at
- * raw_buf[raw_buf_len].
- */
- #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
- char *raw_buf;
- int raw_buf_index; /* next byte to process */
- int raw_buf_len; /* total # of bytes stored */
- } CopyStateData;
/* DestReceiver for COPY (SELECT) TO */
typedef struct
--- 42,53 ----
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 311,316 **** static void ATPrepSetStatistics(Relation rel, const char *colName,
--- 311,318 ----
Node *newValue, LOCKMODE lockmode);
static void ATExecSetStatistics(Relation rel, const char *colName,
Node *newValue, LOCKMODE lockmode);
+ static void ATPrepSetOptions(Relation rel, const char *colName,
+ Node *options, LOCKMODE lockmode);
static void ATExecSetOptions(Relation rel, const char *colName,
Node *options, bool isReset, LOCKMODE lockmode);
static void ATExecSetStorage(Relation rel, const char *colName,
***************
*** 2886,2892 **** ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
break;
case AT_SetOptions: /* ALTER COLUMN SET ( options ) */
case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */
! ATSimplePermissions(rel, ATT_TABLE | ATT_INDEX);
/* This command never recurses */
pass = AT_PASS_MISC;
break;
--- 2888,2895 ----
break;
case AT_SetOptions: /* ALTER COLUMN SET ( options ) */
case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */
! ATSimplePermissions(rel, ATT_TABLE | ATT_INDEX | ATT_FOREIGN_TABLE);
! ATPrepSetOptions(rel, cmd->name, cmd->def, lockmode);
/* This command never recurses */
pass = AT_PASS_MISC;
break;
***************
*** 4822,4831 **** ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
* allowSystemTableMods to be turned on.
*/
if (rel->rd_rel->relkind != RELKIND_RELATION &&
! rel->rd_rel->relkind != RELKIND_INDEX)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! errmsg("\"%s\" is not a table or index",
RelationGetRelationName(rel))));
/* Permissions checks */
--- 4825,4835 ----
* allowSystemTableMods to be turned on.
*/
if (rel->rd_rel->relkind != RELKIND_RELATION &&
! rel->rd_rel->relkind != RELKIND_INDEX &&
! rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! errmsg("\"%s\" is not a table, index, or foreign table",
RelationGetRelationName(rel))));
/* Permissions checks */
***************
*** 4893,4898 **** ATExecSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
--- 4897,4935 ----
heap_close(attrelation, RowExclusiveLock);
}
+
+ /*
+  * ATPrepSetOptions
+  *		Preparation-phase check for ALTER COLUMN SET/RESET ( options ).
+  *
+  * When rel is a foreign table, raise ERRCODE_FEATURE_NOT_SUPPORTED if the
+  * option list names "n_distinct_inherited".  Nothing is checked for other
+  * relation kinds or other option names.  colName and lockmode are unused.
+  */
+ static void
+ ATPrepSetOptions(Relation rel, const char *colName, Node *options,
+ 				 LOCKMODE lockmode)
+ {
+ 	ListCell   *cell;
+
+ 	if (rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ 		return;
+
+ 	foreach(cell, (List *) options)
+ 	{
+ 		DefElem    *def = (DefElem *) lfirst(cell);
+
+ 		/*
+ 		 * Compare the whole name: a length-limited prefix comparison would
+ 		 * also reject any longer name that merely starts with this string.
+ 		 */
+ 		if (pg_strcasecmp(def->defname, "n_distinct_inherited") == 0)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("cannot support option \"n_distinct_inherited\" for foreign tables")));
+ 	}
+ }
+
static void
ATExecSetOptions(Relation rel, const char *colName, Node *options,
bool isReset, LOCKMODE lockmode)
*** a/src/bin/psql/tab-complete.c
--- b/src/bin/psql/tab-complete.c
***************
*** 399,404 **** static const SchemaQuery Query_for_list_of_tsvf = {
--- 399,419 ----
NULL
};
+ static const SchemaQuery Query_for_list_of_tf = {	/* tables and foreign tables */
+ /* catname: catalog to scan, aliased "c" for the conditions below */
+ "pg_catalog.pg_class c",
+ /* selcondition: keep only relkind 'r' (table) and 'f' (foreign table) */
+ "c.relkind IN ('r', 'f')",
+ /* viscondition: relations for which pg_table_is_visible() is true */
+ "pg_catalog.pg_table_is_visible(c.oid)",
+ /* namespace: column identifying the relation's schema */
+ "c.relnamespace",
+ /* result: the relation name, passed through quote_ident() */
+ "pg_catalog.quote_ident(c.relname)",
+ /* qualresult: none supplied */
+ NULL
+ };
+
static const SchemaQuery Query_for_list_of_views = {
/* catname */
"pg_catalog.pg_class c",
***************
*** 2755,2761 **** psql_completion(char *text, int start, int end)
/* ANALYZE */
/* If the previous word is ANALYZE, produce list of tables */
else if (pg_strcasecmp(prev_wd, "ANALYZE") == 0)
! COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL);
/* WHERE */
/* Simple case of the word before the where being the table name */
--- 2770,2776 ----
/* ANALYZE */
/* If the previous word is ANALYZE, produce list of tables */
else if (pg_strcasecmp(prev_wd, "ANALYZE") == 0)
! COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tf, NULL);
/* WHERE */
/* Simple case of the word before the where being the table name */
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
***************
*** 14,22 ****
--- 14,168 ----
#ifndef COPY_H
#define COPY_H
+ #include "access/attnum.h"
+ #include "executor/execdesc.h"
+ #include "fmgr.h"
+ #include "lib/stringinfo.h"
#include "nodes/execnodes.h"
#include "nodes/parsenodes.h"
+ #include "nodes/pg_list.h"
#include "tcop/dest.h"
+ #include "utils/palloc.h"
+ #include "utils/rel.h"
+
+ /*
+ * Represents the different source/dest cases we need to worry about at
+ * the bottom level
+ */
+ typedef enum CopyDest
+ {
+ COPY_FILE, /* to/from file */
+ COPY_OLD_FE, /* to/from frontend (2.0 protocol) */
+ COPY_NEW_FE /* to/from frontend (3.0 protocol) */
+ } CopyDest;
+
+ /*
+ * Represents the end-of-line terminator type of the input
+ */
+ typedef enum EolType
+ {
+ EOL_UNKNOWN,
+ EOL_NL,
+ EOL_CR,
+ EOL_CRNL
+ } EolType;
+
+ /*
+ * This struct contains all the state variables used throughout a COPY
+ * operation. For simplicity, we use the same struct for all variants of COPY,
+ * even though some fields are used in only some cases.
+ *
+ * Multi-byte encodings: all supported client-side encodings encode multi-byte
+ * characters by having the first byte's high bit set. Subsequent bytes of the
+ * character can have the high bit not set. When scanning data in such an
+ * encoding to look for a match to a single-byte (ie ASCII) character, we must
+ * use the full pg_encoding_mblen() machinery to skip over multibyte
+ * characters, else we might find a false match to a trailing byte. In
+ * supported server encodings, there is no possibility of a false match, and
+ * it's faster to make useless comparisons to trailing bytes than it is to
+ * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
+ * when we have to do it the hard way.
+ */
+ typedef struct CopyStateData
+ {
+ /* low-level state data */
+ CopyDest copy_dest; /* type of copy source/destination */
+ FILE *copy_file; /* used if copy_dest == COPY_FILE */
+ StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for
+ * dest == COPY_NEW_FE in COPY FROM */
+ bool fe_eof; /* true if detected end of copy data */
+ EolType eol_type; /* EOL type of input */
+ int file_encoding; /* file or remote side's character encoding */
+ bool need_transcoding; /* file encoding diff from server? */
+ bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
+
+ /* parameters from the COPY command */
+ Relation rel; /* relation to copy to or from */
+ QueryDesc *queryDesc; /* executable query to copy from */
+ List *attnumlist; /* integer list of attnums to copy */
+ char *filename; /* filename, or NULL for STDIN/STDOUT */
+ bool binary; /* binary format? */
+ bool oids; /* include OIDs? */
+ bool csv_mode; /* Comma Separated Value format? */
+ bool header_line; /* CSV header line? */
+ char *null_print; /* NULL marker string (server encoding!) */
+ int null_print_len; /* length of same */
+ char *null_print_client; /* same converted to file encoding */
+ char *delim; /* column delimiter (must be 1 byte) */
+ char *quote; /* CSV quote char (must be 1 byte) */
+ char *escape; /* CSV escape char (must be 1 byte) */
+ List *force_quote; /* list of column names */
+ bool force_quote_all; /* FORCE QUOTE *? */
+ bool *force_quote_flags; /* per-column CSV FQ flags */
+ List *force_notnull; /* list of column names */
+ bool *force_notnull_flags; /* per-column CSV FNN flags */
+
+ /* these are just for error messages, see CopyFromErrorCallback */
+ const char *cur_relname; /* table name for error messages */
+ int cur_lineno; /* line number for error messages */
+ const char *cur_attname; /* current att for error messages */
+ const char *cur_attval; /* current att value for error messages */
+
+ /*
+ * Working state for COPY TO/FROM
+ */
+ MemoryContext copycontext; /* per-copy execution context */
+
+ /*
+ * Working state for COPY TO
+ */
+ FmgrInfo *out_functions; /* lookup info for output functions */
+ MemoryContext rowcontext; /* per-row evaluation context */
+
+ /*
+ * Working state for COPY FROM
+ */
+ AttrNumber num_defaults;
+ bool file_has_oids;
+ FmgrInfo oid_in_function;
+ Oid oid_typioparam;
+ FmgrInfo *in_functions; /* array of input functions for each attrs */
+ Oid *typioparams; /* array of element types for in_functions */
+ int *defmap; /* array of default att numbers */
+ ExprState **defexprs; /* array of default att expressions */
+
+ /*
+ * These variables are used to reduce overhead in textual COPY FROM.
+ *
+ * attribute_buf holds the separated, de-escaped text for each field of
+ * the current line. The CopyReadAttributes functions return arrays of
+ * pointers into this buffer. We avoid palloc/pfree overhead by re-using
+ * the buffer on each cycle.
+ */
+ StringInfoData attribute_buf;
+
+ /* field raw data pointers found by COPY FROM */
+
+ int max_fields;
+ char **raw_fields;
+
+ /*
+ * Similarly, line_buf holds the whole input line being processed. The
+ * input cycle is first to read the whole line into line_buf, convert it
+ * to server encoding there, and then extract the individual attribute
+ * fields into attribute_buf. line_buf is preserved unmodified so that we
+ * can display it in error messages if appropriate.
+ */
+ StringInfoData line_buf;
+ bool line_buf_converted; /* converted to server encoding? */
+
+ /*
+ * Finally, raw_buf holds raw data read from the data source (file or
+ * client connection). CopyReadLine parses this data sufficiently to
+ * locate line boundaries, then transfers the data to line_buf and
+ * converts it. Note: we guarantee that there is a \0 at
+ * raw_buf[raw_buf_len].
+ */
+ #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
+ char *raw_buf;
+ int raw_buf_index; /* next byte to process */
+ int raw_buf_len; /* total # of bytes stored */
+ } CopyStateData;
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
*** a/src/include/commands/vacuum.h
--- b/src/include/commands/vacuum.h
***************
*** 167,171 **** extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
--- 167,179 ----
/* in commands/analyze.c */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy);
+ extern VacAttrStats * examine_attribute(Relation onerel, int attnum, Node *index_expr,
+ MemoryContext anl_context);
+ extern double random_fract(void);
+ extern double init_selection_state(int n);
+ extern double get_next_S(double t, int n, double *stateptr);
+ extern int compare_rows(const void *a, const void *b);
+ extern void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats);
+ extern Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
#endif /* VACUUM_H */
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 12,19 ****
--- 12,21 ----
#ifndef FDWAPI_H
#define FDWAPI_H
+ #include "foreign/foreign.h"
#include "nodes/execnodes.h"
#include "nodes/relation.h"
+ #include "utils/rel.h"
/* To avoid including explain.h here, reference ExplainState thus: */
struct ExplainState;
***************
*** 68,73 **** typedef void (*ReScanForeignScan_function) (ForeignScanState *node);
--- 70,78 ----
typedef void (*EndForeignScan_function) (ForeignScanState *node);
+ typedef void (*AnalyzeForeignTable_function) (Relation relation,
+ VacuumStmt *vacstmt,
+ int elevel);
/*
* FdwRoutine is the struct returned by a foreign-data wrapper's handler
***************
*** 88,93 **** typedef struct FdwRoutine
--- 93,99 ----
IterateForeignScan_function IterateForeignScan;
ReScanForeignScan_function ReScanForeignScan;
EndForeignScan_function EndForeignScan;
+ AnalyzeForeignTable_function AnalyzeForeignTable;
} FdwRoutine;