join_pushdown.patch

text/plain

Filename: join_pushdown.patch
Type: text/plain
Part: 0
Message: Re: WIP: Join push-down for foreign tables

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: unified
File+
contrib/pgsql_fdw/deparse.c 0 0
contrib/pgsql_fdw/pgsql_fdw.c 0 0
contrib/pgsql_fdw/pgsql_fdw.h 0 0
doc/src/sgml/config.sgml 0 0
doc/src/sgml/fdwhandler.sgml 0 0
doc/src/sgml/ref/postgres-ref.sgml 0 0
src/backend/commands/explain.c 0 0
src/backend/executor/execUtils.c 0 0
src/backend/executor/nodeForeignscan.c 0 0
src/backend/foreign/foreign.c 0 0
src/backend/nodes/copyfuncs.c 0 0
src/backend/nodes/outfuncs.c 0 0
src/backend/optimizer/path/allpaths.c 0 0
src/backend/optimizer/path/costsize.c 0 0
src/backend/optimizer/path/joinpath.c 0 0
src/backend/optimizer/plan/createplan.c 0 0
src/backend/optimizer/README 0 0
src/backend/optimizer/util/pathnode.c 0 0
src/backend/optimizer/util/plancat.c 0 0
src/backend/optimizer/util/relnode.c 0 0
src/backend/tcop/postgres.c 0 0
src/backend/utils/misc/guc.c 0 0
src/backend/utils/misc/postgresql.conf.sample 0 0
src/include/executor/executor.h 0 0
src/include/foreign/fdwapi.h 0 0
src/include/nodes/execnodes.h 0 0
src/include/nodes/nodes.h 0 0
src/include/nodes/plannodes.h 0 0
src/include/nodes/relation.h 0 0
src/include/optimizer/cost.h 0 0
src/include/optimizer/pathnode.h 0 0
src/test/regress/expected/rangefuncs.out 0 0
diff --git a/contrib/pgsql_fdw/deparse.c b/contrib/pgsql_fdw/deparse.c
index b5f6c79..d24b629 100644
*** a/contrib/pgsql_fdw/deparse.c
--- b/contrib/pgsql_fdw/deparse.c
*************** is_proc_remotely_executable(Oid procid)
*** 362,364 ****
--- 362,538 ----
  	return true;
  }
  
+ /*
+  * Append the quals of 'path' (and, recursively, of its child paths) to buf,
+  * joined by " AND ".  '*first' tells whether a qual has been emitted yet and
+  * is updated; 'context' is the deparse context given to deparse_expression.
+  */
+ static void
+ append_where_clause(StringInfo buf, List *context, Path *path, bool *first)
+ {
+ 	List		   *restrictinfo = NIL;	/* stays empty for other path types */
+ 	ListCell	   *lc;
+ 	Path		   *outer_path = NULL;
+ 	Path		   *inner_path = NULL;
+ 
+ 	/* Pick the qual list: base quals for a scan, join quals for a join. */
+ 	if (IsA(path, ForeignPath))
+ 		restrictinfo = path->parent->baserestrictinfo;
+ 	else if (IsA(path, ForeignJoinPath))
+ 	{
+ 		JoinPath	   *joinpath = (JoinPath *) path;
+ 		restrictinfo = joinpath->joinrestrictinfo;
+ 		outer_path = joinpath->outerjoinpath;
+ 		inner_path = joinpath->innerjoinpath;
+ 	}
+ 
+ 	/* Deparse restrict expressions. */
+ 	foreach(lc, restrictinfo)
+ 	{
+ 		RestrictInfo   *ri = (RestrictInfo *) lfirst(lc);
+ 
+ 		if (!*first)
+ 			appendStringInfoString(buf, " AND ");
+ 		appendStringInfoString(buf,
+ 			deparse_expression((Node *) ri->clause, context, true, false));
+ 		*first = false;
+ 	}
+ 
+ 	/* Use children's restrictinfo recursively, if any. */
+ 	if (outer_path != NULL)
+ 		append_where_clause(buf, context, outer_path, first);
+ 	if (inner_path != NULL)
+ 		append_where_clause(buf, context, inner_path, first);
+ }
+ 
+ /*
+  * Append the schema-qualified, identifier-quoted name of the remote relation
+  * behind foreign table 'relid' to 'result'.  The table's "nspname" and
+  * "relname" options override the local schema and relation names.
+  */
+ static void
+ get_qualified_relname(Oid relid, StringInfo result)
+ {
+ 	ForeignTable   *table = GetForeignTable(relid);
+ 	const char	   *schema = NULL;	/* unquoted namespace name */
+ 	const char	   *name = NULL;	/* unquoted relation name */
+ 	ListCell	   *cell;
+ 
+ 	/* Pick up explicit remote names from the table options, if given. */
+ 	foreach (cell, table->options)
+ 	{
+ 		DefElem	   *def = lfirst(cell);
+ 
+ 		if (strcmp(def->defname, "nspname") == 0)
+ 			schema = strVal(def->arg);
+ 		else if (strcmp(def->defname, "relname") == 0)
+ 			name = strVal(def->arg);
+ 	}
+ 
+ 	/* Fall back to the local catalog names for anything not overridden. */
+ 	if (schema == NULL)
+ 		schema = get_namespace_name(get_rel_namespace(relid));
+ 	if (name == NULL)
+ 		name = get_rel_name(relid);
+ 
+ 	/* Emit "schema.relation" with both parts quoted as needed. */
+ 	appendStringInfo(result, "%s.%s",
+ 					 quote_identifier(schema), quote_identifier(name));
+ }
+ 
+ /*
+  * Deparse a join of foreign tables into a SQL statement for the remote
+  * PostgreSQL server; returns NULL if not pushable.  See also deparseSql().
+  */
+ char *
+ deparseJoinSql(Oid serverid,
+ 			   PlannerInfo *root,
+ 			   RelOptInfo *joinrel,
+ 			   JoinType jointype,
+ 			   SpecialJoinInfo *sjinfo,
+ 			   Path *outer_path,
+ 			   Path *inner_path,
+ 			   List *restrict_clauses,
+ 			   List *pathkeys)
+ {
+ 	int				i;
+ 	List		   *rtables = NIL;
+ 	List		   *context;
+ 	StringInfoData	sql;
+ 	StringInfoData	where;
+ 	bool			first;
+ 	ListCell	   *lc;
+ 	Bitmapset	   *bms;
+ 
+ 	/*
+ 	 * Check pushability first: every target must be a plain Var that is not
+ 	 * a system column.  This repeats the target loop below, but bailing out
+ 	 * before anything is allocated avoids leaking memory.
+ 	 */
+ 	foreach(lc, joinrel->reltargetlist)
+ 	{
+ 		Var	   *var = (Var *) lfirst(lc);
+ 		if (!IsA(var, Var) || var->varattno < 0)
+ 			return NULL;
+ 	}
+ 
+ 	/* Create context used for deparsing this join. */
+ 	for (i = 1; i < root->simple_rel_array_size; i++)
+ 		rtables = lappend(rtables, copyObject(root->simple_rte_array[i]));
+ 	context = deparse_context_for_rtelist(rtables);
+ 
+ 	/* Initialize buffer. */
+ 	initStringInfo(&sql);
+ 
+ 	/* deparse SELECT clause (TODO: replace unused references with NULL) */
+ 	appendStringInfoString(&sql, "SELECT ");
+ 	first = true;
+ 	foreach(lc, joinrel->reltargetlist)
+ 	{
+ 		Var	   *var = (Var *) lfirst(lc);
+ 		appendStringInfo(&sql, "%s%s", first ? "" : ", ",
+ 			deparse_expression((Node *) var, context, true, false));
+ 		first = false;
+ 	}
+ 
+ 	/* deparse FROM clause; the alias is quoted like any other identifier */
+ 	appendStringInfoString(&sql, "\nFROM ");
+ 	bms = bms_copy(joinrel->relids);
+ 	first = true;
+ 	while (!bms_is_empty(bms))
+ 	{
+ 		int relid = bms_first_member(bms);
+ 		RangeTblEntry	   *rte = root->simple_rte_array[relid];
+ 
+ 		if (!first)
+ 			appendStringInfoString(&sql, ", ");
+ 		get_qualified_relname(rte->relid, &sql);
+ 		appendStringInfo(&sql, " %s", quote_identifier(rte->eref->aliasname));
+ 		first = false;
+ 	}
+ 	bms_free(bms);
+ 
+ 	/* deparse quals separately, so WHERE is emitted only if there are any */
+ 	initStringInfo(&where);
+ 	first = true;
+ 	foreach(lc, restrict_clauses)
+ 	{
+ 		RestrictInfo   *ri = (RestrictInfo *) lfirst(lc);
+ 
+ 		if (!first)
+ 			appendStringInfoString(&where, " AND ");
+ 		appendStringInfoString(&where,
+ 			deparse_expression((Node *) ri->clause, context, true, false));
+ 		first = false;
+ 	}
+ 	if (outer_path != NULL)
+ 		append_where_clause(&where, context, outer_path, &first);
+ 	if (inner_path != NULL)
+ 		append_where_clause(&where, context, inner_path, &first);
+ 	if (where.len > 0)
+ 		appendStringInfo(&sql, "\nWHERE %s", where.data);
+ 
+ 	/* return constructed SQL statement. */
+ 	elog(DEBUG1, "deparsed join SQL: [%s]", sql.data);
+ 	return sql.data;
+ }
diff --git a/contrib/pgsql_fdw/pgsql_fdw.c b/contrib/pgsql_fdw/pgsql_fdw.c
index 61d20ee..564f45d 100644
*** a/contrib/pgsql_fdw/pgsql_fdw.c
--- b/contrib/pgsql_fdw/pgsql_fdw.c
*************** static void pgsqlBeginForeignScan(Foreig
*** 109,114 ****
--- 109,123 ----
  static TupleTableSlot *pgsqlIterateForeignScan(ForeignScanState *node);
  static void pgsqlReScanForeignScan(ForeignScanState *node);
  static void pgsqlEndForeignScan(ForeignScanState *node);
+ static FdwPlan *pgsqlPlanForeignJoin(Oid serverid,
+ 									 PlannerInfo *root,
+ 									 RelOptInfo *joinrel,
+ 									 JoinType jointype,
+ 									 SpecialJoinInfo *sjinfo,
+ 									 Path *outer_path,
+ 									 Path *inner_path,
+ 									 List *restrict_clauses,
+ 									 List *pathkeys);
  
  /*
   * Helper functions
*************** pgsql_fdw_handler(PG_FUNCTION_ARGS)
*** 141,188 ****
  	fdwroutine->ReScanForeignScan = pgsqlReScanForeignScan;
  	fdwroutine->EndForeignScan = pgsqlEndForeignScan;
  
  	PG_RETURN_POINTER(fdwroutine);
  }
  
  /*
!  * pgsqlPlanForeignScan
!  *		Create a FdwPlan for a scan on the foreign table
   */
! static FdwPlan *
! pgsqlPlanForeignScan(Oid foreigntableid,
! 					PlannerInfo *root,
! 					RelOptInfo *baserel)
  {
  	char			name[128];	/* must be larger than format + 10 */
  	StringInfoData	cursor;
  	const char	   *fetch_count_str;
  	int				fetch_count = DEFAULT_FETCH_COUNT;
- 	char		   *sql;
- 	FdwPlan		   *fdwplan;
  	List		   *fdw_private = NIL;
- 	ForeignTable   *table;
- 	ForeignServer  *server;
- 
- 	/* Construct FdwPlan with cost estimates */
- 	fdwplan = makeNode(FdwPlan);
- 	sql = deparseSql(foreigntableid, root, baserel);
- 	table = GetForeignTable(foreigntableid);
- 	server = GetForeignServer(table->serverid);
- 	estimate_costs(root, baserel, sql, server->serverid,
- 				   &fdwplan->startup_cost, &fdwplan->total_cost);
  
  	/*
  	 * Store plain SELECT statement in private area of FdwPlan.  This will be
  	 * used for executing remote query and explaining scan.
  	 */
! 	fdw_private = list_make1(makeString(sql));
  
  	/* Use specified fetch_count instead of default value, if any. */
! 	fetch_count_str = GetFdwOptionValue(InvalidOid, InvalidOid, foreigntableid,
  										InvalidAttrNumber, "fetch_count");
  	if (fetch_count_str != NULL)
  		fetch_count = strtol(fetch_count_str, NULL, 10);
! 	elog(DEBUG1, "relid=%u fetch_count=%d", foreigntableid, fetch_count);
  
  	/*
  	 * We store some more information in FdwPlan to pass them beyond the
--- 150,187 ----
  	fdwroutine->ReScanForeignScan = pgsqlReScanForeignScan;
  	fdwroutine->EndForeignScan = pgsqlEndForeignScan;
  
+ 	/* Optional handlers. */
+ 	fdwroutine->PlanForeignJoin = pgsqlPlanForeignJoin;
+ 
  	PG_RETURN_POINTER(fdwroutine);
  }
  
  /*
!  * Make list of private information which are specific to pgsql_fdw.
!  * Actual contents are:
!  *
   */
! static List *
! make_fdw_private(const char *sql, Oid serverid, Oid relid)
  {
  	char			name[128];	/* must be larger than format + 10 */
  	StringInfoData	cursor;
  	const char	   *fetch_count_str;
  	int				fetch_count = DEFAULT_FETCH_COUNT;
  	List		   *fdw_private = NIL;
  
  	/*
  	 * Store plain SELECT statement in private area of FdwPlan.  This will be
  	 * used for executing remote query and explaining scan.
  	 */
! 	fdw_private = list_make1(makeString(pstrdup(sql)));
  
  	/* Use specified fetch_count instead of default value, if any. */
! 	fetch_count_str = GetFdwOptionValue(InvalidOid, serverid, relid,
  										InvalidAttrNumber, "fetch_count");
  	if (fetch_count_str != NULL)
  		fetch_count = strtol(fetch_count_str, NULL, 10);
! 	elog(DEBUG1, "relid=%u fetch_count=%d", relid, fetch_count);
  
  	/*
  	 * We store some more information in FdwPlan to pass them beyond the
*************** pgsqlPlanForeignScan(Oid foreigntableid,
*** 224,231 ****
  	appendStringInfo(&cursor, "CLOSE %s", name);
  	fdw_private = lappend(fdw_private, makeString(cursor.data));
  
  	/* Store FDW private information into FdwPlan */
! 	fdwplan->fdw_private = fdw_private;
  
  	return fdwplan;
  }
--- 223,256 ----
  	appendStringInfo(&cursor, "CLOSE %s", name);
  	fdw_private = lappend(fdw_private, makeString(cursor.data));
  
+ 	return fdw_private;
+ }
+ 
+ /*
+  * pgsqlPlanForeignScan
+  *		Create a FdwPlan (remote SQL, costs, private list) for a table scan
+  */
+ static FdwPlan *
+ pgsqlPlanForeignScan(Oid foreigntableid,
+ 					PlannerInfo *root,
+ 					RelOptInfo *baserel)
+ {
+ 	char		   *sql;
+ 	FdwPlan		   *fdwplan;
+ 	ForeignTable   *table;
+ 	ForeignServer  *server;
+ 
+ 	/* Construct FdwPlan, deparse the remote query and estimate its costs */
+ 	fdwplan = makeNode(FdwPlan);
+ 	sql = deparseSql(foreigntableid, root, baserel);
+ 	table = GetForeignTable(foreigntableid);
+ 	server = GetForeignServer(table->serverid);
+ 	estimate_costs(root, baserel, sql, server->serverid,
+ 				   &fdwplan->startup_cost, &fdwplan->total_cost);
+ 
  	/* Store FDW private information into FdwPlan */
! 	fdwplan->fdw_private = make_fdw_private(sql, table->serverid,
! 											foreigntableid);
  
  	return fdwplan;
  }
*************** pgsqlBeginForeignScan(ForeignScanState *
*** 254,261 ****
  {
  	PgsqlFdwExecutionState *festate;
  	PGconn		   *conn;
- 	Oid				relid;
- 	ForeignTable   *table;
  	ForeignServer  *server;
  	UserMapping	   *user;
  	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
--- 279,284 ----
*************** pgsqlBeginForeignScan(ForeignScanState *
*** 276,284 ****
  	 * Get connection to the foreign server.  Connection manager would
  	 * establish new connection if necessary.
  	 */
! 	relid = RelationGetRelid(node->ss.ss_currentRelation);
! 	table = GetForeignTable(relid);
! 	server = GetForeignServer(table->serverid);
  	user = GetUserMapping(GetOuterUserId(), server->serverid);
  	conn = GetConnection(server, user);
  	festate->conn = conn;
--- 299,305 ----
  	 * Get connection to the foreign server.  Connection manager would
  	 * establish new connection if necessary.
  	 */
! 	server = GetForeignServer(node->serverid);
  	user = GetUserMapping(GetOuterUserId(), server->serverid);
  	conn = GetConnection(server, user);
  	festate->conn = conn;
*************** pgsqlEndForeignScan(ForeignScanState *no
*** 501,506 ****
--- 522,577 ----
  }
  
  /*
+  * pgsqlPlanForeignJoin
+  *		Create a FdwPlan for a query which contains a join of foreign tables
+  */
+ static FdwPlan *
+ pgsqlPlanForeignJoin(Oid serverid,
+ 					 PlannerInfo *root,
+ 					 RelOptInfo *joinrel,
+ 					 JoinType jointype,
+ 					 SpecialJoinInfo *sjinfo,
+ 					 Path *outer_path,
+ 					 Path *inner_path,
+ 					 List *restrict_clauses,
+ 					 List *pathkeys)
+ {
+ 	FdwPlan	   *plan;
+ 	char	   *join_sql;
+ 
+ 	Assert(joinrel->serverid != InvalidOid);
+ 	Assert(IsA(outer_path, ForeignPath) || IsA(outer_path, ForeignJoinPath));
+ 	Assert(IsA(inner_path, ForeignPath) || IsA(inner_path, ForeignJoinPath));
+ 
+ 	/*
+ 	 * Only inner joins are considered in this first version, because an
+ 	 * OUTER JOIN might produce more rows than the children's results give.
+ 	 */
+ 	if (jointype != JOIN_INNER)
+ 		return NULL;
+ 
+ 	/*
+ 	 * Build the remote SQL statement which retrieves the whole result of
+ 	 * the join.  A NULL result means that pgsql_fdw can't push this join
+ 	 * down to the remote side, so no plan is returned.
+ 	 */
+ 	join_sql = deparseJoinSql(serverid, root, joinrel, jointype, sjinfo,
+ 							  outer_path, inner_path, restrict_clauses, pathkeys);
+ 	if (join_sql == NULL)
+ 		return NULL;
+ 
+ 	plan = makeNode(FdwPlan);
+ 	plan->startup_cost = 0.0;
+ 	plan->total_cost = 0.0;
+ 	/* TODO estimate cost by executing EXPLAIN on remote side. */
+ 	estimate_costs(root, joinrel, join_sql, serverid,
+ 				   &plan->startup_cost, &plan->total_cost);
+ 	plan->fdw_private = make_fdw_private(join_sql, serverid, InvalidOid);
+ 
+ 	return plan;
+ }
+ 
+ /*
   * Estimate costs of scanning a foreign table.
   */
  static void
*************** estimate_costs(PlannerInfo *root, RelOpt
*** 526,535 ****
--- 597,609 ----
  	 * tend to choose custom plan.
  	 *
  	 * See comments in plancache.c for details of custom plan.
+ 	 *
+ 	 * TODO check this in deparse*Sql(), and receive a flag.
  	 */
  	foreach(lc, baserel->baserestrictinfo)
  	{
  		RestrictInfo	   *rs = (RestrictInfo *) lfirst(lc);
+ 
  		if (contain_ext_param((Node *) rs->clause))
  		{
  			*startup_cost = CONNECTION_COSTS;
diff --git a/contrib/pgsql_fdw/pgsql_fdw.h b/contrib/pgsql_fdw/pgsql_fdw.h
index fb49ffb..e0c3ee2 100644
*** a/contrib/pgsql_fdw/pgsql_fdw.h
--- b/contrib/pgsql_fdw/pgsql_fdw.h
*************** int ExtractConnectionOptions(List *defel
*** 24,28 ****
--- 24,37 ----
  
  /* in deparse.c */
  char *deparseSql(Oid relid, PlannerInfo *root, RelOptInfo *baserel);
+ char *deparseJoinSql(Oid serverid,
+ 					 PlannerInfo *root,
+ 					 RelOptInfo *joinrel,
+ 					 JoinType jointype,
+ 					 SpecialJoinInfo *sjinfo,
+ 					 Path *outer_path,
+ 					 Path *inner_path,
+ 					 List *restrict_clauses,
+ 					 List *pathkeys);
  
  #endif /* PGSQL_FDW_H */
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d1e628f..78c5f39 100644
*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
*************** SET ENABLE_SEQSCAN TO OFF;
*** 2362,2367 ****
--- 2362,2380 ----
        </listitem>
       </varlistentry>
  
+      <varlistentry id="guc-enable-foreignjoin" xreflabel="enable_foreignjoin">
+       <term><varname>enable_foreignjoin</varname> (<type>boolean</type>)</term>
+       <indexterm>
+        <primary><varname>enable_foreignjoin</> configuration parameter</primary>
+       </indexterm>
+       <listitem>
+        <para>
+         Enables or disables the query planner's use of foreign-join plan
+         types. The default is <literal>on</>.
+        </para>
+       </listitem>
+      </varlistentry>
+ 
       <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
        <term><varname>enable_hashagg</varname> (<type>boolean</type>)</term>
        <indexterm>
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index d809cac..7e3c5bd 100644
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
*************** EndForeignScan (ForeignScanState *node);
*** 228,233 ****
--- 228,273 ----
      </para>
  
      <para>
+      The callback functions above are required for every foreign data wrapper.
+      In contrast, the callback functions below are optional, so you can set any
+      of them to NULL to tell the planner and/or executor that the FDW doesn't
+      support the feature.  In fact, you don't have to set them to NULL
+      explicitly, because <function>makeNode</function> has already initialized
+      them to NULL.
+     </para>
+ 
+     <para>
+ <programlisting>
+ FdwPlan *
+ PlanForeignJoin (Oid serverid,
+                  PlannerInfo *root,
+                  RelOptInfo *joinrel,
+                  JoinType jointype,
+                  SpecialJoinInfo *sjinfo,
+                  Path *outer_path,
+                  Path *inner_path,
+                  List *restrict_clauses,
+                  List *pathkeys);
+ </programlisting>
+ 
+      Plan a join between two foreign scans performed on the same server.  In
+      this context, a foreign scan can be a simple foreign table reference
+      planned by <function>PlanForeignScan</function>, or the result of a
+      foreign join planned by <function>PlanForeignJoin</function>.
+      Arguments <literal>outer_path</literal> and <literal>inner_path</literal>
+      provide details of each child node, and the other arguments provide
+      details of the join itself.
+     </para>
+ 
+     <para>
+      This function is called during planning to determine whether a foreign
+      join can be pushed down.  The FDW can use the private area of
+      <structname>FdwPlan</structname>, similarly to
+      <function>PlanForeignScan</function>, to store FDW-specific information
+      which is needed to get the result of the join.
+     </para>
+ 
+     <para>
       The <structname>FdwRoutine</> and <structname>FdwPlan</> struct types
       are declared in <filename>src/include/foreign/fdwapi.h</>, which see
       for additional details.
*************** GetForeignTable(Oid relid);
*** 298,303 ****
--- 338,362 ----
  
      <para>
  <programlisting>
+ char *
+ GetFdwOptionValue(Oid fdwid, Oid serverid, Oid relid, AttrNumber attnum, const char *optname);
+ </programlisting>
+ 
+      This function returns a copy (created in current memory context) of the
+      value of the given option for the given object (foreign-data wrapper,
+      foreign server, foreign table or its column).
+      If attnum is InvalidAttrNumber, pg_attribute is ignored.
+      If the specified option is set at multiple object levels, the one on the
+      finest-grained object is used; e.g. priority is given to a user mapping
+      over the foreign server for the mapping or the foreign-data wrapper for
+      the server.  This function is useful when you know which option is
+      needed but you don't know which object(s) have it.
+      If you already know the source object, it is more efficient to use the
+      object retrieval functions.
+     </para>
+ 
+     <para>
+ <programlisting>
  List *
  GetForeignTableColumnOptions(Oid relid, AttrNumber attnum);
  </programlisting>
*************** GetForeignServerByName(const char *name,
*** 334,388 ****
      </para>
  
      <para>
- <programlisting>
- char *
- GetFdwOptionValue(Oid fdwid, Oid serverid, Oid relid, AttrNumber attnum, const char *optname);
- </programlisting>
- 
-      This function returns a copied string (created in current memory context)
-      of the value of a FDW option with given name which is set on a object with
-      given oid and attribute number.  This function ignores catalogs if invalid
-      identifir is given for it.
- 
-      <itemizedlist>
-       <listitem>
-        <para>
-         If attnum is <literal>InvalidAttrNumber</literal> or relid is
-         <literal>Invalidoid</literal>, <structname>pg_attribute</structname> is
-         ignored.
-        </para>
-       </listitem>
-       <listitem>
-        <para>
-         If relid is <literal>InvalidOid</literal>,
-         <structname>pg_foreign_table</structname> is ignored.
-        </para>
-       </listitem>
-       <listitem>
-        <para>
-         If both serverid and relid are <literal>InvalidOid</literal>,
-         <structname>pg_foreign_server</structname> is ignored.
-        </para>
-       </listitem>
-       <listitem>
-        <para>
-         If all of fdwid, serverid and relid are <literal>InvalidOid</literal>,
-         <structname>pg_foreign_data_wrapper</structname> is ignored.
-        </para>
-       </listitem>
-      </itemizedlist>
-     </para>
- 
-     <para>
-      If the option with given name is set in multiple object level, the one in
-      the finest-grained object is used; e.g. priority is given to user mappings 
-      over than foreign servers.
-      This function would be useful when you know which option is needed but you
-      don't know where it is set.  If you already know the source object, it
-      would be more efficient to use object retrieval functions.
-     </para>
- 
-     <para>
       To use any of these functions, you need to include
       <filename>foreign/foreign.h</filename> in your source file.
      </para>
--- 393,398 ----
diff --git a/doc/src/sgml/ref/postgres-ref.sgml b/doc/src/sgml/ref/postgres-ref.sgml
index 9869a1f..d785c2f 100644
*** a/doc/src/sgml/ref/postgres-ref.sgml
--- b/doc/src/sgml/ref/postgres-ref.sgml
*************** PostgreSQL documentation
*** 376,382 ****
  
      <variablelist>
       <varlistentry>
!       <term><option>-f</option> <literal>{ s | i | o | b | t | n | m | h }</literal></term>
        <listitem>
         <para>
          Forbids the use of particular scan and join methods:
--- 376,382 ----
  
      <variablelist>
       <varlistentry>
!       <term><option>-f</option> <literal>{ s | i | o | b | t | n | m | h | f }</literal></term>
        <listitem>
         <para>
          Forbids the use of particular scan and join methods:
*************** PostgreSQL documentation
*** 385,392 ****
          <literal>o</literal>, <literal>b</literal> and <literal>t</literal>
          disable index-only scans, bitmap index scans, and TID scans
          respectively, while
!         <literal>n</literal>, <literal>m</literal>, and <literal>h</literal>
!         disable nested-loop, merge and hash joins respectively.
         </para>
  
         <para>
--- 385,393 ----
          <literal>o</literal>, <literal>b</literal> and <literal>t</literal>
          disable index-only scans, bitmap index scans, and TID scans
          respectively, while
!         <literal>n</literal>, <literal>m</literal>, <literal>h</literal>, and
!         <literal>f</literal> disable nested-loop joins, merge joins, hash
!         joins, and foreign joins respectively.
         </para>
  
         <para>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index e38de5c..86e0203 100644
*** a/src/backend/commands/explain.c
--- b/src/backend/commands/explain.c
*************** static const char *explain_get_index_nam
*** 82,87 ****
--- 82,88 ----
  static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
  						ExplainState *es);
  static void ExplainScanTarget(Scan *plan, ExplainState *es);
+ static void ExplainForeignScanTarget(Scan *plan, ExplainState *es);
  static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es);
  static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es);
  static void ExplainMemberNodes(List *plans, PlanState **planstates,
*************** ExplainNode(PlanState *planstate, List *
*** 803,811 ****
  		case T_ValuesScan:
  		case T_CteScan:
  		case T_WorkTableScan:
- 		case T_ForeignScan:
  			ExplainScanTarget((Scan *) plan, es);
  			break;
  		case T_IndexScan:
  			{
  				IndexScan  *indexscan = (IndexScan *) plan;
--- 804,814 ----
  		case T_ValuesScan:
  		case T_CteScan:
  		case T_WorkTableScan:
  			ExplainScanTarget((Scan *) plan, es);
  			break;
+ 		case T_ForeignScan:
+ 			ExplainForeignScanTarget((Scan *) plan, es);
+ 			break;
  		case T_IndexScan:
  			{
  				IndexScan  *indexscan = (IndexScan *) plan;
*************** ExplainScanTarget(Scan *plan, ExplainSta
*** 1674,1679 ****
--- 1677,1702 ----
  }
  
  /*
+  * Show the target of a ForeignScan node
+  */
+ static void
+ ExplainForeignScanTarget(Scan *plan, ExplainState *es)
+ {
+ 	Assert(IsA(plan, ForeignScan));
+ 
+ 	/*
+ 	 * A scan of a single foreign table is shown like any other scan target.
+ 	 * Otherwise there is no single relation to name, so a fixed label is
+ 	 * shown instead -- in text format only, because appending free text to
+ 	 * es->str would corrupt structured (XML/JSON/YAML) output.
+ 	 */
+ 	if (plan->scanrelid > 0)
+ 		ExplainTargetRel((Plan *) plan, plan->scanrelid, es);
+ 	else if (es->format == EXPLAIN_FORMAT_TEXT)
+ 		appendStringInfoString(es->str, " on multiple foreign tables");
+ }
+ 
+ /*
   * Show the target of a ModifyTable node
   */
  static void
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 65591e2..e7adb74 100644
*** a/src/backend/executor/execUtils.c
--- b/src/backend/executor/execUtils.c
*************** ExecAssignScanTypeFromOuterPlan(ScanStat
*** 760,765 ****
--- 760,790 ----
  	ExecAssignScanType(scanstate, tupDesc);
  }
  
+ /* ----------------
+  *		ExecAssignScanTypeFromTL
+  *
+  *		Give the scan tuple slot a descriptor built from the plan node's
+  *		target list.
+  * ----------------
+  */
+ void
+ ExecAssignScanTypeFromTL(ScanState *scanstate)
+ {
+ 	TupleDesc	scandesc;
+ 	bool		with_oids;
+ 
+ 	/*
+ 	 * If the context forces an OID choice, with_oids is set by the call
+ 	 * itself.  Given free choice, don't leave space for OIDs in the tuples.
+ 	 */
+ 	if (!ExecContextForcesOids(&scanstate->ps, &with_oids))
+ 		with_oids = false;
+ 
+ 	/* Build the descriptor and install it in the scan tuple slot. */
+ 	scandesc = ExecTypeFromTL(scanstate->ps.plan->targetlist, with_oids);
+ 	ExecSetSlotDescriptor(scanstate->ss_ScanTupleSlot, scandesc);
+ }
+ 
  
  /* ----------------------------------------------------------------
   *				  Scan node support
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index 841ae69..4bf6d6c 100644
*** a/src/backend/executor/nodeForeignscan.c
--- b/src/backend/executor/nodeForeignscan.c
***************
*** 25,30 ****
--- 25,31 ----
  #include "executor/executor.h"
  #include "executor/nodeForeignscan.h"
  #include "foreign/fdwapi.h"
+ #include "foreign/foreign.h"
  #include "utils/rel.h"
  
  static TupleTableSlot *ForeignNext(ForeignScanState *node);
*************** ExecForeignScan(ForeignScanState *node)
*** 101,109 ****
  ForeignScanState *
  ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
  {
! 	ForeignScanState *scanstate;
! 	Relation	currentRelation;
! 	FdwRoutine *fdwroutine;
  
  	/* check for unsupported flags */
  	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
--- 102,112 ----
  ForeignScanState *
  ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
  {
! 	ForeignScanState   *scanstate;
! 	Relation			currentRelation;
! 	ForeignServer	   *server;
! 	ForeignDataWrapper *wrapper;
! 	FdwRoutine		   *fdwroutine;
  
  	/* check for unsupported flags */
  	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
*************** ExecInitForeignScan(ForeignScan *node, E
*** 140,166 ****
  	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
  	ExecInitScanTupleSlot(estate, &scanstate->ss);
  
! 	/*
! 	 * open the base relation and acquire appropriate lock on it.
! 	 */
! 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
! 	scanstate->ss.ss_currentRelation = currentRelation;
  
! 	/*
! 	 * get the scan type from the relation descriptor.
! 	 */
! 	ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
  
! 	/*
! 	 * Initialize result tuple type and projection info.
! 	 */
! 	ExecAssignResultTypeFromTL(&scanstate->ss.ps);
! 	ExecAssignScanProjectionInfo(&scanstate->ss);
  
  	/*
  	 * Acquire function pointers from the FDW's handler, and init fdw_state.
  	 */
! 	fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(currentRelation));
  	scanstate->fdwroutine = fdwroutine;
  	scanstate->fdw_state = NULL;
  
--- 143,193 ----
  	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
  	ExecInitScanTupleSlot(estate, &scanstate->ss);
  
! 	if (node->scan.scanrelid != InvalidOid)
! 	{
! 		/*
! 		 * open the base relation and acquire appropriate lock on it.
! 		 */
! 		currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
! 		scanstate->ss.ss_currentRelation = currentRelation;
  
! 		/*
! 		 * get the scan type from the relation descriptor.
! 		 */
! 		ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
  
! 		/*
! 		 * Initialize result tuple type and projection info.
! 		 */
! 		ExecAssignResultTypeFromTL(&scanstate->ss.ps);
! 		ExecAssignScanProjectionInfo(&scanstate->ss);
! 	}
! 	else
! 	{
! 		/* TODO: open related relations and acquire appropriate lock on them. */
! 		scanstate->ss.ss_currentRelation = NULL;
! 
! 		/*
! 		 * get the scan type from the target list.
! 		 */
! 		ExecAssignScanTypeFromTL(&scanstate->ss);
! 
! 		/*
! 		 * Initialize result tuple type and projection info.
! 		 */
! 		ExecAssignResultTypeFromTL(&scanstate->ss.ps);
! 		ExecAssignProjectionInfo(&scanstate->ss.ps, NULL);
! 	}
! 
! 	/* We know which server should be use. */
! 	scanstate->serverid = node->serverid;
  
  	/*
  	 * Acquire function pointers from the FDW's handler, and init fdw_state.
  	 */
! 	server = GetForeignServer(node->serverid);
! 	wrapper = GetForeignDataWrapper(server->fdwid);
! 	fdwroutine = GetFdwRoutine(wrapper->fdwhandler);
  	scanstate->fdwroutine = fdwroutine;
  	scanstate->fdw_state = NULL;
  
*************** ExecEndForeignScan(ForeignScanState *nod
*** 192,198 ****
  	ExecClearTuple(node->ss.ss_ScanTupleSlot);
  
  	/* close the relation. */
! 	ExecCloseScanRelation(node->ss.ss_currentRelation);
  }
  
  /* ----------------------------------------------------------------
--- 219,226 ----
  	ExecClearTuple(node->ss.ss_ScanTupleSlot);
  
  	/* close the relation. */
! 	if (node->ss.ss_currentRelation != NULL)
! 		ExecCloseScanRelation(node->ss.ss_currentRelation);
  }
  
  /* ----------------------------------------------------------------
diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c
index b984a5e..7c77847 100644
*** a/src/backend/foreign/foreign.c
--- b/src/backend/foreign/foreign.c
*************** GetFdwOptionValue(Oid fdwid, Oid serveri
*** 292,299 ****
  	ForeignDataWrapper  *wrapper = NULL;
  	char		   *value;
  
! 	/* Do we need to search pg_attribute? */
! 	if (attnum != InvalidAttrNumber && relid != InvalidOid)
  	{
  		value = get_options_value(GetForeignColumnOptions(relid, attnum),
  								  optname);
--- 292,299 ----
  	ForeignDataWrapper  *wrapper = NULL;
  	char		   *value;
  
! 	/* Do we need to use pg_attribute.attfdwoptions too? */
! 	if (attnum != InvalidAttrNumber)
  	{
  		value = get_options_value(GetForeignColumnOptions(relid, attnum),
  								  optname);
*************** GetFdwOptionValue(Oid fdwid, Oid serveri
*** 301,334 ****
  			return value;
  	}
  
- 	/* Do we need to search pg_foreign_table? */
  	if (relid != InvalidOid)
  	{
  		table = GetForeignTable(relid);
  		value = get_options_value(table->options, optname);
  		if (value != NULL)
  			return value;
- 
  		serverid = table->serverid;
  	}
  
- 	/* Do we need to search pg_user_mapping and pg_foreign_server? */
  	if (serverid != InvalidOid)
  	{
  		user = GetUserMapping(GetOuterUserId(), serverid);
  		value = get_options_value(user->options, optname);
  		if (value != NULL)
  			return value;
  
  		server = GetForeignServer(serverid);
  		value = get_options_value(server->options, optname);
  		if (value != NULL)
  			return value;
- 
  		fdwid = server->fdwid;
  	}
  
- 	/* Do we need to search pg_foreign_data_wrapper? */
  	if (fdwid != InvalidOid)
  	{
  		wrapper = GetForeignDataWrapper(fdwid);
--- 301,332 ----
  			return value;
  	}
  
  	if (relid != InvalidOid)
  	{
  		table = GetForeignTable(relid);
  		value = get_options_value(table->options, optname);
  		if (value != NULL)
  			return value;
  		serverid = table->serverid;
  	}
  
  	if (serverid != InvalidOid)
  	{
  		user = GetUserMapping(GetOuterUserId(), serverid);
  		value = get_options_value(user->options, optname);
  		if (value != NULL)
  			return value;
+ 	}
  
+ 	if (serverid != InvalidOid)
+ 	{
  		server = GetForeignServer(serverid);
  		value = get_options_value(server->options, optname);
  		if (value != NULL)
  			return value;
  		fdwid = server->fdwid;
  	}
  
  	if (fdwid != InvalidOid)
  	{
  		wrapper = GetForeignDataWrapper(fdwid);
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 63958c3..df3f25a 100644
*** a/src/backend/nodes/copyfuncs.c
--- b/src/backend/nodes/copyfuncs.c
*************** _copyForeignScan(ForeignScan *from)
*** 590,595 ****
--- 590,596 ----
  	/*
  	 * copy remainder of node
  	 */
+ 	COPY_SCALAR_FIELD(serverid);
  	COPY_SCALAR_FIELD(fsSystemCol);
  	COPY_NODE_FIELD(fdwplan);
  
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f7d39ed..85f4071 100644
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
*************** _outForeignScan(StringInfo str, ForeignS
*** 557,562 ****
--- 557,563 ----
  
  	_outScanInfo(str, (Scan *) node);
  
+ 	WRITE_OID_FIELD(serverid);
  	WRITE_BOOL_FIELD(fsSystemCol);
  	WRITE_NODE_FIELD(fdwplan);
  }
*************** _outHashPath(StringInfo str, HashPath *n
*** 1661,1666 ****
--- 1662,1677 ----
  }
  
  static void
+ _outForeignJoinPath(StringInfo str, ForeignJoinPath *node)
+ {
+ 	WRITE_NODE_TYPE("FOREIGNJOINPATH");
+ 
+ 	_outJoinPathInfo(str, (JoinPath *) node);
+ 
+ 	WRITE_NODE_FIELD(fdwplan);
+ }
+ 
+ static void
  _outPlannerGlobal(StringInfo str, PlannerGlobal *node)
  {
  	WRITE_NODE_TYPE("PLANNERGLOBAL");
*************** _outRelOptInfo(StringInfo str, RelOptInf
*** 1749,1754 ****
--- 1760,1766 ----
  	WRITE_NODE_FIELD(baserestrictinfo);
  	WRITE_NODE_FIELD(joininfo);
  	WRITE_BOOL_FIELD(has_eclass_joins);
+ 	WRITE_OID_FIELD(serverid);
  	WRITE_BITMAPSET_FIELD(index_outer_relids);
  	WRITE_NODE_FIELD(index_inner_paths);
  }
*************** _outNode(StringInfo str, void *obj)
*** 2987,2992 ****
--- 2999,3007 ----
  			case T_HashPath:
  				_outHashPath(str, obj);
  				break;
+ 			case T_ForeignJoinPath:
+ 				_outForeignJoinPath(str, obj);
+ 				break;
  			case T_PlannerGlobal:
  				_outPlannerGlobal(str, obj);
  				break;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index aaa754c..f06cb27 100644
*** a/src/backend/optimizer/README
--- b/src/backend/optimizer/README
*************** RelOptInfo      - a relation or joined r
*** 356,361 ****
--- 356,362 ----
    NestPath      - nested-loop joins
    MergePath     - merge joins
    HashPath      - hash joins
+   ForeignJoinPath - foreign joins
  
   EquivalenceClass - a data structure representing a set of values known equal
  
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 815b996..17b8cbe 100644
*** a/src/backend/optimizer/path/allpaths.c
--- b/src/backend/optimizer/path/allpaths.c
*************** print_path(PlannerInfo *root, Path *path
*** 1610,1615 ****
--- 1610,1619 ----
  			ptype = "HashJoin";
  			join = true;
  			break;
+ 		case T_ForeignJoinPath:
+ 			ptype = "ForeignJoin";
+ 			join = true;
+ 			break;
  		default:
  			ptype = "???Path";
  			break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 348c36b..def47ee 100644
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
*************** bool		enable_nestloop = true;
*** 119,124 ****
--- 119,125 ----
  bool		enable_material = true;
  bool		enable_mergejoin = true;
  bool		enable_hashjoin = true;
+ bool		enable_foreignjoin = true;
  
  typedef struct
  {
*************** cost_mergejoin(MergePath *path, PlannerI
*** 2100,2105 ****
--- 2101,2111 ----
  }
  
  /*
+  * cost_foreignjoin() is not defined here because the cost of a foreign join
+  * is estimated by each FDW via PlanForeignJoin.
+  */
+ 
+ /*
   * run mergejoinscansel() with caching
   */
  static MergeScanSelCache *
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7d3cf42..3024565 100644
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
*************** match_unsorted_outer(PlannerInfo *root,
*** 541,546 ****
--- 541,566 ----
  											  merge_pathkeys));
  		}
  
+ 		if (enable_foreignjoin &&
+ 			joinrel->serverid != InvalidOid &&
+ 			(IsA(outerpath, ForeignPath) || IsA(outerpath, ForeignJoinPath)) &&
+ 			(IsA(inner_cheapest_total, ForeignPath) ||
+ 			 IsA(inner_cheapest_total, ForeignJoinPath)))
+ 
+ 		{
+ 			ForeignJoinPath	   *path;
+ 			path = create_foreignjoin_path(root,
+ 										   joinrel,
+ 										   jointype,
+ 										   sjinfo,
+ 										   outerpath,
+ 										   inner_cheapest_total,
+ 										   restrictlist,
+ 										   merge_pathkeys);
+ 			if (path != NULL)
+ 				add_path(joinrel, (Path *) path);
+ 		}
+ 
  		/* Can't do anything else if outer path needs to be unique'd */
  		if (save_jointype == JOIN_UNIQUE_OUTER)
  			continue;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8138b01..8a1f1c7 100644
*** a/src/backend/optimizer/plan/createplan.c
--- b/src/backend/optimizer/plan/createplan.c
*************** static WorkTableScan *create_worktablesc
*** 75,80 ****
--- 75,82 ----
  						  List *tlist, List *scan_clauses);
  static ForeignScan *create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
  						List *tlist, List *scan_clauses);
+ static ForeignScan *create_foreignjoin_plan(PlannerInfo *root, ForeignJoinPath *best_path,
+ 						List *tlist, List *scan_clauses);
  static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path,
  					 Plan *outer_plan, Plan *inner_plan);
  static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path,
*************** static CteScan *make_ctescan(List *qptli
*** 123,129 ****
  static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
  				   Index scanrelid, int wtParam);
  static ForeignScan *make_foreignscan(List *qptlist, List *qpqual,
! 				 Index scanrelid, bool fsSystemCol, FdwPlan *fdwplan);
  static BitmapAnd *make_bitmap_and(List *bitmapplans);
  static BitmapOr *make_bitmap_or(List *bitmapplans);
  static NestLoop *make_nestloop(List *tlist,
--- 125,132 ----
  static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
  				   Index scanrelid, int wtParam);
  static ForeignScan *make_foreignscan(List *qptlist, List *qpqual,
! 				 Index scanrelid, Oid serverid, bool fsSystemCol,
! 				 FdwPlan *fdwplan);
  static BitmapAnd *make_bitmap_and(List *bitmapplans);
  static BitmapOr *make_bitmap_or(List *bitmapplans);
  static NestLoop *make_nestloop(List *tlist,
*************** create_plan_recurse(PlannerInfo *root, P
*** 221,226 ****
--- 224,230 ----
  		case T_CteScan:
  		case T_WorkTableScan:
  		case T_ForeignScan:
+ 		case T_ForeignJoin:		/* ForeignJoinPath becomes a ForeignScan */
  			plan = create_scan_plan(root, best_path);
  			break;
  		case T_HashJoin:
*************** create_scan_plan(PlannerInfo *root, Path
*** 385,390 ****
--- 389,401 ----
  													scan_clauses);
  			break;
  
+ 		case T_ForeignJoin:
+ 			plan = (Plan *) create_foreignjoin_plan(root,
+ 													(ForeignJoinPath *) best_path,
+ 													tlist,
+ 													scan_clauses);
+ 			break;
+ 
  		default:
  			elog(ERROR, "unrecognized node type: %d",
  				 (int) best_path->pathtype);
*************** create_foreignscan_plan(PlannerInfo *roo
*** 1856,1861 ****
--- 1867,1873 ----
  	scan_plan = make_foreignscan(tlist,
  								 scan_clauses,
  								 scan_relid,
+ 								 rel->serverid,
  								 fsSystemCol,
  								 best_path->fdwplan);
  
*************** create_foreignscan_plan(PlannerInfo *roo
*** 1864,1869 ****
--- 1876,1926 ----
  	return scan_plan;
  }
  
+ /*
+  * create_foreignjoin_plan
+  *	 Returns a foreignscan plan for the join relation joined by 'best_path'
+  *	 with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+  */
+ static ForeignScan *
+ create_foreignjoin_plan(PlannerInfo *root, ForeignJoinPath *best_path,
+ 						List *tlist, List *scan_clauses)
+ {
+ 	ForeignScan *scan_plan;
+ 	RelOptInfo *rel = best_path->jpath.path.parent;
+ 	Index		scan_relid = rel->relid;
+ 	bool		fsSystemCol;
+ 
+ 	/* Sort clauses into best execution order */
+ 	scan_clauses = order_qual_clauses(root, scan_clauses);
+ 
+ 	/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
+ 	scan_clauses = extract_actual_clauses(scan_clauses, false);
+ 
+ 	fsSystemCol = false;
+ #ifdef NOT_USED
+ 	/* Detect whether any system columns are requested from rel */
+ 	for (i = rel->min_attr; i < 0; i++)
+ 	{
+ 		if (!bms_is_empty(rel->attr_needed[i - rel->min_attr]))
+ 		{
+ 			fsSystemCol = true;
+ 			break;
+ 		}
+ 	}
+ #endif
+ 
+ 	scan_plan = make_foreignscan(tlist,
+ 								 scan_clauses,
+ 								 scan_relid,
+ 								 rel->serverid,
+ 								 fsSystemCol,
+ 								 best_path->fdwplan);
+ 
+ 	copy_path_costsize(&scan_plan->scan.plan, &best_path->jpath.path);
+ 
+ 	return scan_plan;
+ }
+ 
  
  /*****************************************************************************
   *
*************** static ForeignScan *
*** 3170,3175 ****
--- 3227,3233 ----
  make_foreignscan(List *qptlist,
  				 List *qpqual,
  				 Index scanrelid,
+ 				 Oid serverid,
  				 bool fsSystemCol,
  				 FdwPlan *fdwplan)
  {
*************** make_foreignscan(List *qptlist,
*** 3182,3187 ****
--- 3240,3246 ----
  	plan->lefttree = NULL;
  	plan->righttree = NULL;
  	node->scan.scanrelid = scanrelid;
+ 	node->serverid = serverid;
  	node->fsSystemCol = fsSystemCol;
  	node->fdwplan = fdwplan;
  
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1e7aac9..ea5095b 100644
*** a/src/backend/optimizer/util/pathnode.c
--- b/src/backend/optimizer/util/pathnode.c
***************
*** 17,22 ****
--- 17,23 ----
  #include <math.h>
  
  #include "foreign/fdwapi.h"
+ #include "foreign/foreign.h"
  #include "miscadmin.h"
  #include "nodes/nodeFuncs.h"
  #include "optimizer/clauses.h"
*************** create_hashjoin_path(PlannerInfo *root,
*** 1629,1631 ****
--- 1630,1712 ----
  
  	return pathnode;
  }
+ 
+ /*
+  * create_foreignjoin_path
+  *	  Creates a pathnode corresponding to a foreign join between two
+  *	  relations.
+  *
+  * 'joinrel' is the join relation.
+  * 'jointype' is the type of join required
+  * 'sjinfo' is extra info about the join for selectivity estimation
+  * 'outer_path' is the outer path
+  * 'inner_path' is the inner path
+  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
+  * 'pathkeys' are the path keys of the new join path
+  *
+  * Returns the resulting path node, or NULL to indicate that this path is
+  * unavailable.
+  */
+ ForeignJoinPath *
+ create_foreignjoin_path(PlannerInfo *root,
+ 						RelOptInfo *joinrel,
+ 						JoinType jointype,
+ 						SpecialJoinInfo *sjinfo,
+ 						Path *outer_path,
+ 						Path *inner_path,
+ 						List *restrict_clauses,
+ 						List *pathkeys)
+ {
+ 	ForeignJoinPath	   *pathnode;
+ 	ForeignServer	   *server;
+ 	ForeignDataWrapper *wrapper;
+ 	FdwRoutine		   *fdwroutine;
+ 	FdwPlan			   *fdwplan;
+ 
+ 	/* Both outer and inner of this join must come from the same foreign server. */
+ 	Assert(IsA(outer_path, ForeignPath) || IsA(outer_path, ForeignJoinPath));
+ 	Assert(IsA(inner_path, ForeignPath) || IsA(inner_path, ForeignJoinPath));
+ 
+ 	/*
+ 	 * First we try to get the FDW's callback info.  If the FDW has a planner
+ 	 * for foreign joins, let the FDW plan this join.
+ 	 */
+ 	server = GetForeignServer(joinrel->serverid);
+ 	wrapper = GetForeignDataWrapper(server->fdwid);
+ 	fdwroutine = GetFdwRoutine(wrapper->fdwhandler);
+ 	if (fdwroutine->PlanForeignJoin == NULL)
+ 		return NULL;
+ 
+ 	fdwplan = fdwroutine->PlanForeignJoin(joinrel->serverid,
+ 										  root,
+ 										  joinrel,
+ 										  jointype,
+ 										  sjinfo,
+ 										  outer_path,
+ 										  inner_path,
+ 										  restrict_clauses,
+ 										  pathkeys);
+ 	/* Returning NULL indicates that the FDW can't handle this join. */
+ 	if (fdwplan == NULL)
+ 		return NULL;
+ 	Assert(IsA(fdwplan, FdwPlan));
+ 
+ 	/* OK, this FDW can handle this join. */
+ 	pathnode = makeNode(ForeignJoinPath);
+ 	pathnode->jpath.path.pathtype = T_ForeignJoin;
+ 	pathnode->jpath.path.parent = joinrel;
+ 	pathnode->jpath.jointype = jointype;
+ 	pathnode->jpath.outerjoinpath = outer_path;
+ 	pathnode->jpath.innerjoinpath = inner_path;
+ 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
+ 	pathnode->jpath.path.pathkeys = pathkeys;
+ 
+ 	/* Use costs estimated by FDW */
+ 	pathnode->jpath.path.startup_cost = fdwplan->startup_cost;
+ 	pathnode->jpath.path.total_cost = fdwplan->total_cost;
+ 
+ 	/* Store FDW-private information too. */
+ 	pathnode->fdwplan = fdwplan;
+ 
+ 	return pathnode;
+ }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index de629e9..d69be69 100644
*** a/src/backend/optimizer/util/plancat.c
--- b/src/backend/optimizer/util/plancat.c
***************
*** 22,27 ****
--- 22,28 ----
  #include "access/sysattr.h"
  #include "access/transam.h"
  #include "catalog/catalog.h"
+ #include "foreign/foreign.h"
  #include "catalog/heap.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
*************** get_relation_info(PlannerInfo *root, Oid
*** 359,364 ****
--- 360,374 ----
  
  	rel->indexlist = indexinfos;
  
+ 	/* Get server oid for further planning, if this is a foreign table. */
+ 	if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 	{
+ 		ForeignTable   *table;
+ 
+ 		table = GetForeignTable(relationObjectId);
+ 		rel->serverid = table->serverid;
+ 	}
+ 
  	heap_close(relation, NoLock);
  
  	/*
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 37187e2..24cfe24 100644
*** a/src/backend/optimizer/util/relnode.c
--- b/src/backend/optimizer/util/relnode.c
*************** build_simple_rel(PlannerInfo *root, int 
*** 117,122 ****
--- 117,123 ----
  	rel->baserestrictcost.per_tuple = 0;
  	rel->joininfo = NIL;
  	rel->has_eclass_joins = false;
+ 	rel->serverid = InvalidOid;
  	rel->index_outer_relids = NULL;
  	rel->index_inner_paths = NIL;
  
*************** build_join_rel(PlannerInfo *root,
*** 371,376 ****
--- 372,378 ----
  	joinrel->baserestrictcost.per_tuple = 0;
  	joinrel->joininfo = NIL;
  	joinrel->has_eclass_joins = false;
+ 	joinrel->serverid = InvalidOid;
  	joinrel->index_outer_relids = NULL;
  	joinrel->index_inner_paths = NIL;
  
*************** build_join_rel(PlannerInfo *root,
*** 443,448 ****
--- 445,458 ----
  			lappend(root->join_rel_level[root->join_cur_level], joinrel);
  	}
  
+ 	/*
+ 	 * If both outer and inner are from one foreign server, maybe this join can
+ 	 * be pushed down, so remember the oid of the foreign server in this
+ 	 * relation.
+ 	 */
+ 	if (outer_rel->serverid == inner_rel->serverid)
+ 		joinrel->serverid = outer_rel->serverid;
+ 
  	return joinrel;
  }
  
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 976a832..e97a94d 100644
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
*************** set_plan_disabling_options(const char *a
*** 3093,3098 ****
--- 3093,3101 ----
  		case 'h':				/* hashjoin */
  			tmp = "enable_hashjoin";
  			break;
+ 		case 'f':				/* foreignjoin */
+ 			tmp = "enable_foreignjoin";
+ 			break;
  	}
  	if (tmp)
  	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index da7b6d4..524582d 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_bool ConfigureNames
*** 765,770 ****
--- 765,779 ----
  		NULL, NULL, NULL
  	},
  	{
+ 		{"enable_foreignjoin", PGC_USERSET, QUERY_TUNING_METHOD,
+ 			gettext_noop("Enables the planner's use of foreign join plans."),
+ 			NULL
+ 		},
+ 		&enable_foreignjoin,
+ 		true,
+ 		NULL, NULL, NULL
+ 	},
+ 	{
  		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
  			gettext_noop("Enables genetic query optimization."),
  			gettext_noop("This algorithm attempts to do planning without "
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 315db46..d1dec0e 100644
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 240,245 ****
--- 240,246 ----
  # - Planner Method Configuration -
  
  #enable_bitmapscan = on
+ #enable_foreignjoin = on
  #enable_hashagg = on
  #enable_hashjoin = on
  #enable_indexscan = on
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index bdd499b..e4d3426 100644
*** a/src/include/executor/executor.h
--- b/src/include/executor/executor.h
*************** extern void ExecFreeExprContext(PlanStat
*** 332,337 ****
--- 332,338 ----
  extern TupleDesc ExecGetScanType(ScanState *scanstate);
  extern void ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc);
  extern void ExecAssignScanTypeFromOuterPlan(ScanState *scanstate);
+ extern void ExecAssignScanTypeFromTL(ScanState *scanstate);
  
  extern bool ExecRelationIsTargetRelation(EState *estate, Index scanrelid);
  
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 3378ba9..e535e12 100644
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
*************** typedef void (*ReScanForeignScan_functio
*** 68,73 ****
--- 68,83 ----
  
  typedef void (*EndForeignScan_function) (ForeignScanState *node);
  
+ typedef FdwPlan *(*PlanForeignJoin_function) (Oid serverid,
+ 											  PlannerInfo *root,
+ 											  RelOptInfo *joinrel,
+ 											  JoinType jointype,
+ 											  SpecialJoinInfo *sjinfo,
+ 											  Path *outer_path,
+ 											  Path *inner_path,
+ 											  List *restrict_clauses,
+ 											  List *pathkeys);
+ 
  
  /*
   * FdwRoutine is the struct returned by a foreign-data wrapper's handler
*************** typedef struct FdwRoutine
*** 88,93 ****
--- 98,106 ----
  	IterateForeignScan_function IterateForeignScan;
  	ReScanForeignScan_function ReScanForeignScan;
  	EndForeignScan_function EndForeignScan;
+ 
+ 	/* functions below are optional */
+ 	PlanForeignJoin_function PlanForeignJoin;
  } FdwRoutine;
  
  
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0a89f18..27d37a0 100644
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
*************** typedef struct WorkTableScanState
*** 1463,1468 ****
--- 1463,1469 ----
  typedef struct ForeignScanState
  {
  	ScanState	ss;				/* its first field is NodeTag */
+ 	Oid			serverid;		/* OID of foreign server */
  	/* use struct pointer to avoid including fdwapi.h here */
  	struct FdwRoutine *fdwroutine;
  	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 824d8b5..0f5dac2 100644
*** a/src/include/nodes/nodes.h
--- b/src/include/nodes/nodes.h
*************** typedef enum NodeTag
*** 67,72 ****
--- 67,73 ----
  	T_NestLoop,
  	T_MergeJoin,
  	T_HashJoin,
+ 	T_ForeignJoin,
  	T_Material,
  	T_Sort,
  	T_Group,
*************** typedef enum NodeTag
*** 221,226 ****
--- 222,228 ----
  	T_NestPath,
  	T_MergePath,
  	T_HashPath,
+ 	T_ForeignJoinPath,
  	T_TidPath,
  	T_ForeignPath,
  	T_AppendPath,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 6685864..2fc8a36 100644
*** a/src/include/nodes/plannodes.h
--- b/src/include/nodes/plannodes.h
*************** typedef struct WorkTableScan
*** 467,472 ****
--- 467,473 ----
  typedef struct ForeignScan
  {
  	Scan		scan;
+ 	Oid			serverid;		/* OID of foreign server */
  	bool		fsSystemCol;	/* true if any "system column" is needed */
  	/* use struct pointer to avoid including fdwapi.h here */
  	struct FdwPlan *fdwplan;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index a400960..d790a34 100644
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
*************** typedef struct RelOptInfo
*** 416,421 ****
--- 416,422 ----
  	List	   *joininfo;		/* RestrictInfo structures for join clauses
  								 * involving this rel */
  	bool		has_eclass_joins;		/* T means joininfo is incomplete */
+ 	Oid			serverid;		/* foreign server, if foreign scan/join */
  
  	/* cached info about inner indexscan paths for relation: */
  	Relids		index_outer_relids;		/* other relids in indexable join
*************** typedef struct HashPath
*** 954,959 ****
--- 955,970 ----
  } HashPath;
  
  /*
+  * A foreignjoin path adds only the FdwPlan produced by the FDW's planner.
+  */
+ typedef struct ForeignJoinPath
+ {
+ 	JoinPath	jpath;
+ 	/* use struct pointer to avoid including fdwapi.h here */
+ 	struct FdwPlan *fdwplan;	/* FDW-specific information */
+ } ForeignJoinPath;
+ 
+ /*
   * Restriction clause info.
   *
   * We create one of these for each AND sub-clause of a restriction condition
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 125808a..284eb92 100644
*** a/src/include/optimizer/cost.h
--- b/src/include/optimizer/cost.h
*************** extern bool enable_nestloop;
*** 61,66 ****
--- 61,67 ----
  extern bool enable_material;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
+ extern bool enable_foreignjoin;
  extern int	constraint_exclusion;
  
  extern double clamp_row_est(double nrows);
*************** extern void cost_mergejoin(MergePath *pa
*** 114,119 ****
--- 115,124 ----
  			   SpecialJoinInfo *sjinfo);
  extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
  			  SpecialJoinInfo *sjinfo);
+ /*
+  * cost_foreignjoin() is not defined here because the cost of a foreign join
+  * is estimated by each FDW via PlanForeignJoin.
+  */
  extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
  extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
  extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 38c8c1c..b303270 100644
*** a/src/include/optimizer/pathnode.h
--- b/src/include/optimizer/pathnode.h
*************** extern HashPath *create_hashjoin_path(Pl
*** 94,99 ****
--- 94,108 ----
  					 List *restrict_clauses,
  					 List *hashclauses);
  
+ extern ForeignJoinPath *create_foreignjoin_path(PlannerInfo *root,
+ 							RelOptInfo *joinrel,
+ 							JoinType jointype,
+ 							SpecialJoinInfo *sjinfo,
+ 							Path *outer_path,
+ 							Path *inner_path,
+ 							List *restrict_clauses,
+ 							List *pathkeys);
+ 
  /*
   * prototypes for relnode.c
   */
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index 5f20c93..8a81674 100644
*** a/src/test/regress/expected/rangefuncs.out
--- b/src/test/regress/expected/rangefuncs.out
*************** SELECT name, setting FROM pg_settings WH
*** 2,7 ****
--- 2,8 ----
           name         | setting 
  ----------------------+---------
   enable_bitmapscan    | on
+  enable_foreignjoin   | on
   enable_hashagg       | on
   enable_hashjoin      | on
   enable_indexonlyscan | on
*************** SELECT name, setting FROM pg_settings WH
*** 12,18 ****
   enable_seqscan       | on
   enable_sort          | on
   enable_tidscan       | on
! (11 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
--- 13,19 ----
   enable_seqscan       | on
   enable_sort          | on
   enable_tidscan       | on
! (12 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);