gist-fast-build-v0.8-lobotomized-1.patch

text/x-diff
Filename: gist-fast-build-v0.8-lobotomized-1.patch
Type: text/x-diff
Part: 0
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 4657425..d829243 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -30,6 +30,9 @@
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
+
+static void validateBufferingOption(char *value);
+
 /*
  * Contents of pg_class.reloptions
  *
@@ -66,6 +69,14 @@ static relopt_bool boolRelOpts[] =
 		},
 		true
 	},
+	{
+		{
+			"neighborrelocation",
+			"Enables relocation of index tuples into neighbor node buffers in GiST index buffering build",
+			RELOPT_KIND_GIST
+		},
+		true
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -159,6 +170,22 @@ static relopt_int intRelOpts[] =
 			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST
 		}, -1, 0, 2000000000
 	},
+	{
+		{
+			"levelstep",
+			"Level step in GiST index buffering build",
+			RELOPT_KIND_GIST
+		},
+		-1, 1, 100
+	},
+	{
+		{
+			"buffersize",
+			"Buffer size in GiST index buffering build",
+			RELOPT_KIND_GIST
+		},
+		-1, 1, 1000000000
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -219,6 +246,17 @@ static relopt_real realRelOpts[] =
 
 static relopt_string stringRelOpts[] =
 {
+	{
+		{
+			"buffering",
+			"Enables buffering build for this GiST index",
+			RELOPT_KIND_GIST
+		},
+		4,
+		false,
+		validateBufferingOption,			
+		"auto"
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -1282,3 +1320,22 @@ tablespace_reloptions(Datum reloptions, bool validate)
 
 	return (bytea *) tsopts;
 }
+
+/*
+ * Validator for "buffering" option of GiST indexed. Allows only "on", "off" and
+ * "auto" values.
+ */
+static void
+validateBufferingOption(char *value)
+{
+	if (!value ||
+		(
+			strcmp(value, "on") &&
+			strcmp(value, "off") &&
+			strcmp(value, "auto")
+		)
+	)
+	{
+		elog(ERROR, "Only \"on\", \"off\" and \"auto\" values are available for \"buffering\" option.");
+	}
+}
diff --git a/src/backend/access/gist/Makefile b/src/backend/access/gist/Makefile
index f8051a2..cc9468f 100644
--- a/src/backend/access/gist/Makefile
+++ b/src/backend/access/gist/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = gist.o gistutil.o gistxlog.o gistvacuum.o gistget.o gistscan.o \
-       gistproc.o gistsplit.o
+       gistproc.o gistsplit.o gistbuild.o gistbuildbuffers.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/gist/README b/src/backend/access/gist/README
index 2d78dcb..45f8fc9 100644
--- a/src/backend/access/gist/README
+++ b/src/backend/access/gist/README
@@ -24,6 +24,7 @@ The current implementation of GiST supports:
   * provides NULL-safe interface to GiST core
   * Concurrency
   * Recovery support via WAL logging
+  * Buffering build algorithm
 
 The support for concurrency implemented in PostgreSQL was developed based on
 the paper "Access Methods for Next-Generation Database Systems" by
@@ -31,6 +32,12 @@ Marcel Kornaker:
 
     http://www.sai.msu.su/~megera/postgres/gist/papers/concurrency/access-methods-for-next-generation.pdf.gz
 
+Buffering build algorithm for GiST was developed based on the paper "Efficient
+Bulk Operations on Dynamic R-trees" by Lars Arge, Klaus Hinrichs, Jan Vahrenhold
+and Jeffrey Scott Vitter.
+
+    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.135.9894&rep=rep1&type=pdf
+
 The original algorithms were modified in several ways:
 
 * They had to be adapted to PostgreSQL conventions. For example, the SEARCH
@@ -278,6 +285,113 @@ would complicate the insertion algorithm. So when an insertion sees a page
 with F_FOLLOW_RIGHT set, it immediately tries to bring the split that
 crashed in the middle to completion by adding the downlink in the parent.
 
+Buffering build algorithm
+--------------------
+
+In buffering build algorithm levels are numbering upwards and leaf pages level
+has number zero. An example is given on the picture below. Such numbering is
+used in order to make pages save it's level number during all life-time even if
+root splits.
+
+Level                    Tree
+
+3                         *
+                      /       \
+2                *                 *
+              /  |  \           /  |  \
+1          *     *     *     *     *     *
+          / \   / \   / \   / \   / \   / \
+0        o   o o   o o   o o   o o   o o   o
+
+* - internal page
+o - leaf page
+
+In buffering build algorithm additional buffers are associated with pages. Leaf
+pages never have buffers. Internal pages have buffers with some level step.
+I.e. pages on levels level_step*i (i >=1) have buffers. If level_step = 1 then
+each internal page have buffer. 
+
+Level        Tree (level_step = 1)                Tree (level_step = 2)   
+                                        
+3                      *(b)                                  *
+                   /       \                             /       \
+2             *(b)              *(b)                *(b)              *(b)
+           /  |  \           /  |  \             /  |  \           /  |  \
+1       *(b)  *(b)  *(b)  *(b)  *(b)  *(b)    *     *     *     *     *     *
+       / \   / \   / \   / \   / \   / \     / \   / \   / \   / \   / \   / \
+0     o   o o   o o   o o   o o   o o   o   o   o o   o o   o o   o o   o o   o
+
+(b) - buffer
+
+Each buffer can be in one of following states:
+1) Append. New tuples are being added to the buffer. Last page of the buffer is kept in main memory
+2) Flush. Tuples are being read from the buffer. Page containing last returned tuple is kept in main memory.
+
+Buffer goes from append mode to flush mode on the first call to popItupFromNodeBuffer(), and from flush mode to append mode when the buffer is empty.
+
+When index tuple is inserting, it's first path can end in following points:
+1) Leaf page if no levels has buffers, i.e. root level <= level_step.
+2) Buffer of root page if root page has a buffer.
+3) Buffer of topmost level page if root page doesn't have a buffer.
+
+New index tuples are processing until root level buffer (or buffer of topmost 
+level page) will be filled at half. When some buffer is filled at halt then
+the process of it's emptying is starting.
+
+Buffer emptying process means that index tuples from buffer are moving into
+underlying buffers(if any) or leaf pages. For buffer emptying to another buffers
+following items should be loaded into main memory:
+1) Buffer itself should be completely loaded
+2) Underlying buffers should be loaded for append
+3) Page associated with buffer
+4) Pages between buffer and underlying buffers if level_step != 1 (note that 
+   pages associated with underlying buffers aren't required to be loaded)
+For emptying to leaf pages list of those items is following
+1) Buffer itself should be completely loaded
+2) Page associated with buffer
+3) Pages between buffer and leaf pages if level_step != 1 
+4) Leaf pages
+Illustration of this requirements is given below.
+
+   Buffer emptying to another buffers    Buffer emptying to leaf pages
+
+                 +(cb)                                 +(cb)                  
+              /     \                                /     \                    
+          +             +                        +             +            
+        /   \         /   \                    /   \         /   \              
+      *(ab)   *(ab) *(ab)   *(ab)            x       x     x       x  
+
++    - loaded into main memory internal page
+x    - loaded into main memory leaf page
+*    - not loaded into main memory internal page
+(cb) - completely loaded buffer
+(ab) - loaded for append buffer
+
+One buffer emptying process can trigger another buffer emptying processes. 
+Buffer emptying stack data structure is the data structure responsible for
+sequence of buffer emptying. Each node buffer which is half filled should be
+inserted into buffer emptying stack.
+
+When we're moving from buffer emptying on higher level to the buffer emptying
+on lower level, loaded part of tree (only pages of tree not the buffers) are
+remained in the main memory. Tree parts stack is the data structure which 
+represents hierarchy of loaded tree parts.
+
+If split occurs on the page which have a buffer then index tuples are
+relocating. When neighborrelocation is off index tuples are relocating between 
+buffers of pages produces by split using penalty method. This method was
+proposed in the original paper. For for reasons of index quality improvement
+another method of relocation was implemented. When neighborrelocation is on
+index tuples are relocation into both buffers of pages produces by split and
+buffers on neighbor pages (pages with same parent). This method uses more CPU
+and IO but improves index quality.
+
+When all index tuples are inserted there are still some index tuples in buffers.
+At this moment final buffer emptying starts. Each level have a list of non-empty
+buffers. Final emptying contain loop over all tree levels starting from topmost.
+On each levels all it's buffers are sequentially emptying until all buffers of
+the level are empty. Since no index tuples move upwards during buffer emptying
+all the buffers are empty when final emptying are finished.
 
 Authors:
 	Teodor Sigaev	<teodor@sigaev.ru>
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 4fc7a21..278de32 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -24,38 +24,16 @@
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
-/* Working state for gistbuild and its callback */
-typedef struct
-{
-	GISTSTATE	giststate;
-	int			numindexattrs;
-	double		indtuples;
-	MemoryContext tmpCtx;
-} GISTBuildState;
-
 /* A List of these is used represent a split-in-progress. */
 typedef struct
 {
 	Buffer		buf;			/* the split page "half" */
 	IndexTuple	downlink;		/* downlink for this half. */
+    bool        release;
 } GISTPageSplitInfo;
 
 /* non-export function prototypes */
-static void gistbuildCallback(Relation index,
-				  HeapTuple htup,
-				  Datum *values,
-				  bool *isnull,
-				  bool tupleIsAlive,
-				  void *state);
-static void gistdoinsert(Relation r,
-			 IndexTuple itup,
-			 Size freespace,
-			 GISTSTATE *GISTstate);
 static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
-static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
-				 GISTSTATE *giststate,
-				 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
-				 Buffer leftchild);
 static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
 				GISTSTATE *giststate, List *splitinfo);
 
@@ -89,138 +67,6 @@ createTempGistContext(void)
 }
 
 /*
- * Routine to build an index.  Basically calls insert over and over.
- *
- * XXX: it would be nice to implement some sort of bulk-loading
- * algorithm, but it is not clear how to do that.
- */
-Datum
-gistbuild(PG_FUNCTION_ARGS)
-{
-	Relation	heap = (Relation) PG_GETARG_POINTER(0);
-	Relation	index = (Relation) PG_GETARG_POINTER(1);
-	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
-	IndexBuildResult *result;
-	double		reltuples;
-	GISTBuildState buildstate;
-	Buffer		buffer;
-	Page		page;
-
-	/*
-	 * We expect to be called exactly once for any index relation. If that's
-	 * not the case, big trouble's what we have.
-	 */
-	if (RelationGetNumberOfBlocks(index) != 0)
-		elog(ERROR, "index \"%s\" already contains data",
-			 RelationGetRelationName(index));
-
-	/* no locking is needed */
-	initGISTstate(&buildstate.giststate, index);
-
-	/* initialize the root page */
-	buffer = gistNewBuffer(index);
-	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-	page = BufferGetPage(buffer);
-
-	START_CRIT_SECTION();
-
-	GISTInitBuffer(buffer, F_LEAF);
-
-	MarkBufferDirty(buffer);
-
-	if (RelationNeedsWAL(index))
-	{
-		XLogRecPtr	recptr;
-		XLogRecData rdata;
-
-		rdata.data = (char *) &(index->rd_node);
-		rdata.len = sizeof(RelFileNode);
-		rdata.buffer = InvalidBuffer;
-		rdata.next = NULL;
-
-		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
-		PageSetLSN(page, recptr);
-		PageSetTLI(page, ThisTimeLineID);
-	}
-	else
-		PageSetLSN(page, GetXLogRecPtrForTemp());
-
-	UnlockReleaseBuffer(buffer);
-
-	END_CRIT_SECTION();
-
-	/* build the index */
-	buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
-	buildstate.indtuples = 0;
-
-	/*
-	 * create a temporary memory context that is reset once for each tuple
-	 * inserted into the index
-	 */
-	buildstate.tmpCtx = createTempGistContext();
-
-	/* do the heap scan */
-	reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-								   gistbuildCallback, (void *) &buildstate);
-
-	/* okay, all heap tuples are indexed */
-	MemoryContextDelete(buildstate.tmpCtx);
-
-	freeGISTstate(&buildstate.giststate);
-
-	/*
-	 * Return statistics
-	 */
-	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
-
-	result->heap_tuples = reltuples;
-	result->index_tuples = buildstate.indtuples;
-
-	PG_RETURN_POINTER(result);
-}
-
-/*
- * Per-tuple callback from IndexBuildHeapScan
- */
-static void
-gistbuildCallback(Relation index,
-				  HeapTuple htup,
-				  Datum *values,
-				  bool *isnull,
-				  bool tupleIsAlive,
-				  void *state)
-{
-	GISTBuildState *buildstate = (GISTBuildState *) state;
-	IndexTuple	itup;
-	MemoryContext oldCtx;
-
-	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
-
-	/* form an index tuple and point it at the heap tuple */
-	itup = gistFormTuple(&buildstate->giststate, index,
-						 values, isnull, true /* size is currently bogus */ );
-	itup->t_tid = htup->t_self;
-
-	/*
-	 * Since we already have the index relation locked, we call gistdoinsert
-	 * directly.  Normal access method calls dispatch through gistinsert,
-	 * which locks the relation for write.	This is the right thing to do if
-	 * you're inserting single tups, but not when you're initializing the
-	 * whole index at once.
-	 *
-	 * In this path we respect the fillfactor setting, whereas insertions
-	 * after initial build do not.
-	 */
-	gistdoinsert(index, itup,
-			  RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
-				 &buildstate->giststate);
-
-	buildstate->indtuples += 1;
-	MemoryContextSwitchTo(oldCtx);
-	MemoryContextReset(buildstate->tmpCtx);
-}
-
-/*
  *	gistbuildempty() -- build an empty gist index in the initialization fork
  */
 Datum
@@ -275,7 +121,6 @@ gistinsert(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(false);
 }
 
-
 /*
  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
  * at that offset is atomically removed along with inserting the new tuples.
@@ -608,7 +453,7 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate,
  * this routine assumes it is invoked in a short-lived memory context,
  * so it does not bother releasing palloc'd allocations.
  */
-static void
+BlockNumber
 gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 {
 	ItemId		iid;
@@ -617,6 +462,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 	GISTInsertStack *stack;
 	GISTInsertState state;
 	bool		xlocked = false;
+	BlockNumber	leafBlocknum = InvalidBlockNumber;
 
 	memset(&state, 0, sizeof(GISTInsertState));
 	state.freespace = freespace;
@@ -841,7 +687,8 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 			}
 
 			/* now state.stack->(page, buffer and blkno) points to leaf page */
-
+			
+			leafBlocknum = stack->blkno;
 			gistinserttuples(&state, stack, giststate, &itup, 1,
 							 InvalidOffsetNumber, InvalidBuffer);
 			LockBuffer(stack->buffer, GIST_UNLOCK);
@@ -852,6 +699,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 			break;
 		}
 	}
+	return leafBlocknum;
 }
 
 /*
@@ -1183,7 +1031,7 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
  * to hold an exclusive lock on state->stack->buffer, but if we had to split
  * the page, it might not contain the tuple we just inserted/updated.
  */
-static bool
+bool
 gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 				 GISTSTATE *giststate,
 				 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
@@ -1414,6 +1262,7 @@ initGISTstate(GISTSTATE *giststate, Relation index)
 		else
 			giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
 	}
+	giststate->gfbb = NULL;
 }
 
 void
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
new file mode 100644
index 0000000..8543033
--- /dev/null
+++ b/src/backend/access/gist/gistbuild.c
@@ -0,0 +1,1173 @@
+/*-------------------------------------------------------------------------
+ *
+ * gistbuild.c
+ *	  build algorithm for GiST indexes implementation.
+ *
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/gist/gistbuild.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gist_private.h"
+#include "catalog/index.h"
+#include "catalog/pg_collation.h"
+#include "miscadmin.h"
+#include "optimizer/cost.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/smgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#define LEAF_PAGES_STATS_STEP 128
+#define LEAF_PAGES_STATS_COUNT 16
+
+
+/* Working state for gistbuild and its callback */
+typedef struct
+{
+	GISTSTATE		giststate;
+	int				numindexattrs;
+	int64			indtuples;
+	/*
+	 * Buffering build mode. Possible values:
+	 * 'y' - we are in buffering build mode.
+	 * 'a' - we are now in regular build mode, but can switch to buffering 
+	 *       build mode when we decide to.
+	 * 'n' - we are in regular build mode and aren't going to switch.
+	 */
+	char			bufferingMode;
+	MemoryContext	tmpCtx;
+	/* Tracking statistics about last accessed leaf pages */
+	HTAB		   *leafPagesTab;
+	int				nonHitLeafPagesStats[LEAF_PAGES_STATS_COUNT];
+	int				nonHitLeafPagesStatsIndex;
+} GISTBuildState;
+
+typedef struct
+{
+	BlockNumber		blockNumber;
+	int64			lastTupleNumber;
+} LeafPageInfo;
+
+
+static void gistBufferingBuildInsert(Relation index, IndexTuple itup, 
+						 GISTBuildState *buildstate);
+static void gistBuildCallback(Relation index,
+				  HeapTuple htup,
+				  Datum *values,
+				  bool *isnull,
+				  bool tupleIsAlive,
+				  void *state);
+
+static bool initBuffering(GISTBuildState *buildstate, Relation index);
+static bool bufferingbuildinsert(GISTInsertState *state, 
+				GISTLoadedPartItem *item,
+				GISTSTATE *giststate,
+				IndexTuple *itup, int ntup, 
+				OffsetNumber oldoffnum);
+static int bufferlevel_cmp(const void *a, const void *b);
+
+static void gistFindCorrectParent(GISTSTATE *giststate, Relation r, GISTLoadedPartItem *child);
+
+#ifdef GIST_DEBUG
+#include "utils/geo_decls.h"
+static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber coff, BOX *downlink, StringInfo out, int maxlevel);
+static int gist_counttups(Relation r, BlockNumber blk);
+static int gist_count_tuples_in_buffers(GISTBuildBuffers *gfbb);
+#endif
+
+/*
+ * Index tuple insert function of buffering build algorithm. In simpler than
+ * regular insert function in the fact that it don't takes care about
+ * concurrency. It invokes buffer relocation function when it splits page. Also
+ * it take several oldoffnums as a parameter because buffer relocation can alter
+ * a number of parent index tuples.
+ */
+static bool
+bufferingbuildinsert(GISTInsertState *state, 
+					 GISTLoadedPartItem *path,
+					 GISTSTATE *giststate,
+					 IndexTuple *itup, int ntup, 
+					 OffsetNumber oldoffnum)
+{
+	GISTBuildBuffers *gfbb = giststate->gfbb;
+	Buffer		buffer = ReadBuffer(state->r, path->blkno);
+	Page		page = (Page) BufferGetPage(buffer);
+	bool		is_leaf = (GistPageIsLeaf(page)) ? true : false;
+	int			i;
+	bool		is_split;
+
+	LockBuffer(buffer, GIST_EXCLUSIVE);
+
+	/* Remove old index tuple if needed */
+	if (OffsetNumberIsValid(oldoffnum))
+		PageIndexMultiDelete(page, &oldoffnum, 1);
+	
+	/* Check if there is enough space for insertion */
+	is_split = gistnospace(page, itup, ntup, 
+						   InvalidOffsetNumber, state->freespace);
+
+	if (is_split)
+	{
+		/* no space for insertion */
+		IndexTuple *itvec;
+		int			tlen;
+		SplitedPageLayout *dist = NULL,
+				   *ptr;
+		SplitedPageLayout rootpg;
+		BlockNumber blkno = BufferGetBlockNumber(buffer);
+		BlockNumber oldrlink = InvalidBlockNumber;
+		bool		is_rootsplit;
+		XLogRecPtr	recptr;
+
+		is_rootsplit = (blkno == GIST_ROOT_BLKNO);
+		
+		/*
+		 * Form index tuples vector to split. Old tuple was already removed
+		 * from the vector.
+		 */
+		itvec = gistextractpage(page, &tlen);
+		itvec = gistjoinvector(itvec, &tlen, itup, ntup);
+		dist = gistSplit(state->r, page, itvec, tlen, giststate);
+
+		/*
+		 * Set up pages to work with. Allocate new buffers for all but the
+		 * leftmost page. The original page becomes the new leftmost page, and
+		 * is just replaced with the new contents.
+		 *
+		 * For a root-split, allocate new buffers for all child pages, the
+		 * original page is overwritten with new root page containing
+		 * downlinks to the new child pages.
+		 */
+		ptr = dist;
+		if (!is_rootsplit)
+		{
+			oldrlink = GistPageGetOpaque(page)->rightlink;
+			dist->buffer = buffer;
+			dist->block.blkno = BufferGetBlockNumber(buffer);
+			dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
+
+			/* clean all flags except F_LEAF */
+			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
+
+			ptr = ptr->next;
+		}
+		for (; ptr; ptr = ptr->next)
+		{
+			/* Allocate new page */
+			ptr->buffer = gistNewBuffer(state->r);
+			GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
+			ptr->page = BufferGetPage(ptr->buffer);
+			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
+		}		
+
+		/*
+		 * Now that we know which blocks the new pages go to, set up downlink
+		 * tuples to point to them.
+		 */
+		for (ptr = dist; ptr; ptr = ptr->next)
+		{
+			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
+			GistTupleSetValid(ptr->itup);
+		}
+
+		{
+			StringInfoData sb;
+			initStringInfo(&sb);
+			for (ptr = dist; ptr; ptr = ptr->next)
+				appendStringInfo(&sb, "%u ", ptr->block.blkno);
+		}
+
+		if (is_rootsplit)
+		{
+			/*
+			 * Adjust the top element in the insert stacks for the new root
+			 * page.
+			 */
+			GISTLoadedPartItem *oldroot = gfbb->rootitem;
+
+			gfbb->rootitem = (GISTLoadedPartItem *) MemoryContextAlloc(gfbb->context,
+																	   sizeof(GISTLoadedPartItem));
+			gfbb->rootitem->parent = NULL;
+			gfbb->rootitem->blkno = GIST_ROOT_BLKNO;
+			gfbb->rootitem->downlinkoffnum = InvalidOffsetNumber;
+			gfbb->rootitem->level = oldroot->level + 1;
+
+			oldroot->parent = gfbb->rootitem;
+			oldroot->blkno = dist->next->block.blkno;
+			oldroot->downlinkoffnum = InvalidOffsetNumber;
+		}
+
+		/* Maintain node buffers and loaded tree parts on split */
+		relocateBuildBuffersOnSplit(giststate->gfbb, giststate, state->r,
+									path, buffer, dist);
+
+		/*
+		 * If this is a root split, we construct the new root page with the
+		 * downlinks here directly, instead of do recursive call  for their
+		 * insertion. Add the new root page to the list along with the child
+		 * pages.
+		 */
+		if (is_rootsplit)
+		{
+			IndexTuple *downlinks;
+			int			ndownlinks = 0;
+			int			i;
+
+			rootpg.buffer = buffer;
+			rootpg.page = PageGetTempPageCopySpecial(
+				BufferGetPage(rootpg.buffer));
+			GistPageGetOpaque(rootpg.page)->flags = 0;
+
+			/* Prepare a vector of all the downlinks */
+			for (ptr = dist; ptr; ptr = ptr->next)
+				ndownlinks++;
+			downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
+			for (i = 0, ptr = dist; ptr; ptr = ptr->next)
+				downlinks[i++] = ptr->itup;
+
+			rootpg.block.blkno = GIST_ROOT_BLKNO;
+			rootpg.block.num = ndownlinks;
+			rootpg.list = gistfillitupvec(downlinks, ndownlinks,
+										  &(rootpg.lenlist));
+			rootpg.itup = NULL;
+
+			rootpg.next = dist;
+			dist = &rootpg;
+		}
+		
+		/*
+		 * Fill all pages. All the pages are new, ie. freshly allocated empty
+		 * pages, or a temporary copy of the old page.
+		 */
+		for (ptr = dist; ptr; ptr = ptr->next)
+		{
+			char	   *data = (char *) (ptr->list);
+
+			for (i = 0; i < ptr->block.num; i++)
+			{
+				if (PageAddItem(ptr->page, 
+							    (Item) data, IndexTupleSize((IndexTuple) data), 
+								i + FirstOffsetNumber, false, false) == 
+					InvalidOffsetNumber)
+					elog(ERROR, "failed to add item to index page in \"%s\"", 
+						RelationGetRelationName(state->r));
+				data += IndexTupleSize((IndexTuple) data);
+			}
+			
+			/* Set up rightlinks */
+			if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
+				GistPageGetOpaque(ptr->page)->rightlink =
+					ptr->next->block.blkno;
+			else
+				GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
+		}
+
+		/* Mark buffers dirty */
+		for (ptr = dist; ptr; ptr = ptr->next)
+			MarkBufferDirty(ptr->buffer);
+
+		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
+		dist->page = BufferGetPage(dist->buffer);
+
+		/* TODO: Write the WAL record */
+#ifdef NOT_IMPLEMENTED
+		if (RelationNeedsWAL(state->r))
+			recptr = gistXLogSplit(state->r->rd_node, blkno, is_leaf,
+								   dist, oldrlink, oldnsn, leftchildbuf);
+		else
+			recptr = GetXLogRecPtrForTemp();
+#else
+		recptr.xlogid = 0;
+		recptr.xrecoff = 1;
+#endif
+		
+		for (ptr = dist; ptr; ptr = ptr->next)
+		{
+			PageSetLSN(ptr->page, recptr);
+			PageSetTLI(ptr->page, ThisTimeLineID);
+		}
+
+		/* Release buffers, except the one holding the inserted/updated tuple */
+		for (ptr = dist; ptr; ptr = ptr->next)
+		{
+			if (BufferIsValid(ptr->buffer))
+				UnlockReleaseBuffer(ptr->buffer);
+		}
+
+		/*
+		 * If it wasn't root split, we have to insert downlinks to parent page.
+		 */
+		if (!is_rootsplit)
+		{
+			IndexTuple *itups;
+			int cnt = 0, i;
+			
+			for (ptr = dist; ptr; ptr = ptr->next)
+			{
+				cnt++;
+			}
+			itups = (IndexTuple *)palloc(sizeof(IndexTuple) * cnt);
+			i = 0;
+			for (ptr = dist; ptr; ptr = ptr->next)
+			{
+				itups[i] = ptr->itup;
+				i++;
+			}
+
+			if (!is_rootsplit)
+				gistFindCorrectParent(giststate, state->r, path);
+
+			bufferingbuildinsert(state, path->parent, giststate, itups,
+								 cnt,
+								 is_rootsplit ? InvalidOffsetNumber : path->downlinkoffnum);
+		}
+	}
+	else
+	{
+		/*
+		 * Enough of space. Just insert index tuples to the page.
+		 */
+		gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
+	}
+
+	return is_split;
+}
+
+/*
+ * Process index tuple. Run index tuple down until it meet leaf page or
+ * node buffer.
+ */
+static void
+processItup(GISTSTATE *giststate, GISTInsertState *state, 
+			GISTBuildBuffers *gfbb, IndexTuple itup,
+			GISTLoadedPartItem *startparent)
+{
+	GISTLoadedPartItem *path;
+	BlockNumber childblkno; 
+	Buffer		buffer;
+
+	if (!startparent)
+		path = gfbb->rootitem;
+	else
+		path = startparent;
+
+	/*
+	 * Loop until we are on leaf page (level == 0) or we reach level with 
+	 * buffers (if it wasn't level that we've start at).
+	 */
+	for (;;)
+	{
+		ItemId		iid;
+		IndexTuple	idxtuple, newtup;
+		Page		page;
+		OffsetNumber childoffnum;
+		GISTLoadedPartItem *parent;
+
+		if (path != startparent && LEVEL_HAS_BUFFERS(path->level, gfbb))
+			break;
+
+		if (path->level == 0)
+			break;
+
+		/* Choose child for insertion */
+		buffer = ReadBuffer(state->r, path->blkno);
+		LockBuffer(buffer, GIST_EXCLUSIVE);
+
+		page = (Page) BufferGetPage(buffer);
+		childoffnum = gistchoose(state->r, page, itup, giststate);
+		iid = PageGetItemId(page, childoffnum);
+		idxtuple = (IndexTuple) PageGetItem(page, iid);
+		childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+
+		/* Adjust key representing child if needed */
+		newtup = gistgetadjusted(state->r, idxtuple, itup, giststate);
+
+		UnlockReleaseBuffer(buffer);
+
+		if (newtup)
+			bufferingbuildinsert(state, path, giststate, 
+								 &newtup, 1, childoffnum);
+
+		/* descend */
+		parent = path;
+		path = (GISTLoadedPartItem *) MemoryContextAlloc(gfbb->context,
+														 sizeof(GISTLoadedPartItem));
+		path->parent = parent;
+		path->level = parent->level - 1;
+		path->blkno = childblkno;
+		path->downlinkoffnum = childoffnum;
+	}
+
+	if (LEVEL_HAS_BUFFERS(path->level, gfbb))
+	{
+		/*
+		 * We've reached level with buffers. Now place index tuple to the
+		 * buffer and add buffer emptying stack element if buffer overflows.
+		 */
+		bool wasOverflowed;
+		NodeBuffer *childNodeBuffer;
+
+		childNodeBuffer = getNodeBuffer(gfbb, giststate, path->blkno, path->downlinkoffnum, path->parent, true);
+		wasOverflowed = BUFFER_IS_OVERLOW(childNodeBuffer, gfbb);
+		pushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
+		if (!wasOverflowed &&  BUFFER_IS_OVERLOW(childNodeBuffer, gfbb))
+		{
+			MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+			gfbb->bufferEmptyingStack = lcons(childNodeBuffer, gfbb->bufferEmptyingStack);
+			MemoryContextSwitchTo(oldcxt);
+			path = NULL; /* don't free the allocated items */
+		}
+	}
+	else
+	{
+		/*
+		 * We've reached leaf level. So, place index tuple here.
+		 */
+		bufferingbuildinsert(state, path, giststate, &itup, 1, InvalidOffsetNumber);
+	}
+
+	if (path)
+	{
+		while(path != startparent && path != gfbb->rootitem)
+		{
+			GISTLoadedPartItem *parent = path->parent;
+			pfree(path);
+			path = parent;
+		}
+	}
+}
+
+
+static void
+gistFindCorrectParent(GISTSTATE *giststate, Relation r, GISTLoadedPartItem *child)
+{
+	GISTBuildBuffers *gfbb = giststate->gfbb;
+	GISTLoadedPartItem *parent = child->parent;
+	OffsetNumber i,
+		maxoff;
+	ItemId		iid;
+	IndexTuple	idxtuple;
+	Buffer buffer;
+	Page page;
+	bool copied = false;
+
+	buffer = ReadBuffer(r, parent->blkno);
+	page = BufferGetPage(buffer);
+	LockBuffer(buffer, GIST_EXCLUSIVE);
+	gistcheckpage(r, buffer);
+
+	/* Check if it has not moved */
+	if (child->downlinkoffnum != InvalidOffsetNumber)
+	{
+		iid = PageGetItemId(page, child->downlinkoffnum);
+		idxtuple = (IndexTuple) PageGetItem(page, iid);
+		if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
+		{
+			/* Still there */
+			UnlockReleaseBuffer(buffer);
+			return;
+		}
+	}
+
+	/* parent is changed, look child in right links until found */
+	while (true)
+	{
+		maxoff = PageGetMaxOffsetNumber(page);
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+		{
+			iid = PageGetItemId(page, i);
+			idxtuple = (IndexTuple) PageGetItem(page, iid);
+			if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
+			{
+				/* yes!!, found */
+				child->downlinkoffnum = i;
+				UnlockReleaseBuffer(buffer);
+				return;
+			}
+		}
+
+		if (!copied)
+		{
+			parent = (GISTLoadedPartItem *) MemoryContextAlloc(gfbb->context,
+															   sizeof(GISTLoadedPartItem));
+			memcpy(parent, child->parent, sizeof(GISTLoadedPartItem));
+			copied = true;
+		}
+
+		parent->blkno = GistPageGetOpaque(page)->rightlink;
+		UnlockReleaseBuffer(buffer);
+
+		if (parent->blkno == InvalidBlockNumber)
+		{
+			/*
+			 * End of chain and still didn't find parent. Should not happen
+			 * during index build.
+			 */
+			break;
+		}
+
+		/* Next page */
+
+		buffer = ReadBuffer(r, parent->blkno);
+		page = BufferGetPage(buffer);
+		LockBuffer(buffer, GIST_EXCLUSIVE);
+		gistcheckpage(r, buffer);
+	}
+
+	elog(ERROR, "failed to re-find parent for block %u", child->blkno);
+}
+
+/*
+ * Process buffers emptying stack. Emptying of one buffer can cause emptying 
+ * of other buffers. This function iterates until this cascading emptying
+ * process finished, e.g. until buffers emptying stack is empty.
+ */
+static void
+processEmptyingStack(GISTSTATE *giststate, GISTInsertState *state)
+{
+	GISTBuildBuffers *gfbb = giststate->gfbb;
+	int requiredSizeDecrease = gfbb->pagesPerBuffer * BLCKSZ;
+	
+	/* Iterate while we have elements in buffers emptying stack. */	
+	while (gfbb->bufferEmptyingStack != NIL)
+	{
+		int initialBusySize, busySize;		
+		NodeBuffer *emptyingNodeBuffer;
+		
+		/* Remove element from the stack and prepare for emptying. */
+		emptyingNodeBuffer = (NodeBuffer *) linitial(gfbb->bufferEmptyingStack);
+		gfbb->bufferEmptyingStack = list_delete_first(gfbb->bufferEmptyingStack);
+
+		initialBusySize = getNodeBufferBusySize(emptyingNodeBuffer);
+		gfbb->currentEmptyingBufferSplit = false;
+		
+		while(true)
+		{
+			IndexTuple itup;
+
+			/* Get one index tuple from node buffer and process it */			
+			if (!popItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
+				break;
+			processItup(giststate, state, gfbb, itup,
+						emptyingNodeBuffer->path);
+
+			/* Free all the memory allocated during index tuple processing */	
+			MemoryContextReset(CurrentMemoryContext);
+
+			/* 
+			 * If current emptying node buffer split we should stop emptying
+			 * just because there is just no such node buffer anymore.
+			 */
+			if (gfbb->currentEmptyingBufferSplit)
+				break;
+			
+			busySize = getNodeBufferBusySize(emptyingNodeBuffer);
+			
+			/* 
+			 * If we've processed half of buffer size limit and buffer is not
+			 * overflowed anymore we should stop in order to avoid exceeding
+			 * of limit in lower buffers.
+			 */
+			if (initialBusySize - busySize >= requiredSizeDecrease && 
+				!BUFFER_IS_OVERLOW(emptyingNodeBuffer, gfbb))
+				break;				
+		}
+	}
+}
+
+/*
+ * Insert function for buffering index build.
+ */
+static void
+gistBufferingBuildInsert(Relation index, IndexTuple itup, 
+						 GISTBuildState *buildstate)
+{
+	GISTBuildBuffers *gfbb = buildstate->giststate.gfbb;
+	GISTInsertState insertstate;
+	
+	memset(&insertstate, 0, sizeof(GISTInsertState));
+	insertstate.freespace = RelationGetTargetPageFreeSpace(index, 
+													GIST_DEFAULT_FILLFACTOR);
+	insertstate.r = index;
+	
+	/* We are ready for index tuple processing */
+	processItup(&buildstate->giststate, &insertstate, gfbb, itup, NULL);
+	
+	/* Process buffer emptying stack if any */
+	processEmptyingStack(&buildstate->giststate, &insertstate);
+}
+
+/*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+static void
+gistBuildCallback(Relation index,
+				  HeapTuple htup,
+				  Datum *values,
+				  bool *isnull,
+				  bool tupleIsAlive,
+				  void *state)
+{
+	GISTBuildState *buildstate = (GISTBuildState *) state;
+	IndexTuple	itup;
+	MemoryContext oldCtx;
+
+	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+	/* form an index tuple and point it at the heap tuple */
+	itup = gistFormTuple(&buildstate->giststate, index,
+						 values, isnull, true /* size is currently bogus */ );
+	itup->t_tid = htup->t_self;
+	
+	if (buildstate->bufferingMode == 'y')
+	{
+		/* We've decided to use buffering. So let's use buffering insert. */
+		gistBufferingBuildInsert(index, itup, buildstate);
+	}
+	else
+	{
+		BlockNumber		leafBlocknum;
+		LeafPageInfo   *leafInfo;
+		bool			found;
+		
+		/* We didn't decide to use buffering yet or aren't goint to use it at
+		 * all. Since we already have the index relation locked, we call
+		 * gistdoinsert directly.  Normal access method calls dispatch through
+		 * gistinsert, which locks the relation for write.	This is the right
+		 * thing to do if you're inserting single tups, but not when you're
+		 * initializing the whole index at once.
+		 *
+		 * In this path we respect the fillfactor setting, whereas insertions
+		 * after initial build do not.
+		 */
+		leafBlocknum = gistdoinsert(index, itup,
+				 RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
+					 &buildstate->giststate);
+		
+		if (buildstate->bufferingMode == 'a')
+		{
+			/* Add information about leaf pages accesses */
+			leafInfo = (LeafPageInfo *) hash_search(buildstate->leafPagesTab,
+												 (const void *) &leafBlocknum,
+												 HASH_ENTER, 
+												 &found);
+			leafInfo->lastTupleNumber = buildstate->indtuples;
+		}
+		
+	}
+	
+	buildstate->indtuples += 1;
+	MemoryContextSwitchTo(oldCtx);
+	MemoryContextReset(buildstate->tmpCtx);
+	
+	if (buildstate->bufferingMode == 'a' &&
+		buildstate->indtuples % LEAF_PAGES_STATS_STEP == 0)
+	{
+		HASH_SEQ_STATUS scan_status;
+		LeafPageInfo   *leafInfo;
+		int				removedItemsCount = 0;
+		BlockNumber		indexSize;
+			
+		/* 
+		 * Count leaf pages which was accessed by previous chunk of index
+		 * tuples and wasn't accessed by last chunk of index tuples.
+		 */
+		hash_seq_init(&scan_status, buildstate->leafPagesTab);		
+		while((leafInfo = (LeafPageInfo *)hash_seq_search(&scan_status)) != NULL)
+		{
+			if (leafInfo->blockNumber < buildstate->indtuples - LEAF_PAGES_STATS_STEP)
+			{
+				if (hash_search(buildstate->leafPagesTab, 
+								(const void *) &leafInfo->blockNumber,
+								HASH_REMOVE, NULL) == NULL)
+					elog(ERROR, "hash table corrupted");
+				removedItemsCount++;
+			}
+		}
+		buildstate->nonHitLeafPagesStats[buildstate->nonHitLeafPagesStatsIndex] =
+			removedItemsCount;
+		buildstate->nonHitLeafPagesStatsIndex = 
+			(buildstate->nonHitLeafPagesStatsIndex + 1) % LEAF_PAGES_STATS_COUNT;
+		
+		indexSize = smgrnblocks(index->rd_smgr, MAIN_FORKNUM);
+
+		/*
+		 * Check if we are going to switch to buffering build.
+		 */
+		if (buildstate->bufferingMode == 'a' && 
+			effective_cache_size < indexSize && 
+			indexSize >= 2 * LEAF_PAGES_STATS_STEP)
+		{
+			int i, nonHitLeafPages = 0;
+			double lambda, factor;
+			for (i = 0; i < LEAF_PAGES_STATS_COUNT; i++)
+			{
+				nonHitLeafPages += buildstate->nonHitLeafPagesStats[i];
+			}
+			/* Calculate parameter of exponential distribution. */
+			lambda = (-1.0 / (double)LEAF_PAGES_STATS_STEP) * 
+					 log((double)nonHitLeafPages / (double)(LEAF_PAGES_STATS_STEP * LEAF_PAGES_STATS_COUNT));
+			
+			/* Estimate portion index tuples which can be processed without IO */			
+			factor = (1 - exp(- (double)effective_cache_size * lambda)) / 
+					 (1 - exp(- (double)indexSize * lambda));
+
+			/* If estimated portion exceeds threshold then switch to buffering build */			
+			if (factor < 0.95)
+			{
+				if (initBuffering(buildstate, index))
+				{
+					/*
+					 * Buffering build is successfully initialized. Now we can
+					 * set appropriate flag.
+					 */
+					buildstate->bufferingMode = 'y';
+					elog(INFO, "switching to buffered mode");
+				}
+				else
+				{
+					/*
+					 * Failed to switch to buffering build due to not enough
+					 * memory settings. Mark that we aren't going to switch
+					 * anymore.
+					 */
+					buildstate->bufferingMode = 'n';
+				}
+			}
+		}	
+	}
+}
+
+/*
+ * Initial calculations for GiST buffering build.
+ */
+static bool
+initBuffering(GISTBuildState *buildstate, Relation index)
+{
+	int			pagesPerBuffer = -1;
+	bool		neighborRelocation = true;
+	Size		pageFreeSpace;
+	Size		itupMinSize;
+	int			i, maxIndexTuplesCount;
+	int			effectiveMemory;
+	int			levelStep = 0;
+	GISTBuildBuffers *gfbb;
+	
+	/* try to get user difened options */		
+	if (index->rd_options)
+	{
+		GiSTOptions *options = (GiSTOptions *)index->rd_options;
+		levelStep = options->levelStep;
+		pagesPerBuffer = options->bufferSize;
+		neighborRelocation = options->neighborRelocation;
+	}
+
+	/* calc number of index tuples which fit in page */		
+	pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - 
+		sizeof(GISTPageOpaqueData) - sizeof(ItemIdData);
+	itupMinSize = (Size)MAXALIGN(sizeof(IndexTupleData));
+	for (i = 0; i < index->rd_att->natts; i++)
+	{
+		if (index->rd_att->attrs[i]->attlen < 0)
+			itupMinSize += VARHDRSZ;
+		else
+			itupMinSize += index->rd_att->attrs[i]->attlen;
+	}
+	maxIndexTuplesCount = pageFreeSpace / itupMinSize;
+
+	/* calculate level step if it isn't specified by user */		
+	effectiveMemory = Min(maintenance_work_mem * 1024 / BLCKSZ,
+						  effective_cache_size);
+	if (levelStep <= 0)
+	{
+		levelStep = (int)log((double)effectiveMemory / 4.0) / 
+			log((double)maxIndexTuplesCount);
+		if (levelStep < 0) 
+			levelStep = 0;
+	}
+
+	/* calculate buffer size if it isn't specified by user */
+	if (pagesPerBuffer <= 0)
+	{
+		pagesPerBuffer = 2;
+		for (i = 0; i < levelStep; i++)
+		{
+			pagesPerBuffer *= maxIndexTuplesCount;
+		}
+	}
+
+	if (levelStep > 0)
+	{
+		/* Enough of memory for at least level_step == 1. */
+		gfbb = palloc(sizeof(GISTBuildBuffers));
+		gfbb->pagesPerBuffer = pagesPerBuffer;
+		gfbb->levelStep = levelStep;
+		gfbb->neighborRelocation = neighborRelocation;
+		initGiSTBuildBuffers(gfbb);
+		buildstate->giststate.gfbb = gfbb;
+		elog(INFO, "Level step = %d, pagesPerBuffer = %d", levelStep, 
+			 pagesPerBuffer);
+		return true;
+	}
+	else
+	{
+		/* Not enough memory for buffering build. */
+		return false;
+	}
+}
+
+/*
+ * Routine to build an index.  Basically calls insert over and over.
+ *
+ * XXX: it would be nice to implement some sort of bulk-loading
+ * algorithm, but it is not clear how to do that.
+ */
+Datum
+gistbuild(PG_FUNCTION_ARGS)
+{
+	Relation	heap = (Relation) PG_GETARG_POINTER(0);
+	Relation	index = (Relation) PG_GETARG_POINTER(1);
+	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
+	IndexBuildResult *result;
+	double		reltuples;
+	GISTBuildState buildstate;
+	Buffer		buffer;
+	Page		page;
+	HASHCTL		hashCtl;
+	int			i;
+	MemoryContext oldcxt = CurrentMemoryContext;
+	
+	if (index->rd_options)
+	{
+		/* Get buffering mode from the options string */
+		GiSTOptions *options = (GiSTOptions *)index->rd_options;
+		char *bufferingMode = (char *)options + options->bufferingModeOffset;
+		if (!strcmp(bufferingMode, "on"))
+			buildstate.bufferingMode = 'y';
+		if (!strcmp(bufferingMode, "off"))
+			buildstate.bufferingMode = 'n';
+		if (!strcmp(bufferingMode, "auto"))
+			buildstate.bufferingMode = 'a';
+	}
+	else
+	{
+		/* Automatic buffering mode switching by default */
+		buildstate.bufferingMode = 'a';
+	}
+	
+	if (buildstate.bufferingMode == 'a')
+	{
+		/* Init hash for tracking leaf pages accesses */
+		hashCtl.keysize = sizeof(BlockNumber);
+		hashCtl.entrysize = sizeof(LeafPageInfo);
+		hashCtl.hcxt = CurrentMemoryContext;	
+		hashCtl.hash = tag_hash;
+		hashCtl.match = memcmp;
+		buildstate.leafPagesTab = hash_create(
+					"leafpagestab", 
+					2 * LEAF_PAGES_STATS_STEP, 
+					&hashCtl,
+					HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
+	}
+	else
+		buildstate.leafPagesTab = NULL;
+
+	/*
+	 * We expect to be called exactly once for any index relation. If that's
+	 * not the case, big trouble's what we have.
+	 */
+	if (RelationGetNumberOfBlocks(index) != 0)
+		elog(ERROR, "index \"%s\" already contains data",
+			 RelationGetRelationName(index));
+
+	/* no locking is needed */
+	initGISTstate(&buildstate.giststate, index);
+	if (buildstate.bufferingMode == 'y')
+	{
+		if (!initBuffering(&buildstate, index))
+			buildstate.bufferingMode = 'n';
+	}
+
+	/* initialize the root page */
+	buffer = gistNewBuffer(index);
+	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
+	page = BufferGetPage(buffer);
+
+	START_CRIT_SECTION();
+
+	GISTInitBuffer(buffer, F_LEAF);
+
+	MarkBufferDirty(buffer);
+
+	if (RelationNeedsWAL(index))
+	{
+		XLogRecPtr	recptr;
+		XLogRecData rdata;
+
+		rdata.data = (char *) &(index->rd_node);
+		rdata.len = sizeof(RelFileNode);
+		rdata.buffer = InvalidBuffer;
+		rdata.next = NULL;
+
+		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+		PageSetLSN(page, recptr);
+		PageSetTLI(page, ThisTimeLineID);
+	}
+	else
+		PageSetLSN(page, GetXLogRecPtrForTemp());
+
+	UnlockReleaseBuffer(buffer);
+
+	END_CRIT_SECTION();
+
+	/* build the index */
+	buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
+	buildstate.indtuples = 0;
+	buildstate.nonHitLeafPagesStatsIndex = 0;
+	for (i = 0; i < LEAF_PAGES_STATS_COUNT; i++)
+		buildstate.nonHitLeafPagesStats[i] = 0;
+
+	/*
+	 * create a temporary memory context that is reset once for each tuple
+	 * inserted into the index
+	 */
+	buildstate.tmpCtx = createTempGistContext();
+	
+	/* 
+	 * Do the heap scan.
+	 */
+	reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+							   gistBuildCallback, (void *) &buildstate);
+	
+	/*
+	 * If buffering build do final node buffers emptying.
+	 */
+	if (buildstate.bufferingMode == 'y')
+	{
+		int i;
+		GISTInsertState insertstate;
+		MemoryContext oldCtx;
+		GISTBuildBuffers *gfbb = buildstate.giststate.gfbb;
+		NodeBuffer **buffers;
+		NodeBuffer *buf;
+		HASH_SEQ_STATUS scan_status;
+		int		nbuffers;
+
+		oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
+		
+		memset(&insertstate, 0, sizeof(GISTInsertState));
+		insertstate.freespace = RelationGetTargetPageFreeSpace(index, 
+													GIST_DEFAULT_FILLFACTOR);
+		insertstate.r = index;
+
+		for (;;)
+		{
+			/* Collect all buffers into array */
+			buffers = MemoryContextAlloc(gfbb->context,
+										 hash_get_num_entries(gfbb->nodeBuffersTab) * sizeof(NodeBuffer *));
+			nbuffers = 0;
+			/* Sort the buffers by level */
+			hash_seq_init(&scan_status, gfbb->nodeBuffersTab);
+			while ((buf = hash_seq_search(&scan_status)) != NULL)
+			{
+				if (buf->tuplesCount > 0)
+					buffers[nbuffers++] = buf;
+			}
+			if (nbuffers == 0)
+				break;
+
+			/*
+			 * Iterate through the buffers, from top to bottom
+			 */
+			qsort(buffers, nbuffers, sizeof(NodeBuffer *), bufferlevel_cmp);
+			for (i = 0; i < nbuffers; i++)
+			{
+				MemoryContext oldcxt;
+
+				oldcxt = MemoryContextSwitchTo(gfbb->context);
+				gfbb->bufferEmptyingStack = lcons(buffers[i], gfbb->bufferEmptyingStack);
+				MemoryContextSwitchTo(oldcxt);
+
+				processEmptyingStack(&buildstate.giststate, &insertstate);
+			}
+			/*
+			 * Emptying these buffers might've created new buffers, so iterate
+			 * until we're fully done
+			 */
+		}
+		for (i = 0; i < nbuffers; i++)
+		{
+			Assert(buffers[i]->tuplesCount == 0);
+		}
+	}
+	
+	/* okay, all heap tuples are indexed */
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(buildstate.tmpCtx);
+
+	freeGISTstate(&buildstate.giststate);
+
+	/*
+	 * Return statistics
+	 */
+	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+
+	result->heap_tuples = reltuples;
+	result->index_tuples = (double)buildstate.indtuples;
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * qsort comparator for sorting NodeBuffers by level.
+ */
+static int
+bufferlevel_cmp(const void *a, const void *b)
+{
+	NodeBuffer *abuf = *((NodeBuffer **) a);
+	NodeBuffer *bbuf = *((NodeBuffer **) b);
+
+	return (bbuf->level - abuf->level);
+}
+
+
+#ifdef GIST_DEBUG
+static void
+gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber coff, BOX *downlink, StringInfo out, int maxlevel) {
+	Buffer		buffer;
+	Page		page;
+	IndexTuple	which;
+	ItemId		iid;
+	OffsetNumber i,
+				maxoff;
+	BlockNumber cblk;
+	char	   *pred;
+
+	pred = (char *) palloc(sizeof(char) * level * 4 + 1);
+	MemSet(pred, ' ', level*4);
+	pred[level*4] = '\0';
+
+	buffer = ReadBuffer(r, blk);
+	page = (Page) BufferGetPage(buffer);
+
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	appendStringInfo(out, "%s%d(l:%d) blk: %d numTuple: %d free: %db(%.2f%%) rightlink:%u (%s) (%.5g, %.5g),(%.5g, %.5g)\n", 
+		pred,
+		coff, 
+		level, 
+		(int) blk,
+		(int) maxoff, 
+		PageGetFreeSpace(page),  
+		100.0*(((float)BLCKSZ)-(float)PageGetFreeSpace(page))/((float)BLCKSZ),
+		GistPageGetOpaque(page)->rightlink,
+					 ( GistPageGetOpaque(page)->rightlink == InvalidBlockNumber ) ? "InvalidBlockNumber" : "OK",
+					 downlink ? downlink->low.x : 0,
+					 downlink ? downlink->low.y : 0,
+					 downlink ? downlink->high.x : 0,
+					 downlink ? downlink->high.y :0 );
+
+	if (maxlevel<0 || level<maxlevel)
+	{
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+		{
+			bool isnull;
+			BOX *box;
+
+			iid = PageGetItemId(page, i);
+			which = (IndexTuple) PageGetItem(page, iid);
+
+			box = (BOX *)index_getattr(which, 1, RelationGetDescr(r), &isnull);
+
+			if (!GistPageIsLeaf(page))
+			{
+				cblk = ItemPointerGetBlockNumber(&(which->t_tid));
+				gist_dumptree(r, level + 1, cblk, i, box, out, maxlevel);
+			}
+			else
+			{
+				bool invalid = false;
+				if (downlink)
+				{
+					if (box->high.x > downlink->high.x)
+						invalid = true;
+					if (box->high.y > downlink->high.y)
+						invalid = true;
+					if (box->high.x < downlink->low.x)
+						invalid = true;
+					if (box->high.y < downlink->low.y)
+						invalid = true;
+				}
+
+				if (invalid)
+				appendStringInfo(out, "%s    leaf item %d points to %u/%u: (%.5g, %.5g)%s\n",
+								 pred, i,
+								 ItemPointerGetBlockNumber(&(which->t_tid)),
+								 ItemPointerGetOffsetNumber(&(which->t_tid)),
+								 box->high.x, box->high.y, invalid ? " INVALID" : "");
+			}
+		}
+	}
+	ReleaseBuffer(buffer);
+	pfree(pred);
+}
+
+static int
+gist_counttups(Relation r, BlockNumber blk)
+{
+	Buffer		buffer;
+	Page		page;
+	IndexTuple	which;
+	ItemId		iid;
+	OffsetNumber i,
+				maxoff;
+	BlockNumber cblk;
+	int			ntuples;
+
+	buffer = ReadBuffer(r, blk);
+	page = (Page) BufferGetPage(buffer);
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	ntuples = 0;
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		iid = PageGetItemId(page, i);
+		which = (IndexTuple) PageGetItem(page, iid);
+
+		if (!GistPageIsLeaf(page))
+		{
+			cblk = ItemPointerGetBlockNumber(&(which->t_tid));
+			ntuples += gist_counttups(r, cblk);
+		}
+		else
+			ntuples++;
+	}
+	ReleaseBuffer(buffer);
+
+	return ntuples;
+}
+
+static int
+gist_count_tuples_in_buffers(GISTBuildBuffers *gfbb)
+{
+	HASH_SEQ_STATUS scan_status;
+	NodeBuffer *buf;
+	int ntuples = 0;
+
+	hash_seq_init(&scan_status, gfbb->nodeBuffersTab);
+	while ((buf = hash_seq_search(&scan_status)) != NULL)
+		ntuples += buf->tuplesCount;
+	return ntuples;
+}
+#endif
diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c
new file mode 100644
index 0000000..02c91e7
--- /dev/null
+++ b/src/backend/access/gist/gistbuildbuffers.c
@@ -0,0 +1,600 @@
+/*-------------------------------------------------------------------------
+ *
+ * gistbuildbuffers.c
+ *	  buffers management functions for GiST buffering build algorithm.
+ *
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/gist/gistbuildbuffers.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gist_private.h"
+#include "catalog/index.h"
+#include "catalog/pg_collation.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/buffile.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+static NodeBufferPage *allocateNewPageBuffer(GISTBuildBuffers *gfbb);
+static void placeItupToPage(NodeBufferPage *pageBuffer, IndexTuple item);
+static void getItupFromPage(NodeBufferPage *pageBuffer, IndexTuple *item);
+static int freeBlocks_cmp(const void *a, const void *b);
+static long getFreeBlock(GISTBuildBuffers *gfbb);
+static void releaseBlock(GISTBuildBuffers *gfbb, long blocknum);
+
+/*
+ * Initialize GiST buffering build structure.
+ */
+void
+initGiSTBuildBuffers(GISTBuildBuffers * gfbb)
+{
+	HASHCTL		hashCtl;
+
+	/*
+	 * Create temporary file initialize data structures for free pages
+	 * management.
+	 */
+	gfbb->pfile = BufFileCreateTemp(true);
+	gfbb->nFileBlocks = 0;
+	gfbb->nFreeBlocks = 0;
+	gfbb->blocksSorted = false;
+	gfbb->freeBlocksLen = 32;
+	gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
+	
+	/*
+	 * Current memory context will be used for all in-memory data structures
+	 * of buffers.
+	 */
+	gfbb->context = CurrentMemoryContext;
+	
+	/*
+	 * nodeBuffersTab hash is association between index blocks and it's buffer.
+	 */
+	hashCtl.keysize = sizeof(BlockNumber);
+	hashCtl.entrysize = sizeof(NodeBuffer);
+	hashCtl.hcxt = CurrentMemoryContext;	
+	hashCtl.hash = tag_hash;
+	hashCtl.match = memcmp;
+	gfbb->nodeBuffersTab = hash_create(
+								"gistbuildbuffers", 
+								1024, 
+								&hashCtl,
+								HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
+	
+	/*
+	 * Stack of node buffers which was planned for emptying.
+	 */
+	gfbb->bufferEmptyingStack = NIL;
+	
+	gfbb->currentEmptyingBufferBlockNumber = InvalidBlockNumber;
+	gfbb->currentEmptyingBufferSplit = false;
+
+	gfbb->rootitem = (GISTLoadedPartItem *) MemoryContextAlloc(gfbb->context,
+															   sizeof(GISTLoadedPartItem));
+	gfbb->rootitem->parent = NULL;
+	gfbb->rootitem->blkno = GIST_ROOT_BLKNO;
+	gfbb->rootitem->downlinkoffnum = InvalidOffsetNumber;
+	gfbb->rootitem->level = 0;
+}
+
+/*
+ * Return NodeBuffer structure by it's block number. If createNew flag is
+ * specified then new NodeBuffer structure will be created on it's absence.
+ */
+NodeBuffer *
+getNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+			  BlockNumber nodeBlocknum,
+			  OffsetNumber downlinkoffnum,
+			  GISTLoadedPartItem *parent,
+			  bool createNew)
+{
+	NodeBuffer *nodeBuffer;
+	bool found;
+
+	/* Find nodeBuffer in hash table */
+	nodeBuffer = (NodeBuffer *) hash_search(gfbb->nodeBuffersTab,
+											 (const void *) &nodeBlocknum,
+											 createNew ? HASH_ENTER : HASH_FIND, 
+											 &found);
+	if (!found)
+	{
+		GISTLoadedPartItem *path;
+
+		/* 
+		 * Node buffer wasn't found. Create new if required.
+		 */
+		if (!createNew)
+			return NULL;
+
+		if (nodeBlocknum != GIST_ROOT_BLKNO)
+		{
+			path = (GISTLoadedPartItem *) MemoryContextAlloc(gfbb->context,
+															 sizeof(GISTLoadedPartItem));
+			path->parent = parent;
+			path->blkno = nodeBlocknum;
+			path->downlinkoffnum = downlinkoffnum;
+			path->level = parent->level - 1;
+			Assert(path->level > 0);
+		}
+		else
+			path = gfbb->rootitem;
+		
+		/*
+		 * New node buffer. Fill data structure with default values.
+		 */		
+		nodeBuffer->pageBuffer = NULL;
+		nodeBuffer->blocksCount = 0;
+		nodeBuffer->tuplesCount = 0;
+		nodeBuffer->level = path->level;
+		nodeBuffer->path = path;
+	}
+	else
+	{
+		Assert(nodeBuffer->path->parent == parent);
+	}
+
+	return nodeBuffer;
+}
+
+/*
+ * Allocate memory for buffer page.
+ */
+static NodeBufferPage *
+allocateNewPageBuffer(GISTBuildBuffers *gfbb)
+{
+	NodeBufferPage *pageBuffer;
+	/* Allocate memory for page in appropriate context. */
+	pageBuffer = (NodeBufferPage *) MemoryContextAlloc(gfbb->context, BLCKSZ);	
+	/* Set page free space */
+	PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - MAXALIGN(sizeof(uint32));	
+	return pageBuffer;
+}
+
+/*
+ * Add item to buffer page.
+ */
+static void
+placeItupToPage(NodeBufferPage *pageBuffer, IndexTuple itup)
+{
+	/* Get pointer to page free space start */
+	char *ptr = (char *)pageBuffer + PAGE_FREE_SPACE(pageBuffer)
+								   - MAXALIGN(IndexTupleSize(itup));
+	/* There should be enough of space */
+	Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(IndexTupleSize(itup)));
+	/* Copy index tuple to free space */
+	memcpy(ptr, itup, IndexTupleSize(itup));
+	/* Reduce free space value of page */
+	PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(IndexTupleSize(itup));
+}
+
+/*
+ * Get last item from buffer page and remove it from page.
+ */
+static void
+getItupFromPage(NodeBufferPage *pageBuffer, IndexTuple *itup)
+{
+	/* Get pointer to last index tuple */
+	IndexTuple ptr = (IndexTuple)((char *)pageBuffer + 
+								  PAGE_FREE_SPACE(pageBuffer));	
+	/* Page shouldn't be empty */
+	Assert(!PAGE_IS_EMPTY(pageBuffer));
+	/* Allocate memory for returned index tuple copy */
+	*itup = (IndexTuple)palloc(IndexTupleSize(ptr));
+	/* Copy data */
+	memcpy(*itup, ptr, IndexTupleSize(ptr));
+	/* Increase free space value of page */
+	PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(IndexTupleSize(*itup));	
+}
+
+/*
+ * Push new index tuple to node buffer.
+ */
+void
+pushItupToNodeBuffer(GISTBuildBuffers *gfbb, NodeBuffer *nodeBuffer, 
+					 IndexTuple itup)
+{
+	/*
+	 * Most part of memory operations will be in buffering build persistent
+	 * context. So, let's switch to it.
+	 */
+	MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+	/*
+	 * If there wasn't any data in buffer before then we should add this node
+	 * buffer to nonempty buffers list.
+	 * Allocate a page buffer if we don't have one yet.
+	 */
+	if (nodeBuffer->blocksCount == 0)
+	{
+		nodeBuffer->pageBuffer = allocateNewPageBuffer(gfbb);
+		nodeBuffer->pageBuffer->prev = InvalidBlockNumber;
+		nodeBuffer->blocksCount = 1;
+	}
+
+	/* Check if there is enough space on the last page for the tuple */
+	if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
+	{
+		/* Swap previous block to disk and allocate new one */
+		BlockNumber blkno;
+
+		nodeBuffer->blocksCount++;
+
+		blkno = getFreeBlock(gfbb);
+		BufFileSeekBlock(gfbb->pfile, blkno);
+		BufFileWrite(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+
+		nodeBuffer->pageBuffer = allocateNewPageBuffer(gfbb);
+		nodeBuffer->pageBuffer->prev = blkno;
+	}
+
+	placeItupToPage(nodeBuffer->pageBuffer, itup);
+
+	/* Increase tuples counter of node buffer */
+	nodeBuffer->tuplesCount++;
+
+	/* Restore memory context */
+	MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * Removes one index tuple from node buffer. Returns true if success and false
+ * if node buffer is empty.
+ */
+bool
+popItupFromNodeBuffer(GISTBuildBuffers *gfbb, NodeBuffer *nodeBuffer, 
+					  IndexTuple *itup)
+{
+	/* If node buffer is empty then return false. */
+	if (nodeBuffer->blocksCount <= 0)
+		return false;
+	
+	/* Get index tuple from last non-empty page and mark it as dirty. */
+	getItupFromPage(nodeBuffer->pageBuffer, itup);
+	
+	/* Check if the page which the index tuple was got from is now empty */	
+	if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
+	{
+		BlockNumber prevblkno;
+		/* 
+		 * If it's empty then we need to release buffer file block and free
+		 * page buffer.
+		 */
+		nodeBuffer->blocksCount--;
+
+		/* If there's more pages, fetch previous one */
+		prevblkno = nodeBuffer->pageBuffer->prev;
+		if (prevblkno != InvalidBlockNumber)
+		{
+			Assert(nodeBuffer->blocksCount > 0);
+			BufFileSeekBlock(gfbb->pfile, prevblkno);
+			BufFileRead(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+
+			releaseBlock(gfbb, prevblkno);
+		}
+		else
+		{
+			Assert(nodeBuffer->blocksCount == 0);
+			pfree(nodeBuffer->pageBuffer);
+			nodeBuffer->pageBuffer = NULL;
+		}
+	}
+	
+	/* Decrease node buffer tuples counter. */
+	nodeBuffer->tuplesCount--;
+	return true;
+}
+
+/*
+ * qsort comparator for sorting freeBlocks[] into decreasing order.
+ */
+static int
+freeBlocks_cmp(const void *a, const void *b)
+{
+	long		ablk = *((const long *) a);
+	long		bblk = *((const long *) b);
+
+	/* can't just subtract because long might be wider than int */
+	if (ablk < bblk)
+		return 1;
+	if (ablk > bblk)
+		return -1;
+	return 0;
+}
+
+/*
+ * Select a currently unused block for writing to.
+ *
+ * NB: should only be called when writer is ready to write immediately,
+ * to ensure that first write pass is sequential.
+ */
+static long
+getFreeBlock(GISTBuildBuffers *gfbb)
+{
+	/*
+	 * If there are multiple free blocks, we select the one appearing last in
+	 * freeBlocks[] (after sorting the array if needed).  If there are none,
+	 * assign the next block at the end of the file.
+	 */
+	if (gfbb->nFreeBlocks > 0)
+	{
+		if (!gfbb->blocksSorted)
+		{
+			qsort((void *) gfbb->freeBlocks, gfbb->nFreeBlocks,
+				  sizeof(long), freeBlocks_cmp);
+			gfbb->blocksSorted = true;
+		}
+		return gfbb->freeBlocks[--gfbb->nFreeBlocks];
+	}
+	else
+		return gfbb->nFileBlocks++;
+}
+
+/*
+ * Return a block# to the freelist.
+ */
+static void
+releaseBlock(GISTBuildBuffers *gfbb, long blocknum)
+{
+	int			ndx;
+
+	/*
+	 * Enlarge freeBlocks array if full.
+	 */
+	if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
+	{
+		gfbb->freeBlocksLen *= 2;
+		gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
+										  gfbb->freeBlocksLen * sizeof(long));
+	}
+
+	/*
+	 * Add blocknum to array, and mark the array unsorted if it's no longer in
+	 * decreasing order.
+	 */
+	ndx = gfbb->nFreeBlocks++;
+	gfbb->freeBlocks[ndx] = blocknum;
+	if (ndx > 0 && gfbb->freeBlocks[ndx - 1] < blocknum)
+		gfbb->blocksSorted = false;
+}
+
+/*
+ * Free buffering build data structure.
+ */
+void 
+freeGiSTBuildBuffers(GISTBuildBuffers *gfbb)
+{
+	/* Close buffers file. */
+	BufFileClose(gfbb->pfile);
+	/* All other things will be free on memory context release */
+}
+
+/*
+ * Data structure representing information about node buffer for index tuples
+ * relocation from splitted node buffer.
+ */
+typedef struct
+{
+	GISTENTRY			entry[INDEX_MAX_KEYS];
+	bool				isnull[INDEX_MAX_KEYS];
+	SplitedPageLayout  *dist;
+	NodeBuffer		   *nodeBuffer;
+	OffsetNumber		offnum;
+} RelocationBufferInfo;
+
+/*
+ * Maintain data structures on page split.
+ */
+void
+relocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+							Relation r, GISTLoadedPartItem *path,
+							Buffer buffer, SplitedPageLayout *dist)
+{
+	RelocationBufferInfo *relocationBuffersInfos;
+	bool found;
+	NodeBuffer *nodeBuffer;
+	BlockNumber blocknum;	
+	IndexTuple itup;
+	int splitPagesCount = 0, i;
+	GISTENTRY	entry[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	SplitedPageLayout *ptr;
+	int level = path->level;
+	NodeBuffer nodebuf;
+	SplitedPageLayout *last = NULL;
+
+	blocknum = BufferGetBlockNumber(buffer);
+
+	/*
+	 * If splitted page level doesn't have buffers, then everything is done.
+	 * Otherwise we also need to relocate index tuples of buffer of splitted 
+	 * page.
+	 */
+	if (!LEVEL_HAS_BUFFERS(level, gfbb))
+		return;
+
+	/*
+	 * Get pointer of node buffer of splitted page and remove it from the hash.
+	 */	
+	nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
+							 HASH_FIND, &found);
+	if (!found)
+	{
+		/*
+		 * Node buffer should anyway be created at this moment. Either by
+		 * index tuples insertion or page split.
+		 */
+		elog(ERROR, "node buffer of splitting page (%u) doesn't exists while it should.", blocknum);
+	}
+
+	/*
+	 * We are going to perform some operations with node buffers hash. Thus,
+	 * it unsafe to operate with already removed hash item. Let's save it.
+	 */		
+	memcpy(&nodebuf, nodeBuffer, sizeof(NodeBuffer));
+	nodeBuffer = &nodebuf;
+
+	hash_search(gfbb->nodeBuffersTab, &blocknum, HASH_REMOVE, &found);
+	Assert(found);
+
+	/*
+	 * Count pages produced by split and save pointer data structure of
+	 * the last one.
+	 */
+	for (ptr = dist; ptr; ptr = ptr->next)
+	{
+		last = ptr;
+		splitPagesCount++;
+	}
+
+	/* Allocate memory for information about relocation buffers. */
+	relocationBuffersInfos = (RelocationBufferInfo *)palloc(
+		sizeof(RelocationBufferInfo) * splitPagesCount);
+
+	/*
+	 * Fill relocation buffers information for node buffers of pages
+	 * produced by split.
+	 */		
+	i = 0;
+	for (ptr = dist; ptr; ptr = ptr->next)
+	{
+		/* Decompress parent index tuple of node buffer page. */
+		gistDeCompressAtt(giststate, r,
+						  ptr->itup, NULL, (OffsetNumber) 0,
+						  relocationBuffersInfos[i].entry, 
+						  relocationBuffersInfos[i].isnull);
+
+		/* Fill relocation information */
+		relocationBuffersInfos[i].nodeBuffer = 
+			getNodeBuffer(gfbb, giststate, ptr->block.blkno,
+						  path->downlinkoffnum, path->parent, true);
+
+		/* Fill node buffer structure */
+		relocationBuffersInfos[i].dist = ptr;
+
+		i++;
+	}
+
+	/*
+	 * Loop of index tuples relocation.
+	 */		
+	while (popItupFromNodeBuffer(gfbb, nodeBuffer, &itup))
+	{
+		float sum_grow,	which_grow[INDEX_MAX_KEYS];
+		int i, which;
+		IndexTuple newtup;
+		bool wasOverflow;
+
+		/* Choose node buffer for index tuple insert. */
+
+		gistDeCompressAtt(giststate, r,
+						  itup, NULL, (OffsetNumber) 0,
+						  entry, isnull);
+
+		which = -1;
+		*which_grow = -1.0f;
+		sum_grow = 1.0f;
+
+		for (i = 0; i < splitPagesCount && sum_grow; i++)
+		{
+			int j;
+			RelocationBufferInfo *splitPageInfo = & relocationBuffersInfos[i];
+
+			sum_grow = 0.0f;
+			for (j = 0; j < r->rd_att->natts; j++)
+			{
+				float		usize;
+
+				usize = gistpenalty(giststate, j, 
+									&splitPageInfo->entry[j], 
+									splitPageInfo->isnull[j],
+									&entry[j], isnull[j]);
+
+				if (which_grow[j] < 0 || usize < which_grow[j])
+				{
+					which = i;
+					which_grow[j] = usize;
+					if (j < r->rd_att->natts - 1 && i == 0)
+						which_grow[j + 1] = -1;
+					sum_grow += which_grow[j];
+				}
+				else if (which_grow[j] == usize)
+					sum_grow += usize;
+				else
+				{
+					sum_grow = 1;
+						break;
+				}
+			}
+		}
+			
+		wasOverflow = BUFFER_IS_OVERLOW(
+			relocationBuffersInfos[which].nodeBuffer, gfbb);
+
+		/* push item to selected node buffer */
+		pushItupToNodeBuffer(gfbb, relocationBuffersInfos[which].nodeBuffer, 
+							 itup);
+			
+		/* 
+		 * If node buffer was just overflowed then we should add it to the
+		 * emptying stack.
+		 */
+		if (BUFFER_IS_OVERLOW(relocationBuffersInfos[which].nodeBuffer, gfbb) &&
+			!wasOverflow)
+		{
+			MemoryContext oldcxt = CurrentMemoryContext;
+			MemoryContextSwitchTo(gfbb->context);
+			gfbb->bufferEmptyingStack = lcons(relocationBuffersInfos[which].nodeBuffer, gfbb->bufferEmptyingStack);
+			MemoryContextSwitchTo(oldcxt);
+		}
+
+		/* adjust tuple of parent page */
+		newtup = gistgetadjusted(r, relocationBuffersInfos[which].dist->itup, 
+								 itup, giststate);
+		if (newtup)
+		{
+			/*
+			 * Parent page index tuple expands. We need to update old
+			 * index tuple with the new one.
+			 */
+			gistDeCompressAtt(giststate, r,
+							  newtup, NULL, (OffsetNumber) 0,
+							  relocationBuffersInfos[which].entry, 
+							  relocationBuffersInfos[which].isnull);
+
+			relocationBuffersInfos[which].dist->itup = newtup;
+		}
+	}
+
+	if (blocknum == gfbb->currentEmptyingBufferBlockNumber)
+		gfbb->currentEmptyingBufferSplit = true;
+
+	pfree(relocationBuffersInfos);
+}
+
+/*
+ * Return size of node buffer occupied by stored index tuples.
+ */
+int
+getNodeBufferBusySize(NodeBuffer *nodeBuffer)
+{
+	int size;
+	
+	/* No occupied buffer file blocks means that node buffer is empty. */
+	if (nodeBuffer->blocksCount == 0)
+		return 0;
+	
+	/* We assume only the last page to be not fully filled. */
+	size = (BLCKSZ - MAXALIGN(sizeof(uint32))) * nodeBuffer->blocksCount;
+	size -= PAGE_FREE_SPACE(nodeBuffer->pageBuffer);
+	return size;	
+}
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 1754a10..ca1a0a3 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -670,13 +670,33 @@ gistoptions(PG_FUNCTION_ARGS)
 {
 	Datum		reloptions = PG_GETARG_DATUM(0);
 	bool		validate = PG_GETARG_BOOL(1);
-	bytea	   *result;
+	relopt_value *options;
+	GiSTOptions *rdopts;
+	int			numoptions;
+	static const relopt_parse_elt tab[] = {
+		{"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
+		{"levelstep", RELOPT_TYPE_INT, offsetof(GiSTOptions, levelStep)},
+		{"buffersize", RELOPT_TYPE_INT, offsetof(GiSTOptions, bufferSize)},
+		{"neighborrelocation", RELOPT_TYPE_BOOL, offsetof(GiSTOptions, neighborRelocation)},
+		{"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
+	};
 
-	result = default_reloptions(reloptions, validate, RELOPT_KIND_GIST);
+	options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
+							  &numoptions);
+
+	/* if none set, we're done */
+	if (numoptions == 0)
+		PG_RETURN_NULL();
+
+	rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
+
+	fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
+				   validate, tab, lengthof(tab));
+
+	pfree(options);
+
+	PG_RETURN_BYTEA_P(rdopts);
 
-	if (result)
-		PG_RETURN_BYTEA_P(result);
-	PG_RETURN_NULL();
 }
 
 /*
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 9fb20a6..b7f2bf8 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -17,13 +17,56 @@
 #include "access/gist.h"
 #include "access/itup.h"
 #include "storage/bufmgr.h"
+#include "storage/buffile.h"
 #include "utils/rbtree.h"
+#include "utils/hsearch.h"
+
+/* Has specified level buffers? */
+#define LEVEL_HAS_BUFFERS(level,gfbb) ((level) != 0 && (level) % (gfbb)->levelStep == 0)
+/* Is specified buffer at least halt-filled (should be planned for emptying)?*/
+#define BUFFER_IS_OVERLOW(nodeBuffer,gfbb) ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
 
 /* Buffer lock modes */
 #define GIST_SHARE	BUFFER_LOCK_SHARE
 #define GIST_EXCLUSIVE	BUFFER_LOCK_EXCLUSIVE
 #define GIST_UNLOCK BUFFER_LOCK_UNLOCK
 
+typedef struct
+{
+	BlockNumber prev;
+	uint32		freespace;
+	char		tupledata[1];
+} NodeBufferPage;
+
+/* Returns free space in node buffer page */
+#define PAGE_FREE_SPACE(nbp) (nbp->freespace)
+/* Checks if node buffer page is empty */
+#define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - offsetof(NodeBufferPage, tupledata))
+/* Checks if node buffers page don't contain sufficient space for index tuple */
+#define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
+										MAXALIGN(IndexTupleSize(itup)))
+
+/* Buffer of tree node data structure */
+typedef struct NodeBuffer
+{
+    /* number of page containing node */
+	BlockNumber	 nodeBlocknum;
+ 
+    /* count of blocks occupied by buffer */
+	int			 blocksCount;
+
+	NodeBufferPage *pageBuffer;
+
+    /* corresponding downlink number in parent page */
+	OffsetNumber downlinkoffnum;
+    /* level of corresponding node */
+	int          level;
+    /* number of tuples in buffer */
+	int          tuplesCount;
+
+	struct GISTLoadedPartItem *path;
+} NodeBuffer;
+
 /*
  * GISTSTATE: information needed for any GiST index operation
  *
@@ -43,6 +86,8 @@ typedef struct GISTSTATE
 
 	/* Collations to pass to the support functions */
 	Oid			supportCollation[INDEX_MAX_KEYS];
+    
+    struct GISTBuildBuffers *gfbb;
 
 	TupleDesc	tupdesc;
 } GISTSTATE;
@@ -225,6 +270,86 @@ typedef struct GISTInsertStack
 	struct GISTInsertStack *parent;
 } GISTInsertStack;
 
+/*
+ * Extended GISTInsertStack for buffering GiST index build. It additionally hold
+ * level number of page.
+ */
+typedef struct GISTLoadedPartItem
+{
+	/* current page */
+	BlockNumber blkno;
+
+	/* offset of the downlink in the parent page, that points to this page */
+	OffsetNumber downlinkoffnum;
+
+	/* pointer to parent */
+	struct GISTLoadedPartItem *parent;
+
+	/* level number */
+	int level;
+} GISTLoadedPartItem;
+
+/* List of non-empty node buffers on specific level */
+typedef struct
+{
+	NodeBuffer *first, *last;
+} NodeBuffersOnLevel;
+
+/*
+ * Data structure with general information about build buffers.
+ */
+typedef struct GISTBuildBuffers
+{
+	/* memory context which is persistent during buffering build */
+	MemoryContext	context;    
+	/* underlying files */
+	BufFile		   *pfile;
+	/* # of blocks used in underlying files */
+	long			nFileBlocks;
+	/* is freeBlocks[] currently in order? */
+	bool			blocksSorted;
+	/* resizable array of free blocks */
+	long		   *freeBlocks;
+	/* # of currently free blocks */
+	int				nFreeBlocks;
+	/* current allocated length of freeBlocks[] */
+	int				freeBlocksLen;
+
+	/* hash for buffers by block number*/
+	HTAB		   *nodeBuffersTab;
+
+	/* stack of buffers for emptying */
+	List		   *bufferEmptyingStack;
+	/* number of currently emptying buffer */
+	BlockNumber     currentEmptyingBufferBlockNumber;
+	/* whether currently emptying buffer was split - a signal to stop emptying */
+	bool            currentEmptyingBufferSplit;
+
+	/* whether to use neighbor relocation of buffers when split */
+	bool            neighborRelocation;
+	/* step of levels for buffers location */
+	int             levelStep;
+	/* maximal number of pages occupied by buffer */
+	int             pagesPerBuffer;
+
+	GISTLoadedPartItem *rootitem;
+} GISTBuildBuffers;
+
+/*
+ * Information about sub-tree level planned for load.
+ */
+typedef struct
+{
+	/* pages of sub-tree level */
+	GISTLoadedPartItem **items;
+	/* lenght of items array */
+	int itemsLen;
+	/* number of pages */
+	int itemsCount;
+	/* level number in whole tree */
+	int level;
+} SubtreeLevelInfo;
+
 typedef struct GistSplitVector
 {
 	GIST_SPLITVEC splitVector;	/* to/from PickSplit method */
@@ -286,6 +411,15 @@ extern Datum gistinsert(PG_FUNCTION_ARGS);
 extern MemoryContext createTempGistContext(void);
 extern void initGISTstate(GISTSTATE *giststate, Relation index);
 extern void freeGISTstate(GISTSTATE *giststate);
+BlockNumber gistdoinsert(Relation r,
+			 IndexTuple itup,
+			 Size freespace,
+			 GISTSTATE *GISTstate);
+bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
+				 GISTSTATE *giststate,
+				 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
+				 Buffer leftchild);
+
 
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 		  int len, GISTSTATE *giststate);
@@ -313,6 +447,19 @@ extern Datum gistgetbitmap(PG_FUNCTION_ARGS);
 
 /* gistutil.c */
 
+/*
+ * Storage type for GiST's reloptions
+ */
+typedef struct GiSTOptions
+{
+	int32		vl_len_;       /* varlena header (do not touch directly!) */
+	int			fillfactor;	   /* page fill factor in percent (0..100) */
+	int 		bufferingModeOffset;  /* use buffering build? */
+	bool		neighborRelocation;  /* use neighbor buffer relocation? */
+	int			levelStep;     /* level step in buffering build */
+	int			bufferSize;    /* buffer size in buffering build */
+} GiSTOptions;
+
 #define GiSTPageSize   \
 	( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
 
@@ -380,4 +527,14 @@ extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
 			   GistSplitVector *v, GistEntryVector *entryvec,
 			   int attno);
 
+/* gistbuildbuffers.c */
+
+void initGiSTBuildBuffers(GISTBuildBuffers * gfbb);
+extern NodeBuffer *getNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate, BlockNumber blkno, OffsetNumber downlinkoffnu, GISTLoadedPartItem *parent, bool createNew);
+void pushItupToNodeBuffer(GISTBuildBuffers *gfbb, NodeBuffer *nodeBuffer, IndexTuple item);
+bool popItupFromNodeBuffer(GISTBuildBuffers *gfbb, NodeBuffer *nodeBuffer, IndexTuple *item);
+void freeGiSTBuildBuffers(GISTBuildBuffers *gfbb);
+extern void relocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, Relation r, GISTLoadedPartItem *path, Buffer buffer, SplitedPageLayout *dist);
+int getNodeBufferBusySize(NodeBuffer *nodeBuffer);
+
 #endif   /* GIST_PRIVATE_H */