v1-0001-Precalculate-CompactAttribute-s-attcacheoff.patch

text/plain

Filename: v1-0001-Precalculate-CompactAttribute-s-attcacheoff.patch
Type: text/plain
Part: 0
Message: More speedups for tuple deformation
From 41f7dbbc560a026e2e311896056284fd60796cf0 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Tue, 31 Dec 2024 09:19:24 +1300
Subject: [PATCH v1 1/3] Precalculate CompactAttribute's attcacheoff

This allows code to be removed from the tuple deform routines which
shrinks down the code a little, which can make it run more quickly.
This also makes a dedicated deformer loop to deform the portion of the
tuple which has a known offset, which makes deforming much faster when
a leading set of the table's columns are non-NULL values and fixed-width
types.
---
 contrib/dblink/dblink.c                       |   2 +
 contrib/pg_buffercache/pg_buffercache_pages.c |   1 +
 contrib/pg_visibility/pg_visibility.c         |   2 +
 src/backend/access/brin/brin_tuple.c          |   1 +
 src/backend/access/common/heaptuple.c         | 317 ++++++----------
 src/backend/access/common/indextuple.c        | 355 +++++++-----------
 src/backend/access/common/tupdesc.c           |  56 +++
 src/backend/access/gin/ginutil.c              |   1 +
 src/backend/access/gist/gistscan.c            |   1 +
 src/backend/access/spgist/spgutils.c          |   4 +-
 src/backend/access/transam/twophase.c         |   1 +
 src/backend/access/transam/xlogfuncs.c        |   1 +
 src/backend/backup/basebackup_copy.c          |   3 +
 src/backend/catalog/index.c                   |   2 +
 src/backend/catalog/pg_publication.c          |   1 +
 src/backend/catalog/toasting.c                |   6 +
 src/backend/commands/explain.c                |   1 +
 src/backend/commands/functioncmds.c           |   1 +
 src/backend/commands/sequence.c               |   1 +
 src/backend/commands/tablecmds.c              |   4 +
 src/backend/executor/execSRF.c                |   2 +
 src/backend/executor/execTuples.c             | 303 +++++++--------
 src/backend/executor/nodeFunctionscan.c       |   2 +
 src/backend/parser/parse_relation.c           |   4 +-
 src/backend/parser/parse_target.c             |   2 +
 .../libpqwalreceiver/libpqwalreceiver.c       |   1 +
 src/backend/replication/walsender.c           |   5 +
 src/backend/utils/adt/acl.c                   |   1 +
 src/backend/utils/adt/genfile.c               |   1 +
 src/backend/utils/adt/lockfuncs.c             |   1 +
 src/backend/utils/adt/orderedsetaggs.c        |   1 +
 src/backend/utils/adt/pgstatfuncs.c           |   5 +
 src/backend/utils/adt/tsvector_op.c           |   1 +
 src/backend/utils/cache/relcache.c            |  20 +-
 src/backend/utils/fmgr/funcapi.c              |   6 +
 src/backend/utils/init/postinit.c             |   1 +
 src/backend/utils/misc/guc_funcs.c            |   5 +
 src/include/access/htup_details.h             |  19 +-
 src/include/access/itup.h                     |  20 +-
 src/include/access/tupdesc.h                  |  12 +
 src/include/access/tupmacs.h                  |  57 +++
 src/include/executor/tuptable.h               |   9 +-
 src/pl/plpgsql/src/pl_comp.c                  |   2 +
 .../modules/test_predtest/test_predtest.c     |   1 +
 44 files changed, 613 insertions(+), 629 deletions(-)

diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c
index 8bf8fc8ea2f..82dbabc8927 100644
--- a/contrib/dblink/dblink.c
+++ b/contrib/dblink/dblink.c
@@ -1045,6 +1045,7 @@ materializeQueryResult(FunctionCallInfo fcinfo,
 			TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 							   TEXTOID, -1, 0);
 			attinmeta = TupleDescGetAttInMetadata(tupdesc);
+			TupleDescFinalize(tupdesc);
 
 			oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
 			tupstore = tuplestore_begin_heap(true, false, work_mem);
@@ -1534,6 +1535,7 @@ dblink_get_pkey(PG_FUNCTION_ARGS)
 		 * C strings
 		 */
 		attinmeta = TupleDescGetAttInMetadata(tupdesc);
+		TupleDescFinalize(tupdesc);
 		funcctx->attinmeta = attinmeta;
 
 		if ((results != NULL) && (indnkeyatts > 0))
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 0c58e4b265c..976c38b9197 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -174,6 +174,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
 			TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
 							   INT4OID, -1, 0);
 
+		TupleDescFinalize(tupledesc);
 		fctx->tupdesc = BlessTupleDesc(tupledesc);
 
 		/* Allocate NBuffers worth of BufferCachePagesRec records. */
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 715f5cdd17c..7047895c5e8 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -469,6 +469,8 @@ pg_visibility_tupdesc(bool include_blkno, bool include_pd)
 		TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
 	Assert(a == maxattr);
 
+	TupleDescFinalize(tupdesc);
+
 	return BlessTupleDesc(tupdesc);
 }
 
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 43850ce8f48..1e0c2a44b7a 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -84,6 +84,7 @@ brtuple_disk_tupdesc(BrinDesc *brdesc)
 
 		MemoryContextSwitchTo(oldcxt);
 
+		TupleDescFinalize(tupdesc);
 		brdesc->bd_disktdesc = tupdesc;
 	}
 
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index b7820d692e2..c24ba949c11 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -497,20 +497,8 @@ heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
 /* ----------------
  *		nocachegetattr
  *
- *		This only gets called from fastgetattr(), in cases where we
- *		can't use a cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
+ *		This only gets called from fastgetattr(), in cases where the
+ *		attcacheoff is not set.
  *
  *		NOTE: if you need to change this code, see also heap_deform_tuple.
  *		Also see nocache_index_getattr, which is the same code for index
@@ -522,194 +510,101 @@ nocachegetattr(HeapTuple tup,
 			   int attnum,
 			   TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	HeapTupleHeader td = tup->t_data;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = td->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstnullattr;
+	bool		hasnulls = HeapTupleHasNulls(tup);
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
+	/*
+	 * If there are no NULLs before the required attnum, then we can start at
+	 * the highest attribute with a known offset, or the first attribute if
+	 * none have a cached offset.  If the tuple has no variable width types,
+	 * then we can use a slightly cheaper method of offset calculation, as we
+	 * just need to add the attlen to the aligned offset when skipping over
+	 * columns.  When the tuple contains variable-width types, we must use
+	 * att_addlength_pointer(), which does a bit more branching and is
+	 * slightly less efficient.
 	 */
-
 	attnum--;
 
-	if (!HeapTupleNoNulls(tup))
-	{
-		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if any preceding bits are null...
-		 */
-		int			byte = attnum >> 3;
-		int			finalbit = attnum & 0x07;
-
-		/* check for nulls "before" final bit of last byte */
-		if ((~bp[byte]) & ((1 << finalbit) - 1))
-			slow = true;
-		else
-		{
-			/* check for nulls in any "earlier" bytes */
-			int			i;
+	if (hasnulls)
+		firstnullattr = first_null_attr(bp, attnum);
+	else
+		firstnullattr = attnum;
 
-			for (i = 0; i < byte; i++)
-			{
-				if (bp[i] != 0xFF)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+	if (tupleDesc->firstNonCachedOffAttr >= 0)
+	{
+		startAttr = Min(tupleDesc->firstNonCachedOffAttr - 1, firstnullattr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
+	}
+	else
+	{
+		startAttr = 0;
+		off = 0;
 	}
 
 	tp = (char *) td + td->t_hoff;
 
-	if (!slow)
+	if (hasnulls)
 	{
-		CompactAttribute *att;
+		for (int i = startAttr; i < attnum; i++)
+		{
+			CompactAttribute *att;
 
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
+			if (att_isnull(i, bp))
+				continue;
 
-		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
-		 */
-		if (HeapTupleHasVarWidth(tup))
-		{
-			int			j;
+			att = TupleDescCompactAttr(tupleDesc, i);
 
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
+			off = att_pointer_alignby(off,
+									  att->attalignby,
+									  att->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, att->attlen, tp + off);
 		}
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		off = att_pointer_alignby(off,
+								  cattr->attalignby,
+								  cattr->attlen,
+								  tp + off);
 	}
-
-	if (!slow)
+	else if (!HeapTupleHasVarWidth(tup))
 	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
-
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
-
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
-
-		for (; j < natts; j++)
+		for (int i = startAttr; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
-
-			if (att->attlen <= 0)
-				break;
+			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
 
 			off = att_nominal_alignby(off, att->attalignby);
-
-			att->attcacheoff = off;
-
 			off += att->attlen;
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		off = att_nominal_alignby(off, cattr->attalignby);
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
-
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		for (int i = startAttr; i < attnum; i++)
 		{
 			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
 
-			if (HeapTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
-
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
-
-				if (usecache)
-					att->attcacheoff = off;
-			}
-
-			if (i == attnum)
-				break;
-
+			off = att_pointer_alignby(off,
+									  att->attalignby,
+									  att->attlen,
+									  tp + off);
 			off = att_addlength_pointer(off, att->attlen, tp + off);
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
 		}
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		off = att_pointer_alignby(off,
+								  cattr->attalignby,
+								  cattr->attlen,
+								  tp + off);
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	return fetchatt(cattr, tp + off);
 }
 
 /* ----------------
@@ -1354,7 +1249,8 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 	char	   *tp;				/* ptr to tuple data */
 	uint32		off;			/* offset in tuple data */
 	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			cacheoffattrs;
+	int			firstnullattr;
 
 	natts = HeapTupleHeaderGetNatts(tup);
 
@@ -1364,60 +1260,77 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 	 * the caller's arrays.
 	 */
 	natts = Min(natts, tdesc_natts);
+	cacheoffattrs = Min(tupleDesc->firstNonCachedOffAttr, natts);
 
-	tp = (char *) tup + tup->t_hoff;
+	if (hasnulls)
+	{
+		firstnullattr = first_null_attr(bp, natts);
+		cacheoffattrs = Min(cacheoffattrs, firstnullattr);
+	}
+	else
+		firstnullattr = natts;
 
+	tp = (char *) tup + tup->t_hoff;
 	off = 0;
 
-	for (attnum = 0; attnum < natts; attnum++)
+	for (attnum = 0; attnum < cacheoffattrs; attnum++)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+		CompactAttribute *cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		Assert(cattr->attcacheoff >= 0);
+
+		values[attnum] = fetch_att(tp + cattr->attcacheoff, cattr->attbyval,
+								   cattr->attlen);
+		isnull[attnum] = false;
+		off = cattr->attcacheoff + cattr->attlen;
+	}
 
-		if (hasnulls && att_isnull(attnum, bp))
+	for (; attnum < firstnullattr; attnum++)
+	{
+		CompactAttribute *cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		if (cattr->attlen == -1)
+			off = att_pointer_alignby(off, cattr->attalignby, -1,
+									  tp + off);
+		else
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
-			continue;
+			/* not varlena, so safe to use att_nominal_alignby */
+			off = att_nominal_alignby(off, cattr->attalignby);
 		}
 
 		isnull[attnum] = false;
+		values[attnum] = fetchatt(cattr, tp + off);
 
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
+	}
+
+	for (; attnum < natts; attnum++)
+	{
+		CompactAttribute *cattr;
+
+		Assert(hasnulls);
+
+		if (att_isnull(attnum, bp))
 		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			continue;
 		}
+
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		if (cattr->attlen == -1)
+			off = att_pointer_alignby(off, cattr->attalignby, -1,
+									  tp + off);
 		else
 		{
 			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
-
-			if (!slow)
-				thisatt->attcacheoff = off;
+			off = att_nominal_alignby(off, cattr->attalignby);
 		}
 
-		values[attnum] = fetchatt(thisatt, tp + off);
-
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+		isnull[attnum] = false;
+		values[attnum] = fetchatt(cattr, tp + off);
 
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
 	}
 
 	/*
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index 3efa3889c6f..8d0c273cdf6 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -223,18 +223,6 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
  *
  *		This gets called from index_getattr() macro, and only in cases
  *		where we can't use cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
  * ----------------
  */
 Datum
@@ -242,205 +230,126 @@ nocache_index_getattr(IndexTuple tup,
 					  int attnum,
 					  TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = NULL;		/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			data_off;		/* tuple data offset */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstnullattr;
+	bool		hasnulls = IndexTupleHasNulls(tup);
+	int			i;
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
-	 */
+	attnum--;
 
+	/*
+	 * If there are no NULLs before the required attnum, then we can start at
+	 * the highest attribute with a known offset, or the first attribute if
+	 * none have a cached offset.  If the tuple has no variable width types,
+	 * which is common with indexes, then we can use a slightly cheaper method
+	 * of offset calculation, as we just need to add the attlen to the aligned
+	 * offset when skipping over columns.  When the tuple contains
+	 * variable-width types, we must use att_addlength_pointer(), which does a
+	 * bit more branching and is slightly less efficient.
+	 */
 	data_off = IndexInfoFindDataOffset(tup->t_info);
+	tp = (char *) tup + data_off;
 
-	attnum--;
-
-	if (IndexTupleHasNulls(tup))
+	/*
+	 * Find the first NULL column, or if there's none set the first NULL to
+	 * attnum so that we can forego NULL checking all the way to attnum.
+	 */
+	if (hasnulls)
 	{
-		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if desired att is null
-		 */
-
-		/* XXX "knows" t_bits are just after fixed tuple header! */
 		bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));
-
-		/*
-		 * Now check to see if any preceding bits are null...
-		 */
-		{
-			int			byte = attnum >> 3;
-			int			finalbit = attnum & 0x07;
-
-			/* check for nulls "before" final bit of last byte */
-			if ((~bp[byte]) & ((1 << finalbit) - 1))
-				slow = true;
-			else
-			{
-				/* check for nulls in any "earlier" bytes */
-				int			i;
-
-				for (i = 0; i < byte; i++)
-				{
-					if (bp[i] != 0xFF)
-					{
-						slow = true;
-						break;
-					}
-				}
-			}
-		}
+		firstnullattr = first_null_attr(bp, attnum);
 	}
+	else
+		firstnullattr = attnum;
 
-	tp = (char *) tup + data_off;
-
-	if (!slow)
+	if (tupleDesc->firstNonCachedOffAttr >= 0)
 	{
-		CompactAttribute *att;
-
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
-
-		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
-		 */
-		if (IndexTupleHasVarwidths(tup))
-		{
-			int			j;
-
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+		startAttr = Min(tupleDesc->firstNonCachedOffAttr - 1, firstnullattr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
 	}
-
-	if (!slow)
+	else
 	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
+		startAttr = 0;
+		off = 0;
+	}
 
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
+	/* Handle tuples with var-width attributes */
+	if (IndexTupleHasVarwidths(tup))
+	{
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstnullattr; i++)
+		{
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
+		}
 
-		for (; j < natts; j++)
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
-
-			if (att->attlen <= 0)
-				break;
+			Assert(hasnulls);
 
-			off = att_nominal_alignby(off, att->attalignby);
+			if (att_isnull(i, bp))
+				continue;
 
-			att->attcacheoff = off;
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			off += att->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
+		/* Handle tuples with only fixed-width attributes */
 
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstnullattr; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
-
-			if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
-
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
+
+			Assert(cattr->attlen > 0);
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off += cattr->attlen;
+		}
 
-				if (usecache)
-					att->attcacheoff = off;
-			}
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
+		{
+			Assert(hasnulls);
 
-			if (i == attnum)
-				break;
+			if (att_isnull(i, bp))
+				continue;
 
-			off = att_addlength_pointer(off, att->attlen, tp + off);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
+			Assert(cattr->attlen > 0);
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off += cattr->attlen;
 		}
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	cattr = TupleDescCompactAttr(tupleDesc, attnum);
+	off = att_pointer_alignby(off, cattr->attalignby,
+							  cattr->attlen, tp + off);
+	return fetchatt(cattr, tp + off);
 }
 
 /*
@@ -481,62 +390,76 @@ index_deform_tuple_internal(TupleDesc tupleDescriptor,
 							char *tp, bits8 *bp, int hasnulls)
 {
 	int			natts = tupleDescriptor->natts; /* number of atts to extract */
-	int			attnum;
+	int			attnum = 0;
 	int			off = 0;		/* offset in tuple data */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			cacheoffattrs;
+	int			firstnullattr;
 
 	/* Assert to protect callers who allocate fixed-size arrays */
 	Assert(natts <= INDEX_MAX_KEYS);
 
-	for (attnum = 0; attnum < natts; attnum++)
+	cacheoffattrs = Min(tupleDescriptor->firstNonCachedOffAttr, natts);
+
+	if (hasnulls)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDescriptor, attnum);
+		firstnullattr = first_null_attr(bp, natts);
+		cacheoffattrs = Min(cacheoffattrs, firstnullattr);
+	}
+	else
+		firstnullattr = natts;
+
+	if (attnum < cacheoffattrs)
+	{
+		CompactAttribute *cattr;
 
-		if (hasnulls && att_isnull(attnum, bp))
+		do
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
-			continue;
-		}
+			cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+			Assert(cattr->attcacheoff >= 0);
+
+			values[attnum] = fetch_att(tp + cattr->attcacheoff, cattr->attbyval,
+									   cattr->attlen);
+			isnull[attnum] = false;
+		} while (++attnum < cacheoffattrs);
+
+		off = cattr->attcacheoff + cattr->attlen;
+	}
+
+	for (; attnum < firstnullattr; attnum++)
+	{
+		CompactAttribute *cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+		off = att_pointer_alignby(off, cattr->attalignby, cattr->attlen,
+								  tp + off);
 
 		isnull[attnum] = false;
+		values[attnum] = fetchatt(cattr, tp + off);
 
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
+	}
+
+	for (; attnum < natts; attnum++)
+	{
+		CompactAttribute *cattr;
+
+		Assert(hasnulls);
 
-			if (!slow)
-				thisatt->attcacheoff = off;
+		if (att_isnull(attnum, bp))
+		{
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			continue;
 		}
 
-		values[attnum] = fetchatt(thisatt, tp + off);
+		cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+		off = att_pointer_alignby(off, cattr->attalignby, cattr->attlen,
+								  tp + off);
 
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+		isnull[attnum] = false;
+		values[attnum] = fetchatt(cattr, tp + off);
 
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
 	}
 }
 
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index bcd1ddcc68b..4aebb0190f8 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -238,6 +238,9 @@ CreateTupleDesc(int natts, Form_pg_attribute *attrs)
 		memcpy(TupleDescAttr(desc, i), attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
 		populate_compact_attribute(desc, i);
 	}
+
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -282,6 +285,8 @@ CreateTupleDescCopy(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -328,6 +333,8 @@ CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -413,6 +420,8 @@ CreateTupleDescCopyConstr(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -455,6 +464,8 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
 	 * source's refcount would be wrong in any case.)
 	 */
 	dst->tdrefcount = -1;
+
+	TupleDescFinalize(dst);
 }
 
 /*
@@ -463,6 +474,9 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
  *		descriptor to another.
  *
  * !!! Constraints and defaults are not copied !!!
+ *
+ * The caller must take care of calling TupleDescFinalize() on once all
+ * TupleDesc changes have been made.
  */
 void
 TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
@@ -495,6 +509,46 @@ TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 	populate_compact_attribute(dst, dstAttno - 1);
 }
 
+/*
+ * TupleDescFinalize
+ *		Finalize the given TupleDesc.  This must be called after the
+ *		attributes arrays have been populated or adjusted by any code.
+ *
+ * Must be called after populate_compact_attribute()
+ */
+void
+TupleDescFinalize(TupleDesc tupdesc)
+{
+	int			firstNonCachedOffAttr = -1;
+	int			firstByRefAttr = tupdesc->natts;
+	int			offp = 0;
+
+	for (int i = 0; i < tupdesc->natts; i++)
+	{
+		CompactAttribute *cattr = TupleDescCompactAttr(tupdesc, i);
+
+		if (!cattr->attbyval)
+			firstByRefAttr = Min(firstByRefAttr, i);
+
+		/*
+		 * We can't cache the offset for the first varlena attr as the
+		 * alignment for those depends on 1 vs 4 byte headers, however we
+		 * possibily could cache the first attlen == -2 attr.  Worthwhile?
+		 */
+		if (cattr->attlen <= 0)
+			break;
+
+		offp = att_nominal_alignby(offp, cattr->attalignby);
+		cattr->attcacheoff = offp;
+
+		offp += cattr->attlen;
+		firstNonCachedOffAttr = i + 1;
+	}
+
+	tupdesc->firstNonCachedOffAttr = firstNonCachedOffAttr;
+	tupdesc->firstByRefAttr = firstByRefAttr;
+}
+
 /*
  * Free a TupleDesc including all substructure
  */
@@ -1082,6 +1136,8 @@ BuildDescFromLists(const List *names, const List *types, const List *typmods, co
 		TupleDescInitEntryCollation(desc, attnum, attcollation);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 605f80aad39..a7286615f5b 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -128,6 +128,7 @@ initGinState(GinState *state, Relation index)
 							   attr->attndims);
 			TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
 										attr->attcollation);
+			TupleDescFinalize(state->tupdesc[i]);
 		}
 
 		/*
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 01b8ff0b6fa..6f58ba6cf95 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -201,6 +201,7 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
 											 attno - 1)->atttypid,
 							   -1, 0);
 		}
+		TupleDescFinalize(so->giststate->fetchTupdesc);
 		scan->xs_hitupdesc = so->giststate->fetchTupdesc;
 
 		/* Also create a memory context that will hold the returned tuples */
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index a60ec85e8be..391e7a4c9a1 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -334,11 +334,9 @@ getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
 		/* We shouldn't need to bother with making these valid: */
 		att->attcompression = InvalidCompressionMethod;
 		att->attcollation = InvalidOid;
-		/* In case we changed typlen, we'd better reset following offsets */
-		for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++)
-			TupleDescCompactAttr(outTupDesc, i)->attcacheoff = -1;
 
 		populate_compact_attribute(outTupDesc, spgKeyColumn);
+		TupleDescFinalize(outTupDesc);
 	}
 	return outTupDesc;
 }
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 3bc85986829..31956d2d0a8 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -744,6 +744,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
 						   OIDOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 339cb75c3ad..fbc116b747f 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -401,6 +401,7 @@ pg_walfile_name_offset(PG_FUNCTION_ARGS)
 					   INT4OID, -1, 0);
 
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
+	TupleDescFinalize(resultTupleDesc);
 
 	/*
 	 * xlogfilename
diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c
index 8bb8d3939fe..d227bfad384 100644
--- a/src/backend/backup/basebackup_copy.c
+++ b/src/backend/backup/basebackup_copy.c
@@ -357,6 +357,8 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
 	 */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "tli", INT8OID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
+
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
 
@@ -388,6 +390,7 @@ SendTablespaceList(List *tablespaces)
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "spcoid", OIDOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "spclocation", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "size", INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 8dea58ad96b..56b46385a0b 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -481,6 +481,8 @@ ConstructTupleDescriptor(Relation heapRelation,
 		populate_compact_attribute(indexTupDesc, i);
 	}
 
+	TupleDescFinalize(indexTupDesc);
+
 	pfree(amroutine);
 
 	return indexTupDesc;
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 7aa3f179924..219190720a3 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -1230,6 +1230,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "qual",
 						   PG_NODE_TREEOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 		funcctx->user_fctx = table_infos;
 
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index 874a8fc89ad..8c1fede1090 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -229,6 +229,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
 	TupleDescAttr(tupdesc, 1)->attcompression = InvalidCompressionMethod;
 	TupleDescAttr(tupdesc, 2)->attcompression = InvalidCompressionMethod;
 
+	populate_compact_attribute(tupdesc, 0);
+	populate_compact_attribute(tupdesc, 1);
+	populate_compact_attribute(tupdesc, 2);
+
+	TupleDescFinalize(tupdesc);
+
 	/*
 	 * Toast tables for regular relations go in pg_toast; those for temp
 	 * relations go into the per-backend temp-toast-table namespace.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5a6390631eb..26eee4ace42 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -281,6 +281,7 @@ ExplainResultDesc(ExplainStmt *stmt)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN",
 					   result_type, -1, 0);
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 8a435cd93db..bf73ef7d0a3 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -2423,6 +2423,7 @@ CallStmtResultDesc(CallStmt *stmt)
 							   -1,
 							   0);
 		}
+		TupleDescFinalize(tupdesc);
 	}
 
 	return tupdesc;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 51567994126..b26cd8e642e 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1810,6 +1810,7 @@ pg_get_sequence_data(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 3, "page_lsn",
 					   LSNOID, -1, 0);
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
+	TupleDescFinalize(resultTupleDesc);
 
 	init_sequence(relid, &elm, &seqrel);
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1d9565b09fc..89e3dc4a6a9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1029,6 +1029,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 		}
 	}
 
+	TupleDescFinalize(descriptor);
+
 	/*
 	 * For relations with table AM and partitioned tables, select access
 	 * method to use: an explicitly indicated one, or (in the case of a
@@ -1447,6 +1449,8 @@ BuildDescForRelation(const List *columns)
 		populate_compact_attribute(desc, attnum - 1);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c
index a03fe780a02..3267f129b60 100644
--- a/src/backend/executor/execSRF.c
+++ b/src/backend/executor/execSRF.c
@@ -272,6 +272,7 @@ ExecMakeTableFunctionResult(SetExprState *setexpr,
 									   funcrettype,
 									   -1,
 									   0);
+					TupleDescFinalize(tupdesc);
 					rsinfo.setDesc = tupdesc;
 				}
 				MemoryContextSwitchTo(oldcontext);
@@ -776,6 +777,7 @@ init_sexpr(Oid foid, Oid input_collation, Expr *node,
 							   funcrettype,
 							   -1,
 							   0);
+			TupleDescFinalize(tupdesc);
 			sexpr->funcResultDesc = tupdesc;
 			sexpr->funcReturnsTuple = false;
 		}
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index b0dc2cfa66f..6d33f494a70 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -992,118 +992,6 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple,
 	}
 }
 
-/*
- * slot_deform_heap_tuple_internal
- *		An always inline helper function for use in slot_deform_heap_tuple to
- *		allow the compiler to emit specialized versions of this function for
- *		various combinations of "slow" and "hasnulls".  For example, if a
- *		given tuple has no nulls, then we needn't check "hasnulls" for every
- *		attribute that we're deforming.  The caller can just call this
- *		function with hasnulls set to constant-false and have the compiler
- *		remove the constant-false branches and emit more optimal code.
- *
- * Returns the next attnum to deform, which can be equal to natts when the
- * function manages to deform all requested attributes.  *offp is an input and
- * output parameter which is the byte offset within the tuple to start deforming
- * from which, on return, gets set to the offset where the next attribute
- * should be deformed from.  *slowp is set to true when subsequent deforming
- * of this tuple must use a version of this function with "slow" passed as
- * true.
- *
- * Callers cannot assume when we return "attnum" (i.e. all requested
- * attributes have been deformed) that slow mode isn't required for any
- * additional deforming as the final attribute may have caused a switch to
- * slow mode.
- */
-static pg_attribute_always_inline int
-slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
-								int attnum, int natts, bool slow,
-								bool hasnulls, uint32 *offp, bool *slowp)
-{
-	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
-	Datum	   *values = slot->tts_values;
-	bool	   *isnull = slot->tts_isnull;
-	HeapTupleHeader tup = tuple->t_data;
-	char	   *tp;				/* ptr to tuple data */
-	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slownext = false;
-
-	tp = (char *) tup + tup->t_hoff;
-
-	for (; attnum < natts; attnum++)
-	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
-
-		if (hasnulls && att_isnull(attnum, bp))
-		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			if (!slow)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-			else
-				continue;
-		}
-
-		isnull[attnum] = false;
-
-		/* calculate the offset of this attribute */
-		if (!slow && thisatt->attcacheoff >= 0)
-			*offp = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow && *offp == att_nominal_alignby(*offp, thisatt->attalignby))
-				thisatt->attcacheoff = *offp;
-			else
-			{
-				*offp = att_pointer_alignby(*offp,
-											thisatt->attalignby,
-											-1,
-											tp + *offp);
-
-				if (!slow)
-					slownext = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			*offp = att_nominal_alignby(*offp, thisatt->attalignby);
-
-			if (!slow)
-				thisatt->attcacheoff = *offp;
-		}
-
-		values[attnum] = fetchatt(thisatt, tp + *offp);
-
-		*offp = att_addlength_pointer(*offp, thisatt->attlen, tp + *offp);
-
-		/* check if we need to switch to slow mode */
-		if (!slow)
-		{
-			/*
-			 * We're unable to deform any further if the above code set
-			 * 'slownext', or if this isn't a fixed-width attribute.
-			 */
-			if (slownext || thisatt->attlen <= 0)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-		}
-	}
-
-	return natts;
-}
-
 /*
  * slot_deform_heap_tuple
  *		Given a TupleTableSlot, extract data from the slot's physical tuple
@@ -1122,78 +1010,165 @@ static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 					   int natts)
 {
+	CompactAttribute *cattr;
+	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
 	bool		hasnulls = HeapTupleHasNulls(tuple);
+	HeapTupleHeader tup = tuple->t_data;
+	bits8	   *bp;				/* ptr to null bitmap in tuple */
 	int			attnum;
+	int			firstNonCacheOffsetAttr;
+
+/* #define OPTIMIZE_BYVAL */
+#ifdef OPTIMIZE_BYVAL
+	int			firstByRefAttr;
+#endif
+	int			firstNullAttr;
+	Datum	   *values;
+	bool	   *isnull;
+	char	   *tp;				/* ptr to tuple data */
 	uint32		off;			/* offset in tuple data */
-	bool		slow;			/* can we use/set attcacheoff? */
 
 	/* We can only fetch as many attributes as the tuple has. */
-	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), natts);
+	natts = Min(HeapTupleHeaderGetNatts(tup), natts);
+	attnum = slot->tts_nvalid;
+	firstNonCacheOffsetAttr = Min(tupleDesc->firstNonCachedOffAttr, natts);
+
+	if (hasnulls)
+	{
+		bp = tup->t_bits;
+		firstNullAttr = first_null_attr(bp, natts);
+		firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
+	}
+	else
+	{
+		bp = NULL;
+		firstNullAttr = natts;
+	}
+
+#ifdef OPTIMIZE_BYVAL
+	firstByRefAttr = Min(firstNonCacheOffsetAttr, tupleDesc->firstByRefAttr);
+#endif
+	values = slot->tts_values;
+	isnull = slot->tts_isnull;
+	tp = (char *) tup + tup->t_hoff;
+
+#ifdef OPTIMIZE_BYVAL
 
 	/*
-	 * Check whether the first call for this tuple, and initialize or restore
-	 * loop state.
+	 * Many tuples have leading byval attributes, try and process as many of
+	 * those as possible with a special loop that can't handle byref types.
 	 */
-	attnum = slot->tts_nvalid;
-	if (attnum == 0)
+	if (attnum < firstByRefAttr)
+	{
+		/* Use do/while as we already know we need to loop at least once. */
+		do
+		{
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+			Assert(cattr->attcacheoff >= 0);
+
+			/*
+			 * Hard code byval == true to allow the compiler to remove the
+			 * byval check when inlining fetch_att().
+			 */
+			values[attnum] = fetch_att(tp + cattr->attcacheoff, true, cattr->attlen);
+			isnull[attnum] = false;
+		} while (++attnum < firstByRefAttr);
+
+		/*
+		 * Point the offset after the end of the last attribute with a cached
+		 * offset.  We expect the final cached offset attribute to have a
+		 * fixed width, so just add the attlen to the attcacheoff.
+		 */
+		Assert(cattr->attlen > 0);
+		off = cattr->attcacheoff + cattr->attlen;
+	}
+#endif
+
+	/*
+	 * Handle the portion of the tuple that we have cached the offset for up
+	 * to the first NULL attribute.  The offset is effectively fixed for these
+	 * so we can use the CompactAttribute's attcacheoff.
+	 */
+	if (attnum < firstNonCacheOffsetAttr)
+	{
+		do
+		{
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+			Assert(cattr->attcacheoff >= 0);
+
+			values[attnum] = fetchatt(cattr, tp + cattr->attcacheoff);
+			isnull[attnum] = false;
+		} while (++attnum < firstNonCacheOffsetAttr);
+
+		/*
+		 * Point the offset after the end of the last attribute with a cached
+		 * offset.  We expect the final cached offset attribute to have a
+		 * fixed width, so just add the attlen to the attcacheoff
+		 */
+		Assert(cattr->attlen > 0);
+		off = cattr->attcacheoff + cattr->attlen;
+	}
+	else if (attnum == 0)
 	{
 		/* Start from the first attribute */
 		off = 0;
-		slow = false;
 	}
 	else
 	{
 		/* Restore state from previous execution */
 		off = *offp;
-		slow = TTS_SLOW(slot);
 	}
 
 	/*
-	 * If 'slow' isn't set, try deforming using deforming code that does not
-	 * contain any of the extra checks required for non-fixed offset
-	 * deforming.  During deforming, if or when we find a NULL or a variable
-	 * length attribute, we'll switch to a deforming method which includes the
-	 * extra code required for non-fixed offset deforming, a.k.a slow mode.
-	 * Because this is performance critical, we inline
-	 * slot_deform_heap_tuple_internal passing the 'slow' and 'hasnull'
-	 * parameters as constants to allow the compiler to emit specialized code
-	 * with the known-const false comparisons and subsequent branches removed.
+	 * Handle any portion of the tuple that doesn't have a fixed offset up
+	 * until the first NULL attribute.  This loops only differs from the one
+	 * after it by the NULL checks.
 	 */
-	if (!slow)
+	for (; attnum < firstNullAttr; attnum++)
 	{
-		/* Tuple without any NULLs? We can skip doing any NULL checking */
-		if (!hasnulls)
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 false, /* hasnulls */
-													 &off,
-													 &slow);
-		else
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 true,	/* hasnulls */
-													 &off,
-													 &slow);
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		/* align the offset for this attribute */
+		off = att_pointer_alignby(off,
+								  cattr->attalignby,
+								  cattr->attlen,
+								  tp + off);
+
+		values[attnum] = fetchatt(cattr, tp + off);
+		isnull[attnum] = false;
+
+		/* move the offset beyond this attribute */
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
 	}
 
-	/* If there's still work to do then we must be in slow mode */
-	if (attnum < natts)
+	/*
+	 * Now handle any remaining tuples, this time include NULL checks as we're
+	 * now at the first NULL attribute.
+	 */
+	for (; attnum < natts; attnum++)
 	{
-		/* XXX is it worth adding a separate call when hasnulls is false? */
-		attnum = slot_deform_heap_tuple_internal(slot,
-												 tuple,
-												 attnum,
-												 natts,
-												 true,	/* slow */
-												 hasnulls,
-												 &off,
-												 &slow);
+		if (att_isnull(attnum, bp))
+		{
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			continue;
+		}
+
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		/* align the offset for this attribute */
+		off = att_pointer_alignby(off,
+								  cattr->attalignby,
+								  cattr->attlen,
+								  tp + off);
+
+		values[attnum] = fetchatt(cattr, tp + off);
+		isnull[attnum] = false;
+
+		/* move the offset beyond this attribute */
+		off = att_addlength_pointer(off, cattr->attlen, tp + off);
 	}
 
 	/*
@@ -1201,10 +1176,6 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	 */
 	slot->tts_nvalid = attnum;
 	*offp = off;
-	if (slow)
-		slot->tts_flags |= TTS_FLAG_SLOW;
-	else
-		slot->tts_flags &= ~TTS_FLAG_SLOW;
 }
 
 const TupleTableSlotOps TTSOpsVirtual = {
@@ -2173,6 +2144,8 @@ ExecTypeFromTLInternal(List *targetList, bool skipjunk)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
@@ -2207,6 +2180,8 @@ ExecTypeFromExprList(List *exprList)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c
index af75dd8fc5e..ea19684de2e 100644
--- a/src/backend/executor/nodeFunctionscan.c
+++ b/src/backend/executor/nodeFunctionscan.c
@@ -414,6 +414,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 				TupleDescInitEntryCollation(tupdesc,
 											(AttrNumber) 1,
 											exprCollation(funcexpr));
+				TupleDescFinalize(tupdesc);
 			}
 			else
 			{
@@ -485,6 +486,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 							   0);
 		}
 
+		TupleDescFinalize(scan_tupdesc);
 		Assert(attno == natts);
 	}
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index dd64f45478a..23cbb92d859 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1891,6 +1891,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 			TupleDescInitEntryCollation(tupdesc,
 										(AttrNumber) 1,
 										exprCollation(funcexpr));
+			TupleDescFinalize(tupdesc);
 		}
 		else if (functypclass == TYPEFUNC_RECORD)
 		{
@@ -1948,6 +1949,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 
 				i++;
 			}
+			TupleDescFinalize(tupdesc);
 
 			/*
 			 * Ensure that the coldeflist defines a legal set of names (no
@@ -2016,7 +2018,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 							   0);
 			/* no need to set collation */
 		}
-
+		TupleDescFinalize(tupdesc);
 		Assert(natts == totalatts);
 	}
 	else
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index 905c975d83b..f0387166279 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -1570,6 +1570,8 @@ expandRecordVariable(ParseState *pstate, Var *var, int levelsup)
 		}
 		Assert(lname == NULL && lvar == NULL);	/* lists same length? */
 
+		TupleDescFinalize(tupleDesc);
+
 		return tupleDesc;
 	}
 
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 5ddc9e812e7..75a33ea6ada 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -1049,6 +1049,7 @@ libpqrcv_processTuples(PGresult *pgres, WalRcvExecResult *walres,
 		TupleDescInitEntry(walres->tupledesc, (AttrNumber) coln + 1,
 						   PQfname(pgres, coln), retTypes[coln], -1, 0);
 	attinmeta = TupleDescGetAttInMetadata(walres->tupledesc);
+	TupleDescFinalize(walres->tupledesc);
 
 	/* No point in doing more here if there were no tuples returned. */
 	if (PQntuples(pgres) == 0)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 96cede8f45a..364ae7a3ee1 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -452,6 +452,7 @@ IdentifySystem(void)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "dbname",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -497,6 +498,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
 	/* TimeLineID is unsigned, so int4 is not wide enough. */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "restart_tli",
 							  INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	memset(nulls, true, READ_REPLICATION_SLOT_COLS * sizeof(bool));
 
@@ -599,6 +601,7 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 	tupdesc = CreateTemplateTupleDesc(2);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "filename", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "content", TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	TLHistoryFileName(histfname, cmd->timeline);
 	TLHistoryFilePath(path, cmd->timeline);
@@ -1016,6 +1019,7 @@ StartReplication(StartReplicationCmd *cmd)
 								  INT8OID, -1, 0);
 		TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "next_tli_startpos",
 								  TEXTOID, -1, 0);
+		TupleDescFinalize(tupdesc);
 
 		/* prepare for projection of tuple */
 		tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -1370,6 +1374,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "output_plugin",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index 05d48412f82..3d3ca2185e6 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -1818,6 +1818,7 @@ aclexplode(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_grantable",
 						   BOOLOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/* allocate memory for user context */
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 80bb807fbe9..26348513b18 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -454,6 +454,7 @@ pg_stat_file(PG_FUNCTION_ARGS)
 					   "creation", TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6,
 					   "isdir", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	memset(isnull, false, sizeof(isnull));
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index bf38d68aa03..5c0c6dda7c5 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -146,6 +146,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart",
 						   TIMESTAMPTZOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index ac3963fc3e0..2ae1e46fbef 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -233,6 +233,7 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 								   -1,
 								   0);
 
+				TupleDescFinalize(newdesc);
 				FreeTupleDesc(qstate->tupdesc);
 				qstate->tupdesc = newdesc;
 			}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a97aa7c73db..b5aebc0f3e6 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -769,6 +769,7 @@ pg_stat_get_backend_subxact(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "subxact_overflow",
 					   BOOLOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if ((local_beentry = pgstat_get_local_beentry_by_proc_number(procNumber)) != NULL)
@@ -1658,6 +1659,7 @@ pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters,
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Fill values and NULLs */
@@ -2085,6 +2087,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Get statistics about the archiver process */
@@ -2166,6 +2169,7 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 					   TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	namestrcpy(&slotname, text_to_cstring(slotname_text));
@@ -2253,6 +2257,7 @@ pg_stat_get_subscription_stats(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if (!subentry)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index b809089ac5d..78592499b0c 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -651,6 +651,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 			elog(ERROR, "return type must be a row type");
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = tupdesc;
 
 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 2d0cb7bcfd4..642c4b96297 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -666,14 +666,6 @@ RelationBuildTupleDesc(Relation relation)
 		elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
 			 need, RelationGetRelid(relation));
 
-	/*
-	 * We can easily set the attcacheoff value for the first attribute: it
-	 * must be zero.  This eliminates the need for special cases for attnum=1
-	 * that used to exist in fastgetattr() and index_getattr().
-	 */
-	if (RelationGetNumberOfAttributes(relation) > 0)
-		TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
-
 	/*
 	 * Set up constraint/default info
 	 */
@@ -729,6 +721,8 @@ RelationBuildTupleDesc(Relation relation)
 		pfree(constr);
 		relation->rd_att->constr = NULL;
 	}
+
+	TupleDescFinalize(relation->rd_att);
 }
 
 /*
@@ -1988,8 +1982,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 		populate_compact_attribute(relation->rd_att, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
+	TupleDescFinalize(relation->rd_att);
 
 	/* mark not-null status */
 	if (has_not_null)
@@ -3693,6 +3686,8 @@ RelationBuildLocalRelation(const char *relname,
 	for (i = 0; i < natts; i++)
 		TupleDescAttr(rel->rd_att, i)->attrelid = relid;
 
+	TupleDescFinalize(rel->rd_att);
+
 	rel->rd_rel->reltablespace = reltablespace;
 
 	if (mapped_relation)
@@ -4446,8 +4441,7 @@ BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
 		populate_compact_attribute(result, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(result, 0)->attcacheoff = 0;
+	TupleDescFinalize(result);
 
 	/* Note: we don't bother to set up a TupleConstr entry */
 
@@ -6273,6 +6267,8 @@ load_relcache_init_file(bool shared)
 			populate_compact_attribute(rel->rd_att, i);
 		}
 
+		TupleDescFinalize(rel->rd_att);
+
 		/* next read the access method specific field */
 		if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
 			goto read_failed;
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index f40879f0617..a98bc9f9e4f 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -340,6 +340,8 @@ get_expr_result_type(Node *expr,
 										exprCollation(col));
 			i++;
 		}
+		TupleDescFinalize(tupdesc);
+
 		if (resultTypeId)
 			*resultTypeId = rexpr->row_typeid;
 		if (resultTupleDesc)
@@ -1044,6 +1046,7 @@ resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args,
 		}
 	}
 
+	TupleDescFinalize(tupdesc);
 	return true;
 }
 
@@ -1853,6 +1856,8 @@ build_function_result_tupdesc_d(char prokind,
 						   0);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -1970,6 +1975,7 @@ TypeGetTupleDesc(Oid typeoid, List *colaliases)
 						   typeoid,
 						   -1,
 						   0);
+		TupleDescFinalize(tupdesc);
 	}
 	else if (functypclass == TYPEFUNC_RECORD)
 	{
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index b7e94ca45bd..afbcb8193a5 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -718,6 +718,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	char		dbname[NAMEDATALEN];
 	int			nfree = 0;
 
+	/* pg_usleep(10000000); */
 	elog(DEBUG3, "InitPostgres");
 
 	/*
diff --git a/src/backend/utils/misc/guc_funcs.c b/src/backend/utils/misc/guc_funcs.c
index 9dbc5d3aeb9..554f20f61d1 100644
--- a/src/backend/utils/misc/guc_funcs.c
+++ b/src/backend/utils/misc/guc_funcs.c
@@ -444,6 +444,7 @@ GetPGVariableResultDesc(const char *name)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, varname,
 						   TEXTOID, -1, 0);
 	}
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
@@ -465,6 +466,7 @@ ShowGUCConfigOption(const char *name, DestReceiver *dest)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, varname,
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -499,6 +501,7 @@ ShowAllGUCConfig(DestReceiver *dest)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "description",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -939,6 +942,8 @@ show_all_settings(PG_FUNCTION_ARGS)
 		 * C strings
 		 */
 		attinmeta = TupleDescGetAttInMetadata(tupdesc);
+		TupleDescFinalize(tupdesc);
+
 		funcctx->attinmeta = attinmeta;
 
 		/* collect the variables, in sorted order */
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index f3593acc8c2..0901950b206 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -865,20 +865,17 @@ extern MinimalTuple minimal_expand_tuple(HeapTuple sourceTuple, TupleDesc tupleD
 static inline Datum
 fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
 {
-	Assert(attnum > 0);
+	CompactAttribute *att = TupleDescCompactAttr(tupleDesc, attnum - 1);
 
+	Assert(attnum > 0);
 	*isnull = false;
-	if (HeapTupleNoNulls(tup))
-	{
-		CompactAttribute *att;
 
-		att = TupleDescCompactAttr(tupleDesc, attnum - 1);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, (char *) tup->t_data + tup->t_data->t_hoff +
-							att->attcacheoff);
-		else
-			return nocachegetattr(tup, attnum, tupleDesc);
-	}
+	if (att->attcacheoff >= 0 && !HeapTupleHasNulls(tup))
+		return fetchatt(att, (char *) tup->t_data + tup->t_data->t_hoff +
+						att->attcacheoff);
+
+	if (HeapTupleNoNulls(tup))
+		return nocachegetattr(tup, attnum, tupleDesc);
 	else
 	{
 		if (att_isnull(attnum - 1, tup->t_data->t_bits))
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index 4ba928c7132..d52e8cd2a83 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -131,24 +131,20 @@ IndexInfoFindDataOffset(unsigned short t_info)
 static inline Datum
 index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
 {
+	CompactAttribute *attr = TupleDescCompactAttr(tupleDesc, attnum - 1);
+
 	Assert(isnull);
 	Assert(attnum > 0);
 
 	*isnull = false;
 
-	if (!IndexTupleHasNulls(tup))
-	{
-		CompactAttribute *attr = TupleDescCompactAttr(tupleDesc, attnum - 1);
+	if (attr->attcacheoff >= 0 && !IndexTupleHasNulls(tup))
+		return fetchatt(attr,
+						(char *) tup + IndexInfoFindDataOffset(tup->t_info) +
+						attr->attcacheoff);
 
-		if (attr->attcacheoff >= 0)
-		{
-			return fetchatt(attr,
-							(char *) tup + IndexInfoFindDataOffset(tup->t_info) +
-							attr->attcacheoff);
-		}
-		else
-			return nocache_index_getattr(tup, attnum, tupleDesc);
-	}
+	if (!IndexTupleHasNulls(tup))
+		return nocache_index_getattr(tup, attnum, tupleDesc);
 	else
 	{
 		if (att_isnull(attnum - 1, (bits8 *) tup + sizeof(IndexTupleData)))
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index a25b94ba423..dca20301b7f 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -131,6 +131,12 @@ typedef struct CompactAttribute
  * Any code making changes manually to and fields in the FormData_pg_attribute
  * array must subsequently call populate_compact_attribute() to flush the
  * changes out to the corresponding 'compact_attrs' element.
+ *
+ * firstNonCachedOffAttr stores the index into the compact_attrs array for the
+ * first attribute that we don't have a known attcacheoff for.
+ *
+ * Once a TupleDesc has been populated, before it is used for any purpose
+ * TupleDescFinalize() must be called on it.
  */
 typedef struct TupleDescData
 {
@@ -138,6 +144,10 @@ typedef struct TupleDescData
 	Oid			tdtypeid;		/* composite type ID for tuple type */
 	int32		tdtypmod;		/* typmod for tuple type */
 	int			tdrefcount;		/* reference count, or -1 if not counting */
+	int			firstNonCachedOffAttr;	/* index of last att with an
+										 * attcacheoff */
+	int			firstByRefAttr; /* index of the first attr with !attbyval, or
+								 * natts if none. */
 	TupleConstr *constr;		/* constraints, or NULL if none */
 	/* compact_attrs[N] is the compact metadata of Attribute Number N+1 */
 	CompactAttribute compact_attrs[FLEXIBLE_ARRAY_MEMBER];
@@ -205,6 +215,8 @@ extern void TupleDescCopy(TupleDesc dst, TupleDesc src);
 extern void TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 							   TupleDesc src, AttrNumber srcAttno);
 
+extern void TupleDescFinalize(TupleDesc tupdesc);
+
 extern void FreeTupleDesc(TupleDesc tupdesc);
 
 extern void IncrTupleDescRefCount(TupleDesc tupdesc);
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index 84b3e7fd896..d6ab90bbde1 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -15,6 +15,7 @@
 #define TUPMACS_H
 
 #include "catalog/pg_type_d.h"	/* for TYPALIGN macros */
+#include "port/pg_bitutils.h"
 
 
 /*
@@ -69,6 +70,62 @@ fetch_att(const void *T, bool attbyval, int attlen)
 	else
 		return PointerGetDatum(T);
 }
+
+/*
+ * first_null_attr
+ *		Inspect a NULL bitmask from a tuple and return the 0-based attnum of the
+ *		first NULL attribute.  Returns natts if no NULLs were found.
+ */
+static inline int
+first_null_attr(const bits8 *bits, int natts)
+{
+	int			lastByte = natts >> 3;
+	uint8		mask;
+	int			res = natts;
+	uint8		byte;
+
+#ifdef USE_ASSERT_CHECKING
+	int			firstnull_check = natts;
+
+	/* Do it the slow way and check we get the same answer. */
+	for (int i = 0; i < natts; i++)
+	{
+		if (att_isnull(i, bits))
+		{
+			firstnull_check = i;
+			break;
+		}
+	}
+#endif
+
+	/* Process all bytes up to just before the byte for the natts index */
+	for (int bytenum = 0; bytenum < lastByte; bytenum++)
+	{
+		if (bits[bytenum] != 0xFF)
+		{
+			byte = ~bits[bytenum];
+			res = bytenum * 8;
+			res += pg_rightmost_one_pos[byte];
+
+			Assert(res == firstnull_check);
+			return res;
+		}
+	}
+
+	/* Create a mask with all bits beyond natts's bit set to off */
+	mask = 0xFF & ((((uint8) 1) << (natts & 7)) - 1);
+	byte = (~bits[lastByte]) & mask;
+
+	if (byte != 0)
+	{
+		res = lastByte * 8;
+		res += pg_rightmost_one_pos[byte];
+	}
+
+	Assert(res == firstnull_check);
+
+	return res;
+}
 #endif							/* FRONTEND */
 
 /*
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 43f1d999b91..ff3ebbc76b9 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -84,9 +84,6 @@
  * tts_values/tts_isnull are allocated either when the slot is created (when
  * the descriptor is provided), or when a descriptor is assigned to the slot;
  * they are of length equal to the descriptor's natts.
- *
- * The TTS_FLAG_SLOW flag is saved state for
- * slot_deform_heap_tuple, and should not be touched by any other code.
  *----------
  */
 
@@ -98,12 +95,8 @@
 #define			TTS_FLAG_SHOULDFREE		(1 << 2)
 #define TTS_SHOULDFREE(slot) (((slot)->tts_flags & TTS_FLAG_SHOULDFREE) != 0)
 
-/* saved state for slot_deform_heap_tuple */
-#define			TTS_FLAG_SLOW		(1 << 3)
-#define TTS_SLOW(slot) (((slot)->tts_flags & TTS_FLAG_SLOW) != 0)
-
 /* fixed tuple descriptor */
-#define			TTS_FLAG_FIXED		(1 << 4)
+#define			TTS_FLAG_FIXED		(1 << 4)	/* XXX change to #3? */
 #define TTS_FIXED(slot) (((slot)->tts_flags & TTS_FLAG_FIXED) != 0)
 
 struct TupleTableSlotOps;
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index 4d90a0c2f06..ace4f5f03c0 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -1912,6 +1912,8 @@ build_row_from_vars(PLpgSQL_variable **vars, int numvars)
 		TupleDescInitEntryCollation(row->rowtupdesc, i + 1, typcoll);
 	}
 
+	TupleDescFinalize(row->rowtupdesc);
+
 	return row;
 }
 
diff --git a/src/test/modules/test_predtest/test_predtest.c b/src/test/modules/test_predtest/test_predtest.c
index be5b8c40914..9911fbe642b 100644
--- a/src/test/modules/test_predtest/test_predtest.c
+++ b/src/test/modules/test_predtest/test_predtest.c
@@ -230,6 +230,7 @@ test_predtest(PG_FUNCTION_ARGS)
 					   "s_r_holds", BOOLOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 8,
 					   "w_r_holds", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	tupdesc = BlessTupleDesc(tupdesc);
 
 	values[0] = BoolGetDatum(strong_implied_by);
-- 
2.43.0