v2-0002-Refactor-code-in-basebackup.c.patch

text/x-patch

Filename: v2-0002-Refactor-code-in-basebackup.c.patch
Type: text/x-patch
Part: 3
Message: Re: block-level incremental backup

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v2-0002
Subject: Refactor code in basebackup.c
File+
src/backend/replication/basebackup.c 200 148
src/bin/pg_basebackup/pg_basebackup.c 9 120
src/fe_utils/simple_list.c 104 0
src/include/fe_utils/simple_list.h 21 0
From 0da6d78690f8f45a8cf4e1e0caed8e53a9a3d010 Mon Sep 17 00:00:00 2001
From: Jeevan Chalke <jeevan.chalke@enterprisedb.com>
Date: Mon, 9 Sep 2019 10:56:43 +0530
Subject: [PATCH v2 2/4] Refactor code in basebackup.c

 - Refactor full backup code to the separate function.
 - Refactor checksum verifying logic to the separate function.
 - Refactor tablespace mapping code.
---
 src/backend/replication/basebackup.c  | 348 +++++++++++++++++++---------------
 src/bin/pg_basebackup/pg_basebackup.c | 129 +------------
 src/fe_utils/simple_list.c            | 104 ++++++++++
 src/include/fe_utils/simple_list.h    |  21 ++
 4 files changed, 334 insertions(+), 268 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index e72bf8e..bf15262 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -75,6 +75,14 @@ static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
 static int	compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
 static bool is_checksummed_file(const char *fullpath, const char *filename);
+static void verify_page_checksum(const char *readfilename, FILE *fp,
+								 char *page, int backup_distance,
+								 BlockNumber blkno, int segmentno,
+								 int *checksum_failures);
+static pgoff_t sendCompleteFile(const char *readfilename,
+								const char *tarfilename, FILE *fp,
+								struct stat *statbuf, int segmentno,
+								bool verify_checksum, int *checksum_failures);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
@@ -1391,17 +1399,11 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		 bool missing_ok, Oid dboid)
 {
 	FILE	   *fp;
-	BlockNumber blkno = 0;
-	bool		block_retry = false;
 	char		buf[TAR_SEND_SIZE];
-	uint16		checksum;
 	int			checksum_failures = 0;
 	off_t		cnt;
-	int			i;
 	pgoff_t		len = 0;
-	char	   *page;
 	size_t		pad;
-	PageHeader	phdr;
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
@@ -1416,8 +1418,6 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 				 errmsg("could not open file \"%s\": %m", readfilename)));
 	}
 
-	_tarWriteHeader(tarfilename, NULL, statbuf, false);
-
 	if (!noverify_checksums && DataChecksumsEnabled())
 	{
 		char	   *filename;
@@ -1449,146 +1449,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		}
 	}
 
-	while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
-	{
-		/*
-		 * The checksums are verified at block level, so we iterate over the
-		 * buffer in chunks of BLCKSZ, after making sure that
-		 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
-		 * BLCKSZ bytes.
-		 */
-		Assert(TAR_SEND_SIZE % BLCKSZ == 0);
-
-		if (verify_checksum && (cnt % BLCKSZ != 0))
-		{
-			ereport(WARNING,
-					(errmsg("cannot verify checksum in file \"%s\", block "
-							"%d: read buffer size %d and page size %d "
-							"differ",
-							readfilename, blkno, (int) cnt, BLCKSZ)));
-			verify_checksum = false;
-		}
-
-		if (verify_checksum)
-		{
-			for (i = 0; i < cnt / BLCKSZ; i++)
-			{
-				page = buf + BLCKSZ * i;
-
-				/*
-				 * Only check pages which have not been modified since the
-				 * start of the base backup. Otherwise, they might have been
-				 * written only halfway and the checksum would not be valid.
-				 * However, replaying WAL would reinstate the correct page in
-				 * this case. We also skip completely new pages, since they
-				 * don't have a checksum yet.
-				 */
-				if (!PageIsNew(page) && PageGetLSN(page) < startptr)
-				{
-					checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
-					phdr = (PageHeader) page;
-					if (phdr->pd_checksum != checksum)
-					{
-						/*
-						 * Retry the block on the first failure.  It's
-						 * possible that we read the first 4K page of the
-						 * block just before postgres updated the entire block
-						 * so it ends up looking torn to us.  We only need to
-						 * retry once because the LSN should be updated to
-						 * something we can ignore on the next pass.  If the
-						 * error happens again then it is a true validation
-						 * failure.
-						 */
-						if (block_retry == false)
-						{
-							/* Reread the failed block */
-							if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
-							{
-								ereport(ERROR,
-										(errcode_for_file_access(),
-										 errmsg("could not fseek in file \"%s\": %m",
-												readfilename)));
-							}
-
-							if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
-							{
-								/*
-								 * If we hit end-of-file, a concurrent
-								 * truncation must have occurred, so break out
-								 * of this loop just as if the initial fread()
-								 * returned 0. We'll drop through to the same
-								 * code that handles that case. (We must fix
-								 * up cnt first, though.)
-								 */
-								if (feof(fp))
-								{
-									cnt = BLCKSZ * i;
-									break;
-								}
-
-								ereport(ERROR,
-										(errcode_for_file_access(),
-										 errmsg("could not reread block %d of file \"%s\": %m",
-												blkno, readfilename)));
-							}
-
-							if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
-							{
-								ereport(ERROR,
-										(errcode_for_file_access(),
-										 errmsg("could not fseek in file \"%s\": %m",
-												readfilename)));
-							}
-
-							/* Set flag so we know a retry was attempted */
-							block_retry = true;
-
-							/* Reset loop to validate the block again */
-							i--;
-							continue;
-						}
-
-						checksum_failures++;
-
-						if (checksum_failures <= 5)
-							ereport(WARNING,
-									(errmsg("checksum verification failed in "
-											"file \"%s\", block %d: calculated "
-											"%X but expected %X",
-											readfilename, blkno, checksum,
-											phdr->pd_checksum)));
-						if (checksum_failures == 5)
-							ereport(WARNING,
-									(errmsg("further checksum verification "
-											"failures in file \"%s\" will not "
-											"be reported", readfilename)));
-					}
-				}
-				block_retry = false;
-				blkno++;
-			}
-		}
-
-		/* Send the chunk as a CopyData message */
-		if (pq_putmessage('d', buf, cnt))
-			ereport(ERROR,
-					(errmsg("base backup could not send data, aborting backup")));
-
-		len += cnt;
-		throttle(cnt);
-
-		if (feof(fp) || len >= statbuf->st_size)
-		{
-			/*
-			 * Reached end of file. The file could be longer, if it was
-			 * extended while we were sending it, but for a base backup we can
-			 * ignore such extended data. It will be restored from WAL.
-			 */
-			break;
-		}
-	}
-
-	CHECK_FREAD_ERROR(fp, readfilename);
+	/* Send complete file to the client. */
+	len = sendCompleteFile(readfilename, tarfilename, fp, statbuf, segmentno,
+						   verify_checksum, &checksum_failures);
 
 	/* If the file was truncated while we were sending it, pad it with zeros */
 	if (len < statbuf->st_size)
@@ -1761,3 +1624,192 @@ throttle(size_t increment)
 	 */
 	throttled_last = GetCurrentTimestamp();
 }
+
+/*
+ * verify_page_checksum
+ *
+ * Verifies checksum for one page.
+ */
+static void
+verify_page_checksum(const char *readfilename, FILE *fp, char *page,
+					 int backup_distance, BlockNumber blkno, int segmentno,
+					 int *checksum_failures)
+{
+	uint16		checksum;
+	bool		block_retried = false;
+	PageHeader	phdr;
+
+	while (1)
+	{
+		/*
+		 * Only check pages which have not been modified since the start of the
+		 * base backup.  Otherwise, they might have been written only halfway
+		 * and the checksum would not be valid.  However, replaying WAL would
+		 * reinstate the correct page in this case.  We also skip completely
+		 * new pages, since they don't have a checksum yet.
+		 */
+		if (PageIsNew(page) || PageGetLSN(page) >= startptr)
+			return;
+
+		checksum = pg_checksum_page(page, blkno + segmentno * RELSEG_SIZE);
+		phdr = (PageHeader) page;
+		if (phdr->pd_checksum == checksum)
+			return;
+
+		/*
+		 * Retry the block on the first failure.  It's possible that we
+		 * read the first 4K page of the block just before postgres updated
+		 * the entire block so it ends up looking torn to us.  We only need
+		 * to retry once because the LSN should be updated to something we
+		 * can ignore on the next pass.  If the error happens again then it
+		 * is a true validation failure.
+		 */
+		if (block_retried == false)
+		{
+			/* Reread the failed block */
+			if (fseek(fp, -backup_distance, SEEK_CUR) == -1)
+			{
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not fseek in file \"%s\": %m",
+								readfilename)));
+			}
+
+			if (fread(page, 1, BLCKSZ, fp) != BLCKSZ)
+			{
+				/*
+				 * If we hit end-of-file, return from here.  Caller will take
+				 * care of the rest.
+				 */
+				if (feof(fp))
+					return;
+
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not reread block %d of file \"%s\": %m",
+								blkno, readfilename)));
+			}
+
+			if (fseek(fp, backup_distance - BLCKSZ, SEEK_CUR) == -1)
+			{
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not fseek in file \"%s\": %m",
+								readfilename)));
+			}
+
+			/* Set flag so we know a retry was attempted */
+			block_retried = true;
+
+			/* Re-validate the block again */
+			continue;
+		}
+
+		(*checksum_failures)++;
+
+		if (*checksum_failures <= 5)
+			ereport(WARNING,
+					(errmsg("checksum verification failed in file \"%s\", block %d: calculated %X but expected %X",
+							readfilename, blkno, checksum,
+							phdr->pd_checksum)));
+		if (*checksum_failures == 5)
+			ereport(WARNING,
+					(errmsg("further checksum verification failures in file \"%s\" will not be reported",
+							readfilename)));
+
+		/*
+		 * Note that we want this loop to run only once, so we return from
+		 * here.  If we were needed to reread the block, then it was already
+		 * done above.
+		 */
+		return;
+	}
+}
+
+/*
+ * sendCompleteFile
+ *
+ * Sends complete file to the client.
+ */
+static pgoff_t
+sendCompleteFile(const char *readfilename, const char *tarfilename, FILE *fp,
+				 struct stat *statbuf, int segmentno, bool verify_checksum,
+				 int *checksum_failures)
+{
+	char		buf[TAR_SEND_SIZE];
+	off_t		cnt;
+	pgoff_t		len = 0;
+	BlockNumber blkno = 0;
+	int			i;
+
+	_tarWriteHeader(tarfilename, NULL, statbuf, false);
+
+	while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
+	{
+		/*
+		 * The checksums are verified at block level, so we iterate over the
+		 * buffer in chunks of BLCKSZ, after making sure that
+		 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
+		 * BLCKSZ bytes.
+		 */
+		Assert(TAR_SEND_SIZE % BLCKSZ == 0);
+
+		if (verify_checksum && (cnt % BLCKSZ != 0))
+		{
+			ereport(WARNING,
+					(errmsg("cannot verify checksum in file \"%s\", block "
+							"%d: read buffer size %d and page size %d "
+							"differ",
+							readfilename, blkno, (int) cnt, BLCKSZ)));
+			verify_checksum = false;
+		}
+
+		if (verify_checksum)
+		{
+			for (i = 0; i < cnt / BLCKSZ; i++)
+			{
+				int			page_index_in_buf = (BLCKSZ * i);
+
+				verify_page_checksum(readfilename, fp, buf + page_index_in_buf,
+									 (cnt - page_index_in_buf), blkno,
+									 segmentno, checksum_failures);
+
+				/*
+				 * If we hit end-of-file, a concurrent truncation must have
+				 * occurred, so break out of this loop just as if the initial
+				 * fread() returned 0. We'll drop through to the same code that
+				 * handles that case. (We must fix up cnt first, though.)
+				 */
+				if (feof(fp))
+				{
+					cnt = page_index_in_buf;
+					break;
+				}
+
+				blkno++;
+			}
+		}
+
+		/* Send the chunk as a CopyData message */
+		if (pq_putmessage('d', buf, cnt))
+			ereport(ERROR,
+					(errmsg("base backup could not send data, aborting backup")));
+
+		len += cnt;
+		throttle(cnt);
+
+		if (feof(fp) || len >= statbuf->st_size)
+		{
+			/*
+			 * Reached end of file. The file could be longer, if it was
+			 * extended while we were sending it, but for a base backup we can
+			 * ignore such extended data. It will be restored from WAL.
+			 */
+			break;
+		}
+	}
+
+	CHECK_FREAD_ERROR(fp, readfilename);
+
+	return len;
+}
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1791853..8df453b 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -31,6 +31,7 @@
 #include "common/file_utils.h"
 #include "common/logging.h"
 #include "common/string.h"
+#include "fe_utils/simple_list.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "libpq-fe.h"
@@ -43,19 +44,6 @@
 
 #define ERRCODE_DATA_CORRUPTED	"XX001"
 
-typedef struct TablespaceListCell
-{
-	struct TablespaceListCell *next;
-	char		old_dir[MAXPGPATH];
-	char		new_dir[MAXPGPATH];
-} TablespaceListCell;
-
-typedef struct TablespaceList
-{
-	TablespaceListCell *head;
-	TablespaceListCell *tail;
-} TablespaceList;
-
 /*
  * pg_xlog has been renamed to pg_wal in version 10.  This version number
  * should be compared with PQserverVersion().
@@ -155,9 +143,6 @@ static void BaseBackup(void);
 static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
 								 bool segment_finished);
 
-static const char *get_tablespace_mapping(const char *dir);
-static void tablespace_list_append(const char *arg);
-
 
 static void
 cleanup_directories_atexit(void)
@@ -227,84 +212,6 @@ kill_bgchild_atexit(void)
 }
 #endif
 
-/*
- * Split argument into old_dir and new_dir and append to tablespace mapping
- * list.
- */
-static void
-tablespace_list_append(const char *arg)
-{
-	TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell));
-	char	   *dst;
-	char	   *dst_ptr;
-	const char *arg_ptr;
-
-	dst_ptr = dst = cell->old_dir;
-	for (arg_ptr = arg; *arg_ptr; arg_ptr++)
-	{
-		if (dst_ptr - dst >= MAXPGPATH)
-		{
-			pg_log_error("directory name too long");
-			exit(1);
-		}
-
-		if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=')
-			;					/* skip backslash escaping = */
-		else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\'))
-		{
-			if (*cell->new_dir)
-			{
-				pg_log_error("multiple \"=\" signs in tablespace mapping");
-				exit(1);
-			}
-			else
-				dst = dst_ptr = cell->new_dir;
-		}
-		else
-			*dst_ptr++ = *arg_ptr;
-	}
-
-	if (!*cell->old_dir || !*cell->new_dir)
-	{
-		pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg);
-		exit(1);
-	}
-
-	/*
-	 * This check isn't absolutely necessary.  But all tablespaces are created
-	 * with absolute directories, so specifying a non-absolute path here would
-	 * just never match, possibly confusing users.  It's also good to be
-	 * consistent with the new_dir check.
-	 */
-	if (!is_absolute_path(cell->old_dir))
-	{
-		pg_log_error("old directory is not an absolute path in tablespace mapping: %s",
-					 cell->old_dir);
-		exit(1);
-	}
-
-	if (!is_absolute_path(cell->new_dir))
-	{
-		pg_log_error("new directory is not an absolute path in tablespace mapping: %s",
-					 cell->new_dir);
-		exit(1);
-	}
-
-	/*
-	 * Comparisons done with these values should involve similarly
-	 * canonicalized path values.  This is particularly sensitive on Windows
-	 * where path values may not necessarily use Unix slashes.
-	 */
-	canonicalize_path(cell->old_dir);
-	canonicalize_path(cell->new_dir);
-
-	if (tablespace_dirs.tail)
-		tablespace_dirs.tail->next = cell;
-	else
-		tablespace_dirs.head = cell;
-	tablespace_dirs.tail = cell;
-}
-
 
 #ifdef HAVE_LIBZ
 static const char *
@@ -1359,28 +1266,6 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 
 
 /*
- * Retrieve tablespace path, either relocated or original depending on whether
- * -T was passed or not.
- */
-static const char *
-get_tablespace_mapping(const char *dir)
-{
-	TablespaceListCell *cell;
-	char		canon_dir[MAXPGPATH];
-
-	/* Canonicalize path for comparison consistency */
-	strlcpy(canon_dir, dir, sizeof(canon_dir));
-	canonicalize_path(canon_dir);
-
-	for (cell = tablespace_dirs.head; cell; cell = cell->next)
-		if (strcmp(canon_dir, cell->old_dir) == 0)
-			return cell->new_dir;
-
-	return dir;
-}
-
-
-/*
  * Receive a tar format stream from the connection to the server, and unpack
  * the contents of it into a directory. Only files, directories and
  * symlinks are supported, no other kinds of special files.
@@ -1406,7 +1291,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
 		strlcpy(current_path, basedir, sizeof(current_path));
 	else
 		strlcpy(current_path,
-				get_tablespace_mapping(PQgetvalue(res, rownum, 1)),
+				get_tablespace_mapping(&tablespace_dirs,
+									   PQgetvalue(res, rownum, 1)),
 				sizeof(current_path));
 
 	/*
@@ -1537,7 +1423,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
 					 */
 					filename[strlen(filename) - 1] = '\0';	/* Remove trailing slash */
 
-					mapped_tblspc_path = get_tablespace_mapping(&copybuf[157]);
+					mapped_tblspc_path = get_tablespace_mapping(&tablespace_dirs,
+																&copybuf[157]);
 					if (symlink(mapped_tblspc_path, filename) != 0)
 					{
 						pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m",
@@ -1966,7 +1853,9 @@ BaseBackup(void)
 		 */
 		if (format == 'p' && !PQgetisnull(res, i, 1))
 		{
-			char	   *path = unconstify(char *, get_tablespace_mapping(PQgetvalue(res, i, 1)));
+			char	   *path = unconstify(char *,
+										  get_tablespace_mapping(&tablespace_dirs,
+																 PQgetvalue(res, i, 1)));
 
 			verify_dir_is_empty_or_create(path, &made_tablespace_dirs, &found_tablespace_dirs);
 		}
@@ -2276,7 +2165,7 @@ main(int argc, char **argv)
 				no_slot = true;
 				break;
 			case 'T':
-				tablespace_list_append(optarg);
+				tablespace_list_append(&tablespace_dirs, optarg);
 				break;
 			case 'X':
 				if (strcmp(optarg, "n") == 0 ||
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index cfdb7dc..90ef114 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -16,6 +16,7 @@
  */
 #include "postgres_fe.h"
 
+#include "common/logging.h"
 #include "fe_utils/simple_list.h"
 
 
@@ -152,3 +153,106 @@ simple_string_list_not_touched(SimpleStringList *list)
 	}
 	return NULL;
 }
+
+/*
+ * Split argument into old_dir and new_dir and append to tablespace mapping
+ * list.
+ */
+void
+tablespace_list_append(TablespaceList *tablespace_dirs, const char *arg)
+{
+	TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell));
+	char	   *dst;
+	char	   *dst_ptr;
+	const char *arg_ptr;
+
+	Assert(tablespace_dirs);
+
+	dst_ptr = dst = cell->old_dir;
+	for (arg_ptr = arg; *arg_ptr; arg_ptr++)
+	{
+		if (dst_ptr - dst >= MAXPGPATH)
+		{
+			pg_log_error("directory name too long");
+			exit(1);
+		}
+
+		if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=')
+			;					/* skip backslash escaping = */
+		else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\'))
+		{
+			if (*cell->new_dir)
+			{
+				pg_log_error("multiple \"=\" signs in tablespace mapping");
+				exit(1);
+			}
+			else
+				dst = dst_ptr = cell->new_dir;
+		}
+		else
+			*dst_ptr++ = *arg_ptr;
+	}
+
+	if (!*cell->old_dir || !*cell->new_dir)
+	{
+		pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg);
+		exit(1);
+	}
+
+	/*
+	 * This check isn't absolutely necessary.  But all tablespaces are created
+	 * with absolute directories, so specifying a non-absolute path here would
+	 * just never match, possibly confusing users.  It's also good to be
+	 * consistent with the new_dir check.
+	 */
+	if (!is_absolute_path(cell->old_dir))
+	{
+		pg_log_error("old directory is not an absolute path in tablespace mapping: %s",
+					 cell->old_dir);
+		exit(1);
+	}
+
+	if (!is_absolute_path(cell->new_dir))
+	{
+		pg_log_error("new directory is not an absolute path in tablespace mapping: %s",
+					 cell->new_dir);
+		exit(1);
+	}
+
+	/*
+	 * Comparisons done with these values should involve similarly
+	 * canonicalized path values.  This is particularly sensitive on Windows
+	 * where path values may not necessarily use Unix slashes.
+	 */
+	canonicalize_path(cell->old_dir);
+	canonicalize_path(cell->new_dir);
+
+	if (tablespace_dirs->tail)
+		tablespace_dirs->tail->next = cell;
+	else
+		tablespace_dirs->head = cell;
+	tablespace_dirs->tail = cell;
+}
+
+/*
+ * Retrieve tablespace path, either relocated or original depending on whether
+ * -T was passed or not.
+ */
+const char *
+get_tablespace_mapping(TablespaceList *tablespace_dirs, const char *dir)
+{
+	TablespaceListCell *cell;
+	char		canon_dir[MAXPGPATH];
+
+	Assert(tablespace_dirs);
+
+	/* Canonicalize path for comparison consistency */
+	strlcpy(canon_dir, dir, sizeof(canon_dir));
+	canonicalize_path(canon_dir);
+
+	for (cell = tablespace_dirs->head; cell; cell = cell->next)
+		if (strcmp(canon_dir, cell->old_dir) == 0)
+			return cell->new_dir;
+
+	return dir;
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 75738be..436358b 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -6,6 +6,9 @@
  * these is very primitive compared to the backend's List facilities, but
  * it's all we need in, eg, pg_dump.
  *
+ * Also, has data structures for simple lists of tablespace mappings used in
+ * backups.
+ *
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -43,6 +46,19 @@ typedef struct SimpleStringList
 	SimpleStringListCell *tail;
 } SimpleStringList;
 
+typedef struct TablespaceListCell
+{
+	struct TablespaceListCell *next;
+	char		old_dir[MAXPGPATH];
+	char		new_dir[MAXPGPATH];
+} TablespaceListCell;
+
+typedef struct TablespaceList
+{
+	TablespaceListCell *head;
+	TablespaceListCell *tail;
+} TablespaceList;
+
 
 extern void simple_oid_list_append(SimpleOidList *list, Oid val);
 extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
@@ -54,4 +70,9 @@ extern void simple_string_list_destroy(SimpleStringList *list);
 
 extern const char *simple_string_list_not_touched(SimpleStringList *list);
 
+extern void tablespace_list_append(TablespaceList *tablespace_dirs,
+								   const char *arg);
+extern const char *get_tablespace_mapping(TablespaceList *tablespace_dirs,
+										  const char *dir);
+
 #endif							/* SIMPLE_LIST_H */
-- 
1.8.3.1