v10-0002-Generate-backup-manifests-for-base-backups.patch

application/octet-stream

Filename: v10-0002-Generate-backup-manifests-for-base-backups.patch
Type: application/octet-stream
Part: 3
Message: Re: backup manifests

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. See the API reference.
Format: format-patch
Series: patch v10-0002
Subject: Generate backup manifests for base backups.
File+
src/backend/access/transam/xlog.c 2 1
src/backend/replication/basebackup.c 341 22
src/backend/replication/repl_gram.y 6 0
src/backend/replication/repl_scanner.l 1 0
src/backend/replication/walsender.c 30 0
src/bin/pg_basebackup/pg_basebackup.c 126 4
src/include/replication/basebackup.h 6 1
src/include/replication/walsender.h 1 0
From 7e63c0ed8ffeb669759fa760012a511c2a5e647f Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 9 Mar 2020 17:59:38 -0400
Subject: [PATCH v10 2/4] Generate backup manifests for base backups.

A manifest is a JSON document which includes the file name, size, last
modification time, and a checksum for each file backed up, as well as
a checksum for the manifest itself. By default, we use CRC-32C for the
checksum algorithm, because we are trying to detect corruption and
user error, not foil an adversary. However, pg_basebackup and the
server-side BASE_BACKUP command now have options to select the
checksum algorithm, so users wanting a cryptographic hash function can
select SHA-224, SHA-256, SHA-384, or SHA-512; and users not wanting
any checksums at all can disable them. Using a cryptographic hash
function in place of CRC-32C consumes significantly more CPU cycles,
which may slow down backups in some cases.

Robert Haas with help from Rushabh Lathia and Suraj Kharage.
---
 src/backend/access/transam/xlog.c      |   3 +-
 src/backend/replication/basebackup.c   | 363 +++++++++++++++++++++++--
 src/backend/replication/repl_gram.y    |   6 +
 src/backend/replication/repl_scanner.l |   1 +
 src/backend/replication/walsender.c    |  30 ++
 src/bin/pg_basebackup/pg_basebackup.c  | 130 ++++++++-
 src/include/replication/basebackup.h   |   7 +-
 src/include/replication/walsender.h    |   1 +
 8 files changed, 513 insertions(+), 28 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b0e953f894..3f9908427f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10551,7 +10551,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 			ti->oid = pstrdup(de->d_name);
 			ti->path = pstrdup(buflinkpath.data);
 			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+			ti->size = infotbssize ?
+				sendTablespace(fullpath, ti->oid, true, NULL) : -1;
 
 			if (tablespaces)
 				*tablespaces = lappend(*tablespaces, ti);
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index f66cbc2428..77f15f6233 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -18,6 +18,7 @@
 
 #include "access/xlog_internal.h"	/* for pg_start/stop_backup */
 #include "catalog/pg_type.h"
+#include "common/checksum_helper.h"
 #include "common/file_perm.h"
 #include "commands/progress.h"
 #include "lib/stringinfo.h"
@@ -32,6 +33,7 @@
 #include "replication/basebackup.h"
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
+#include "storage/buffile.h"
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
@@ -39,8 +41,10 @@
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
+#include "utils/json.h"
 #include "utils/ps_status.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "utils/timestamp.h"
 
 typedef struct
@@ -52,20 +56,40 @@ typedef struct
 	bool		includewal;
 	uint32		maxrate;
 	bool		sendtblspcmapfile;
+	pg_checksum_type checksum_type;
 } basebackup_options;
 
+struct manifest_info
+{
+	BufFile    *buffile;		/* temp file the manifest text is written to */
+	pg_checksum_type checksum_type;	/* algorithm for per-file checksums */
+	pg_sha256_ctx manifest_ctx; /* running SHA-256 over the manifest text */
+	uint64		manifest_size;	/* bytes appended to buffile so far */
+	bool		first_file;		/* true until the first file entry is added */
+	bool		still_checksumming; /* false once manifest_ctx is finalized */
+};
+
 
 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
-					 List *tablespaces, bool sendtblspclinks);
+					 List *tablespaces, bool sendtblspclinks,
+					 manifest_info *manifest, const char *spcoid);
 static bool sendFile(const char *readfilename, const char *tarfilename,
-					 struct stat *statbuf, bool missing_ok, Oid dboid);
-static void sendFileWithContent(const char *filename, const char *content);
+					 struct stat *statbuf, bool missing_ok, Oid dboid,
+					 manifest_info *manifest, const char *spcoid);
+static void sendFileWithContent(const char *filename, const char *content,
+								manifest_info *manifest);
 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
 							 struct stat *statbuf, bool sizeonly);
 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
 						  bool sizeonly);
 static void send_int8_string(StringInfoData *buf, int64 intval);
 static void SendBackupHeader(List *tablespaces);
+static void InitializeManifest(manifest_info *manifest, pg_checksum_type);
+static void AppendStringToManifest(manifest_info *manifest, char *s);
+static void AddFileToManifest(manifest_info *manifest, const char *spcoid,
+							  const char *pathname, size_t size, time_t mtime,
+							  pg_checksum_context *checksum_ctx);
+static void SendBackupManifest(manifest_info *manifest);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
@@ -102,6 +126,16 @@ do { \
 				(errmsg("could not read from file \"%s\"", filename))); \
 } while (0)
 
+/*
+ * Convenience macro for appending data to the backup manifest.
+ */
+#define AppendToManifest(manifest, ...) \
+	{ \
+		char *_manifest_s = psprintf(__VA_ARGS__);	\
+		AppendStringToManifest(manifest, _manifest_s);	\
+		pfree(_manifest_s);	\
+	}
+
 /* The actual number of bytes, transfer of which may cause sleep. */
 static uint64 throttling_sample;
 
@@ -251,6 +285,7 @@ perform_base_backup(basebackup_options *opt)
 	TimeLineID	endtli;
 	StringInfo	labelfile;
 	StringInfo	tblspc_map_file = NULL;
+	manifest_info manifest;
 	int			datadirpathlen;
 	List	   *tablespaces = NIL;
 
@@ -258,12 +293,17 @@ perform_base_backup(basebackup_options *opt)
 	backup_streamed = 0;
 	pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid);
 
+	/* we're going to use a BufFile, so we need a ResourceOwner */
+	Assert(CurrentResourceOwner == NULL);
+	CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
+
 	datadirpathlen = strlen(DataDir);
 
 	backup_started_in_recovery = RecoveryInProgress();
 
 	labelfile = makeStringInfo();
 	tblspc_map_file = makeStringInfo();
+	InitializeManifest(&manifest, opt->checksum_type);
 
 	total_checksum_failures = 0;
 
@@ -301,7 +341,10 @@ perform_base_backup(basebackup_options *opt)
 
 		/* Add a node for the base directory at the end */
 		ti = palloc0(sizeof(tablespaceinfo));
-		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
+		if (opt->progress)
+			ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL);
+		else
+			ti->size = -1;
 		tablespaces = lappend(tablespaces, ti);
 
 		/*
@@ -380,7 +423,8 @@ perform_base_backup(basebackup_options *opt)
 				struct stat statbuf;
 
 				/* In the main tar, include the backup_label first... */
-				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
+				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data,
+									&manifest);
 
 				/*
 				 * Send tablespace_map file if required and then the bulk of
@@ -388,11 +432,14 @@ perform_base_backup(basebackup_options *opt)
 				 */
 				if (tblspc_map_file && opt->sendtblspcmapfile)
 				{
-					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
-					sendDir(".", 1, false, tablespaces, false);
+					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data,
+										&manifest);
+					sendDir(".", 1, false, tablespaces, false,
+							&manifest, NULL);
 				}
 				else
-					sendDir(".", 1, false, tablespaces, true);
+					sendDir(".", 1, false, tablespaces, true,
+							&manifest, NULL);
 
 				/* ... and pg_control after everything else. */
 				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -400,10 +447,11 @@ perform_base_backup(basebackup_options *opt)
 							(errcode_for_file_access(),
 							 errmsg("could not stat file \"%s\": %m",
 									XLOG_CONTROL_FILE)));
-				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
+				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
+						 false, InvalidOid, &manifest, NULL);
 			}
 			else
-				sendTablespace(ti->path, false);
+				sendTablespace(ti->path, ti->oid, false, &manifest);
 
 			/*
 			 * If we're including WAL, and this is the main data directory we
@@ -632,7 +680,7 @@ perform_base_backup(basebackup_options *opt)
 			 * complete segment.
 			 */
 			StatusFilePath(pathbuf, walFileName, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", &manifest);
 		}
 
 		/*
@@ -655,16 +703,20 @@ perform_base_backup(basebackup_options *opt)
 						(errcode_for_file_access(),
 						 errmsg("could not stat file \"%s\": %m", pathbuf)));
 
-			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
+			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid,
+					 &manifest, NULL);
 
 			/* unconditionally mark file as archived */
 			StatusFilePath(pathbuf, fname, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", &manifest);
 		}
 
 		/* Send CopyDone message for the last tar file */
 		pq_putemptymessage('c');
 	}
+
+	SendBackupManifest(&manifest);
+
 	SendXlogRecPtrResult(endptr, endtli);
 
 	if (total_checksum_failures)
@@ -678,6 +730,9 @@ perform_base_backup(basebackup_options *opt)
 				 errmsg("checksum verification failure during base backup")));
 	}
 
+	/* clean up the resource owner we created */
+	WalSndResourceCleanup(true);
+
 	pgstat_progress_end_command();
 }
 
@@ -709,8 +764,11 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 	bool		o_maxrate = false;
 	bool		o_tablespace_map = false;
 	bool		o_noverify_checksums = false;
+	bool		o_manifest_checksums = false;
 
 	MemSet(opt, 0, sizeof(*opt));
+	opt->checksum_type = CHECKSUM_TYPE_CRC32C;
+
 	foreach(lopt, options)
 	{
 		DefElem    *defel = (DefElem *) lfirst(lopt);
@@ -797,6 +855,21 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 			noverify_checksums = true;
 			o_noverify_checksums = true;
 		}
+		else if (strcmp(defel->defname, "manifest_checksums") == 0)
+		{
+			char	   *optval = strVal(defel->arg);
+
+			if (o_manifest_checksums)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("duplicate option \"%s\"", defel->defname)));
+			if (!pg_checksum_parse_type(optval, &opt->checksum_type))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("unrecognized checksum algorithm: \"%s\"",
+								optval)));
+			o_manifest_checksums = true;
+		}
 		else
 			elog(ERROR, "option \"%s\" not recognized",
 				 defel->defname);
@@ -918,6 +991,228 @@ SendBackupHeader(List *tablespaces)
 	pq_puttextmessage('C', "SELECT");
 }
 
+/*
+ * Initialize state for building a backup manifest: create the temporary
+ * file, reset the bookkeeping fields, and emit the opening JSON text.
+ *
+ * NB: The checksum type for the data files is configurable, but the manifest
+ * itself is always checksummed with SHA-256. See SendBackupManifest.
+ */
+static void
+InitializeManifest(manifest_info *manifest, pg_checksum_type checksum_type)
+{
+	manifest->buffile = BufFileCreateTemp(false);
+	manifest->checksum_type = checksum_type;
+	pg_sha256_init(&manifest->manifest_ctx);
+	manifest->manifest_size = UINT64CONST(0);
+	manifest->first_file = true;
+	manifest->still_checksumming = true;
+
+	AppendToManifest(manifest,
+					 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
+					 "\"Files\": [");
+}
+
+/*
+ * Append a cstring to the manifest file and, until finalized, to its SHA-256.
+ */
+static void
+AppendStringToManifest(manifest_info *manifest, char *s)
+{
+	size_t		len = strlen(s);	/* size_t: matches BufFileWrite's result */
+	size_t		written;
+
+	if (manifest->still_checksumming)
+		pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
+	written = BufFileWrite(manifest->buffile, s, len);
+	if (written != len)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to temporary file: %m")));
+	manifest->manifest_size += len;
+}
+
+/*
+ * Add an entry for one file to the backup manifest, finalizing its checksum.
+ */
+static void
+AddFileToManifest(manifest_info *manifest, const char *spcoid,
+				  const char *pathname, size_t size, time_t mtime,
+				  pg_checksum_context *checksum_ctx)
+{
+	char		pathbuf[MAXPGPATH];
+	int			pathlen;
+	StringInfoData	buf;
+	pg_time_t	pg_mtime = (pg_time_t) mtime;	/* pg_gmtime wants pg_time_t */
+
+	/*
+	 * If this file is part of a tablespace, the pathname passed to this
+	 * function will be relative to the tar file that contains it. We want the
+	 * pathname relative to the data directory (ignoring the intermediate
+	 * symlink traversal).
+	 */
+	if (spcoid != NULL)
+	{
+		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
+				 pathname);
+		pathname = pathbuf;
+	}
+
+	/*
+	 * Each file's entry needs to be separated from any entry that follows by
+	 * a comma, but there's no comma before the first one or after the last
+	 * one. So adding a file starts by terminating the most recently added
+	 * line, with a comma if appropriate, and leaves its own line unterminated.
+	 */
+	initStringInfo(&buf);
+	if (manifest->first_file)
+	{
+		appendStringInfoString(&buf, "\n");
+		manifest->first_file = false;
+	}
+	else
+		appendStringInfoString(&buf, ",\n");
+
+	/*
+	 * Write the relative pathname to this file out to the manifest. The
+	 * manifest is always stored in UTF-8, so we have to encode paths that
+	 * are not valid in that encoding.
+	 */
+	pathlen = strlen(pathname);
+	if (pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
+	{
+		appendStringInfoString(&buf, "{ \"Path\": ");
+		escape_json(&buf, pathname);
+		appendStringInfoString(&buf, ", ");
+	}
+	else
+	{
+		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
+		enlargeStringInfo(&buf, 2 * pathlen);
+		buf.len += hex_encode((char *) pathname, pathlen,
+							  &buf.data[buf.len]);
+		appendStringInfoString(&buf, "\", ");
+	}
+
+	appendStringInfo(&buf, "\"Size\": %zu, ", size);
+
+	/*
+	 * Convert last modification time to a string and append it to the
+	 * manifest. Since it's not clear what time zone to use and since time
+	 * zone definitions can change, possibly causing confusion, use GMT always.
+	 */
+	appendStringInfoString(&buf, "\"Last-Modified\": \"");
+	enlargeStringInfo(&buf, 128);
+	buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
+						   pg_gmtime(&pg_mtime));
+	appendStringInfoString(&buf, "\"");
+
+	/* Add checksum information. */
+	if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
+	{
+		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+		int			checksumlen;
+
+		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
+
+		appendStringInfo(&buf,
+						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
+						 pg_checksum_type_name(checksum_ctx->type));
+		enlargeStringInfo(&buf, 2 * checksumlen);
+		buf.len += hex_encode((char *) checksumbuf, checksumlen,
+							  &buf.data[buf.len]);
+		appendStringInfoString(&buf, "\"");
+	}
+
+	/* Close out the object. */
+	appendStringInfoString(&buf, " }");
+
+	/* OK, add it to the manifest. */
+	AppendStringToManifest(manifest, buf.data);
+
+	/* Avoid leaking memory. */
+	pfree(buf.data);
+}
+
+/*
+ * Finalize the backup manifest, and send it to the client.
+ */
+static void
+SendBackupManifest(manifest_info *manifest)
+{
+	StringInfoData protobuf;
+	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
+	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
+	size_t		manifest_bytes_done = 0;
+
+	/* Terminate the list of files. */
+	AppendStringToManifest(manifest, "],\n");
+
+	/*
+	 * Append the manifest checksum, so that problems with the manifest itself
+	 * can be detected.
+	 *
+	 * We always use SHA-256 for this, regardless of what algorithm is chosen
+	 * for checksumming the files.  If we ever want to make the checksum
+	 * algorithm used for the manifest file variable, the client will need a
+	 * way to figure out which algorithm to use as close to the beginning of
+	 * the manifest file as possible, to avoid having to read the whole thing
+	 * twice.
+	 */
+	manifest->still_checksumming = false;	/* text after this is not hashed */
+	pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
+	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
+	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
+	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
+	AppendStringToManifest(manifest, checksumstringbuf);
+	AppendStringToManifest(manifest, "\"}\n");
+
+	/*
+	 * We've written all the data to the manifest file.  Rewind the file so
+	 * that we can read it all back.
+	 */
+	if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not rewind temporary file: %m")));
+
+	/* Send CopyOutResponse message */
+	pq_beginmessage(&protobuf, 'H');
+	pq_sendbyte(&protobuf, 0);	/* overall format */
+	pq_sendint16(&protobuf, 0); /* natts */
+	pq_endmessage(&protobuf);
+
+	/*
+	 * Send CopyData messages.
+	 *
+	 * We choose to read back the data from the temporary file in chunks of
+	 * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
+	 * size, so it seems to make sense to match that value here.
+	 */
+	while (manifest_bytes_done < manifest->manifest_size)
+	{
+		char		manifestbuf[BLCKSZ];
+		size_t		bytes_to_read;
+		size_t		rc;
+
+		bytes_to_read = Min(sizeof(manifestbuf),
+							manifest->manifest_size - manifest_bytes_done);
+		rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
+		if (rc != bytes_to_read)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read from temporary file: %m")));
+		pq_putmessage('d', manifestbuf, bytes_to_read);
+		manifest_bytes_done += bytes_to_read;
+	}
+
+	/* No more data, so send CopyDone message */
+	pq_putemptymessage('c');
+
+	/* Release resources */
+	BufFileClose(manifest->buffile);
+}
+
 /*
  * Send a single resultset containing just a single
  * XLogRecPtr record (in text format)
@@ -978,11 +1273,15 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
  * Inject a file with given name and content in the output tar stream.
  */
 static void
-sendFileWithContent(const char *filename, const char *content)
+sendFileWithContent(const char *filename, const char *content,
+					manifest_info *manifest)
 {
 	struct stat statbuf;
 	int			pad,
 				len;
+	pg_checksum_context checksum_ctx;
+
+	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	len = strlen(content);
 
@@ -1017,6 +1316,10 @@ sendFileWithContent(const char *filename, const char *content)
 		pq_putmessage('d', buf, pad);
 		update_basebackup_progress(pad);
 	}
+
+	pg_checksum_update(&checksum_ctx, (uint8 *) content, len);
+	AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
+					  &checksum_ctx);
 }
 
 /*
@@ -1027,7 +1330,8 @@ sendFileWithContent(const char *filename, const char *content)
  * Only used to send auxiliary tablespaces, not PGDATA.
  */
 int64
-sendTablespace(char *path, bool sizeonly)
+sendTablespace(char *path, char *spcoid, bool sizeonly,
+			   manifest_info *manifest)
 {
 	int64		size;
 	char		pathbuf[MAXPGPATH];
@@ -1060,7 +1364,8 @@ sendTablespace(char *path, bool sizeonly)
 						   sizeonly);
 
 	/* Send all the files in the tablespace version directory */
-	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
+	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest,
+					spcoid);
 
 	return size;
 }
@@ -1079,7 +1384,7 @@ sendTablespace(char *path, bool sizeonly)
  */
 static int64
 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
-		bool sendtblspclinks)
+		bool sendtblspclinks, manifest_info *manifest, const char *spcoid)
 {
 	DIR		   *dir;
 	struct dirent *de;
@@ -1359,7 +1664,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 				skip_this_dir = true;
 
 			if (!skip_this_dir)
-				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
+				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces,
+								sendtblspclinks, manifest, spcoid);
 		}
 		else if (S_ISREG(statbuf.st_mode))
 		{
@@ -1367,7 +1673,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 
 			if (!sizeonly)
 				sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
-								true, isDbDir ? atooid(lastDir + 1) : InvalidOid);
+								true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
+								manifest, spcoid);
 
 			if (sent || sizeonly)
 			{
@@ -1437,8 +1744,9 @@ is_checksummed_file(const char *fullpath, const char *filename)
  * and the file did not exist.
  */
 static bool
-sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
-		 bool missing_ok, Oid dboid)
+sendFile(const char *readfilename, const char *tarfilename,
+		 struct stat *statbuf, bool missing_ok, Oid dboid,
+		 manifest_info *manifest, const char *spcoid)
 {
 	FILE	   *fp;
 	BlockNumber blkno = 0;
@@ -1455,6 +1763,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
+	pg_checksum_context checksum_ctx;
+
+	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1625,6 +1936,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 					(errmsg("base backup could not send data, aborting backup")));
 		update_basebackup_progress(cnt);
 
+		/* Also feed it to the checksum machinery. */
+		pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
+
 		len += cnt;
 		throttle(cnt);
 
@@ -1649,6 +1963,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		{
 			cnt = Min(sizeof(buf), statbuf->st_size - len);
 			pq_putmessage('d', buf, cnt);
+			pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
 			update_basebackup_progress(cnt);
 			len += cnt;
 			throttle(cnt);
@@ -1657,7 +1972,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	/*
 	 * Pad to 512 byte boundary, per tar format requirements. (This small
-	 * piece of data is probably not worth throttling.)
+	 * piece of data is probably not worth throttling, and is not checksummed
+	 * because it's not actually part of the file.)
 	 */
 	pad = ((len + 511) & ~511) - len;
 	if (pad > 0)
@@ -1682,6 +1998,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	total_checksum_failures += checksum_failures;
 
+	AddFileToManifest(manifest, spcoid, tarfilename, statbuf->st_size,
+					  statbuf->st_mtime, &checksum_ctx);
+
 	return true;
 }
 
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 14fcd53221..0621884ad8 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void);
 %token K_EXPORT_SNAPSHOT
 %token K_NOEXPORT_SNAPSHOT
 %token K_USE_SNAPSHOT
+%token K_MANIFEST_CHECKSUMS
 
 %type <node>	command
 %type <node>	base_backup start_replication start_logical_replication
@@ -214,6 +215,11 @@ base_backup_opt:
 				  $$ = makeDefElem("noverify_checksums",
 								   (Node *)makeInteger(true), -1);
 				}
+			| K_MANIFEST_CHECKSUMS SCONST
+				{
+				  $$ = makeDefElem("manifest_checksums",
+								   (Node *)makeString($2), -1);
+				}
 			;
 
 create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 14c9a1e798..5653d233b5 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -107,6 +107,7 @@ EXPORT_SNAPSHOT		{ return K_EXPORT_SNAPSHOT; }
 NOEXPORT_SNAPSHOT	{ return K_NOEXPORT_SNAPSHOT; }
 USE_SNAPSHOT		{ return K_USE_SNAPSHOT; }
 WAIT				{ return K_WAIT; }
+MANIFEST_CHECKSUMS	{ return K_MANIFEST_CHECKSUMS; }
 
 ","				{ return ','; }
 ";"				{ return ';'; }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index ae4a9cbe11..cc0b97627c 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -315,6 +315,8 @@ WalSndErrorCleanup(void)
 
 	replication_active = false;
 
+	WalSndResourceCleanup(false);
+
 	if (got_STOPPING || got_SIGUSR2)
 		proc_exit(0);
 
@@ -322,6 +324,34 @@ WalSndErrorCleanup(void)
 	WalSndSetState(WALSNDSTATE_STARTUP);
 }
 
+/*
+ * Release and delete CurrentResourceOwner, if there is one.
+ */
+void
+WalSndResourceCleanup(bool isCommit)
+{
+	ResourceOwner	resowner;
+
+	if (CurrentResourceOwner == NULL)
+		return;					/* nothing to clean up */
+
+	/*
+	 * Deleting CurrentResourceOwner is not allowed, so we must save a
+	 * pointer in a local variable and clear it first.
+	 */
+	resowner = CurrentResourceOwner;
+	CurrentResourceOwner = NULL;
+
+	/* Now we can release resources, phase by phase, and delete it. */
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_AFTER_LOCKS, isCommit, true);
+	ResourceOwnerDelete(resowner);
+}
+
 /*
  * Handle a client's connection abort in an orderly manner.
  */
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 48bd838803..235416a7c2 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -88,6 +88,12 @@ typedef struct UnpackTarState
 	FILE	   *file;
 } UnpackTarState;
 
+typedef struct WriteManifestState
+{
+	char		filename[MAXPGPATH];	/* path of the manifest output file */
+	FILE	   *file;			/* stream opened on filename for writing */
+} WriteManifestState;
+
 typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
 								   void *callback_data);
 
@@ -135,6 +141,7 @@ static bool temp_replication_slot = true;
 static bool create_slot = false;
 static bool no_slot = false;
 static bool verify_checksums = true;
+static char *manifest_checksums = NULL;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -180,6 +187,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
 static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
 static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf,
 										 void *callback_data);
+static void ReceiveBackupManifest(PGconn *conn);
+static void ReceiveBackupManifestChunk(size_t r, char *copybuf,
+									   void *callback_data);
+static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf);
+static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+											   void *callback_data);
 static void BaseBackup(void);
 
 static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
@@ -386,6 +399,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --manifest-checksums=SHA{224,256,384,512}|CRC32C|NONE\n"
+			 "                         use algorithm for manifest checksums\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -1184,6 +1199,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 		}
 	}
 
+	/*
+	 * Normally, we emit the backup manifest as a separate file, but when
+	 * we're writing a tarfile to stdout, we don't have that option, so
+	 * include it in the one tarfile we've got.
+	 */
+	if (strcmp(basedir, "-") == 0)
+	{
+		char		header[512];
+		PQExpBufferData	buf;
+
+		initPQExpBuffer(&buf);
+		ReceiveBackupManifestInMemory(conn, &buf);
+		if (PQExpBufferDataBroken(buf))
+		{
+			pg_log_error("out of memory");
+			exit(1);
+		}
+		tarCreateHeader(header, "backup_manifest", NULL, buf.len,
+						pg_file_create_mode, 04000, 02000,
+						time(NULL));
+		writeTarData(&state, header, sizeof(header));
+		writeTarData(&state, buf.data, buf.len);
+		termPQExpBuffer(&buf);
+	}
+
 	/* 2 * 512 bytes empty data at end of file */
 	writeTarData(&state, zerobuf, sizeof(zerobuf));
 
@@ -1655,6 +1695,64 @@ ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data)
 	}							/* continuing data in existing file */
 }
 
+/*
+ * Receive the backup manifest and write it to "backup_manifest" in basedir.
+ */
+static void
+ReceiveBackupManifest(PGconn *conn)
+{
+	WriteManifestState state;
+
+	snprintf(state.filename, sizeof(state.filename),
+			 "%s/backup_manifest", basedir);
+	state.file = fopen(state.filename, "wb");
+	if (state.file == NULL)
+	{
+		pg_log_error("could not create file \"%s\": %m", state.filename);
+		exit(1);
+	}
+
+	ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state);
+
+	fclose(state.file);			/* NOTE(review): return value is not checked */
+}
+
+/*
+ * CopyData callback: write one received manifest chunk to the output file.
+ */
+static void
+ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data)
+{
+	WriteManifestState *mstate = (WriteManifestState *) callback_data;
+
+	if (fwrite(copybuf, r, 1, mstate->file) == 1)
+		return;
+
+	pg_log_error("could not write to file \"%s\": %m", mstate->filename);
+	exit(1);
+}
+
+/*
+ * Receive the backup manifest file and collect it in the caller's buffer.
+ */
+static void
+ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf)
+{
+	ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf);
+}
+
+/*
+ * Receive one chunk of the backup manifest and append it to a PQExpBuffer.
+ */
+static void
+ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+								   void *callback_data)
+{
+	PQExpBuffer buf = callback_data;
+
+	appendBinaryPQExpBuffer(buf, copybuf, r);	/* raw bytes, not a format */
+}
+
 static void
 BaseBackup(void)
 {
@@ -1665,6 +1763,7 @@ BaseBackup(void)
 	char	   *basebkp;
 	char		escaped_label[MAXPGPATH];
 	char	   *maxrate_clause = NULL;
+	char	   *manifest_checksums_clause = NULL;
 	int			i;
 	char		xlogstart[64];
 	char		xlogend[64];
@@ -1672,6 +1771,7 @@ BaseBackup(void)
 				maxServerMajor;
 	int			serverVersion,
 				serverMajor;
+	int			writing_to_stdout;
 
 	Assert(conn != NULL);
 
@@ -1725,6 +1825,9 @@ BaseBackup(void)
 
 	if (maxrate > 0)
 		maxrate_clause = psprintf("MAX_RATE %u", maxrate);
+	if (manifest_checksums != NULL)
+		manifest_checksums_clause = psprintf("MANIFEST_CHECKSUMS '%s'",
+											 manifest_checksums);
 
 	if (verbose)
 		pg_log_info("initiating base backup, waiting for checkpoint to complete");
@@ -1739,7 +1842,7 @@ BaseBackup(void)
 	}
 
 	basebkp =
-		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s",
+		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s",
 				 escaped_label,
 				 showprogress ? "PROGRESS" : "",
 				 includewal == FETCH_WAL ? "WAL" : "",
@@ -1747,7 +1850,8 @@ BaseBackup(void)
 				 includewal == NO_WAL ? "" : "NOWAIT",
 				 maxrate_clause ? maxrate_clause : "",
 				 format == 't' ? "TABLESPACE_MAP" : "",
-				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS");
+				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS",
+				 manifest_checksums_clause ? manifest_checksums_clause : "");
 
 	if (PQsendQuery(conn, basebkp) == 0)
 	{
@@ -1835,7 +1939,8 @@ BaseBackup(void)
 	/*
 	 * When writing to stdout, require a single tablespace
 	 */
-	if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1)
+	writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0;
+	if (writing_to_stdout && PQntuples(res) > 1)
 	{
 		pg_log_error("can only write single tablespace to stdout, database has %d",
 					 PQntuples(res));
@@ -1864,6 +1969,19 @@ BaseBackup(void)
 			ReceiveAndUnpackTarFile(conn, res, i);
 	}							/* Loop over all tablespaces */
 
+	/*
+	 * Now receive backup manifest, if appropriate.
+	 *
+	 * If we're writing a tarfile to stdout, ReceiveTarFile will have already
+	 * processed the backup manifest and included it in the output tarfile.
+	 * Such a configuration doesn't allow for writing multiple files.
+	 *
+	 * If we're talking to an older server, it won't send a backup manifest,
+	 * so don't try to receive one.
+	 */
+	if (!writing_to_stdout && serverMajor >= 1300)
+		ReceiveBackupManifest(conn);
+
 	if (showprogress)
 	{
 		progress_report(PQntuples(res), NULL, true);
@@ -2066,6 +2184,7 @@ main(int argc, char **argv)
 		{"waldir", required_argument, NULL, 1},
 		{"no-slot", no_argument, NULL, 2},
 		{"no-verify-checksums", no_argument, NULL, 3},
+		{"manifest-checksums", required_argument, NULL, 'm'},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2093,7 +2212,7 @@ main(int argc, char **argv)
 
 	atexit(cleanup_directories_atexit);
 
-	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvP",
+	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvPm:",
 							long_options, &option_index)) != -1)
 	{
 		switch (c)
@@ -2234,6 +2353,9 @@ main(int argc, char **argv)
 			case 3:
 				verify_checksums = false;
 				break;
+			case 'm':
+				manifest_checksums = pg_strdup(optarg);
+				break;
 			default:
 
 				/*
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 07ed281bd6..d5b594c928 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -12,6 +12,7 @@
 #ifndef _BASEBACKUP_H
 #define _BASEBACKUP_H
 
+#include "lib/stringinfo.h"
 #include "nodes/replnodes.h"
 
 /*
@@ -29,8 +30,12 @@ typedef struct
 	int64		size;
 } tablespaceinfo;
 
+struct manifest_info;
+typedef struct manifest_info manifest_info;
+
 extern void SendBaseBackup(BaseBackupCmd *cmd);
 
-extern int64 sendTablespace(char *path, bool sizeonly);
+extern int64 sendTablespace(char *path, char *oid, bool sizeonly,
+							manifest_info *manifest);
 
 #endif							/* _BASEBACKUP_H */
diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h
index fd4305e53f..40d81b87f0 100644
--- a/src/include/replication/walsender.h
+++ b/src/include/replication/walsender.h
@@ -38,6 +38,7 @@ extern bool log_replication_commands;
 extern void InitWalSender(void);
 extern bool exec_replication_command(const char *query_string);
 extern void WalSndErrorCleanup(void);
+extern void WalSndResourceCleanup(bool isCommit);
 extern void WalSndSignals(void);
 extern Size WalSndShmemSize(void);
 extern void WalSndShmemInit(void);
-- 
2.17.2 (Apple Git-113)