v11-0002-Generate-backup-manifests-for-base-backups.patch

application/octet-stream

Filename: v11-0002-Generate-backup-manifests-for-base-backups.patch
Type: application/octet-stream
Part: 4
Message: Re: backup manifests

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v11-0002
Subject: Generate backup manifests for base backups.
File+
src/backend/access/transam/xlog.c 2 1
src/backend/replication/basebackup.c 341 22
src/backend/replication/repl_gram.y 6 0
src/backend/replication/repl_scanner.l 1 0
src/backend/replication/walsender.c 30 0
src/bin/pg_basebackup/pg_basebackup.c 136 4
src/include/replication/basebackup.h 6 1
src/include/replication/walsender.h 1 0
From c8f8d516b3416bc2df03f69544061472f29071b6 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 9 Mar 2020 17:59:38 -0400
Subject: [PATCH v11 2/5] Generate backup manifests for base backups.

A manifest is a JSON document which includes the file name, size, last
modification time, and a checksum for each file backed up, as well as
a checksum for the manifest itself. By default, we use CRC-32C for the
checksum algorithm, because we are trying to detect corruption and
user error, not foil an adversary. However, pg_basebackup and the
server-side BASE_BACKUP command now have options to select the
checksum algorithm, so users wanting a cryptographic hash function can
select SHA-224, SHA-256, SHA-384, or SHA-512; and users not wanting
any checksums at all can disable them. Using a cryptographic hash
function in place of CRC-32C consumes significantly more CPU cycles,
which may slow down backups in some cases.

Robert Haas, with help, review, and testing from Rushabh Lathia,
Suraj Kharage, Tushar Ahuja, and Rajkumar Raghuwanshi.
---
 src/backend/access/transam/xlog.c      |   3 +-
 src/backend/replication/basebackup.c   | 363 +++++++++++++++++++++++--
 src/backend/replication/repl_gram.y    |   6 +
 src/backend/replication/repl_scanner.l |   1 +
 src/backend/replication/walsender.c    |  30 ++
 src/bin/pg_basebackup/pg_basebackup.c  | 140 +++++++++-
 src/include/replication/basebackup.h   |   7 +-
 src/include/replication/walsender.h    |   1 +
 8 files changed, 523 insertions(+), 28 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4fa446ffa4..10a6714843 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10551,7 +10551,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 			ti->oid = pstrdup(de->d_name);
 			ti->path = pstrdup(buflinkpath.data);
 			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+			ti->size = infotbssize ?
+				sendTablespace(fullpath, ti->oid, true, NULL) : -1;
 
 			if (tablespaces)
 				*tablespaces = lappend(*tablespaces, ti);
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 806d013108..dd1ec1a688 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -18,6 +18,7 @@
 
 #include "access/xlog_internal.h"	/* for pg_start/stop_backup */
 #include "catalog/pg_type.h"
+#include "common/checksum_helper.h"
 #include "common/file_perm.h"
 #include "commands/progress.h"
 #include "lib/stringinfo.h"
@@ -32,6 +33,7 @@
 #include "replication/basebackup.h"
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
+#include "storage/buffile.h"
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
@@ -39,8 +41,10 @@
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
+#include "utils/json.h"
 #include "utils/ps_status.h"
 #include "utils/relcache.h"
+#include "utils/resowner.h"
 #include "utils/timestamp.h"
 
 typedef struct
@@ -52,20 +56,40 @@ typedef struct
 	bool		includewal;
 	uint32		maxrate;
 	bool		sendtblspcmapfile;
+	pg_checksum_type checksum_type;
 } basebackup_options;
 
+struct manifest_info
+{
+	BufFile    *buffile;		/* temporary file holding the manifest text */
+	pg_checksum_type checksum_type; /* algorithm used for per-file checksums */
+	pg_sha256_ctx manifest_ctx;	/* running SHA-256 of the manifest itself */
+	uint64		manifest_size;	/* bytes appended to buffile so far */
+	bool		first_file;		/* true until the first file entry is added */
+	bool		still_checksumming; /* feed appended text into manifest_ctx? */
+};
+
 
 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
-					 List *tablespaces, bool sendtblspclinks);
+					 List *tablespaces, bool sendtblspclinks,
+					 manifest_info *manifest, const char *spcoid);
 static bool sendFile(const char *readfilename, const char *tarfilename,
-					 struct stat *statbuf, bool missing_ok, Oid dboid);
-static void sendFileWithContent(const char *filename, const char *content);
+					 struct stat *statbuf, bool missing_ok, Oid dboid,
+					 manifest_info *manifest, const char *spcoid);
+static void sendFileWithContent(const char *filename, const char *content,
+								manifest_info *manifest);
 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
 							 struct stat *statbuf, bool sizeonly);
 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
 						  bool sizeonly);
 static void send_int8_string(StringInfoData *buf, int64 intval);
 static void SendBackupHeader(List *tablespaces);
+static void InitializeManifest(manifest_info *manifest, pg_checksum_type);
+static void AppendStringToManifest(manifest_info *manifest, char *s);
+static void AddFileToManifest(manifest_info *manifest, const char *spcoid,
+							  const char *pathname, size_t size, time_t mtime,
+							  pg_checksum_context *checksum_ctx);
+static void SendBackupManifest(manifest_info *manifest);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
@@ -102,6 +126,16 @@ do { \
 				(errmsg("could not read from file \"%s\"", filename))); \
 } while (0)
 
+/*
+ * Format the arguments with psprintf() and append them to the manifest.
+ */
+#define AppendToManifest(manifest, ...) \
+	do { \
+		char *_manifest_s = psprintf(__VA_ARGS__);	\
+		AppendStringToManifest(manifest, _manifest_s);	\
+		pfree(_manifest_s);	\
+	} while (0)
+
 /* The actual number of bytes, transfer of which may cause sleep. */
 static uint64 throttling_sample;
 
@@ -251,6 +285,7 @@ perform_base_backup(basebackup_options *opt)
 	TimeLineID	endtli;
 	StringInfo	labelfile;
 	StringInfo	tblspc_map_file = NULL;
+	manifest_info manifest;
 	int			datadirpathlen;
 	List	   *tablespaces = NIL;
 
@@ -258,12 +293,17 @@ perform_base_backup(basebackup_options *opt)
 	backup_streamed = 0;
 	pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid);
 
+	/* we're going to use a BufFile, so we need a ResourceOwner */
+	Assert(CurrentResourceOwner == NULL);
+	CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
+
 	datadirpathlen = strlen(DataDir);
 
 	backup_started_in_recovery = RecoveryInProgress();
 
 	labelfile = makeStringInfo();
 	tblspc_map_file = makeStringInfo();
+	InitializeManifest(&manifest, opt->checksum_type);
 
 	total_checksum_failures = 0;
 
@@ -301,7 +341,10 @@ perform_base_backup(basebackup_options *opt)
 
 		/* Add a node for the base directory at the end */
 		ti = palloc0(sizeof(tablespaceinfo));
-		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
+		if (opt->progress)
+			ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL);
+		else
+			ti->size = -1;
 		tablespaces = lappend(tablespaces, ti);
 
 		/*
@@ -380,7 +423,8 @@ perform_base_backup(basebackup_options *opt)
 				struct stat statbuf;
 
 				/* In the main tar, include the backup_label first... */
-				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
+				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data,
+									&manifest);
 
 				/*
 				 * Send tablespace_map file if required and then the bulk of
@@ -388,11 +432,14 @@ perform_base_backup(basebackup_options *opt)
 				 */
 				if (tblspc_map_file && opt->sendtblspcmapfile)
 				{
-					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
-					sendDir(".", 1, false, tablespaces, false);
+					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data,
+										&manifest);
+					sendDir(".", 1, false, tablespaces, false,
+							&manifest, NULL);
 				}
 				else
-					sendDir(".", 1, false, tablespaces, true);
+					sendDir(".", 1, false, tablespaces, true,
+							&manifest, NULL);
 
 				/* ... and pg_control after everything else. */
 				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -400,10 +447,11 @@ perform_base_backup(basebackup_options *opt)
 							(errcode_for_file_access(),
 							 errmsg("could not stat file \"%s\": %m",
 									XLOG_CONTROL_FILE)));
-				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
+				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
+						 false, InvalidOid, &manifest, NULL);
 			}
 			else
-				sendTablespace(ti->path, false);
+				sendTablespace(ti->path, ti->oid, false, &manifest);
 
 			/*
 			 * If we're including WAL, and this is the main data directory we
@@ -632,7 +680,7 @@ perform_base_backup(basebackup_options *opt)
 			 * complete segment.
 			 */
 			StatusFilePath(pathbuf, walFileName, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", &manifest);
 		}
 
 		/*
@@ -655,16 +703,20 @@ perform_base_backup(basebackup_options *opt)
 						(errcode_for_file_access(),
 						 errmsg("could not stat file \"%s\": %m", pathbuf)));
 
-			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
+			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid,
+					 &manifest, NULL);
 
 			/* unconditionally mark file as archived */
 			StatusFilePath(pathbuf, fname, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", &manifest);
 		}
 
 		/* Send CopyDone message for the last tar file */
 		pq_putemptymessage('c');
 	}
+
+	SendBackupManifest(&manifest);
+
 	SendXlogRecPtrResult(endptr, endtli);
 
 	if (total_checksum_failures)
@@ -678,6 +730,9 @@ perform_base_backup(basebackup_options *opt)
 				 errmsg("checksum verification failure during base backup")));
 	}
 
+	/* clean up the resource owner we created */
+	WalSndResourceCleanup(true);
+
 	pgstat_progress_end_command();
 }
 
@@ -709,8 +764,11 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 	bool		o_maxrate = false;
 	bool		o_tablespace_map = false;
 	bool		o_noverify_checksums = false;
+	bool		o_manifest_checksums = false;
 
 	MemSet(opt, 0, sizeof(*opt));
+	opt->checksum_type = CHECKSUM_TYPE_CRC32C;
+
 	foreach(lopt, options)
 	{
 		DefElem    *defel = (DefElem *) lfirst(lopt);
@@ -797,6 +855,21 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 			noverify_checksums = true;
 			o_noverify_checksums = true;
 		}
+		else if (strcmp(defel->defname, "manifest_checksums") == 0)
+		{
+			char	   *optval = strVal(defel->arg);
+
+			if (o_manifest_checksums)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("duplicate option \"%s\"", defel->defname)));
+			if (!pg_checksum_parse_type(optval, &opt->checksum_type))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("unrecognized checksum algorithm: \"%s\"",
+								optval)));
+			o_manifest_checksums = true;
+		}
 		else
 			elog(ERROR, "option \"%s\" not recognized",
 				 defel->defname);
@@ -918,6 +991,228 @@ SendBackupHeader(List *tablespaces)
 	pq_puttextmessage('C', "SELECT");
 }
 
+/*
+ * Initialize state so that we can construct a backup manifest.
+ *
+ * NB: Although the checksum type for the data files is configurable, the
+ * checksum for the manifest itself always uses SHA-256. See comments in
+ * SendBackupManifest.
+ */
+static void
+InitializeManifest(manifest_info *manifest, pg_checksum_type checksum_type)
+{
+	manifest->buffile = BufFileCreateTemp(false);
+	manifest->checksum_type = checksum_type;
+	pg_sha256_init(&manifest->manifest_ctx);
+	manifest->manifest_size = UINT64CONST(0);
+	manifest->first_file = true;
+	manifest->still_checksumming = true;
+
+	AppendToManifest(manifest,
+					 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
+					 "\"Files\": [");
+}
+
+/*
+ * Append a cstring to the manifest (and to its checksum, while enabled).
+ */
+static void
+AppendStringToManifest(manifest_info *manifest, char *s)
+{
+	size_t		len = strlen(s);	/* size_t, same type as 'written' */
+	size_t		written;
+
+	if (manifest->still_checksumming)
+		pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
+	written = BufFileWrite(manifest->buffile, s, len);
+	if (written != len)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to temporary file: %m")));
+	manifest->manifest_size += len;	/* total is used when reading it back */
+}
+
+/*
+ * Add an entry to the backup manifest for a file.
+ */
+static void
+AddFileToManifest(manifest_info *manifest, const char *spcoid,
+				  const char *pathname, size_t size, time_t mtime,
+				  pg_checksum_context *checksum_ctx)
+{
+	char		pathbuf[MAXPGPATH];
+	int			pathlen;
+	StringInfoData	buf;
+
+	/*
+	 * If this file is part of a tablespace, the pathname passed to this
+	 * function will be relative to the tar file that contains it. We want the
+	 * pathname relative to the data directory (ignoring the intermediate
+	 * symlink traversal).
+	 */
+	if (spcoid != NULL)
+	{
+		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
+				 pathname);
+		pathname = pathbuf;
+	}
+
+	/*
+	 * Each file's entry needs to be separated from any entry that follows
+	 * by a comma, but there's no comma before the first one or after the
+	 * last one. To make that work, adding a file to the manifest starts
+	 * by terminating the most recently added line, with a comma if
+	 * appropriate, but does not terminate the line inserted for this file.
+	 */
+	initStringInfo(&buf);
+	if (manifest->first_file)
+	{
+		appendStringInfoString(&buf, "\n");
+		manifest->first_file = false;
+	}
+	else
+		appendStringInfoString(&buf, ",\n");
+
+	/*
+	 * Write the relative pathname to this file out to the manifest. The
+	 * manifest is always stored in UTF-8, so we have to encode paths that
+	 * are not valid in that encoding.
+	 */
+	pathlen = strlen(pathname);
+	if (pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
+	{
+		appendStringInfoString(&buf, "{ \"Path\": ");
+		escape_json(&buf, pathname);
+		appendStringInfoString(&buf, ", ");
+	}
+	else
+	{
+		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
+		enlargeStringInfo(&buf, 2 * pathlen);
+		buf.len += hex_encode((char *) pathname, pathlen,
+							  &buf.data[buf.len]);
+		appendStringInfoString(&buf, "\", ");
+	}
+
+	appendStringInfo(&buf, "\"Size\": %zu, ", size);
+
+	/*
+	 * Convert last modification time to a string and append it to the
+	 * manifest. Since it's not clear what time zone to use and since time
+	 * zone definitions can change, possibly causing confusion, use GMT always.
+	 */
+	appendStringInfoString(&buf, "\"Last-Modified\": \"");
+	enlargeStringInfo(&buf, 128);
+	buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
+						   pg_gmtime(&mtime));
+	appendStringInfoString(&buf, "\"");
+
+	/* Add checksum information. */
+	if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
+	{
+		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+		int			checksumlen;
+
+		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
+
+		appendStringInfo(&buf,
+						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
+						 pg_checksum_type_name(checksum_ctx->type));
+		enlargeStringInfo(&buf, 2 * checksumlen);
+		buf.len += hex_encode((char *) checksumbuf, checksumlen,
+							  &buf.data[buf.len]);
+		appendStringInfoString(&buf, "\"");
+	}
+
+	/* Close out the object. */
+	appendStringInfoString(&buf, " }");
+
+	/* OK, add it to the manifest. */
+	AppendStringToManifest(manifest, buf.data);
+
+	/* Avoid leaking memory. */
+	pfree(buf.data);
+}
+
+/*
+ * Finalize the backup manifest, and send it to the client.
+ */
+static void
+SendBackupManifest(manifest_info *manifest)
+{
+	StringInfoData protobuf;
+	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
+	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
+	size_t		manifest_bytes_done = 0;
+
+	/* Terminate the list of files. */
+	AppendStringToManifest(manifest, "],\n");
+
+	/*
+	 * Append manifest checksum, so that problems with the manifest itself
+	 * can be detected.
+	 *
+	 * We always use SHA-256 for this, regardless of what algorithm is chosen
+	 * for checksumming the files.  If we ever want to make the checksum
+	 * algorithm used for the manifest file variable, the client will need a
+	 * way to figure out which algorithm to use as close to the beginning of
+	 * the manifest file as possible, to avoid having to read the whole thing
+	 * twice.
+	 */
+	manifest->still_checksumming = false;
+	pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
+	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
+	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
+	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
+	AppendStringToManifest(manifest, checksumstringbuf);
+	AppendStringToManifest(manifest, "\"}\n");
+
+	/*
+	 * We've written all the data to the manifest file.  Rewind the file so
+	 * that we can read it all back.
+	 */
+	if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not rewind temporary file: %m")));
+
+	/* Send CopyOutResponse message */
+	pq_beginmessage(&protobuf, 'H');
+	pq_sendbyte(&protobuf, 0);	/* overall format */
+	pq_sendint16(&protobuf, 0); /* natts */
+	pq_endmessage(&protobuf);
+
+	/*
+	 * Send CopyData messages.
+	 *
+	 * We choose to read back the data from the temporary file in chunks of
+	 * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
+	 * size, so it seems to make sense to match that value here.
+	 */
+	while (manifest_bytes_done < manifest->manifest_size)
+	{
+		char		manifestbuf[BLCKSZ];
+		size_t		bytes_to_read;
+		size_t		rc;
+
+		bytes_to_read = Min(sizeof(manifestbuf),
+							manifest->manifest_size - manifest_bytes_done);
+		rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
+		if (rc != bytes_to_read)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read from temporary file: %m")));
+		pq_putmessage('d', manifestbuf, bytes_to_read);
+		manifest_bytes_done += bytes_to_read;
+	}
+
+	/* No more data, so send CopyDone message */
+	pq_putemptymessage('c');
+
+	/* Release resources */
+	BufFileClose(manifest->buffile);
+}
+
 /*
  * Send a single resultset containing just a single
  * XLogRecPtr record (in text format)
@@ -978,11 +1273,15 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
  * Inject a file with given name and content in the output tar stream.
  */
 static void
-sendFileWithContent(const char *filename, const char *content)
+sendFileWithContent(const char *filename, const char *content,
+					manifest_info *manifest)
 {
 	struct stat statbuf;
 	int			pad,
 				len;
+	pg_checksum_context checksum_ctx;
+
+	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	len = strlen(content);
 
@@ -1017,6 +1316,10 @@ sendFileWithContent(const char *filename, const char *content)
 		pq_putmessage('d', buf, pad);
 		update_basebackup_progress(pad);
 	}
+
+	pg_checksum_update(&checksum_ctx, (uint8 *) content, len);
+	AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
+					  &checksum_ctx);
 }
 
 /*
@@ -1027,7 +1330,8 @@ sendFileWithContent(const char *filename, const char *content)
  * Only used to send auxiliary tablespaces, not PGDATA.
  */
 int64
-sendTablespace(char *path, bool sizeonly)
+sendTablespace(char *path, char *spcoid, bool sizeonly,
+			   manifest_info *manifest)
 {
 	int64		size;
 	char		pathbuf[MAXPGPATH];
@@ -1060,7 +1364,8 @@ sendTablespace(char *path, bool sizeonly)
 						   sizeonly);
 
 	/* Send all the files in the tablespace version directory */
-	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
+	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest,
+					spcoid);
 
 	return size;
 }
@@ -1079,7 +1384,7 @@ sendTablespace(char *path, bool sizeonly)
  */
 static int64
 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
-		bool sendtblspclinks)
+		bool sendtblspclinks, manifest_info *manifest, const char *spcoid)
 {
 	DIR		   *dir;
 	struct dirent *de;
@@ -1359,7 +1664,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 				skip_this_dir = true;
 
 			if (!skip_this_dir)
-				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
+				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces,
+								sendtblspclinks, manifest, spcoid);
 		}
 		else if (S_ISREG(statbuf.st_mode))
 		{
@@ -1367,7 +1673,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 
 			if (!sizeonly)
 				sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
-								true, isDbDir ? atooid(lastDir + 1) : InvalidOid);
+								true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
+								manifest, spcoid);
 
 			if (sent || sizeonly)
 			{
@@ -1437,8 +1744,9 @@ is_checksummed_file(const char *fullpath, const char *filename)
  * and the file did not exist.
  */
 static bool
-sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
-		 bool missing_ok, Oid dboid)
+sendFile(const char *readfilename, const char *tarfilename,
+		 struct stat *statbuf, bool missing_ok, Oid dboid,
+		 manifest_info *manifest, const char *spcoid)
 {
 	FILE	   *fp;
 	BlockNumber blkno = 0;
@@ -1455,6 +1763,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
+	pg_checksum_context checksum_ctx;
+
+	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1625,6 +1936,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 					(errmsg("base backup could not send data, aborting backup")));
 		update_basebackup_progress(cnt);
 
+		/* Also feed it to the checksum machinery. */
+		pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
+
 		len += cnt;
 		throttle(cnt);
 
@@ -1649,6 +1963,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		{
 			cnt = Min(sizeof(buf), statbuf->st_size - len);
 			pq_putmessage('d', buf, cnt);
+			pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
 			update_basebackup_progress(cnt);
 			len += cnt;
 			throttle(cnt);
@@ -1657,7 +1972,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	/*
 	 * Pad to 512 byte boundary, per tar format requirements. (This small
-	 * piece of data is probably not worth throttling.)
+	 * piece of data is probably not worth throttling, and is not checksummed
+	 * because it's not actually part of the file.)
 	 */
 	pad = ((len + 511) & ~511) - len;
 	if (pad > 0)
@@ -1682,6 +1998,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	total_checksum_failures += checksum_failures;
 
+	AddFileToManifest(manifest, spcoid, tarfilename, statbuf->st_size,
+					  statbuf->st_mtime, &checksum_ctx);
+
 	return true;
 }
 
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 14fcd53221..0621884ad8 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void);
 %token K_EXPORT_SNAPSHOT
 %token K_NOEXPORT_SNAPSHOT
 %token K_USE_SNAPSHOT
+%token K_MANIFEST_CHECKSUMS
 
 %type <node>	command
 %type <node>	base_backup start_replication start_logical_replication
@@ -214,6 +215,11 @@ base_backup_opt:
 				  $$ = makeDefElem("noverify_checksums",
 								   (Node *)makeInteger(true), -1);
 				}
+			| K_MANIFEST_CHECKSUMS SCONST
+				{
+				  $$ = makeDefElem("manifest_checksums",
+								   (Node *)makeString($2), -1);
+				}
 			;
 
 create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 14c9a1e798..5653d233b5 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -107,6 +107,7 @@ EXPORT_SNAPSHOT		{ return K_EXPORT_SNAPSHOT; }
 NOEXPORT_SNAPSHOT	{ return K_NOEXPORT_SNAPSHOT; }
 USE_SNAPSHOT		{ return K_USE_SNAPSHOT; }
 WAIT				{ return K_WAIT; }
+MANIFEST_CHECKSUMS	{ return K_MANIFEST_CHECKSUMS; }
 
 ","				{ return ','; }
 ";"				{ return ';'; }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 3f74bc8493..88c73dc21c 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -316,6 +316,8 @@ WalSndErrorCleanup(void)
 
 	replication_active = false;
 
+	WalSndResourceCleanup(false);
+
 	if (got_STOPPING || got_SIGUSR2)
 		proc_exit(0);
 
@@ -323,6 +325,34 @@ WalSndErrorCleanup(void)
 	WalSndSetState(WALSNDSTATE_STARTUP);
 }
 
+/*
+ * Clean up any ResourceOwner we created.
+ */
+void
+WalSndResourceCleanup(bool isCommit)
+{
+	ResourceOwner	resowner;
+
+	if (CurrentResourceOwner == NULL)
+		return;
+
+	/*
+	 * Deleting CurrentResourceOwner is not allowed, so we must save a
+	 * pointer in a local variable and clear it first.
+	 */
+	resowner = CurrentResourceOwner;
+	CurrentResourceOwner = NULL;
+
+	/* Now we can release resources and delete it. */
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(resowner,
+						 RESOURCE_RELEASE_AFTER_LOCKS, isCommit, true);
+	ResourceOwnerDelete(resowner);
+}
+
 /*
  * Handle a client's connection abort in an orderly manner.
  */
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 48bd838803..942e377bbc 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -88,6 +88,12 @@ typedef struct UnpackTarState
 	FILE	   *file;
 } UnpackTarState;
 
+typedef struct WriteManifestState
+{
+	char		filename[MAXPGPATH];	/* path of the manifest file written */
+	FILE	   *file;			/* stream opened on filename */
+} WriteManifestState;
+
 typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
 								   void *callback_data);
 
@@ -135,6 +141,7 @@ static bool temp_replication_slot = true;
 static bool create_slot = false;
 static bool no_slot = false;
 static bool verify_checksums = true;
+static char *manifest_checksums = NULL;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -180,6 +187,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
 static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
 static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf,
 										 void *callback_data);
+static void ReceiveBackupManifest(PGconn *conn);
+static void ReceiveBackupManifestChunk(size_t r, char *copybuf,
+									   void *callback_data);
+static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf);
+static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+											   void *callback_data);
 static void BaseBackup(void);
 
 static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
@@ -386,6 +399,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --manifest-checksums=SHA{224,256,384,512}|CRC32C|NONE\n"
+			 "                         use algorithm for manifest checksums\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -1184,6 +1199,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 		}
 	}
 
+	/*
+	 * Normally, we emit the backup manifest as a separate file, but when
+	 * we're writing a tarfile to stdout, we don't have that option, so
+	 * include it in the one tarfile we've got.
+	 */
+	if (strcmp(basedir, "-") == 0)
+	{
+		char		header[512];
+		PQExpBufferData	buf;
+
+		initPQExpBuffer(&buf);
+		ReceiveBackupManifestInMemory(conn, &buf);
+		if (PQExpBufferDataBroken(buf))
+		{
+			pg_log_error("out of memory");
+			exit(1);
+		}
+		tarCreateHeader(header, "backup_manifest", NULL, buf.len,
+						pg_file_create_mode, 04000, 02000,
+						time(NULL));
+		writeTarData(&state, header, sizeof(header));
+		writeTarData(&state, buf.data, buf.len);
+		termPQExpBuffer(&buf);
+	}
+
 	/* 2 * 512 bytes empty data at end of file */
 	writeTarData(&state, zerobuf, sizeof(zerobuf));
 
@@ -1655,6 +1695,64 @@ ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data)
 	}							/* continuing data in existing file */
 }
 
+/*
+ * Receive the backup manifest file and write it out to a file.
+ */
+static void
+ReceiveBackupManifest(PGconn *conn)
+{
+	WriteManifestState state;
+
+	snprintf(state.filename, sizeof(state.filename),
+			 "%s/backup_manifest", basedir);
+	state.file = fopen(state.filename, "wb");
+	if (state.file == NULL)
+	{
+		pg_log_error("could not create file \"%s\": %m", state.filename);
+		exit(1);
+	}
+
+	ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state);
+
+	fclose(state.file);
+}
+
+/*
+ * Receive one chunk of the backup manifest file and write it out to a file.
+ */
+static void
+ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data)
+{
+	WriteManifestState *state = callback_data;
+
+	if (fwrite(copybuf, r, 1, state->file) != 1)
+	{
+		pg_log_error("could not write to file \"%s\": %m", state->filename);
+		exit(1);
+	}
+}
+
+/*
+ * Receive the backup manifest file and accumulate it in a memory buffer.
+ */
+static void
+ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf)
+{
+	ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf);
+}
+
+/*
+ * Receive one chunk of the backup manifest and append it to a memory buffer.
+ */
+static void
+ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+								   void *callback_data)
+{
+	PQExpBuffer buf = callback_data;
+
+	appendBinaryPQExpBuffer(buf, copybuf, r);	/* raw bytes, not a format */
+}
+
 static void
 BaseBackup(void)
 {
@@ -1665,6 +1763,7 @@ BaseBackup(void)
 	char	   *basebkp;
 	char		escaped_label[MAXPGPATH];
 	char	   *maxrate_clause = NULL;
+	char	   *manifest_checksums_clause = NULL;
 	int			i;
 	char		xlogstart[64];
 	char		xlogend[64];
@@ -1672,6 +1771,7 @@ BaseBackup(void)
 				maxServerMajor;
 	int			serverVersion,
 				serverMajor;
+	int			writing_to_stdout;
 
 	Assert(conn != NULL);
 
@@ -1725,6 +1825,19 @@ BaseBackup(void)
 
 	if (maxrate > 0)
 		maxrate_clause = psprintf("MAX_RATE %u", maxrate);
+	if (manifest_checksums != NULL)
+	{
+		if (serverMajor < 1300)
+		{
+			const char *serverver = PQparameterStatus(conn, "server_version");
+
+			pg_log_error("manifest checksums are not supported by server version %s",
+						 serverver ? serverver : "'unknown'");
+			exit(1);
+		}
+		manifest_checksums_clause = psprintf("MANIFEST_CHECKSUMS '%s'",
+											 manifest_checksums);
+	}
 
 	if (verbose)
 		pg_log_info("initiating base backup, waiting for checkpoint to complete");
@@ -1739,7 +1852,7 @@ BaseBackup(void)
 	}
 
 	basebkp =
-		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s",
+		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s",
 				 escaped_label,
 				 showprogress ? "PROGRESS" : "",
 				 includewal == FETCH_WAL ? "WAL" : "",
@@ -1747,7 +1860,8 @@ BaseBackup(void)
 				 includewal == NO_WAL ? "" : "NOWAIT",
 				 maxrate_clause ? maxrate_clause : "",
 				 format == 't' ? "TABLESPACE_MAP" : "",
-				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS");
+				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS",
+				 manifest_checksums_clause ? manifest_checksums_clause : "");
 
 	if (PQsendQuery(conn, basebkp) == 0)
 	{
@@ -1835,7 +1949,8 @@ BaseBackup(void)
 	/*
 	 * When writing to stdout, require a single tablespace
 	 */
-	if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1)
+	writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0;
+	if (writing_to_stdout && PQntuples(res) > 1)
 	{
 		pg_log_error("can only write single tablespace to stdout, database has %d",
 					 PQntuples(res));
@@ -1864,6 +1979,19 @@ BaseBackup(void)
 			ReceiveAndUnpackTarFile(conn, res, i);
 	}							/* Loop over all tablespaces */
 
+	/*
+	 * Now receive backup manifest, if appropriate.
+	 *
+	 * If we're writing a tarfile to stdout, ReceiveTarFile will have already
+	 * processed the backup manifest and included it in the output tarfile.
+	 * Such a configuration doesn't allow for writing multiple files.
+	 *
+	 * If we're talking to an older server, it won't send a backup manifest,
+	 * so don't try to receive one.
+	 */
+	if (!writing_to_stdout && serverMajor >= 1300)
+		ReceiveBackupManifest(conn);
+
 	if (showprogress)
 	{
 		progress_report(PQntuples(res), NULL, true);
@@ -2066,6 +2194,7 @@ main(int argc, char **argv)
 		{"waldir", required_argument, NULL, 1},
 		{"no-slot", no_argument, NULL, 2},
 		{"no-verify-checksums", no_argument, NULL, 3},
+		{"manifest-checksums", required_argument, NULL, 'm'},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2093,7 +2222,7 @@ main(int argc, char **argv)
 
 	atexit(cleanup_directories_atexit);
 
-	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvP",
+	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvPm:",
 							long_options, &option_index)) != -1)
 	{
 		switch (c)
@@ -2234,6 +2363,9 @@ main(int argc, char **argv)
 			case 3:
 				verify_checksums = false;
 				break;
+			case 'm':
+				manifest_checksums = pg_strdup(optarg);
+				break;
 			default:
 
 				/*
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 07ed281bd6..d5b594c928 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -12,6 +12,7 @@
 #ifndef _BASEBACKUP_H
 #define _BASEBACKUP_H
 
+#include "lib/stringinfo.h"
 #include "nodes/replnodes.h"
 
 /*
@@ -29,8 +30,12 @@ typedef struct
 	int64		size;
 } tablespaceinfo;
 
+struct manifest_info;
+typedef struct manifest_info manifest_info;
+
 extern void SendBaseBackup(BaseBackupCmd *cmd);
 
-extern int64 sendTablespace(char *path, bool sizeonly);
+extern int64 sendTablespace(char *path, char *oid, bool sizeonly,
+							manifest_info *manifest);
 
 #endif							/* _BASEBACKUP_H */
diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h
index fd4305e53f..40d81b87f0 100644
--- a/src/include/replication/walsender.h
+++ b/src/include/replication/walsender.h
@@ -38,6 +38,7 @@ extern bool log_replication_commands;
 extern void InitWalSender(void);
 extern bool exec_replication_command(const char *query_string);
 extern void WalSndErrorCleanup(void);
+extern void WalSndResourceCleanup(bool isCommit);
 extern void WalSndSignals(void);
 extern Size WalSndShmemSize(void);
 extern void WalSndShmemInit(void);
-- 
2.17.2 (Apple Git-113)