v16-0004-WIP-Store-WAL-ranges-in-manifest-and-validate-th.patch

application/octet-stream

Filename: v16-0004-WIP-Store-WAL-ranges-in-manifest-and-validate-th.patch
Type: application/octet-stream
Part: 2
Message: Re: backup manifests

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v16-0004
Subject: WIP: Store WAL ranges in manifest and validate them w/pg_waldump.
File+
src/backend/replication/basebackup.c 102 7
src/bin/pg_validatebackup/parse_manifest.c 217 53
src/bin/pg_validatebackup/parse_manifest.h 5 0
src/bin/pg_validatebackup/pg_validatebackup.c 166 17
src/bin/pg_validatebackup/t/005_bad_manifest.pl 2 7
From a5f48cabe5e2de4b38b099c9ccbeb35c093ba4f8 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 31 Mar 2020 11:50:05 -0400
Subject: [PATCH v16 4/4] WIP: Store WAL ranges in manifest and validate them
 w/pg_waldump.

---
 src/backend/replication/basebackup.c          | 109 ++++++-
 src/bin/pg_validatebackup/parse_manifest.c    | 270 ++++++++++++++----
 src/bin/pg_validatebackup/parse_manifest.h    |   5 +
 src/bin/pg_validatebackup/pg_validatebackup.c | 183 ++++++++++--
 .../pg_validatebackup/t/005_bad_manifest.pl   |   9 +-
 5 files changed, 492 insertions(+), 84 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index deaa4f1c34..f56d2c97b5 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -16,6 +16,7 @@
 #include <unistd.h>
 #include <time.h>
 
+#include "access/timeline.h"
 #include "access/xlog_internal.h"	/* for pg_start/stop_backup */
 #include "catalog/pg_type.h"
 #include "common/checksum_helper.h"
@@ -99,6 +100,9 @@ static void AppendStringToManifest(manifest_info *manifest, char *s);
 static void AddFileToManifest(manifest_info *manifest, const char *spcoid,
 							  const char *pathname, size_t size, time_t mtime,
 							  pg_checksum_context *checksum_ctx);
+static void AddWALInfoToManifest(manifest_info *manifest, XLogRecPtr startptr,
+								 TimeLineID starttli, XLogRecPtr endptr,
+								 TimeLineID endtli);
 static void SendBackupManifest(manifest_info *manifest);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
@@ -740,6 +744,8 @@ perform_base_backup(basebackup_options *opt)
 		pq_putemptymessage('c');
 	}
 
+	AddWALInfoToManifest(&manifest, startptr, starttli, endptr, endtli);
+
 	SendBackupManifest(&manifest);
 
 	SendXlogRecPtrResult(endptr, endtli);
@@ -1209,6 +1215,101 @@ AddFileToManifest(manifest_info *manifest, const char *spcoid,
 	pfree(buf.data);
 }
 
+/*
+ * Add information about the WAL that will need to be replayed when restoring
+ * this backup to the manifest, as one "WAL-Ranges" entry per timeline. This
+ * also terminates the list of files. If no manifest was requested (there is
+ * no buffile), this does nothing.
+ */
+static void
+AddWALInfoToManifest(manifest_info *manifest, XLogRecPtr startptr,
+					 TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
+{
+	List	   *timelines;
+	ListCell   *lc;
+	bool		first_wal_range = true;
+	bool		found_ending_tli = false;
+
+	/* If there is no buffile, then the user doesn't want a manifest. */
+	if (manifest->buffile == NULL)
+		return;
+
+	/* A manifest is wanted, so it's now worth reading the timeline history. */
+	timelines = readTimeLineHistory(endtli);
+
+	/* Terminate the list of files, then start a list of LSN ranges. */
+	AppendStringToManifest(manifest, "\n],\n");
+	AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
+
+	foreach (lc, timelines)
+	{
+		TimeLineHistoryEntry *entry = lfirst(lc);
+		XLogRecPtr	tl_endptr;
+
+		/*
+		 * We only care about timelines that were active during the backup.
+		 * Skip any that ended before the backup started. (Note that if
+		 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
+		 * yet ended.)
+		 */
+		if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
+			continue;
+
+		/*
+		 * The first timeline that is new enough to matter ought to be the
+		 * backup's starting timeline. NOTE(review): this relies on the list
+		 * being ordered oldest-first; confirm what readTimeLineHistory gives.
+		 */
+		if (first_wal_range && starttli != entry->tli)
+			ereport(ERROR,
+					errmsg("start timeline %u does not match timeline history",
+						   starttli));
+
+		if (!XLogRecPtrIsInvalid(entry->end))
+			tl_endptr = entry->end;
+		else
+		{
+			tl_endptr = endptr;
+
+			/*
+			 * If we reach a TLI that has no end LSN, there can't be any more
+			 * timelines in the history after this point, so we'd better have
+			 * arrived at the expected ending TLI. If not, something's gone
+			 * horribly wrong.
+			 */
+			if (endtli != entry->tli)
+				ereport(ERROR,
+						errmsg("end timeline %u does not match timeline history",
+							   endtli));
+		}
+
+		AppendToManifest(manifest,
+						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
+						 first_wal_range ? "" : ",\n",
+						 entry->tli,
+						 (uint32) (startptr >> 32), (uint32) startptr,
+						 (uint32) (tl_endptr >> 32), (uint32) tl_endptr);
+
+		startptr = entry->end;	/* next range starts where this one ended */
+		if (endtli == entry->tli)
+		{
+			found_ending_tli = true;
+			break;
+		}
+
+		first_wal_range = false;
+	}
+
+	/* The loop should have stopped on reaching the backup's ending timeline. */
+	if (!found_ending_tli)
+		ereport(ERROR,
+				errmsg("ending timeline %u not found in timeline history",
+					   endtli));
+
+	/* Terminate the list of WAL ranges. */
+	AppendStringToManifest(manifest, "\n],\n");
+}
+
 /*
  * Finalize the backup manifest, and send it to the client.
  */
@@ -1220,16 +1321,10 @@ SendBackupManifest(manifest_info *manifest)
 	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
 	size_t		manifest_bytes_done = 0;
 
-	/*
-	 * If there is no buffile, then the user doesn't want a manifest, so
-	 * don't waste any time generating one.
-	 */
+	/* If there is no buffile, then the user doesn't want a manifest. */
 	if (manifest->buffile == NULL)
 		return;
 
-	/* Terminate the list of files. */
-	AppendStringToManifest(manifest, "],\n");
-
 	/*
 	 * Append manifest checksum, so that the problems with the manifest itself
 	 * can be detected.
diff --git a/src/bin/pg_validatebackup/parse_manifest.c b/src/bin/pg_validatebackup/parse_manifest.c
index e6b42adfda..461ac36b76 100644
--- a/src/bin/pg_validatebackup/parse_manifest.c
+++ b/src/bin/pg_validatebackup/parse_manifest.c
@@ -23,14 +23,16 @@ typedef enum
 {
 	JM_EXPECT_TOPLEVEL_START,
 	JM_EXPECT_TOPLEVEL_END,
-	JM_EXPECT_VERSION_FIELD,
+	JM_EXPECT_TOPLEVEL_FIELD,
 	JM_EXPECT_VERSION_VALUE,
-	JM_EXPECT_FILES_FIELD,
-	JM_EXPECT_FILES_ARRAY_START,
-	JM_EXPECT_FILES_ARRAY_NEXT,
+	JM_EXPECT_FILES_START,
+	JM_EXPECT_FILES_NEXT,
 	JM_EXPECT_THIS_FILE_FIELD,
 	JM_EXPECT_THIS_FILE_VALUE,
-	JM_EXPECT_MANIFEST_CHECKSUM_FIELD,
+	JM_EXPECT_WAL_RANGES_START,
+	JM_EXPECT_WAL_RANGES_NEXT,
+	JM_EXPECT_THIS_WAL_RANGE_FIELD,
+	JM_EXPECT_THIS_WAL_RANGE_VALUE,
 	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
 	JM_EXPECT_EOF
 } JsonManifestSemanticState;
@@ -48,6 +50,16 @@ typedef enum
 	JMFF_CHECKSUM
 } JsonManifestFileField;
 
+/*
+ * Possible fields for one WAL range as described by the manifest.
+ */
+typedef enum
+{
+	JMWRF_TIMELINE,
+	JMWRF_START_LSN,
+	JMWRF_END_LSN
+} JsonManifestWALRangeField;
+
 /*
  * Internal state used while decoding the JSON-format backup manifest.
  */
@@ -55,13 +67,24 @@ typedef struct
 {
 	JsonManifestParseContext *context;
 	JsonManifestSemanticState state;
-	JsonManifestFileField field;
+
+	/* These fields are used for parsing objects in the list of files. */
+	JsonManifestFileField file_field;
 	char	   *pathname;
 	char	   *encoded_pathname;
 	char	   *size;
 	char	   *algorithm;
 	pg_checksum_type checksum_algorithm;
 	char	   *checksum;
+
+	/* These fields are used for parsing objects in the list of WAL ranges. */
+	JsonManifestWALRangeField wal_range_field;
+	char	   *timeline;
+	char	   *start_lsn;
+	char	   *end_lsn;
+
+	/* Miscellaneous other stuff. */
+	bool		saw_version_field;
 	char	   *manifest_checksum;
 } JsonManifestParseState;
 
@@ -74,6 +97,7 @@ static void json_manifest_object_field_start(void *state, char *fname,
 static void json_manifest_scalar(void *state, char *token,
 								 JsonTokenType tokentype);
 static void json_manifest_finalize_file(JsonManifestParseState *parse);
+static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
 static void verify_manifest_checksum(JsonManifestParseState *parse,
 									 char *buffer, size_t size);
 static void json_manifest_parse_failure(JsonManifestParseContext *context,
@@ -81,6 +105,7 @@ static void json_manifest_parse_failure(JsonManifestParseContext *context,
 
 static int	hexdecode_char(char c);
 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
+static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
 
 /*
  * Main entrypoint to parse a JSON-format backup manifest.
@@ -100,8 +125,9 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer,
 	JsonManifestParseState parse;
 
 	/* Set up our private parsing context. */
-	parse.state = JM_EXPECT_TOPLEVEL_START;
 	parse.context = context;
+	parse.state = JM_EXPECT_TOPLEVEL_START;
+	parse.saw_version_field = false;
 
 	/* Create a JSON lexing context. */
 	lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
@@ -132,11 +158,9 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer,
 /*
  * Invoked at the start of each object in the JSON document.
  *
- * The document as a whole is expected to be an object with three keys
- * (PostgreSQL-Backup-Manifest-Version, Files, Manifest-Checksum) and each
- * file is expected to be an object with various keys (Path, Size, etc.).
- * If we're not at the beginning of either the toplevel object or the object
- * for a particular file, it's an error.
+ * The document as a whole is expected to be an object; each file and each
+ * WAL range is also expected to be an object. If we're anywhere else in the
+ * document, it's an error.
  */
 static void
 json_manifest_object_start(void *state)
@@ -146,9 +170,9 @@ json_manifest_object_start(void *state)
 	switch (parse->state)
 	{
 		case JM_EXPECT_TOPLEVEL_START:
-			parse->state = JM_EXPECT_VERSION_FIELD;
+			parse->state = JM_EXPECT_TOPLEVEL_FIELD;
 			break;
-		case JM_EXPECT_FILES_ARRAY_NEXT:
+		case JM_EXPECT_FILES_NEXT:
 			parse->state = JM_EXPECT_THIS_FILE_FIELD;
 			parse->pathname = NULL;
 			parse->encoded_pathname = NULL;
@@ -156,6 +180,12 @@ json_manifest_object_start(void *state)
 			parse->algorithm = NULL;
 			parse->checksum = NULL;
 			break;
+		case JM_EXPECT_WAL_RANGES_NEXT:
+			parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
+			parse->timeline = NULL;
+			parse->start_lsn = NULL;
+			parse->end_lsn = NULL;
+			break;
 		default:
 			json_manifest_parse_failure(parse->context,
 										"unexpected object start");
@@ -168,8 +198,8 @@ json_manifest_object_start(void *state)
  *
  * The possible cases here are the same as for json_manifest_object_start.
  * There's nothing special to do at the end of the document, but when we
- * reach the end of an object representing a particular file, we must call
- * json_manifest_finalize_file() to save the associated details.
+ * reach the end of an object representing a particular file or WAL range,
+ * we must call the matching json_manifest_finalize_* function to save it.
  */
 static void
 json_manifest_object_end(void *state)
@@ -183,7 +213,11 @@ json_manifest_object_end(void *state)
 			break;
 		case JM_EXPECT_THIS_FILE_FIELD:
 			json_manifest_finalize_file(parse);
-			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+			parse->state = JM_EXPECT_FILES_NEXT;
+			break;
+		case JM_EXPECT_THIS_WAL_RANGE_FIELD:
+			json_manifest_finalize_wal_range(parse);
+			parse->state = JM_EXPECT_WAL_RANGES_NEXT;
 			break;
 		default:
 			json_manifest_parse_failure(parse->context,
@@ -196,7 +230,8 @@ json_manifest_object_end(void *state)
  * Invoked at the start of each array in the JSON document.
  *
  * Within the toplevel object, the value associated with the "Files" key
- * should be an array. No other arrays are expected.
+ * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
+ * are expected.
  */
 static void
 json_manifest_array_start(void *state)
@@ -205,8 +240,11 @@ json_manifest_array_start(void *state)
 
 	switch (parse->state)
 	{
-		case JM_EXPECT_FILES_ARRAY_START:
-			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+		case JM_EXPECT_FILES_START:
+			parse->state = JM_EXPECT_FILES_NEXT;
+			break;
+		case JM_EXPECT_WAL_RANGES_START:
+			parse->state = JM_EXPECT_WAL_RANGES_NEXT;
 			break;
 		default:
 			json_manifest_parse_failure(parse->context,
@@ -218,8 +256,7 @@ json_manifest_array_start(void *state)
 /*
  * Invoked at the end of each array in the JSON document.
  *
- * Just like json_manifest_array_start, there's only one expected case
- * here.
+ * The cases here are analogous to those in json_manifest_array_start.
  */
 static void
 json_manifest_array_end(void *state)
@@ -228,8 +265,9 @@ json_manifest_array_end(void *state)
 
 	switch (parse->state)
 	{
-		case JM_EXPECT_FILES_ARRAY_NEXT:
-			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_FIELD;
+		case JM_EXPECT_FILES_NEXT:
+		case JM_EXPECT_WAL_RANGES_NEXT:
+			parse->state = JM_EXPECT_TOPLEVEL_FIELD;
 			break;
 		default:
 			json_manifest_parse_failure(parse->context,
@@ -248,46 +286,82 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull)
 
 	switch (parse->state)
 	{
-		case JM_EXPECT_VERSION_FIELD:
-			/* Inside toplevel object, expecting version indicator. */
-			if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
-				json_manifest_parse_failure(parse->context,
-											"expected version indicator");
-			parse->state = JM_EXPECT_VERSION_VALUE;
-			break;
-		case JM_EXPECT_FILES_FIELD:
-			/* Inside toplevel object, expecting "Files" next. */
-			if (strcmp(fname, "Files") != 0)
-				json_manifest_parse_failure(parse->context,
-											"expected file list");
-			parse->state = JM_EXPECT_FILES_ARRAY_START;
+		case JM_EXPECT_TOPLEVEL_FIELD:
+			/*
+			 * Inside toplevel object. The version indicator should always
+			 * be the first field.
+			 */
+			if (!parse->saw_version_field)
+			{
+				if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
+					json_manifest_parse_failure(parse->context,
+												"expected version indicator");
+				parse->state = JM_EXPECT_VERSION_VALUE;
+				parse->saw_version_field = true;
+				break;
+			}
+
+			/* Is this the list of files? */
+			if (strcmp(fname, "Files") == 0)
+			{
+				parse->state = JM_EXPECT_FILES_START;
+				break;
+			}
+
+			/* Is this the list of WAL ranges? */
+			if (strcmp(fname, "WAL-Ranges") == 0)
+			{
+				parse->state = JM_EXPECT_WAL_RANGES_START;
+				break;
+			}
+
+			/* Is this the manifest checksum? */
+			if (strcmp(fname, "Manifest-Checksum") == 0)
+			{
+				parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
+				break;
+			}
+
+			/* It's not a field we recognize. */
+			fprintf(stderr, "fname = %s\n", fname);
+			json_manifest_parse_failure(parse->context,
+										"unknown toplevel field");
 			break;
+
 		case JM_EXPECT_THIS_FILE_FIELD:
 			/* Inside object for one file; which key have we got? */
 			if (strcmp(fname, "Path") == 0)
-				parse->field = JMFF_PATH;
+				parse->file_field = JMFF_PATH;
 			else if (strcmp(fname, "Encoded-Path") == 0)
-				parse->field = JMFF_ENCODED_PATH;
+				parse->file_field = JMFF_ENCODED_PATH;
 			else if (strcmp(fname, "Size") == 0)
-				parse->field = JMFF_SIZE;
+				parse->file_field = JMFF_SIZE;
 			else if (strcmp(fname, "Last-Modified") == 0)
-				parse->field = JMFF_LAST_MODIFIED;
+				parse->file_field = JMFF_LAST_MODIFIED;
 			else if (strcmp(fname, "Checksum-Algorithm") == 0)
-				parse->field = JMFF_CHECKSUM_ALGORITHM;
+				parse->file_field = JMFF_CHECKSUM_ALGORITHM;
 			else if (strcmp(fname, "Checksum") == 0)
-				parse->field = JMFF_CHECKSUM;
+				parse->file_field = JMFF_CHECKSUM;
 			else
 				json_manifest_parse_failure(parse->context,
 											"unexpected file field");
 			parse->state = JM_EXPECT_THIS_FILE_VALUE;
 			break;
-		case JM_EXPECT_MANIFEST_CHECKSUM_FIELD:
-			/* Inside toplevel object, expecting "Manifest-Checksum" next. */
-			if (strcmp(fname, "Manifest-Checksum") != 0)
+
+		case JM_EXPECT_THIS_WAL_RANGE_FIELD:
+			/* Inside object for one WAL range; which key have we got? */
+			if (strcmp(fname, "Timeline") == 0)
+				parse->wal_range_field = JMWRF_TIMELINE;
+			else if (strcmp(fname, "Start-LSN") == 0)
+				parse->wal_range_field = JMWRF_START_LSN;
+			else if (strcmp(fname, "End-LSN") == 0)
+				parse->wal_range_field = JMWRF_END_LSN;
+			else
 				json_manifest_parse_failure(parse->context,
-											"expected manifest checksum");
-			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
+											"unexpected wal range field");
+			parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
 			break;
+
 		default:
 			json_manifest_parse_failure(parse->context,
 										"unexpected object field");
@@ -300,9 +374,9 @@ json_manifest_object_field_start(void *state, char *fname, bool isnull)
  *
  * Object field names don't reach this code; those are handled by
  * json_manifest_object_field_start. When we're inside of the object for
- * a particular file, that function will have noticed the name of the field,
- * and we'll get the corresponding value here. When we're in the toplevel
- * object, the parse state itself tells us which field this is.
+ * a particular file or WAL range, that function will have noticed the name
+ * of the field, and we'll get the corresponding value here. When we're in
+ * the toplevel object, the parse state itself tells us which field this is.
  *
  * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
  * can just check on the spot, the goal here is just to save the value in
@@ -321,10 +395,11 @@ json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
 			if (strcmp(token, "1") != 0)
 				json_manifest_parse_failure(parse->context,
 											"unexpected manifest version");
-			parse->state = JM_EXPECT_FILES_FIELD;
+			parse->state = JM_EXPECT_TOPLEVEL_FIELD;
 			break;
+
 		case JM_EXPECT_THIS_FILE_VALUE:
-			switch (parse->field)
+			switch (parse->file_field)
 			{
 				case JMFF_PATH:
 					parse->pathname = token;
@@ -347,10 +422,28 @@ json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
 			}
 			parse->state = JM_EXPECT_THIS_FILE_FIELD;
 			break;
+
+		case JM_EXPECT_THIS_WAL_RANGE_VALUE:
+			switch (parse->wal_range_field)
+			{
+				case JMWRF_TIMELINE:
+					parse->timeline = token;
+					break;
+				case JMWRF_START_LSN:
+					parse->start_lsn = token;
+					break;
+				case JMWRF_END_LSN:
+					parse->end_lsn = token;
+					break;
+			}
+			parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
+			break;
+
 		case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
 			parse->state = JM_EXPECT_TOPLEVEL_END;
 			parse->manifest_checksum = token;
 			break;
+
 		default:
 			json_manifest_parse_failure(parse->context, "unexpected scalar");
 			break;
@@ -459,6 +552,62 @@ json_manifest_finalize_file(JsonManifestParseState *parse)
 	}
 }
 
+/*
+ * Do additional parsing and sanity-checking of the details gathered for one
+ * WAL range, and invoke the per-WAL-range callback so that the caller gets
+ * those details. This happens for each WAL range when the corresponding JSON
+ * object is completely parsed. Missing or malformed fields are reported via
+ * json_manifest_parse_failure().
+ */
+static void
+json_manifest_finalize_wal_range(JsonManifestParseState *parse)
+{
+	JsonManifestParseContext *context = parse->context;
+	TimeLineID	tli;
+	XLogRecPtr	start_lsn,
+				end_lsn;
+	char	   *ep;
+
+	/* Make sure all fields are present. */
+	if (parse->timeline == NULL)
+		json_manifest_parse_failure(context, "missing timeline");
+	if (parse->start_lsn == NULL)
+		json_manifest_parse_failure(context, "missing start LSN");
+	if (parse->end_lsn == NULL)
+		json_manifest_parse_failure(context, "missing end LSN");
+
+	/* Parse timeline; strtoul consumes nothing from "", so check for that. */
+	tli = strtoul(parse->timeline, &ep, 10);
+	if (ep == parse->timeline || *ep)
+		json_manifest_parse_failure(context, "timeline is not an integer");
+
+	/* Parse start and end LSNs. */
+	if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
+		json_manifest_parse_failure(context, "unable to parse start LSN");
+	if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
+		json_manifest_parse_failure(context, "unable to parse end LSN");
+
+	/* Invoke the callback with the details we've gathered. */
+	context->perwalrange_cb(context, tli, start_lsn, end_lsn);
+
+	/* Free memory we no longer need. */
+	if (parse->timeline != NULL)
+	{
+		pfree(parse->timeline);
+		parse->timeline = NULL;
+	}
+	if (parse->start_lsn != NULL)
+	{
+		pfree(parse->start_lsn);
+		parse->start_lsn = NULL;
+	}
+	if (parse->end_lsn != NULL)
+	{
+		pfree(parse->end_lsn);
+		parse->end_lsn = NULL;
+	}
+}
+
 /*
  * Verify that the manifest checksum is correct.
  *
@@ -574,3 +723,18 @@ hexdecode_string(uint8 *result, char *input, int nbytes)
 
 	return true;
 }
+
+/*
+ * Parse an XLogRecPtr in the usual "%X/%X" format, rejecting trailing junk.
+ */
+static bool
+parse_xlogrecptr(XLogRecPtr *result, char *input)
+{
+	uint32		hi, lo;
+	int			len = 0;
+
+	if (sscanf(input, "%X/%X%n", &hi, &lo, &len) != 2 || input[len] != '\0')
+		return false;
+	*result = ((uint64) hi) << 32 | lo;
+	return true;
+}
diff --git a/src/bin/pg_validatebackup/parse_manifest.h b/src/bin/pg_validatebackup/parse_manifest.h
index 25d140f72f..f0a4fac36b 100644
--- a/src/bin/pg_validatebackup/parse_manifest.h
+++ b/src/bin/pg_validatebackup/parse_manifest.h
@@ -14,6 +14,7 @@
 #ifndef PARSE_MANIFEST_H
 #define PARSE_MANIFEST_H
 
+#include "access/xlogdefs.h"
 #include "common/checksum_helper.h"
 #include "mb/pg_wchar.h"
 
@@ -24,6 +25,9 @@ typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *,
 								 char *pathname,
 								 size_t size, pg_checksum_type checksum_type,
 								 int checksum_length, uint8 *checksum_payload);
+typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *,
+								 TimeLineID tli,
+								 XLogRecPtr start_lsn, XLogRecPtr end_lsn);
 typedef void (*json_manifest_error_callback)(JsonManifestParseContext *,
 								 char *fmt, ...) pg_attribute_printf(2, 3);
 
@@ -31,6 +35,7 @@ struct JsonManifestParseContext
 {
 	void	   *private_data;
 	json_manifest_perfile_callback perfile_cb;
+	json_manifest_perwalrange_callback perwalrange_cb;
 	json_manifest_error_callback error_cb;
 };
 
diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c
index eb1473d9d0..2c9d06a3a1 100644
--- a/src/bin/pg_validatebackup/pg_validatebackup.c
+++ b/src/bin/pg_validatebackup/pg_validatebackup.c
@@ -43,8 +43,8 @@
 #define READ_CHUNK_SIZE				4096
 
 /*
- * Information about each file described by the manifest file is parsed to
- * produce an object like this.
+ * Each file described by the manifest file is parsed to produce an object
+ * like this.
  */
 typedef struct manifestfile
 {
@@ -75,6 +75,29 @@ static uint32 hash_string_pointer(char *s);
 #define SH_DEFINE
 #include "lib/simplehash.h"
 
+/*
+ * Each WAL range described by the manifest file is parsed to produce an
+ * object like this. The next/prev pointers chain the ranges into a list.
+ */
+typedef struct manifest_wal_range
+{
+	TimeLineID		tli;
+	XLogRecPtr		start_lsn;
+	XLogRecPtr		end_lsn;
+	struct manifest_wal_range *next;
+	struct manifest_wal_range *prev;
+} manifest_wal_range;
+
+/*
+ * Details we need in callbacks that occur while parsing a backup manifest.
+ */
+typedef struct parser_context
+{
+	manifestfiles_hash *ht;		/* hash table of files in the manifest */
+	manifest_wal_range *first_wal_range;	/* head of WAL range list */
+	manifest_wal_range *last_wal_range;	/* tail of WAL range list */
+} parser_context;
+
 /*
  * All of the context information we need while checking a backup manifest.
  */
@@ -87,13 +110,18 @@ typedef struct validator_context
 	bool		saw_any_error;
 } validator_context;
 
-static manifestfiles_hash *parse_manifest_file(char *manifest_path);
+static void parse_manifest_file(char *manifest_path, manifestfiles_hash **ht_p,
+					manifest_wal_range **first_wal_range_p);
 
 static void record_manifest_details_for_file(JsonManifestParseContext *context,
 											 char *pathname, size_t size,
 											 pg_checksum_type checksum_type,
 											 int checksum_length,
 											 uint8 *checksum_payload);
+static void record_manifest_details_for_wal_range(JsonManifestParseContext *context,
+											 TimeLineID tli,
+											 XLogRecPtr start_lsn,
+											 XLogRecPtr end_lsn);
 static void report_manifest_error(JsonManifestParseContext *context,
 								  char *fmt, ...)
 			pg_attribute_printf(2, 3) pg_attribute_noreturn();
@@ -106,6 +134,10 @@ static void report_extra_backup_files(validator_context *context);
 static void validate_backup_checksums(validator_context *context);
 static void validate_file_checksum(validator_context *context,
 								   manifestfile *tabent, char *pathname);
+static void parse_required_wal(validator_context *context,
+							   char *pg_waldump_path,
+							   char *wal_directory,
+							   manifest_wal_range *first_wal_range);
 
 static void report_backup_error(validator_context *context,
 							   const char *pg_restrict fmt,...)
@@ -128,16 +160,23 @@ main(int argc, char **argv)
 		{"exit-on-error", no_argument, NULL, 'e'},
 		{"ignore", required_argument, NULL, 'i'},
 		{"manifest-path", required_argument, NULL, 'm'},
+		{"no-parse-wal", no_argument, NULL, 'n'},
+		{"print-parse-wal", no_argument, NULL, 'p'},
 		{"quiet", no_argument, NULL, 'q'},
 		{"skip-checksums", no_argument, NULL, 's'},
+		{"wal-directory", required_argument, NULL, 'w'},
 		{NULL, 0, NULL, 0}
 	};
 
 	int			c;
 	validator_context context;
+	manifest_wal_range *first_wal_range;
 	char	   *manifest_path = NULL;
+	bool		no_parse_wal = false;
 	bool		quiet = false;
 	bool		skip_checksums = false;
+	char	   *wal_directory = NULL;
+	char	   *pg_waldump_path = NULL;
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup"));
@@ -167,7 +206,7 @@ main(int argc, char **argv)
 	 *
 	 * Ignore the pg_wal directory, because those files are not included in
 	 * the backup manifest either, since they are fetched separately from the
-	 * backup itself.
+	 * backup itself, and validated via a separate mechanism.
 	 *
 	 * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
 	 * because we expect that those files may sometimes be created or changed
@@ -180,7 +219,7 @@ main(int argc, char **argv)
 	simple_string_list_append(&context.ignore_list, "recovery.signal");
 	simple_string_list_append(&context.ignore_list, "standby.signal");
 
-	while ((c = getopt_long(argc, argv, "ei:m:qs", long_options, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1)
 	{
 		switch (c)
 		{
@@ -199,12 +238,19 @@ main(int argc, char **argv)
 				manifest_path = pstrdup(optarg);
 				canonicalize_path(manifest_path);
 				break;
+			case 'n':
+				no_parse_wal = true;
+				break;
 			case 'q':
 				quiet = true;
 				break;
 			case 's':
 				skip_checksums = true;
 				break;
+			case 'w':
+				wal_directory = pstrdup(optarg);
+				canonicalize_path(wal_directory);
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						progname);
@@ -233,17 +279,49 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	/* Unless --no-parse-wal was specified, we need pg_waldump; exit without it. */
+	if (!no_parse_wal)
+	{
+		int		ret;
+
+		pg_waldump_path = pg_malloc(MAXPGPATH);
+		ret = find_other_exec(argv[0], "pg_waldump", "pg_waldump (PostgreSQL) "
+							  PG_VERSION "\n", pg_waldump_path);
+		if (ret < 0)
+		{
+			char	full_path[MAXPGPATH];
+
+			if (find_my_exec(argv[0], full_path) < 0)
+				strlcpy(full_path, progname, sizeof(full_path));
+			if (ret == -1)
+				pg_log_fatal("The program \"%s\" is needed by %s but was\n"
+							 "not found in the same directory as \"%s\".\n"
+							 "Check your installation.",
+							 "pg_waldump", "pg_validatebackup", full_path);
+			else
+				pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n"
+							 "not the same version as %s.\n"
+							 "Check your installation.",
+							 "pg_waldump", full_path, "pg_validatebackup");
+			exit(1);
+		}
+	}
+
 	/* By default, look for the manifest in the backup directory. */
 	if (manifest_path == NULL)
 		manifest_path = psprintf("%s/backup_manifest",
 								 context.backup_directory);
 
+	/* By default, look for the WAL in the backup directory, too. */
+	if (wal_directory == NULL)
+		wal_directory = psprintf("%s/pg_wal", context.backup_directory);
+
 	/*
 	 * Try to read the manifest. We treat any errors encountered while parsing
 	 * the manifest as fatal; there doesn't seem to be much point in trying to
 	 * validate the backup directory against a corrupted manifest.
 	 */
-	context.ht = parse_manifest_file(manifest_path);
+	parse_manifest_file(manifest_path, &context.ht, &first_wal_range);
 
 	/*
 	 * Now scan the files in the backup directory. At this stage, we verify
@@ -261,12 +339,20 @@ main(int argc, char **argv)
 	report_extra_backup_files(&context);
 
 	/*
-	 * Finally, do the expensive work of verifying file checksums, unless we
-	 * were told to skip it.
+	 * Now do the expensive work of verifying file checksums, unless we were
+	 * told to skip it.
 	 */
 	if (!skip_checksums)
 		validate_backup_checksums(&context);
 
+	/*
+	 * Try to parse the required ranges of WAL records, unless we were told
+	 * not to do so.
+	 */
+	if (!no_parse_wal)
+		parse_required_wal(&context, pg_waldump_path,
+						   wal_directory, first_wal_range);
+
 	/*
 	 * If everything looks OK, tell the user this, unless we were asked to
 	 * work quietly.
@@ -278,11 +364,13 @@ main(int argc, char **argv)
 }
 
 /*
- * Parse a manifest file and construct a hash table with information about
- * all the files it mentions.
+ * Parse a manifest file. Construct a hash table with information about
+ * all the files it mentions, and a linked list of all the WAL ranges it
+ * mentions.
  */
-static manifestfiles_hash *
-parse_manifest_file(char *manifest_path)
+static void
+parse_manifest_file(char *manifest_path, manifestfiles_hash **ht_p,
+					manifest_wal_range **first_wal_range_p)
 {
 	int			fd;
 	struct stat statbuf;
@@ -291,6 +379,7 @@ parse_manifest_file(char *manifest_path)
 	manifestfiles_hash *ht;
 	char	   *buffer;
 	int			rc;
+	parser_context	private_context;
 	JsonManifestParseContext	context;
 
 	/* Open the manifest file. */
@@ -329,17 +418,22 @@ parse_manifest_file(char *manifest_path)
 	/* Close the manifest file. */
 	close(fd);
 
-	/* Parse the manifest as JSON. */
-	context.private_data = ht;
+	/* Parse the manifest. */
+	private_context.ht = ht;
+	private_context.first_wal_range = NULL;
+	private_context.last_wal_range = NULL;
+	context.private_data = &private_context;
 	context.perfile_cb = record_manifest_details_for_file;
+	context.perwalrange_cb = record_manifest_details_for_wal_range;
 	context.error_cb = report_manifest_error;
 	json_parse_manifest(&context, buffer, statbuf.st_size);
 
 	/* Done with the buffer. */
 	pfree(buffer);
 
-	/* Return the hash table we constructed. */
-	return ht;
+	/* Return the file hash table and WAL range list we constructed. */
+	*ht_p = ht;
+	*first_wal_range_p = private_context.first_wal_range;
 }
 
 /*
@@ -369,7 +463,8 @@ record_manifest_details_for_file(JsonManifestParseContext *context,
 								 pg_checksum_type checksum_type,
 								 int checksum_length, uint8 *checksum_payload)
 {
-	manifestfiles_hash *ht = context->private_data;
+	parser_context *pcxt = context->private_data;
+	manifestfiles_hash *ht = pcxt->ht;
 	manifestfile *tabent;
 	bool		found;
 
@@ -388,6 +483,31 @@ record_manifest_details_for_file(JsonManifestParseContext *context,
 	tabent->bad = false;
 }
 
+/*
+ * Append one WAL range extracted from the backup manifest to the list.
+ */
+static void
+record_manifest_details_for_wal_range(JsonManifestParseContext *context,
+									  TimeLineID tli,
+									  XLogRecPtr start_lsn, XLogRecPtr end_lsn)
+{
+	parser_context *pcxt = context->private_data;
+	manifest_wal_range *range = palloc(sizeof(manifest_wal_range));
+
+	range->tli = tli;
+	range->start_lsn = start_lsn;
+	range->end_lsn = end_lsn;
+	range->prev = pcxt->last_wal_range;
+	range->next = NULL;
+
+	/* Link it in after the old tail, or as the head of an empty list. */
+	if (pcxt->last_wal_range != NULL)
+		pcxt->last_wal_range->next = range;
+	else
+		pcxt->first_wal_range = range;
+	pcxt->last_wal_range = range;
+}
+
 /*
  * Validate one directory.
  *
@@ -641,6 +761,35 @@ validate_file_checksum(validator_context *context, manifestfile *tabent,
 						   relpath);
 }
 
+/*
+ * Attempt to parse the WAL files required to restore from backup using
+ * pg_waldump, running it once per WAL range and reporting each failure.
+ */
+static void
+parse_required_wal(validator_context *context, char *pg_waldump_path,
+				   char *wal_directory, manifest_wal_range *first_wal_range)
+{
+	manifest_wal_range *range;
+
+	for (range = first_wal_range; range != NULL; range = range->next)
+	{
+		char	   *pg_waldump_cmd;
+
+		pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n",
+			   pg_waldump_path, wal_directory, range->tli,
+			   (uint32) (range->start_lsn >> 32),
+			   (uint32) range->start_lsn,
+			   (uint32) (range->end_lsn >> 32),
+			   (uint32) range->end_lsn);
+		if (system(pg_waldump_cmd) != 0)
+			report_backup_error(context,
+								"WAL parsing failed for timeline %u",
+								range->tli);
+		/* The command is rebuilt for every range, so don't leak it. */
+		pfree(pg_waldump_cmd);
+	}
+}
+
 /*
  * Report a problem with the backup.
  *
diff --git a/src/bin/pg_validatebackup/t/005_bad_manifest.pl b/src/bin/pg_validatebackup/t/005_bad_manifest.pl
index 9c503600d2..23c2f8338c 100644
--- a/src/bin/pg_validatebackup/t/005_bad_manifest.pl
+++ b/src/bin/pg_validatebackup/t/005_bad_manifest.pl
@@ -7,7 +7,7 @@ use Cwd;
 use Config;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 44;
+use Test::More tests => 42;
 
 my $tempdir = TestLib::tempdir;
 
@@ -37,7 +37,7 @@ test_parse_error('unexpected scalar', <<EOM);
 {"PostgreSQL-Backup-Manifest-Version": 1, "Files": true}
 EOM
 
-test_parse_error('expected file list', <<EOM);
+test_parse_error('unknown toplevel field', <<EOM);
 {"PostgreSQL-Backup-Manifest-Version": 1, "Oops": 1}
 EOM
 
@@ -104,11 +104,6 @@ test_fatal_error('invalid checksum for file', <<EOM);
 ]}
 EOM
 
-test_parse_error('expected manifest checksum', <<EOM);
-{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [],
- "Oops": 1}
-EOM
-
 test_parse_error('expected at least 2 lines', <<EOM);
 {"PostgreSQL-Backup-Manifest-Version": 1, "Files": [], "Manifest-Checksum": null}
 EOM
-- 
2.17.2 (Apple Git-113)