v9-0005-Adjust-pg_validate-to-validate-a-JSON-format-mani.patch

application/octet-stream

Filename: v9-0005-Adjust-pg_validate-to-validate-a-JSON-format-mani.patch
Type: application/octet-stream
Part: 4
Message: Re: backup manifests

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v9-0005
Subject: Adjust pg_validate to validate a JSON-format manifest.
File+
src/bin/pg_validatebackup/pg_validatebackup.c 358 340
From fc6ad5b9812bd417ab3d3aee534219a75e64bb6a Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Thu, 5 Mar 2020 11:27:14 -0500
Subject: [PATCH v9 5/5] Adjust pg_validate to validate a JSON-format manifest.

This is still somewhat rough around the edges.
---
 src/bin/pg_validatebackup/pg_validatebackup.c | 698 +++++++++---------
 1 file changed, 358 insertions(+), 340 deletions(-)

diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c
index d4d041ef7d..1af83204b1 100644
--- a/src/bin/pg_validatebackup/pg_validatebackup.c
+++ b/src/bin/pg_validatebackup/pg_validatebackup.c
@@ -19,9 +19,11 @@
 
 #include "common/checksum_helper.h"
 #include "common/hashfn.h"
+#include "common/jsonapi.h"
 #include "common/logging.h"
 #include "fe_utils/simple_list.h"
 #include "getopt_long.h"
+#include "mb/pg_wchar.h"
 
 /*
  * For efficiency, we'd like our hash table containing information about the
@@ -60,8 +62,8 @@
 #define FIELDS_PER_FILE_LINE		4
 
 /*
- * Each "File" line in the manifest file is parsed to produce an object
- * like this.
+ * Information about each file described by the manifest file is parsed to
+ * produce an object like this.
  */
 typedef struct manifestfile
 {
@@ -92,6 +94,50 @@ static uint32 hash_string_pointer(char *s);
 #define SH_DEFINE
 #include "lib/simplehash.h"
 
+/*
+ * Semantic states for JSON manifest parsing.
+ */
+typedef enum
+{
+	JM_EXPECT_TOPLEVEL_START,
+	JM_EXPECT_TOPLEVEL_END,
+	JM_EXPECT_VERSION_FIELD,
+	JM_EXPECT_VERSION_VALUE,
+	JM_EXPECT_FILES_FIELD,
+	JM_EXPECT_FILES_ARRAY_START,
+	JM_EXPECT_FILES_ARRAY_NEXT,
+	JM_EXPECT_THIS_FILE_FIELD,
+	JM_EXPECT_THIS_FILE_VALUE,
+	JM_EXPECT_MANIFEST_CHECKSUM_FIELD,
+	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
+	JM_EXPECT_EOF
+} JsonManifestSemanticState;
+
+/*
+ * Possible fields for one file as described by the manifest.
+ */
+typedef enum
+{
+	JMFF_PATH,
+	JMFF_SIZE,
+	JMFF_LAST_MODIFIED,
+	JMFF_CHECKSUM_ALGORITHM,
+	JMFF_CHECKSUM
+} JsonManifestFileField;
+
+typedef struct
+{
+	JsonManifestSemanticState state;
+	JsonManifestFileField field;
+	manifestfiles_hash *ht;
+	char	   *pathname;
+	char	   *size;
+	char	   *algorithm;
+	pg_checksum_type checksum_algorithm;
+	char	   *checksum;
+	char	   *manifest_checksum;
+} JsonManifestParseState;
+
 /*
  * All of the context information we need while checking a backup manifest.
  */
@@ -105,8 +151,15 @@ typedef struct validator_context
 } validator_context;
 
 static manifestfiles_hash * parse_manifest_file(char *manifest_path);
-static void parse_file_line_from_manifest(manifestfile *f, char *rest,
-										  int restlen);
+static void json_manifest_object_start(void *state);
+static void json_manifest_object_end(void *state);
+static void json_manifest_array_start(void *state);
+static void json_manifest_array_end(void *state);
+static void json_manifest_object_field_start(void *state, char *fname,
+											 bool isnull);
+static void json_manifest_scalar(void *state, char *token,
+								 JsonTokenType tokentype);
+
 static void validate_backup_directory(validator_context *context,
 									  char *relpath, char *fullpath);
 static void validate_backup_file(validator_context *context,
@@ -123,9 +176,6 @@ static void pg_validator_fatal(const char *pg_restrict fmt,...)
 			pg_attribute_printf(1, 2) pg_attribute_noreturn();
 static bool should_ignore_relpath(validator_context *context, char *relpath);
 
-static char *extractstr(char *buffer, int length);
-static int	findchar(char *buffer, int size, char c, int start_position);
-static int	findfield(char *buffer, char *end, char **result);
 static int	hexdecode_char(char c);
 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
 static void usage(void);
@@ -301,19 +351,20 @@ parse_manifest_file(char *manifest_path)
 	int			fd;
 	struct stat statbuf;
 	off_t		estimate;
-	off_t		bytes_read = 0;
-	off_t		bytes_consumed = 0;
 	uint32		initial_size;
 	manifestfiles_hash *ht;
 	char	   *buffer;
+	int			rc;
+	size_t		number_of_newlines = 0;
+	size_t		ultimate_newline = 0;
+	size_t		penultimate_newline = 0;
+	pg_sha256_ctx manifest_ctx;
 	uint8		manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
 	uint8		manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
-	int			buffer_position = 0;
-	int			buffer_size = 0;
-	int			buffer_maxsize = 2 * READ_CHUNK_SIZE;
-	int			line_number = 0;
-	bool		saw_manifest_checksum_line = false;
-	pg_sha256_ctx manifest_ctx;
+	JsonLexContext *lex;
+	JsonParseErrorType json_error;
+	JsonSemAction sem;
+	JsonManifestParseState parse;
 
 	/* Prepare to compute a checksum of the manifest itself. */
 	pg_sha256_init(&manifest_ctx);
@@ -333,252 +384,335 @@ parse_manifest_file(char *manifest_path)
 	/* Create the hash table. */
 	ht = manifestfiles_create(initial_size, NULL);
 
-	/* Initialize our read buffer. */
-	buffer = pg_malloc(buffer_maxsize);
-
 	/*
-	 * Loop until we've read it all.
+	 * Slurp in the whole file.
 	 *
-	 * The file size shouldn't be changing, so it seems fine to just error out
-	 * if the final length is different from what stat() told us.
+	 * This is not ideal, but there's currently no easy way to get
+	 * pg_parse_json() to perform incremental parsing.
 	 */
-	while (bytes_consumed < statbuf.st_size)
+	buffer = pg_malloc(statbuf.st_size);
+	rc = read(fd, buffer, statbuf.st_size);
+	if (rc != statbuf.st_size)
 	{
-		int			line_length;
-		int			first_field_length;
-		char	   *rest;
-		int			restlen;
-
-		/* Find next newline if any. */
-		line_length = findchar(buffer, buffer_size, '\n', buffer_position);
+		if (rc < 0)
+			pg_validator_fatal("could not read file \"%s\": %m",
+							   manifest_path);
+		else
+			pg_validator_fatal("could not read file \"%s\": read %d of %zu",
+							   manifest_path, rc, (size_t) statbuf.st_size);
+	}
 
-		/* If no newline was found, we need to read more data and try again. */
-		if (line_length == -1)
+	/* Find the last two newlines in the file. */
+	for (size_t i = 0; i < statbuf.st_size; ++i)
+	{
+		if (buffer[i] == '\n')
 		{
-			size_t		bytes_to_read;
-			int			rc;
-
-			bytes_to_read = Min(statbuf.st_size - bytes_read, READ_CHUNK_SIZE);
-			if (bytes_to_read == 0)
-				pg_validator_fatal("manifest file line not terminated by newline");
-			if (bytes_to_read + READ_CHUNK_SIZE > buffer_maxsize)
-			{
-				buffer_maxsize += READ_CHUNK_SIZE;
-				buffer = pg_realloc(buffer, buffer_maxsize);
-				Assert(bytes_to_read + READ_CHUNK_SIZE <= buffer_maxsize);
-			}
-			rc = read(fd, buffer + buffer_size, bytes_to_read);
-			if (rc != bytes_to_read)
-			{
-				if (rc < 0)
-					pg_validator_fatal("could not read file \"%s\": %m",
-									   manifest_path);
-				else
-					pg_validator_fatal("could not read file \"%s\": read %d of %zu",
-									   manifest_path, rc, bytes_to_read);
-			}
-			buffer_size += rc;
-			bytes_read += rc;
-			continue;
+			++number_of_newlines;
+			penultimate_newline = ultimate_newline;
+			ultimate_newline = i;
 		}
+	}
 
-		/* Increment line number. */
-		++line_number;
+	/*
+	 * Make sure that the last newline is right at the end, and that there
+	 * are at least two lines total.
+	 */
+	if (number_of_newlines < 2)
+		pg_validator_fatal("could not parse backup manifest: %s",
+						   "expected at least 2 lines");
+	if (ultimate_newline != statbuf.st_size - 1)
+		pg_validator_fatal("could not parse backup manifest: %s",
+						   "last line not newline-terminated");
 
-		/* The manifest checksum should be the last thing in the file. */
-		if (saw_manifest_checksum_line)
-			pg_validator_fatal("unexpected data follows manifest checksum");
+	/*
+	 * The manifest checksum covers everything in the file except for the
+	 * very last line.
+	 */
+	pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1);
+	pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
 
-		/* Find first field on line, and remaining line contents. */
-		first_field_length =
-			findchar(buffer, buffer_size, '\t', buffer_position);
-		rest = buffer + buffer_position + first_field_length + 1;
-		restlen = line_length - (first_field_length + 1);
+	/* Create a JSON lexing context. */
+	lex = makeJsonLexContextCstringLen(buffer, statbuf.st_size, PG_UTF8, true);
+
+	/* Set up semantic actions. */
+	parse.state = JM_EXPECT_TOPLEVEL_START;
+	parse.ht = ht;
+	sem.semstate = &parse;
+	sem.object_start = json_manifest_object_start;
+	sem.object_end = json_manifest_object_end;
+	sem.array_start = json_manifest_array_start;
+	sem.array_end = json_manifest_array_end;
+	sem.object_field_start = json_manifest_object_field_start;
+	sem.object_field_end = NULL;
+	sem.array_element_start = NULL;
+	sem.array_element_end = NULL;
+	sem.scalar = json_manifest_scalar;
+
+	/* Parse JSON. */
+	json_error = pg_parse_json(lex, &sem);
+	if (json_error != JSON_SUCCESS)
+		pg_validator_fatal("could not parse backup manifest: %s",
+						   json_errdetail(json_error, lex));
+	if (parse.state != JM_EXPECT_EOF)
+		pg_validator_fatal("could not parse backup manifest: %s",
+						   "manifest ended unexpectedly");
+
+	/* Verify manifest checksum. */
+	if (parse.manifest_checksum == NULL)
+		pg_validator_fatal("backup manifest checksum is missing");
+	if (strlen(parse.manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
+		!hexdecode_string(manifest_checksum_expected, parse.manifest_checksum,
+						  PG_SHA256_DIGEST_LENGTH))
+		pg_validator_fatal("invalid manifest checksum: \"%s\"",
+						   parse.manifest_checksum);
+	if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
+			   PG_SHA256_DIGEST_LENGTH) != 0)
+		pg_validator_fatal("manifest checksum mismatch");
 
-		/*
-		 * Check the first word of the line to see what kind of line it is.
-		 */
-		if (first_field_length == KWL_MANIFEST_VERSION &&
-			memcmp(buffer + buffer_position, KW_MANIFEST_VERSION,
-				   KWL_MANIFEST_VERSION) == 0)
-		{
-			if (line_number != 1)
-				pg_validator_fatal("manifest file version should only be specified at line 1");
-			else
-			{
-				char	   *line = buffer + buffer_position;
-				char	   *version;
-
-				version = extractstr(line + first_field_length + 1,
-									 line_length - (first_field_length + 1));
-				if (strcmp(version, "1") != 0)
-					pg_validator_fatal("unrecognized manifest version: \"%s\"",
-									   version);
-			}
-		}
-		else if (first_field_length == KWL_MANIFEST_FILE &&
-				 memcmp(buffer + buffer_position, KW_MANIFEST_FILE,
-						KWL_MANIFEST_FILE) == 0)
-		{
-			manifestfile f;
-			manifestfile *tabent;
-			bool		found;
+	/* OK, we're done with the manifest file. */
+	close(fd);
 
-			/* Parse this line. */
-			parse_file_line_from_manifest(&f, rest, restlen);
+	/* Return the hash table we constructed. */
+	return ht;
+}
 
-			/* Make a new entry in the hash table for it. */
-			tabent = manifestfiles_insert(ht, f.pathname, &found);
+static void
+json_manifest_parse_failure(char *msg)
+{
+	pg_validator_fatal("could not parse backup manifest: %s", msg);
+}
+
+static void
+json_manifest_object_start(void *state)
+{
+	JsonManifestParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case JM_EXPECT_TOPLEVEL_START:	/* outermost manifest object */
+			parse->state = JM_EXPECT_VERSION_FIELD;
+			break;
+		case JM_EXPECT_FILES_ARRAY_NEXT:	/* start of one file's object */
+			parse->state = JM_EXPECT_THIS_FILE_FIELD;
+			/* Reset every per-file field; size is otherwise uninitialized. */
+			parse->pathname = parse->size = NULL;
+			parse->algorithm = parse->checksum = NULL;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object start");
+			break;
+	}
+}
+
+static void
+json_manifest_object_end(void *state)
+{
+	JsonManifestParseState *parse = state;
+	manifestfile *tabent;
+	bool		found;
+	int			checksum_string_length;
+	char	   *ep;
+
+	switch (parse->state)
+	{
+		case JM_EXPECT_TOPLEVEL_END:
+			parse->state = JM_EXPECT_EOF;
+			break;
+		case JM_EXPECT_THIS_FILE_FIELD:
+			/* Pathname and size are required. */
+			if (parse->pathname == NULL)
+				json_manifest_parse_failure("missing pathname");
+			if (parse->size == NULL)
+				json_manifest_parse_failure("missing size");
+			if (parse->algorithm == NULL && parse->checksum != NULL)
+				json_manifest_parse_failure("checksum without algorithm");
+
+			/* Make a new entry in the hash table for this file. */
+			tabent = manifestfiles_insert(parse->ht, parse->pathname, &found);
 			if (found)
 				pg_validator_fatal("duplicate pathname in backup manifest: \"%s\"",
-								   f.pathname);
+								   parse->pathname);
 
-			/* Copy in all the relevant details. */
-			tabent->size = f.size;
-			tabent->checksum_type = f.checksum_type;
-			tabent->checksum_length = f.checksum_length;
-			tabent->checksum_payload = f.checksum_payload;
+			/* Initialize some fields. */
 			tabent->matched = false;
 			tabent->bad = false;
-		}
-		else if (first_field_length == KWL_MANIFEST_CHECKSUM &&
-				 memcmp(buffer + buffer_position, KW_MANIFEST_CHECKSUM,
-						KWL_MANIFEST_CHECKSUM) == 0)
-		{
-			saw_manifest_checksum_line = true;
-			if (restlen != PG_SHA256_DIGEST_STRING_LENGTH - 1)
-				pg_validator_fatal("manifest file checksum has unexpected length: %d",
-								   restlen);
-			if (!hexdecode_string(manifest_checksum_expected, rest,
-								  PG_SHA256_DIGEST_LENGTH))
-				pg_validator_fatal("invalid manifest checksum: \"%s\"",
-								   extractstr(rest, restlen));
-		}
-		else if (first_field_length == -1)
-			pg_validator_fatal("manifest file keyword not terminated by tab");
-		else
-		{
-			char	   *kw;
 
-			kw = extractstr(buffer + buffer_position, first_field_length);
-			pg_validator_fatal("unrecognized manifest file keyword: \"%s\"", kw);
-		}
-
-		/* Update manifest checksum, if needed. */
-		if (!saw_manifest_checksum_line)
-			pg_sha256_update(&manifest_ctx, (uint8 *) buffer + buffer_position,
-							 line_length + 1);
-
-		/* Advance buffer position over the data we just read. */
-		buffer_position += line_length + 1;
-
-		/* Also mark these bytes as consumed so we know when to stop. */
-		bytes_consumed += line_length + 1;
+			/* Parse size; an empty string is not a valid integer. */
+			tabent->size = strtoul(parse->size, &ep, 10);
+			if (ep == parse->size || *ep != '\0')
+				json_manifest_parse_failure("file size is not an integer");
+
+			/* Parse the checksum algorithm, if it's present. */
+			if (parse->algorithm == NULL)
+				tabent->checksum_type = CHECKSUM_TYPE_NONE;
+			else if (!pg_checksum_parse_type(parse->algorithm,
+											 &tabent->checksum_type))
+				pg_validator_fatal("unrecognized checksum algorithm: \"%s\"",
+								   parse->algorithm);
+
+			/* Parse the checksum payload, if it's present. */
+			checksum_string_length = parse->checksum == NULL ? 0
+				: strlen(parse->checksum);
+			if (checksum_string_length == 0)
+			{
+				tabent->checksum_length = 0;
+				tabent->checksum_payload = NULL;
+			}
+			else
+			{
+				tabent->checksum_length = checksum_string_length / 2;
+				tabent->checksum_payload = palloc(tabent->checksum_length);
+				if (checksum_string_length % 2 != 0 ||
+					!hexdecode_string(tabent->checksum_payload,
+									  parse->checksum,
+									  tabent->checksum_length))
+					pg_validator_fatal("invalid checksum for file \"%s\": \"%s\"",
+									   parse->pathname,
+									   parse->checksum);	/* hex text, not raw bytes */
+			}
 
-		/*
-		 * We don't want to incur the expensive of using memmove() to discard
-		 * data after every line, because the lines are short compared to the
-		 * chunk size -- but we must do it at least now and then, or we'll
-		 * have to keep growing the buffer.
-		 */
-		if (buffer_position >= READ_CHUNK_SIZE)
-		{
-			int			leftover_bytes = buffer_size - buffer_position;
+			/* Free memory we no longer need. */
+			if (parse->size != NULL)
+			{
+				pfree(parse->size);
+				parse->size = NULL;
+			}
+			if (parse->algorithm != NULL)
+			{
+				pfree(parse->algorithm);
+				parse->algorithm = NULL;
+			}
+			if (parse->checksum != NULL)
+			{
+				pfree(parse->checksum);
+				parse->checksum = NULL;
+			}
 
-			if (leftover_bytes > 0)
-				memmove(buffer, buffer + buffer_position, leftover_bytes);
-			buffer_size -= buffer_position;
-			buffer_position = 0;
-		}
+			/* Expect next file (or end of list). */
+			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object end");
+			break;
 	}
+}
 
-	/* Checksum verification. */
-	if (!saw_manifest_checksum_line)
-		pg_validator_fatal("manifest has no checksum");
-	pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
-	if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
-			   PG_SHA256_DIGEST_LENGTH) != 0)
-		pg_validator_fatal("manifest checksum does not match");
-
-	/* OK, we're done with the manifest file. */
-	close(fd);
+static void
+json_manifest_array_start(void *state)
+{
+	JsonManifestParseState *parse = state;
 
-	/* Return the hash table we constructed. */
-	return ht;
+	switch (parse->state)
+	{
+		case JM_EXPECT_FILES_ARRAY_START:
+			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected array start");
+			break;
+	}
 }
 
-/*
- * The caller passes the remainder of the line, excluding the initial "File\t"
- * portion.
- */
 static void
-parse_file_line_from_manifest(manifestfile *f, char *rest, int restlen)
+json_manifest_array_end(void *state)
 {
-	char	   *end = rest + restlen;
-	char	   *field[FIELDS_PER_FILE_LINE];
-	unsigned long filesize;
-	char	   *ep;
-	pg_checksum_type checksum_type;
-	int			raw_checksum_length = 0;
-	char	   *raw_checksum_payload = NULL;
-	int			checksum_length;
-	uint8	   *checksum_payload;
-	int			i;
-	char	   *s;
+	JsonManifestParseState *parse = state;
 
-	/* Split the line into fields. */
-	for (i = 0; i < FIELDS_PER_FILE_LINE; ++i)
+	switch (parse->state)
 	{
-		int			toklen;
-
-		toklen = findfield(rest, end, &field[i]);
-		if (rest + toklen >= end && i + 1 < FIELDS_PER_FILE_LINE)
-			pg_validator_fatal("manifest file line has too few fields");
-		rest += toklen + 1;
+		case JM_EXPECT_FILES_ARRAY_NEXT:
+			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_FIELD;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected array end");
+			break;
 	}
+}
 
-	/* We expect to have used the entire line. */
-	if (rest < end)
-		pg_validator_fatal("manifest file line has too many fields");
-
-	/* Parse the size. */
-	filesize = strtoul(field[1], &ep, 10);
-	if (*ep)
-		pg_validator_fatal("manifest file size for file \"%s\" is not a number",
-						   field[0]);
+static void
+json_manifest_object_field_start(void *state, char *fname, bool isnull)
+{
+	JsonManifestParseState *parse = state;
 
-	/* Parse the checksum type. */
-	for (s = field[3]; s[0] != '\0' && s[0] != ':'; ++s)
-		;
-	if (*s)
+	switch (parse->state)
 	{
-		raw_checksum_payload = s + 1;
-		raw_checksum_length = strlen(raw_checksum_payload);
-		*s = '\0';
+		case JM_EXPECT_VERSION_FIELD:
+			if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
+				json_manifest_parse_failure("expected version indicator");
+			parse->state = JM_EXPECT_VERSION_VALUE;
+			break;
+		case JM_EXPECT_FILES_FIELD:
+			if (strcmp(fname, "Files") != 0)
+				json_manifest_parse_failure("expected file list");
+			parse->state = JM_EXPECT_FILES_ARRAY_START;
+			break;
+		case JM_EXPECT_THIS_FILE_FIELD:
+			if (strcmp(fname, "Path") == 0)
+				parse->field = JMFF_PATH;
+			else if (strcmp(fname, "Size") == 0)
+				parse->field = JMFF_SIZE;
+			else if (strcmp(fname, "Last-Modified") == 0)
+				parse->field = JMFF_LAST_MODIFIED;
+			else if (strcmp(fname, "Checksum-Algorithm") == 0)
+				parse->field = JMFF_CHECKSUM_ALGORITHM;
+			else if (strcmp(fname, "Checksum") == 0)
+				parse->field = JMFF_CHECKSUM;
+			else
+				json_manifest_parse_failure("unexpected file field");
+			parse->state = JM_EXPECT_THIS_FILE_VALUE;
+			break;
+		case JM_EXPECT_MANIFEST_CHECKSUM_FIELD:
+			if (strcmp(fname, "Manifest-Checksum") != 0)
+				json_manifest_parse_failure("expected manifest checksum");
+			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object field");
+			break;
 	}
-	if (!pg_checksum_parse_type(field[3], &checksum_type))
-		pg_validator_fatal("unrecognized checksum algorithm for file \"%s\": \"%s\"",
-						   field[0], field[3]);
-
-	/* Decode the checksum payload. */
-	checksum_length = raw_checksum_length / 2;
-	if (checksum_length == 0)
-		checksum_payload = NULL;
-	else
+}
+
+static void
+json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	JsonManifestParseState *parse = state;
+
+	switch (parse->state)
 	{
-		checksum_payload = palloc(checksum_length);
-		if (!hexdecode_string(checksum_payload, raw_checksum_payload,
-							  checksum_length))
-			pg_validator_fatal("invalid checksum for file \"%s\": \"%s\"",
-							   field[0], raw_checksum_payload);
+		case JM_EXPECT_VERSION_VALUE:
+			if (strcmp(token, "1") != 0)
+				json_manifest_parse_failure("unexpected manifest version");
+			parse->state = JM_EXPECT_FILES_FIELD;
+			break;
+		case JM_EXPECT_THIS_FILE_VALUE:
+			switch (parse->field)
+			{
+				case JMFF_PATH:
+					parse->pathname = token;
+					break;
+				case JMFF_SIZE:
+					parse->size = token;
+					break;
+				case JMFF_LAST_MODIFIED:
+					pfree(token);	/* unused */
+					break;
+				case JMFF_CHECKSUM_ALGORITHM:
+					parse->algorithm = token;
+					break;
+				case JMFF_CHECKSUM:
+					parse->checksum = token;
+					break;
+			}
+			parse->state = JM_EXPECT_THIS_FILE_FIELD;
+			break;
+		case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
+			parse->state = JM_EXPECT_TOPLEVEL_END;
+			parse->manifest_checksum = token;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected scalar");
+			break;
 	}
-
-	/* Fill the output struct. */
-	f->pathname = field[0];
-	f->size = filesize;
-	f->checksum_type = checksum_type;
-	f->checksum_length = checksum_length;
-	f->checksum_payload = checksum_payload;
 }
 
 /*
@@ -912,122 +1046,6 @@ should_ignore_relpath(validator_context *context, char *relpath)
 	return false;
 }
 
-/*
- * Extract a NUL-terminated string from a larger buffer.
- */
-static char *
-extractstr(char *buffer, int length)
-{
-	char	   *s = palloc(length + 1);
-
-	memcpy(s, buffer, length);
-	s[length] = '\0';
-
-	return s;
-}
-
-/*
- * Find the next instance of a given character within a buffer that
- * occurs at or after start_position. If there is none, returns -1; else
- * returns the difference between the position at which the character was
- * found and the start position.
- */
-static int
-findchar(char *buffer, int size, char c, int start_position)
-{
-	int			i;
-
-	for (i = start_position; i < size; ++i)
-		if (buffer[i] == c)
-			return i - start_position;
-	return -1;
-}
-
-/*
- * Extract the next field from a line of text read from the manifest file.
- */
-static int
-findfield(char *buffer, char *end, char **result)
-{
-	int			qoffset = 1;
-	int			dqcount = 0;
-	int			toklen;
-	int			bufpos;
-	int			resultpos;
-
-	/*
-	 * If this field is unquoted, we just stop at the next tab; if there's
-	 * none, we stop at the end of the line. Note that if buffer == end, it
-	 * just means that the last field on the line is empty.
-	 */
-	if (buffer == end || *buffer != '"')
-	{
-		toklen = findchar(buffer, end - buffer, '\t', 0);
-
-		if (toklen == -1)
-			toklen = end - buffer;
-		*result = extractstr(buffer, toklen);
-		return toklen;
-	}
-
-	/*
-	 * Our escaping convention is that if the field contains a tab, it must be
-	 * surrounded by double-quotes and any internal double-quotes must be
-	 * doubled.
-	 */
-	while (1)
-	{
-		/* Where's the next double quote? */
-		qoffset += findchar(buffer, end - buffer, '"', qoffset);
-		if (qoffset == -1)
-			pg_validator_fatal("quoted field in backup manifest is not terminated");
-
-		/*
-		 * If the double-quote we found is the last character on the line or
-		 * if it's followed by a tab, we've reached the end of this field.
-		 */
-		if (buffer + qoffset >= end || buffer[qoffset + 1] == '\t')
-			break;
-
-		/* Otherwise, the next character should be another double-quote. */
-		if (buffer[qoffset + 1] != '"')
-		{
-			pg_log_fatal("invalid quoted field in backup manifest");
-			exit(1);
-		}
-
-		/* Skip both double-quotes and go around again. */
-		qoffset += 2;
-		++dqcount;
-	}
-
-	/*
-	 * At this point, we know that qoffset is the offset, relative to buffer,
-	 * of the closing double-quote, and that dqcount is the number of escaped
-	 * double-quotes within the field, and that all of those escape sequences
-	 * are proper. Extract and de-escape the data in the field.
-	 *
-	 * The amount of space needed for the result is equal to the raw token
-	 * length, minus two for the double quotes at the start and end, minus one
-	 * for each doubled double-quote within the token, plus one for the
-	 * trailing zero byte.
-	 */
-	toklen = qoffset + 1;
-	*result = palloc(toklen - dqcount - 1);
-	bufpos = 1;
-	resultpos = 0;
-	while (bufpos < qoffset)
-	{
-		(*result)[resultpos] = buffer[bufpos];
-		bufpos += (buffer[bufpos] == '"' ? 2 : 1);
-		++resultpos;
-	}
-	(*result)[resultpos] = '\0';
-	Assert(resultpos == toklen - dqcount - 2);
-
-	return toklen;
-}
-
 /*
  * Helper function for manifestfiles hash table.
  */
-- 
2.17.2 (Apple Git-113)