v8-0005-WIP-Validate-JSON-format-manifest.patch

application/octet-stream

Filename: v8-0005-WIP-Validate-JSON-format-manifest.patch
Type: application/octet-stream
Part: 3
Message: Re: backup manifests

Patch

Same data as JSON: `GET /api/v1/attachments/:id/patch` returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v8-0005
Subject: WIP: Validate JSON-format manifest.
File (+ insertions, − deletions)
src/bin/pg_validatebackup/pg_validatebackup.c: +315 −375
From 527eaf616026b8132937c543e961204b7051145c Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Thu, 27 Feb 2020 21:05:02 +0530
Subject: [PATCH v8 5/5] WIP: Validate JSON-format manifest.

---
 src/bin/pg_validatebackup/pg_validatebackup.c | 690 ++++++++----------
 1 file changed, 315 insertions(+), 375 deletions(-)

diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c
index 4f47b20855..1b0a449470 100644
--- a/src/bin/pg_validatebackup/pg_validatebackup.c
+++ b/src/bin/pg_validatebackup/pg_validatebackup.c
@@ -19,9 +19,11 @@
 
 #include "common/checksum_helper.h"
 #include "common/hashfn.h"
+#include "common/jsonapi.h"
 #include "common/logging.h"
 #include "fe_utils/simple_list.h"
 #include "getopt_long.h"
+#include "mb/pg_wchar.h"
 
 /*
  * For efficiency, we'd like our hash table containing information about the
@@ -60,8 +62,8 @@
 #define FIELDS_PER_FILE_LINE		4
 
 /*
- * Each "File" line in the manifest file is parsed to produce an object
- * like this.
+ * Information about each file described by the manifest file is parsed to
+ * produce an object like this.
  */
 typedef struct manifestfile
 {
@@ -92,6 +94,49 @@ static uint32 hash_string_pointer(char *s);
 #define SH_DEFINE
 #include "lib/simplehash.h"
 
+/*
+ * Semantic states for JSON manifest parsing.
+ */
+typedef enum
+{
+	JM_EXPECT_TOPLEVEL_START,	/* initial state: opening of the top-level object */
+	JM_EXPECT_TOPLEVEL_END,		/* close of the top-level object */
+	JM_EXPECT_VERSION_FIELD,	/* key "PostgreSQL-Backup-Manifest-Version" */
+	JM_EXPECT_VERSION_VALUE,	/* version scalar; only "1" is accepted */
+	JM_EXPECT_FILES_FIELD,		/* key "Files" */
+	JM_EXPECT_FILES_ARRAY_START,	/* start of the "Files" array */
+	JM_EXPECT_FILES_ARRAY_NEXT,	/* next file object, or end of the array */
+	JM_EXPECT_THIS_FILE_FIELD,	/* next key of a file object, or its end */
+	JM_EXPECT_THIS_FILE_VALUE,	/* scalar value for the key just seen */
+	JM_EXPECT_MANIFEST_CHECKSUM_FIELD,	/* key "Manifest-Checksum" */
+	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,	/* scalar value of the manifest checksum */
+	JM_EXPECT_EOF				/* top-level object closed; nothing more expected */
+} JsonManifestSemanticState;
+
+/*
+ * Possible fields for one file as described by the manifest.
+ */
+typedef enum
+{
+	JMFF_PATH,					/* key "Path" */
+	JMFF_SIZE,					/* key "Size" */
+	JMFF_LAST_MODIFIED,			/* key "Last-Modified"; its value is discarded */
+	JMFF_CHECKSUM_ALGORITHM,	/* key "Checksum-Algorithm" */
+	JMFF_CHECKSUM				/* key "Checksum" */
+} JsonManifestFileField;
+
+typedef struct
+{
+	JsonManifestSemanticState	state;	/* what the callbacks expect next */
+	JsonManifestFileField	field;	/* file key whose value is pending */
+	manifestfiles_hash *ht;		/* table that receives one entry per file */
+	char *pathname;				/* "Path" token; becomes the hash key */
+	char *size;					/* "Size" token, converted with strtoll() at object end */
+	char *algorithm;			/* "Checksum-Algorithm" token, or NULL if absent */
+	pg_checksum_type	checksum_algorithm;	/* NOTE(review): looks unused in the visible hunks - confirm */
+	char *checksum;				/* "Checksum" token (hex text), or NULL if absent */
+} JsonManifestParseState;
+
 /*
  * All of the context information we need while checking a backup manifest.
  */
@@ -105,8 +150,15 @@ typedef struct validator_context
 } validator_context;
 
 static manifestfiles_hash * parse_manifest_file(char *manifest_path);
-static void parse_file_line_from_manifest(manifestfile *f, char *rest,
-										  int restlen);
+static void json_manifest_object_start(void *state);
+static void json_manifest_object_end(void *state);
+static void json_manifest_array_start(void *state);
+static void json_manifest_array_end(void *state);
+static void json_manifest_object_field_start(void *state, char *fname,
+											 bool isnull);
+static void json_manifest_scalar(void *state, char *token,
+								 JsonTokenType tokentype);
+
 static void validate_backup_directory(validator_context *context,
 									  char *relpath, char *fullpath);
 static void validate_backup_file(validator_context *context,
@@ -121,9 +173,6 @@ static void pg_validator_error(validator_context *context,
 			pg_attribute_printf(2, 3);
 static bool should_ignore_relpath(validator_context *context, char *relpath);
 
-static char *extractstr(char *buffer, int length);
-static int	findchar(char *buffer, int size, char c, int start_position);
-static int	findfield(char *buffer, char *end, char **result);
 static int	hexdecode_char(char c);
 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
 static void usage(void);
@@ -275,19 +324,15 @@ parse_manifest_file(char *manifest_path)
 	int			fd;
 	struct stat statbuf;
 	off_t		estimate;
-	off_t		bytes_read = 0;
-	off_t		bytes_consumed = 0;
 	uint32		initial_size;
 	manifestfiles_hash *ht;
 	char	   *buffer;
-	uint8		manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
-	uint8		manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
-	int			buffer_position = 0;
-	int			buffer_size = 0;
-	int			buffer_maxsize = 2 * READ_CHUNK_SIZE;
-	int			line_number = 0;
-	bool		saw_manifest_checksum_line = false;
+	int			rc;
 	pg_sha256_ctx manifest_ctx;
+	JsonLexContext *lex;
+	JsonParseErrorType json_error;
+	JsonSemAction	sem;
+	JsonManifestParseState	parse;
 
 	/* Prepare to compute a checksum of the manifest itself. */
 	pg_sha256_init(&manifest_ctx);
@@ -313,296 +358,310 @@ parse_manifest_file(char *manifest_path)
 	/* Create the hash table. */
 	ht = manifestfiles_create(initial_size, NULL);
 
-	/* Initialize our read buffer. */
-	buffer = pg_malloc(buffer_maxsize);
-
 	/*
-	 * Loop until we've read it all.
+	 * Slurp in the whole file.
 	 *
-	 * The file size shouldn't be changing, so it seems fine to just error out
-	 * if the final length is different from what stat() told us.
+	 * This is not ideal, but there's currently no easy way to get
+	 * pg_parse_json() to perform incremental parsing.
 	 */
-	while (bytes_consumed < statbuf.st_size)
+	buffer = pg_malloc(statbuf.st_size);
+	rc = read(fd, buffer, statbuf.st_size);
+	if (rc != statbuf.st_size)
 	{
-		int			line_length;
-		int			first_field_length;
-		char	   *rest;
-		int			restlen;
+		if (rc < 0)
+			pg_log_fatal("could not read file \"%s\": %m",
+						 manifest_path);
+		else
+			pg_log_fatal("could not read file \"%s\": read %d of %zu",
+						 manifest_path, rc, (size_t) statbuf.st_size);
+		exit(1);
+	}
 
-		/* Find next newline if any. */
-		line_length = findchar(buffer, buffer_size, '\n', buffer_position);
+	/* Create a JSON lexing context. */
+	lex = makeJsonLexContextCstringLen(buffer, statbuf.st_size, PG_UTF8, true);
+
+	/* Set up semantic actions. */
+	parse.state = JM_EXPECT_TOPLEVEL_START;
+	parse.ht = ht;
+	sem.semstate = &parse;
+	sem.object_start = json_manifest_object_start;
+	sem.object_end = json_manifest_object_end;
+	sem.array_start = json_manifest_array_start;
+	sem.array_end = json_manifest_array_end;
+	sem.object_field_start = json_manifest_object_field_start;
+	sem.object_field_end = NULL;
+	sem.array_element_start = NULL;
+	sem.array_element_end = NULL;
+	sem.scalar = json_manifest_scalar;
+
+	/* Parse JSON. */
+	json_error = pg_parse_json(lex, &sem);
+	if (json_error != JSON_SUCCESS)
+	{
+		pg_log_fatal("could not parse backup manifest: %s",
+					 json_errdetail(json_error, lex));
+		exit(1);
+	}
+	if (parse.state != JM_EXPECT_EOF)
+	{
+		pg_log_fatal("could not parse backup manifest: %s",
+					 "manifest ended unexpectedly");
+	}
 
-		/* If no newline was found, we need to read more data and try again. */
-		if (line_length == -1)
-		{
-			size_t		bytes_to_read;
-			int			rc;
+	/* OK, we're done with the manifest file. */
+	close(fd);
 
-			bytes_to_read = Min(statbuf.st_size - bytes_read, READ_CHUNK_SIZE);
-			if (bytes_to_read == 0)
-			{
-				pg_log_fatal("manifest file line not terminated by newline");
-				exit(1);
-			}
-			if (bytes_to_read + READ_CHUNK_SIZE > buffer_maxsize)
-			{
-				buffer_maxsize += READ_CHUNK_SIZE;
-				buffer = pg_realloc(buffer, buffer_maxsize);
-				Assert(bytes_to_read + READ_CHUNK_SIZE <= buffer_maxsize);
-			}
-			rc = read(fd, buffer + buffer_size, bytes_to_read);
-			if (rc != bytes_to_read)
+	/* Return the hash table we constructed. */
+	return ht;
+}
+
+static void
+json_manifest_parse_failure(char *msg)	/* msg: reason appended to the fatal log line */
+{
+	pg_log_fatal("could not parse backup manifest: %s", msg);
+	exit(1);					/* does not return to the caller */
+}
+
+static void
+json_manifest_object_start(void *state)	/* object_start callback; state is a JsonManifestParseState */
+{
+	JsonManifestParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case JM_EXPECT_TOPLEVEL_START:	/* the manifest's outermost object */
+			parse->state = JM_EXPECT_VERSION_FIELD;
+			break;
+		case JM_EXPECT_FILES_ARRAY_NEXT:	/* a new file object begins */
+			parse->state = JM_EXPECT_THIS_FILE_FIELD;
+			/* Reset every per-file token; size too, or it is read uninitialized. */
+			parse->pathname = parse->size = NULL;
+			parse->algorithm = parse->checksum = NULL;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object start");
+			break;
+	}
+}
+
+static void
+json_manifest_object_end(void *state)	/* object_end callback; state is a JsonManifestParseState */
+{
+	JsonManifestParseState *parse = state;
+	manifestfile *tabent;
+	bool        found;
+	int			checksum_string_length;
+	char	   *ep;
+
+	switch (parse->state)
+	{
+		case JM_EXPECT_TOPLEVEL_END:	/* the outermost object was closed */
+			parse->state = JM_EXPECT_EOF;
+			break;
+		case JM_EXPECT_THIS_FILE_FIELD:	/* one file object is complete */
+			/* Pathname and size are required. */
+			if (parse->pathname == NULL)
+				json_manifest_parse_failure("missing pathname");
+			if (parse->size == NULL)
+				json_manifest_parse_failure("missing size");
+			if (parse->algorithm == NULL && parse->checksum != NULL)
+				json_manifest_parse_failure("checksum without algorithm");
+
+			/* Make a new entry in the hash table for this file. */
+			tabent = manifestfiles_insert(parse->ht, parse->pathname, &found);
+			if (found)
 			{
-				if (rc < 0)
-					pg_log_fatal("could not read file \"%s\": %m",
-								 manifest_path);
-				else
-					pg_log_fatal("could not read file \"%s\": read %d of %zu",
-								 manifest_path, rc, bytes_to_read);
+				pg_log_fatal("duplicate pathname in backup manifest: \"%s\"",
+							 parse->pathname);
 				exit(1);
 			}
-			buffer_size += rc;
-			bytes_read += rc;
-			continue;
-		}
 
-		/* Increment line number. */
-		++line_number;
-
-		/* The manifest checksum should be the last thing in the file. */
-		if (saw_manifest_checksum_line)
-		{
-			pg_log_fatal("unexpected data follows manifest checksum");
-			exit(1);
-		}
+			/* Initialize some fields. */
+			tabent->matched = false;
+			tabent->bad = false;
 
-		/* Find first field on line, and remaining line contents. */
-		first_field_length =
-			findchar(buffer, buffer_size, '\t', buffer_position);
-		rest = buffer + buffer_position + first_field_length + 1;
-		restlen = line_length - (first_field_length + 1);
+			/* Parse size; an empty string or trailing junk is an error. */
+			tabent->size = strtoll(parse->size, &ep, 10);
+			if (ep == parse->size || *ep)
+				json_manifest_parse_failure("file size is not an integer");
 
-		/*
-		 * Check the first word of the line to see what kind of line it is.
-		 */
-		if (first_field_length == KWL_MANIFEST_VERSION &&
-			memcmp(buffer + buffer_position, KW_MANIFEST_VERSION,
-				   KWL_MANIFEST_VERSION) == 0)
-		{
-			if (line_number != 1)
+			/* Parse the checksum algorithm, if it's present. */
+			if (parse->algorithm == NULL)
+				tabent->checksum_type = CHECKSUM_TYPE_NONE;
+			else if (!pg_checksum_parse_type(parse->algorithm,
+											 &tabent->checksum_type))
 			{
-				pg_log_fatal("manifest file version should only be specified at line 1");
+				pg_log_fatal("unrecognized checksum algorithm: \"%s\"",
+							 parse->algorithm);
 				exit(1);
 			}
+
+			/* Parse the checksum payload, if it's present. */
+			checksum_string_length = parse->checksum == NULL ? 0
+				: strlen(parse->checksum);
+			if (checksum_string_length == 0)
+			{
+				tabent->checksum_length = 0;
+				tabent->checksum_payload = NULL;
+			}
 			else
 			{
-				char	   *line = buffer + buffer_position;
-				char	   *version;
 
-				version = extractstr(line + first_field_length + 1,
-									 line_length - (first_field_length + 1));
-				if (strcmp(version, "1") != 0)
+				tabent->checksum_length = checksum_string_length / 2;
+				tabent->checksum_payload = palloc(tabent->checksum_length);
+				if (checksum_string_length % 2 != 0 ||
+					!hexdecode_string(tabent->checksum_payload,
+									  parse->checksum,
+									  tabent->checksum_length))
 				{
-					pg_log_fatal("unrecognized manifest version: \"%s\"",
-								 version);
+					pg_log_fatal("invalid checksum for file \"%s\": \"%s\"",
+								 parse->pathname, parse->checksum);	/* report the text, not raw bytes */
 					exit(1);
 				}
 			}
-		}
-		else if (first_field_length == KWL_MANIFEST_FILE &&
-				 memcmp(buffer + buffer_position, KW_MANIFEST_FILE,
-						KWL_MANIFEST_FILE) == 0)
-		{
-			manifestfile f;
-			manifestfile *tabent;
-			bool		found;
 
-			/* Parse this line. */
-			parse_file_line_from_manifest(&f, rest, restlen);
-
-			/* Make a new entry in the hash table for it. */
-			tabent = manifestfiles_insert(ht, f.pathname, &found);
-			if (found)
+			/* Free memory we no longer need. */
+			if (parse->size != NULL)
 			{
-				pg_log_fatal("duplicate pathname in backup manifest: \"%s\"",
-							 f.pathname);
-				exit(1);
+				pfree(parse->size);
+				parse->size = NULL;
 			}
-
-			/* Copy in all the relevant details. */
-			tabent->size = f.size;
-			tabent->checksum_type = f.checksum_type;
-			tabent->checksum_length = f.checksum_length;
-			tabent->checksum_payload = f.checksum_payload;
-			tabent->matched = false;
-			tabent->bad = false;
-		}
-		else if (first_field_length == KWL_MANIFEST_CHECKSUM &&
-				 memcmp(buffer + buffer_position, KW_MANIFEST_CHECKSUM,
-						KWL_MANIFEST_CHECKSUM) == 0)
-		{
-			saw_manifest_checksum_line = true;
-			if (restlen != PG_SHA256_DIGEST_STRING_LENGTH - 1)
+			if (parse->algorithm != NULL)
 			{
-				pg_log_fatal("manifest file checksum has unexpected length: %d",
-							 restlen);
-				exit(1);
+				pfree(parse->algorithm);
+				parse->algorithm = NULL;
 			}
-			if (!hexdecode_string(manifest_checksum_expected, rest,
-								  PG_SHA256_DIGEST_LENGTH))
+			if (parse->checksum != NULL)
 			{
-				pg_log_fatal("invalid manifest checksum: \"%s\"",
-							 extractstr(rest, restlen));
-				exit(1);
+				pfree(parse->checksum);
+				parse->checksum = NULL;
 			}
-		}
-		else if (first_field_length == -1)
-		{
-			pg_log_fatal("manifest file keyword not terminated by tab");
-			exit(1);
-		}
-		else
-		{
-			char	   *kw;
-
-			kw = extractstr(buffer + buffer_position, first_field_length);
-			pg_log_fatal("unrecognized manifest file keyword: \"%s\"", kw);
-			exit(1);
-		}
-
-		/* Update manifest checksum, if needed. */
-		if (!saw_manifest_checksum_line)
-			pg_sha256_update(&manifest_ctx, (uint8 *) buffer + buffer_position,
-							 line_length + 1);
 
-		/* Advance buffer position over the data we just read. */
-		buffer_position += line_length + 1;
-
-		/* Also mark these bytes as consumed so we know when to stop. */
-		bytes_consumed += line_length + 1;
-
-		/*
-		 * We don't want to incur the expensive of using memmove() to discard
-		 * data after every line, because the lines are short compared to the
-		 * chunk size -- but we must do it at least now and then, or we'll
-		 * have to keep growing the buffer.
-		 */
-		if (buffer_position >= READ_CHUNK_SIZE)
-		{
-			int			leftover_bytes = buffer_size - buffer_position;
-
-			if (leftover_bytes > 0)
-				memmove(buffer, buffer + buffer_position, leftover_bytes);
-			buffer_size -= buffer_position;
-			buffer_position = 0;
-		}
+			/* Expect next file (or end of list). */
+			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object end");
+			break;
 	}
+}
+
+static void
+json_manifest_array_start(void *state)	/* array_start callback; state is a JsonManifestParseState */
+{
+	JsonManifestParseState *parse = state;
 
-	/* Checksum verification. */
-	if (!saw_manifest_checksum_line)
-		pg_log_fatal("manifest has no checksum");
-	pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
-	if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
-			   PG_SHA256_DIGEST_LENGTH) != 0)
+	switch (parse->state)
 	{
-		pg_log_fatal("manifest checksum does not match");
-		exit(1);
+		case JM_EXPECT_FILES_ARRAY_START:	/* the only place an array may begin */
+			parse->state = JM_EXPECT_FILES_ARRAY_NEXT;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected array start");
+			break;
 	}
-
-	/* OK, we're done with the manifest file. */
-	close(fd);
-
-	/* Return the hash table we constructed. */
-	return ht;
 }
 
-/*
- * The caller passes the remainder of the line, excluding the initial "File\t"
- * portion.
- */
 static void
-parse_file_line_from_manifest(manifestfile *f, char *rest, int restlen)
+json_manifest_array_end(void *state)	/* array_end callback; state is a JsonManifestParseState */
 {
-	char	   *end = rest + restlen;
-	char	   *field[FIELDS_PER_FILE_LINE];
-	unsigned long filesize;
-	char	   *ep;
-	pg_checksum_type checksum_type;
-	int			raw_checksum_length = 0;
-	char	   *raw_checksum_payload = NULL;
-	int			checksum_length;
-	uint8	   *checksum_payload;
-	int			i;
-	char	   *s;
+	JsonManifestParseState *parse = state;
 
-	/* Split the line into fields. */
-	for (i = 0; i < FIELDS_PER_FILE_LINE; ++i)
+	switch (parse->state)
 	{
-		int			toklen;
-
-		toklen = findfield(rest, end, &field[i]);
-		if (rest + toklen >= end && i + 1 < FIELDS_PER_FILE_LINE)
-		{
-			pg_log_fatal("manifest file line has too few fields");
-			exit(1);
-		}
-		rest += toklen + 1;
+		case JM_EXPECT_FILES_ARRAY_NEXT:	/* file list is done; checksum key comes next */
+			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_FIELD;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected array end");
+			break;
 	}
+}
 
-	/* We expect to have used the entire line. */
-	if (rest < end)
-	{
-		pg_log_fatal("manifest file line has too many fields");
-		exit(1);
-	}
+static void
+json_manifest_object_field_start(void *state, char *fname, bool isnull)	/* fname: key just read; isnull is not consulted */
+{
+	JsonManifestParseState *parse = state;
 
-	/* Parse the size. */
-	filesize = strtoul(field[1], &ep, 10);
-	if (*ep)
+	switch (parse->state)
 	{
-		pg_log_fatal("manifest file size for file \"%s\" is not a number",
-					 field[0]);
-		exit(1);
+		case JM_EXPECT_VERSION_FIELD:	/* first key of the top-level object */
+			if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
+				json_manifest_parse_failure("expected version indicator");
+			parse->state = JM_EXPECT_VERSION_VALUE;
+			break;
+		case JM_EXPECT_FILES_FIELD:
+			if (strcmp(fname, "Files") != 0)
+				json_manifest_parse_failure("expected file list");
+			parse->state = JM_EXPECT_FILES_ARRAY_START;
+			break;
+		case JM_EXPECT_THIS_FILE_FIELD:	/* remember which key's value arrives next */
+			if (strcmp(fname, "Path") == 0)
+				parse->field = JMFF_PATH;
+			else if (strcmp(fname, "Size") == 0)
+				parse->field = JMFF_SIZE;
+			else if (strcmp(fname, "Last-Modified") == 0)
+				parse->field = JMFF_LAST_MODIFIED;
+			else if (strcmp(fname, "Checksum-Algorithm") == 0)
+				parse->field = JMFF_CHECKSUM_ALGORITHM;
+			else if (strcmp(fname, "Checksum") == 0)
+				parse->field = JMFF_CHECKSUM;
+			else
+				json_manifest_parse_failure("unexpected file field");
+			parse->state = JM_EXPECT_THIS_FILE_VALUE;
+			break;
+		case JM_EXPECT_MANIFEST_CHECKSUM_FIELD:
+			if (strcmp(fname, "Manifest-Checksum") != 0)
+				json_manifest_parse_failure("expected manifest checksum");
+			parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected object field");
+			break;
 	}
+}
 
-	/* Parse the checksum type. */
-	for (s = field[3]; s[0] != '\0' && s[0] != ':'; ++s)
-		;
-	if (*s)
-	{
-		raw_checksum_payload = s + 1;
-		raw_checksum_length = strlen(raw_checksum_payload);
-		*s = '\0';
-	}
-	if (!pg_checksum_parse_type(field[3], &checksum_type))
-	{
-		pg_log_fatal("unrecognized checksum algorithm for file \"%s\": \"%s\"",
-					 field[0], field[3]);
-		exit(1);
-	}
+static void
+json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)	/* NOTE(review): tokentype is never checked */
+{
+	JsonManifestParseState *parse = state;
 
-	/* Decode the checksum payload. */
-	checksum_length = raw_checksum_length / 2;
-	if (checksum_length == 0)
-		checksum_payload = NULL;
-	else
+	switch (parse->state)
 	{
-		checksum_payload = palloc(checksum_length);
-		if (!hexdecode_string(checksum_payload, raw_checksum_payload,
-							  checksum_length))
-		{
-			pg_log_fatal("invalid checksum for file \"%s\": \"%s\"",
-						 field[0], raw_checksum_payload);
-			exit(1);
-		}
+		case JM_EXPECT_VERSION_VALUE:	/* only manifest version "1" is understood */
+			if (strcmp(token, "1") != 0)
+				json_manifest_parse_failure("unexpected manifest version");
+			parse->state = JM_EXPECT_FILES_FIELD;
+			break;
+		case JM_EXPECT_THIS_FILE_VALUE:	/* stash the token under the pending key */
+			switch (parse->field)
+			{
+				case JMFF_PATH:
+					parse->pathname = token;	/* a repeated key overwrites the earlier token */
+					break;
+				case JMFF_SIZE:
+					parse->size = token;
+					break;
+				case JMFF_LAST_MODIFIED:
+					pfree(token);		/* unused */
+					break;
+				case JMFF_CHECKSUM_ALGORITHM:
+					parse->algorithm = token;
+					break;
+				case JMFF_CHECKSUM:
+					parse->checksum = token;
+					break;
+			}
+			parse->state = JM_EXPECT_THIS_FILE_FIELD;
+			break;
+		case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:	/* value is only logged here, not compared */
+			pg_log_info("* manifest_checksum = %s", token);	/* NOTE(review): looks like WIP debug output */
+			parse->state = JM_EXPECT_TOPLEVEL_END;
+			break;
+		default:
+			json_manifest_parse_failure("unexpected scalar");
+			break;
 	}
-
-	/* Fill the output struct. */
-	f->pathname = field[0];
-	f->size = filesize;
-	f->checksum_type = checksum_type;
-	f->checksum_length = checksum_length;
-	f->checksum_payload = checksum_payload;
 }
 
 /*
@@ -917,125 +976,6 @@ should_ignore_relpath(validator_context *context, char *relpath)
 	return false;
 }
 
-/*
- * Extract a NUL-terminated string from a larger buffer.
- */
-static char *
-extractstr(char *buffer, int length)
-{
-	char	   *s = palloc(length + 1);
-
-	memcpy(s, buffer, length);
-	s[length] = '\0';
-
-	return s;
-}
-
-/*
- * Find the next instance of a given character within a buffer that
- * occurs at or after start_position. If there is none, returns -1; else
- * returns the difference between the position at which the character was
- * found and the start position.
- */
-static int
-findchar(char *buffer, int size, char c, int start_position)
-{
-	int			i;
-
-	for (i = start_position; i < size; ++i)
-		if (buffer[i] == c)
-			return i - start_position;
-	return -1;
-}
-
-/*
- * Extract the next field from a line of text read from the manifest file.
- */
-static int
-findfield(char *buffer, char *end, char **result)
-{
-	int			qoffset = 1;
-	int			dqcount = 0;
-	int			toklen;
-	int			bufpos;
-	int			resultpos;
-
-	/*
-	 * If this field is unquoted, we just stop at the next tab; if there's
-	 * none, we stop at the end of the line. Note that if buffer == end, it
-	 * just means that the last field on the line is empty.
-	 */
-	if (buffer == end || *buffer != '"')
-	{
-		toklen = findchar(buffer, end - buffer, '\t', 0);
-
-		if (toklen == -1)
-			toklen = end - buffer;
-		*result = extractstr(buffer, toklen);
-		return toklen;
-	}
-
-	/*
-	 * Our escaping convention is that if the field contains a tab, it must be
-	 * surrounded by double-quotes and any internal double-quotes must be
-	 * doubled.
-	 */
-	while (1)
-	{
-		/* Where's the next double quote? */
-		qoffset += findchar(buffer, end - buffer, '"', qoffset);
-		if (qoffset == -1)
-		{
-			pg_log_fatal("quoted field in backup manifest is not terminated");
-			exit(1);
-		}
-
-		/*
-		 * If the double-quote we found is the last character on the line or
-		 * if it's followed by a tab, we've reached the end of this field.
-		 */
-		if (buffer + qoffset >= end || buffer[qoffset + 1] == '\t')
-			break;
-
-		/* Otherwise, the next character should be another double-quote. */
-		if (buffer[qoffset + 1] != '"')
-		{
-			pg_log_fatal("invalid quoted field in backup manifest");
-			exit(1);
-		}
-
-		/* Skip both double-quotes and go around again. */
-		qoffset += 2;
-		++dqcount;
-	}
-
-	/*
-	 * At this point, we know that qoffset is the offset, relative to buffer,
-	 * of the closing double-quote, and that dqcount is the number of escaped
-	 * double-quotes within the field, and that all of those escape sequences
-	 * are proper. Extract and de-escape the data in the field.
-	 *
-	 * The amount of space needed for the result is equal to the raw token
-	 * length, minus two for the double quotes at the start and end, minus one
-	 * for each doubled double-quote within the token, plus one for the
-	 * trailing zero byte.
-	 */
-	toklen = qoffset + 1;
-	*result = palloc(toklen - dqcount - 1);
-	bufpos = 1;
-	resultpos = 0;
-	while (bufpos < qoffset)
-	{
-		(*result)[resultpos] = buffer[bufpos];
-		bufpos += (buffer[bufpos] == '"' ? 2 : 1);
-		++resultpos;
-	}
-	(*result)[resultpos] = '\0';
-	Assert(resultpos == toklen - dqcount - 2);
-
-	return toklen;
-}
-
 /*
  * Helper function for manifestfiles hash table.
  */
-- 
2.17.2 (Apple Git-113)