v9-0003-pg_validatebackup-Validate-a-backup-against-the-b.patch
application/octet-stream
Filename: v9-0003-pg_validatebackup-Validate-a-backup-against-the-b.patch
Type: application/octet-stream
Part: 2
Message:
Re: backup manifests
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v9-0003
Subject: pg_validatebackup: Validate a backup against the backup manifest.
| File | + | − |
|---|---|---|
| src/backend/replication/basebackup.c | 3 | 3 |
| src/bin/Makefile | 1 | 0 |
| src/bin/pg_validatebackup/.gitignore | 1 | 0 |
| src/bin/pg_validatebackup/Makefile | 32 | 0 |
| src/bin/pg_validatebackup/pg_validatebackup.c | 1081 | 0 |
From bc37c64ac691e97bc16292466dccad9889a4d521 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Fri, 7 Feb 2020 17:17:52 -0500
Subject: [PATCH v9 3/5] pg_validatebackup: Validate a backup against the
backup manifest.
Patch by me; review by Tushar Ahuja and Rajkumar Raghuwanshi, and also
off-list by Mark Dilger, Davinder Singh, and Jeevan Chalke.
---
src/backend/replication/basebackup.c | 6 +-
src/bin/Makefile | 1 +
src/bin/pg_validatebackup/.gitignore | 1 +
src/bin/pg_validatebackup/Makefile | 32 +
src/bin/pg_validatebackup/pg_validatebackup.c | 1081 +++++++++++++++++
5 files changed, 1118 insertions(+), 3 deletions(-)
create mode 100644 src/bin/pg_validatebackup/.gitignore
create mode 100644 src/bin/pg_validatebackup/Makefile
create mode 100644 src/bin/pg_validatebackup/pg_validatebackup.c
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 914859aea8..27fa67f321 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -64,7 +64,7 @@ struct manifest_info
pg_checksum_type checksum_type;
pg_sha256_ctx manifest_ctx;
uint64 manifest_size;
- int still_checksumming;
+ bool still_checksumming;
};
@@ -85,7 +85,7 @@ static void SendBackupHeader(List *tablespaces);
static void InitializeManifest(manifest_info *manifest, pg_checksum_type);
static void AppendStringToManifest(manifest_info *manifest, char *s);
static void AddFileToManifest(manifest_info *manifest, const char *spcoid,
- const char *filename, size_t size, time_t mtime,
+ const char *pathname, size_t size, time_t mtime,
pg_checksum_context *checksum_ctx);
static void SendBackupManifest(manifest_info *manifest);
static char *escape_field_for_manifest(const char *s);
@@ -1033,7 +1033,7 @@ AppendStringToManifest(manifest_info *manifest, char *s)
*/
static void
AddFileToManifest(manifest_info *manifest, const char *spcoid,
- const char *filename, size_t size, time_t mtime,
+ const char *pathname, size_t size, time_t mtime,
pg_checksum_context *checksum_ctx)
{
char pathbuf[MAXPGPATH];
diff --git a/src/bin/Makefile b/src/bin/Makefile
index 7f4120a34f..77bceea4fe 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -27,6 +27,7 @@ SUBDIRS = \
pg_test_fsync \
pg_test_timing \
pg_upgrade \
+ pg_validatebackup \
pg_waldump \
pgbench \
psql \
diff --git a/src/bin/pg_validatebackup/.gitignore b/src/bin/pg_validatebackup/.gitignore
new file mode 100644
index 0000000000..3ae1c1f03a
--- /dev/null
+++ b/src/bin/pg_validatebackup/.gitignore
@@ -0,0 +1 @@
+/pg_validatebackup
diff --git a/src/bin/pg_validatebackup/Makefile b/src/bin/pg_validatebackup/Makefile
new file mode 100644
index 0000000000..aeb97d21d2
--- /dev/null
+++ b/src/bin/pg_validatebackup/Makefile
@@ -0,0 +1,32 @@
+# src/bin/pg_validatebackup/Makefile
+#
+# Build, install and clean rules for the pg_validatebackup program.
+
+PGFILEDESC = "pg_validatebackup - validate a backup against a backup manifest"
+PGAPPICON = win32
+
+subdir = src/bin/pg_validatebackup
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# We need libpq only because fe_utils does.
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
+
+# Object files that make up the program.
+OBJS = \
+ $(WIN32RES) \
+ pg_validatebackup.o
+
+all: pg_validatebackup
+
+# Link the program. The submake-* targets after '|' are order-only
+# prerequisites: they are brought up to date before linking, but are not
+# part of $^ and so are not passed to the link command.
+pg_validatebackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+# Copy the program into $(DESTDIR)$(bindir), creating the directory first.
+install: all installdirs
+ $(INSTALL_PROGRAM) pg_validatebackup$(X) '$(DESTDIR)$(bindir)/pg_validatebackup$(X)'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+ rm -f '$(DESTDIR)$(bindir)/pg_validatebackup$(X)'
+
+# All three cleaning levels remove the same things: the program and objects.
+clean distclean maintainer-clean:
+ rm -f pg_validatebackup$(X) $(OBJS)
diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c
new file mode 100644
index 0000000000..d4d041ef7d
--- /dev/null
+++ b/src/bin/pg_validatebackup/pg_validatebackup.c
@@ -0,0 +1,1081 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_validatebackup.c
+ * Validate a backup against a backup manifest.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_validatebackup/pg_validatebackup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "common/checksum_helper.h"
+#include "common/hashfn.h"
+#include "common/logging.h"
+#include "fe_utils/simple_list.h"
+#include "getopt_long.h"
+
+/*
+ * For efficiency, we'd like our hash table containing information about the
+ * manifest to start out with approximately the correct number of entries.
+ * There's no way to know the exact number of entries without reading the whole
+ * file, but we can get an estimate by dividing the file size by the estimated
+ * number of bytes per line.
+ *
+ * This could be off by about a factor of two in either direction, because the
+ * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
+ * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
+ * might be no checksum at all.
+ */
+#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
+
+/*
+ * How many bytes should we try to read from a file at once?
+ *
+ * This chunk size is used both when reading the manifest itself and when
+ * reading backup files in order to compute their checksums.
+ */
+#define READ_CHUNK_SIZE 4096
+
+/*
+ * The first word of each line of the manifest file should be one of these
+ * key words. We define constants for the relevant lengths as well; the
+ * KWL_* lengths do not count the terminating NUL byte of the literal.
+ */
+#define KW_MANIFEST_VERSION "PostgreSQL-Backup-Manifest-Version"
+#define KW_MANIFEST_FILE "File"
+#define KW_MANIFEST_CHECKSUM "Manifest-Checksum"
+#define KWL_MANIFEST_VERSION (sizeof(KW_MANIFEST_VERSION)-1)
+#define KWL_MANIFEST_FILE (sizeof(KW_MANIFEST_FILE)-1)
+#define KWL_MANIFEST_CHECKSUM (sizeof(KW_MANIFEST_CHECKSUM)-1)
+
+/*
+ * How many fields are there for each "File" line in the manifest?
+ * Currently we have: file name, file size, timestamp, checksum.
+ * (This count does not include the leading "File" key word.)
+ */
+#define FIELDS_PER_FILE_LINE 4
+
+/*
+ * Each "File" line in the manifest file is parsed to produce an object
+ * like this. These objects are also the elements of the manifestfiles hash
+ * table, keyed by 'pathname'.
+ */
+typedef struct manifestfile
+{
+ uint32 status; /* hash status; used by simplehash.h */
+ char *pathname; /* path from the manifest; the hash key */
+ size_t size; /* file size as recorded in the manifest */
+ pg_checksum_type checksum_type; /* algorithm named in the manifest */
+ int checksum_length; /* decoded checksum length, in bytes */
+ uint8 *checksum_payload; /* decoded checksum, or NULL if none */
+ bool matched; /* set once a file with this path is found on disk */
+ bool bad; /* set if a size mismatch has already been reported */
+} manifestfile;
+
+/*
+ * Define a hash table which we can use to store information about the files
+ * mentioned in the backup manifest.
+ *
+ * Elements are manifestfile structs keyed by their 'pathname' member. Keys
+ * are hashed with hash_string_pointer() and compared with strcmp(), so two
+ * keys are equal when the strings they point to are equal. The generated
+ * functions are named with the "manifestfiles" prefix, and memory for the
+ * table comes from pg_malloc0().
+ */
+static uint32 hash_string_pointer(char *s);
+#define SH_PREFIX manifestfiles
+#define SH_ELEMENT_TYPE manifestfile
+#define SH_KEY_TYPE char *
+#define SH_KEY pathname
+#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_RAW_ALLOCATOR pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+/*
+ * All of the context information we need while checking a backup manifest.
+ */
+typedef struct validator_context
+{
+ manifestfiles_hash *ht; /* files listed in the manifest */
+ char *backup_directory; /* canonicalized path of the backup to check */
+ SimpleStringList ignore_list; /* relative paths (and subtrees) to skip */
+ bool exit_on_error; /* exit as soon as the first error is reported? */
+ bool saw_any_error; /* set by pg_validator_error() */
+} validator_context;
+
+/* Manifest parsing. */
+static manifestfiles_hash * parse_manifest_file(char *manifest_path);
+static void parse_file_line_from_manifest(manifestfile *f, char *rest,
+ int restlen);
+
+/* Checking the backup directory against the parsed manifest. */
+static void validate_backup_directory(validator_context *context,
+ char *relpath, char *fullpath);
+static void validate_backup_file(validator_context *context,
+ char *relpath, char *fullpath);
+static void report_extra_backup_files(validator_context *context);
+static void validate_backup_checksums(validator_context *context);
+static void validate_file_checksum(validator_context *context,
+ manifestfile *tabent, char *pathname);
+
+/* Error reporting and ignore-list handling. */
+static void pg_validator_error(validator_context *context,
+ const char *pg_restrict fmt,...)
+ pg_attribute_printf(2, 3);
+static void pg_validator_fatal(const char *pg_restrict fmt,...)
+ pg_attribute_printf(1, 2) pg_attribute_noreturn();
+static bool should_ignore_relpath(validator_context *context, char *relpath);
+
+/* Low-level string, field and hex helpers. */
+static char *extractstr(char *buffer, int length);
+static int findchar(char *buffer, int size, char c, int start_position);
+static int findfield(char *buffer, char *end, char **result);
+static int hexdecode_char(char c);
+static bool hexdecode_string(uint8 *result, char *input, int nbytes);
+static void usage(void);
+
+/* Program name as derived from argv[0]; set at the start of main(). */
+static const char *progname;
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, loads the backup manifest, and then checks the
+ * backup directory against it. The process exits with status 0 when no
+ * problem was found and with status 1 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"exit-on-error", no_argument, NULL, 'e'},
+		{"ignore", required_argument, NULL, 'i'},
+		{"manifest-path", required_argument, NULL, 'm'},
+		{"quiet", no_argument, NULL, 'q'},
+		{"skip-checksums", no_argument, NULL, 's'},
+		{NULL, 0, NULL, 0}
+	};
+
+	/*
+	 * Entries in the toplevel directory that are always skipped:
+	 *
+	 * backup_manifest is not listed in the manifest itself.
+	 *
+	 * pg_wal is not listed either, since its contents are fetched separately
+	 * from the backup itself.
+	 *
+	 * postgresql.auto.conf, recovery.signal, and standby.signal may be
+	 * created or changed as part of the backup process; for example,
+	 * pg_basebackup -R will modify postgresql.auto.conf and create
+	 * standby.signal.
+	 */
+	static const char *const builtin_ignores[] = {
+		"backup_manifest",
+		"pg_wal",
+		"postgresql.auto.conf",
+		"recovery.signal",
+		"standby.signal",
+		NULL
+	};
+
+	validator_context context;
+	char	   *manifest_path = NULL;
+	bool		quiet = false;
+	bool		skip_checksums = false;
+	int			opt;
+	int			n;
+
+	pg_logging_init(argv[0]);
+	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup"));
+	progname = get_progname(argv[0]);
+
+	memset(&context, 0, sizeof(context));
+
+	/* --help and --version are only honored as the very first argument. */
+	if (argc > 1)
+	{
+		const char *first = argv[1];
+
+		if (strcmp(first, "--help") == 0 || strcmp(first, "-?") == 0)
+		{
+			usage();
+			exit(0);
+		}
+		else if (strcmp(first, "--version") == 0 || strcmp(first, "-V") == 0)
+		{
+			puts("pg_validatebackup (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	/* Seed the ignore list with the built-in entries, in order. */
+	for (n = 0; builtin_ignores[n] != NULL; ++n)
+		simple_string_list_append(&context.ignore_list, builtin_ignores[n]);
+
+	/* Process the command-line options. */
+	while ((opt = getopt_long(argc, argv, "ei:m:qs", long_options, NULL)) != -1)
+	{
+		if (opt == 'e')
+			context.exit_on_error = true;
+		else if (opt == 'i')
+		{
+			char	   *ignored = pstrdup(optarg);
+
+			canonicalize_path(ignored);
+			simple_string_list_append(&context.ignore_list, ignored);
+		}
+		else if (opt == 'm')
+		{
+			manifest_path = pstrdup(optarg);
+			canonicalize_path(manifest_path);
+		}
+		else if (opt == 'q')
+			quiet = true;
+		else if (opt == 's')
+			skip_checksums = true;
+		else
+		{
+			fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+					progname);
+			exit(1);
+		}
+	}
+
+	/* The first non-option argument names the backup directory. */
+	if (optind >= argc)
+	{
+		pg_log_fatal("no backup directory specified");
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+	context.backup_directory = pstrdup(argv[optind++]);
+	canonicalize_path(context.backup_directory);
+
+	/* Nothing else may follow it. */
+	if (optind < argc)
+	{
+		pg_log_fatal("too many command-line arguments (first is \"%s\")",
+					 argv[optind]);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+
+	/* Unless told otherwise, the manifest lives in the backup directory. */
+	if (manifest_path == NULL)
+		manifest_path = psprintf("%s/backup_manifest",
+								 context.backup_directory);
+
+	/*
+	 * Read the manifest. Any error found while parsing it is fatal; there
+	 * doesn't seem to be much point in trying to validate the backup
+	 * directory against a corrupted manifest.
+	 */
+	context.ht = parse_manifest_file(manifest_path);
+
+	/*
+	 * Walk the backup directory, checking that every file on disk appears in
+	 * the manifest with a matching size, and setting the "matched" flag on
+	 * every manifest entry that corresponds to a file on disk.
+	 */
+	validate_backup_directory(&context, NULL, context.backup_directory);
+
+	/*
+	 * Any manifest entry whose "matched" flag is still unset describes a
+	 * file that is mentioned in the manifest but does not exist on disk.
+	 */
+	report_extra_backup_files(&context);
+
+	/* Checksum verification is the expensive part; do it last, if wanted. */
+	if (!skip_checksums)
+		validate_backup_checksums(&context);
+
+	/* Announce success unless something went wrong or we must be quiet. */
+	if (!(context.saw_any_error || quiet))
+		pg_log_info("backup successfully verified");
+
+	if (context.saw_any_error)
+		exit(1);
+	exit(0);
+}
+
+/*
+ * Parse a manifest file and construct a hash table with information about
+ * all the files it mentions.
+ *
+ * 'manifest_path' is the filesystem path of the manifest to read.
+ *
+ * The manifest is read in chunks of READ_CHUNK_SIZE bytes and processed one
+ * newline-terminated line at a time. Every line up to, but not including,
+ * the "Manifest-Checksum" line is fed into a SHA-256 computation whose
+ * result must match the checksum given on that final line.
+ *
+ * Any problem with the manifest is reported via pg_validator_fatal(), which
+ * exits; this function therefore returns only if the manifest was parsed
+ * and its checksum verified.
+ */
+static manifestfiles_hash *
+parse_manifest_file(char *manifest_path)
+{
+	int			fd;
+	struct stat statbuf;
+	off_t		estimate;
+	off_t		bytes_read = 0;
+	off_t		bytes_consumed = 0;
+	uint32		initial_size;
+	manifestfiles_hash *ht;
+	char	   *buffer;
+	uint8		manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
+	uint8		manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
+	int			buffer_position = 0;	/* start of the current line */
+	int			buffer_size = 0;	/* number of valid bytes in buffer */
+	int			buffer_maxsize = 2 * READ_CHUNK_SIZE;	/* allocated size */
+	int			line_number = 0;
+	bool		saw_manifest_checksum_line = false;
+	pg_sha256_ctx manifest_ctx;
+
+	/* Prepare to compute a checksum of the manifest itself. */
+	pg_sha256_init(&manifest_ctx);
+
+	/* Open the manifest file. */
+	if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
+		pg_validator_fatal("could not open file \"%s\": %m", manifest_path);
+
+	/* Figure out how big the manifest is. */
+	if (fstat(fd, &statbuf) != 0)
+		pg_validator_fatal("could not stat file \"%s\": %m", manifest_path);
+
+	/* Guess how large to make the hash table based on the manifest size. */
+	estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
+	initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
+
+	/* Create the hash table. */
+	ht = manifestfiles_create(initial_size, NULL);
+
+	/* Initialize our read buffer. */
+	buffer = pg_malloc(buffer_maxsize);
+
+	/*
+	 * Loop until we've read it all.
+	 *
+	 * The file size shouldn't be changing, so it seems fine to just error out
+	 * if the final length is different from what stat() told us.
+	 */
+	while (bytes_consumed < statbuf.st_size)
+	{
+		int			line_length;
+		int			first_field_length;
+		char	   *line;
+		char	   *rest;
+		int			restlen;
+
+		/* Find next newline if any. */
+		line_length = findchar(buffer, buffer_size, '\n', buffer_position);
+
+		/* If no newline was found, we need to read more data and try again. */
+		if (line_length == -1)
+		{
+			size_t		bytes_to_read;
+			int			rc;
+
+			bytes_to_read = Min(statbuf.st_size - bytes_read, READ_CHUNK_SIZE);
+			if (bytes_to_read == 0)
+				pg_validator_fatal("manifest file line not terminated by newline");
+
+			/*
+			 * Make sure the new data fits after what is already buffered.
+			 * The test must be based on buffer_size, the number of bytes
+			 * currently held: a line longer than the buffer would otherwise
+			 * make read() write past the end of the allocation. Since
+			 * buffer_size never exceeds buffer_maxsize and we read at most
+			 * READ_CHUNK_SIZE bytes, growing by one chunk is always enough.
+			 */
+			if ((size_t) buffer_size + bytes_to_read > (size_t) buffer_maxsize)
+			{
+				buffer_maxsize += READ_CHUNK_SIZE;
+				buffer = pg_realloc(buffer, buffer_maxsize);
+				Assert((size_t) buffer_size + bytes_to_read <=
+					   (size_t) buffer_maxsize);
+			}
+
+			rc = read(fd, buffer + buffer_size, bytes_to_read);
+			if (rc < 0)
+				pg_validator_fatal("could not read file \"%s\": %m",
+								   manifest_path);
+			if ((size_t) rc != bytes_to_read)
+				pg_validator_fatal("could not read file \"%s\": read %d of %zu",
+								   manifest_path, rc, bytes_to_read);
+			buffer_size += rc;
+			bytes_read += rc;
+			continue;
+		}
+
+		/* Increment line number. */
+		++line_number;
+
+		/* The manifest checksum should be the last thing in the file. */
+		if (saw_manifest_checksum_line)
+			pg_validator_fatal("unexpected data follows manifest checksum");
+
+		/*
+		 * Find first field on line. Only the current line is searched, so
+		 * that a tab on some later line already sitting in the buffer cannot
+		 * be mistaken for the end of this line's key word.
+		 */
+		line = buffer + buffer_position;
+		first_field_length = findchar(buffer, buffer_position + line_length,
+									  '\t', buffer_position);
+		if (first_field_length == -1)
+			pg_validator_fatal("manifest file keyword not terminated by tab");
+
+		/* Everything after the tab, up to the newline, is the remainder. */
+		rest = line + first_field_length + 1;
+		restlen = line_length - (first_field_length + 1);
+
+		/*
+		 * Check the first word of the line to see what kind of line it is.
+		 */
+		if (first_field_length == KWL_MANIFEST_VERSION &&
+			memcmp(line, KW_MANIFEST_VERSION, KWL_MANIFEST_VERSION) == 0)
+		{
+			char	   *version;
+
+			if (line_number != 1)
+				pg_validator_fatal("manifest file version should only be specified at line 1");
+
+			version = extractstr(rest, restlen);
+			if (strcmp(version, "1") != 0)
+				pg_validator_fatal("unrecognized manifest version: \"%s\"",
+								   version);
+			pfree(version);
+		}
+		else if (first_field_length == KWL_MANIFEST_FILE &&
+				 memcmp(line, KW_MANIFEST_FILE, KWL_MANIFEST_FILE) == 0)
+		{
+			manifestfile f;
+			manifestfile *tabent;
+			bool		found;
+
+			/* Parse this line. */
+			parse_file_line_from_manifest(&f, rest, restlen);
+
+			/* Make a new entry in the hash table for it. */
+			tabent = manifestfiles_insert(ht, f.pathname, &found);
+			if (found)
+				pg_validator_fatal("duplicate pathname in backup manifest: \"%s\"",
+								   f.pathname);
+
+			/* Copy in all the relevant details. */
+			tabent->size = f.size;
+			tabent->checksum_type = f.checksum_type;
+			tabent->checksum_length = f.checksum_length;
+			tabent->checksum_payload = f.checksum_payload;
+			tabent->matched = false;
+			tabent->bad = false;
+		}
+		else if (first_field_length == KWL_MANIFEST_CHECKSUM &&
+				 memcmp(line, KW_MANIFEST_CHECKSUM,
+						KWL_MANIFEST_CHECKSUM) == 0)
+		{
+			saw_manifest_checksum_line = true;
+			if (restlen != PG_SHA256_DIGEST_STRING_LENGTH - 1)
+				pg_validator_fatal("manifest file checksum has unexpected length: %d",
+								   restlen);
+			if (!hexdecode_string(manifest_checksum_expected, rest,
+								  PG_SHA256_DIGEST_LENGTH))
+				pg_validator_fatal("invalid manifest checksum: \"%s\"",
+								   extractstr(rest, restlen));
+		}
+		else
+		{
+			char	   *kw;
+
+			kw = extractstr(line, first_field_length);
+			pg_validator_fatal("unrecognized manifest file keyword: \"%s\"", kw);
+		}
+
+		/*
+		 * Update manifest checksum, if needed. The checksum line itself is
+		 * not part of the data being checksummed.
+		 */
+		if (!saw_manifest_checksum_line)
+			pg_sha256_update(&manifest_ctx, (uint8 *) line, line_length + 1);
+
+		/* Advance buffer position over the data we just read. */
+		buffer_position += line_length + 1;
+
+		/* Also mark these bytes as consumed so we know when to stop. */
+		bytes_consumed += line_length + 1;
+
+		/*
+		 * We don't want to incur the expense of using memmove() to discard
+		 * data after every line, because the lines are short compared to the
+		 * chunk size -- but we must do it at least now and then, or we'll
+		 * have to keep growing the buffer.
+		 */
+		if (buffer_position >= READ_CHUNK_SIZE)
+		{
+			int			leftover_bytes = buffer_size - buffer_position;
+
+			if (leftover_bytes > 0)
+				memmove(buffer, buffer + buffer_position, leftover_bytes);
+			buffer_size -= buffer_position;
+			buffer_position = 0;
+		}
+	}
+
+	/* Checksum verification. */
+	if (!saw_manifest_checksum_line)
+		pg_validator_fatal("manifest has no checksum");
+	pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
+	if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
+			   PG_SHA256_DIGEST_LENGTH) != 0)
+		pg_validator_fatal("manifest checksum does not match");
+
+	/*
+	 * OK, we're done with the manifest file. Nothing stored in the hash
+	 * table points into the read buffer, so it can be released as well.
+	 */
+	close(fd);
+	pfree(buffer);
+
+	/* Return the hash table we constructed. */
+	return ht;
+}
+
+/*
+ * Parse one "File" line from the manifest.
+ *
+ * The caller passes the remainder of the line, excluding the initial "File\t"
+ * portion: 'rest' points at the first field and 'restlen' is the number of
+ * bytes up to, but not including, the terminating newline.
+ *
+ * On return, the pathname, size, checksum_type, checksum_length and
+ * checksum_payload members of '*f' have been filled in; the other members
+ * are left untouched. The pathname and checksum payload are freshly
+ * allocated. The timestamp field must be present but is not examined.
+ *
+ * Any syntax error is reported via pg_validator_fatal(), which exits.
+ */
+static void
+parse_file_line_from_manifest(manifestfile *f, char *rest, int restlen)
+{
+	char	   *end = rest + restlen;
+	char	   *field[FIELDS_PER_FILE_LINE];
+	unsigned long filesize;
+	char	   *ep;
+	pg_checksum_type checksum_type;
+	int			raw_checksum_length = 0;
+	char	   *raw_checksum_payload = NULL;
+	int			checksum_length;
+	uint8	   *checksum_payload;
+	int			i;
+	char	   *s;
+
+	/* Split the line into fields. */
+	for (i = 0; i < FIELDS_PER_FILE_LINE; ++i)
+	{
+		int			toklen;
+
+		toklen = findfield(rest, end, &field[i]);
+		if (rest + toklen >= end && i + 1 < FIELDS_PER_FILE_LINE)
+			pg_validator_fatal("manifest file line has too few fields");
+		rest += toklen + 1;
+	}
+
+	/*
+	 * We expect to have used the entire line. If the last field ran up to
+	 * the end of the line, 'rest' is now one past 'end'; anything less means
+	 * that a tab followed the last field, i.e. there is at least one more
+	 * (possibly empty) field.
+	 */
+	if (rest <= end)
+		pg_validator_fatal("manifest file line has too many fields");
+
+	/*
+	 * Parse the size. strtoul() on its own would accept an empty string,
+	 * leading whitespace, or a sign, and would clamp out-of-range values, so
+	 * insist on a leading digit and check for overflow explicitly.
+	 */
+	errno = 0;
+	filesize = strtoul(field[1], &ep, 10);
+	if (field[1][0] < '0' || field[1][0] > '9' ||
+		*ep != '\0' || errno == ERANGE)
+		pg_validator_fatal("manifest file size for file \"%s\" is not a number",
+						   field[0]);
+
+	/*
+	 * Parse the checksum type. The field has the form "type:hexdigits"; split
+	 * it in place at the colon, if there is one.
+	 */
+	for (s = field[3]; s[0] != '\0' && s[0] != ':'; ++s)
+		;
+	if (*s)
+	{
+		raw_checksum_payload = s + 1;
+		raw_checksum_length = strlen(raw_checksum_payload);
+		*s = '\0';
+	}
+	if (!pg_checksum_parse_type(field[3], &checksum_type))
+		pg_validator_fatal("unrecognized checksum algorithm for file \"%s\": \"%s\"",
+						   field[0], field[3]);
+
+	/*
+	 * Decode the checksum payload. Each byte is written as two hex digits,
+	 * so an odd number of digits cannot be valid.
+	 */
+	if (raw_checksum_length % 2 != 0)
+		pg_validator_fatal("invalid checksum for file \"%s\": \"%s\"",
+						   field[0], raw_checksum_payload);
+	checksum_length = raw_checksum_length / 2;
+	if (checksum_length == 0)
+		checksum_payload = NULL;
+	else
+	{
+		checksum_payload = palloc(checksum_length);
+		if (!hexdecode_string(checksum_payload, raw_checksum_payload,
+							  checksum_length))
+			pg_validator_fatal("invalid checksum for file \"%s\": \"%s\"",
+							   field[0], raw_checksum_payload);
+	}
+
+	/* Fill the output struct. */
+	f->pathname = field[0];
+	f->size = filesize;
+	f->checksum_type = checksum_type;
+	f->checksum_length = checksum_length;
+	f->checksum_payload = checksum_payload;
+
+	/*
+	 * Only the pathname is retained by the caller; release the other
+	 * temporary field copies rather than leaking them for every line.
+	 */
+	for (i = 1; i < FIELDS_PER_FILE_LINE; ++i)
+		pfree(field[i]);
+}
+
+/*
+ * Validate one directory.
+ *
+ * 'relpath' is NULL if we are to validate the top-level backup directory,
+ * and otherwise the relative path to the directory that is to be validated.
+ *
+ * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
+ * filesystem path at which it can be found.
+ *
+ * Every entry other than "." and ".." that is not covered by the ignore list
+ * is handed to validate_backup_file(), which recurses back into this
+ * function for subdirectories.
+ */
+static void
+validate_backup_directory(validator_context *context, char *relpath,
+						  char *fullpath)
+{
+	DIR		   *dir;
+	struct dirent *dirent;
+
+	dir = opendir(fullpath);
+	if (dir == NULL)
+	{
+		/*
+		 * If even the toplevel backup directory cannot be found, treat this
+		 * as a fatal error.
+		 */
+		if (relpath == NULL)
+			pg_validator_fatal("could not open directory \"%s\": %m", fullpath);
+
+		/*
+		 * Otherwise, treat this as a non-fatal error, but ignore any further
+		 * errors related to this path and anything beneath it.
+		 */
+		pg_validator_error(context,
+						   "could not open directory \"%s\": %m", fullpath);
+		simple_string_list_append(&context->ignore_list, relpath);
+
+		return;
+	}
+
+	/*
+	 * errno is cleared before each readdir() call so that, once it returns
+	 * NULL, we can tell a genuine error from the normal end of the directory.
+	 */
+	while (errno = 0, (dirent = readdir(dir)) != NULL)
+	{
+		char	   *filename = dirent->d_name;
+		char	   *newfullpath;
+		char	   *newrelpath;
+
+		/* Skip "." and ".." -- and do so before allocating anything. */
+		if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0)
+			continue;
+
+		newfullpath = psprintf("%s/%s", fullpath, filename);
+		if (relpath == NULL)
+			newrelpath = pstrdup(filename);
+		else
+			newrelpath = psprintf("%s/%s", relpath, filename);
+
+		if (!should_ignore_relpath(context, newrelpath))
+			validate_backup_file(context, newrelpath, newfullpath);
+
+		pfree(newfullpath);
+		pfree(newrelpath);
+	}
+
+	/* A nonzero errno here means readdir() failed rather than finished. */
+	if (errno != 0)
+		pg_validator_error(context,
+						   "could not read directory \"%s\": %m", fullpath);
+
+	if (closedir(dir))
+	{
+		pg_validator_error(context,
+						   "could not close directory \"%s\": %m", fullpath);
+		return;
+	}
+}
+
+/*
+ * Validate a single directory entry, which may turn out to be a plain file,
+ * a directory, or something else (stat() is used, so symlinks are followed).
+ *
+ * 'relpath' and 'fullpath' have the same meaning as the arguments to
+ * validate_backup_directory.
+ *
+ * Directories are descended into. Plain files are looked up in the manifest
+ * hash table, flagged as matched, and compared against the recorded size.
+ * Checksums are deliberately not verified here: computing them may take a
+ * while, and we'd like to report the more obvious problems (unexpected or
+ * wrongly-sized files) quickly, so that work is left for a later pass.
+ */
+static void
+validate_backup_file(validator_context *context, char *relpath, char *fullpath)
+{
+	struct stat st;
+	manifestfile *entry;
+
+	if (stat(fullpath, &st) != 0)
+	{
+		pg_validator_error(context,
+						   "could not stat file or directory \"%s\": %m",
+						   relpath);
+
+		/*
+		 * Don't complain again about this path or, should it be a
+		 * directory, about anything underneath it.
+		 */
+		simple_string_list_append(&context->ignore_list, relpath);
+	}
+	else if (S_ISDIR(st.st_mode))
+	{
+		/* Directories are handled by recursion. */
+		validate_backup_directory(context, relpath, fullpath);
+	}
+	else if (!S_ISREG(st.st_mode))
+	{
+		/* Anything that is not a directory should be a plain file. */
+		pg_validator_error(context,
+						   "\"%s\" is not a file or directory",
+						   relpath);
+	}
+	else if ((entry = manifestfiles_lookup(context->ht, relpath)) == NULL)
+	{
+		/* A plain file that the manifest does not know about. */
+		pg_validator_error(context,
+						   "\"%s\" is present on disk but not in the manifest",
+						   relpath);
+	}
+	else
+	{
+		/* Remember that this manifest entry exists in the filesystem. */
+		entry->matched = true;
+
+		/* The size on disk must agree with the manifest. */
+		if (entry->size != st.st_size)
+		{
+			pg_validator_error(context,
+							   "\"%s\" has size %zu on disk but size %zu in the manifest",
+							   relpath, (size_t) st.st_size, entry->size);
+			entry->bad = true;
+		}
+	}
+}
+
+/*
+ * Walk the whole hash table and complain about every entry whose 'matched'
+ * flag is still unset, unless its path is covered by the ignore list: such
+ * files are present in the manifest but not on disk.
+ */
+static void
+report_extra_backup_files(validator_context *context)
+{
+	manifestfiles_iterator iter;
+	manifestfile *entry;
+
+	manifestfiles_start_iterate(context->ht, &iter);
+	for (;;)
+	{
+		entry = manifestfiles_iterate(context->ht, &iter);
+		if (entry == NULL)
+			break;
+
+		/* Found on disk, or deliberately ignored: nothing to report. */
+		if (entry->matched)
+			continue;
+		if (should_ignore_relpath(context, entry->pathname))
+			continue;
+
+		pg_validator_error(context,
+						   "\"%s\" is present in the manifest but not on disk",
+						   entry->pathname);
+	}
+}
+
+/*
+ * Verify checksums for every hash table entry that is otherwise fine.
+ *
+ * Entries are skipped if they were not found on disk, if a problem has
+ * already been reported for them, if the manifest records no checksum for
+ * them, or if their path is covered by the ignore list.
+ */
+static void
+validate_backup_checksums(validator_context *context)
+{
+	manifestfiles_iterator iter;
+	manifestfile *entry;
+
+	manifestfiles_start_iterate(context->ht, &iter);
+	for (;;)
+	{
+		char	   *target;
+
+		entry = manifestfiles_iterate(context->ht, &iter);
+		if (entry == NULL)
+			break;
+
+		if (!entry->matched || entry->bad)
+			continue;
+		if (entry->checksum_type == CHECKSUM_TYPE_NONE)
+			continue;
+		if (should_ignore_relpath(context, entry->pathname))
+			continue;
+
+		/* Build the filesystem path of the file, check it, and clean up. */
+		target = psprintf("%s/%s", context->backup_directory,
+						  entry->pathname);
+		validate_file_checksum(context, entry, target);
+		pfree(target);
+	}
+}
+
+/*
+ * Validate the checksum of a single file.
+ *
+ * 'tabent' is the manifest entry for the file; it supplies the checksum
+ * algorithm and the expected checksum. 'fullpath' is the filesystem path to
+ * read from. Error messages mention the manifest-relative path instead.
+ *
+ * All problems are reported through pg_validator_error(), so they are only
+ * fatal if the user asked to exit on the first error.
+ */
+static void
+validate_file_checksum(validator_context *context, manifestfile *tabent,
+					   char *fullpath)
+{
+	pg_checksum_context checksum_ctx;
+	char	   *relpath = tabent->pathname;
+	int			fd;
+	int			rc;
+	uint8		buffer[READ_CHUNK_SIZE];
+	uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+	int			checksumlen;
+
+	/* Open the target file. */
+	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
+	{
+		pg_validator_error(context, "could not open file \"%s\": %m",
+						   relpath);
+		return;
+	}
+
+	/* Initialize checksum context. */
+	pg_checksum_init(&checksum_ctx, tabent->checksum_type);
+
+	/* Read the file chunk by chunk, updating the checksum as we go. */
+	while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
+		pg_checksum_update(&checksum_ctx, buffer, rc);
+	if (rc < 0)
+		pg_validator_error(context, "could not read file \"%s\": %m",
+						   relpath);
+
+	/* Close the file, even if reading it failed. */
+	if (close(fd) != 0)
+	{
+		pg_validator_error(context, "could not close file \"%s\": %m",
+						   relpath);
+		return;
+	}
+
+	/* If we didn't manage to read the whole file, bail out now. */
+	if (rc < 0)
+		return;
+
+	/* Get the final checksum. */
+	checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
+
+	/*
+	 * And check it against the manifest. In the length message, the first
+	 * number is the length we computed and the second is the length that
+	 * the manifest told us to expect.
+	 */
+	if (checksumlen != tabent->checksum_length)
+		pg_validator_error(context,
+						   "file \"%s\" has checksum of length %d, but expected %d",
+						   relpath, checksumlen, tabent->checksum_length);
+	else if (memcmp(checksumbuf, tabent->checksum_payload, checksumlen) != 0)
+		pg_validator_error(context,
+						   "checksum mismatch for file \"%s\"",
+						   relpath);
+}
+
+/*
+ * Print out usage information on standard output.
+ *
+ * This function returns normally; exiting is left to the caller. The option
+ * list must be kept in sync with the long_options table in main().
+ */
+static void
+usage(void)
+{
+	printf(_("%s validates a backup against the backup manifest.\n\n"), progname);
+	printf(_("Usage:\n  %s [OPTION]... BACKUPDIR\n\n"), progname);
+	printf(_("Options:\n"));
+	printf(_("  -e, --exit-on-error         exit immediately on error\n"));
+	printf(_("  -i, --ignore=RELATIVE_PATH  ignore indicated path\n"));
+	printf(_("  -m, --manifest-path=PATH    use specified path for manifest\n"));
+	printf(_("  -q, --quiet                 do not print any output, except for errors\n"));
+	printf(_("  -s, --skip-checksums        skip checksum verification\n"));
+	printf(_("  -V, --version               output version information, then exit\n"));
+	printf(_("  -?, --help                  show this help, then exit\n"));
+	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
+}
+
+/*
+ * Log a non-fatal validation error.
+ *
+ * The message is emitted at error level, the context is marked as having
+ * seen an error, and, if the context asks for it, the program exits with
+ * status 1 right away.
+ */
+static void
+pg_validator_error(validator_context *context, const char *pg_restrict fmt,...)
+{
+	va_list		args;
+
+	va_start(args, fmt);
+	pg_log_generic_v(PG_LOG_ERROR, fmt, args);
+	va_end(args);
+
+	context->saw_any_error = true;
+
+	/* Keep going unless the user wants to stop at the first error. */
+	if (!context->exit_on_error)
+		return;
+	exit(1);
+}
+
+/*
+ * Log a message at fatal level and terminate the program with exit status 1.
+ * This function does not return.
+ */
+static void
+pg_validator_fatal(const char *pg_restrict fmt,...)
+{
+	va_list		args;
+
+	va_start(args, fmt);
+	pg_log_generic_v(PG_LOG_FATAL, fmt, args);
+	va_end(args);
+
+	exit(1);
+}
+
+/*
+ * Decide whether 'relpath' is covered by the ignore list, either because it
+ * is listed itself or because one of its parent directories is.
+ *
+ * A list entry covers 'relpath' when it matches the start of 'relpath' and
+ * the match ends either at the end of 'relpath' or at a '/'. So "aa/bb"
+ * covers "aa/bb" and "aa/bb/cc", but not "aa/bbb".
+ */
+static bool
+should_ignore_relpath(validator_context *context, char *relpath)
+{
+	SimpleStringListCell *item = context->ignore_list.head;
+
+	while (item != NULL)
+	{
+		size_t		len = strlen(item->val);
+
+		if (strncmp(relpath, item->val, len) == 0)
+		{
+			char		following = relpath[len];
+
+			if (following == '\0' || following == '/')
+				return true;
+		}
+
+		item = item->next;
+	}
+
+	return false;
+}
+
+/*
+ * Return a newly allocated, NUL-terminated copy of the first 'length' bytes
+ * found at 'buffer'.
+ */
+static char *
+extractstr(char *buffer, int length)
+{
+	char	   *copy;
+
+	copy = memcpy(palloc(length + 1), buffer, length);
+	copy[length] = '\0';
+
+	return copy;
+}
+
+/*
+ * Search buffer[start_position .. size-1] for the character 'c'.
+ *
+ * Returns the distance from start_position to the first occurrence, or -1
+ * if the character does not occur in that range (which includes the case
+ * where the range is empty).
+ */
+static int
+findchar(char *buffer, int size, char c, int start_position)
+{
+	char	   *from = buffer + start_position;
+	char	   *hit;
+
+	if (start_position >= size)
+		return -1;
+
+	hit = memchr(from, c, size - start_position);
+	if (hit == NULL)
+		return -1;
+
+	return (int) (hit - from);
+}
+
+/*
+ * Extract the next field from a line of text read from the manifest file.
+ *
+ * 'buffer' points at the first byte of the field and 'end' just past the
+ * last byte of the line. A newly allocated, NUL-terminated copy of the
+ * field's value, with any quoting removed, is stored in '*result'.
+ *
+ * The return value is the number of bytes the field occupies in the buffer,
+ * including the surrounding double-quotes if it is quoted, but not including
+ * the tab that may follow it.
+ *
+ * Malformed quoting is reported via pg_validator_fatal(), which exits.
+ */
+static int
+findfield(char *buffer, char *end, char **result)
+{
+	int			qoffset = 1;
+	int			dqcount = 0;
+	int			toklen;
+	int			bufpos;
+	int			resultpos;
+
+	/*
+	 * If this field is unquoted, we just stop at the next tab; if there's
+	 * none, we stop at the end of the line. Note that if buffer == end, it
+	 * just means that the last field on the line is empty.
+	 */
+	if (buffer == end || *buffer != '"')
+	{
+		toklen = findchar(buffer, end - buffer, '\t', 0);
+
+		if (toklen == -1)
+			toklen = end - buffer;
+		*result = extractstr(buffer, toklen);
+		return toklen;
+	}
+
+	/*
+	 * Our escaping convention is that if the field contains a tab, it must be
+	 * surrounded by double-quotes and any internal double-quotes must be
+	 * doubled.
+	 */
+	while (1)
+	{
+		int			next;
+
+		/*
+		 * Where's the next double quote? findchar() reports failure as -1,
+		 * so check its result before folding it into qoffset.
+		 */
+		next = findchar(buffer, end - buffer, '"', qoffset);
+		if (next == -1)
+			pg_validator_fatal("quoted field in backup manifest is not terminated");
+		qoffset += next;
+
+		/*
+		 * If the double-quote we found is the last character on the line or
+		 * if it's followed by a tab, we've reached the end of this field.
+		 */
+		if (buffer + qoffset + 1 >= end || buffer[qoffset + 1] == '\t')
+			break;
+
+		/* Otherwise, the next character should be another double-quote. */
+		if (buffer[qoffset + 1] != '"')
+			pg_validator_fatal("invalid quoted field in backup manifest");
+
+		/* Skip both double-quotes and go around again. */
+		qoffset += 2;
+		++dqcount;
+	}
+
+	/*
+	 * At this point, we know that qoffset is the offset, relative to buffer,
+	 * of the closing double-quote, and that dqcount is the number of escaped
+	 * double-quotes within the field, and that all of those escape sequences
+	 * are proper. Extract and de-escape the data in the field.
+	 *
+	 * The amount of space needed for the result is equal to the raw token
+	 * length, minus two for the double quotes at the start and end, minus one
+	 * for each doubled double-quote within the token, plus one for the
+	 * trailing zero byte.
+	 */
+	toklen = qoffset + 1;
+	*result = palloc(toklen - dqcount - 1);
+	bufpos = 1;
+	resultpos = 0;
+	while (bufpos < qoffset)
+	{
+		/* A double-quote here is the first of a doubled pair; keep one. */
+		(*result)[resultpos] = buffer[bufpos];
+		bufpos += (buffer[bufpos] == '"' ? 2 : 1);
+		++resultpos;
+	}
+	(*result)[resultpos] = '\0';
+	Assert(resultpos == toklen - dqcount - 2);
+
+	return toklen;
+}
+
+/*
+ * Hash function for the manifestfiles hash table: hashes the bytes of the
+ * NUL-terminated string 's', not counting the terminator.
+ */
+static uint32
+hash_string_pointer(char *s)
+{
+	size_t		len = strlen(s);
+
+	return hash_bytes((unsigned char *) s, len);
+}
+
+/*
+ * Map one hexadecimal digit, in either upper or lower case, to its numeric
+ * value in the range 0..15.
+ *
+ * Returns -1 if the character is not a hexadecimal digit.
+ */
+static int
+hexdecode_char(char c)
+{
+	int			value = -1;
+
+	if ('0' <= c && c <= '9')
+		value = c - '0';
+	else if ('a' <= c && c <= 'f')
+		value = 10 + (c - 'a');
+	else if ('A' <= c && c <= 'F')
+		value = 10 + (c - 'A');
+
+	return value;
+}
+
+/*
+ * Decode 2 * nbytes hexadecimal characters from 'input' into 'nbytes' bytes
+ * at 'result', two characters per byte with the high-order digit first.
+ *
+ * Returns false as soon as a character that is not a hex digit is met, in
+ * which case only the bytes before that point have been written; otherwise
+ * returns true.
+ */
+static bool
+hexdecode_string(uint8 *result, char *input, int nbytes)
+{
+	char	   *in = input;
+	char	   *stop = input + 2 * nbytes;
+	uint8	   *out = result;
+
+	while (in < stop)
+	{
+		int			hi = hexdecode_char(in[0]);
+		int			lo = hexdecode_char(in[1]);
+
+		if (hi < 0 || lo < 0)
+			return false;
+
+		*out++ = hi * 16 + lo;
+		in += 2;
+	}
+
+	return true;
+}
--
2.17.2 (Apple Git-113)