From 52b6e04e1e3a2535770c177ab1c0ee0baa2c35a5 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 7 Feb 2020 17:17:52 -0500 Subject: [PATCH v8 3/5] pg_validatebackup: Validate a backup against the backup manifest. Patch by me; some off-list review and testing from Mark Dilger, Davinder Singh, Tushar Ahuja, Rajkumar Raghuwanshi, and Jeevan Chalke. (I chose here to make this a separate utility; Suraj wrote a previous patch for this that made it part of pg_basebackup. Doing it this way lets us have various command line options that are specific to backup validation. I've added a few such options and we might want to add more later. I also arranged things so that checksum failures are reported last, as that is the most expensive part of validation. I believe that my version also does better error checking and reporting.) --- src/backend/replication/basebackup.c | 6 +- src/bin/Makefile | 1 + src/bin/pg_validatebackup/.gitignore | 1 + src/bin/pg_validatebackup/Makefile | 32 + src/bin/pg_validatebackup/pg_validatebackup.c | 1089 +++++++++++++++++ 5 files changed, 1126 insertions(+), 3 deletions(-) create mode 100644 src/bin/pg_validatebackup/.gitignore create mode 100644 src/bin/pg_validatebackup/Makefile create mode 100644 src/bin/pg_validatebackup/pg_validatebackup.c diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 1729931597..99e102b2a7 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -63,7 +63,7 @@ struct manifest_info pg_checksum_type checksum_type; pg_sha256_ctx manifest_ctx; uint64 manifest_size; - int still_checksumming; + bool still_checksumming; }; @@ -84,7 +84,7 @@ static void SendBackupHeader(List *tablespaces); static void InitializeManifest(manifest_info *manifest, pg_checksum_type); static void AppendStringToManifest(manifest_info *manifest, char *s); static void AddFileToManifest(manifest_info *manifest, const char *spcoid, - const char *filename, size_t size, 
time_t mtime, + const char *pathname, size_t size, time_t mtime, pg_checksum_context *checksum_ctx); static void SendBackupManifest(manifest_info *manifest); static char *escape_field_for_manifest(const char *s); @@ -976,7 +976,7 @@ AppendStringToManifest(manifest_info *manifest, char *s) */ static void AddFileToManifest(manifest_info *manifest, const char *spcoid, - const char *filename, size_t size, time_t mtime, + const char *pathname, size_t size, time_t mtime, pg_checksum_context *checksum_ctx) { char pathbuf[MAXPGPATH]; diff --git a/src/bin/Makefile b/src/bin/Makefile index 7f4120a34f..77bceea4fe 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -27,6 +27,7 @@ SUBDIRS = \ pg_test_fsync \ pg_test_timing \ pg_upgrade \ + pg_validatebackup \ pg_waldump \ pgbench \ psql \ diff --git a/src/bin/pg_validatebackup/.gitignore b/src/bin/pg_validatebackup/.gitignore new file mode 100644 index 0000000000..3ae1c1f03a --- /dev/null +++ b/src/bin/pg_validatebackup/.gitignore @@ -0,0 +1 @@ +/pg_validatebackup diff --git a/src/bin/pg_validatebackup/Makefile b/src/bin/pg_validatebackup/Makefile new file mode 100644 index 0000000000..aeb97d21d2 --- /dev/null +++ b/src/bin/pg_validatebackup/Makefile @@ -0,0 +1,32 @@ +# src/bin/pg_validatebackup/Makefile + +PGFILEDESC = "pg_validatebackup - validate a backup against a backup manifest" +PGAPPICON = win32 + +subdir = src/bin/pg_validatebackup +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +# We need libpq only because fe_utils does. 
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) + +OBJS = \ + $(WIN32RES) \ + pg_validatebackup.o + +all: pg_validatebackup + +pg_validatebackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + +install: all installdirs + $(INSTALL_PROGRAM) pg_validatebackup$(X) '$(DESTDIR)$(bindir)/pg_validatebackup$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/pg_validatebackup$(X)' + +clean distclean maintainer-clean: + rm -f pg_validatebackup$(X) $(OBJS) diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c new file mode 100644 index 0000000000..4f47b20855 --- /dev/null +++ b/src/bin/pg_validatebackup/pg_validatebackup.c @@ -0,0 +1,1089 @@ +/*------------------------------------------------------------------------- + * + * pg_validatebackup.c + * Validate a backup against a backup manifest. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/bin/pg_validatebackup/pg_validatebackup.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include +#include +#include + +#include "common/checksum_helper.h" +#include "common/hashfn.h" +#include "common/logging.h" +#include "fe_utils/simple_list.h" +#include "getopt_long.h" + +/* + * For efficiency, we'd like our hash table containing information about the + * manifest to start out with approximately the correct number of entries. + * There's no way to know the exact number of entries without reading the whole + * file, but we can get an estimate by dividing the file size by the estimated + * number of bytes per line. 
+ *
+ * This could be off by about a factor of two in either direction, because the
+ * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
+ * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
+ * might be no checksum at all.
+ */
+#define ESTIMATED_BYTES_PER_MANIFEST_LINE	100
+
+/*
+ * How many bytes should we try to read from a file at once?
+ *
+ * This is used both as the read granularity for the manifest and as the
+ * size of the buffer used when checksumming backup files.
+ */
+#define READ_CHUNK_SIZE				4096
+
+/*
+ * The first word of each line of the manifest file should be one of these
+ * key words. We define constants for the relevant lengths as well.
+ *
+ * The KWL_* lengths exclude the terminating zero byte, so that they can be
+ * compared directly against the length of the first tab-delimited field.
+ */
+#define KW_MANIFEST_VERSION			"PostgreSQL-Backup-Manifest-Version"
+#define KW_MANIFEST_FILE			"File"
+#define KW_MANIFEST_CHECKSUM		"Manifest-Checksum"
+#define KWL_MANIFEST_VERSION		(sizeof(KW_MANIFEST_VERSION)-1)
+#define KWL_MANIFEST_FILE			(sizeof(KW_MANIFEST_FILE)-1)
+#define KWL_MANIFEST_CHECKSUM		(sizeof(KW_MANIFEST_CHECKSUM)-1)
+
+/*
+ * How many fields are there for each "File" line in the manifest?
+ * Currently we have: file name, file size, timestamp, checksum.
+ */
+#define FIELDS_PER_FILE_LINE		4
+
+/*
+ * Each "File" line in the manifest file is parsed to produce an object
+ * like this.
+ */
+typedef struct manifestfile
+{
+	uint32		status;			/* hash status */
+	char	   *pathname;		/* hash key; path relative to the backup
+								 * directory */
+	size_t		size;			/* file size according to the manifest */
+	pg_checksum_type checksum_type; /* algorithm named in the manifest */
+	int			checksum_length;	/* length of checksum_payload, in bytes */
+	uint8	   *checksum_payload;	/* decoded checksum; NULL if the length
+									 * is zero */
+	bool		matched;		/* set once the file has been found on disk */
+	bool		bad;			/* set once a problem (size mismatch) has
+								 * been reported; checksum validation skips
+								 * such entries */
+} manifestfile;
+
+/*
+ * Define a hash table which we can use to store information about the files
+ * mentioned in the backup manifest.
+ *
+ * Entries are manifestfile objects keyed by their pathname; keys are hashed
+ * with hash_string_pointer() and compared with strcmp().
+ */
+static uint32 hash_string_pointer(char *s);
+#define SH_PREFIX		manifestfiles
+#define SH_ELEMENT_TYPE	manifestfile
+#define SH_KEY_TYPE		char *
+#define	SH_KEY			pathname
+#define SH_HASH_KEY(tb, key)	hash_string_pointer(key)
+#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
+#define	SH_SCOPE		static inline
+#define SH_RAW_ALLOCATOR	pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+/*
+ * All of the context information we need while checking a backup manifest.
+ */
+typedef struct validator_context
+{
+	manifestfiles_hash *ht;		/* manifest entries, keyed by relative path */
+	char	   *backup_directory;	/* directory named on the command line */
+	SimpleStringList ignore_list;	/* relative paths (and everything below
+									 * them) that must not be reported on */
+	bool		exit_on_error;	/* exit as soon as one error is reported? */
+	bool		saw_any_error;	/* set by pg_validator_error() */
+} validator_context;
+
+/* Manifest parsing */
+static manifestfiles_hash * parse_manifest_file(char *manifest_path);
+static void parse_file_line_from_manifest(manifestfile *f, char *rest,
+										  int restlen);
+
+/* Comparison of the backup directory against the manifest */
+static void validate_backup_directory(validator_context *context,
+									  char *relpath, char *fullpath);
+static void validate_backup_file(validator_context *context,
+								 char *relpath, char *fullpath);
+static void report_extra_backup_files(validator_context *context);
+static void validate_backup_checksums(validator_context *context);
+static void validate_file_checksum(validator_context *context,
+								   manifestfile *tabent, char *pathname);
+
+/* Error reporting and ignore-list handling */
+static void pg_validator_error(validator_context *context,
+							   const char *pg_restrict fmt,...)
+			pg_attribute_printf(2, 3);
+static bool should_ignore_relpath(validator_context *context, char *relpath);
+
+/* Low-level text and hex helpers */
+static char *extractstr(char *buffer, int length);
+static int	findchar(char *buffer, int size, char c, int start_position);
+static int	findfield(char *buffer, char *end, char **result);
+static int	hexdecode_char(char c);
+static bool hexdecode_string(uint8 *result, char *input, int nbytes);
+static void usage(void);
+
+/* Program name as reported in messages; set at the start of main(). */
+static const char *progname;
+
+/*
+ * Main entry point.
+ *
+ * Parse the command line, load the manifest, compare it against the contents
+ * of the backup directory, and then (unless told otherwise) validate the
+ * checksums of the individual files.  The exit status is 0 if no problem was
+ * reported and 1 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"exit-on-error", no_argument, NULL, 'e'},
+		{"ignore", required_argument, NULL, 'i'},
+		{"manifest-path", required_argument, NULL, 'm'},
+		{"quiet", no_argument, NULL, 'q'},
+		{"skip-checksums", no_argument, NULL, 's'},
+		{NULL, 0, NULL, 0}
+	};
+
+	validator_context context;
+	char	   *manifest_location = NULL;
+	bool		announce_success = true;
+	bool		check_checksums = true;
+	int			opt;
+
+	pg_logging_init(argv[0]);
+	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup"));
+	progname = get_progname(argv[0]);
+
+	memset(&context, 0, sizeof(context));
+
+	/* --help and --version are honored only as the very first argument. */
+	if (argc > 1)
+	{
+		char	   *first = argv[1];
+
+		if (strcmp(first, "--help") == 0 || strcmp(first, "-?") == 0)
+		{
+			usage();
+			exit(0);
+		}
+		else if (strcmp(first, "--version") == 0 || strcmp(first, "-V") == 0)
+		{
+			puts("pg_validatebackup (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	/*
+	 * The manifest itself and the WAL directory are never validated, so
+	 * seed the ignore list with them before looking at user options.
+	 */
+	simple_string_list_append(&context.ignore_list, "backup_manifest");
+	simple_string_list_append(&context.ignore_list, "pg_wal");
+
+	/* Process the command-line switches one at a time. */
+	for (;;)
+	{
+		opt = getopt_long(argc, argv, "ei:m:qs", long_options, NULL);
+		if (opt == -1)
+			break;
+
+		if (opt == 'e')
+			context.exit_on_error = true;
+		else if (opt == 'i')
+			simple_string_list_append(&context.ignore_list, optarg);
+		else if (opt == 'm')
+			manifest_location = optarg;
+		else if (opt == 'q')
+			announce_success = false;
+		else if (opt == 's')
+			check_checksums = false;
+		else
+		{
+			fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+					progname);
+			exit(1);
+		}
+	}
+
+	/* Exactly one non-option argument, the backup directory, is required. */
+	if (optind >= argc)
+	{
+		pg_log_fatal("no backup directory specified");
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+	context.backup_directory = argv[optind];
+	++optind;
+
+	if (optind < argc)
+	{
+		pg_log_fatal("too many command-line arguments (first is \"%s\")",
+					 argv[optind]);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+
+	/* Without -m, the manifest is expected inside the backup directory. */
+	if (manifest_location == NULL)
+		manifest_location = psprintf("%s/backup_manifest",
+									 context.backup_directory);
+
+	/*
+	 * Load the manifest.  Parse errors are fatal, since a corrupted
+	 * manifest is useless as a reference for validating anything else.
+	 */
+	context.ht = parse_manifest_file(manifest_location);
+
+	/*
+	 * Walk the backup directory.  Every file found must appear in the
+	 * manifest with a matching size; manifest entries that correspond to a
+	 * file on disk get their "matched" flag set along the way.
+	 */
+	validate_backup_directory(&context, NULL, context.backup_directory);
+
+	/*
+	 * Whatever is still unmatched in the hash table is a file that the
+	 * manifest mentions but that does not exist on disk.
+	 */
+	report_extra_backup_files(&context);
+
+	/* Checksumming is the expensive part, so it comes last and is optional. */
+	if (check_checksums)
+		validate_backup_checksums(&context);
+
+	/* Report overall success, unless asked to be quiet, and set exit code. */
+	if (context.saw_any_error)
+		exit(1);
+	if (announce_success)
+		pg_log_info("backup successfully verified");
+
+	exit(0);
+}
+
+/*
+ * Parse a manifest file and construct a hash table with information about
+ * all the files it mentions.
+ *
+ * Every problem found while reading or parsing the manifest is fatal: the
+ * process exits with status 1.  Besides parsing each line, we compute a
+ * SHA-256 checksum over everything that precedes the "Manifest-Checksum"
+ * line and insist that it match the value recorded on that line.
+ */
+static manifestfiles_hash *
+parse_manifest_file(char *manifest_path)
+{
+	int			fd;
+	struct stat statbuf;
+	off_t		estimate;
+	off_t		bytes_read = 0;
+	off_t		bytes_consumed = 0;
+	uint32		initial_size;
+	manifestfiles_hash *ht;
+	char	   *buffer;
+	uint8		manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
+	uint8		manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
+	int			buffer_position = 0;
+	int			buffer_size = 0;
+	int			buffer_maxsize = 2 * READ_CHUNK_SIZE;
+	int			line_number = 0;
+	bool		saw_manifest_checksum_line = false;
+	pg_sha256_ctx manifest_ctx;
+
+	/* Prepare to compute a checksum of the manifest itself. */
+	pg_sha256_init(&manifest_ctx);
+
+	/* Open the manifest file. */
+	if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
+	{
+		pg_log_fatal("could not open file \"%s\": %m", manifest_path);
+		exit(1);
+	}
+
+	/* Figure out how big the manifest is. */
+	if (fstat(fd, &statbuf) != 0)
+	{
+		pg_log_fatal("could not stat file \"%s\": %m", manifest_path);
+		exit(1);
+	}
+
+	/* Guess how large to make the hash table based on the manifest size. */
+	estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
+	initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
+
+	/* Create the hash table. */
+	ht = manifestfiles_create(initial_size, NULL);
+
+	/* Initialize our read buffer. */
+	buffer = pg_malloc(buffer_maxsize);
+
+	/*
+	 * Loop until we've read it all.
+	 *
+	 * The file size shouldn't be changing, so it seems fine to just error out
+	 * if the final length is different from what stat() told us.
+	 */
+	while (bytes_consumed < statbuf.st_size)
+	{
+		int			line_length;
+		int			first_field_length;
+		char	   *rest;
+		int			restlen;
+
+		/* Find next newline if any. */
+		line_length = findchar(buffer, buffer_size, '\n', buffer_position);
+
+		/* If no newline was found, we need to read more data and try again. */
+		if (line_length == -1)
+		{
+			size_t		bytes_to_read;
+			int			rc;
+
+			bytes_to_read = Min(statbuf.st_size - bytes_read, READ_CHUNK_SIZE);
+			if (bytes_to_read == 0)
+			{
+				pg_log_fatal("manifest file line not terminated by newline");
+				exit(1);
+			}
+
+			/*
+			 * Make sure the new data fits after what is already buffered.
+			 * The test must take buffer_size into account: an unusually
+			 * long line can leave more than one chunk of unconsumed data in
+			 * the buffer.  Since bytes_to_read never exceeds
+			 * READ_CHUNK_SIZE, growing by one chunk is always enough.
+			 */
+			if ((size_t) buffer_size + bytes_to_read > (size_t) buffer_maxsize)
+			{
+				buffer_maxsize += READ_CHUNK_SIZE;
+				buffer = pg_realloc(buffer, buffer_maxsize);
+				Assert((size_t) buffer_size + bytes_to_read <=
+					   (size_t) buffer_maxsize);
+			}
+			rc = read(fd, buffer + buffer_size, bytes_to_read);
+			if (rc != bytes_to_read)
+			{
+				if (rc < 0)
+					pg_log_fatal("could not read file \"%s\": %m",
+								 manifest_path);
+				else
+					pg_log_fatal("could not read file \"%s\": read %d of %zu",
+								 manifest_path, rc, bytes_to_read);
+				exit(1);
+			}
+			buffer_size += rc;
+			bytes_read += rc;
+			continue;
+		}
+
+		/* Increment line number. */
+		++line_number;
+
+		/* The manifest checksum should be the last thing in the file. */
+		if (saw_manifest_checksum_line)
+		{
+			pg_log_fatal("unexpected data follows manifest checksum");
+			exit(1);
+		}
+
+		/*
+		 * Find first field on line, and remaining line contents.  The
+		 * search for the tab is limited to the current line, so that a
+		 * line without any tab is not confused by a tab on a later line.
+		 */
+		first_field_length =
+			findchar(buffer, buffer_position + line_length, '\t',
+					 buffer_position);
+		rest = buffer + buffer_position + first_field_length + 1;
+		restlen = line_length - (first_field_length + 1);
+
+		/*
+		 * Check the first word of the line to see what kind of line it is.
+		 */
+		if (first_field_length == KWL_MANIFEST_VERSION &&
+			memcmp(buffer + buffer_position, KW_MANIFEST_VERSION,
+				   KWL_MANIFEST_VERSION) == 0)
+		{
+			if (line_number != 1)
+			{
+				pg_log_fatal("manifest file version should only be specified at line 1");
+				exit(1);
+			}
+			else
+			{
+				char	   *line = buffer + buffer_position;
+				char	   *version;
+
+				version = extractstr(line + first_field_length + 1,
+									 line_length - (first_field_length + 1));
+				if (strcmp(version, "1") != 0)
+				{
+					pg_log_fatal("unrecognized manifest version: \"%s\"",
+								 version);
+					exit(1);
+				}
+			}
+		}
+		else if (first_field_length == KWL_MANIFEST_FILE &&
+				 memcmp(buffer + buffer_position, KW_MANIFEST_FILE,
+						KWL_MANIFEST_FILE) == 0)
+		{
+			manifestfile f;
+			manifestfile *tabent;
+			bool		found;
+
+			/* Parse this line. */
+			parse_file_line_from_manifest(&f, rest, restlen);
+
+			/* Make a new entry in the hash table for it. */
+			tabent = manifestfiles_insert(ht, f.pathname, &found);
+			if (found)
+			{
+				pg_log_fatal("duplicate pathname in backup manifest: \"%s\"",
+							 f.pathname);
+				exit(1);
+			}
+
+			/* Copy in all the relevant details. */
+			tabent->size = f.size;
+			tabent->checksum_type = f.checksum_type;
+			tabent->checksum_length = f.checksum_length;
+			tabent->checksum_payload = f.checksum_payload;
+			tabent->matched = false;
+			tabent->bad = false;
+		}
+		else if (first_field_length == KWL_MANIFEST_CHECKSUM &&
+				 memcmp(buffer + buffer_position, KW_MANIFEST_CHECKSUM,
+						KWL_MANIFEST_CHECKSUM) == 0)
+		{
+			saw_manifest_checksum_line = true;
+			if (restlen != PG_SHA256_DIGEST_STRING_LENGTH - 1)
+			{
+				pg_log_fatal("manifest file checksum has unexpected length: %d",
+							 restlen);
+				exit(1);
+			}
+			if (!hexdecode_string(manifest_checksum_expected, rest,
+								  PG_SHA256_DIGEST_LENGTH))
+			{
+				pg_log_fatal("invalid manifest checksum: \"%s\"",
+							 extractstr(rest, restlen));
+				exit(1);
+			}
+		}
+		else if (first_field_length == -1)
+		{
+			pg_log_fatal("manifest file keyword not terminated by tab");
+			exit(1);
+		}
+		else
+		{
+			char	   *kw;
+
+			kw = extractstr(buffer + buffer_position, first_field_length);
+			pg_log_fatal("unrecognized manifest file keyword: \"%s\"", kw);
+			exit(1);
+		}
+
+		/*
+		 * Update manifest checksum, if needed.  The checksum line itself is
+		 * not part of the data that it covers.
+		 */
+		if (!saw_manifest_checksum_line)
+			pg_sha256_update(&manifest_ctx, (uint8 *) buffer + buffer_position,
+							 line_length + 1);
+
+		/* Advance buffer position over the data we just read. */
+		buffer_position += line_length + 1;
+
+		/* Also mark these bytes as consumed so we know when to stop. */
+		bytes_consumed += line_length + 1;
+
+		/*
+		 * We don't want to incur the expense of using memmove() to discard
+		 * data after every line, because the lines are short compared to the
+		 * chunk size -- but we must do it at least now and then, or we'll
+		 * have to keep growing the buffer.
+		 */
+		if (buffer_position >= READ_CHUNK_SIZE)
+		{
+			int			leftover_bytes = buffer_size - buffer_position;
+
+			if (leftover_bytes > 0)
+				memmove(buffer, buffer + buffer_position, leftover_bytes);
+			buffer_size -= buffer_position;
+			buffer_position = 0;
+		}
+	}
+
+	/*
+	 * Checksum verification.  A manifest without a checksum line is
+	 * rejected outright; there would be nothing valid to compare against.
+	 */
+	if (!saw_manifest_checksum_line)
+	{
+		pg_log_fatal("manifest has no checksum");
+		exit(1);
+	}
+	pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
+	if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
+			   PG_SHA256_DIGEST_LENGTH) != 0)
+	{
+		pg_log_fatal("manifest checksum does not match");
+		exit(1);
+	}
+
+	/* OK, we're done with the manifest file. */
+	close(fd);
+
+	/* Return the hash table we constructed. */
+	return ht;
+}
+
+/*
+ * Parse the fields of one "File" line and fill in *f accordingly.
+ *
+ * The caller passes the remainder of the line, excluding the initial "File\t"
+ * portion.  'restlen' is the number of bytes available at 'rest'; the data
+ * is not NUL-terminated.
+ *
+ * Only pathname, size, and the checksum-related members of *f are set here.
+ * Any syntax problem is fatal.
+ */
+static void
+parse_file_line_from_manifest(manifestfile *f, char *rest, int restlen)
+{
+	char	   *end = rest + restlen;
+	char	   *field[FIELDS_PER_FILE_LINE];
+	unsigned long filesize;
+	char	   *ep;
+	pg_checksum_type checksum_type;
+	int			raw_checksum_length = 0;
+	char	   *raw_checksum_payload = NULL;
+	int			checksum_length;
+	uint8	   *checksum_payload;
+	int			i;
+	char	   *s;
+
+	/* Split the line into fields. */
+	for (i = 0; i < FIELDS_PER_FILE_LINE; ++i)
+	{
+		int			toklen;
+
+		toklen = findfield(rest, end, &field[i]);
+		if (rest + toklen >= end && i + 1 < FIELDS_PER_FILE_LINE)
+		{
+			pg_log_fatal("manifest file line has too few fields");
+			exit(1);
+		}
+		rest += toklen + 1;		/* also step over the separating tab */
+	}
+
+	/* We expect to have used the entire line. */
+	if (rest < end)
+	{
+		pg_log_fatal("manifest file line has too many fields");
+		exit(1);
+	}
+
+	/* Parse the size.  An empty field is not a number either. */
+	filesize = strtoul(field[1], &ep, 10);
+	if (ep == field[1] || *ep)
+	{
+		pg_log_fatal("manifest file size for file \"%s\" is not a number",
+					 field[0]);
+		exit(1);
+	}
+
+	/*
+	 * Parse the checksum type.  The field looks like "TYPE:HEXDIGITS", or
+	 * just "TYPE" when there is no payload; split it at the colon.
+	 */
+	for (s = field[3]; s[0] != '\0' && s[0] != ':'; ++s)
+		;
+	if (*s)
+	{
+		raw_checksum_payload = s + 1;
+		raw_checksum_length = strlen(raw_checksum_payload);
+		*s = '\0';
+	}
+	if (!pg_checksum_parse_type(field[3], &checksum_type))
+	{
+		pg_log_fatal("unrecognized checksum algorithm for file \"%s\": \"%s\"",
+					 field[0], field[3]);
+		exit(1);
+	}
+
+	/*
+	 * Decode the checksum payload.  Two hex digits make one byte, so an odd
+	 * number of digits cannot be valid.
+	 */
+	if (raw_checksum_length % 2 != 0)
+	{
+		pg_log_fatal("invalid checksum for file \"%s\": \"%s\"",
+					 field[0], raw_checksum_payload);
+		exit(1);
+	}
+	checksum_length = raw_checksum_length / 2;
+	if (checksum_length == 0)
+		checksum_payload = NULL;
+	else
+	{
+		checksum_payload = palloc(checksum_length);
+		if (!hexdecode_string(checksum_payload, raw_checksum_payload,
+							  checksum_length))
+		{
+			pg_log_fatal("invalid checksum for file \"%s\": \"%s\"",
+						 field[0], raw_checksum_payload);
+			exit(1);
+		}
+	}
+
+	/* Fill the output struct. */
+	f->pathname = field[0];
+	f->size = filesize;
+	f->checksum_type = checksum_type;
+	f->checksum_length = checksum_length;
+	f->checksum_payload = checksum_payload;
+}
+
+/*
+ * Validate one directory.
+ *
+ * 'relpath' is NULL if we are to validate the top-level backup directory,
+ * and otherwise the relative path to the directory that is to be validated.
+ *
+ * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
+ * filesystem path at which it can be found.
+ *
+ * Failure to open the top-level backup directory is fatal.  Failure to open
+ * any other directory is reported as an ordinary validation error, and the
+ * directory is added to the ignore list.
+ */
+static void
+validate_backup_directory(validator_context *context, char *relpath,
+						  char *fullpath)
+{
+	DIR		   *dir;
+	struct dirent *dirent;
+
+	dir = opendir(fullpath);
+	if (dir == NULL)
+	{
+		/*
+		 * There is no relative path to put on the ignore list for the
+		 * top-level directory, and nothing useful can be checked without
+		 * it, so give up right away.
+		 */
+		if (relpath == NULL)
+		{
+			pg_log_fatal("could not open directory \"%s\": %m", fullpath);
+			exit(1);
+		}
+
+		pg_validator_error(context,
+						   "could not open directory \"%s\": %m", fullpath);
+
+		/*
+		 * Suppress further errors related to this path name and anything
+		 * underneath it.
+		 */
+		simple_string_list_append(&context->ignore_list, relpath);
+
+		return;
+	}
+
+	/* errno is cleared first so that a readdir() failure can be detected. */
+	while (errno = 0, (dirent = readdir(dir)) != NULL)
+	{
+		char	   *filename = dirent->d_name;
+		char	   *newfullpath;
+		char	   *newrelpath;
+
+		/* Skip "." and ".." */
+		if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0)
+			continue;
+
+		/* Build the paths only for entries that we actually examine. */
+		newfullpath = psprintf("%s/%s", fullpath, filename);
+		if (relpath == NULL)
+			newrelpath = pstrdup(filename);
+		else
+			newrelpath = psprintf("%s/%s", relpath, filename);
+
+		if (!should_ignore_relpath(context, newrelpath))
+			validate_backup_file(context, newrelpath, newfullpath);
+
+		pfree(newfullpath);
+		pfree(newrelpath);
+	}
+
+	/* A NULL return with errno set is a read failure, not end-of-directory. */
+	if (errno != 0)
+		pg_validator_error(context,
+						   "could not read directory \"%s\": %m", fullpath);
+
+	if (closedir(dir))
+	{
+		pg_validator_error(context,
+						   "could not close directory \"%s\": %m", fullpath);
+		return;
+	}
+}
+
+/*
+ * Validate one file (which might actually be a directory or a symlink).
+ *
+ * The arguments to this function have the same meaning as the arguments to
+ * validate_backup_directory.
+ *
+ * Directories are handled by recursing.  A plain file must have an entry in
+ * the manifest hash table and its size must match the recorded one.
+ */
+static void
+validate_backup_file(validator_context *context, char *relpath, char *fullpath)
+{
+	struct stat sb;
+	manifestfile *tabent;
+
+	if (stat(fullpath, &sb) != 0)
+	{
+		pg_validator_error(context,
+						   "could not stat file or directory \"%s\": %m",
+						   relpath);
+
+		/*
+		 * Suppress further errors related to this path name and, if it's a
+		 * directory, anything underneath it.
+		 */
+		simple_string_list_append(&context->ignore_list, relpath);
+
+		return;
+	}
+
+	/* If it's a directory, just recurse. */
+	if (S_ISDIR(sb.st_mode))
+	{
+		validate_backup_directory(context, relpath, fullpath);
+		return;
+	}
+
+	/* If it's not a directory, it should be a plain file. */
+	if (!S_ISREG(sb.st_mode))
+	{
+		pg_validator_error(context,
+						   "\"%s\" is not a file or directory",
+						   relpath);
+		return;
+	}
+
+	/* Check whether there's an entry in the manifest hash. */
+	tabent = manifestfiles_lookup(context->ht, relpath);
+	if (tabent == NULL)
+	{
+		pg_validator_error(context,
+						   "\"%s\" is present on disk but not in the manifest",
+						   relpath);
+		return;
+	}
+
+	/* Flag this entry as having been encountered in the filesystem. */
+	tabent->matched = true;
+
+	/* Check that the size matches. */
+	if (tabent->size != sb.st_size)
+	{
+		pg_validator_error(context,
+						   "\"%s\" has size %zu on disk but size %zu in the manifest",
+						   relpath, (size_t) sb.st_size, tabent->size);
+		tabent->bad = true;
+	}
+
+	/*
+	 * We don't validate checksums at this stage. We first finish validating
+	 * that we have the expected set of files with the expected sizes, and
+	 * only afterwards verify the checksums. That's because computing
+	 * checksums may take a while, and we'd like to report more obvious
+	 * problems quickly.
+	 */
+}
+
+/*
+ * Scan the hash table for entries where the 'matched' flag is not set; report
+ * that such files are present in the manifest but not on disk.
+ *
+ * Entries covered by the ignore list are not reported.
+ */
+static void
+report_extra_backup_files(validator_context *context)
+{
+	manifestfiles_iterator it;
+	manifestfile *tabent;
+
+	manifestfiles_start_iterate(context->ht, &it);
+	while ((tabent = manifestfiles_iterate(context->ht, &it)) != NULL)
+		if (!tabent->matched &&
+			!should_ignore_relpath(context, tabent->pathname))
+			pg_validator_error(context,
+							   "\"%s\" is present in the manifest but not on disk",
+							   tabent->pathname);
+}
+
+/*
+ * Validate checksums for hash table entries that are otherwise unproblematic.
+ * If we've already reported some problem related to a hash table entry, or
+ * if it has no checksum, just skip it.
+ */
+static void
+validate_backup_checksums(validator_context *context)
+{
+	manifestfiles_iterator it;
+	manifestfile *tabent;
+
+	manifestfiles_start_iterate(context->ht, &it);
+	while ((tabent = manifestfiles_iterate(context->ht, &it)) != NULL)
+	{
+		if (tabent->matched && !tabent->bad &&
+			tabent->checksum_type != CHECKSUM_TYPE_NONE &&
+			!should_ignore_relpath(context, tabent->pathname))
+		{
+			char	   *fullpath;
+
+			/* Compute the full pathname to the target file. */
+			fullpath = psprintf("%s/%s", context->backup_directory,
+								tabent->pathname);
+
+			/* Do the actual checksum validation. */
+			validate_file_checksum(context, tabent, fullpath);
+
+			/* Avoid leaking memory. */
+			pfree(fullpath);
+		}
+	}
+}
+
+/*
+ * Validate the checksum of a single file.
+ *
+ * 'tabent' is the manifest entry for the file and 'fullpath' is the path at
+ * which it can be opened.  The caller retains ownership of 'fullpath'; it
+ * must not be freed here.
+ */
+static void
+validate_file_checksum(validator_context *context, manifestfile *tabent,
+					   char *fullpath)
+{
+	pg_checksum_context checksum_ctx;
+	char	   *relpath = tabent->pathname;
+	int			fd;
+	int			rc;
+	uint8		buffer[READ_CHUNK_SIZE];
+	uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+	int			checksumlen;
+
+	/*
+	 * Open the target file.  PG_BINARY matters on platforms that would
+	 * otherwise translate line endings, which would alter the checksum.
+	 */
+	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
+	{
+		pg_validator_error(context, "could not open file \"%s\": %m",
+						   relpath);
+		return;
+	}
+
+	/* Initialize checksum context. */
+	pg_checksum_init(&checksum_ctx, tabent->checksum_type);
+
+	/* Read the file chunk by chunk, updating the checksum as we go. */
+	while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
+		pg_checksum_update(&checksum_ctx, buffer, rc);
+	if (rc < 0)
+		pg_validator_error(context, "could not read file \"%s\": %m",
+						   relpath);
+
+	/* Close the file. */
+	if (close(fd) != 0)
+	{
+		pg_validator_error(context, "could not close file \"%s\": %m",
+						   relpath);
+		return;
+	}
+
+	/* If we didn't manage to read the whole file, bail out now. */
+	if (rc < 0)
+		return;
+
+	/* Get the final checksum. */
+	checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
+
+	/* And check it against the manifest. */
+	if (checksumlen != tabent->checksum_length)
+		pg_validator_error(context,
+						   "file \"%s\" has checksum of length %d, but expected %d",
+						   relpath, checksumlen, tabent->checksum_length);
+	else if (memcmp(checksumbuf, tabent->checksum_payload, checksumlen) != 0)
+		pg_validator_error(context,
+						   "checksum mismatch for file \"%s\"",
+						   relpath);
+}
+
+/*
+ * Print out usage information.
+ *
+ * The option names listed here must match the long_options table in main().
+ */
+static void
+usage(void)
+{
+	printf(_("%s validates a backup against the backup manifest.\n\n"), progname);
+	printf(_("Usage:\n  %s [OPTION]... BACKUPDIR\n\n"), progname);
+	printf(_("Options:\n"));
+	printf(_("  -e, --exit-on-error         exit immediately on error\n"));
+	printf(_("  -i, --ignore=RELATIVE_PATH  ignore indicated path\n"));
+	printf(_("  -m, --manifest-path=PATH    use specified path for manifest\n"));
+	printf(_("  -q, --quiet                 do not print any output, except for errors\n"));
+	printf(_("  -s, --skip-checksums        skip checksum verification\n"));
+	printf(_("  -V, --version               output version information, then exit\n"));
+	printf(_("  -?, --help                  show this help, then exit\n"));
+	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
+}
+
+/*
+ * Report an error. Update the context to indicate that we saw an error, and
+ * exit if the context says we should.
+ */
+static void
+pg_validator_error(validator_context *context, const char *pg_restrict fmt,...)
+{
+	va_list		ap;
+
+	va_start(ap, fmt);
+	pg_log_generic_v(PG_LOG_ERROR, fmt, ap);
+	va_end(ap);
+
+	context->saw_any_error = true;
+	if (context->exit_on_error)
+		exit(1);
+}
+
+/*
+ * Is the specified relative path, or some prefix of it, listed in the set
+ * of paths to ignore?
+ *
+ * Note that by "prefix" we mean a parent directory; for this purpose,
+ * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
+ */
+static bool
+should_ignore_relpath(validator_context *context, char *relpath)
+{
+	SimpleStringListCell *cell;
+
+	for (cell = context->ignore_list.head; cell != NULL; cell = cell->next)
+	{
+		char	   *r = relpath;
+		char	   *v = cell->val;
+
+		/* Walk both strings for as long as they agree. */
+		while (*v != '\0' && *r == *v)
+			++r, ++v;
+
+		/*
+		 * The list entry must have been matched completely, and the match
+		 * must end at a path component boundary in relpath.
+		 */
+		if (*v == '\0' && (*r == '\0' || *r == '/'))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Extract a NUL-terminated string from a larger buffer.
+ *
+ * Returns a newly allocated copy of the first 'length' bytes of 'buffer'.
+ */
+static char *
+extractstr(char *buffer, int length)
+{
+	char	   *s = palloc(length + 1);
+
+	memcpy(s, buffer, length);
+	s[length] = '\0';
+
+	return s;
+}
+
+/*
+ * Find the next instance of a given character within a buffer that
+ * occurs at or after start_position.
+ * If there is none, returns -1; else
+ * returns the difference between the position at which the character was
+ * found and the start position.
+ *
+ * 'size' is the number of valid bytes in 'buffer'; bytes at or beyond that
+ * offset are never examined.
+ */
+static int
+findchar(char *buffer, int size, char c, int start_position)
+{
+	int			i;
+
+	for (i = start_position; i < size; ++i)
+		if (buffer[i] == c)
+			return i - start_position;
+	return -1;
+}
+
+/*
+ * Extract the next field from a line of text read from the manifest file.
+ *
+ * 'buffer' points to the start of the field and 'end' just past the last
+ * byte of the line.  A newly allocated, NUL-terminated and de-escaped copy
+ * of the field is stored in *result.  The return value is the number of
+ * bytes that the field occupies in the input, including any surrounding
+ * double-quotes but not the tab that follows it.
+ *
+ * A malformed quoted field is a fatal error.
+ */
+static int
+findfield(char *buffer, char *end, char **result)
+{
+	int			qoffset = 1;
+	int			dqcount = 0;
+	int			toklen;
+	int			bufpos;
+	int			resultpos;
+
+	/*
+	 * If this field is unquoted, we just stop at the next tab; if there's
+	 * none, we stop at the end of the line. Note that if buffer == end, it
+	 * just means that the last field on the line is empty.
+	 */
+	if (buffer == end || *buffer != '"')
+	{
+		toklen = findchar(buffer, end - buffer, '\t', 0);
+
+		if (toklen == -1)
+			toklen = end - buffer;
+		*result = extractstr(buffer, toklen);
+		return toklen;
+	}
+
+	/*
+	 * Our escaping convention is that if the field contains a tab, it must be
+	 * surrounded by double-quotes and any internal double-quotes must be
+	 * doubled.
+	 */
+	while (1)
+	{
+		int			distance;
+
+		/*
+		 * Where's the next double quote?  The result of findchar() has to
+		 * be tested before it is added to qoffset; otherwise the "not
+		 * found" value of -1 could never be recognized.
+		 */
+		distance = findchar(buffer, end - buffer, '"', qoffset);
+		if (distance == -1)
+		{
+			pg_log_fatal("quoted field in backup manifest is not terminated");
+			exit(1);
+		}
+		qoffset += distance;
+
+		/*
+		 * If the double-quote we found is the last character on the line or
+		 * if it's followed by a tab, we've reached the end of this field.
+		 * The first test also keeps us from looking at a byte that lies
+		 * beyond the end of the line.
+		 */
+		if (buffer + qoffset + 1 >= end || buffer[qoffset + 1] == '\t')
+			break;
+
+		/* Otherwise, the next character should be another double-quote. */
+		if (buffer[qoffset + 1] != '"')
+		{
+			pg_log_fatal("invalid quoted field in backup manifest");
+			exit(1);
+		}
+
+		/* Skip both double-quotes and go around again. */
+		qoffset += 2;
+		++dqcount;
+	}
+
+	/*
+	 * At this point, we know that qoffset is the offset, relative to buffer,
+	 * of the closing double-quote, and that dqcount is the number of escaped
+	 * double-quotes within the field, and that all of those escape sequences
+	 * are proper. Extract and de-escape the data in the field.
+	 *
+	 * The amount of space needed for the result is equal to the raw token
+	 * length, minus two for the double quotes at the start and end, minus one
+	 * for each doubled double-quote within the token, plus one for the
+	 * trailing zero byte.
+	 */
+	toklen = qoffset + 1;
+	*result = palloc(toklen - dqcount - 1);
+	bufpos = 1;
+	resultpos = 0;
+	while (bufpos < qoffset)
+	{
+		(*result)[resultpos] = buffer[bufpos];
+		/* A double-quote inside the field is always doubled; skip its twin. */
+		bufpos += (buffer[bufpos] == '"' ? 2 : 1);
+		++resultpos;
+	}
+	(*result)[resultpos] = '\0';
+	Assert(resultpos == toklen - dqcount - 2);
+
+	return toklen;
+}
+
+/*
+ * Helper function for manifestfiles hash table.
+ *
+ * Hashes the NUL-terminated string 's', not including the terminator.
+ */
+static uint32
+hash_string_pointer(char *s)
+{
+	unsigned char *ss = (unsigned char *) s;
+
+	return hash_bytes(ss, strlen(s));
+}
+
+/*
+ * Convert a character which represents a hexadecimal digit to an integer.
+ *
+ * Both upper-case and lower-case digits are accepted.
+ *
+ * Returns -1 if the character is not a hexadecimal digit.
+ */
+static int
+hexdecode_char(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+
+	return -1;
+}
+
+/*
+ * Decode a hex string into a byte string, 2 hex chars per byte.
+ *
+ * 'nbytes' is the number of bytes to produce, so 2 * nbytes characters are
+ * read from 'input' and 'result' must have room for nbytes bytes.
+ *
+ * Returns false if invalid characters are encountered; otherwise true.
+ */
+static bool
+hexdecode_string(uint8 *result, char *input, int nbytes)
+{
+	int			i;
+
+	for (i = 0; i < nbytes; ++i)
+	{
+		int			n1 = hexdecode_char(input[i * 2]);
+		int			n2 = hexdecode_char(input[i * 2 + 1]);
+
+		if (n1 < 0 || n2 < 0)
+			return false;
+		result[i] = n1 * 16 + n2;
+	}
+
+	return true;
+}
-- 
2.17.2 (Apple Git-113)