From 7e63c0ed8ffeb669759fa760012a511c2a5e647f Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 9 Mar 2020 17:59:38 -0400 Subject: [PATCH v10 2/4] Generate backup manifests for base backups. A manifest is a JSON document which includes the file name, size, last modification time, and a checksum for each file backed up, as well as a checksum for the manifest itself. By default, we use CRC-32C for the checksum algorithm, because we are trying to detect corruption and user error, not foil an adversary. However, pg_basebackup and the server-side BASE_BACKUP command now have options to select the checksum algorithm, so users wanting a cryptographic hash function can select SHA-224, SHA-256, SHA-384, or SHA-512; and users not wanting any checksums at all can disable them. Using a cryptographic hash function in place of CRC-32C consumes significantly more CPU cycles, which may slow down backups in some cases. Robert Haas with help from Rushabh Lathia and Suraj Kharage. --- src/backend/access/transam/xlog.c | 3 +- src/backend/replication/basebackup.c | 363 +++++++++++++++++++++++-- src/backend/replication/repl_gram.y | 6 + src/backend/replication/repl_scanner.l | 1 + src/backend/replication/walsender.c | 30 ++ src/bin/pg_basebackup/pg_basebackup.c | 130 ++++++++- src/include/replication/basebackup.h | 7 +- src/include/replication/walsender.h | 1 + 8 files changed, 513 insertions(+), 28 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b0e953f894..3f9908427f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10551,7 +10551,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ti->oid = pstrdup(de->d_name); ti->path = pstrdup(buflinkpath.data); ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = infotbssize ? sendTablespace(fullpath, true) : -1; + ti->size = infotbssize ? + sendTablespace(fullpath, ti->oid, true, NULL) : -1; if (tablespaces) *tablespaces = lappend(*tablespaces, ti); diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index f66cbc2428..77f15f6233 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -18,6 +18,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" +#include "common/checksum_helper.h" #include "common/file_perm.h" #include "commands/progress.h" #include "lib/stringinfo.h" @@ -32,6 +33,7 @@ #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" +#include "storage/buffile.h" #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/dsm_impl.h" @@ -39,8 +41,10 @@ #include "storage/ipc.h" #include "storage/reinit.h" #include "utils/builtins.h" +#include "utils/json.h" #include "utils/ps_status.h" #include "utils/relcache.h" +#include "utils/resowner.h" #include "utils/timestamp.h" typedef struct @@ -52,20 +56,40 @@ typedef struct bool includewal; uint32 maxrate; bool sendtblspcmapfile; + pg_checksum_type checksum_type; } basebackup_options; +struct manifest_info +{ + BufFile *buffile; + pg_checksum_type checksum_type; + pg_sha256_ctx manifest_ctx; + uint64 manifest_size; + bool first_file; + bool still_checksumming; +}; + static int64 sendDir(const char *path, int basepathlen, bool sizeonly, - List *tablespaces, bool sendtblspclinks); + List *tablespaces, bool sendtblspclinks, + manifest_info *manifest, const char *spcoid); static bool sendFile(const char *readfilename, const char *tarfilename, - struct stat *statbuf, bool missing_ok, Oid dboid); -static void sendFileWithContent(const char *filename, const char *content); + struct stat *statbuf, bool missing_ok, Oid dboid, + manifest_info *manifest, const char *spcoid); +static void sendFileWithContent(const char *filename, const char *content, + manifest_info *manifest); static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly); static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); +static void InitializeManifest(manifest_info *manifest, pg_checksum_type); +static void AppendStringToManifest(manifest_info *manifest, char *s); +static void AddFileToManifest(manifest_info *manifest, const char *spcoid, + const char *pathname, size_t size, time_t mtime, + pg_checksum_context *checksum_ctx); +static void SendBackupManifest(manifest_info *manifest); static void perform_base_backup(basebackup_options *opt); static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); @@ -102,6 +126,16 @@ do { \ (errmsg("could not read from file \"%s\"", filename))); \ } while (0) +/* + * Convenience macro for appending data to the backup manifest. + */ +#define AppendToManifest(manifest, ...) \ + { \ + char *_manifest_s = psprintf(__VA_ARGS__); \ + AppendStringToManifest(manifest, _manifest_s); \ + pfree(_manifest_s); \ + } + /* The actual number of bytes, transfer of which may cause sleep. */ static uint64 throttling_sample; @@ -251,6 +285,7 @@ perform_base_backup(basebackup_options *opt) TimeLineID endtli; StringInfo labelfile; StringInfo tblspc_map_file = NULL; + manifest_info manifest; int datadirpathlen; List *tablespaces = NIL; @@ -258,12 +293,17 @@ perform_base_backup(basebackup_options *opt) backup_streamed = 0; pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid); + /* we're going to use a BufFile, so we need a ResourceOwner */ + Assert(CurrentResourceOwner == NULL); + CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup"); + datadirpathlen = strlen(DataDir); backup_started_in_recovery = RecoveryInProgress(); labelfile = makeStringInfo(); tblspc_map_file = makeStringInfo(); + InitializeManifest(&manifest, opt->checksum_type); total_checksum_failures = 0; @@ -301,7 +341,10 @@ perform_base_backup(basebackup_options *opt) /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1; + if (opt->progress) + ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL); + else + ti->size = -1; tablespaces = lappend(tablespaces, ti); /* @@ -380,7 +423,8 @@ perform_base_backup(basebackup_options *opt) struct stat statbuf; /* In the main tar, include the backup_label first... */ - sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data); + sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data, + &manifest); /* * Send tablespace_map file if required and then the bulk of @@ -388,11 +432,14 @@ perform_base_backup(basebackup_options *opt) */ if (tblspc_map_file && opt->sendtblspcmapfile) { - sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data); - sendDir(".", 1, false, tablespaces, false); + sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data, + &manifest); + sendDir(".", 1, false, tablespaces, false, + &manifest, NULL); } else - sendDir(".", 1, false, tablespaces, true); + sendDir(".", 1, false, tablespaces, true, + &manifest, NULL); /* ... and pg_control after everything else. */ if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) @@ -400,10 +447,11 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", XLOG_CONTROL_FILE))); - sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid); + sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, + false, InvalidOid, &manifest, NULL); } else - sendTablespace(ti->path, false); + sendTablespace(ti->path, ti->oid, false, &manifest); /* * If we're including WAL, and this is the main data directory we @@ -632,7 +680,7 @@ perform_base_backup(basebackup_options *opt) * complete segment. */ StatusFilePath(pathbuf, walFileName, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", &manifest); } /* @@ -655,16 +703,20 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathbuf))); - sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid); + sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid, + &manifest, NULL); /* unconditionally mark file as archived */ StatusFilePath(pathbuf, fname, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", &manifest); } /* Send CopyDone message for the last tar file */ pq_putemptymessage('c'); } + + SendBackupManifest(&manifest); + SendXlogRecPtrResult(endptr, endtli); if (total_checksum_failures) @@ -678,6 +730,9 @@ perform_base_backup(basebackup_options *opt) errmsg("checksum verification failure during base backup"))); } + /* clean up the resource owner we created */ + WalSndResourceCleanup(true); + pgstat_progress_end_command(); } @@ -709,8 +764,11 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_maxrate = false; bool o_tablespace_map = false; bool o_noverify_checksums = false; + bool o_manifest_checksums = false; MemSet(opt, 0, sizeof(*opt)); + opt->checksum_type = CHECKSUM_TYPE_CRC32C; + foreach(lopt, options) { DefElem *defel = (DefElem *) lfirst(lopt); @@ -797,6 +855,21 @@ parse_basebackup_options(List *options, basebackup_options *opt) noverify_checksums = true; o_noverify_checksums = true; } + else if (strcmp(defel->defname, "manifest_checksums") == 0) + { + char *optval = strVal(defel->arg); + + if (o_manifest_checksums) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + if (!pg_checksum_parse_type(optval, &opt->checksum_type)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized checksum algorithm: \"%s\"", + optval))); + o_manifest_checksums = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -918,6 +991,228 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } +/* + * Initialize state so that we can construct a backup manifest. + * + * NB: Although the checksum type for the data files is configurable, the + * checksum for the manifest itself always uses SHA-256. See comments in + * SendBackupManifest. + */ +static void +InitializeManifest(manifest_info *manifest, pg_checksum_type checksum_type) +{ + manifest->buffile = BufFileCreateTemp(false); + manifest->checksum_type = checksum_type; + pg_sha256_init(&manifest->manifest_ctx); + manifest->manifest_size = UINT64CONST(0); + manifest->first_file = true; + manifest->still_checksumming = true; + + AppendToManifest(manifest, + "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n" + "\"Files\": ["); +} + +/* + * Append a cstring to the manifest. + */ +static void +AppendStringToManifest(manifest_info *manifest, char *s) +{ + int len = strlen(s); + size_t written; + + if (manifest->still_checksumming) + pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len); + written = BufFileWrite(manifest->buffile, s, len); + if (written != len) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to temporary file: %m"))); + manifest->manifest_size += len; +} + +/* + * Add an entry to the backup manifest for a file. + */ +static void +AddFileToManifest(manifest_info *manifest, const char *spcoid, + const char *pathname, size_t size, time_t mtime, + pg_checksum_context *checksum_ctx) +{ + char pathbuf[MAXPGPATH]; + int pathlen; + StringInfoData buf; + + /* + * If this file is part of a tablespace, the pathname passed to this + * function will be relative to the tar file that contains it. We want the + * pathname relative to the data directory (ignoring the intermediate + * symlink traversal). + */ + if (spcoid != NULL) + { + snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid, + pathname); + pathname = pathbuf; + } + + /* + * Each file's entry need to be separated from any entry that follows + * by a comma, but there's no comma before the first one or after the + * last one. To make that work, adding a file to the manifest starts + * by terminating the most recently added line, with a comma if + * appropriate, but does not terminate the line inserted for this file. + */ + initStringInfo(&buf); + if (manifest->first_file) + { + appendStringInfoString(&buf, "\n"); + manifest->first_file = false; + } + else + appendStringInfoString(&buf, ",\n"); + + /* + * Write the relative pathname to this file out to the manifest. The + * manifest is always stored in UTF-8, so we have to encode paths that + * are not valid in that encoding. + */ + pathlen = strlen(pathname); + if (pg_verify_mbstr(PG_UTF8, pathname, pathlen, true)) + { + appendStringInfoString(&buf, "{ \"Path\": "); + escape_json(&buf, pathname); + appendStringInfoString(&buf, ", "); + } + else + { + appendStringInfoString(&buf, "{ \"Encoded-Path\": \""); + enlargeStringInfo(&buf, 2 * pathlen); + buf.len += hex_encode((char *) pathname, pathlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\", "); + } + + appendStringInfo(&buf, "\"Size\": %zu, ", size); + + /* + * Convert last modification time to a string and append it to the + * manifest. Since it's not clear what time zone to use and since time + * zone definitions can change, possibly causing confusion, use GMT always. + */ + appendStringInfoString(&buf, "\"Last-Modified\": \""); + enlargeStringInfo(&buf, 128); + buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z", + pg_gmtime(&mtime)); + appendStringInfoString(&buf, "\""); + + /* Add checksum information. */ + if (checksum_ctx->type != CHECKSUM_TYPE_NONE) + { + uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; + int checksumlen; + + checksumlen = pg_checksum_final(checksum_ctx, checksumbuf); + + appendStringInfo(&buf, + ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"", + pg_checksum_type_name(checksum_ctx->type)); + enlargeStringInfo(&buf, 2 * checksumlen); + buf.len += hex_encode((char *) checksumbuf, checksumlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\""); + } + + /* Close out the object. */ + appendStringInfoString(&buf, " }"); + + /* OK, add it to the manifest. */ + AppendStringToManifest(manifest, buf.data); + + /* Avoid leaking memory. */ + pfree(buf.data); +} + +/* + * Finalize the backup manifest, and send it to the client. + */ +static void +SendBackupManifest(manifest_info *manifest) +{ + StringInfoData protobuf; + uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH]; + char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; + size_t manifest_bytes_done = 0; + + /* Terminate the list of files. */ + AppendStringToManifest(manifest, "],\n"); + + /* + * Append manifest checksum, so that the problems with the manifest itself + * can be detected. + * + * We always use SHA-256 for this, regardless of what algorithm is chosen + * for checksumming the files. If we ever want to make the checksum + * algorithm used for the manifest file variable, the client will need a + * way to figure out which algorithm to use as close to the beginning of + * the manifest file as possible, to avoid having to read the whole thing + * twice. + */ + manifest->still_checksumming = false; + pg_sha256_final(&manifest->manifest_ctx, checksumbuf); + AppendStringToManifest(manifest, "\"Manifest-Checksum\": \""); + hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf); + checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0'; + AppendStringToManifest(manifest, checksumstringbuf); + AppendStringToManifest(manifest, "\"}\n"); + + /* + * We've written all the data to the manifest file. Rewind the file so + * that we can read it all back. + */ + if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rewind temporary file: %m"))); + + /* Send CopyOutResponse message */ + pq_beginmessage(&protobuf, 'H'); + pq_sendbyte(&protobuf, 0); /* overall format */ + pq_sendint16(&protobuf, 0); /* natts */ + pq_endmessage(&protobuf); + + /* + * Send CopyData messages. + * + * We choose to read back the data from the temporary file in chunks of + * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O + * size, so it seems to make sense to match that value here. + */ + while (manifest_bytes_done < manifest->manifest_size) + { + char manifestbuf[BLCKSZ]; + size_t bytes_to_read; + size_t rc; + + bytes_to_read = Min(sizeof(manifestbuf), + manifest->manifest_size - manifest_bytes_done); + rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read); + if (rc != bytes_to_read) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from temporary file: %m"))); + pq_putmessage('d', manifestbuf, bytes_to_read); + manifest_bytes_done += bytes_to_read; + } + + /* No more data, so send CopyDone message */ + pq_putemptymessage('c'); + + /* Release resources */ + BufFileClose(manifest->buffile); +} + /* * Send a single resultset containing just a single * XLogRecPtr record (in text format) @@ -978,11 +1273,15 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli) * Inject a file with given name and content in the output tar stream. */ static void -sendFileWithContent(const char *filename, const char *content) +sendFileWithContent(const char *filename, const char *content, + manifest_info *manifest) { struct stat statbuf; int pad, len; + pg_checksum_context checksum_ctx; + + pg_checksum_init(&checksum_ctx, manifest->checksum_type); len = strlen(content); @@ -1017,6 +1316,10 @@ sendFileWithContent(const char *filename, const char *content) pq_putmessage('d', buf, pad); update_basebackup_progress(pad); } + + pg_checksum_update(&checksum_ctx, (uint8 *) content, len); + AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime, + &checksum_ctx); } /* @@ -1027,7 +1330,8 @@ sendFileWithContent(const char *filename, const char *content) * Only used to send auxiliary tablespaces, not PGDATA. */ int64 -sendTablespace(char *path, bool sizeonly) +sendTablespace(char *path, char *spcoid, bool sizeonly, + manifest_info *manifest) { int64 size; char pathbuf[MAXPGPATH]; @@ -1060,7 +1364,8 @@ sendTablespace(char *path, bool sizeonly) sizeonly); /* Send all the files in the tablespace version directory */ - size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); + size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest, + spcoid); return size; } @@ -1079,7 +1384,7 @@ sendTablespace(char *path, bool sizeonly) */ static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, - bool sendtblspclinks) + bool sendtblspclinks, manifest_info *manifest, const char *spcoid) { DIR *dir; struct dirent *de; @@ -1359,7 +1664,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, skip_this_dir = true; if (!skip_this_dir) - size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks); + size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, + sendtblspclinks, manifest, spcoid); } else if (S_ISREG(statbuf.st_mode)) { @@ -1367,7 +1673,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, if (!sizeonly) sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf, - true, isDbDir ? atooid(lastDir + 1) : InvalidOid); + true, isDbDir ? atooid(lastDir + 1) : InvalidOid, + manifest, spcoid); if (sent || sizeonly) { @@ -1437,8 +1744,9 @@ is_checksummed_file(const char *fullpath, const char *filename) * and the file did not exist. */ static bool -sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, - bool missing_ok, Oid dboid) +sendFile(const char *readfilename, const char *tarfilename, + struct stat *statbuf, bool missing_ok, Oid dboid, + manifest_info *manifest, const char *spcoid) { FILE *fp; BlockNumber blkno = 0; @@ -1455,6 +1763,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf int segmentno = 0; char *segmentpath; bool verify_checksum = false; + pg_checksum_context checksum_ctx; + + pg_checksum_init(&checksum_ctx, manifest->checksum_type); fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1625,6 +1936,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf (errmsg("base backup could not send data, aborting backup"))); update_basebackup_progress(cnt); + /* Also feed it to the checksum machinery. */ + pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt); + len += cnt; throttle(cnt); @@ -1649,6 +1963,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { cnt = Min(sizeof(buf), statbuf->st_size - len); pq_putmessage('d', buf, cnt); + pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt); update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -1657,7 +1972,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf /* * Pad to 512 byte boundary, per tar format requirements. (This small - * piece of data is probably not worth throttling.) + * piece of data is probably not worth throttling, and is not checksummed + * because it's not actually part of the file.) */ pad = ((len + 511) & ~511) - len; if (pad > 0) @@ -1682,6 +1998,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf total_checksum_failures += checksum_failures; + AddFileToManifest(manifest, spcoid, tarfilename, statbuf->st_size, + statbuf->st_mtime, &checksum_ctx); + return true; } diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 14fcd53221..0621884ad8 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void); %token K_EXPORT_SNAPSHOT %token K_NOEXPORT_SNAPSHOT %token K_USE_SNAPSHOT +%token K_MANIFEST_CHECKSUMS %type command %type base_backup start_replication start_logical_replication @@ -214,6 +215,11 @@ base_backup_opt: $$ = makeDefElem("noverify_checksums", (Node *)makeInteger(true), -1); } + | K_MANIFEST_CHECKSUMS SCONST + { + $$ = makeDefElem("manifest_checksums", + (Node *)makeString($2), -1); + } ; create_replication_slot: diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 14c9a1e798..5653d233b5 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -107,6 +107,7 @@ EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; } NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; } USE_SNAPSHOT { return K_USE_SNAPSHOT; } WAIT { return K_WAIT; } +MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; } "," { return ','; } ";" { return ';'; } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ae4a9cbe11..cc0b97627c 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -315,6 +315,8 @@ WalSndErrorCleanup(void) replication_active = false; + WalSndResourceCleanup(false); + if (got_STOPPING || got_SIGUSR2) proc_exit(0); @@ -322,6 +324,34 @@ WalSndErrorCleanup(void) WalSndSetState(WALSNDSTATE_STARTUP); } +/* + * Clean up any ResourceOwner we created. + */ +void +WalSndResourceCleanup(bool isCommit) +{ + ResourceOwner resowner; + + if (CurrentResourceOwner == NULL) + return; + + /* + * Deleting CurrentResourceOwner is not allowed, so we must save a + * pointer in a local variable and clear it first. + */ + resowner = CurrentResourceOwner; + CurrentResourceOwner = NULL; + + /* Now we can release resources and delete it. */ + ResourceOwnerRelease(resowner, + RESOURCE_RELEASE_BEFORE_LOCKS, isCommit, true); + ResourceOwnerRelease(resowner, + RESOURCE_RELEASE_LOCKS, isCommit, true); + ResourceOwnerRelease(resowner, + RESOURCE_RELEASE_AFTER_LOCKS, isCommit, true); + ResourceOwnerDelete(resowner); +} + /* * Handle a client's connection abort in an orderly manner. */ diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 48bd838803..235416a7c2 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -88,6 +88,12 @@ typedef struct UnpackTarState FILE *file; } UnpackTarState; +typedef struct WriteManifestState +{ + char filename[MAXPGPATH]; + FILE *file; +} WriteManifestState; + typedef void (*WriteDataCallback) (size_t nbytes, char *buf, void *callback_data); @@ -135,6 +141,7 @@ static bool temp_replication_slot = true; static bool create_slot = false; static bool no_slot = false; static bool verify_checksums = true; +static char *manifest_checksums = NULL; static bool success = false; static bool made_new_pgdata = false; @@ -180,6 +187,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data); static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum); static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data); +static void ReceiveBackupManifest(PGconn *conn); +static void ReceiveBackupManifestChunk(size_t r, char *copybuf, + void *callback_data); +static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf); +static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf, + void *callback_data); static void BaseBackup(void); static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline, @@ -386,6 +399,8 @@ usage(void) printf(_(" --no-slot prevent creation of temporary replication slot\n")); printf(_(" --no-verify-checksums\n" " do not verify checksums\n")); + printf(_(" --manifest-checksums=SHA{224,256,384,512}|CRC32C|NONE\n" + " use algorithm for manifest checksums\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connection string\n")); @@ -1184,6 +1199,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) } } + /* + * Normally, we emit the backup manifest as a separate file, but when + * we're writing a tarfile to stdout, we don't have that option, so + * include it in the one tarfile we've got. + */ + if (strcmp(basedir, "-") == 0) + { + char header[512]; + PQExpBufferData buf; + + initPQExpBuffer(&buf); + ReceiveBackupManifestInMemory(conn, &buf); + if (PQExpBufferDataBroken(buf)) + { + pg_log_error("out of memory"); + exit(1); + } + tarCreateHeader(header, "backup_manifest", NULL, buf.len, + pg_file_create_mode, 04000, 02000, + time(NULL)); + writeTarData(&state, header, sizeof(header)); + writeTarData(&state, buf.data, buf.len); + termPQExpBuffer(&buf); + } + /* 2 * 512 bytes empty data at end of file */ writeTarData(&state, zerobuf, sizeof(zerobuf)); @@ -1655,6 +1695,64 @@ ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data) } /* continuing data in existing file */ } +/* + * Receive the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifest(PGconn *conn) +{ + WriteManifestState state; + + snprintf(state.filename, sizeof(state.filename), + "%s/backup_manifest", basedir); + state.file = fopen(state.filename, "wb"); + if (state.file == NULL) + { + pg_log_error("could not create file \"%s\": %m", state.filename); + exit(1); + } + + ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state); + + fclose(state.file); +} + +/* + * Receive one chunk of the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data) +{ + WriteManifestState *state = callback_data; + + if (fwrite(copybuf, r, 1, state->file) != 1) + { + pg_log_error("could not write to file \"%s\": %m", state->filename); + exit(1); + } +} + +/* + * Receive the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf) +{ + ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf); +} + +/* + * Receive one chunk of the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf, + void *callback_data) +{ + PQExpBuffer buf = callback_data; + + appendPQExpBuffer(buf, copybuf, r); +} + static void BaseBackup(void) { @@ -1665,6 +1763,7 @@ BaseBackup(void) char *basebkp; char escaped_label[MAXPGPATH]; char *maxrate_clause = NULL; + char *manifest_checksums_clause = NULL; int i; char xlogstart[64]; char xlogend[64]; @@ -1672,6 +1771,7 @@ BaseBackup(void) maxServerMajor; int serverVersion, serverMajor; + int writing_to_stdout; Assert(conn != NULL); @@ -1725,6 +1825,9 @@ BaseBackup(void) if (maxrate > 0) maxrate_clause = psprintf("MAX_RATE %u", maxrate); + if (manifest_checksums != NULL) + manifest_checksums_clause = psprintf("MANIFEST_CHECKSUMS '%s'", + manifest_checksums); if (verbose) pg_log_info("initiating base backup, waiting for checkpoint to complete"); @@ -1739,7 +1842,7 @@ BaseBackup(void) } basebkp = - psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s", + psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s", escaped_label, showprogress ? "PROGRESS" : "", includewal == FETCH_WAL ? "WAL" : "", @@ -1747,7 +1850,8 @@ BaseBackup(void) includewal == NO_WAL ? "" : "NOWAIT", maxrate_clause ? maxrate_clause : "", format == 't' ? "TABLESPACE_MAP" : "", - verify_checksums ? "" : "NOVERIFY_CHECKSUMS"); + verify_checksums ? "" : "NOVERIFY_CHECKSUMS", + manifest_checksums_clause ? manifest_checksums_clause : ""); if (PQsendQuery(conn, basebkp) == 0) { @@ -1835,7 +1939,8 @@ BaseBackup(void) /* * When writing to stdout, require a single tablespace */ - if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1) + writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0; + if (writing_to_stdout && PQntuples(res) > 1) { pg_log_error("can only write single tablespace to stdout, database has %d", PQntuples(res)); @@ -1864,6 +1969,19 @@ BaseBackup(void) ReceiveAndUnpackTarFile(conn, res, i); } /* Loop over all tablespaces */ + /* + * Now receive backup manifest, if appropriate. + * + * If we're writing a tarfile to stdout, ReceiveTarFile will have already + * processed the backup manifest and included it in the output tarfile. + * Such a configuration doesn't allow for writing multiple files. + * + * If we're talking to an older server, it won't send a backup manifest, + * so don't try to receive one. + */ + if (!writing_to_stdout && serverMajor >= 1300) + ReceiveBackupManifest(conn); + if (showprogress) { progress_report(PQntuples(res), NULL, true); @@ -2066,6 +2184,7 @@ main(int argc, char **argv) {"waldir", required_argument, NULL, 1}, {"no-slot", no_argument, NULL, 2}, {"no-verify-checksums", no_argument, NULL, 3}, + {"manifest-checksums", required_argument, NULL, 'm'}, {NULL, 0, NULL, 0} }; int c; @@ -2093,7 +2212,7 @@ main(int argc, char **argv) atexit(cleanup_directories_atexit); - while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvP", + while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvPm:", long_options, &option_index)) != -1) { switch (c) @@ -2234,6 +2353,9 @@ main(int argc, char **argv) case 3: verify_checksums = false; break; + case 'm': + manifest_checksums = pg_strdup(optarg); + break; default: /* diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h index 07ed281bd6..d5b594c928 100644 --- a/src/include/replication/basebackup.h +++ b/src/include/replication/basebackup.h @@ -12,6 +12,7 @@ #ifndef _BASEBACKUP_H #define _BASEBACKUP_H +#include "lib/stringinfo.h" #include "nodes/replnodes.h" /* @@ -29,8 +30,12 @@ typedef struct int64 size; } tablespaceinfo; +struct manifest_info; +typedef struct manifest_info manifest_info; + extern void SendBaseBackup(BaseBackupCmd *cmd); -extern int64 sendTablespace(char *path, bool sizeonly); +extern int64 sendTablespace(char *path, char *oid, bool sizeonly, + manifest_info *manifest); #endif /* _BASEBACKUP_H */ diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index fd4305e53f..40d81b87f0 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -38,6 +38,7 @@ extern bool log_replication_commands; extern void InitWalSender(void); extern bool exec_replication_command(const char *query_string); extern void WalSndErrorCleanup(void); +extern void WalSndResourceCleanup(bool isCommit); extern void WalSndSignals(void); extern Size WalSndShmemSize(void); extern void WalSndShmemInit(void); -- 2.17.2 (Apple Git-113)