v10-0002-Generate-backup-manifests-for-base-backups.patch
application/octet-stream
Filename: v10-0002-Generate-backup-manifests-for-base-backups.patch
Type: application/octet-stream
Part: 3
Message:
Re: backup manifests
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v10-0002
Subject: Generate backup manifests for base backups.
| File | + | − |
|---|---|---|
| src/backend/access/transam/xlog.c | 2 | 1 |
| src/backend/replication/basebackup.c | 341 | 22 |
| src/backend/replication/repl_gram.y | 6 | 0 |
| src/backend/replication/repl_scanner.l | 1 | 0 |
| src/backend/replication/walsender.c | 30 | 0 |
| src/bin/pg_basebackup/pg_basebackup.c | 126 | 4 |
| src/include/replication/basebackup.h | 6 | 1 |
| src/include/replication/walsender.h | 1 | 0 |
From 7e63c0ed8ffeb669759fa760012a511c2a5e647f Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 9 Mar 2020 17:59:38 -0400
Subject: [PATCH v10 2/4] Generate backup manifests for base backups.
A manifest is a JSON document which includes the file name, size, last
modification time, and a checksum for each file backed up, as well as
a checksum for the manifest itself. By default, we use CRC-32C for the
checksum algorithm, because we are trying to detect corruption and
user error, not foil an adversary. However, pg_basebackup and the
server-side BASE_BACKUP command now have options to select the
checksum algorithm, so users wanting a cryptographic hash function can
select SHA-224, SHA-256, SHA-384, or SHA-512; and users not wanting
any checksums at all can disable them. Using a cryptographic hash
function in place of CRC-32C consumes significantly more CPU cycles,
which may slow down backups in some cases.
Robert Haas with help from Rushabh Lathia and Suraj Kharage.
---
src/backend/access/transam/xlog.c | 3 +-
src/backend/replication/basebackup.c | 363 +++++++++++++++++++++++--
src/backend/replication/repl_gram.y | 6 +
src/backend/replication/repl_scanner.l | 1 +
src/backend/replication/walsender.c | 30 ++
src/bin/pg_basebackup/pg_basebackup.c | 130 ++++++++-
src/include/replication/basebackup.h | 7 +-
src/include/replication/walsender.h | 1 +
8 files changed, 513 insertions(+), 28 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b0e953f894..3f9908427f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10551,7 +10551,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
ti->oid = pstrdup(de->d_name);
ti->path = pstrdup(buflinkpath.data);
ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+ ti->size = infotbssize ?
+ sendTablespace(fullpath, ti->oid, true, NULL) : -1;
if (tablespaces)
*tablespaces = lappend(*tablespaces, ti);
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index f66cbc2428..77f15f6233 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -18,6 +18,7 @@
#include "access/xlog_internal.h" /* for pg_start/stop_backup */
#include "catalog/pg_type.h"
+#include "common/checksum_helper.h"
#include "common/file_perm.h"
#include "commands/progress.h"
#include "lib/stringinfo.h"
@@ -32,6 +33,7 @@
#include "replication/basebackup.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
+#include "storage/buffile.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/dsm_impl.h"
@@ -39,8 +41,10 @@
#include "storage/ipc.h"
#include "storage/reinit.h"
#include "utils/builtins.h"
+#include "utils/json.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"
+#include "utils/resowner.h"
#include "utils/timestamp.h"
typedef struct
@@ -52,20 +56,40 @@ typedef struct
bool includewal;
uint32 maxrate;
bool sendtblspcmapfile;
+ pg_checksum_type checksum_type;
} basebackup_options;
+/*
+ * Working state for the backup manifest that is accumulated while a base
+ * backup is being sent.
+ */
+struct manifest_info
+{
+ BufFile *buffile; /* temporary file the manifest text is written to */
+ pg_checksum_type checksum_type; /* algorithm used for per-file checksums */
+ pg_sha256_ctx manifest_ctx; /* running SHA-256 over the manifest text */
+ uint64 manifest_size; /* number of bytes appended to buffile so far */
+ bool first_file; /* true until the first file entry has been added */
+ bool still_checksumming; /* while true, appended text feeds manifest_ctx */
+};
+
static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
- List *tablespaces, bool sendtblspclinks);
+ List *tablespaces, bool sendtblspclinks,
+ manifest_info *manifest, const char *spcoid);
static bool sendFile(const char *readfilename, const char *tarfilename,
- struct stat *statbuf, bool missing_ok, Oid dboid);
-static void sendFileWithContent(const char *filename, const char *content);
+ struct stat *statbuf, bool missing_ok, Oid dboid,
+ manifest_info *manifest, const char *spcoid);
+static void sendFileWithContent(const char *filename, const char *content,
+ manifest_info *manifest);
static int64 _tarWriteHeader(const char *filename, const char *linktarget,
struct stat *statbuf, bool sizeonly);
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
bool sizeonly);
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
+static void InitializeManifest(manifest_info *manifest, pg_checksum_type);
+static void AppendStringToManifest(manifest_info *manifest, char *s);
+static void AddFileToManifest(manifest_info *manifest, const char *spcoid,
+ const char *pathname, size_t size, time_t mtime,
+ pg_checksum_context *checksum_ctx);
+static void SendBackupManifest(manifest_info *manifest);
static void perform_base_backup(basebackup_options *opt);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
@@ -102,6 +126,16 @@ do { \
(errmsg("could not read from file \"%s\"", filename))); \
} while (0)
+/*
+ * Convenience macro for appending printf-style formatted data to the backup
+ * manifest.
+ *
+ * The arguments are passed to psprintf(); the resulting string is appended
+ * with AppendStringToManifest() and then freed.
+ *
+ * The body is wrapped in do { ... } while (0) so that an invocation followed
+ * by a semicolon is a single statement, and therefore remains safe as the
+ * unbraced body of an if/else.
+ */
+#define AppendToManifest(manifest, ...) \
+	do { \
+		char *_manifest_s = psprintf(__VA_ARGS__); \
+		AppendStringToManifest(manifest, _manifest_s); \
+		pfree(_manifest_s); \
+	} while (0)
+
/* The actual number of bytes, transfer of which may cause sleep. */
static uint64 throttling_sample;
@@ -251,6 +285,7 @@ perform_base_backup(basebackup_options *opt)
TimeLineID endtli;
StringInfo labelfile;
StringInfo tblspc_map_file = NULL;
+ manifest_info manifest;
int datadirpathlen;
List *tablespaces = NIL;
@@ -258,12 +293,17 @@ perform_base_backup(basebackup_options *opt)
backup_streamed = 0;
pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid);
+ /* we're going to use a BufFile, so we need a ResourceOwner */
+ Assert(CurrentResourceOwner == NULL);
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
+
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
labelfile = makeStringInfo();
tblspc_map_file = makeStringInfo();
+ InitializeManifest(&manifest, opt->checksum_type);
total_checksum_failures = 0;
@@ -301,7 +341,10 @@ perform_base_backup(basebackup_options *opt)
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
+ if (opt->progress)
+ ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL);
+ else
+ ti->size = -1;
tablespaces = lappend(tablespaces, ti);
/*
@@ -380,7 +423,8 @@ perform_base_backup(basebackup_options *opt)
struct stat statbuf;
/* In the main tar, include the backup_label first... */
- sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
+ sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data,
+ &manifest);
/*
* Send tablespace_map file if required and then the bulk of
@@ -388,11 +432,14 @@ perform_base_backup(basebackup_options *opt)
*/
if (tblspc_map_file && opt->sendtblspcmapfile)
{
- sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
- sendDir(".", 1, false, tablespaces, false);
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data,
+ &manifest);
+ sendDir(".", 1, false, tablespaces, false,
+ &manifest, NULL);
}
else
- sendDir(".", 1, false, tablespaces, true);
+ sendDir(".", 1, false, tablespaces, true,
+ &manifest, NULL);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -400,10 +447,11 @@ perform_base_backup(basebackup_options *opt)
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
XLOG_CONTROL_FILE)));
- sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
+ sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
+ false, InvalidOid, &manifest, NULL);
}
else
- sendTablespace(ti->path, false);
+ sendTablespace(ti->path, ti->oid, false, &manifest);
/*
* If we're including WAL, and this is the main data directory we
@@ -632,7 +680,7 @@ perform_base_backup(basebackup_options *opt)
* complete segment.
*/
StatusFilePath(pathbuf, walFileName, ".done");
- sendFileWithContent(pathbuf, "");
+ sendFileWithContent(pathbuf, "", &manifest);
}
/*
@@ -655,16 +703,20 @@ perform_base_backup(basebackup_options *opt)
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", pathbuf)));
- sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
+ sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid,
+ &manifest, NULL);
/* unconditionally mark file as archived */
StatusFilePath(pathbuf, fname, ".done");
- sendFileWithContent(pathbuf, "");
+ sendFileWithContent(pathbuf, "", &manifest);
}
/* Send CopyDone message for the last tar file */
pq_putemptymessage('c');
}
+
+ SendBackupManifest(&manifest);
+
SendXlogRecPtrResult(endptr, endtli);
if (total_checksum_failures)
@@ -678,6 +730,9 @@ perform_base_backup(basebackup_options *opt)
errmsg("checksum verification failure during base backup")));
}
+ /* clean up the resource owner we created */
+ WalSndResourceCleanup(true);
+
pgstat_progress_end_command();
}
@@ -709,8 +764,11 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_maxrate = false;
bool o_tablespace_map = false;
bool o_noverify_checksums = false;
+ bool o_manifest_checksums = false;
MemSet(opt, 0, sizeof(*opt));
+ opt->checksum_type = CHECKSUM_TYPE_CRC32C;
+
foreach(lopt, options)
{
DefElem *defel = (DefElem *) lfirst(lopt);
@@ -797,6 +855,21 @@ parse_basebackup_options(List *options, basebackup_options *opt)
noverify_checksums = true;
o_noverify_checksums = true;
}
+ else if (strcmp(defel->defname, "manifest_checksums") == 0)
+ {
+ char *optval = strVal(defel->arg);
+
+ if (o_manifest_checksums)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ if (!pg_checksum_parse_type(optval, &opt->checksum_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized checksum algorithm: \"%s\"",
+ optval)));
+ o_manifest_checksums = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -918,6 +991,228 @@ SendBackupHeader(List *tablespaces)
pq_puttextmessage('C', "SELECT");
}
+/*
+ * Prepare a manifest_info so that file entries can be added to it.
+ *
+ * A temporary BufFile is created to hold the manifest text, the bookkeeping
+ * fields are reset, and the opening of the JSON document (version number and
+ * the start of the "Files" array) is written out.
+ *
+ * NB: The checksum type used for the data files is selectable, but the
+ * manifest's own checksum is always SHA-256. See the comments in
+ * SendBackupManifest.
+ */
+static void
+InitializeManifest(manifest_info *manifest, pg_checksum_type checksum_type)
+{
+	/* Backing store for the manifest text. */
+	manifest->buffile = BufFileCreateTemp(false);
+
+	/* Bookkeeping: nothing written yet, no file entries yet. */
+	manifest->manifest_size = UINT64CONST(0);
+	manifest->first_file = true;
+	manifest->checksum_type = checksum_type;
+
+	/* Start checksumming the manifest text itself. */
+	pg_sha256_init(&manifest->manifest_ctx);
+	manifest->still_checksumming = true;
+
+	/* The header contains no format directives, so append it verbatim. */
+	AppendStringToManifest(manifest,
+						   "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
+						   "\"Files\": [");
+}
+
+/*
+ * Append a NUL-terminated string to the manifest.
+ *
+ * The text is written to the manifest's temporary file and counted in
+ * manifest_size. While still_checksumming is set, it is also fed into the
+ * manifest's SHA-256 context. An ERROR is raised if fewer bytes than
+ * requested were written.
+ */
+static void
+AppendStringToManifest(manifest_info *manifest, char *s)
+{
+	/* size_t, not int: matches strlen()'s result and the type of "written" */
+	size_t		len = strlen(s);
+	size_t		written;
+
+	if (manifest->still_checksumming)
+		pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
+	written = BufFileWrite(manifest->buffile, s, len);
+	if (written != len)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to temporary file: %m")));
+	manifest->manifest_size += len;
+}
+
+/*
+ * Add an entry to the backup manifest for a file.
+ *
+ * manifest: manifest being built; the entry is appended to it.
+ * spcoid: if not NULL, the entry's path is prefixed with pg_tblspc/<spcoid>/.
+ * pathname: path of the file as stored in the tar file that contains it.
+ * size: value emitted as the entry's "Size".
+ * mtime: modification time, emitted as "Last-Modified" in GMT.
+ * checksum_ctx: checksum state for the file's contents; unless its type is
+ * CHECKSUM_TYPE_NONE it is finalized here and the result is emitted in hex.
+ */
+static void
+AddFileToManifest(manifest_info *manifest, const char *spcoid,
+ const char *pathname, size_t size, time_t mtime,
+ pg_checksum_context *checksum_ctx)
+{
+ char pathbuf[MAXPGPATH];
+ int pathlen;
+ StringInfoData buf;
+
+ /*
+ * If this file is part of a tablespace, the pathname passed to this
+ * function will be relative to the tar file that contains it. We want the
+ * pathname relative to the data directory (ignoring the intermediate
+ * symlink traversal).
+ *
+ * NOTE(review): the snprintf result is not checked, so an over-long
+ * combined path would be truncated silently -- confirm that cannot occur.
+ */
+ if (spcoid != NULL)
+ {
+ snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
+ pathname);
+ pathname = pathbuf;
+ }
+
+ /*
+ * Each file's entry needs to be separated from any entry that follows
+ * by a comma, but there's no comma before the first one or after the
+ * last one. To make that work, adding a file to the manifest starts
+ * by terminating the most recently added line, with a comma if
+ * appropriate, but does not terminate the line inserted for this file.
+ */
+ initStringInfo(&buf);
+ if (manifest->first_file)
+ {
+ appendStringInfoString(&buf, "\n");
+ manifest->first_file = false;
+ }
+ else
+ appendStringInfoString(&buf, ",\n");
+
+ /*
+ * Write the relative pathname to this file out to the manifest. The
+ * manifest is always stored in UTF-8, so we have to encode paths that
+ * are not valid in that encoding.
+ */
+ pathlen = strlen(pathname);
+ if (pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
+ {
+ appendStringInfoString(&buf, "{ \"Path\": ");
+ escape_json(&buf, pathname);
+ appendStringInfoString(&buf, ", ");
+ }
+ else
+ {
+ appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
+ /* reserve room, then let hex_encode write straight into buf's storage */
+ enlargeStringInfo(&buf, 2 * pathlen);
+ buf.len += hex_encode((char *) pathname, pathlen,
+ &buf.data[buf.len]);
+ appendStringInfoString(&buf, "\", ");
+ }
+
+ appendStringInfo(&buf, "\"Size\": %zu, ", size);
+
+ /*
+ * Convert last modification time to a string and append it to the
+ * manifest. Since it's not clear what time zone to use and since time
+ * zone definitions can change, possibly causing confusion, use GMT always.
+ */
+ appendStringInfoString(&buf, "\"Last-Modified\": \"");
+ /* 128 bytes are reserved here and passed as the limit to pg_strftime */
+ enlargeStringInfo(&buf, 128);
+ buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
+ pg_gmtime(&mtime));
+ appendStringInfoString(&buf, "\"");
+
+ /* Add checksum information. */
+ if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
+ {
+ uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+ int checksumlen;
+
+ checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
+
+ appendStringInfo(&buf,
+ ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
+ pg_checksum_type_name(checksum_ctx->type));
+ /* two hex digits per checksum byte, written directly into buf */
+ enlargeStringInfo(&buf, 2 * checksumlen);
+ buf.len += hex_encode((char *) checksumbuf, checksumlen,
+ &buf.data[buf.len]);
+ appendStringInfoString(&buf, "\"");
+ }
+
+ /* Close out the object. */
+ appendStringInfoString(&buf, " }");
+
+ /* OK, add it to the manifest. */
+ AppendStringToManifest(manifest, buf.data);
+
+ /* Avoid leaking memory. */
+ pfree(buf.data);
+}
+
+/*
+ * Finalize the backup manifest, and send it to the client.
+ *
+ * The list of files is terminated, the SHA-256 checksum of everything
+ * written so far is appended as "Manifest-Checksum", and the temporary file
+ * is rewound and streamed to the client as a CopyOutResponse followed by
+ * CopyData messages and a CopyDone. The temporary file is closed afterwards.
+ */
+static void
+SendBackupManifest(manifest_info *manifest)
+{
+ StringInfoData protobuf;
+ uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
+ char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
+ size_t manifest_bytes_done = 0;
+
+ /* Terminate the list of files. */
+ AppendStringToManifest(manifest, "],\n");
+
+ /*
+ * Append manifest checksum, so that problems with the manifest itself
+ * can be detected.
+ *
+ * We always use SHA-256 for this, regardless of what algorithm is chosen
+ * for checksumming the files. If we ever want to make the checksum
+ * algorithm used for the manifest file variable, the client will need a
+ * way to figure out which algorithm to use as close to the beginning of
+ * the manifest file as possible, to avoid having to read the whole thing
+ * twice.
+ */
+ /* stop checksumming first: the checksum line itself is not covered */
+ manifest->still_checksumming = false;
+ pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
+ AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
+ hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
+ checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
+ AppendStringToManifest(manifest, checksumstringbuf);
+ AppendStringToManifest(manifest, "\"}\n");
+
+ /*
+ * We've written all the data to the manifest file. Rewind the file so
+ * that we can read it all back.
+ */
+ if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not rewind temporary file: %m")));
+
+ /* Send CopyOutResponse message */
+ pq_beginmessage(&protobuf, 'H');
+ pq_sendbyte(&protobuf, 0); /* overall format */
+ pq_sendint16(&protobuf, 0); /* natts */
+ pq_endmessage(&protobuf);
+
+ /*
+ * Send CopyData messages.
+ *
+ * We choose to read back the data from the temporary file in chunks of
+ * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
+ * size, so it seems to make sense to match that value here.
+ */
+ while (manifest_bytes_done < manifest->manifest_size)
+ {
+ char manifestbuf[BLCKSZ];
+ size_t bytes_to_read;
+ size_t rc;
+
+ /* read a full buffer, or whatever remains if that is less */
+ bytes_to_read = Min(sizeof(manifestbuf),
+ manifest->manifest_size - manifest_bytes_done);
+ rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
+ if (rc != bytes_to_read)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from temporary file: %m")));
+ pq_putmessage('d', manifestbuf, bytes_to_read);
+ manifest_bytes_done += bytes_to_read;
+ }
+
+ /* No more data, so send CopyDone message */
+ pq_putemptymessage('c');
+
+ /* Release resources */
+ BufFileClose(manifest->buffile);
+}
+
/*
* Send a single resultset containing just a single
* XLogRecPtr record (in text format)
@@ -978,11 +1273,15 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
* Inject a file with given name and content in the output tar stream.
*/
static void
-sendFileWithContent(const char *filename, const char *content)
+sendFileWithContent(const char *filename, const char *content,
+ manifest_info *manifest)
{
struct stat statbuf;
int pad,
len;
+ pg_checksum_context checksum_ctx;
+
+ pg_checksum_init(&checksum_ctx, manifest->checksum_type);
len = strlen(content);
@@ -1017,6 +1316,10 @@ sendFileWithContent(const char *filename, const char *content)
pq_putmessage('d', buf, pad);
update_basebackup_progress(pad);
}
+
+ pg_checksum_update(&checksum_ctx, (uint8 *) content, len);
+ AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
+ &checksum_ctx);
}
/*
@@ -1027,7 +1330,8 @@ sendFileWithContent(const char *filename, const char *content)
* Only used to send auxiliary tablespaces, not PGDATA.
*/
int64
-sendTablespace(char *path, bool sizeonly)
+sendTablespace(char *path, char *spcoid, bool sizeonly,
+ manifest_info *manifest)
{
int64 size;
char pathbuf[MAXPGPATH];
@@ -1060,7 +1364,8 @@ sendTablespace(char *path, bool sizeonly)
sizeonly);
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest,
+ spcoid);
return size;
}
@@ -1079,7 +1384,7 @@ sendTablespace(char *path, bool sizeonly)
*/
static int64
sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
- bool sendtblspclinks)
+ bool sendtblspclinks, manifest_info *manifest, const char *spcoid)
{
DIR *dir;
struct dirent *de;
@@ -1359,7 +1664,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
skip_this_dir = true;
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces,
+ sendtblspclinks, manifest, spcoid);
}
else if (S_ISREG(statbuf.st_mode))
{
@@ -1367,7 +1673,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
if (!sizeonly)
sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
- true, isDbDir ? atooid(lastDir + 1) : InvalidOid);
+ true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
+ manifest, spcoid);
if (sent || sizeonly)
{
@@ -1437,8 +1744,9 @@ is_checksummed_file(const char *fullpath, const char *filename)
* and the file did not exist.
*/
static bool
-sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
- bool missing_ok, Oid dboid)
+sendFile(const char *readfilename, const char *tarfilename,
+ struct stat *statbuf, bool missing_ok, Oid dboid,
+ manifest_info *manifest, const char *spcoid)
{
FILE *fp;
BlockNumber blkno = 0;
@@ -1455,6 +1763,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
int segmentno = 0;
char *segmentpath;
bool verify_checksum = false;
+ pg_checksum_context checksum_ctx;
+
+ pg_checksum_init(&checksum_ctx, manifest->checksum_type);
fp = AllocateFile(readfilename, "rb");
if (fp == NULL)
@@ -1625,6 +1936,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
(errmsg("base backup could not send data, aborting backup")));
update_basebackup_progress(cnt);
+ /* Also feed it to the checksum machinery. */
+ pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
+
len += cnt;
throttle(cnt);
@@ -1649,6 +1963,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
{
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
+ pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
update_basebackup_progress(cnt);
len += cnt;
throttle(cnt);
@@ -1657,7 +1972,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
/*
* Pad to 512 byte boundary, per tar format requirements. (This small
- * piece of data is probably not worth throttling.)
+ * piece of data is probably not worth throttling, and is not checksummed
+ * because it's not actually part of the file.)
*/
pad = ((len + 511) & ~511) - len;
if (pad > 0)
@@ -1682,6 +1998,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
total_checksum_failures += checksum_failures;
+ AddFileToManifest(manifest, spcoid, tarfilename, statbuf->st_size,
+ statbuf->st_mtime, &checksum_ctx);
+
return true;
}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 14fcd53221..0621884ad8 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void);
%token K_EXPORT_SNAPSHOT
%token K_NOEXPORT_SNAPSHOT
%token K_USE_SNAPSHOT
+%token K_MANIFEST_CHECKSUMS
%type <node> command
%type <node> base_backup start_replication start_logical_replication
@@ -214,6 +215,11 @@ base_backup_opt:
$$ = makeDefElem("noverify_checksums",
(Node *)makeInteger(true), -1);
}
+ | K_MANIFEST_CHECKSUMS SCONST
+ {
+ $$ = makeDefElem("manifest_checksums",
+ (Node *)makeString($2), -1);
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 14c9a1e798..5653d233b5 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -107,6 +107,7 @@ EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; }
NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; }
USE_SNAPSHOT { return K_USE_SNAPSHOT; }
WAIT { return K_WAIT; }
+MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
"," { return ','; }
";" { return ';'; }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index ae4a9cbe11..cc0b97627c 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -315,6 +315,8 @@ WalSndErrorCleanup(void)
replication_active = false;
+ WalSndResourceCleanup(false);
+
if (got_STOPPING || got_SIGUSR2)
proc_exit(0);
@@ -322,6 +324,34 @@ WalSndErrorCleanup(void)
WalSndSetState(WALSNDSTATE_STARTUP);
}
+/*
+ * Release and delete the ResourceOwner currently installed as
+ * CurrentResourceOwner, if there is one.
+ *
+ * isCommit is passed through to each ResourceOwnerRelease() call.
+ */
+void
+WalSndResourceCleanup(bool isCommit)
+{
+	ResourceOwner owner = CurrentResourceOwner;
+
+	/* Nothing to clean up unless a resource owner is installed. */
+	if (owner == NULL)
+		return;
+
+	/*
+	 * A resource owner may not be deleted while it is still
+	 * CurrentResourceOwner, so detach it before going any further.
+	 */
+	CurrentResourceOwner = NULL;
+
+	/* Run the three release phases in order, then delete the owner. */
+	ResourceOwnerRelease(owner, RESOURCE_RELEASE_BEFORE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(owner, RESOURCE_RELEASE_LOCKS, isCommit, true);
+	ResourceOwnerRelease(owner, RESOURCE_RELEASE_AFTER_LOCKS, isCommit, true);
+	ResourceOwnerDelete(owner);
+}
+
/*
* Handle a client's connection abort in an orderly manner.
*/
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 48bd838803..235416a7c2 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -88,6 +88,12 @@ typedef struct UnpackTarState
FILE *file;
} UnpackTarState;
+/* State handed to ReceiveBackupManifestChunk while the manifest is written. */
+typedef struct WriteManifestState
+{
+ char filename[MAXPGPATH]; /* path of the manifest file being written */
+ FILE *file; /* stream the received chunks are written to */
+} WriteManifestState;
+
typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
void *callback_data);
@@ -135,6 +141,7 @@ static bool temp_replication_slot = true;
static bool create_slot = false;
static bool no_slot = false;
static bool verify_checksums = true;
+static char *manifest_checksums = NULL;
static bool success = false;
static bool made_new_pgdata = false;
@@ -180,6 +187,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf,
void *callback_data);
+static void ReceiveBackupManifest(PGconn *conn);
+static void ReceiveBackupManifestChunk(size_t r, char *copybuf,
+ void *callback_data);
+static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf);
+static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+ void *callback_data);
static void BaseBackup(void);
static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
@@ -386,6 +399,8 @@ usage(void)
printf(_(" --no-slot prevent creation of temporary replication slot\n"));
printf(_(" --no-verify-checksums\n"
" do not verify checksums\n"));
+ printf(_(" --manifest-checksums=SHA{224,256,384,512}|CRC32C|NONE\n"
+ " use algorithm for manifest checksums\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=CONNSTR connection string\n"));
@@ -1184,6 +1199,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
}
}
+ /*
+ * Normally, we emit the backup manifest as a separate file, but when
+ * we're writing a tarfile to stdout, we don't have that option, so
+ * include it in the one tarfile we've got.
+ */
+ if (strcmp(basedir, "-") == 0)
+ {
+ char header[512];
+ PQExpBufferData buf;
+
+ initPQExpBuffer(&buf);
+ ReceiveBackupManifestInMemory(conn, &buf);
+ if (PQExpBufferDataBroken(buf))
+ {
+ pg_log_error("out of memory");
+ exit(1);
+ }
+ tarCreateHeader(header, "backup_manifest", NULL, buf.len,
+ pg_file_create_mode, 04000, 02000,
+ time(NULL));
+ writeTarData(&state, header, sizeof(header));
+ writeTarData(&state, buf.data, buf.len);
+ termPQExpBuffer(&buf);
+ }
+
/* 2 * 512 bytes empty data at end of file */
writeTarData(&state, zerobuf, sizeof(zerobuf));
@@ -1655,6 +1695,64 @@ ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data)
} /* continuing data in existing file */
}
+/*
+ * Receive the backup manifest file and write it out to a file.
+ *
+ * The manifest is stored as "backup_manifest" directly under basedir. Any
+ * failure to create, write, or close the file is reported and terminates
+ * the program.
+ */
+static void
+ReceiveBackupManifest(PGconn *conn)
+{
+	WriteManifestState state;
+
+	snprintf(state.filename, sizeof(state.filename),
+			 "%s/backup_manifest", basedir);
+	state.file = fopen(state.filename, "wb");
+	if (state.file == NULL)
+	{
+		pg_log_error("could not create file \"%s\": %m", state.filename);
+		exit(1);
+	}
+
+	ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state);
+
+	/*
+	 * fclose() flushes any buffered data, so a failure here means the
+	 * manifest on disk may be incomplete; don't let that pass silently.
+	 */
+	if (fclose(state.file) != 0)
+	{
+		pg_log_error("could not close file \"%s\": %m", state.filename);
+		exit(1);
+	}
+}
+
+/*
+ * Callback for ReceiveCopyData: write one received chunk of the backup
+ * manifest to the file described by callback_data (a WriteManifestState).
+ * A failed write is reported and terminates the program.
+ */
+static void
+ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data)
+{
+	WriteManifestState *mstate = callback_data;
+	size_t		items_written;
+
+	/* The chunk is written as a single item of r bytes. */
+	items_written = fwrite(copybuf, r, 1, mstate->file);
+	if (items_written == 1)
+		return;
+
+	pg_log_error("could not write to file \"%s\": %m", mstate->filename);
+	exit(1);
+}
+
+/*
+ * Receive the backup manifest file and accumulate it in the caller-supplied
+ * in-memory buffer, rather than writing it out to a file.
+ */
+static void
+ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf)
+{
+ ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf);
+}
+
+/*
+ * Receive one chunk of the backup manifest file and append it to the
+ * in-memory buffer passed as callback_data (a PQExpBuffer).
+ *
+ * The chunk is raw data of length r, not a printf-style format string, so it
+ * must be appended with appendBinaryPQExpBuffer(); using appendPQExpBuffer()
+ * would interpret any '%' in the manifest as a conversion specification.
+ */
+static void
+ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+								   void *callback_data)
+{
+	PQExpBuffer buf = callback_data;
+
+	appendBinaryPQExpBuffer(buf, copybuf, r);
+}
+
static void
BaseBackup(void)
{
@@ -1665,6 +1763,7 @@ BaseBackup(void)
char *basebkp;
char escaped_label[MAXPGPATH];
char *maxrate_clause = NULL;
+ char *manifest_checksums_clause = NULL;
int i;
char xlogstart[64];
char xlogend[64];
@@ -1672,6 +1771,7 @@ BaseBackup(void)
maxServerMajor;
int serverVersion,
serverMajor;
+ int writing_to_stdout;
Assert(conn != NULL);
@@ -1725,6 +1825,9 @@ BaseBackup(void)
if (maxrate > 0)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
+ if (manifest_checksums != NULL)
+ manifest_checksums_clause = psprintf("MANIFEST_CHECKSUMS '%s'",
+ manifest_checksums);
if (verbose)
pg_log_info("initiating base backup, waiting for checkpoint to complete");
@@ -1739,7 +1842,7 @@ BaseBackup(void)
}
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal == FETCH_WAL ? "WAL" : "",
@@ -1747,7 +1850,8 @@ BaseBackup(void)
includewal == NO_WAL ? "" : "NOWAIT",
maxrate_clause ? maxrate_clause : "",
format == 't' ? "TABLESPACE_MAP" : "",
- verify_checksums ? "" : "NOVERIFY_CHECKSUMS");
+ verify_checksums ? "" : "NOVERIFY_CHECKSUMS",
+ manifest_checksums_clause ? manifest_checksums_clause : "");
if (PQsendQuery(conn, basebkp) == 0)
{
@@ -1835,7 +1939,8 @@ BaseBackup(void)
/*
* When writing to stdout, require a single tablespace
*/
- if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1)
+ writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0;
+ if (writing_to_stdout && PQntuples(res) > 1)
{
pg_log_error("can only write single tablespace to stdout, database has %d",
PQntuples(res));
@@ -1864,6 +1969,19 @@ BaseBackup(void)
ReceiveAndUnpackTarFile(conn, res, i);
} /* Loop over all tablespaces */
+ /*
+ * Now receive backup manifest, if appropriate.
+ *
+ * If we're writing a tarfile to stdout, ReceiveTarFile will have already
+ * processed the backup manifest and included it in the output tarfile.
+ * Such a configuration doesn't allow for writing multiple files.
+ *
+ * If we're talking to an older server, it won't send a backup manifest,
+ * so don't try to receive one.
+ */
+ if (!writing_to_stdout && serverMajor >= 1300)
+ ReceiveBackupManifest(conn);
+
if (showprogress)
{
progress_report(PQntuples(res), NULL, true);
@@ -2066,6 +2184,7 @@ main(int argc, char **argv)
{"waldir", required_argument, NULL, 1},
{"no-slot", no_argument, NULL, 2},
{"no-verify-checksums", no_argument, NULL, 3},
+ {"manifest-checksums", required_argument, NULL, 'm'},
{NULL, 0, NULL, 0}
};
int c;
@@ -2093,7 +2212,7 @@ main(int argc, char **argv)
atexit(cleanup_directories_atexit);
- while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvP",
+ while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvPm:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -2234,6 +2353,9 @@ main(int argc, char **argv)
case 3:
verify_checksums = false;
break;
+ case 'm':
+ manifest_checksums = pg_strdup(optarg);
+ break;
default:
/*
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 07ed281bd6..d5b594c928 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -12,6 +12,7 @@
#ifndef _BASEBACKUP_H
#define _BASEBACKUP_H
+#include "lib/stringinfo.h"
#include "nodes/replnodes.h"
/*
@@ -29,8 +30,12 @@ typedef struct
int64 size;
} tablespaceinfo;
+struct manifest_info;
+typedef struct manifest_info manifest_info;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
-extern int64 sendTablespace(char *path, bool sizeonly);
+extern int64 sendTablespace(char *path, char *oid, bool sizeonly,
+ manifest_info *manifest);
#endif /* _BASEBACKUP_H */
diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h
index fd4305e53f..40d81b87f0 100644
--- a/src/include/replication/walsender.h
+++ b/src/include/replication/walsender.h
@@ -38,6 +38,7 @@ extern bool log_replication_commands;
extern void InitWalSender(void);
extern bool exec_replication_command(const char *query_string);
extern void WalSndErrorCleanup(void);
+extern void WalSndResourceCleanup(bool isCommit);
extern void WalSndSignals(void);
extern Size WalSndShmemSize(void);
extern void WalSndShmemInit(void);
--
2.17.2 (Apple Git-113)