0005-Allow-user-to-choose-a-checksum-algorithm-for-manife.patch

text/x-patch

Filename: 0005-Allow-user-to-choose-a-checksum-algorithm-for-manife.patch
Type: text/x-patch
Part: 4
Message: Re: backup manifests

Patch

Same data as JSON: `GET /api/v1/attachments/:id/patch` returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch 0005
Subject: Allow user to choose a checksum algorithm for manifest file.
File+
src/backend/replication/basebackup.c 153 50
src/backend/replication/repl_gram.y 4 4
src/backend/replication/repl_scanner.l 1 1
src/bin/pg_basebackup/pg_basebackup.c 9 9
From bface992304c76fe7c2d732838e1d485b6795854 Mon Sep 17 00:00:00 2001
From: Rushabh Lathia <rushabh.lathia@enterprisedb.com>
Date: Thu, 5 Dec 2019 14:58:12 +0530
Subject: [PATCH 5/5] Allow user to choose a checksum algorithm for manifest
 file.

---
 src/backend/replication/basebackup.c   | 203 +++++++++++++++++++++++++--------
 src/backend/replication/repl_gram.y    |   8 +-
 src/backend/replication/repl_scanner.l |   2 +-
 src/bin/pg_basebackup/pg_basebackup.c  |  18 +--
 4 files changed, 167 insertions(+), 64 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 66aa0fc..b777d1f 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -54,6 +54,20 @@ typedef struct
 	bool		sendtblspcmapfile;
 } basebackup_options;
 
+/* Checksum algorithm selectable through the MANIFEST_CHECKSUMS backup option. */
+enum manifestCheckSum
+{
+	MC_NONE = 0,	/* no checksums; manifest entries carry "-" instead */
+	MC_SHA256,	/* SHA-256, computed with the pg_sha256_* functions */
+	MC_CRC32C	/* CRC-32C, computed with the *_CRC32C macros */
+};
+
+/* Running checksum state; manifest_checksums decides which member is in use. */
+typedef union checksumCtx
+{
+	pg_sha256_ctx	sha256_ctx;	/* used when MC_SHA256 is selected */
+	pg_crc32c		crc_ctx;	/* used when MC_CRC32C is selected */
+}	ChecksumCtx;
 
 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
 					 List *tablespaces, bool sendtblspclinks,
@@ -72,7 +86,7 @@ static void SendBackupHeader(List *tablespaces);
 static void InitializeManifest(StringInfo manifest);
 static void AddFileToManifest(StringInfo manifest, const char *tsoid,
 							  const char *filename, size_t size, time_t mtime,
-							  uint8 *shabuf);
+							  ChecksumCtx *cCtx);
 static void SendBackupManifest(StringInfo manifest);
 static char *escape_field_for_manifest(const char *s);
 static void base_backup_cleanup(int code, Datum arg);
@@ -82,6 +96,9 @@ static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
 static int	compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
 static bool is_checksummed_file(const char *fullpath, const char *filename);
+static void initialize_manifest_checksum(ChecksumCtx *cCtx);
+static void update_manifest_checksum(ChecksumCtx *cCtx, const char *buf, off_t cnt);
+static int finalize_manifest_checksum(ChecksumCtx *cCtx, char *checksumbuf);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
@@ -132,8 +149,8 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
-/* Add file entry in to manifest with checksums. */
-static bool manifest_with_checksums = false;
+/* Checksum algorithm for manifest entries; MC_NONE means no checksums. */
+static enum manifestCheckSum manifest_checksums = MC_NONE;
 
 /*
  * The contents of these directories are removed or recreated during server
@@ -677,6 +694,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 	bool		o_maxrate = false;
 	bool		o_tablespace_map = false;
 	bool		o_noverify_checksums = false;
+	bool		o_manifest_checksums = false;
 
 	MemSet(opt, 0, sizeof(*opt));
 	foreach(lopt, options)
@@ -765,13 +783,25 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 			noverify_checksums = true;
 			o_noverify_checksums = true;
 		}
-		else if (strcmp(defel->defname, "manifest_with_checksums") == 0)
+		else if (strcmp(defel->defname, "manifest_checksums") == 0)
 		{
-			if (manifest_with_checksums)
+			char  *manifest_checksum_algo = NULL;	/* algorithm name from the command */
+			if (o_manifest_checksums)
 				ereport(ERROR,
 						(errcode(ERRCODE_SYNTAX_ERROR),
 						 errmsg("duplicate option \"%s\"", defel->defname)));
-			manifest_with_checksums = true;
+			manifest_checksum_algo = strVal(defel->arg);
+			o_manifest_checksums = true;	/* so a repeated option is rejected above */
+			if (pg_strcasecmp(manifest_checksum_algo, "SHA256") == 0)
+				manifest_checksums = MC_SHA256;
+			else if (pg_strcasecmp(manifest_checksum_algo, "CRC32C") == 0)
+				manifest_checksums = MC_CRC32C;
+			else if (pg_strcasecmp(manifest_checksum_algo, "NONE") == 0)
+				manifest_checksums = MC_NONE;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid manifest_checksums option \"%s\"", manifest_checksum_algo)));
+		}
 
 		else
@@ -907,14 +937,16 @@ InitializeManifest(StringInfo manifest)
 static void
 AddFileToManifest(StringInfo manifest, const char *tsoid,
 				  const char *filename, size_t size, time_t mtime,
-				  uint8 *shabuf)
+				  ChecksumCtx *cCtx)
 {
 	char	pathbuf[MAXPGPATH];
 	char   *escaped_filename;
 	static char timebuf[128];
-	static char shatextbuf[PG_SHA256_DIGEST_LENGTH * 2 + 1];
-	int		shatextlen;
+	static char checksumbuf[256];
+	char encode_checksumbuf[256];
 	struct pg_tm *tm;
+	char *checksumlabel = NULL;
+	int	   checksumbuflen;
 
 	/*
 	 * If this file is part of a tablespace, the filename passed to this
@@ -941,19 +973,32 @@ AddFileToManifest(StringInfo manifest, const char *tsoid,
 		elog(ERROR, "could not convert epoch to timestamp: %m");
 	pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", tm);
 
-	/* Convert checksum to hexadecimal. */
-	if (manifest_with_checksums)
+	/* Generate the final checksum and convert it to hexadecimal. */
+	if (manifest_checksums != MC_NONE)
 	{
-		shatextlen =
-			hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf);
-		Assert(shatextlen + 1 == sizeof(shatextbuf));
-		shatextbuf[shatextlen] = '\0';
+		checksumbuflen = finalize_manifest_checksum(cCtx, checksumbuf);
+		switch (manifest_checksums)
+		{
+			case MC_SHA256:
+				checksumlabel = "SHA256:";
+				break;
+			case MC_CRC32C:
+				checksumlabel = "CRC32C:";
+				break;
+			case MC_NONE:
+				break;
+		}
+		checksumbuflen = hex_encode(checksumbuf,
+									checksumbuflen,
+									encode_checksumbuf);
+		encode_checksumbuf[checksumbuflen] = '\0';
 	}
 
 	/* Add to manifest. */
-	appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s\n",
+	appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s%s\n",
 					 escaped_filename == NULL ? filename : escaped_filename,
-					 size, timebuf, manifest_with_checksums ? shatextbuf : "-");
+					 size, timebuf, checksumlabel ? checksumlabel : "",
+					 manifest_checksums != MC_NONE ? encode_checksumbuf : "-");
 
 	/* Avoid leaking memory. */
 	if (escaped_filename != NULL)
@@ -966,24 +1011,33 @@ AddFileToManifest(StringInfo manifest, const char *tsoid,
 static void
 SendBackupManifest(StringInfo manifest)
 {
-	pg_sha256_ctx	sha256_ctx;
-	uint8			shabuf[PG_SHA256_DIGEST_LENGTH];
+	char			checksumbuf[256];
 	StringInfoData	protobuf;
-	int				shastringlen;
+	int				checksumbuflen;
+	ChecksumCtx		cCtx;
 
 	/* Checksum the manifest. */
-	if (manifest_with_checksums)
+	if (manifest_checksums != MC_NONE)
 	{
-		pg_sha256_init(&sha256_ctx);
-		pg_sha256_update(&sha256_ctx, (uint8 *) manifest->data, manifest->len);
-		pg_sha256_final(&sha256_ctx, shabuf);
+		initialize_manifest_checksum(&cCtx);
+		update_manifest_checksum(&cCtx, manifest->data, manifest->len);
+		checksumbuflen = finalize_manifest_checksum(&cCtx, (char *) checksumbuf);
 		appendStringInfoString(manifest, "Manifest-Checksum\t");
-		shastringlen = PG_SHA256_DIGEST_LENGTH * 2;
-		enlargeStringInfo(manifest, shastringlen);
-		shastringlen = hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH,
+		switch (manifest_checksums)
+		{
+			case MC_SHA256:
+				appendStringInfoString(manifest, "SHA256:");
+				break;
+			case MC_CRC32C:
+				appendStringInfoString(manifest, "CRC32C:");
+				break;
+			case MC_NONE:
+				break;
+		}
+		enlargeStringInfo(manifest, checksumbuflen * 2);
+		checksumbuflen = hex_encode(checksumbuf, checksumbuflen,
 				manifest->data + manifest->len);
-		Assert(shastringlen == PG_SHA256_DIGEST_LENGTH * 2);
-		manifest->len += shastringlen;
+		manifest->len += checksumbuflen;
 		appendStringInfoChar(manifest, '\n');
 	}
 
@@ -1115,11 +1169,7 @@ sendFileWithContent(const char *filename, const char *content,
 	struct stat statbuf;
 	int			pad,
 				len;
-	pg_sha256_ctx	sha256_ctx;
-	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
-
-	if (manifest_with_checksums)
-		pg_sha256_init(&sha256_ctx);
+	ChecksumCtx cCtx;
 
 	len = strlen(content);
 
@@ -1153,14 +1203,14 @@ sendFileWithContent(const char *filename, const char *content,
 		pq_putmessage('d', buf, pad);
 	}
 
-	if (manifest_with_checksums)
+	if (manifest_checksums != MC_NONE)
 	{
-		pg_sha256_update(&sha256_ctx, (uint8 *) content, len);
-		pg_sha256_final(&sha256_ctx, shabuf);
+		initialize_manifest_checksum(&cCtx);
+		update_manifest_checksum(&cCtx, content, len);
 	}
 
 	AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
-					  shabuf);
+					  &cCtx);
 }
 
 /*
@@ -1591,11 +1641,9 @@ sendFile(const char *readfilename, const char *tarfilename,
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
-	pg_sha256_ctx	sha256_ctx;
-	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
+	ChecksumCtx cCtx;
 
-	if (manifest_with_checksums)
-		pg_sha256_init(&sha256_ctx);
+	initialize_manifest_checksum(&cCtx);
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1766,8 +1814,7 @@ sendFile(const char *readfilename, const char *tarfilename,
 					(errmsg("base backup could not send data, aborting backup")));
 
 		/* Also feed it to the checksum machinery. */
-		if (manifest_with_checksums)
-			pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
+		update_manifest_checksum(&cCtx, buf, cnt);
 
 		len += cnt;
 		throttle(cnt);
@@ -1793,8 +1840,7 @@ sendFile(const char *readfilename, const char *tarfilename,
 		{
 			cnt = Min(sizeof(buf), statbuf->st_size - len);
 			pq_putmessage('d', buf, cnt);
-			if (manifest_with_checksums)
-				pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
+			update_manifest_checksum(&cCtx, buf, cnt);
 			len += cnt;
 			throttle(cnt);
 		}
@@ -1826,11 +1872,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 	}
 
 	total_checksum_failures += checksum_failures;
-
-	if (manifest_with_checksums)
-		pg_sha256_final(&sha256_ctx, shabuf);
 	AddFileToManifest(manifest, tsoid, tarfilename, statbuf->st_size,
-					  statbuf->st_mtime, shabuf);
+					  statbuf->st_mtime, &cCtx);
 
 	return true;
 }
@@ -1966,3 +2009,63 @@ throttle(size_t increment)
 	 */
 	throttled_last = GetCurrentTimestamp();
 }
+
+/*
+ * Reset cCtx for the algorithm selected in manifest_checksums.
+ */
+static void
+initialize_manifest_checksum(ChecksumCtx *cCtx)
+{
+	switch (manifest_checksums)
+	{
+		case MC_SHA256:
+			pg_sha256_init(&cCtx->sha256_ctx);
+			break;
+		case MC_CRC32C:
+			INIT_CRC32C(cCtx->crc_ctx);
+			break;
+		case MC_NONE:	/* checksums disabled: cCtx is left untouched */
+			break;
+	}
+}
+
+static void
+update_manifest_checksum(ChecksumCtx *cCtx, const char *buf, off_t cnt)
+{
+	switch (manifest_checksums)	/* fold cnt bytes at buf into cCtx */
+	{
+		case MC_SHA256:
+			pg_sha256_update(&cCtx->sha256_ctx, (uint8 *) buf, cnt);
+			break;
+		case MC_CRC32C:
+			COMP_CRC32C(cCtx->crc_ctx, buf, cnt);
+			break;
+		case MC_NONE:	/* checksums disabled: the data is ignored */
+			break;
+	}
+}
+
+/*
+ * Finalize cCtx, storing the raw checksum bytes (CRC32C in host byte order) in
+ * checksumbuf for the caller to hex-encode; returns their count (0 if MC_NONE).
+ */
+static int
+finalize_manifest_checksum(ChecksumCtx *cCtx, char *checksumbuf)
+{
+	int checksumlen = 0;
+	switch (manifest_checksums)
+	{
+		case MC_SHA256:
+			pg_sha256_final(&cCtx->sha256_ctx, (uint8 *)checksumbuf);
+			checksumlen = PG_SHA256_DIGEST_LENGTH;
+			break;
+		case MC_CRC32C:
+			FIN_CRC32C(cCtx->crc_ctx);
+			memcpy(checksumbuf, &cCtx->crc_ctx, sizeof(pg_crc32c));	/* raw bytes, not signed decimal text */
+			checksumlen = sizeof(pg_crc32c);
+			break;
+		case MC_NONE:
+			break;
+	}
+	return checksumlen;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 542a3f7..e527dd2 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -87,7 +87,7 @@ static SQLCmd *make_sqlcmd(void);
 %token K_EXPORT_SNAPSHOT
 %token K_NOEXPORT_SNAPSHOT
 %token K_USE_SNAPSHOT
-%token K_MANIFEST_WITH_CHECKSUMS
+%token K_MANIFEST_CHECKSUMS
 
 %type <node>	command
 %type <node>	base_backup start_replication start_logical_replication
@@ -215,10 +215,10 @@ base_backup_opt:
 				  $$ = makeDefElem("noverify_checksums",
 								   (Node *)makeInteger(true), -1);
 				}
-			| K_MANIFEST_WITH_CHECKSUMS
+			| K_MANIFEST_CHECKSUMS SCONST
 				{
-				  $$ = makeDefElem("manifest_with_checksums",
-								   (Node *)makeInteger(true), -1);
+				  $$ = makeDefElem("manifest_checksums",
+								   (Node *)makeString($2), -1);
 				}
 			;
 
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 4f92bc1..1b73f1e 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -107,7 +107,7 @@ EXPORT_SNAPSHOT		{ return K_EXPORT_SNAPSHOT; }
 NOEXPORT_SNAPSHOT	{ return K_NOEXPORT_SNAPSHOT; }
 USE_SNAPSHOT		{ return K_USE_SNAPSHOT; }
 WAIT				{ return K_WAIT; }
-MANIFEST_WITH_CHECKSUMS { return K_MANIFEST_WITH_CHECKSUMS; }
+MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
 
 ","				{ return ','; }
 ";"				{ return ';'; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index f4f8ffe..13c3918 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -141,7 +141,7 @@ static bool temp_replication_slot = true;
 static bool create_slot = false;
 static bool no_slot = false;
 static bool verify_checksums = true;
-static bool manifest_with_checksums = false;
+static char *manifest_checksums = NULL;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -399,8 +399,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
-	printf(_("      --manifest-with-checksums\n"
-			 "                         do calculate checksums for manifest files\n"));
+	printf(_("      --manifest-checksums=SHA256|CRC32C|NONE\n"
+			 "                         calculate checksums for manifest files using provided algorithm\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -1824,7 +1824,7 @@ BaseBackup(void)
 	}
 
 	basebkp =
-		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s",
+		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s MANIFEST_CHECKSUMS '%s'",
 				 escaped_label,
 				 showprogress ? "PROGRESS" : "",
 				 includewal == FETCH_WAL ? "WAL" : "",
@@ -1833,7 +1833,7 @@ BaseBackup(void)
 				 maxrate_clause ? maxrate_clause : "",
 				 format == 't' ? "TABLESPACE_MAP" : "",
 				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS",
-				 manifest_with_checksums ? "MANIFEST_WITH_CHECKSUMS" : "");
+				 manifest_checksums ? manifest_checksums : "NONE");
 
 	if (PQsendQuery(conn, basebkp) == 0)
 	{
@@ -2166,7 +2166,7 @@ main(int argc, char **argv)
 		{"waldir", required_argument, NULL, 1},
 		{"no-slot", no_argument, NULL, 2},
 		{"no-verify-checksums", no_argument, NULL, 3},
-		{"manifest-with-checksums", no_argument, NULL, 4},
+		{"manifest-checksums", required_argument, NULL, 'm'},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2194,7 +2194,7 @@ main(int argc, char **argv)
 
 	atexit(cleanup_directories_atexit);
 
-	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvP",
+	while ((c = getopt_long(argc, argv, "CD:F:r:RS:T:X:l:nNzZ:d:c:h:p:U:s:wWkvPm:",
 							long_options, &option_index)) != -1)
 	{
 		switch (c)
@@ -2335,8 +2335,8 @@ main(int argc, char **argv)
 			case 3:
 				verify_checksums = false;
 				break;
-			case 4:
-				manifest_with_checksums = true;
+			case 'm':
+				manifest_checksums = pg_strdup(optarg);
 				break;
 			default:
 
-- 
1.8.3.1