0002-Refactor-code-in-basebackup.c.patch
text/x-patch
Filename: 0002-Refactor-code-in-basebackup.c.patch
Type: text/x-patch
Part: 3
Message:
Re: block-level incremental backup
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch 0002
Subject: Refactor code in basebackup.c
| File | + | − |
|---|---|---|
| src/backend/replication/basebackup.c | 176 | 132 |
From 2f55219552f30c2cc5a97b15f855fa402d99a1fd Mon Sep 17 00:00:00 2001
From: Jeevan Chalke <jeevan.chalke@enterprisedb.com>
Date: Fri, 16 Aug 2019 14:10:16 +0530
Subject: [PATCH 2/4] Refactor code in basebackup.c
- Refactor full backup code to the separate function.
- Refactor checksum varifying logic to the separate function.
---
src/backend/replication/basebackup.c | 308 ++++++++++++++++++++---------------
1 file changed, 176 insertions(+), 132 deletions(-)
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 74c954b..18e992c 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -75,6 +75,13 @@ static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const ListCell *a, const ListCell *b);
static void throttle(size_t increment);
static bool is_checksummed_file(const char *fullpath, const char *filename);
+static void verify_page_checksum(const char *readfilename, FILE *fp, char *buf,
+ off_t cnt, int blkindex, BlockNumber blkno, int segmentno,
+ int *checksum_failures);
+static pgoff_t do_full_backup(const char *readfilename,
+ const char *tarfilename, FILE *fp,
+ struct stat *statbuf, int segmentno,
+ bool verify_checksum, int *checksum_failures);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -1377,17 +1384,11 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
bool missing_ok, Oid dboid)
{
FILE *fp;
- BlockNumber blkno = 0;
- bool block_retry = false;
char buf[TAR_SEND_SIZE];
- uint16 checksum;
int checksum_failures = 0;
off_t cnt;
- int i;
pgoff_t len = 0;
- char *page;
size_t pad;
- PageHeader phdr;
int segmentno = 0;
char *segmentpath;
bool verify_checksum = false;
@@ -1402,8 +1403,6 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
errmsg("could not open file \"%s\": %m", readfilename)));
}
- _tarWriteHeader(tarfilename, NULL, statbuf, false);
-
if (!noverify_checksums && DataChecksumsEnabled())
{
char *filename;
@@ -1435,130 +1434,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
}
}
- while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
- {
- /*
- * The checksums are verified at block level, so we iterate over the
- * buffer in chunks of BLCKSZ, after making sure that
- * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
- * BLCKSZ bytes.
- */
- Assert(TAR_SEND_SIZE % BLCKSZ == 0);
-
- if (verify_checksum && (cnt % BLCKSZ != 0))
- {
- ereport(WARNING,
- (errmsg("cannot verify checksum in file \"%s\", block "
- "%d: read buffer size %d and page size %d "
- "differ",
- readfilename, blkno, (int) cnt, BLCKSZ)));
- verify_checksum = false;
- }
-
- if (verify_checksum)
- {
- for (i = 0; i < cnt / BLCKSZ; i++)
- {
- page = buf + BLCKSZ * i;
-
- /*
- * Only check pages which have not been modified since the
- * start of the base backup. Otherwise, they might have been
- * written only halfway and the checksum would not be valid.
- * However, replaying WAL would reinstate the correct page in
- * this case. We also skip completely new pages, since they
- * don't have a checksum yet.
- */
- if (!PageIsNew(page) && PageGetLSN(page) < startptr)
- {
- checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
- phdr = (PageHeader) page;
- if (phdr->pd_checksum != checksum)
- {
- /*
- * Retry the block on the first failure. It's
- * possible that we read the first 4K page of the
- * block just before postgres updated the entire block
- * so it ends up looking torn to us. We only need to
- * retry once because the LSN should be updated to
- * something we can ignore on the next pass. If the
- * error happens again then it is a true validation
- * failure.
- */
- if (block_retry == false)
- {
- /* Reread the failed block */
- if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not fseek in file \"%s\": %m",
- readfilename)));
- }
-
- if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not reread block %d of file \"%s\": %m",
- blkno, readfilename)));
- }
-
- if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not fseek in file \"%s\": %m",
- readfilename)));
- }
-
- /* Set flag so we know a retry was attempted */
- block_retry = true;
-
- /* Reset loop to validate the block again */
- i--;
- continue;
- }
-
- checksum_failures++;
-
- if (checksum_failures <= 5)
- ereport(WARNING,
- (errmsg("checksum verification failed in "
- "file \"%s\", block %d: calculated "
- "%X but expected %X",
- readfilename, blkno, checksum,
- phdr->pd_checksum)));
- if (checksum_failures == 5)
- ereport(WARNING,
- (errmsg("further checksum verification "
- "failures in file \"%s\" will not "
- "be reported", readfilename)));
- }
- }
- block_retry = false;
- blkno++;
- }
- }
-
- /* Send the chunk as a CopyData message */
- if (pq_putmessage('d', buf, cnt))
- ereport(ERROR,
- (errmsg("base backup could not send data, aborting backup")));
-
- len += cnt;
- throttle(cnt);
-
- if (len >= statbuf->st_size)
- {
- /*
- * Reached end of file. The file could be longer, if it was
- * extended while we were sending it, but for a base backup we can
- * ignore such extended data. It will be restored from WAL.
- */
- break;
- }
- }
+ /* Perform full backup */
+ len = do_full_backup(readfilename, tarfilename, fp, statbuf, segmentno,
+ verify_checksum, &checksum_failures);
/* If the file was truncated while we were sending it, pad it with zeros */
if (len < statbuf->st_size)
@@ -1731,3 +1609,169 @@ throttle(size_t increment)
*/
throttled_last = GetCurrentTimestamp();
}
+
+/*
+ * verify_page_checksum
+ *
+ * Verifies checksum for one page.
+ */
+static void
+verify_page_checksum(const char *readfilename, FILE *fp, char *buf,
+ off_t cnt, int blkindex, BlockNumber blkno, int segmentno,
+ int *checksum_failures)
+{
+ char *page;
+ uint16 checksum;
+ bool block_retry = false;
+
+ while (1)
+ {
+ page = buf + BLCKSZ * blkindex;
+
+ /*
+ * Only check pages which have not been modified since the start of the
+ * base backup. Otherwise, they might have been written only halfway
+ * and the checksum would not be valid. However, replaying WAL would
+ * reinstate the correct page in this case. We also skip completely
+ * new pages, since they don't have a checksum yet.
+ */
+ if (!PageIsNew(page) && PageGetLSN(page) < startptr)
+ {
+ PageHeader phdr;
+
+ checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
+ phdr = (PageHeader) page;
+ if (phdr->pd_checksum != checksum)
+ {
+ /*
+ * Retry the block on the first failure. It's possible that we
+ * read the first 4K page of the block just before postgres
+ * updated the entire block so it ends up looking torn to us.
+ * We only need to retry once because the LSN should be updated
+ * to something we can ignore on the next pass. If the error
+ * happens again then it is a true validation failure.
+ */
+ if (block_retry == false)
+ {
+ /* Reread the failed block */
+ if (fseek(fp, -(cnt - BLCKSZ * blkindex), SEEK_CUR) == -1)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fseek in file \"%s\": %m",
+ readfilename)));
+ }
+
+ if (fread(buf + BLCKSZ * blkindex, 1, BLCKSZ, fp) != BLCKSZ)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not reread block %d of file \"%s\": %m",
+ blkno, readfilename)));
+ }
+
+ if (fseek(fp, cnt - BLCKSZ * blkindex - BLCKSZ, SEEK_CUR) == -1)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fseek in file \"%s\": %m",
+ readfilename)));
+ }
+
+ /* Set flag so we know a retry was attempted */
+ block_retry = true;
+
+ /* Re-validate the block again */
+ continue;
+ }
+
+ (*checksum_failures)++;
+
+ if (*checksum_failures <= 5)
+ ereport(WARNING,
+ (errmsg("checksum verification failed in "
+ "file \"%s\", block %d: calculated "
+ "%X but expected %X",
+ readfilename, blkno, checksum,
+ phdr->pd_checksum)));
+ if (*checksum_failures == 5)
+ ereport(WARNING,
+ (errmsg("further checksum verification "
+ "failures in file \"%s\" will not "
+ "be reported", readfilename)));
+ }
+ }
+
+ break;
+ }
+}
+
+/*
+ * do_full_backup
+ *
+ * Perform full backup.
+ */
+static pgoff_t
+do_full_backup(const char *readfilename, const char *tarfilename, FILE *fp,
+ struct stat *statbuf, int segmentno, bool verify_checksum,
+ int *checksum_failures)
+{
+ char buf[TAR_SEND_SIZE];
+ off_t cnt;
+ pgoff_t len = 0;
+ BlockNumber blkno = 0;
+ int i;
+
+ _tarWriteHeader(tarfilename, NULL, statbuf, false);
+
+ while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
+ {
+ /*
+ * The checksums are verified at block level, so we iterate over the
+ * buffer in chunks of BLCKSZ, after making sure that
+ * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
+ * BLCKSZ bytes.
+ */
+ Assert(TAR_SEND_SIZE % BLCKSZ == 0);
+
+ if (verify_checksum && (cnt % BLCKSZ != 0))
+ {
+ ereport(WARNING,
+ (errmsg("cannot verify checksum in file \"%s\", block "
+ "%d: read buffer size %d and page size %d "
+ "differ",
+ readfilename, blkno, (int) cnt, BLCKSZ)));
+ verify_checksum = false;
+ }
+
+ if (verify_checksum)
+ {
+ for (i = 0; i < cnt / BLCKSZ; i++)
+ {
+ verify_page_checksum(readfilename, fp, buf, cnt, i, blkno,
+ segmentno, checksum_failures);
+ blkno++;
+ }
+ }
+
+ /* Send the chunk as a CopyData message */
+ if (pq_putmessage('d', buf, cnt))
+ ereport(ERROR,
+ (errmsg("base backup could not send data, aborting backup")));
+
+ len += cnt;
+ throttle(cnt);
+
+ if (len >= statbuf->st_size)
+ {
+ /*
+ * Reached end of file. The file could be longer, if it was
+ * extended while we were sending it, but for a base backup we can
+ * ignore such extended data. It will be restored from WAL.
+ */
+ break;
+ }
+ }
+
+ return len;
+}
--
1.8.3.1