0003-Add-support-for-the-incremental-backup.patch
text/x-patch
Filename: 0003-Add-support-for-the-incremental-backup.patch
Type: text/x-patch
Part: 2
Message:
Re: block-level incremental backup
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch 0003
Subject: Add support for the incremental backup.
| File | + | − |
|---|---|---|
| doc/src/sgml/protocol.sgml | 49 | 1 |
| doc/src/sgml/ref/pg_basebackup.sgml | 21 | 0 |
| src/backend/access/transam/xlog.c | 4 | 1 |
| src/backend/access/transam/xlogfuncs.c | 4 | 2 |
| src/backend/replication/basebackup.c | 231 | 13 |
| src/backend/storage/file/fd.c | 29 | 0 |
| src/include/access/xlog.h | 2 | 1 |
| src/include/replication/basebackup.h | 13 | 0 |
| src/include/storage/fd.h | 1 | 0 |
From 740c153a712939d6c65f7d31592b75512544f383 Mon Sep 17 00:00:00 2001
From: Jeevan Chalke <jeevan.chalke@enterprisedb.com>
Date: Fri, 16 Aug 2019 14:10:33 +0530
Subject: [PATCH 3/4] Add support for the incremental backup.
If file is modified 90% or more, we send a whole file else we send
only those blocks which are modified. The file is named .partial and
has following header details:
- magic number, currently 0 (4 bytes)
- checksum, currently 0 (4 bytes)
- number of blocks in this .partial file (4 bytes)
- all modified block numbers (4 bytes each)
- modified blocks
---
doc/src/sgml/protocol.sgml | 50 ++++++-
doc/src/sgml/ref/pg_basebackup.sgml | 21 +++
src/backend/access/transam/xlog.c | 5 +-
src/backend/access/transam/xlogfuncs.c | 6 +-
src/backend/replication/basebackup.c | 244 +++++++++++++++++++++++++++++++--
src/backend/storage/file/fd.c | 29 ++++
src/include/access/xlog.h | 3 +-
src/include/replication/basebackup.h | 13 ++
src/include/storage/fd.h | 1 +
9 files changed, 354 insertions(+), 18 deletions(-)
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index b20f169..fb07585 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2466,7 +2466,7 @@ The commands accepted in replication mode are:
</varlistentry>
<varlistentry>
- <term><literal>BASE_BACKUP</literal> [ <literal>LABEL</literal> <replaceable>'label'</replaceable> ] [ <literal>PROGRESS</literal> ] [ <literal>FAST</literal> ] [ <literal>WAL</literal> ] [ <literal>NOWAIT</literal> ] [ <literal>MAX_RATE</literal> <replaceable>rate</replaceable> ] [ <literal>TABLESPACE_MAP</literal> ] [ <literal>NOVERIFY_CHECKSUMS</literal> ]
+ <term><literal>BASE_BACKUP</literal> [ <literal>LABEL</literal> <replaceable>'label'</replaceable> ] [ <literal>PROGRESS</literal> ] [ <literal>FAST</literal> ] [ <literal>WAL</literal> ] [ <literal>NOWAIT</literal> ] [ <literal>MAX_RATE</literal> <replaceable>rate</replaceable> ] [ <literal>TABLESPACE_MAP</literal> ] [ <literal>NOVERIFY_CHECKSUMS</literal> ] [ <literal>LSN</literal> <replaceable>'lsn'</replaceable> ]
<indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
@@ -2576,6 +2576,22 @@ The commands accepted in replication mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>LSN</literal> <replaceable>'lsn'</replaceable></term>
+ <listitem>
+ <para>
+ Includes only those data blocks in backup which has LSN greater than
+ or equal to the given lsn. However, if 90% or more data blocks are
+ modified in the file, then sends the entire file. Otherwise, creates
+ a <filename>.partial</filename> file containing only the blocks which
+ are modified and sends that instead. The <filename>.partial</filename>
+ file has its own header followed by the actual data blocks. Note that
+ only relation files are considered here, all other files are sent as
+ is.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
@@ -2698,6 +2714,38 @@ The commands accepted in replication mode are:
Owner, group, and file mode are set if the underlying file system on
the server supports it.
</para>
+ <para>
+ An incremental backup's <filename>.partial</filename> file has the
+ following format:
+ <itemizedlist>
+ <listitem>
+ <para>
+ Starts with a 4-byte magic number
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Followed by a 4-byte CRC of the header (containing a magic number,
+ count of the number of blocks, and all block numbers)
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Then a 4-byte count of the number of blocks included in the file
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Then the block numbers, each as a 4-byte quantity
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Followed by the actual data blocks in order with the block numbers
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
</listitem>
</varlistentry>
</variablelist>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index fc9e222..00782e0 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -408,6 +408,19 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>--lsn=<replaceable class="parameter">LSN</replaceable></option></term>
+ <listitem>
+ <para>
+ Takes an incremental backup, using LSN as a threshold. Only the blocks
+ which are modified after this given LSN will be backed up. The file
+ which has these partial blocks has .partial as an extension. Backup
+ taken in this manner has to be combined with the full backup with the
+ <command>pg_combinebackup</command> utility.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-n</option></term>
<term><option>--no-clean</option></term>
<listitem>
@@ -792,6 +805,14 @@ PostgreSQL documentation
<prompt>$</prompt> <userinput>pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts</userinput>
</screen>
</para>
+
+ <para>
+ To create an incremental backup having LSN greater than or equal to
+ <literal>5/19000060</literal>:
+<screen>
+<prompt>$</prompt> <userinput>pg_basebackup -D incbackup --lsn='5/19000060'</userinput>
+</screen>
+ </para>
</refsect1>
<refsect1>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f553523..e427c0f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10178,7 +10178,7 @@ XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
StringInfo labelfile, List **tablespaces,
StringInfo tblspcmapfile, bool infotbssize,
- bool needtblspcmapfile)
+ bool needtblspcmapfile, XLogRecPtr ref_lsn)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -10506,6 +10506,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
appendStringInfo(labelfile, "START TIME: %s\n", strfbuf);
appendStringInfo(labelfile, "LABEL: %s\n", backupidstr);
appendStringInfo(labelfile, "START TIMELINE: %u\n", starttli);
+ if (!XLogRecPtrIsInvalid(ref_lsn))
+ appendStringInfo(labelfile, "INCREMENTAL BACKUP REFERENCE WAL LOCATION: %X/%X\n",
+ (uint32) (ref_lsn >> 32), (uint32) ref_lsn);
/*
* Okay, write the file, or return its contents to caller.
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index b35043b..ef8b283 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -89,7 +89,8 @@ pg_start_backup(PG_FUNCTION_ARGS)
if (exclusive)
{
startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
- NULL, NULL, false, true);
+ NULL, NULL, false, true,
+ InvalidXLogRecPtr);
}
else
{
@@ -105,7 +106,8 @@ pg_start_backup(PG_FUNCTION_ARGS)
MemoryContextSwitchTo(oldcontext);
startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file,
- NULL, tblspc_map_file, false, true);
+ NULL, tblspc_map_file, false, true,
+ InvalidXLogRecPtr);
before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 18e992c..a2c0756 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -82,6 +82,12 @@ static pgoff_t do_full_backup(const char *readfilename,
const char *tarfilename, FILE *fp,
struct stat *statbuf, int segmentno,
bool verify_checksum, int *checksum_failures);
+static pgoff_t do_incremental_backup(const char *readfilename,
+ const char *tarfilename, FILE *fp,
+ XLogRecPtr refptr,
+ struct stat *statbuf, int segmentno,
+ bool verify_checksum,
+ int *checksum_failures);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -99,6 +105,11 @@ static char *statrelpath = NULL;
*/
#define THROTTLING_FREQUENCY 8
+/*
+ * When to send the whole file, % blocks modified (90%)
+ */
+#define WHOLE_FILE_THRESHOLD 0.9
+
/* The actual number of bytes, transfer of which may cause sleep. */
static uint64 throttling_sample;
@@ -114,6 +125,9 @@ static TimestampTz throttled_last;
/* The starting XLOG position of the base backup. */
static XLogRecPtr startptr;
+/* The reference XLOG position for the incremental backup. */
+static XLogRecPtr refptr;
+
/* Total number of checksum failures during base backup. */
static long long int total_checksum_failures;
@@ -254,7 +268,9 @@ perform_base_backup(basebackup_options *opt)
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
labelfile, &tablespaces,
tblspc_map_file,
- opt->progress, opt->sendtblspcmapfile);
+ opt->progress, opt->sendtblspcmapfile,
+ opt->lsn);
+ refptr = opt->lsn;
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
@@ -1392,6 +1408,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
int segmentno = 0;
char *segmentpath;
bool verify_checksum = false;
+ char *filename;
fp = AllocateFile(readfilename, "rb");
if (fp == NULL)
@@ -1403,17 +1420,15 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
errmsg("could not open file \"%s\": %m", readfilename)));
}
+ /*
+ * Get the filename (excluding path). As last_dir_separator() includes
+ * the last directory separator, we chop that off by incrementing the
+ * pointer.
+ */
+ filename = last_dir_separator(readfilename) + 1;
+
if (!noverify_checksums && DataChecksumsEnabled())
{
- char *filename;
-
- /*
- * Get the filename (excluding path). As last_dir_separator()
- * includes the last directory separator, we chop that off by
- * incrementing the pointer.
- */
- filename = last_dir_separator(readfilename) + 1;
-
if (is_checksummed_file(readfilename, filename))
{
verify_checksum = true;
@@ -1434,9 +1449,23 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
}
}
- /* Perform full backup */
- len = do_full_backup(readfilename, tarfilename, fp, statbuf, segmentno,
- verify_checksum, &checksum_failures);
+ /*
+ * If incremental backup, see whether the filename is a relation filename
+ * or not.
+ */
+ if (refptr && OidIsValid(dboid) && looks_like_rel_name(filename))
+ {
+ /* Perform incremental backup */
+ len = do_incremental_backup(readfilename, tarfilename, fp,
+ refptr, statbuf, segmentno,
+ verify_checksum, &checksum_failures);
+ }
+ else
+ {
+ /* Perform full backup */
+ len = do_full_backup(readfilename, tarfilename, fp, statbuf, segmentno,
+ verify_checksum, &checksum_failures);
+ }
/* If the file was truncated while we were sending it, pad it with zeros */
if (len < statbuf->st_size)
@@ -1775,3 +1804,192 @@ do_full_backup(const char *readfilename, const char *tarfilename, FILE *fp,
return len;
}
+
+/*
+ * do_incremental_backup
+ *
+ * Perform incremental backup.
+ */
+static pgoff_t
+do_incremental_backup(const char *readfilename, const char *tarfilename,
+ FILE *fp, XLogRecPtr refptr, struct stat *statbuf,
+ int segmentno, bool verify_checksum,
+ int *checksum_failures)
+{
+ char *buf;
+ off_t cnt;
+ pgoff_t len = 0;
+ BlockNumber blkno = 0;
+ int i;
+ bool sendwholefile = false;
+
+ Assert(statbuf->st_size <= (RELSEG_SIZE * BLCKSZ));
+
+ buf = (char *) malloc(statbuf->st_size);
+ if (buf == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ if ((cnt = fread(buf, 1, statbuf->st_size, fp)) > 0)
+ {
+ Bitmapset *mod_blocks = NULL;
+ int nmodblocks = 0;
+
+ if (cnt % BLCKSZ != 0)
+ {
+ if (verify_checksum)
+ {
+ ereport(WARNING,
+ (errmsg("cannot verify checksum in file \"%s\", block "
+ "%d: read buffer size %d and page size %d "
+ "differ",
+ readfilename, blkno, (int) cnt, BLCKSZ)));
+ verify_checksum = false;
+ }
+
+ ereport(WARNING,
+ (errmsg("file size (%d) not in multiple of page size (%d), sending whole file",
+ (int) cnt, BLCKSZ)));
+
+ /* File size is not in multiple of BLCKSZ, send as is. */
+ sendwholefile = true;
+ }
+
+ /*
+ * Check each page LSN and see if it is modified after the given LSN or
+ * not. Create a bitmap of all such modified blocks and then decide
+ * whether we want to send a whole file or a partial file. Skip this
+ * check if we decided to send whole file already.
+ */
+ if (!sendwholefile)
+ {
+ XLogRecPtr pglsn;
+ int nblocks = (cnt / BLCKSZ);
+
+ for (i = 0; i < nblocks; i++)
+ {
+ char *page = buf + BLCKSZ * i;
+
+ pglsn = PageGetLSN(page);
+
+ if (pglsn >= refptr)
+ {
+ /*
+ * Verify checksum, if requested, for the modified blocks.
+ */
+ if (verify_checksum)
+ verify_page_checksum(readfilename, fp, buf, cnt, i,
+ blkno, segmentno,
+ checksum_failures);
+
+ mod_blocks = bms_add_member(mod_blocks, i);
+ }
+
+ blkno++;
+ }
+
+ nmodblocks = bms_num_members(mod_blocks);
+
+ /*
+ * We need to send whole file if the modified block count is equal
+ * to or greater than the WHOLE_FILE_THRESHOLD. Check that.
+ */
+ if (i > 0 && (nmodblocks / (double) i) >= WHOLE_FILE_THRESHOLD)
+ sendwholefile = true;
+ }
+
+ /*
+ * If sendwholefile is true then we need to send the whole file as is.
+ * Otherwise send a partial file.
+ */
+ if (sendwholefile)
+ {
+ _tarWriteHeader(tarfilename, NULL, statbuf, false);
+
+ /* Send the chunk as a CopyData message */
+ if (pq_putmessage('d', buf, cnt))
+ ereport(ERROR,
+ (errmsg("base backup could not send data, aborting backup")));
+
+ len = cnt;
+ throttle(cnt);
+ }
+ else
+ {
+ int part_size = 0;
+ int part_header_size;
+ int blknum;
+ int blknocnt;
+ partial_file_header *pfh;
+ char *partialtarfilename = NULL;
+
+ /* Create a partial file */
+
+ /* Calculate partial file size. */
+ part_header_size = offsetof(partial_file_header, blocknumbers) +
+ (sizeof(uint32) * nmodblocks);
+ part_size = part_header_size + (BLCKSZ * nmodblocks);
+
+ /* Add .partial to filename */
+ partialtarfilename = (char *) palloc(strlen(tarfilename) + 9);
+ snprintf(partialtarfilename, strlen(tarfilename) + 9, "%s.partial", tarfilename);
+
+ statbuf->st_size = part_size;
+ _tarWriteHeader(partialtarfilename, NULL, statbuf, false);
+ pfree(partialtarfilename);
+
+ pfh = (partial_file_header *) palloc(part_header_size);
+ pfh->magic = INCREMENTAL_BACKUP_MAGIC;
+ pfh->nblocks = nmodblocks;
+
+ blknum = -1;
+ blknocnt = 0;
+ while ((blknum = bms_next_member(mod_blocks, blknum)) >= 0)
+ {
+ pfh->blocknumbers[blknocnt] = blknum;
+ /* Calculate CRC for each block to be transferred. */
+ blknocnt++;
+ }
+
+ Assert(blknocnt == nmodblocks);
+
+ /* Now calculate CRC for the header */
+ INIT_CRC32C(pfh->checksum);
+ COMP_CRC32C(pfh->checksum, pfh, part_header_size);
+
+ /* Send header */
+ if (pq_putmessage('d', (char *) pfh, part_header_size))
+ ereport(ERROR,
+ (errmsg("base backup could not send data, aborting backup")));
+ throttle(part_header_size);
+
+ /* Send data blocks */
+ for (blknocnt = 0; blknocnt < nmodblocks; blknocnt++)
+ {
+ int offset = BLCKSZ * pfh->blocknumbers[blknocnt];
+
+ if (pq_putmessage('d', buf + offset, BLCKSZ))
+ ereport(ERROR,
+ (errmsg("base backup could not send data, aborting backup")));
+ throttle(BLCKSZ);
+ }
+
+ Assert(blknocnt == nmodblocks);
+
+ len = part_size;
+ pfree(pfh);
+ }
+ }
+ else
+ {
+ /* Send empty file as is */
+ _tarWriteHeader(tarfilename, NULL, statbuf, false);
+ len = cnt;
+ }
+
+ /* free buffer allocated */
+ free(buf);
+
+ return len;
+}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a76112d..2990c52 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -3111,6 +3111,35 @@ looks_like_temp_rel_name(const char *name)
return true;
}
+/* <digits>, or <digits>.<digits> */
+bool
+looks_like_rel_name(const char *name)
+{
+ int pos;
+
+ /* Look for a non-empty string of digits (that isn't too long). */
+ for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
+ ;
+ if (pos == 0 || pos > OIDCHARS)
+ return false;
+
+ if (name[pos] == '.')
+ {
+ int segchar;
+
+ for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
+ ;
+ if (segchar <= 1)
+ return false;
+ pos += segchar;
+ }
+
+ /* Now we should be at the end. */
+ if (name[pos] != '\0')
+ return false;
+ return true;
+}
+
/*
* Issue fsync recursively on PGDATA and all its contents.
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252..155385d 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -347,7 +347,8 @@ typedef enum SessionBackupState
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
TimeLineID *starttli_p, StringInfo labelfile,
List **tablespaces, StringInfo tblspcmapfile, bool infotbssize,
- bool needtblspcmapfile);
+ bool needtblspcmapfile,
+ XLogRecPtr ref_lsn);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 503a5b9..1b35b08 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -20,6 +20,9 @@
#define MAX_RATE_LOWER 32
#define MAX_RATE_UPPER 1048576
+/* magic number in incremental backup's .partial file */
+#define INCREMENTAL_BACKUP_MAGIC 0x494E4352
+
typedef struct
{
@@ -29,6 +32,16 @@ typedef struct
int64 size;
} tablespaceinfo;
+/* Definition of the partial file header */
+typedef struct
+{
+ uint32 magic;
+ pg_crc32c checksum;
+ uint32 nblocks;
+ uint32 blocknumbers[FLEXIBLE_ARRAY_MEMBER];
+} partial_file_header;
+
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
extern int64 sendTablespace(char *path, bool sizeonly);
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index d2a8c52..d25a390 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -136,6 +136,7 @@ extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid);
extern void RemovePgTempFiles(void);
extern bool looks_like_temp_rel_name(const char *name);
+extern bool looks_like_rel_name(const char *name);
extern int pg_fsync(int fd);
extern int pg_fsync_no_writethrough(int fd);
--
1.8.3.1