0002-Refactor-some-backup-code-to-increase-reusability.-T_v9.patch
application/octet-stream
Filename: 0002-Refactor-some-backup-code-to-increase-reusability.-T_v9.patch
Type: application/octet-stream
Part: 2
Message:
Re: WIP/PoC for parallel backup
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v9-0002
Subject: Refactor some backup code to increase reusability. This commit adds two functions; collect_tablespaces and collect_wal_files. The code related to collect tablespace information is moved from do_pg_start_backup to collect_tablespaces function. Also, the code to collect wal files is moved from perform_base_backup to collect_wal_files.
| File | + | − |
|---|---|---|
| src/backend/access/transam/xlog.c | 100 | 91 |
| src/backend/replication/basebackup.c | 117 | 100 |
| src/include/access/xlog.h | 2 | 0 |
From 1d41fa411fc02db73a49277779baeb022f3ae82d Mon Sep 17 00:00:00 2001
From: Asif Rehman <asif.rehman@highgo.ca>
Date: Mon, 27 Jan 2020 17:48:10 +0500
Subject: [PATCH 2/6] Refactor some backup code to increase reusability. This
commit adds two functions; collect_tablespaces and collect_wal_files. The
code related to collect tablespace information is moved from
do_pg_start_backup to collect_tablespaces function. Also, the code to collect
wal files is moved from perform_base_backup to collect_wal_files.
This does not introduce any functional changes.
---
src/backend/access/transam/xlog.c | 191 ++++++++++++-----------
src/backend/replication/basebackup.c | 217 +++++++++++++++------------
src/include/access/xlog.h | 2 +
3 files changed, 219 insertions(+), 191 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4fa446ffa42..f5670141126 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10348,10 +10348,6 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
- DIR *tblspcdir;
- struct dirent *de;
- tablespaceinfo *ti;
- int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -10477,8 +10473,6 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
if (exclusive)
tblspcmapfile = makeStringInfo();
- datadirpathlen = strlen(DataDir);
-
/*
* Report that we are now estimating the total backup size
* if we're streaming base backup as requested by pg_basebackup
@@ -10487,91 +10481,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE);
- /* Collect information about all tablespaces */
- tblspcdir = AllocateDir("pg_tblspc");
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH + 10];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
- StringInfoData buflinkpath;
- char *s = linkpath;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Add the escape character '\\' before newline in a string to
- * ensure that we can distinguish between the newline in the
- * tablespace path and end of line while reading tablespace_map
- * file during archive recovery.
- */
- initStringInfo(&buflinkpath);
-
- while (*s)
- {
- if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
- appendStringInfoChar(&buflinkpath, '\\');
- appendStringInfoChar(&buflinkpath, *s++);
- }
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(buflinkpath.data);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
-
- if (tablespaces)
- *tablespaces = lappend(*tablespaces, ti);
-
- appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
-
- pfree(buflinkpath.data);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
- FreeDir(tblspcdir);
+ collect_tablespaces(tablespaces, tblspcmapfile, infotbssize, needtblspcmapfile);
/*
* Construct backup label file
@@ -12390,3 +12300,102 @@ XLogRequestWalReceiverReply(void)
{
doRequestWalReceiverReply = true;
}
+
+/*
+ * Collect information about all tablespaces.
+ */
+void
+collect_tablespaces(List **tablespaces, StringInfo tblspcmapfile,
+ bool infotbssize, bool needtblspcmapfile)
+{
+ DIR *tblspcdir;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
+
+ datadirpathlen = strlen(DataDir);
+
+ tblspcdir = AllocateDir("pg_tblspc");
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH + 10];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+ StringInfoData buflinkpath;
+ char *s = linkpath;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Add the escape character '\\' before newline in a string to ensure
+ * that we can distinguish between the newline in the tablespace path
+ * and end of line while reading tablespace_map file during archive
+ * recovery.
+ */
+ initStringInfo(&buflinkpath);
+
+ while (*s)
+ {
+ if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+ appendStringInfoChar(&buflinkpath, '\\');
+ appendStringInfoChar(&buflinkpath, *s++);
+ }
+
+ /*
+ * Relpath holds the relative path of the tablespace directory when
+ * it's located within PGDATA, or NULL if it's located elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(buflinkpath.data);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if (tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
+
+ pfree(buflinkpath.data);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created them.
+ * Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+ FreeDir(tblspcdir);
+}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ca074d59ac9..abc3bad01ee 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -67,6 +67,8 @@ static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *sta
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void perform_base_backup(basebackup_options *opt);
+static List *collect_wal_files(XLogRecPtr startptr, XLogRecPtr endptr,
+ List **historyFileList);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const ListCell *a, const ListCell *b);
@@ -438,115 +440,16 @@ perform_base_backup(basebackup_options *opt)
*/
char pathbuf[MAXPGPATH];
XLogSegNo segno;
- XLogSegNo startsegno;
- XLogSegNo endsegno;
struct stat statbuf;
List *historyFileList = NIL;
List *walFileList = NIL;
- char firstoff[MAXFNAMELEN];
- char lastoff[MAXFNAMELEN];
- DIR *dir;
- struct dirent *de;
ListCell *lc;
TimeLineID tli;
pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL);
- /*
- * I'd rather not worry about timelines here, so scan pg_wal and
- * include all WAL files in the range between 'startptr' and 'endptr',
- * regardless of the timeline the file is stamped with. If there are
- * some spurious WAL files belonging to timelines that don't belong in
- * this server's history, they will be included too. Normally there
- * shouldn't be such files, but if there are, there's little harm in
- * including them.
- */
- XLByteToSeg(startptr, startsegno, wal_segment_size);
- XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
- XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
- XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
-
- dir = AllocateDir("pg_wal");
- while ((de = ReadDir(dir, "pg_wal")) != NULL)
- {
- /* Does it look like a WAL segment, and is it in the range? */
- if (IsXLogFileName(de->d_name) &&
- strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
- strcmp(de->d_name + 8, lastoff + 8) <= 0)
- {
- walFileList = lappend(walFileList, pstrdup(de->d_name));
- }
- /* Does it look like a timeline history file? */
- else if (IsTLHistoryFileName(de->d_name))
- {
- historyFileList = lappend(historyFileList, pstrdup(de->d_name));
- }
- }
- FreeDir(dir);
-
- /*
- * Before we go any further, check that none of the WAL segments we
- * need were removed.
- */
- CheckXLogRemoved(startsegno, ThisTimeLineID);
-
- /*
- * Sort the WAL filenames. We want to send the files in order from
- * oldest to newest, to reduce the chance that a file is recycled
- * before we get a chance to send it over.
- */
- list_sort(walFileList, compareWalFileNames);
-
- /*
- * There must be at least one xlog file in the pg_wal directory, since
- * we are doing backup-including-xlog.
- */
- if (walFileList == NIL)
- ereport(ERROR,
- (errmsg("could not find any WAL files")));
-
- /*
- * Sanity check: the first and last segment should cover startptr and
- * endptr, with no gaps in between.
- */
- XLogFromFileName((char *) linitial(walFileList),
- &tli, &segno, wal_segment_size);
- if (segno != startsegno)
- {
- char startfname[MAXFNAMELEN];
-
- XLogFileName(startfname, ThisTimeLineID, startsegno,
- wal_segment_size);
- ereport(ERROR,
- (errmsg("could not find WAL file \"%s\"", startfname)));
- }
- foreach(lc, walFileList)
- {
- char *walFileName = (char *) lfirst(lc);
- XLogSegNo currsegno = segno;
- XLogSegNo nextsegno = segno + 1;
-
- XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
- if (!(nextsegno == segno || currsegno == segno))
- {
- char nextfname[MAXFNAMELEN];
-
- XLogFileName(nextfname, ThisTimeLineID, nextsegno,
- wal_segment_size);
- ereport(ERROR,
- (errmsg("could not find WAL file \"%s\"", nextfname)));
- }
- }
- if (segno != endsegno)
- {
- char endfname[MAXFNAMELEN];
-
- XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
- ereport(ERROR,
- (errmsg("could not find WAL file \"%s\"", endfname)));
- }
-
+ walFileList = collect_wal_files(startptr, endptr, &historyFileList);
/* Ok, we have everything we need. Send the WAL files. */
foreach(lc, walFileList)
{
@@ -681,6 +584,120 @@ perform_base_backup(basebackup_options *opt)
pgstat_progress_end_command();
}
+/*
+ * construct a list of WAL files to be included in the backup.
+ */
+static List *
+collect_wal_files(XLogRecPtr startptr, XLogRecPtr endptr, List **historyFileList)
+{
+ XLogSegNo segno;
+ XLogSegNo startsegno;
+ XLogSegNo endsegno;
+ List *walFileList = NIL;
+ char firstoff[MAXFNAMELEN];
+ char lastoff[MAXFNAMELEN];
+ DIR *dir;
+ struct dirent *de;
+ ListCell *lc;
+ TimeLineID tli;
+
+ /*
+ * I'd rather not worry about timelines here, so scan pg_wal and include
+ * all WAL files in the range between 'startptr' and 'endptr', regardless
+ * of the timeline the file is stamped with. If there are some spurious
+ * WAL files belonging to timelines that don't belong in this server's
+ * history, they will be included too. Normally there shouldn't be such
+ * files, but if there are, there's little harm in including them.
+ */
+ XLByteToSeg(startptr, startsegno, wal_segment_size);
+ XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
+ XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
+ XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
+
+ dir = AllocateDir("pg_wal");
+ while ((de = ReadDir(dir, "pg_wal")) != NULL)
+ {
+ /* Does it look like a WAL segment, and is it in the range? */
+ if (IsXLogFileName(de->d_name) &&
+ strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
+ strcmp(de->d_name + 8, lastoff + 8) <= 0)
+ {
+ walFileList = lappend(walFileList, pstrdup(de->d_name));
+ }
+ /* Does it look like a timeline history file? */
+ else if (IsTLHistoryFileName(de->d_name))
+ {
+ if (historyFileList)
+ *historyFileList = lappend(*historyFileList, pstrdup(de->d_name));
+ }
+ }
+ FreeDir(dir);
+
+ /*
+ * Before we go any further, check that none of the WAL segments we need
+ * were removed.
+ */
+ CheckXLogRemoved(startsegno, ThisTimeLineID);
+
+ /*
+ * Sort the WAL filenames. We want to send the files in order from oldest
+ * to newest, to reduce the chance that a file is recycled before we get a
+ * chance to send it over.
+ */
+ list_sort(walFileList, compareWalFileNames);
+
+ /*
+ * There must be at least one xlog file in the pg_wal directory, since we
+ * are doing backup-including-xlog.
+ */
+ if (walFileList == NIL)
+ ereport(ERROR,
+ (errmsg("could not find any WAL files")));
+
+ /*
+ * Sanity check: the first and last segment should cover startptr and
+ * endptr, with no gaps in between.
+ */
+ XLogFromFileName((char *) linitial(walFileList),
+ &tli, &segno, wal_segment_size);
+ if (segno != startsegno)
+ {
+ char startfname[MAXFNAMELEN];
+
+ XLogFileName(startfname, ThisTimeLineID, startsegno,
+ wal_segment_size);
+ ereport(ERROR,
+ (errmsg("could not find WAL file \"%s\"", startfname)));
+ }
+ foreach(lc, walFileList)
+ {
+ char *walFileName = (char *) lfirst(lc);
+ XLogSegNo currsegno = segno;
+ XLogSegNo nextsegno = segno + 1;
+
+ XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
+ if (!(nextsegno == segno || currsegno == segno))
+ {
+ char nextfname[MAXFNAMELEN];
+
+ XLogFileName(nextfname, ThisTimeLineID, nextsegno,
+ wal_segment_size);
+ ereport(ERROR,
+ (errmsg("could not find WAL file \"%s\"", nextfname)));
+ }
+ }
+ if (segno != endsegno)
+ {
+ char endfname[MAXFNAMELEN];
+
+ XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
+ ereport(ERROR,
+ (errmsg("could not find WAL file \"%s\"", endfname)));
+ }
+
+ return walFileList;
+}
+
/*
* list_sort comparison function, to compare log/seg portion of WAL segment
* filenames, ignoring the timeline portion.
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc208..22fe35801dc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -350,6 +350,8 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(int code, Datum arg);
+extern void collect_tablespaces(List **tablespaces, StringInfo tblspcmapfile,
+ bool infotbssize, bool needtblspcmapfile);
extern void register_persistent_abort_backup_handler(void);
extern SessionBackupState get_backup_status(void);
--
2.21.1 (Apple Git-122.3)