0002-Refactor-some-backup-code-to-increase-reusability.-T_v9.patch

application/octet-stream

Filename: 0002-Refactor-some-backup-code-to-increase-reusability.-T_v9.patch
Type: application/octet-stream
Part: 2
Message: Re: WIP/PoC for parallel backup

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v9-0002
Subject: Refactor some backup code to increase reusability. This commit adds two functions; collect_tablespaces and collect_wal_files. The code related to collect tablespace information is moved from do_pg_start_backup to collect_tablespaces function. Also, the code to collect wal files is moved from perform_base_backup to collect_wal_files.
File+
src/backend/access/transam/xlog.c 100 91
src/backend/replication/basebackup.c 117 100
src/include/access/xlog.h 2 0
From 1d41fa411fc02db73a49277779baeb022f3ae82d Mon Sep 17 00:00:00 2001
From: Asif Rehman <asif.rehman@highgo.ca>
Date: Mon, 27 Jan 2020 17:48:10 +0500
Subject: [PATCH 2/6] Refactor some backup code to increase reusability. This
 commit adds two functions; collect_tablespaces and collect_wal_files. The
 code related to collect tablespace information is moved from
 do_pg_start_backup to collect_tablespaces function. Also, the code to collect
 wal files is moved from perform_base_backup to collect_wal_files.

This does not introduce any functional changes.
---
 src/backend/access/transam/xlog.c    | 191 ++++++++++++-----------
 src/backend/replication/basebackup.c | 217 +++++++++++++++------------
 src/include/access/xlog.h            |   2 +
 3 files changed, 219 insertions(+), 191 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4fa446ffa42..f5670141126 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10348,10 +10348,6 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 	PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
 	{
 		bool		gotUniqueStartpoint = false;
-		DIR		   *tblspcdir;
-		struct dirent *de;
-		tablespaceinfo *ti;
-		int			datadirpathlen;
 
 		/*
 		 * Force an XLOG file switch before the checkpoint, to ensure that the
@@ -10477,8 +10473,6 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 		if (exclusive)
 			tblspcmapfile = makeStringInfo();
 
-		datadirpathlen = strlen(DataDir);
-
 		/*
 		 * Report that we are now estimating the total backup size
 		 * if we're streaming base backup as requested by pg_basebackup
@@ -10487,91 +10481,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 			pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
 										 PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE);
 
-		/* Collect information about all tablespaces */
-		tblspcdir = AllocateDir("pg_tblspc");
-		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
-		{
-			char		fullpath[MAXPGPATH + 10];
-			char		linkpath[MAXPGPATH];
-			char	   *relpath = NULL;
-			int			rllen;
-			StringInfoData buflinkpath;
-			char	   *s = linkpath;
-
-			/* Skip special stuff */
-			if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
-				continue;
-
-			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
-			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
-			if (rllen < 0)
-			{
-				ereport(WARNING,
-						(errmsg("could not read symbolic link \"%s\": %m",
-								fullpath)));
-				continue;
-			}
-			else if (rllen >= sizeof(linkpath))
-			{
-				ereport(WARNING,
-						(errmsg("symbolic link \"%s\" target is too long",
-								fullpath)));
-				continue;
-			}
-			linkpath[rllen] = '\0';
-
-			/*
-			 * Add the escape character '\\' before newline in a string to
-			 * ensure that we can distinguish between the newline in the
-			 * tablespace path and end of line while reading tablespace_map
-			 * file during archive recovery.
-			 */
-			initStringInfo(&buflinkpath);
-
-			while (*s)
-			{
-				if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
-					appendStringInfoChar(&buflinkpath, '\\');
-				appendStringInfoChar(&buflinkpath, *s++);
-			}
-
-			/*
-			 * Relpath holds the relative path of the tablespace directory
-			 * when it's located within PGDATA, or NULL if it's located
-			 * elsewhere.
-			 */
-			if (rllen > datadirpathlen &&
-				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
-				IS_DIR_SEP(linkpath[datadirpathlen]))
-				relpath = linkpath + datadirpathlen + 1;
-
-			ti = palloc(sizeof(tablespaceinfo));
-			ti->oid = pstrdup(de->d_name);
-			ti->path = pstrdup(buflinkpath.data);
-			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
-
-			if (tablespaces)
-				*tablespaces = lappend(*tablespaces, ti);
-
-			appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
-
-			pfree(buflinkpath.data);
-#else
-
-			/*
-			 * If the platform does not have symbolic links, it should not be
-			 * possible to have tablespaces - clearly somebody else created
-			 * them. Warn about it and ignore.
-			 */
-			ereport(WARNING,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("tablespaces are not supported on this platform")));
-#endif
-		}
-		FreeDir(tblspcdir);
+		collect_tablespaces(tablespaces, tblspcmapfile, infotbssize, needtblspcmapfile);
 
 		/*
 		 * Construct backup label file
@@ -12390,3 +12300,102 @@ XLogRequestWalReceiverReply(void)
 {
 	doRequestWalReceiverReply = true;
 }
+
+/*
+ * Collect information about all tablespaces.
+ */
+void
+collect_tablespaces(List **tablespaces, StringInfo tblspcmapfile,
+					bool infotbssize, bool needtblspcmapfile)
+{
+	DIR		   *tblspcdir;
+	struct dirent *de;
+	tablespaceinfo *ti;
+	int			datadirpathlen;
+
+	datadirpathlen = strlen(DataDir);
+
+	tblspcdir = AllocateDir("pg_tblspc");
+	while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+	{
+		char		fullpath[MAXPGPATH + 10];
+		char		linkpath[MAXPGPATH];
+		char	   *relpath = NULL;
+		int			rllen;
+		StringInfoData buflinkpath;
+		char	   *s = linkpath;
+
+		/* Skip special stuff */
+		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+			continue;
+
+		snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+		rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+		if (rllen < 0)
+		{
+			ereport(WARNING,
+					(errmsg("could not read symbolic link \"%s\": %m",
+							fullpath)));
+			continue;
+		}
+		else if (rllen >= sizeof(linkpath))
+		{
+			ereport(WARNING,
+					(errmsg("symbolic link \"%s\" target is too long",
+							fullpath)));
+			continue;
+		}
+		linkpath[rllen] = '\0';
+
+		/*
+		 * Add the escape character '\\' before newline in a string to ensure
+		 * that we can distinguish between the newline in the tablespace path
+		 * and end of line while reading tablespace_map file during archive
+		 * recovery.
+		 */
+		initStringInfo(&buflinkpath);
+
+		while (*s)
+		{
+			if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+				appendStringInfoChar(&buflinkpath, '\\');
+			appendStringInfoChar(&buflinkpath, *s++);
+		}
+
+		/*
+		 * Relpath holds the relative path of the tablespace directory when
+		 * it's located within PGDATA, or NULL if it's located elsewhere.
+		 */
+		if (rllen > datadirpathlen &&
+			strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+			IS_DIR_SEP(linkpath[datadirpathlen]))
+			relpath = linkpath + datadirpathlen + 1;
+
+		ti = palloc(sizeof(tablespaceinfo));
+		ti->oid = pstrdup(de->d_name);
+		ti->path = pstrdup(buflinkpath.data);
+		ti->rpath = relpath ? pstrdup(relpath) : NULL;
+		ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+		if (tablespaces)
+			*tablespaces = lappend(*tablespaces, ti);
+
+		appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
+
+		pfree(buflinkpath.data);
+#else
+
+		/*
+		 * If the platform does not have symbolic links, it should not be
+		 * possible to have tablespaces - clearly somebody else created them.
+		 * Warn about it and ignore.
+		 */
+		ereport(WARNING,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("tablespaces are not supported on this platform")));
+#endif
+	}
+	FreeDir(tblspcdir);
+}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ca074d59ac9..abc3bad01ee 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -67,6 +67,8 @@ static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *sta
 static void send_int8_string(StringInfoData *buf, int64 intval);
 static void SendBackupHeader(List *tablespaces);
 static void perform_base_backup(basebackup_options *opt);
+static List *collect_wal_files(XLogRecPtr startptr, XLogRecPtr endptr,
+							   List **historyFileList);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
 static int	compareWalFileNames(const ListCell *a, const ListCell *b);
@@ -438,115 +440,16 @@ perform_base_backup(basebackup_options *opt)
 		 */
 		char		pathbuf[MAXPGPATH];
 		XLogSegNo	segno;
-		XLogSegNo	startsegno;
-		XLogSegNo	endsegno;
 		struct stat statbuf;
 		List	   *historyFileList = NIL;
 		List	   *walFileList = NIL;
-		char		firstoff[MAXFNAMELEN];
-		char		lastoff[MAXFNAMELEN];
-		DIR		   *dir;
-		struct dirent *de;
 		ListCell   *lc;
 		TimeLineID	tli;
 
 		pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
 									 PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL);
 
-		/*
-		 * I'd rather not worry about timelines here, so scan pg_wal and
-		 * include all WAL files in the range between 'startptr' and 'endptr',
-		 * regardless of the timeline the file is stamped with. If there are
-		 * some spurious WAL files belonging to timelines that don't belong in
-		 * this server's history, they will be included too. Normally there
-		 * shouldn't be such files, but if there are, there's little harm in
-		 * including them.
-		 */
-		XLByteToSeg(startptr, startsegno, wal_segment_size);
-		XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
-		XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
-		XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
-
-		dir = AllocateDir("pg_wal");
-		while ((de = ReadDir(dir, "pg_wal")) != NULL)
-		{
-			/* Does it look like a WAL segment, and is it in the range? */
-			if (IsXLogFileName(de->d_name) &&
-				strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
-				strcmp(de->d_name + 8, lastoff + 8) <= 0)
-			{
-				walFileList = lappend(walFileList, pstrdup(de->d_name));
-			}
-			/* Does it look like a timeline history file? */
-			else if (IsTLHistoryFileName(de->d_name))
-			{
-				historyFileList = lappend(historyFileList, pstrdup(de->d_name));
-			}
-		}
-		FreeDir(dir);
-
-		/*
-		 * Before we go any further, check that none of the WAL segments we
-		 * need were removed.
-		 */
-		CheckXLogRemoved(startsegno, ThisTimeLineID);
-
-		/*
-		 * Sort the WAL filenames.  We want to send the files in order from
-		 * oldest to newest, to reduce the chance that a file is recycled
-		 * before we get a chance to send it over.
-		 */
-		list_sort(walFileList, compareWalFileNames);
-
-		/*
-		 * There must be at least one xlog file in the pg_wal directory, since
-		 * we are doing backup-including-xlog.
-		 */
-		if (walFileList == NIL)
-			ereport(ERROR,
-					(errmsg("could not find any WAL files")));
-
-		/*
-		 * Sanity check: the first and last segment should cover startptr and
-		 * endptr, with no gaps in between.
-		 */
-		XLogFromFileName((char *) linitial(walFileList),
-						 &tli, &segno, wal_segment_size);
-		if (segno != startsegno)
-		{
-			char		startfname[MAXFNAMELEN];
-
-			XLogFileName(startfname, ThisTimeLineID, startsegno,
-						 wal_segment_size);
-			ereport(ERROR,
-					(errmsg("could not find WAL file \"%s\"", startfname)));
-		}
-		foreach(lc, walFileList)
-		{
-			char	   *walFileName = (char *) lfirst(lc);
-			XLogSegNo	currsegno = segno;
-			XLogSegNo	nextsegno = segno + 1;
-
-			XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
-			if (!(nextsegno == segno || currsegno == segno))
-			{
-				char		nextfname[MAXFNAMELEN];
-
-				XLogFileName(nextfname, ThisTimeLineID, nextsegno,
-							 wal_segment_size);
-				ereport(ERROR,
-						(errmsg("could not find WAL file \"%s\"", nextfname)));
-			}
-		}
-		if (segno != endsegno)
-		{
-			char		endfname[MAXFNAMELEN];
-
-			XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
-			ereport(ERROR,
-					(errmsg("could not find WAL file \"%s\"", endfname)));
-		}
-
+		walFileList = collect_wal_files(startptr, endptr, &historyFileList);
 		/* Ok, we have everything we need. Send the WAL files. */
 		foreach(lc, walFileList)
 		{
@@ -681,6 +584,120 @@ perform_base_backup(basebackup_options *opt)
 	pgstat_progress_end_command();
 }
 
+/*
+ * construct a list of WAL files to be included in the backup.
+ */
+static List *
+collect_wal_files(XLogRecPtr startptr, XLogRecPtr endptr, List **historyFileList)
+{
+	XLogSegNo	segno;
+	XLogSegNo	startsegno;
+	XLogSegNo	endsegno;
+	List	   *walFileList = NIL;
+	char		firstoff[MAXFNAMELEN];
+	char		lastoff[MAXFNAMELEN];
+	DIR		   *dir;
+	struct dirent *de;
+	ListCell   *lc;
+	TimeLineID	tli;
+
+	/*
+	 * I'd rather not worry about timelines here, so scan pg_wal and include
+	 * all WAL files in the range between 'startptr' and 'endptr', regardless
+	 * of the timeline the file is stamped with. If there are some spurious
+	 * WAL files belonging to timelines that don't belong in this server's
+	 * history, they will be included too. Normally there shouldn't be such
+	 * files, but if there are, there's little harm in including them.
+	 */
+	XLByteToSeg(startptr, startsegno, wal_segment_size);
+	XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
+	XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
+	XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
+
+	dir = AllocateDir("pg_wal");
+	while ((de = ReadDir(dir, "pg_wal")) != NULL)
+	{
+		/* Does it look like a WAL segment, and is it in the range? */
+		if (IsXLogFileName(de->d_name) &&
+			strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
+			strcmp(de->d_name + 8, lastoff + 8) <= 0)
+		{
+			walFileList = lappend(walFileList, pstrdup(de->d_name));
+		}
+		/* Does it look like a timeline history file? */
+		else if (IsTLHistoryFileName(de->d_name))
+		{
+			if (historyFileList)
+				*historyFileList = lappend(*historyFileList, pstrdup(de->d_name));
+		}
+	}
+	FreeDir(dir);
+
+	/*
+	 * Before we go any further, check that none of the WAL segments we need
+	 * were removed.
+	 */
+	CheckXLogRemoved(startsegno, ThisTimeLineID);
+
+	/*
+	 * Sort the WAL filenames.  We want to send the files in order from oldest
+	 * to newest, to reduce the chance that a file is recycled before we get a
+	 * chance to send it over.
+	 */
+	list_sort(walFileList, compareWalFileNames);
+
+	/*
+	 * There must be at least one xlog file in the pg_wal directory, since we
+	 * are doing backup-including-xlog.
+	 */
+	if (walFileList == NIL)
+		ereport(ERROR,
+				(errmsg("could not find any WAL files")));
+
+	/*
+	 * Sanity check: the first and last segment should cover startptr and
+	 * endptr, with no gaps in between.
+	 */
+	XLogFromFileName((char *) linitial(walFileList),
+					 &tli, &segno, wal_segment_size);
+	if (segno != startsegno)
+	{
+		char		startfname[MAXFNAMELEN];
+
+		XLogFileName(startfname, ThisTimeLineID, startsegno,
+					 wal_segment_size);
+		ereport(ERROR,
+				(errmsg("could not find WAL file \"%s\"", startfname)));
+	}
+	foreach(lc, walFileList)
+	{
+		char	   *walFileName = (char *) lfirst(lc);
+		XLogSegNo	currsegno = segno;
+		XLogSegNo	nextsegno = segno + 1;
+
+		XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
+		if (!(nextsegno == segno || currsegno == segno))
+		{
+			char		nextfname[MAXFNAMELEN];
+
+			XLogFileName(nextfname, ThisTimeLineID, nextsegno,
+						 wal_segment_size);
+			ereport(ERROR,
+					(errmsg("could not find WAL file \"%s\"", nextfname)));
+		}
+	}
+	if (segno != endsegno)
+	{
+		char		endfname[MAXFNAMELEN];
+
+		XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
+		ereport(ERROR,
+				(errmsg("could not find WAL file \"%s\"", endfname)));
+	}
+
+	return walFileList;
+}
+
 /*
  * list_sort comparison function, to compare log/seg portion of WAL segment
  * filenames, ignoring the timeline portion.
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc208..22fe35801dc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -350,6 +350,8 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
 									TimeLineID *stoptli_p);
 extern void do_pg_abort_backup(int code, Datum arg);
+extern void collect_tablespaces(List **tablespaces, StringInfo tblspcmapfile,
+								bool infotbssize, bool needtblspcmapfile);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
-- 
2.21.1 (Apple Git-122.3)