fix_some_problems_about_cascading_replication_v1.patch

text/x-patch

Filename: fix_some_problems_about_cascading_replication_v1.patch
Type: text/x-patch
Part: 0
Message: Some problems about cascading replication
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index c0a32a3..2ec39dd 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2328,7 +2328,7 @@ reaper(SIGNAL_ARGS)
 			 * XXX should avoid the need for disconnection. When we do,
 			 * am_cascading_walsender should be replaced with RecoveryInProgress()
 			 */
-			if (max_wal_senders > 0)
+			if (max_wal_senders > 0 && CountChildren(BACKEND_TYPE_WALSND) > 0)
 			{
 				ereport(LOG,
 						(errmsg("terminating all walsender processes to force cascaded standby(s) to update timeline and reconnect")));
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 0eadf64..ef6894c 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -368,6 +368,22 @@ StartReplication(StartReplicationCmd *cmd)
 	SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);
 
 	/*
+	 * When promoting a cascading standby, postmaster sends SIGUSR2 to
+	 * any cascading walsenders to kill them. But there is a corner case where
+	 * such a walsender fails to receive SIGUSR2 and unexpectedly survives the
+	 * standby promotion. This happens when postmaster sends SIGUSR2 before
+	 * the walsender marks itself as a WAL sender, because postmaster sends
+	 * SIGUSR2 only to the processes marked as WAL senders.
+	 *
+	 * To avoid this corner case, if recovery is NOT in progress even though
+	 * this walsender is a cascading one, we do the same thing the SIGUSR2
+	 * signal handler does, i.e., set walsender_ready_to_stop to true, which
+	 * causes the walsender to terminate later.
+	 */
+	if (am_cascading_walsender && !RecoveryInProgress())
+		walsender_ready_to_stop = true;
+
+	/*
 	 * We assume here that we're logging enough information in the WAL for
 	 * log-shipping, since this is checked in PostmasterMain().
 	 *