0001-Test-that-replay-of-WAL-logs-on-standby-does-not-aff.patch

application/octet-stream
Filename: 0001-Test-that-replay-of-WAL-logs-on-standby-does-not-aff.patch
Type: application/octet-stream
Part: 0
Message: Unnecessary delay in streaming replication due to replay lag
Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch 0001
Subject: Test that replay of WAL logs on standby does not affect syncrep
File	+	−
src/backend/access/transam/xlog.c	3	0
src/backend/utils/misc/guc.c	11	0
src/include/access/xlog.h	1	0
src/test/recovery/t/018_replay_lag_syncrep.pl	169	0
From 5a1b1062dae7ab5f28b14907f493df106607c5d8 Mon Sep 17 00:00:00 2001
From: Wu Hao <hawu@pivotal.io>
Date: Thu, 16 Jan 2020 17:57:51 +0530
Subject: [PATCH 1/3] Test that replay of WAL logs on standby does not affect
 syncrep

A new debug GUC is introduced to slow down WAL replay on standby.  The
test sets up synchronous replication and sets the GUC, causing startup
process on standby to sleep for a few seconds before replaying each WAL
record.  The test then inserts data on master to build replay lag.

The replication connection is broken and new commits are made on master.
The test expects the commits to not block, in spite of the replay lag.
It fails if the commits take longer than a timeout.  The value of this
timeout is much less than the total replay lag.  If the commits do not
block, it is confirmed that the WAL streaming is re-established without
waiting for the startup process to finish replaying WAL already
available in pg_wal directory.

Co-authored-by: Asim R P <apraveen@pivotal.io>
---
 src/backend/access/transam/xlog.c             |   3 +
 src/backend/utils/misc/guc.c                  |  11 ++
 src/include/access/xlog.h                     |   1 +
 src/test/recovery/t/018_replay_lag_syncrep.pl | 169 ++++++++++++++++++++++++++
 4 files changed, 184 insertions(+)
 create mode 100644 src/test/recovery/t/018_replay_lag_syncrep.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7f4f784c0e..05d72bfd4b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -87,6 +87,7 @@ int			max_wal_size_mb = 1024; /* 1 GB */
 int			min_wal_size_mb = 80;	/* 80 MB */
 int			wal_keep_segments = 0;
 int			XLOGbuffers = -1;
+int			debug_replay_delay = 0;
 int			XLogArchiveTimeout = 0;
 int			XLogArchiveMode = ARCHIVE_MODE_OFF;
 char	   *XLogArchiveCommand = NULL;
@@ -7198,6 +7199,8 @@ StartupXLOG(void)
 
 				/* Now apply the WAL record itself */
 				RmgrTable[record->xl_rmid].rm_redo(xlogreader);
+				if (debug_replay_delay > 0)
+					pg_usleep(debug_replay_delay * 1000 * 1000);
 
 				/*
 				 * After redo, check whether the backup pages associated with
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e5f8a1301f..9d4e97d3d3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1987,6 +1987,17 @@ static struct config_bool ConfigureNamesBool[] =
 
 static struct config_int ConfigureNamesInt[] =
 {
+	{
+		{"debug_replay_delay", PGC_SUSET, WAL_ARCHIVING,
+			gettext_noop("Slow down replay process by sleeping for N seconds"
+						 "before starting to replay a WAL record."),
+			NULL,
+			GUC_UNIT_S
+		},
+		&debug_replay_delay,
+		0, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
 	{
 		{"archive_timeout", PGC_SIGHUP, WAL_ARCHIVING,
 			gettext_noop("Forces a switch to the next WAL file if a "
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc20..3c8046f8a2 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -110,6 +110,7 @@ extern int	max_wal_size_mb;
 extern int	wal_keep_segments;
 extern int	XLOGbuffers;
 extern int	XLogArchiveTimeout;
+extern int  debug_replay_delay;
 extern int	wal_retrieve_retry_interval;
 extern char *XLogArchiveCommand;
 extern bool EnableHotStandby;
diff --git a/src/test/recovery/t/018_replay_lag_syncrep.pl b/src/test/recovery/t/018_replay_lag_syncrep.pl
new file mode 100644
index 0000000000..7218e7e8d3
--- /dev/null
+++ b/src/test/recovery/t/018_replay_lag_syncrep.pl
@@ -0,0 +1,169 @@
+# This test demonstrates that synchronous replication is affected by
+# replay process on standby.  If the replay process lags far behind
+# and the replication connection is broken (e.g. temporary network
+# problem) the connection is not established again until the replay
+# process finishes replaying all WAL.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 5;
+
+# Query checking sync_priority and sync_state of each standby
+my $check_sql =
+  "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication ORDER BY application_name;";
+
+# Check that sync_state of a standby is expected (waiting till it is).
+# If $setting is given, synchronous_standby_names is set to it and
+# the configuration file is reloaded before the test.
+sub test_sync_state
+{
+	my ($self, $expected, $msg, $setting) = @_;
+
+	if (defined($setting))
+	{
+		$self->safe_psql('postgres',
+						 "ALTER SYSTEM SET synchronous_standby_names = '$setting';");
+		$self->reload;
+	}
+
+	ok($self->poll_query_until('postgres', $check_sql, $expected), $msg);
+	return;
+}
+
+# Start a standby and check that it is registered within the WAL sender
+# array of the given primary.  This polls the primary's pg_stat_replication
+# until the standby is confirmed as registered.
+sub start_standby_and_wait
+{
+	my ($master, $standby) = @_;
+	my $master_name  = $master->name;
+	my $standby_name = $standby->name;
+	my $query =
+	  "SELECT count(1) = 1 FROM pg_stat_replication WHERE application_name = '$standby_name'";
+
+	$standby->start;
+
+	print("### Waiting for standby \"$standby_name\" on \"$master_name\"\n");
+	$master->poll_query_until('postgres', $query);
+	return;
+}
+
+# Initialize master node
+my $node_master = get_new_node('master');
+my @extra = (q[--wal-segsize], q[1]);
+$node_master->init(allows_streaming => 1, extra => \@extra);
+$node_master->start;
+my $backup_name = 'master_backup';
+
+# Setup physical replication slot for streaming replication
+$node_master->safe_psql('postgres',
+	q[SELECT pg_create_physical_replication_slot('phys_slot', true, false);]);
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create standby linking to master
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, $backup_name,
+								has_streaming => 1);
+$node_standby->append_conf('postgresql.conf',
+						   q[primary_slot_name = 'phys_slot']);
+# Enable debug logging in standby
+$node_standby->append_conf('postgresql.conf',
+						   q[log_min_messages = debug5]);
+
+start_standby_and_wait($node_master, $node_standby);
+
+# Make standby synchronous
+test_sync_state(
+	$node_master,
+	qq(standby|1|sync),
+	'standby is synchronous',
+	'standby');
+
+# Slow down WAL replay by inducing 10 seconds sleep before replaying
+# each WAL record.
+$node_standby->safe_psql('postgres', 'ALTER SYSTEM set debug_replay_delay TO 10;');
+$node_standby->reload;
+
+# Load data on master and induce replay lag in standby.
+$node_master->safe_psql('postgres', 'CREATE TABLE replay_lag_test(a int);');
+$node_master->safe_psql(
+	'postgres',
+	'insert into replay_lag_test select i from generate_series(1,100) i;');
+
+# Obtain WAL sender PID and kill it.
+my $walsender_pid = $node_master->safe_psql(
+	'postgres',
+	q[select active_pid from pg_get_replication_slots() where slot_name = 'phys_slot']);
+
+# Kill walsender, so that the replication connection breaks.
+kill 'SIGTERM', $walsender_pid;
+
+# The replication connection should be re-establised because
+# postmaster will restart WAL receiver in its main loop.  Try to
+# commit a transaction with a timeout of 2 seconds.  The test expects
+# that the commit does not timeout.
+my $timed_out = 0;
+$node_master->safe_psql(
+	'postgres',
+	'insert into replay_lag_test values (1);',
+	timeout => 2,
+	timed_out => \$timed_out);
+
+# The insert should not timeout because synchronous replication is
+# re-established, even when startup process is still replaying
+# WAL already fetched in pg_wal/.
+is($timed_out, 0, 'insert after WAL receiver restart');
+
+# Break the replication connection by restarting standby.
+$node_standby->restart;
+
+# The replication connection should be re-establised by upon standby
+# restart.  Try to commit a transaction with a 2 second timeout.  The
+# timeout should not be hit because synchronous replication should be
+# re-established before startup process finishes the replay of WAL
+# already available in pg_wal/.
+$timed_out = 0;
+$node_master->safe_psql(
+	'postgres',
+	'insert into replay_lag_test values (2);',
+	timeout => 1,
+	timed_out => \$timed_out);
+
+# Reset the debug GUC, so that the replay process is no longer slowed down.
+$node_standby->safe_psql('postgres', 'ALTER SYSTEM set debug_replay_delay TO 0;');
+$node_standby->reload;
+
+# Ideally, the insert after standby restart should not
+# timeout but it currently does, causing the test to fail.
+is($timed_out, 0, 'insert after standby restart');
+
+# Switch to a new WAL file and see if things work well.
+$node_master->safe_psql(
+	'postgres',
+	'select pg_switch_wal();');
+
+# Transactions should work fine on master.
+$timed_out = 0;
+$node_master->safe_psql(
+	'postgres',
+	'insert into replay_lag_test values (3);',
+	timeout => 1,
+	timed_out => \$timed_out);
+
+# Standby should also have identical content, now that we've reset the
+# replay delay.
+my $count_sql = q[select count(*) from replay_lag_test;];
+my $expected = q[103];
+ok($node_standby->poll_query_until('postgres', $count_sql, $expected), 'standby query');
+
+$node_standby->promote;
+$node_master->stop;
+$node_standby->safe_psql('postgres', 'insert into replay_lag_test values (4);');
+
+$expected = q[104];
+ok($node_standby->poll_query_until('postgres', $count_sql, $expected),
+   'standby query after promotion');
-- 
2.14.3 (Apple Git-98)