checksum-test.sh
application/x-shellscript
Filename: checksum-test.sh
Type: application/x-shellscript
Part: 6
#!/usr/bin/env bash
#
# Stress test for switching data checksums on/off in a running cluster.
# This first section builds a fresh primary plus one streaming standby in
# the current directory; the rest of the script then repeatedly flips the
# checksum state while restarting both servers under pgbench load.

DATADIR_PRIMARY=data-primary
DATADIR_STANDBY=data-standby

# pg_ctl picks this up from its environment, so it has to be exported;
# a plain shell assignment is invisible to child processes.
export PGCTLTIMEOUT=3600

# Start from a clean slate: get rid of any leftover servers (best effort,
# this simply fails when nothing is running) and of the old data directories.
killall -9 postgres
rm -Rf -- "${DATADIR_PRIMARY:?}" "${DATADIR_STANDBY:?}"

# init primary
pg_ctl -D "$DATADIR_PRIMARY" init
{
  echo 'wal_level = logical'
  echo "log_line_prefix = '%n %m [%p] [%b:%a] [%c:%l] [%s] [%v/%x] '"
  #echo "log_min_duration_statement = 0"
  echo "checkpoint_timeout = '1min'"
} >> "$DATADIR_PRIMARY/postgresql.conf"
pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start

# init a replica: clone the running primary, then move it to its own port
# so both servers can run on the same host
pg_basebackup -D "$DATADIR_STANDBY" -c fast -R -C -S replica
echo "port = 5433" >> "$DATADIR_STANDBY/postgresql.conf"
pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start
# Start the primary server; server output goes to pg-primary.log.
# Globals:   DATADIR_PRIMARY (read)
# Arguments: $1 - loop number, used only in the log message
# Outputs:   a timestamped progress line plus whatever pg_ctl prints
# Returns:   exit status of pg_ctl
primary_start() {
  # keep the loop number local so the caller's variables are not overwritten
  local loop=$1

  echo "$(date)" "loop $loop start primary"
  pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start
}
# Randomly stop the primary: immediate shutdown, fast shutdown, or leave it
# running (each with probability 1/3).
# Globals:   DATADIR_PRIMARY (read)
# Arguments: $1 - loop number (accepted for symmetry with the other helpers;
#                 not used here)
# Outputs:   a timestamped line naming the chosen action, plus pg_ctl output
# Returns:   exit status of the last command run (pg_ctl or echo)
primary_stop() {
  # no variables are assigned, so nothing in the caller gets overwritten
  case $((RANDOM % 3)) in
    0)
      echo "$(date)" "stopping primary / immediate"
      pg_ctl -D "$DATADIR_PRIMARY" -m immediate stop
      ;;
    1)
      echo "$(date)" "stopping primary / fast"
      pg_ctl -D "$DATADIR_PRIMARY" -m fast stop
      ;;
    *)
      echo "$(date)" "not stopping primary"
      ;;
  esac
}
# Stop the primary (randomly immediate or fast), optionally verify its data
# checksums offline, then start it again.
#
# Verification only runs when the requested state is "enable" and the
# shutdown was a fast (clean) one; pg_checksums is documented to require a
# cleanly shut down cluster, so it is skipped after an immediate stop.
#
# Globals:   DATADIR_PRIMARY (read)
# Arguments: $1 - loop number (not used here)
#            $2 - checksum state of this loop: "enable" or "disable"
# Outputs:   timestamped progress lines and pg_ctl output on stdout;
#            pg_checksums output is appended to checksums-primary.log;
#            a verification failure is reported on stderr
# Returns:   exit status of the final pg_ctl start
primary_verify_checksums() {
  local state=$2
  local mode

  if (( RANDOM % 2 == 0 )); then
    mode=immediate
  else
    mode=fast
  fi

  echo "$(date)" "stopping primary / $mode"
  pg_ctl -D "$DATADIR_PRIMARY" -m "$mode" stop

  if [[ "$state" == "enable" && "$mode" == "fast" ]]; then
    echo "$(date)" "primary / verify checksums"
    # do not let a failed verification go unnoticed in the log file
    if ! pg_checksums -c "$DATADIR_PRIMARY" >> checksums-primary.log 2>&1; then
      echo "$(date)" "primary / checksum verification FAILED" \
        "(see checksums-primary.log)" >&2
    fi
  fi

  echo "$(date)" "starting primary"
  pg_ctl -D "$DATADIR_PRIMARY" -l pg-primary.log start
}
# Start the standby server; server output goes to pg-standby.log.
# Globals:   DATADIR_STANDBY (read)
# Arguments: $1 - loop number, used only in the log message
# Outputs:   a timestamped progress line plus whatever pg_ctl prints
# Returns:   exit status of pg_ctl
standby_start() {
  # keep the loop number local so the caller's variables are not overwritten
  local loop=$1

  echo "$(date)" "loop $loop start standby"
  pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start
}
# Randomly stop the standby: immediate shutdown, fast shutdown, or leave it
# running (each with probability 1/3).
# Globals:   DATADIR_STANDBY (read)
# Arguments: $1 - loop number (accepted for symmetry with the other helpers;
#                 not used here)
# Outputs:   a timestamped line naming the chosen action, plus pg_ctl output
# Returns:   exit status of the last command run (pg_ctl or echo)
standby_stop() {
  # no variables are assigned, so nothing in the caller gets overwritten
  case $((RANDOM % 3)) in
    0)
      echo "$(date)" "stopping standby / immediate"
      pg_ctl -D "$DATADIR_STANDBY" -m immediate stop
      ;;
    1)
      echo "$(date)" "stopping standby / fast"
      pg_ctl -D "$DATADIR_STANDBY" -m fast stop
      ;;
    *)
      echo "$(date)" "not stopping standby"
      ;;
  esac
}
# Stop the standby (randomly immediate or fast), optionally verify its data
# checksums offline, then start it again.
#
# Verification only runs when the requested state is "enable" and the
# shutdown was a fast (clean) one; pg_checksums is documented to require a
# cleanly shut down cluster, so it is skipped after an immediate stop.
#
# Globals:   DATADIR_STANDBY (read)
# Arguments: $1 - loop number (not used here)
#            $2 - checksum state of this loop: "enable" or "disable"
# Outputs:   timestamped progress lines and pg_ctl output on stdout;
#            pg_checksums output is appended to checksums-standby.log;
#            a verification failure is reported on stderr
# Returns:   exit status of the final pg_ctl start
standby_verify_checksums() {
  local state=$2
  local mode

  if (( RANDOM % 2 == 0 )); then
    mode=immediate
  else
    mode=fast
  fi

  echo "$(date)" "stopping standby / $mode"
  pg_ctl -D "$DATADIR_STANDBY" -m "$mode" stop

  if [[ "$state" == "enable" && "$mode" == "fast" ]]; then
    echo "$(date)" "standby / verify checksums"
    # do not let a failed verification go unnoticed in the log file
    if ! pg_checksums -c "$DATADIR_STANDBY" >> checksums-standby.log 2>&1; then
      echo "$(date)" "standby / checksum verification FAILED" \
        "(see checksums-standby.log)" >&2
    fi
  fi

  echo "$(date)" "starting standby"
  pg_ctl -D "$DATADIR_STANDBY" -l pg-standby.log start
}
# Block until the standby's replay position is less than 16384 bytes behind
# the primary's current WAL position, polling pg_stat_replication once per
# second. An empty result (e.g. no row in pg_stat_replication yet) counts as
# "not caught up", so the loop also waits for the standby to reconnect.
#
# psql is run without a port option, i.e. against whatever server the
# default connection settings point at.
#
# Arguments: $1 - loop number, used only in the log message
# Outputs:   one timestamped line per poll while still waiting
# Returns:   0 once the standby has caught up
standby_catch_up() {
  local loop=$1
  local lag caught_up

  # builtin `true` instead of /bin/true: no dependency on the binary's path
  while true; do
    lag=$(psql -t -A postgres -c "select (pg_current_wal_lsn() - replay_lsn) from pg_stat_replication")
    caught_up=$(psql -t -A postgres -c "select (pg_current_wal_lsn() - replay_lsn) < 16384 from pg_stat_replication")

    if [[ "$caught_up" == "t" ]]; then
      break
    fi

    echo "$(date)" "loop $loop waiting for standby to catch up ($lag bytes)"
    sleep 1
  done
}
# Poll the data_checksums setting once per second until it reports the value
# matching the requested state ("enable" -> on, "disable" -> off). On every
# poll the contents of pg_stat_progress_data_checksums are printed as well.
#
# Arguments: $1 - loop number, used only in the log message
#            $2 - database to connect to
#            $3 - requested state: "enable" or "disable"
# Outputs:   progress view contents and a timestamped line per poll
# Returns:   0 once the setting matches; 1 if $3 is not a known state
#            (which would otherwise make the loop spin forever)
wait_for_checksum_state() {
  local loop=$1
  local dbname=$2
  local target=$3
  local wanted current

  case "$target" in
    enable) wanted=on ;;
    disable) wanted=off ;;
    *)
      echo "wait_for_checksum_state: unknown state '$target'" >&2
      return 1
      ;;
  esac

  # builtin `true` instead of /bin/true: no dependency on the binary's path
  while true; do
    current=$(psql -t -A "$dbname" -c "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums'")
    psql "$dbname" -c "SELECT * FROM pg_stat_progress_data_checksums"

    if [[ "$current" == "$wanted" ]]; then
      break
    fi

    echo "$(date)" "loop $loop checksum state: $current (sleeping)"
    sleep 1
  done
}
# Ask the server to enable or disable data checksums.
# When enabling, the "fast" argument of pg_enable_data_checksums() is chosen
# at random (true/false with equal probability). Any state other than
# "enable" is treated as a request to disable.
#
# Arguments: $1 - loop number, used only in the log message
#            $2 - database to connect to
#            $3 - requested state: "enable" or anything else for disable
# Outputs:   a timestamped line describing the request, plus psql output
# Returns:   exit status of psql
change_checksums() {
  local loop=$1
  local dbname=$2
  local target=$3
  local fast

  if [[ "$target" == "enable" ]]; then
    if (( RANDOM % 2 == 0 )); then
      fast="true"
    else
      fast="false"
    fi
    echo "$(date)" "loop $loop enabling checksums fast $fast"
    psql "$dbname" -c "select pg_enable_data_checksums(fast := $fast)"
  else
    echo "$(date)" "loop $loop disabling checksums"
    psql "$dbname" -c "select pg_disable_data_checksums()"
  fi
}
# Sleep for a random whole number of seconds between 1 and the given maximum.
#
# Arguments: $1 - loop number, used only in the log message
#            $2 - upper bound in seconds (positive integer)
# Outputs:   a timestamped line stating the chosen duration
# Returns:   exit status of sleep; 1 if $2 is not a positive integer
#            (a zero or empty bound would be a division by zero below)
random_sleep() {
  local loop=$1
  local max=$2
  local secs

  if [[ ! "$max" =~ ^[1-9][0-9]*$ ]]; then
    echo "random_sleep: invalid maximum '$max'" >&2
    return 1
  fi

  secs=$((RANDOM % max + 1))
  echo "$(date)" "loop $loop sleeping for $secs seconds"
  sleep "$secs"
}
# small test: upper bound (in seconds) for each random sleep
sleep=10
# NOTE(review): nothing in the visible script reads this value; kept in case
# something outside this view depends on it.
m=fast

# default is 'on', so start with disable
s="disable"

# 200 loops; each one changes the checksum state once while both servers are
# restarted under load, then flips the target state for the next loop
for ((r = 1; r <= 200; r++)); do

  # pick one of ten database names and a random pgbench scale (10..49)
  db_idx=$((RANDOM % 10))
  db="test_${db_idx}"
  scale=$((RANDOM % 40 + 10))

  # (re)create the database with the chosen scale
  dropdb --if-exists "$db"
  createdb "$db"
  pgbench -i -s "$scale" "$db" >> pgbench-init.log 2>&1

  # run pgbench in the background
  # NOTE(review): the run against port 5433 (the standby) uses pgbench's
  # default script, which writes; on a read-only standby that presumably
  # fails right away - confirm whether -S (select-only) was intended.
  echo "$(date)" "loop $r pgbench scale $scale"
  pgbench -c 1 -P 1 -T 3600 "$db" >> pgbench-primary.log 2>&1 &
  pgbench -p 5433 -c 1 -P 1 -T 3600 "$db" >> pgbench-standby.log 2>&1 &

  # sleep for a bit
  random_sleep "$r" "$sleep"

  # start the checksums change
  change_checksums "$r" "$db" "$s"

  # sleep for a bit
  random_sleep "$r" "$sleep"

  psql "$db" -c "SELECT * FROM pg_stat_progress_data_checksums"

  # stop the primary/standby in some way
  primary_stop "$r"
  standby_stop "$r"

  # start the primary/standby again (if a server was left running, pg_ctl
  # just reports an error here and the loop carries on)
  primary_start "$r"
  standby_start "$r"

  # run pgbench on primary and standby in the background
  echo "$(date)" "loop $r pgbench"
  pgbench -c 1 -P 1 -T 3600 "$db" >> pgbench-primary.log 2>&1 &
  pgbench -p 5433 -c 1 -P 1 -T 3600 "$db" >> pgbench-standby.log 2>&1 &

  # sleep for a bit
  random_sleep "$r" "$sleep"

  # start the checksums change (again, to restart the workers)
  change_checksums "$r" "$db" "$s"

  # wait for the checksums to get enabled/disabled
  echo "$(date)" "loop $r waiting for checksums to change in the instance"
  wait_for_checksum_state "$r" "$db" "$s"

  # stop primary/standby, verify checksums and start again
  echo "$(date)" "loop $r verify checksums on primary"
  primary_verify_checksums "$r" "$s"

  echo "$(date)" "loop $r verify checksums on standby"
  standby_verify_checksums "$r" "$s"

  # wait for standby to catch up
  echo "$(date)" "loop $r wait for standby to catch up"
  standby_catch_up "$r"

  # flip the state for the next loop
  if [[ "$s" == "disable" ]]; then
    s="enable"
  else
    s="disable"
  fi

done