test_autovacuum_prioritization.sh
application/x-sh
Filename: test_autovacuum_prioritization.sh
Type: application/x-sh
Part: 2
#!/usr/bin/env bash
set -euo pipefail
# ==============================================================
# Script Name: test_autovacuum_prioritization.sh
# Author: Sami Imseih
#
# Purpose:
# Automates an OLTP + batch workload test to observe and analyze
# PostgreSQL autovacuum prioritization behavior.
#
# What it does:
# - Creates synthetic OLTP and batch tables.
# - Runs concurrent pgbench workloads (updates + inserts).
# - Monitors pg_stat_all_tables for autovacuum and analyze activity.
# - Collects PostgreSQL logs and generates a summary report.
#
# Usage:
# ./test_autovacuum_prioritization.sh /path/to/config_file
#
# Output:
# All logs and temporary files are stored under $BASE_DIR/tmp/
# Key outputs include:
# - oltp.out / batch.out (pgbench logs)
# - relstats_monitor.log (autovacuum stats over time)
# - summary_report.txt (final metrics summary)
# - pg_logs/ (PostgreSQL logs)
#
# Requirements:
# - PostgreSQL (psql, pgbench, pg_ctl) available in PATH
# - Sufficient permissions to restart PostgreSQL
# - Non-production test environment
#
# --------------------------------------------------------------
# Example Config File (test_autovacuum.conf)
# --------------------------------------------------------------
# BASE_DIR="$HOME/scripts/test_autovacuum_prioritization"
# OLTP_TABLES=10
# OLTP_ROWS_START=1000
# OLTP_ROWS_MULTIPLIER=2
# OLTP_MAX_ROWS=5000000
# OLTP_CONNECTIONS=200
# OLTP_RATE=15000
# BATCH_TABLES=2
# BATCH_SIZE=1000
# BATCH_CONNECTIONS=5
# BATCH_SLEEP=20
# TIMEOUT=1800
# BUCKETS=15
# --------------------------------------------------------------
# Example Run:
# ./test_autovacuum_prioritization.sh ./test_autovacuum.conf
# ==============================================================
CONFIG_FILE="$1"
if [ -z "$CONFIG_FILE" ] || [ ! -f "$CONFIG_FILE" ] || [ ! -r "$CONFIG_FILE" ]; then
echo "Error: Config file not found or not readable: '$CONFIG_FILE'"
echo "Usage: $0 /path/to/config_file"
exit 1
fi
# Load the config
source "$CONFIG_FILE"
# ==========================================
# Temporary and log files. DO NOT TOUCH!!
# ==========================================
TMP_DIR="$BASE_DIR/tmp"
TMP_BATCH_DIR="$TMP_DIR/batch_workload"
TMP_OLTP_DIR="$TMP_DIR/oltp_workload"
OLTP_LOG="$TMP_DIR/oltp.out"
BATCH_LOG="$TMP_DIR/batch.out"
READY_FILE="$TMP_DIR/oltp_ready"
SUMMARY_FILE="$TMP_DIR/summary_report.txt"
PG_LOGS="$TMP_DIR/pg_logs"
CONFIG_LOG="$TMP_DIR/config"
RELSTATS_MONITOR_LOG="$TMP_DIR/relstats_monitor.log"
# ==========================================
# Prepare temp directory
# ==========================================
if [ -n "$TMP_DIR" ] && [ "$TMP_DIR" != "/" ]; then
echo "Preparing temp directory at $TMP_DIR..."
rm -rf "$TMP_DIR"
mkdir -p "$TMP_DIR" "$PG_LOGS"
else
echo "Invalid TMP_DIR!"
exit 1
fi
# ==========================================
# Save the config
# ==========================================
cat > "$CONFIG_LOG" <<EOF
BASE_DIR=$BASE_DIR
OLTP_TABLES=$OLTP_TABLES
OLTP_ROWS_START=$OLTP_ROWS_START
OLTP_ROWS_MULTIPLIER=$OLTP_ROWS_MULTIPLIER
OLTP_MAX_ROWS=$OLTP_MAX_ROWS
BATCH_TABLES=$BATCH_TABLES
BATCH_SIZE=$BATCH_SIZE
BATCH_CONNECTIONS=$BATCH_CONNECTIONS
OLTP_CONNECTIONS=$OLTP_CONNECTIONS
TIMEOUT=$TIMEOUT
OLTP_RATE=$OLTP_RATE
BATCH_SLEEP=$BATCH_SLEEP
BUCKETS=$BUCKETS
EOF
# ==========================================
# OLTP Workload
# ==========================================
run_oltp() {
local NUM_TABLES="$1"
local NUM_ROWS="$2"
local READY_FILE="$3"
local CONNECTIONS="$4"
local TIMEOUT="$5"
local MAX_ROWS="$6"
local MULTIPLIER="$7"
local TMP_OLTP_DIR="$8"
local RATE_OPT=""
if [ -n "${OLTP_RATE:-}" ]; then
RATE_OPT="-R${OLTP_RATE}"
echo "Using OLTP rate limit: ${OLTP_RATE} TPS"
fi
echo "=== [OLTP] Creating tables ==="
local CUR_ROWS="$NUM_ROWS"
local TABLE_NAMES=()
for i in $(seq 1 "$NUM_TABLES"); do
local TABLE="table_${i}_${CUR_ROWS}"
TABLE_NAMES+=("$TABLE")
echo "Creating table $TABLE with $CUR_ROWS rows..."
psql <<EOF
CREATE UNLOGGED TABLE $TABLE (
id BIGSERIAL PRIMARY KEY,
update_time timestamp,
col1 TEXT,
col2 BIGINT,
col3 TEXT,
col4 TEXT
);
CREATE INDEX ON $TABLE (update_time);
EOF
echo "Populating $TABLE..."
psql <<EOF
ALTER TABLE $TABLE SET (autovacuum_enabled = OFF);
INSERT INTO $TABLE (update_time, col1, col2, col3, col4)
SELECT
NULL,
md5(random()::text),
(random()*1000000)::BIGINT,
repeat(' ', 1000),
repeat(' ', 1000)
FROM generate_series(1, $CUR_ROWS);
CREATE INDEX ON $TABLE (col3);
CREATE INDEX ON $TABLE (col4);
EOF
CUR_ROWS=$((CUR_ROWS * MULTIPLIER))
if (( CUR_ROWS > MAX_ROWS )); then
CUR_ROWS="$NUM_ROWS"
fi
done
echo "=== [OLTP] Vacuuming tables ==="
for TABLE in "${TABLE_NAMES[@]}"; do
echo "Vacuuming $TABLE..."
psql -c "VACUUM FREEZE $TABLE;" \
-c "ALTER TABLE $TABLE SET (autovacuum_enabled = ON);"
done
echo "=== [OLTP] Restarting PostgreSQL ==="
# Run stop command in background
pg_ctl stop -mf &
pgctl_pid=$!
# Wait for postgres to actually stop
while pgrep -x postgres >/dev/null; do
echo " Waiting for shutdown..."
sleep 1
done
# Optionally wait for the pg_ctl background job to finish
wait $pgctl_pid
echo "PostgreSQL has stopped."
pg_ctl start
until pg_isready -q; do
echo " Waiting for startup..."
sleep 1
done
echo "PostgreSQL restarted."
touch "$READY_FILE"
echo "=== [OLTP] Generating per-table pgbench scripts ==="
mkdir -p "$TMP_OLTP_DIR"
CUR_ROWS="$NUM_ROWS"
local PG_FILES=()
for i in $(seq 1 "$NUM_TABLES"); do
local TABLE="table_${i}_${CUR_ROWS}"
local TABLE_FILE="$TMP_OLTP_DIR/${TABLE}.sql"
echo "Creating pgbench script for ${TABLE} with ${CUR_ROWS} rows..."
cat > "$TABLE_FILE" <<EOF
\set id random(1, $CUR_ROWS)
UPDATE ${TABLE} SET update_time = now() WHERE id = :id;
EOF
PG_FILES+=("-f" "$TABLE_FILE@1")
CUR_ROWS=$((CUR_ROWS * MULTIPLIER))
if (( CUR_ROWS > MAX_ROWS )); then
CUR_ROWS="$NUM_ROWS"
fi
done
echo "=== [OLTP] Starting pgbench workload ==="
pgbench -n "${PG_FILES[@]}" -c"$CONNECTIONS" -T"$TIMEOUT" $RATE_OPT
}
# ==========================================
# Batch Workload
# ==========================================
run_batch() {
local BATCH_SIZE="$1"
local TMP_DIR="$2"
local NUM_TABLES="$3"
local CONNECTIONS="$4"
local TIMEOUT="$5"
local RATE_OPT=""
if [ -n "${BATCH_RATE:-}" ]; then
RATE_OPT="-R${BATCH_RATE}"
echo "Using Batch rate limit: ${BATCH_RATE} TPS"
fi
mkdir -p "$TMP_DIR"
echo "=== [Batch] Creating tables ==="
for i in $(seq 1 "$NUM_TABLES"); do
local TABLE="table_batch_$i"
psql -c "CREATE UNLOGGED TABLE IF NOT EXISTS $TABLE (
id BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT,
c1 TEXT, c2 TEXT
);"
psql -c "CREATE INDEX ON $TABLE (id);"
psql -c "CREATE INDEX ON $TABLE (id2);"
psql -c "CREATE INDEX ON $TABLE (c1);"
done
echo "=== [Batch] Creating SQL files ==="
local PG_FILES=()
for i in $(seq 1 "$NUM_TABLES"); do
local TABLE="table_batch_$i"
local PG_FILE="$TMP_DIR/${TABLE}.sql"
cat > "$PG_FILE" <<EOF
INSERT INTO $TABLE (id, id2, id3, id4, c1, c2)
SELECT n, n, n, n, repeat(' ', 100), repeat(' ', 100)
FROM generate_series(1, $BATCH_SIZE) AS n;
\sleep ${BATCH_SLEEP} s
EOF
PG_FILES+=("-f" "$PG_FILE@1")
done
echo "=== [Batch] Starting pgbench inserts ==="
pgbench -n "${PG_FILES[@]}" -c"$CONNECTIONS" -T"$TIMEOUT"
}
# ==========================================
# Monitor relstats
# ==========================================
monitor_relstats() {
local LOG_FILE="$1"
local TIMEOUT="$2"
local INTERVAL=10
local START_TIME=$(date +%s)
while true; do
CURRENT_TIME=$(date +%s)
ELAPSED=$((CURRENT_TIME - START_TIME))
if (( ELAPSED >= TIMEOUT )); then
echo "Monitor reached timeout (${ELAPSED}s), stopping." >> "$LOG_FILE"
break
fi
psql -X -q -c "
SELECT 'TAB_DATA',
now() timestamp,
a.relname,
b.reltuples::numeric AS reltuples,
a.n_dead_tup,
a.autovacuum_count AS av_count,
a.autoanalyze_count AS aa_count,
a.total_autovacuum_time AS total_av_time,
a.total_autoanalyze_time AS total_aa_time,
a.n_tup_upd,
a.n_tup_hot_upd,
a.n_tup_ins,
-- Average autovacuum time, safely avoiding divide by zero
ROUND(
CASE WHEN a.autovacuum_count > 0
THEN (a.total_autovacuum_time::numeric / a.autovacuum_count)
ELSE 0
END, 2
) AS avg_av_time,
-- Average autoanalyze time, safely avoiding divide by zero
ROUND(
CASE WHEN a.autoanalyze_count > 0
THEN (a.total_autoanalyze_time::numeric / a.autoanalyze_count)
ELSE 0
END, 2
) AS avg_aa_time
FROM pg_stat_all_tables a
JOIN pg_class b ON a.relname = b.relname
WHERE a.relname LIKE 'table_%'
ORDER BY 4 DESC;
" >> "$LOG_FILE" 2>&1
sleep "$INTERVAL"
done
}
# ==========================================
# Start workloads
# ==========================================
echo "=== Starting OLTP workload in background ==="
(run_oltp "$OLTP_TABLES" "$OLTP_ROWS_START" "$READY_FILE" "$OLTP_CONNECTIONS" "$TIMEOUT" "$OLTP_MAX_ROWS" "$OLTP_ROWS_MULTIPLIER" "$TMP_OLTP_DIR" | tee "$OLTP_LOG") &
OLTP_PID=$!
echo "Waiting for OLTP ready file..."
while [ ! -f "$READY_FILE" ]; do sleep 1; done
if [[ "$BATCH_TABLES" -gt 0 ]]; then
echo "OLTP ready detected! Starting batch workload..."
(run_batch "$BATCH_SIZE" "$TMP_BATCH_DIR" "$BATCH_TABLES" "$BATCH_CONNECTIONS" "$TIMEOUT" | tee "$BATCH_LOG") &
BATCH_PID=$!
else
BATCH_PID=0
fi
echo "=== Starting Monitoring job in background ==="
(monitor_relstats "$RELSTATS_MONITOR_LOG" "$TIMEOUT") &
MONITOR_PID=$!
# ===========================================================
# Main wait loop to terminate tasks on timeout or completion
# ===========================================================
START_TIME=$(date +%s)
LAST_PRINT=0
while true; do
CURRENT_TIME=$(date +%s)
ELAPSED=$((CURRENT_TIME - START_TIME))
REMAINING=$((TIMEOUT - ELAPSED))
if (( ELAPSED - LAST_PRINT >= 30 )); then
echo "Time remaining: ${REMAINING}s"
LAST_PRINT=$ELAPSED
fi
if ! kill -0 "$OLTP_PID" 2>/dev/null; then
echo "OLTP workload completed — stopping batch..."
if [ "${BATCH_PID:-0}" -gt 0 ]; then
kill -TERM "$BATCH_PID" 2>/dev/null || true
wait "$BATCH_PID" 2>/dev/null || true
fi
break
fi
if [ "${BATCH_PID:-0}" -gt 0 ]; then
if ! kill -0 "$BATCH_PID" 2>/dev/null; then
echo "Batch workload completed — stopping OLTP..."
kill -TERM "$OLTP_PID" 2>/dev/null || true
wait "$OLTP_PID" 2>/dev/null || true
break
fi
fi
if (( ELAPSED >= TIMEOUT )); then
echo "Timeout reached (${ELAPSED}s) — stopping workloads..."
if [ "${BATCH_PID:-0}" -gt 0 ]; then
kill -TERM "$OLTP_PID" "$BATCH_PID" 2>/dev/null || true
wait "$OLTP_PID" 2>/dev/null || true
wait "$BATCH_PID" 2>/dev/null || true
else
kill -TERM "$OLTP_PID" 2>/dev/null || true
wait "$OLTP_PID" 2>/dev/null || true
fi
break
fi
sleep 1
done
echo "Both workloads finished."
echo "Stopping relstats monitor..."
kill -TERM "$MONITOR_PID" 2>/dev/null || true
wait "$MONITOR_PID" 2>/dev/null || true
# ==========================================
# Collect PostgreSQL logs
# ==========================================
cp "$PGDATA"/log/* "$PG_LOGS"/.
# ==========================================
# Summary Report
# ==========================================
{
echo "=== Database Settings ==="
psql <<EOF
SELECT name, setting
FROM pg_settings
WHERE name LIKE '%autovac%'
OR name IN ('shared_buffers', 'max_connections');
EOF
echo "=== Total Activity ==="
psql <<EOF
\x
SELECT
SUM(c.n_dead_tup) AS total_n_dead_tup,
SUM(c.n_mod_since_analyze) AS total_n_mod_since_analyze,
SUM(b.reltuples::numeric) AS total_reltuples,
SUM(c.autovacuum_count) AS total_autovacuum_count,
SUM(c.autoanalyze_count) AS total_autoanalyze_count,
SUM(c.n_tup_upd) AS total_n_tup_upd,
SUM(c.n_tup_hot_upd) AS total_n_tup_hot_upd,
SUM(c.n_tup_newpage_upd) AS total_n_tup_newpage_upd,
SUM(c.n_tup_ins) AS total_n_tup_ins,
SUM(c.total_autovacuum_time) AS total_total_autovacuum_time,
SUM(c.total_autoanalyze_time) AS total_total_autoanalyze_time,
ROUND(
CASE WHEN SUM(c.autovacuum_count) > 0
THEN (SUM(c.total_autovacuum_time)::numeric / SUM(c.autovacuum_count)::numeric)
END, 2
) AS avg_autovacuum_time,
ROUND(
CASE WHEN SUM(c.autoanalyze_count) > 0
THEN (SUM(c.total_autoanalyze_time)::numeric / SUM(c.autoanalyze_count)::numeric)
END, 2
) AS avg_autoanalyze_time,
COUNT(*) AS table_count
FROM pg_class b
JOIN pg_stat_all_tables c ON b.oid = c.relid
WHERE UPPER(b.relname) LIKE '%TABL%'
AND b.relname NOT LIKE 'pg_%';
EOF
} | tee "$SUMMARY_FILE"
echo "=== Stop PostgreSQL ==="
pg_ctl stop -mi
echo "==== Move logfiles ===="
mv $PGDATA/log/* "$PG_LOGS"/.;
cat $RELSTATS_MONITOR_LOG | grep TAB_DATA | sed 's/^ TAB_DATA |//' > $RELSTATS_MONITOR_LOG.clean;
cat $PG_LOGS/* | grep "LOG: adding table:" | awk '{print $2}' FS="LOG: adding table:" | awk '{print $1"|"$2,$3,$4}' FS="," | sed 's/ av=/|/' | sed 's/ aa=/|/' > $TMP_DIR/relstats_scores.clean
# ======================================================
# Load data into analysis database
#
# XXX: Not used in the main script flow, but kept here
# for reference.
# ======================================================
load_for_analysis()
{
echo "=== Loading relstats data for analysis ==="
createdb analysis
psql analysis<<EOF
CREATE TABLE per_table_stats (
timestamp TIMESTAMPTZ,
relname TEXT,
reltuples BIGINT,
n_dead_tup BIGINT,
av_count INT,
aa_count INT,
total_av_time DOUBLE PRECISION,
total_aa_time DOUBLE PRECISION,
n_tup_upd BIGINT,
n_tup_hot_upd BIGINT,
n_tup_ins BIGINT,
avg_av_time DOUBLE PRECISION,
avg_aa_time DOUBLE PRECISION
);
EOF
psql -d analysis<<EOF
\copy per_table_stats FROM '$RELSTATS_MONITOR_LOG.clean' DELIMITER '|' CSV;
EOF
echo "=== Loading relstats scoring data for analysis ==="
psql analysis<<EOF
CREATE TABLE per_table_scores (
relname TEXT,
score DOUBLE PRECISION,
reason_av INT,
reason_aa INT
);
EOF
psql -d analysis<<EOF
\copy per_table_scores FROM '$TMP_DIR/relstats_scores.clean' DELIMITER '|' CSV;
EOF
}
echo "All tasks completed."