run_bench_perf.sh
application/octet-stream
Filename: run_bench_perf.sh
Type: application/octet-stream
Part: 10
#!/usr/bin/env bash
#
# Comprehensive pgbench HOT Update Benchmark Suite Runner
# Includes setup, pgbench execution, perf profiling, and comprehensive reporting
#
set -euo pipefail
#set -x
# ================================================================
# Configuration & Defaults
# ================================================================
PROJECT=${1:-$(basename $PWD)}
CLIENTS=${2:-8}
DURATION=${3:-180}
# Paths
INSTALL_DIR="${PG_INSTALL_DIR:-${PWD}/install}"
PGBIN="$INSTALL_DIR/bin"
PGDATA="${PG_DATA_DIR:-$INSTALL_DIR/data}"
PG_HOST="/tmp/test-db-$(basename $PWD)"
PSQL="$PGBIN/psql"
PGBENCH="$PGBIN/pgbench"
# Bench files directory
BENCH_FILES="${BENCH_FILES:-$PWD}"
# Database
DB_NAME="pgbench_test_$(date +%Y%m%d_%H%M%S)"
# Output
OUTPUT_DIR="/tmp/pgbench-results-$PROJECT"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RESULTS_FILE="$OUTPUT_DIR/results_$TIMESTAMP.txt"
PERF_DATA_FILE="$OUTPUT_DIR/perf_$TIMESTAMP.data"
# ================================================================
# Validation
# ================================================================
if [[ ! -f "$PSQL" ]]; then
echo "ERROR: PostgreSQL not found at $PGBIN"
echo "Did you build PostgreSQL? Expected: $PGBIN"
exit 1
fi
if [[ ! -f "$BENCH_FILES/setup.sql" ]]; then
echo "ERROR: setup.sql not found at $BENCH_FILES/setup.sql"
exit 1
fi
if [[ ! -f "$BENCH_FILES/hot_test.sql" ]]; then
echo "ERROR: hot_test.sql not found at $BENCH_FILES/hot_test.sql"
exit 1
fi
if [[ ! -f "$BENCH_FILES/report.sql" ]]; then
echo "ERROR: report.sql not found at $BENCH_FILES/report.sql"
exit 1
fi
# Create output directory
mkdir -p "$OUTPUT_DIR"
# ================================================================
# Helper Functions
# ================================================================
log() {
local msg="$1"
local ts=$(date '+%Y-%m-%d %H:%M:%S')
echo "[$ts] $msg" | tee -a "$RESULTS_FILE"
}
log_header() {
local msg="$1"
echo "" | tee -a "$RESULTS_FILE"
echo "===================================================================" | tee -a "$RESULTS_FILE"
echo "$msg" | tee -a "$RESULTS_FILE"
echo "===================================================================" | tee -a "$RESULTS_FILE"
}
# ================================================================
# Startup & Initialization
# ================================================================
log_header "PostgreSQL HOT Update Benchmark Suite"
log "Project: $PROJECT"
log "Database: $DB_NAME"
log "Clients: $CLIENTS"
log "Duration: $DURATION seconds"
log "Output: $OUTPUT_DIR"
log ""
# Start PostgreSQL
#log "Re-creating the database directory..."
#rm -rf "$PGDATA"
#"$PGBIN/initdb" --debug --no-clean "$PGDATA"
#log "Starting PostgreSQL server process..."
#"$PGBIN/pg_ctl" -D "$PGDATA" -l "$OUTPUT_DIR/postgres_$TIMESTAMP.log" start -w
# Verify connection
if ! "$PSQL" -h "$PG_HOST" -d postgres -c "SELECT 1" >/dev/null 2>&1; then
log "ERROR: Failed to connect to PostgreSQL"
exit 1
fi
#log "PostgreSQL started successfully"
# ================================================================
# Database Preparation
# ================================================================
log_header "Setting Up Test Database"
log "Creating database: $DB_NAME..."
"$PSQL" -X -h "$PG_HOST" -d postgres -c "CREATE DATABASE $DB_NAME;" 2>&1 | tee -a "$RESULTS_FILE"
log "Running setup.sql..."
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -f "$BENCH_FILES/setup.sql" 2>&1 | tee -a "$RESULTS_FILE"
# Verify tables created
TABLE_COUNT=$("$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -t -c \
"SELECT count(*) FROM pg_tables WHERE schemaname = 'public' AND tablename LIKE 'phase%';")
log "Setup complete: $TABLE_COUNT benchmark tables created"
# ================================================================
# Pre-Benchmark Statistics
# ================================================================
log_header "Pre-Benchmark Statistics"
log "Initial table statistics:"
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -c \
"SELECT
relname as table_name,
n_live_tup as rows,
round(pg_total_relation_size(relid)/1024/1024, 1) as size_mb
FROM pg_stat_user_tables
WHERE relname LIKE 'phase%'
ORDER BY relname;" | tee -a "$RESULTS_FILE"
# ================================================================
# pgbench Execution with Performance Profiling
# ================================================================
log_header "Starting pgbench update workload"
log "Parameters:"
log " - Clients: $CLIENTS"
log " - Duration: $DURATION seconds"
log " - Database: $DB_NAME"
log " - Workload: hot_test.sql"
log ""
# Run pgbench
PGBENCH_LOG="$OUTPUT_DIR/pgbench_$TIMESTAMP.log"
log "Running pgbench (see detailed log at $PGBENCH_LOG)..."
"$PGBENCH" \
-h "$PG_HOST" \
-d "$DB_NAME" \
-c "$CLIENTS" \
-j "$CLIENTS" \
-T "$DURATION" \
-f "$BENCH_FILES/hot_test.sql" \
-M simple \
-n \
-P 5 \
--progress-timestamp \
--log \
--log-prefix="$OUTPUT_DIR/pgbench_$TIMESTAMP" \
2>&1 | tee -a "$RESULTS_FILE" &
PGBENCH_PID=$!
log "pgbench started (PID: $PGBENCH_PID)"
# Wait for backend connections to establish
log "Waiting for backend connections to establish..."
sleep 2
# Find backend PIDs (processes serving the test database)
# Pattern matches: "postgres: <user> <dbname> [<host>] <state>"
BACKEND_PIDS=$(pgrep -f "postgres:.*$DB_NAME")
if [ -z "$BACKEND_PIDS" ]; then
log "Error: No backend processes found for database $DB_NAME"
kill $PGBENCH_PID 2>/dev/null || true
exit 1
fi
# Convert to comma-separated list for perf
BACKEND_PID_LIST=$(echo $BACKEND_PIDS | tr ' ' ',')
NUM_BACKENDS=$(echo $BACKEND_PIDS | wc -w)
log "Found $NUM_BACKENDS backend processes: $BACKEND_PID_LIST"
# Start perf profiling of backend processes only
log "Starting perf recording postgres backends..."
log "Running pgbench with perf profiling (see detailed log at $PGBENCH_LOG)..."
# Run perf and pgbench together
perf record \
-g \
--call-graph dwarf \
-F 99 \
-o "$PERF_DATA_FILE" \
-p "$BACKEND_PID_LIST" \
&
PERF_PID=$!
echo "Perf recording started (PID: $PERF_PID) (DATA: $PERF_DATA_FILE), profiling backends: $BACKEND_PID_LIST"
# Wait for pgbench to finish
echo ""
echo "Waiting for pgbench to complete..."
wait $PGBENCH_PID
PGBENCH_EXIT=$?
if [ $PGBENCH_EXIT -ne 0 ]; then
echo "Warning: pgbench exited with code $PGBENCH_EXIT"
fi
# Stop perf recording
log "Stopping perf recording..."
sleep 2
kill -SIGINT $PERF_PID 2>/dev/null || true
wait $PERF_PID 2>/dev/null || true
log "Perf data saved to: $PERF_DATA_FILE"
# ================================================================
# Post-Benchmark Analysis
# ================================================================
log_header "pgbench Results"
# Show final pgbench statistics from log
if [[ -f "$PGBENCH_LOG" ]]; then
tail -20 "$PGBENCH_LOG" | tee -a "$RESULTS_FILE"
fi
# ================================================================
# Comprehensive HOT Statistics Reporting
# ================================================================
log_header "Comprehensive HOT Update Statistics Report"
# Quick initial snapshot to console
log "Generating report (this may take a moment)..."
log ""
REPORT_FILE="$OUTPUT_DIR/hot_report_$TIMESTAMP.txt"
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -f "$BENCH_FILES/report.sql" \
>"$REPORT_FILE" 2>&1
# Display report to console
cat "$REPORT_FILE" | tee -a "$RESULTS_FILE"
log ""
log "Full report also saved to: $REPORT_FILE"
# ================================================================
# Performance Analysis with perf
# ================================================================
log_header "Performance Profiling Results"
PERF_REPORT="$OUTPUT_DIR/perf_report_$TIMESTAMP.txt"
if command -v perf &>/dev/null && [[ -f "$PERF_DATA_FILE" ]]; then
log "Generating perf hotspots report..."
perf report -i "$PERF_DATA_FILE" -n --no-children --stdio >"$PERF_REPORT" 2>&1 || true
if [[ -f "$PERF_REPORT" ]]; then
log ""
log "Top 20 hotspots:"
head -40 "$PERF_REPORT" | tee -a "$RESULTS_FILE"
log ""
log "Full perf report saved to: $PERF_REPORT"
fi
else
log "Note: perf not available or data file missing. Skipping perf analysis."
fi
# ================================================================
# Storage Analysis
# ================================================================
log_header "Storage Analysis"
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -c \
"SELECT * FROM show_table_size_stats() ORDER BY table_name;" |
tee -a "$RESULTS_FILE"
# ================================================================
# Index Statistics
# ================================================================
log_header "Index Usage Statistics"
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -c \
"SELECT
schemaname,
relname as table_name,
indexrelname as index_name,
idx_scan as scans,
idx_tup_read as tuples_read,
idx_tup_fetch as tuples_fetched,
round(pg_relation_size(indexrelid)/1024/1024, 1) as size_mb
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan DESC NULLS LAST
LIMIT 20;" | tee -a "$RESULTS_FILE"
# ================================================================
# WAL & Vacuum Activity
# ================================================================
log_header "Checkpoint & Vacuum Summary"
"$PSQL" -X -h "$PG_HOST" -d "$DB_NAME" -c \
"SELECT
schemaname,
relname as table_name,
n_tup_ins as inserts,
n_tup_upd as updates,
n_tup_del as deletes,
n_live_tup as live_rows,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables
WHERE schemaname = 'public' AND relname LIKE 'phase%'
ORDER BY n_tup_upd DESC NULLS LAST;" | tee -a "$RESULTS_FILE"
# ================================================================
# Cleanup & Summary
# ================================================================
log_header "Benchmark Complete"
log "Database: $DB_NAME"
log "Results: $RESULTS_FILE"
log "Report: $REPORT_FILE"
log "Perf Data: $PERF_DATA_FILE"
if [[ -f "$PERF_REPORT" ]]; then
log "Perf Report: $PERF_REPORT"
fi
log "pgbench Log: $PGBENCH_LOG"
log "PostgreSQL Log: $OUTPUT_DIR/postgres_$TIMESTAMP.log"
read -p "Create performance flamegraph? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
log "Building flamegraph... ${OUTPUT_DIR}/${PROJECT}_${TIMESTAMP}_flame.svg"
PATH=$PATH:$HOME/src/flamegraph
(perf script -i "$PERF_DATA_FILE" | stackcollapse-perf.pl | flamegraph.pl --title "${PROJECT}" >"${OUTPUT_DIR}/${PROJECT}_${TIMESTAMP}_flame.svg") || true
fi
# Generate summary
log ""
log "To view full results:"
log " cat $RESULTS_FILE"
log ""
log "To view HOT report only:"
log " cat $REPORT_FILE"
log ""
log "To re-analyze results from this database:"
log " $PSQL -X -h '$PG_HOST' -d '$DB_NAME' -f $BENCH_FILES/report.sql"
log ""
# Optional: cleanup
log_header "Optional Cleanup"
read -p "Drop database $DB_NAME after benchmark? (y/n) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
log "Dropping database $DB_NAME..."
"$PSQL" -X -h "$PG_HOST" -d postgres -c "DROP DATABASE IF EXISTS $DB_NAME;" 2>&1 | tee -a "$RESULTS_FILE"
log "Database dropped."
else
log "Database $DB_NAME retained for further analysis."
log "To connect: $PSQL -h '$PG_HOST' -d '$DB_NAME'"
fi
log_header "Benchmark Suite Finished Successfully"
log "All results saved to: $OUTPUT_DIR"
ls -lh "$OUTPUT_DIR" | tail -n +2 | sed 's/^/ /' | tee -a "$RESULTS_FILE"
log ""
log "Total elapsed time: $(date -d @$SECONDS -u +%H:%M:%S)"
log ""