#!/bin/bash
#
# Functions for calculating the elapsed time between table synchronization
# "started" and "finished" events in PostgreSQL logical replication logs.
#
# Usage:
#   source log_analyzer_functions.sh
#   calculate_sync_times "/path/to/logfile"
#   calculate_sync_times "/path/to/logfile" "quiet"  # for minimal output
#

# Function to convert timestamp to epoch milliseconds
#
# Arguments:
#   $1 - timestamp of the form "YYYY-MM-DD HH:MM:SS[.fraction]"; any further
#        space-separated fields (e.g. a time zone name) are ignored, so the
#        time is interpreted by `date` in the current time zone.
# Outputs:
#   Epoch milliseconds on stdout; an error message on stderr on failure.
# Returns:
#   0 on success, 1 if the timestamp cannot be parsed.
timestamp_to_ms() {
    local ts="$1"
    local date_part time_part time_no_ms frac epoch_sec
    local digits_re='^[0-9]*$'

    # Split into the date field and the time field (first two
    # space-separated fields of the input).
    date_part="${ts%% *}"
    time_part="${ts#* }"
    time_part="${time_part%% *}"

    # Separate whole seconds from the optional fractional part.
    time_no_ms="${time_part%%.*}"
    frac=""
    if [[ "$time_part" == *.* ]]; then
        frac="${time_part#*.}"
        frac="${frac%%.*}"
    fi

    if ! [[ "$frac" =~ $digits_re ]]; then
        echo "Error: cannot parse timestamp: $ts" >&2
        return 1
    fi

    # Normalise the fraction to exactly three digits (milliseconds):
    # ".5" -> 500, "" -> 000, ".123456" -> 123.
    frac="${frac}000"
    frac="${frac:0:3}"

    # Convert to epoch seconds. The assignment is kept separate from the
    # `local` declaration so the exit status of `date` is what gets tested.
    # GNU date is tried first, then the BSD/macOS syntax.
    if ! epoch_sec=$(date -d "$date_part $time_no_ms" +%s 2>/dev/null); then
        if ! epoch_sec=$(date -j -f "%Y-%m-%d %H:%M:%S" "$date_part $time_no_ms" +%s 2>/dev/null); then
            echo "Error: cannot parse timestamp: $ts" >&2
            return 1
        fi
    fi

    # 10# forces base 10 so fractions such as "089" are not read as octal.
    echo "$((epoch_sec * 1000 + 10#$frac))"
}

# Function to format duration
#
# Renders a millisecond count as a human-readable duration.
#
# Arguments:
#   $1 - duration in milliseconds (integer); empty/missing is treated as 0
# Outputs:
#   One newline-terminated line on stdout:
#     under 1 second  -> "<ms> ms"
#     under 1 minute  -> "S.mmm seconds"
#     under 1 hour    -> "Mm S.mmms"
#     otherwise       -> "Hh Mm S.mmms"
format_duration() {
    local ms=${1:-0}
    local seconds=$((ms / 1000))
    local milliseconds=$((ms % 1000))
    local hours minutes secs

    if ((seconds < 1)); then
        printf '%s ms\n' "$ms"
    elif ((seconds < 60)); then
        printf '%d.%03d seconds\n' "$seconds" "$milliseconds"
    elif ((seconds < 3600)); then
        minutes=$((seconds / 60))
        secs=$((seconds % 60))
        printf '%dm %d.%03ds\n' "$minutes" "$secs" "$milliseconds"
    else
        hours=$((seconds / 3600))
        minutes=$(((seconds % 3600) / 60))
        secs=$((seconds % 60))
        printf '%dh %dm %d.%03ds\n' "$hours" "$minutes" "$secs" "$milliseconds"
    fi
}

# Main function to calculate sync times
#
# Scans a log file for "logical replication table synchronization worker"
# events, pairs every "has finished" event with the most recent preceding
# "has started" event for the same subscription and table, and prints a
# report of the durations plus any starts that never finished.
#
# Arguments:
#   $1 - path to the log file (required)
#   $2 - "quiet" to suppress the per-table details and decorative rules;
#        any other value (default "normal") prints the full report
# Outputs:
#   The report on stdout. Argument/validation errors and warnings about
#   timestamps that could not be converted go to stderr.
# Returns:
#   0 on success, 1 if the log file argument is missing or is not a file.
# Requires:
#   timestamp_to_ms and format_duration to be defined in the current shell.
calculate_sync_times() {
    local LOG_FILE="$1"
    local QUIET_MODE="${2:-normal}"  # "quiet" for minimal output, default is "normal"

    # Check if log file is provided
    if [ -z "$LOG_FILE" ]; then
        echo "Error: Log file path not provided" >&2
        echo "Usage: calculate_sync_times <log_file_path> [quiet]" >&2
        return 1
    fi

    # Check if log file exists
    if [ ! -f "$LOG_FILE" ]; then
        echo "Error: Log file not found at $LOG_FILE" >&2
        return 1
    fi

    local rule_heavy="================================================================================"
    local rule_light="--------------------------------------------------------------------------------"

    if [ "$QUIET_MODE" != "quiet" ]; then
        echo "Reading log file: $LOG_FILE"
        echo "$rule_heavy"
    fi

    # Patterns are kept in variables so the regex matches behave the same
    # across bash versions.
    local marker='logical replication table synchronization worker'
    local sub_re='subscription "([^"]+)'
    local table_re='table "([^"]+)'
    local int_re='^-?[0-9]+$'

    # Pending (unmatched) start events and completed results are held in
    # parallel indexed arrays. Names are compared as literal strings, so
    # characters such as '.', '/', ':' or '|' in a subscription or table
    # name cannot be misread as regex syntax or field delimiters.
    local -a pend_sub=() pend_table=() pend_time=()
    local -a res_sub=() res_table=() res_start=() res_finish=() res_dur=()
    local pend_count=0 res_count=0

    local line ts_date ts_time rest timestamp subscription table
    local start_time start_ms finish_ms found kept i

    # Parse the log file. The "|| [ -n ... ]" clause keeps a final line
    # that has no trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        # Skip anything that is not a table synchronization event
        [[ "$line" == *"$marker"* ]] || continue

        # Timestamp = first two whitespace-separated fields (date, time)
        ts_date=""
        ts_time=""
        rest=""
        IFS=$' \t' read -r ts_date ts_time rest <<< "$line"
        timestamp="$ts_date $ts_time"

        # Extract subscription and table names (text after the opening
        # quote up to the next quote)
        subscription=""
        if [[ "$line" =~ $sub_re ]]; then
            subscription="${BASH_REMATCH[1]}"
        fi
        table=""
        if [[ "$line" =~ $table_re ]]; then
            table="${BASH_REMATCH[1]}"
        fi

        if [[ "$line" == *'has started'* ]]; then
            # Remember the start event
            pend_sub[pend_count]="$subscription"
            pend_table[pend_count]="$table"
            pend_time[pend_count]="$timestamp"
            pend_count=$((pend_count + 1))
        elif [[ "$line" == *'has finished'* ]]; then
            # Use the latest start for this subscription/table and drop
            # every pending start for it, compacting the arrays in place.
            found=0
            kept=0
            start_time=""
            for ((i = 0; i < pend_count; i++)); do
                if [[ "${pend_sub[i]}" == "$subscription" && "${pend_table[i]}" == "$table" ]]; then
                    start_time="${pend_time[i]}"
                    found=1
                else
                    pend_sub[kept]="${pend_sub[i]}"
                    pend_table[kept]="${pend_table[i]}"
                    pend_time[kept]="${pend_time[i]}"
                    kept=$((kept + 1))
                fi
            done
            pend_count=$kept

            if [ "$found" -eq 1 ]; then
                # Calculate duration; skip the pair if either timestamp
                # cannot be converted rather than report a bogus value.
                if start_ms=$(timestamp_to_ms "$start_time") &&
                    finish_ms=$(timestamp_to_ms "$timestamp") &&
                    [[ "$start_ms" =~ $int_re && "$finish_ms" =~ $int_re ]]; then
                    res_sub[res_count]="$subscription"
                    res_table[res_count]="$table"
                    res_start[res_count]="$start_time"
                    res_finish[res_count]="$timestamp"
                    res_dur[res_count]=$((finish_ms - start_ms))
                    res_count=$((res_count + 1))
                else
                    echo "Warning: could not parse timestamps for subscription \"$subscription\", table \"$table\"; skipping" >&2
                fi
            fi
        fi
    done < "$LOG_FILE"

    # Display results
    if [ "$res_count" -gt 0 ]; then
        if [ "$QUIET_MODE" != "quiet" ]; then
            echo ""
            echo "Completed Table Synchronizations:"
            echo "$rule_light"
        fi

        local total_duration=0

        for ((i = 0; i < res_count; i++)); do
            total_duration=$((total_duration + res_dur[i]))

            if [ "$QUIET_MODE" != "quiet" ]; then
                echo ""
                echo "Subscription: ${res_sub[i]}"
                echo "Table:        ${res_table[i]}"
                echo "Started:      ${res_start[i]}"
                echo "Finished:     ${res_finish[i]}"
                echo "Duration:     $(format_duration "${res_dur[i]}")"
            fi
        done

        # Summary statistics (printed in quiet mode as well)
        if [ "$QUIET_MODE" != "quiet" ]; then
            echo ""
            echo "$rule_heavy"
        fi
        echo "Summary:"
        echo "  Total tables synchronized: $res_count"
        echo "  Total time:                $(format_duration "$total_duration")"

        local avg_duration=$((total_duration / res_count))
        echo "  Average time per table:    $(format_duration "$avg_duration")"
    else
        echo "No completed table synchronization events found in the log file."
    fi

    # Display unmatched starts
    if [ "$pend_count" -gt 0 ]; then
        if [ "$QUIET_MODE" != "quiet" ]; then
            echo ""
            echo "$rule_heavy"
        fi
        echo ""
        echo "Warning: Tables with 'started' events but no matching 'finished' events:"
        echo "$rule_light"

        for ((i = 0; i < pend_count; i++)); do
            echo "  Subscription: ${pend_sub[i]}, Table: ${pend_table[i]}"
            echo "  Started at:   ${pend_time[i]}"
            echo ""
        done
    fi

    return 0
}

# If script is executed directly (not sourced), run the function.
# "${BASH_SOURCE[0]}" -ef "$0" is true only when this file is the script
# being run, so sourcing the file just defines the functions.
#   $1 - log file path (optional; falls back to the default below)
#   $2 - output mode, forwarded to calculate_sync_times ("quiet" for
#        minimal output; defaults to "normal")
if [ "${BASH_SOURCE[0]}" -ef "$0" ]; then
    # Default log file path
    LOG_FILE="${1:-$HOME/ajin-setup/dataoss2/logfile}"
    calculate_sync_times "$LOG_FILE" "${2:-normal}"
fi
