Thread

  1. BUG #18724: High data disk utilization during log writing

    PG Bug reporting form <noreply@postgresql.org> — 2024-11-25T12:26:52Z

    The following bug has been logged on the website:
    
    Bug reference:      18724
    Logged by:          Sitnikov Ivan
    Email address:      sit7602@gmail.com
    PostgreSQL version: 16.4
    Operating system:   Ubuntu 24.04.1 LTS
    Description:        
    
    There is a problem with high load on the data disk with query logs enabled
    (log_min_duration_statement = 0), provided that the log file is on a
    separate disk.
    
    I want to clarify that the server setup looks like this:
    
    /dev/vda1 / - System disk - source system packages and utilities (including
    postgres)
    /dev/vdb1 /data - Data disk - a separate disk for storing data
    /dev/vdc1 /var/log/postgresql - Log disk - mounted on the
    /var/log/postgresql folder
    /dev/vdd1 /wal_data - Disk for wal files - a symlink for pg_wal
    
    Postgresql config is listed below:
    data_directory = '/data/postgresql/16/main/main'		# use data in another
    directory
    hba_file = '/etc/postgresql/16/main/pg_hba.conf'	# host-based authentication
    file
    ident_file = '/etc/postgresql/16/main/pg_ident.conf'	# ident configuration
    file
    
    # If external_pid_file is not explicitly set, no extra PID file is
    written.
    external_pid_file = '/var/run/postgresql/16-main.pid'			# write an extra PID
    file
    
    listen_addresses = '*'			# what IP address(es) to listen on;
    port = 5432				# (change requires restart)
    max_connections = 500			# (change requires restart)
    unix_socket_directories = '/var/run/postgresql'	# comma-separated list of
    directories
    
    ssl = on
    
    shared_buffers = 32GB			# min 128kB
    
    work_mem = 262144kB			# min 64kB
    maintenance_work_mem = 8192MB		# min 1MB
    
    dynamic_shared_memory_type = posix	# the default is the first option
    
    
    effective_io_concurrency = 2		# 1-1000; 0 disables prefetching
    max_worker_processes = 8		# (change requires restart)
    max_parallel_maintenance_workers = 2	# taken from max_parallel_workers
    max_parallel_workers_per_gather = 2	# taken from max_parallel_workers
    max_parallel_workers = 8		# maximum number of max_worker_processes that
    
    wal_level = replica			# minimal, replica, or logical
    
    wal_buffers = 16MB			# min 32kB, -1 sets based on shared_buffers
    
    max_wal_size = 4GB
    min_wal_size = 1GB
    checkpoint_completion_target = 0.9	# checkpoint target duration, 0.0 - 1.0
    
    archive_mode = on		# enables archiving; off, on, or always
    archive_command = '/usr/local/bin/wal-g wal-push \"%p\" >>
    /var/log/postgresql/archive_command.log 2>&1'		# command to use to archive a
    logfile segment
    archive_timeout = 60		# force a logfile segment switch after this
    
    restore_command = '/usr/local/bin/wal-g wal-fetch \"%f\" \"%p\" >>
    /var/log/postgresql/restore_command.log 2>&1'		# command to use to restore
    an archived 
    
    max_wal_senders = 10		# max number of walsender processes
    				# (change requires restart)
    wal_keep_size = 10000		# in megabytes; 0 disables
    hot_standby = on			# "off" disallows queries during recovery
    
    max_standby_streaming_delay = 10s	# max delay before canceling queries
    					# when reading streaming WAL;
    
    hot_standby_feedback = on		# send info from standby to prevent
    					# query conflicts
    
    random_page_cost = 4			# same scale as above
    
    effective_cache_size = 96GB
    
    default_statistics_target = 100		# range 1-10000
    
    logging_collector = on			# Enable capturing of stderr and csvlog
    
    
    log_directory = '/var/log/postgresql'	# directory where log files are
    written,
    log_filename = 'postgresql-16-main.log'	# log file name pattern,
    log_file_mode = 0644			# creation mode for log files,
    log_truncate_on_rotation = on		# If on, an existing log file with the
    log_rotation_age = 1d			# Automatic rotation of logfiles will
    log_rotation_size = 1024MB		# Automatic rotation of logfiles will
    log_min_duration_statement = 0		# -1 is disabled, 0 logs all statements
    
    log_connections = on
    log_disconnections = on
    
    log_line_prefix= '%m [%p] %d %a %u %h %c '		# special values:
    
    cluster_name = '16/main'			# added to process titles if nonempty
    
    track_io_timing = on
    
    
    statement_timeout = 10000			# in milliseconds, 0 is disabled
    
    datestyle = 'iso, mdy'
    
    lc_messages = 'en_US.UTF-8'			# locale for system error message
    lc_monetary = 'en_US.UTF-8'			# locale for monetary formatting
    lc_numeric = 'en_US.UTF-8'			# locale for number formatting
    lc_time = 'en_US.UTF-8'				# locale for time formatting
    
    default_text_search_config = 'pg_catalog.english'
    
    shared_preload_libraries =
    'pg_stat_statements,passwordcheck,auto_explain,auth_delay,pgaudit'
    pg_stat_statements.max = 500
    pg_stat_statements.track = top
    pg_stat_statements.track_utility = true
    pg_stat_statements.save = false
    
    
    include_dir = 'conf.d'			# include files ending in '.conf' from
    auth_delay.milliseconds = 1000
    
    Our typical QPS metrics are from 700 to 2500 queries per second
    Log size is around 270 Mb per minute (6600 lines per minute)
    
    The lsof utility did not help in investigating this problem, as did
    restarting the server
    Setting log_min_duration_statement = -1 shows a significant decrease in the
    load on the data disk
    So I can't assume why data disk (/dev/vdb1 /data) utilized up to 60-80%
    while statements are logged to separate block device (/dev/vdc1
    /var/log/postgresql) and need some help to get off the ground
    
    
  2. Re: BUG #18724: High data disk utilization during log writing

    Christophe Pettus <xof@thebuild.com> — 2024-11-25T14:47:23Z

    
    > On Nov 25, 2024, at 04:26, PG Bug reporting form <noreply@postgresql.org> wrote:
    > The lsof utility did not help in investigating this problem, as did
    > restarting the server
    > Setting log_min_duration_statement = -1 shows a significant decrease in the
    > load on the data disk
    > So I can't assume why data disk (/dev/vdb1 /data) utilized up to 60-80%
    > while statements are logged to separate block device (/dev/vdc1
    > /var/log/postgresql) and need some help to get off the ground
    
    The question seems to answer itself here: You are logging every statement to the text logs, and it is very likely that the lines in the text log are larger than the actual data for that statement being written to the data disk.  It can be very expensive to log every statement, especially when you have a significant query load (which you do here).
    
    
    
  3. Re: BUG #18724: High data disk utilization during log writing

    Иван Ситников <sit7602@gmail.com> — 2024-11-26T10:33:57Z

    Thank you for your reply. I think I'm close to solving the mystery.
    As a result of the research, I found that in addition to recording logs,
    there is also a record in the pgss_query_texts.stat file, which is located
    in the data directory.
    And at the moment, this is what I consider to be the reason for the high
    utilization of the data disk.
    But this file is not used at all if I unload pg_stat_statements module
    from shared_preload_libraries
    
    пн, 25 нояб. 2024 г. в 19:47, Christophe Pettus <xof@thebuild.com>:
    
    >
    >
    > > On Nov 25, 2024, at 04:26, PG Bug reporting form <noreply@postgresql.org>
    > wrote:
    > > The lsof utility did not help in investigating this problem, as did
    > > restarting the server
    > > Setting log_min_duration_statement = -1 shows a significant decrease in
    > the
    > > load on the data disk
    > > So I can't assume why data disk (/dev/vdb1 /data) utilized up to 60-80%
    > > while statements are logged to separate block device (/dev/vdc1
    > > /var/log/postgresql) and need some help to get off the ground
    >
    > The question seems to answer itself here: You are logging every statement
    > to the text logs, and it is very likely that the lines in the text log are
    > larger than the actual data for that statement being written to the data
    > disk.  It can be very expensive to log every statement, especially when you
    > have a significant query load (which you do here).