v2-0002-Add-file_extend_method_threshold-setting.patch

text/x-patch

Filename: v2-0002-Add-file_extend_method_threshold-setting.patch
Type: text/x-patch
Part: 1
Message: Re: [PING] fallocate() causes btrfs to never compress postgresql files
From c5b1fd2fdcf41de11d2701602d3e243df9bbb049 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 15 Dec 2025 16:16:23 +1300
Subject: [PATCH v2 2/3] Add file_extend_method_threshold setting.

Previously, write_zeros behavior was used at or below a hard-coded
extension size of 8 blocks, based on tests with common Linux file systems.
Make the threshold user-adjustable, to allow testing on other systems.

Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
---
 doc/src/sgml/config.sgml                      | 21 ++++++++++++++++++-
 src/backend/storage/file/fd.c                 |  3 +++
 src/backend/storage/smgr/md.c                 |  6 ++----
 src/backend/utils/misc/guc_parameters.dat     |  8 +++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/storage/fd.h                      |  8 +++++++
 6 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0b4922b35c4..5a298646100 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2442,7 +2442,26 @@ include_dir 'conf.d'
          </listitem>
         </itemizedlist>
         The <literal>write_zeros</literal> method is always used when data
-        files are extended by 8 blocks or fewer.
+        files are extended by <xref linkend="guc-file-extend-method-threshold"/>
+        or fewer blocks.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-file-extend-method-threshold" xreflabel="file_extend_method_threshold">
+      <term><varname>file_extend_method_threshold</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>file_extend_method_threshold</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        <literal>posix_fallocate</literal> is known to interfere with
+        delayed allocation heuristics on some file systems when the extension
+        size is small.  This setting specifies the extension size, in blocks,
+        at or below which <literal>write_zeros</literal> is used regardless
+        of the <varname>file_extend_method</varname> setting.  The default
+        is 8 blocks.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a2fd55cc408..7eb537ab15e 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -167,6 +167,9 @@ int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 /* How data files should be bulk-extended with zeros. */
 int			file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
 
+/* Extension size, in blocks, above which file_extend_method is used. */
+int			file_extend_method_threshold = DEFAULT_FILE_EXTEND_METHOD_THRESHOLD;
+
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index df0aa20708d..f893687814b 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -598,11 +598,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		 * to allocate page cache space for the extended pages.
 		 *
 		 * However, we don't use FileFallocate() for small extensions, as it
-		 * defeats delayed allocation on some filesystems. Not clear where
-		 * that decision should be made though? For now just use a cutoff of
-		 * 8, anything between 4 and 8 worked OK in some local testing.
+		 * defeats delayed allocation on some filesystems.
 		 */
-		if (numblocks > 8 &&
+		if (numblocks > file_extend_method_threshold &&
 			file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
 		{
 			int			ret = 0;
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 220a092ef52..964e107c7a5 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1046,6 +1046,14 @@
   options => 'file_extend_method_options',
 },
 
+{ name => 'file_extend_method_threshold', type => 'int', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
+  short_desc => 'Specifies the extension size above which file_extend_method is used.',
+  variable => 'file_extend_method_threshold',
+  boot_val => 'DEFAULT_FILE_EXTEND_METHOD_THRESHOLD',
+  min => '1',
+  max => 'INT_MAX',
+},
+
 { name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
   short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
   long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 753a42e8ca5..b745e31a38d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -179,6 +179,7 @@
                                         # in kilobytes, or -1 for no limit
 
 #file_copy_method = copy                # copy, clone (if supported by OS)
+#file_extend_method_threshold = 8       # size (in blocks) up to which write_zeros is used
 #file_extend_method = posix_fallocate   # the default is the first option supported
                                         # by the operating system:
                                         #   posix_fallocate (most Unix-like systems)
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index f21ac4545a8..7074c3f118b 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -66,12 +66,20 @@ enum FileExtendMethod
 /* Default to the first available file_extend_method. */
 #define DEFAULT_FILE_EXTEND_METHOD 0
 
+/*
+ * Default file_extend_method_threshold, in blocks.  Values 4-8 were found
+ * experimentally to avoid interference between posix_fallocate() and delayed
+ * allocation on common Linux file systems, but other systems might vary.
+ */
+#define DEFAULT_FILE_EXTEND_METHOD_THRESHOLD 8
+
 /* GUC parameter */
 extern PGDLLIMPORT int max_files_per_process;
 extern PGDLLIMPORT bool data_sync_retry;
 extern PGDLLIMPORT int recovery_init_sync_method;
 extern PGDLLIMPORT int io_direct_flags;
 extern PGDLLIMPORT int file_extend_method;
+extern PGDLLIMPORT int file_extend_method_threshold;
 
 /*
  * This is private to fd.c, but exported for save/restore_backend_variables()
-- 
2.51.2