v2-0002-Add-file_extend_method_threshold-setting.patch
text/x-patch
Filename: v2-0002-Add-file_extend_method_threshold-setting.patch
Type: text/x-patch
Part: 1
From c5b1fd2fdcf41de11d2701602d3e243df9bbb049 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 15 Dec 2025 16:16:23 +1300
Subject: [PATCH v2 2/3] Add file_extend_method_threshold setting.
Previously, write_zeros behavior was used at or below a hard-coded
extension size of 8, based on tests with common Linux file systems.
Make it user-adjustable, to allow testing on other systems.
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
---
doc/src/sgml/config.sgml | 21 ++++++++++++++++++-
src/backend/storage/file/fd.c | 3 +++
src/backend/storage/smgr/md.c | 6 ++----
src/backend/utils/misc/guc_parameters.dat | 8 +++++++
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/include/storage/fd.h | 8 +++++++
6 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0b4922b35c4..5a298646100 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2442,7 +2442,26 @@ include_dir 'conf.d'
</listitem>
</itemizedlist>
The <literal>write_zeros</literal> method is always used when data
- files are extended by 8 blocks or fewer.
+ files are extended by <xref linkend="guc-file-extend-method-threshold"/>
+ or fewer blocks.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-file-extend-method-threshold" xreflabel="file_extend_method_threshold">
+ <term><varname>file_extend_method_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>file_extend_method_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ <literal>posix_fallocate</literal> is known to interfere with
+ delayed allocation heuristics on some file systems when the extension
+ size is small. This setting specifies the extension size, in blocks,
+ at or below which <literal>write_zeros</literal> is used regardless of
+ the <literal>file_extend_method</literal> setting. The default is 8
+ blocks.
</para>
</listitem>
</varlistentry>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a2fd55cc408..7eb537ab15e 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -167,6 +167,9 @@ int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* How data files should be bulk-extended with zeros. */
int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
+/* Extension size (in blocks) above which file_extend_method is used. */
+int file_extend_method_threshold = DEFAULT_FILE_EXTEND_METHOD_THRESHOLD;
+
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index df0aa20708d..f893687814b 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -598,11 +598,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* to allocate page cache space for the extended pages.
*
* However, we don't use FileFallocate() for small extensions, as it
- * defeats delayed allocation on some filesystems. Not clear where
- * that decision should be made though? For now just use a cutoff of
- * 8, anything between 4 and 8 worked OK in some local testing.
+ * defeats delayed allocation on some filesystems.
*/
- if (numblocks > 8 &&
+ if (numblocks > file_extend_method_threshold &&
file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
int ret = 0;
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 220a092ef52..964e107c7a5 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1046,6 +1046,14 @@
options => 'file_extend_method_options',
},
+{ name => 'file_extend_method_threshold', type => 'int', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
+ short_desc => 'Specifies the extension size above which file_extend_method is used.',
+ variable => 'file_extend_method_threshold',
+ boot_val => 'DEFAULT_FILE_EXTEND_METHOD_THRESHOLD',
+ min => '1',
+ max => 'INT_MAX',
+},
+
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 753a42e8ca5..b745e31a38d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -179,6 +179,7 @@
# in kilobytes, or -1 for no limit
#file_copy_method = copy # copy, clone (if supported by OS)
+#file_extend_method_threshold = 8 # extension size in blocks up to which write_zeros is used
#file_extend_method = posix_fallocate # the default is the first option supported
# by the operating system:
# posix_fallocate (most Unix-like systems)
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index f21ac4545a8..7074c3f118b 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -66,12 +66,20 @@ enum FileExtendMethod
/* Default to the first available file_extend_method. */
#define DEFAULT_FILE_EXTEND_METHOD 0
+/*
+ * Thresholds of 4-8 blocks were experimentally determined to avoid
+ * interference between posix_fallocate() and delayed allocation on common
+ * Linux file systems, but other systems might vary.
+ */
+#define DEFAULT_FILE_EXTEND_METHOD_THRESHOLD 8
+
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
extern PGDLLIMPORT int file_extend_method;
+extern PGDLLIMPORT int file_extend_method_threshold;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
--
2.51.2