0005-Use-anonymous-files-to-back-shared-memory-s-20250113.patch

text/x-patch

Filename: 0005-Use-anonymous-files-to-back-shared-memory-s-20250113.patch
Type: text/x-patch
Part: 1
Message: Re: Changing shared_buffers without restart

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch 0005
Subject: Use anonymous files to back shared memory segments
File+
src/backend/port/sysv_shmem.c 56 8
src/include/portability/mem.h 1 1
From 746970c489f975b0d3add01b8d85d7cdab601b6d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 15 Oct 2024 16:18:45 +0200
Subject: [PATCH 5/7] Use anonymous files to back shared memory segments

Allow anonymous files to be used for shared memory, instead of plain
anonymous memory. Such an anonymous file is created via memfd_create; it
lives in memory, behaves like a regular file, and is semantically
equivalent to anonymous memory allocated via mmap with MAP_ANONYMOUS.

The advantages of using anonymous files are the following:

* We've got a file descriptor, which could be used for regular file
  operations (modification, truncation, you name it).

* The file could be given a name, which improves readability when it
  comes to process maps. Here is what it looks like:

7f5a2bd04000-7f5a32e52000 rw-s 00000000 00:01 1845 /memfd:strategy (deleted)
7f5a39252000-7f5a4030e000 rw-s 00000000 00:01 1842 /memfd:checkpoint (deleted)
7f5a4670e000-7f5a4d7ba000 rw-s 00000000 00:01 1839 /memfd:iocv (deleted)
7f5a53bba000-7f5a5ad26000 rw-s 00000000 00:01 1836 /memfd:descriptors (deleted)
7f5a9ad26000-7f5aa9d94000 rw-s 00000000 00:01 1833 /memfd:buffers (deleted)
7f5d29d94000-7f5d30e00000 rw-s 00000000 00:01 1830 /memfd:main (deleted)

* By default, Linux will not add file-backed shared mappings into a core dump,
  making it more convenient to work with them in PostgreSQL: no more huge dumps
  to process.

The downside is that memfd_create is Linux-specific.
---
 src/backend/port/sysv_shmem.c | 64 ++++++++++++++++++++++++++++++-----
 src/include/portability/mem.h |  2 +-
 2 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 72e823618ef..b2173e1a078 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -103,6 +103,7 @@ typedef struct AnonymousMapping
 	void *shmem; 				/* Pointer to the start of the mapped memory */
 	void *seg_addr; 			/* SysV shared memory for the header */
 	unsigned long seg_id; 		/* IPC key */
+	int segment_fd; 			/* fd for the backing anon file */
 } AnonymousMapping;
 
 static AnonymousMapping Mappings[ANON_MAPPINGS];
@@ -116,7 +117,7 @@ static int next_free_slot = 0;
  * 00400000-00490000         /path/bin/postgres
  * ...
  * 012d9000-0133e000         [heap]
- * 7f443a800000-7f470a800000 /dev/zero (deleted)
+ * 7f443a800000-7f470a800000 /memfd:main (deleted)
  * 7f470a800000-7f471831d000 /usr/lib/locale/locale-archive
  * 7f4718400000-7f4718401000 /usr/lib64/libicudata.so.74.2
  * ...
@@ -143,9 +144,9 @@ static int next_free_slot = 0;
  * The result would look like this:
  *
  * 012d9000-0133e000         [heap]
- * 7f4426f54000-7f442e010000 /dev/zero (deleted)
+ * 7f4426f54000-7f442e010000 /memfd:main (deleted)
  * [...free space...]
- * 7f443a800000-7f444196c000 /dev/zero (deleted)
+ * 7f443a800000-7f444196c000 /memfd:buffers (deleted)
  * [...free space...]
  * 7f470a800000-7f471831d000 /usr/lib/locale/locale-archive
  * 7f4718400000-7f4718401000 /usr/lib64/libicudata.so.74.2
@@ -708,6 +709,18 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 	void	   *ptr = MAP_FAILED;
 	int			mmap_errno = 0;
 
+	/*
+	 * Prepare an anonymous file backing the segment. Its size will be
+	 * specified later via ftruncate.
+	 *
+	 * The file behaves like a regular file, but lives in memory. Once all
+	 * references to the file are dropped, it is automatically released.
+	 * Anonymous memory is used for all backing pages of the file, thus it has
+	 * the same semantics as anonymous memory allocations using mmap with the
+	 * MAP_ANONYMOUS flag.
+	 */
+	mapping->segment_fd = memfd_create(MappingName(mapping->shmem_slot), 0);
+
 #ifndef MAP_HUGETLB
 	/* PGSharedMemoryCreate should have dealt with this case */
 	Assert(huge_pages != HUGE_PAGES_ON);
@@ -725,8 +738,13 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 		if (allocsize % hugepagesize != 0)
 			allocsize += hugepagesize - (allocsize % hugepagesize);
 
+		/*
+		 * Do not use an anonymous file here yet. When adding it, do not forget
+		 * to use ftruncate and flags MFD_HUGETLB & MFD_HUGE_2MB/MFD_HUGE_1GB
+		 * in memfd_create.
+		 */
 		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
-				   PG_MMAP_FLAGS | mmap_flags, -1, 0);
+				   PG_MMAP_FLAGS | MAP_ANONYMOUS | mmap_flags, -1, 0);
 		mmap_errno = errno;
 		if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
 		{
@@ -762,7 +780,8 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 		 * - First create the temporary probe mapping of a fixed size and let
 		 *   kernel to place it at address of its choice. By the virtue of the
 		 *   probe mapping size we expect it to be located at the lowest
-		 *   possible address, expecting some non mapped space above.
+		 *   possible address, expecting some non mapped space above. The probe
+		 *   does not need to be backed by an anonymous file.
 		 *
 		 * - Unmap the probe mapping, remember the address.
 		 *
@@ -777,7 +796,7 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 		 *   without a restart.
 		 */
 		probe = mmap(NULL, PROBE_MAPPING_SIZE, PROT_READ | PROT_WRITE,
-				   PG_MMAP_FLAGS, -1, 0);
+				   PG_MMAP_FLAGS | MAP_ANONYMOUS, -1, 0);
 
 		if (probe == MAP_FAILED)
 		{
@@ -795,8 +814,20 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 
 			munmap(probe, PROBE_MAPPING_SIZE);
 
+			/*
+			 * Specify the segment file size using allocsize, which contains
+			 * potentially modified size.
+			 */
+			if (ftruncate(mapping->segment_fd, allocsize) < 0)
+			{
+				DebugMappings();
+				elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+					 MappingName(mapping->shmem_slot), allocsize);
+
+			}
+
 			ptr = mmap(mapping_addr, allocsize, PROT_READ | PROT_WRITE,
-					   PG_MMAP_FLAGS | MAP_FIXED_NOREPLACE, -1, 0);
+					   PG_MMAP_FLAGS | MAP_FIXED_NOREPLACE, mapping->segment_fd, 0);
 			mmap_errno = errno;
 			if (ptr == MAP_FAILED)
 			{
@@ -815,8 +846,17 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
 		 */
 		allocsize = mapping->shmem_size;
 
+		/* Specify the segment file size using allocsize. */
+		if (ftruncate(mapping->segment_fd, allocsize) < 0)
+		{
+			DebugMappings();
+			elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+				 MappingName(mapping->shmem_slot), allocsize);
+
+		}
+
 		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
-						   PG_MMAP_FLAGS, -1, 0);
+						   PG_MMAP_FLAGS, mapping->segment_fd, 0);
 		mmap_errno = errno;
 	}
 
@@ -905,6 +945,14 @@ AnonymousShmemResize(int newval, void *extra)
 		if (m->shmem_size == new_size)
 			continue;
 
+		/* Resize the backing anon file. */
+		if (ftruncate(m->segment_fd, new_size) < 0)
+		{
+			DebugMappings();
+			elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+				 MappingName(m->shmem_slot), new_size);
+		}
+
 		if (mremap(m->shmem, m->shmem_size, new_size, 0) < 0)
 			elog(LOG, "mremap(%p, %zu) failed: %m",
 				 m->shmem, m->shmem_size);
diff --git a/src/include/portability/mem.h b/src/include/portability/mem.h
index 2cd05313b82..50db0da28dc 100644
--- a/src/include/portability/mem.h
+++ b/src/include/portability/mem.h
@@ -38,7 +38,7 @@
 #define MAP_NOSYNC			0
 #endif
 
-#define PG_MMAP_FLAGS			(MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
+#define PG_MMAP_FLAGS			(MAP_SHARED|MAP_HASSEMAPHORE)
 
 /* Some really old systems don't define MAP_FAILED. */
 #ifndef MAP_FAILED
-- 
2.34.1