0005-Use-anonymous-files-to-back-shared-memory-s-20250113.patch
text/x-patch
Filename: 0005-Use-anonymous-files-to-back-shared-memory-s-20250113.patch
Type: text/x-patch
Part: 1
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch 0005
Subject: Use anonymous files to back shared memory segments
| File | + | − |
|---|---|---|
| src/backend/port/sysv_shmem.c | 56 | 8 |
| src/include/portability/mem.h | 1 | 1 |
From 746970c489f975b0d3add01b8d85d7cdab601b6d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 15 Oct 2024 16:18:45 +0200
Subject: [PATCH 5/7] Use anonymous files to back shared memory segments
Allow the use of anonymous files for shared memory, instead of plain
anonymous memory. Such an anonymous file is created via memfd_create; it
lives in memory, behaves like a regular file, and is semantically equivalent
to anonymous memory allocated via mmap with MAP_ANONYMOUS.
The advantages of using anonymous files are the following:
* We've got a file descriptor, which could be used for regular file
operations (modification, truncation, you name it).
* The file could be given a name, which improves readability when it
comes to process maps. Here is what it looks like:
7f5a2bd04000-7f5a32e52000 rw-s 00000000 00:01 1845 /memfd:strategy (deleted)
7f5a39252000-7f5a4030e000 rw-s 00000000 00:01 1842 /memfd:checkpoint (deleted)
7f5a4670e000-7f5a4d7ba000 rw-s 00000000 00:01 1839 /memfd:iocv (deleted)
7f5a53bba000-7f5a5ad26000 rw-s 00000000 00:01 1836 /memfd:descriptors (deleted)
7f5a9ad26000-7f5aa9d94000 rw-s 00000000 00:01 1833 /memfd:buffers (deleted)
7f5d29d94000-7f5d30e00000 rw-s 00000000 00:01 1830 /memfd:main (deleted)
* By default, Linux will not add file-backed shared mappings into a core dump,
making it more convenient to work with them in PostgreSQL: no more huge dumps
to process.
The downside is that memfd_create is Linux-specific.
---
src/backend/port/sysv_shmem.c | 64 ++++++++++++++++++++++++++++++-----
src/include/portability/mem.h | 2 +-
2 files changed, 57 insertions(+), 9 deletions(-)
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 72e823618ef..b2173e1a078 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -103,6 +103,7 @@ typedef struct AnonymousMapping
void *shmem; /* Pointer to the start of the mapped memory */
void *seg_addr; /* SysV shared memory for the header */
unsigned long seg_id; /* IPC key */
+ int segment_fd; /* fd for the backing anon file */
} AnonymousMapping;
static AnonymousMapping Mappings[ANON_MAPPINGS];
@@ -116,7 +117,7 @@ static int next_free_slot = 0;
* 00400000-00490000 /path/bin/postgres
* ...
* 012d9000-0133e000 [heap]
- * 7f443a800000-7f470a800000 /dev/zero (deleted)
+ * 7f443a800000-7f470a800000 /memfd:main (deleted)
* 7f470a800000-7f471831d000 /usr/lib/locale/locale-archive
* 7f4718400000-7f4718401000 /usr/lib64/libicudata.so.74.2
* ...
@@ -143,9 +144,9 @@ static int next_free_slot = 0;
* The result would look like this:
*
* 012d9000-0133e000 [heap]
- * 7f4426f54000-7f442e010000 /dev/zero (deleted)
+ * 7f4426f54000-7f442e010000 /memfd:main (deleted)
* [...free space...]
- * 7f443a800000-7f444196c000 /dev/zero (deleted)
+ * 7f443a800000-7f444196c000 /memfd:buffers (deleted)
* [...free space...]
* 7f470a800000-7f471831d000 /usr/lib/locale/locale-archive
* 7f4718400000-7f4718401000 /usr/lib64/libicudata.so.74.2
@@ -708,6 +709,18 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
void *ptr = MAP_FAILED;
int mmap_errno = 0;
+ /*
+ * Prepare an anonymous file backing the segment. Its size will be
+ * specified later via ftruncate.
+ *
+ * The file behaves like a regular file, but lives in memory. Once all
+ * references to the file are dropped, it is automatically released.
+ * Anonymous memory is used for all backing pages of the file, thus it has
+ * the same semantics as anonymous memory allocations using mmap with the
+ * MAP_ANONYMOUS flag.
+ */
+ mapping->segment_fd = memfd_create(MappingName(mapping->shmem_slot), 0);
+
#ifndef MAP_HUGETLB
/* PGSharedMemoryCreate should have dealt with this case */
Assert(huge_pages != HUGE_PAGES_ON);
@@ -725,8 +738,13 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
if (allocsize % hugepagesize != 0)
allocsize += hugepagesize - (allocsize % hugepagesize);
+ /*
+ * Do not use an anonymous file here yet. When adding it, do not forget
+ * to use ftruncate and flags MFD_HUGETLB & MFD_HUGE_2MB/MFD_HUGE_1GB
+ * in memfd_create.
+ */
ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
- PG_MMAP_FLAGS | mmap_flags, -1, 0);
+ PG_MMAP_FLAGS | MAP_ANONYMOUS | mmap_flags, -1, 0);
mmap_errno = errno;
if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
{
@@ -762,7 +780,8 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
* - First create the temporary probe mapping of a fixed size and let
* kernel to place it at address of its choice. By the virtue of the
* probe mapping size we expect it to be located at the lowest
- * possible address, expecting some non mapped space above.
+ * possible address, expecting some non mapped space above. The probe
+ * does not need to be backed by an anonymous file.
*
* - Unmap the probe mapping, remember the address.
*
@@ -777,7 +796,7 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
* without a restart.
*/
probe = mmap(NULL, PROBE_MAPPING_SIZE, PROT_READ | PROT_WRITE,
- PG_MMAP_FLAGS, -1, 0);
+ PG_MMAP_FLAGS | MAP_ANONYMOUS, -1, 0);
if (probe == MAP_FAILED)
{
@@ -795,8 +814,20 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
munmap(probe, PROBE_MAPPING_SIZE);
+ /*
+ * Specify the segment file size using allocsize, which contains the
+ * potentially modified size.
+ */
+ if (ftruncate(mapping->segment_fd, allocsize) < 0)
+ {
+ DebugMappings();
+ elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+ MappingName(mapping->shmem_slot), allocsize);
+
+ }
+
ptr = mmap(mapping_addr, allocsize, PROT_READ | PROT_WRITE,
- PG_MMAP_FLAGS | MAP_FIXED_NOREPLACE, -1, 0);
+ PG_MMAP_FLAGS | MAP_FIXED_NOREPLACE, mapping->segment_fd, 0);
mmap_errno = errno;
if (ptr == MAP_FAILED)
{
@@ -815,8 +846,17 @@ CreateAnonymousSegment(AnonymousMapping *mapping)
*/
allocsize = mapping->shmem_size;
+ /* Specify the segment file size using allocsize. */
+ if (ftruncate(mapping->segment_fd, allocsize) < 0)
+ {
+ DebugMappings();
+ elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+ MappingName(mapping->shmem_slot), allocsize);
+
+ }
+
ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
- PG_MMAP_FLAGS, -1, 0);
+ PG_MMAP_FLAGS, mapping->segment_fd, 0);
mmap_errno = errno;
}
@@ -905,6 +945,14 @@ AnonymousShmemResize(int newval, void *extra)
if (m->shmem_size == new_size)
continue;
+ /* Resize the backing anon file. */
+ if (ftruncate(m->segment_fd, new_size) < 0)
+ {
+ DebugMappings();
+ elog(DEBUG1, "slot[%s]: ftruncate(%zu) failed: %m",
+ MappingName(m->shmem_slot), new_size);
+ }
+
if (mremap(m->shmem, m->shmem_size, new_size, 0) < 0)
elog(LOG, "mremap(%p, %zu) failed: %m",
m->shmem, m->shmem_size);
diff --git a/src/include/portability/mem.h b/src/include/portability/mem.h
index 2cd05313b82..50db0da28dc 100644
--- a/src/include/portability/mem.h
+++ b/src/include/portability/mem.h
@@ -38,7 +38,7 @@
#define MAP_NOSYNC 0
#endif
-#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
+#define PG_MMAP_FLAGS (MAP_SHARED|MAP_HASSEMAPHORE)
/* Some really old systems don't define MAP_FAILED. */
#ifndef MAP_FAILED
--
2.34.1