0008-Support-resize-for-hugetlb-20250610.patch

text/x-patch

Filename: 0008-Support-resize-for-hugetlb-20250610.patch
Type: text/x-patch
Part: 6
Message: Re: Changing shared_buffers without restart

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch 0008
Subject: Support resize for hugetlb
File+
src/backend/port/sysv_shmem.c 44 16
From 2ebc737cd5b22c4cb3fbcafb583c0bbd61fe93d0 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 5 Apr 2025 19:51:33 +0200
Subject: [PATCH 08/17] Support resize for hugetlb

The Linux kernel has a set of limitations on remapping hugetlb segments:
it can't increase the size of such a segment [1], and shrinking it will
not release the memory back. In fact, support for hugetlb mremap was
implemented not so long ago [2].

As a workaround, avoid mremap for resizing shared memory. Instead, unmap
the whole segment and map it back at the same address with the new size,
relying on the fact that the fd for the anon file behind the segment is
still open and will keep the memory content.

[1]: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/mremap.c?id=f4d2ef48250ad057e4f00087967b5ff366da9f39#n1593
[2]: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/mm/mremap.c?id=550a7d60bd5e35a56942dba6d8a26752beb26c9f
---
 src/backend/port/sysv_shmem.c | 60 +++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 87000a24eea..f0b53ce1d7c 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -1109,6 +1109,7 @@ AnonymousShmemResize(void)
 		/* Note that CalculateShmemSize indirectly depends on NBuffers */
 		Size new_size = CalculateShmemSize(&numSemas, i);
 		AnonymousMapping *m = &Mappings[i];
+		int	mmap_flags = PG_MMAP_FLAGS;
 
 		if (m->shmem == NULL)
 			continue;
@@ -1116,6 +1117,44 @@ AnonymousShmemResize(void)
 		if (m->shmem_size == new_size)
 			continue;
 
+#ifndef MAP_HUGETLB
+		/* ReserveAnonymousMemory should have dealt with this case */
+		Assert(huge_pages != HUGE_PAGES_ON && !huge_pages_on);
+#else
+		if (huge_pages_on)
+		{
+			Size		hugepagesize;
+
+			/* Make sure nothing is messed up */
+			Assert(huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY);
+
+			/* Round up the new size to a suitable large value */
+			GetHugePageSize(&hugepagesize, &mmap_flags, NULL);
+
+			if (new_size % hugepagesize != 0)
+				new_size += hugepagesize - (new_size % hugepagesize);
+
+			mmap_flags = PG_MMAP_FLAGS | mmap_flags;
+		}
+#endif
+
+		/*
+		 * Linux limitations do not allow us to mremap hugetlb in the way we
+		 * want. E.g. no size increase is allowed, and for shrinking the memory
+		 * will not be released back. To work around this unmap the segment and
+		 * create a new one at the same address. Thanks to the backing anon
+		 * file, the content will still be kept in memory.
+		 */
+		elog(DEBUG1, "segment[%s]: remap from %zu to %zu at address %p",
+					 MappingName(m->shmem_segment), m->shmem_size,
+					 new_size, m->shmem);
+
+		if (munmap(m->shmem, m->shmem_size) < 0)
+			ereport(FATAL,
+					(errcode(ERRCODE_SYSTEM_ERROR),
+					 errmsg("could not unmap shared memory segment %s [%p]: %m",
+							MappingName(m->shmem_segment), m->shmem)));
+
 		/* Resize the backing anon file. */
 		if(ftruncate(m->segment_fd, new_size) == -1)
 			ereport(FATAL,
@@ -1123,25 +1162,14 @@ AnonymousShmemResize(void)
 					 errmsg("could not truncase anonymous file for \"%s\": %m",
 							MappingName(m->shmem_segment))));
 
-		/* Clean up some reserved space to resize into */
-		if (munmap(m->shmem + m->shmem_size, new_size - m->shmem_size) == -1)
-			ereport(FATAL,
-					(errcode(ERRCODE_SYSTEM_ERROR),
-					 errmsg("could not unmap %zu from reserved shared memory %p: %m",
-							new_size - m->shmem_size, m->shmem)));
-
-		/* Claim the unused space */
-		elog(DEBUG1, "segment[%s]: remap from %zu to %zu at address %p",
-					 MappingName(m->shmem_segment), m->shmem_size,
-					 new_size, m->shmem);
-
-		ptr = mremap(m->shmem, m->shmem_size, new_size, 0);
+		/* Reclaim the space */
+		ptr = mmap(m->shmem, new_size, PROT_READ | PROT_WRITE,
+				   mmap_flags | MAP_FIXED, m->segment_fd, 0);
 		if (ptr == MAP_FAILED)
 			ereport(FATAL,
 					(errcode(ERRCODE_SYSTEM_ERROR),
-					 errmsg("could not resize shared memory segment %s [%p] to %d (%zu): %m",
-							MappingName(m->shmem_segment), m->shmem, NBuffers,
-							new_size)));
+					 errmsg("could not map shared memory segment %s [%p] with size %zu: %m",
+							MappingName(m->shmem_segment), m->shmem, new_size)));
 
 		reinit = true;
 		m->shmem_size = new_size;
-- 
2.34.1