v8-0001-Enable-the-Microsoft-Windows-ARM64-MSVC-platform.patch
application/octet-stream
Filename: v8-0001-Enable-the-Microsoft-Windows-ARM64-MSVC-platform.patch
Type: application/octet-stream
Part: 0
From cfdfea317cd3a0ae53f0dfaa439ada8ded3bebf7 Mon Sep 17 00:00:00 2001
From: Dave Cramer <davecramer@gmail.com>
Date: Sun, 13 Jul 2025 06:33:17 -0400
Subject: [PATCH v8] Enable the Microsoft Windows ARM64/MSVC platform
Add support for the ARM64 architecture on Windows 11 using the MSVC
compiler, addressing build issues and implementing proper memory
synchronization semantics for this platform.
* Implement spin_delay() with __isb(_ARM64_BARRIER_SY) intrinsic to emit
the "ISB SY" instruction, which matches the GCC/Clang approach to
spinloop delay and the empirical evidence that it out-scales the YIELD
instruction in practice.
* Unconditionally choose to use the MSVC supplied intrinsic
for CRC32 on ARM64.
* Implement the S_UNLOCK() macro using the InterlockedExchange()
intrinsic.
Author: Greg Burd <greg@burd.me>
Author: Dave Cramer <davecramer@gmail.com>
Discussion: https://postgr.es/m/3c576ad7-d2da-4137-b791-5821da7cc370%40app.fastmail.com
---
doc/src/sgml/installation.sgml | 2 +-
meson.build | 9 +++--
src/include/storage/s_lock.h | 60 +++++++++++++++++++++++++++-------
src/port/pg_crc32c_armv8.c | 6 ++++
src/tools/msvc_gendef.pl | 8 ++---
5 files changed, 67 insertions(+), 18 deletions(-)
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index fe8d73e1f8c..3f8d512a906 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -3967,7 +3967,7 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib"
<sect3 id="install-windows-full-64-bit">
<title>Special Considerations for 64-Bit Windows</title>
<para>
- PostgreSQL will only build for the x64 architecture on 64-bit Windows.
+ PostgreSQL will only build for the x64 and ARM64 architectures on 64-bit Windows.
</para>
<para>
Mixing 32- and 64-bit versions in the same build tree is not supported.
diff --git a/meson.build b/meson.build
index 1256094fa57..6c463d14749 100644
--- a/meson.build
+++ b/meson.build
@@ -2527,7 +2527,12 @@ int main(void)
}
'''
- if cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd without -march=armv8-a+crc',
+ # Check first for an MSVC/ARM64 combo because the test prog above won't
+ # compile there (it doesn't '#ifdef _MSC_VER #include <intrin.h>'). That
+ # is okay, as we know for a fact that this platform combo supports the
+ # ARM64 CRC intrinsics the test exercises, so use them unconditionally.
+ if (host_cpu == 'aarch64' and cc.get_id() == 'msvc') or \
+ cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd without -march=armv8-a+crc',
args: test_c_args)
# Use ARM CRC Extension unconditionally
cdata.set('USE_ARMV8_CRC32C', 1)
@@ -2546,7 +2551,7 @@ int main(void)
cdata.set('USE_ARMV8_CRC32C', false)
cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 1)
have_optimized_crc = true
- endif
+endif
elif host_cpu == 'loongarch64'
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7f8f566bd40..50262cca887 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -594,7 +594,8 @@ tas(volatile slock_t *lock)
#if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */
-#ifdef _MSC_VER
+/* When compiling for Microsoft Windows using MSVC */
+#if defined(_MSC_VER)
typedef LONG slock_t;
#define HAS_TEST_AND_SET
@@ -602,34 +603,71 @@ typedef LONG slock_t;
#define SPIN_DELAY() spin_delay()
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
+/*
+ * InterlockedExchange() generates a full memory barrier (or release
+ * semantics), which ensures that all prior memory operations are visible
+ * to other cores before the lock is released.
+ */
+#define S_UNLOCK(lock) (InterlockedExchange(lock, 0))
+
+#if defined(_WIN64) /* 64-bit Microsoft Windows */
+
+#if defined(_M_ARM64) /* aarch64 */
+/*
+ * While there is support for a __yield() intrinsic for MSVC/ARM64[1], a
+ * wealth of real-world testing across databases and languages, as well as
+ * a blog post by ARM[2], suggests that ISB is the most scalable and power
+ * friendly instruction to use for spinlock delay loops. So we use the only
+ * supported intrinsic/flag combination available for this platform combo[3].
+ * This matches what we do above when compiling with either GCC or Clang.
+ *
+ * [1] https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
+ * [2] https://developer.arm.com/community/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
+ * [3] https://github.com/MicrosoftDocs/cpp-docs/blob/main/docs/intrinsics/arm64-intrinsics.md
+ */
+static __forceinline void
+spin_delay(void)
+{
+ __isb(_ARM64_BARRIER_SY);
+}
+
+#elif defined(_M_X64) /* x86-64 */
+
+/*
+ * Use _mm_pause() intrinsic for x86-64. This emits the PAUSE instruction,
+ * which improves performance in spin-wait loops by preventing pipeline flush
+ * on Hyper-Threading systems.
*/
-#if defined(_WIN64)
static __forceinline void
spin_delay(void)
{
_mm_pause();
}
-#else
+
+#endif /* defined(_M_ARM64|_M_X64) */
+
+#else /* !defined(_WIN64) */
+
+#ifdef _M_IX86 /* x86-specific */
+
+/* Use inline-assembly "rep nop" for MSVC 32-bit x86 */
static __forceinline void
spin_delay(void)
{
/* See comment for gcc code. Same code, MASM syntax */
__asm rep nop;
}
-#endif
#include <intrin.h>
#pragma intrinsic(_ReadWriteBarrier)
-#define S_UNLOCK(lock) \
+#define S_UNLOCK(lock) \
do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-#endif
-
-
-#endif /* !defined(HAS_TEST_AND_SET) */
+#endif /* defined(_M_IX86) */
+#endif /* defined(_WIN64) */
+#endif /* defined(_MSC_VER) */
+#endif /* !defined(HAS_TEST_AND_SET) */
/* Blow up if we didn't have any way to do spinlocks */
diff --git a/src/port/pg_crc32c_armv8.c b/src/port/pg_crc32c_armv8.c
index 5ba070bb99d..29a91dca62f 100644
--- a/src/port/pg_crc32c_armv8.c
+++ b/src/port/pg_crc32c_armv8.c
@@ -14,7 +14,13 @@
*/
#include "c.h"
+#ifdef _MSC_VER
+ /* MSVC ARM64 intrinsics */
+#include <intrin.h>
+#else
+ /* GCC/Clang: Use ACLE intrinsics from arm_acle.h */
#include <arm_acle.h>
+#endif
#include "port/pg_crc32c.h"
diff --git a/src/tools/msvc_gendef.pl b/src/tools/msvc_gendef.pl
index 868aad51b09..c92c94c4775 100644
--- a/src/tools/msvc_gendef.pl
+++ b/src/tools/msvc_gendef.pl
@@ -118,9 +118,9 @@ sub writedef
{
my $isdata = $def->{$f} eq 'data';
- # Strip the leading underscore for win32, but not x64
+ # Strip the leading underscore for win32, but not for x64 or aarch64
$f =~ s/^_//
- unless ($arch eq "x86_64");
+ unless ($arch eq "x86_64" || $arch eq "aarch64");
# Emit just the name if it's a function symbol, or emit the name
# decorated with the DATA option for variables.
@@ -141,7 +141,7 @@ sub writedef
sub usage
{
die("Usage: msvc_gendef.pl --arch <arch> --deffile <deffile> --tempdir <tempdir> files-or-directories\n"
- . " arch: x86 | x86_64\n"
+ . " arch: x86 | x86_64 | aarch64\n"
. " deffile: path of the generated file\n"
. " tempdir: directory for temporary files\n"
. " files or directories: object files or directory containing object files\n"
@@ -158,7 +158,7 @@ GetOptions(
'tempdir:s' => \$tempdir,) or usage();
usage("arch: $arch")
- unless ($arch eq 'x86' || $arch eq 'x86_64');
+ unless ($arch eq 'x86' || $arch eq 'x86_64' || $arch eq 'aarch64');
my @files;
--
2.52.0.windows.1