v4-0002-Add-RISC-V-popcount-using-Zbb-extension.patch
text/x-patch
Filename: v4-0002-Add-RISC-V-popcount-using-Zbb-extension.patch
Type: text/x-patch
Part: 4
From 9b7f5a7be2123e79a71c781191681c8eb972d795 Mon Sep 17 00:00:00 2001
From: Greg Burd <greg@burd.me>
Date: Sun, 22 Mar 2026 11:15:41 -0400
Subject: [PATCH v4 2/3] Add RISC-V popcount using Zbb extension
Implement hardware popcount support for RISC-V using the Zbb (basic bit
manipulation) extension when present. The Zbb extension provides the
'cpop' instruction which GCC and Clang emit from __builtin_popcountll()
when compiling with -march=rv64gc_zbb.
This patch adds:
- Build-time detection of Zbb support (configure.ac, meson.build)
- Runtime detection using __riscv_hwprobe() on Linux
- Optimized popcount implementation using cpop instruction
The implementation follows established pattern for hardware acceleration
(similar to x86 POPCNT and ARM SVE). Zbb-optimized code is compiled
separately with -march=rv64gc_zbb, while the main binary remains
portable across all RISC-V 64-bit systems.
---
configure.ac | 29 ++++++
meson.build | 32 ++++++
src/include/port/pg_bitutils.h | 2 +-
src/port/meson.build | 7 +-
src/port/pg_bitutils.c | 5 +-
src/port/pg_popcount_riscv.c | 183 +++++++++++++++++++++++++++++++++
6 files changed, 253 insertions(+), 5 deletions(-)
create mode 100644 src/port/pg_popcount_riscv.c
diff --git a/configure.ac b/configure.ac
index 8d176bd3468..da4d3bceb94 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2187,6 +2187,35 @@ if test x"$host_cpu" = x"aarch64"; then
fi
fi
+# Check for RISC-V Zbb bitmanip extension (provides 'cpop' for popcount).
+#
+# The Zbb extension provides the 'cpop' instruction for hardware popcount.
+# GCC/Clang emit the cpop instruction from __builtin_popcountll() when
+# -march=rv64gc_zbb is used. We test compilation with this flag, then
+# restore CFLAGS to avoid global march flags (for binary portability).
+# We define USE_RISCV_ZBB_WITH_RUNTIME_CHECK and use __riscv_hwprobe()
+# for runtime detection. We compile src/port/pg_popcount_riscv.c with
+# -march=rv64gc_zbb separately (like ARM SVE and x86 POPCNT).
+AC_MSG_CHECKING([for RISC-V Zbb extension (cpop/popcount)])
+if test x"$host_cpu" = x"riscv64"; then
+ pgac_save_CFLAGS_zbb="$CFLAGS"
+ CFLAGS="$CFLAGS -march=rv64gc_zbb"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [/* Test that the compiler will emit cpop from __builtin_popcountll */
+ static inline int test_cpop(unsigned long long x)
+ { return __builtin_popcountll(x); }],
+ [volatile int r = test_cpop(0xdeadbeefULL); (void) r;])],
+ [AC_DEFINE(USE_RISCV_ZBB_WITH_RUNTIME_CHECK, 1,
+ [Define to 1 to use RISC-V Zbb popcount with runtime detection.])
+ CFLAGS="$pgac_save_CFLAGS_zbb"
+ AC_MSG_RESULT([yes, with runtime check])],
+ [CFLAGS="$pgac_save_CFLAGS_zbb"
+ AC_MSG_RESULT([no])])
+else
+ AC_MSG_RESULT([not on RISC-V])
+fi
+
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
PGAC_SSE42_CRC32_INTRINSICS()
diff --git a/meson.build b/meson.build
index 20b887f1a1b..cf7f41715d8 100644
--- a/meson.build
+++ b/meson.build
@@ -2601,6 +2601,38 @@ int main(void)
endif
+# ---------------------------------------------------------------------------
+# Check for RISC-V Zbb bitmanip extension (provides 'cpop' for popcount).
+#
+# The Zbb extension provides the 'cpop' instruction for hardware popcount.
+# GCC/Clang emit the cpop instruction from __builtin_popcountll() when
+# -march=rv64gc_zbb is used. We test compilation with this flag, but
+# do NOT add it globally (for binary portability). Instead, we define
+# USE_RISCV_ZBB_WITH_RUNTIME_CHECK and compile src/port/pg_popcount_riscv.c
+# with -march=rv64gc_zbb separately (like ARM SVE and x86 POPCNT).
+# Runtime detection uses __riscv_hwprobe().
+# ---------------------------------------------------------------------------
+zbb_test_code = '''
+static inline int test_cpop(unsigned long long x)
+{ return __builtin_popcountll(x); }
+int main(void) {
+ volatile int r = test_cpop(0xdeadbeefULL);
+ (void) r;
+ return 0;
+}
+'''
+
+cflags_zbb = []
+if host_cpu == 'riscv64'
+ if cc.compiles(zbb_test_code,
+ args: ['-march=rv64gc_zbb'],
+ name: 'RISC-V Zbb cpop')
+ cdata.set('USE_RISCV_ZBB_WITH_RUNTIME_CHECK', 1)
+ # Flag will be added only to pg_popcount_riscv.c in src/port/meson.build
+ cflags_zbb = ['-march=rv64gc_zbb']
+ endif
+endif
+
###############################################################
# Select CRC-32C implementation.
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 7a00d197013..cb8d8b6e626 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -279,7 +279,7 @@ pg_ceil_log2_64(uint64 num)
extern uint64 pg_popcount_portable(const char *buf, int bytes);
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, uint8 mask);
-#if defined(HAVE_X86_64_POPCNTQ) || defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
+#if defined(HAVE_X86_64_POPCNTQ) || defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK) || defined(USE_RISCV_ZBB_WITH_RUNTIME_CHECK)
/*
* Attempt to use specialized CPU instructions, but perform a runtime check
* first.
diff --git a/src/port/meson.build b/src/port/meson.build
index 922b3f64676..2c0486f5373 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -100,12 +100,15 @@ replace_funcs_pos = [
# loongarch
['pg_crc32c_loongarch', 'USE_LOONGARCH_CRC32C'],
+ # riscv
+ ['pg_popcount_riscv', 'USE_RISCV_ZBB_WITH_RUNTIME_CHECK', 'zbb'],
+
# generic fallback
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]
-pgport_cflags = {'crc': cflags_crc}
-pgport_sources_cflags = {'crc': []}
+pgport_cflags = {'crc': cflags_crc, 'zbb': cflags_zbb}
+pgport_sources_cflags = {'crc': [], 'zbb': []}
foreach f : replace_funcs_neg
func = f.get(0)
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 7b11c38c417..23af6c54477 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -162,7 +162,7 @@ pg_popcount_masked_portable(const char *buf, int bytes, uint8 mask)
return popcnt;
}
-#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON)
+#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON) && !defined(USE_RISCV_ZBB_WITH_RUNTIME_CHECK)
/*
* When special CPU instructions are not available, there's no point in using
@@ -191,4 +191,5 @@ pg_popcount_masked_optimized(const char *buf, int bytes, uint8 mask)
return pg_popcount_masked_portable(buf, bytes, mask);
}
-#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON */
+#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON && !
+ * USE_RISCV_ZBB_WITH_RUNTIME_CHECK */
diff --git a/src/port/pg_popcount_riscv.c b/src/port/pg_popcount_riscv.c
new file mode 100644
index 00000000000..dce68d15c44
--- /dev/null
+++ b/src/port/pg_popcount_riscv.c
@@ -0,0 +1,183 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_popcount_riscv.c
+ * Holds the RISC-V Zbb popcount implementations.
+ *
+ * Copyright (c) 2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/port/pg_popcount_riscv.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#ifdef USE_RISCV_ZBB_WITH_RUNTIME_CHECK
+
+#if defined(__linux__)
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Try to pull in <asm/hwprobe.h> for RISCV_HWPROBE_* / struct riscv_hwprobe.
+ * On older kernel-headers packages (or non-RISC-V Linux distros configured
+ * without multiarch headers) the file may be absent; provide minimal
+ * fallback definitions so this file still builds. The runtime check below
+ * will gracefully report "unavailable" if the syscall fails.
+ */
+#if defined(__has_include)
+#if __has_include(<asm/hwprobe.h>)
+#include <asm/hwprobe.h>
+#define HAVE_ASM_HWPROBE_H 1
+#endif
+#endif
+
+#ifndef HAVE_ASM_HWPROBE_H
+struct riscv_hwprobe
+{
+ int64 key;
+ uint64 value;
+};
+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
+#define RISCV_HWPROBE_EXT_ZBB (UINT64CONST(1) << 4)
+#endif
+
+#ifndef __NR_riscv_hwprobe
+#define __NR_riscv_hwprobe 258
+#endif
+#endif /* __linux__ */
+
+#include "port/pg_bitutils.h"
+
+/*
+ * Hardware implementation using RISC-V Zbb cpop instruction.
+ */
+static uint64 pg_popcount_zbb(const char *buf, int bytes);
+static uint64 pg_popcount_masked_zbb(const char *buf, int bytes, uint8 mask);
+
+/*
+ * The function pointers are initially set to "choose" functions. These
+ * functions will first set the pointers to the right implementations (based on
+ * what the current CPU supports) and then will call the pointer to fulfill the
+ * caller's request.
+ */
+static uint64 pg_popcount_choose(const char *buf, int bytes);
+static uint64 pg_popcount_masked_choose(const char *buf, int bytes, uint8 mask);
+uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
+uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, uint8 mask) = pg_popcount_masked_choose;
+
+static inline bool
+pg_popcount_zbb_available(void)
+{
+#if defined(__linux__)
+ struct riscv_hwprobe pair = {.key = RISCV_HWPROBE_KEY_IMA_EXT_0};
+
+ if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
+ return false;
+
+ return (pair.value & RISCV_HWPROBE_EXT_ZBB) != 0;
+#else
+ return false;
+#endif
+}
+
+static inline void
+choose_popcount_functions(void)
+{
+ if (pg_popcount_zbb_available())
+ {
+ pg_popcount_optimized = pg_popcount_zbb;
+ pg_popcount_masked_optimized = pg_popcount_masked_zbb;
+ }
+ else
+ {
+ pg_popcount_optimized = pg_popcount_portable;
+ pg_popcount_masked_optimized = pg_popcount_masked_portable;
+ }
+}
+
+static uint64
+pg_popcount_choose(const char *buf, int bytes)
+{
+ choose_popcount_functions();
+ return pg_popcount_optimized(buf, bytes);
+}
+
+static uint64
+pg_popcount_masked_choose(const char *buf, int bytes, uint8 mask)
+{
+ choose_popcount_functions();
+ return pg_popcount_masked_optimized(buf, bytes, mask);
+}
+
+/*
+ * pg_popcount64_zbb
+ * Return the number of 1 bits set in word
+ *
+ * Uses the RISC-V Zbb 'cpop' (count population) instruction via
+ * __builtin_popcountll(). When compiled with -march=rv64gc_zbb, GCC and
+ * Clang will emit the cpop instruction for this builtin.
+ */
+static inline int
+pg_popcount64_zbb(uint64 word)
+{
+ return __builtin_popcountll(word);
+}
+
+/*
+ * pg_popcount_zbb
+ * Returns number of 1 bits in buf
+ *
+ * Similar approach to x86 SSE4.2 POPCNT: process data in 8-byte chunks using
+ * the cpop instruction, with byte-by-byte fallback for remaining data.
+ */
+static uint64
+pg_popcount_zbb(const char *buf, int bytes)
+{
+ uint64 popcnt = 0;
+ const uint64 *words = (const uint64 *) buf;
+
+ /* Process 8-byte chunks */
+ while (bytes >= 8)
+ {
+ popcnt += pg_popcount64_zbb(*words++);
+ bytes -= 8;
+ }
+
+ buf = (const char *) words;
+
+ /* Process any remaining bytes */
+ while (bytes--)
+ popcnt += pg_number_of_ones[(unsigned char) *buf++];
+
+ return popcnt;
+}
+
+/*
+ * pg_popcount_masked_zbb
+ * Returns number of 1 bits in buf after applying the mask to each byte
+ */
+static uint64
+pg_popcount_masked_zbb(const char *buf, int bytes, uint8 mask)
+{
+ uint64 popcnt = 0;
+ uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
+ const uint64 *words = (const uint64 *) buf;
+
+ /* Process 8-byte chunks */
+ while (bytes >= 8)
+ {
+ popcnt += pg_popcount64_zbb(*words++ & maskv);
+ bytes -= 8;
+ }
+
+ buf = (const char *) words;
+
+ /* Process any remaining bytes */
+ while (bytes--)
+ popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
+
+ return popcnt;
+}
+
+#endif /* USE_RISCV_ZBB_WITH_RUNTIME_CHECK */
--
2.51.2