vAB3-0001-Add-regression-test-for-ProcKill-lock-group-pro.patch
application/octet-stream
Filename: vAB3-0001-Add-regression-test-for-ProcKill-lock-group-pro.patch
Type: application/octet-stream
Part: 0
From 51d4335998d2bca24a3cb0a16c3655baaadf0f78 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Thu, 7 May 2026 11:29:09 +0500
Subject: [PATCH vAB3 1/2] Add regression test for ProcKill lock-group
procLatch recycle race
Two backends form a lock group via prockill_become_lock_group_leader /
prockill_become_lock_group_member (new SQL helpers added to the existing
injection_points test module), then are terminated
concurrently. Injection points placed in ProcKill() right after
LWLockRelease(leader_lwlock) pause both victims there, letting the test
verify that a freshly forked backend can claim the recycled PGPROC slot
without hitting "latch already owned by PID ..." PANIC.
Standard observability (pg_stat_activity, BackendPidGetProc) is unavailable
at that point in ProcKill: pgstat_beshutdown_hook and RemoveProcFromArray,
both registered as on_shmem_exit callbacks, run before ProcKill. The new
prockill_backend_in_injection() helper reads PGPROC->wait_event_info from
ProcGlobal->allProcs directly, which remains valid until ProcKill zeroes
proc->pid near its end.
Injection waits must be attached from a controller session rather than from
the victims themselves. injection_points_attach() from the victim would
register a before_shmem_exit cleanup that detaches the point before ProcKill
runs. The new prockill_attach_injection_wait() calls InjectionPointAttach()
directly, leaving no such hook on the victims.
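This patch only touches the injection_points test module; the
prockill-after-lockgroup-* injection points in ProcKill() are expected to come
with the matching ProcKill fix. The test therefore expects a fixed server and
must land together with that fix.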
---
src/test/modules/injection_points/Makefile | 5 +-
.../injection_point_condition.h | 25 ++++
.../injection_points--1.0.sql | 31 +++++
.../injection_points/injection_points.c | 26 +---
src/test/modules/injection_points/meson.build | 6 +
.../injection_points/regress_prockill.c | 115 ++++++++++++++++++
.../t/001_prockill_lockgroup.pl | 99 +++++++++++++++
7 files changed, 282 insertions(+), 25 deletions(-)
create mode 100644 src/test/modules/injection_points/injection_point_condition.h
create mode 100644 src/test/modules/injection_points/regress_prockill.c
create mode 100644 src/test/modules/injection_points/t/001_prockill_lockgroup.pl
diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index f057d143d1a..9b15adf7e60 100644
--- a/src/test/modules/injection_points/Makefile
+++ b/src/test/modules/injection_points/Makefile
@@ -4,7 +4,8 @@ MODULE_big = injection_points
OBJS = \
$(WIN32RES) \
injection_points.o \
- regress_injection.o
+ regress_injection.o \
+ regress_prockill.o
EXTENSION = injection_points
DATA = injection_points--1.0.sql
PGFILEDESC = "injection_points - facility for injection points"
@@ -12,6 +13,8 @@ PGFILEDESC = "injection_points - facility for injection points"
REGRESS = injection_points hashagg reindex_conc vacuum
REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
+TAP_TESTS = 1
+
ISOLATION = basic \
inplace \
repack \
diff --git a/src/test/modules/injection_points/injection_point_condition.h b/src/test/modules/injection_points/injection_point_condition.h
new file mode 100644
index 00000000000..a3200673eed
--- /dev/null
+++ b/src/test/modules/injection_points/injection_point_condition.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ * injection_point_condition.h
+ * Condition payload for injection_wait callbacks
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * src/test/modules/injection_points/injection_point_condition.h
+ *-------------------------------------------------------------------------
+ */
+#ifndef INJECTION_POINT_CONDITION_H
+#define INJECTION_POINT_CONDITION_H
+
+typedef enum InjectionPointConditionType
+{
+ INJ_CONDITION_ALWAYS = 0, /* always run */
+ INJ_CONDITION_PID, /* PID restriction */
+} InjectionPointConditionType;
+
+typedef struct InjectionPointCondition
+{
+ InjectionPointConditionType type; /* type of the condition */
+ int pid; /* process in which the point is allowed to run */
+} InjectionPointCondition;
+
+#endif /* INJECTION_POINT_CONDITION_H */
diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index 861c7355d4e..fa7afa9995e 100644
--- a/src/test/modules/injection_points/injection_points--1.0.sql
+++ b/src/test/modules/injection_points/injection_points--1.0.sql
@@ -108,3 +108,34 @@ CREATE FUNCTION removable_cutoff(rel regclass)
RETURNS xid8
AS 'MODULE_PATHNAME'
LANGUAGE C CALLED ON NULL INPUT;
+
+--
+-- regress_prockill.c functions
+--
+-- Form a lock group without parallel query so ProcKill lock-group teardown
+-- can be tested with injection points.  One backend calls the leader
+-- function; another then joins it by passing the leader's backend PID.
+CREATE FUNCTION prockill_become_lock_group_leader()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION prockill_become_lock_group_member(leader_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Attach injection_wait to point_name for target_pid only.  Call this from a
+-- controller session: a victim's own attach would be undone before ProcKill.
+CREATE FUNCTION prockill_attach_injection_wait(point_name text, target_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Return true if target_pid is currently waiting at the named injection point,
+-- read directly from its PGPROC slot (works inside ProcKill where
+-- pg_stat_activity and ProcArray are already torn down).
+CREATE FUNCTION prockill_backend_in_injection(target_pid int, point_name text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c
index 0f1af513673..fb7a7d477cc 100644
--- a/src/test/modules/injection_points/injection_points.c
+++ b/src/test/modules/injection_points/injection_points.c
@@ -34,36 +34,14 @@
#include "utils/tuplestore.h"
#include "utils/wait_event.h"
+#include "injection_point_condition.h"
+
PG_MODULE_MAGIC;
/* Maximum number of waits usable in injection points at once */
#define INJ_MAX_WAIT 8
#define INJ_NAME_MAXLEN 64
-/*
- * Conditions related to injection points. This tracks in shared memory the
- * runtime conditions under which an injection point is allowed to run,
- * stored as private_data when an injection point is attached, and passed as
- * argument to the callback.
- *
- * If more types of runtime conditions need to be tracked, this structure
- * should be expanded.
- */
-typedef enum InjectionPointConditionType
-{
- INJ_CONDITION_ALWAYS = 0, /* always run */
- INJ_CONDITION_PID, /* PID restriction */
-} InjectionPointConditionType;
-
-typedef struct InjectionPointCondition
-{
- /* Type of the condition */
- InjectionPointConditionType type;
-
- /* ID of the process where the injection point is allowed to run */
- int pid;
-} InjectionPointCondition;
-
/*
* List of injection points stored in TopMemoryContext attached
* locally to this process.
diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index fb1418e2caa..4ac9e54adb8 100644
--- a/src/test/modules/injection_points/meson.build
+++ b/src/test/modules/injection_points/meson.build
@@ -7,6 +7,7 @@ endif
injection_points_sources = files(
'injection_points.c',
'regress_injection.c',
+ 'regress_prockill.c',
)
if host_system == 'windows'
@@ -41,6 +42,11 @@ tests += {
# The injection points are cluster-wide, so disable installcheck
'runningcheck': false,
},
+ 'tap': {
+ 'tests': [
+ 't/001_prockill_lockgroup.pl',
+ ],
+ },
'isolation': {
'specs': [
'basic',
diff --git a/src/test/modules/injection_points/regress_prockill.c b/src/test/modules/injection_points/regress_prockill.c
new file mode 100644
index 00000000000..9720875d9e2
--- /dev/null
+++ b/src/test/modules/injection_points/regress_prockill.c
@@ -0,0 +1,147 @@
+/*-------------------------------------------------------------------------
+ *
+ * regress_prockill.c
+ *	  SQL helpers for the ProcKill lock-group TAP test
+ *
+ * Exposes lock-group formation without parallel query and helpers that
+ * observe victim backends while they are inside ProcKill().
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/test/modules/injection_points/regress_prockill.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/injection_point.h"
+#include "utils/wait_event.h"
+
+#include "injection_point_condition.h"
+
+/* Read a uint32 field exactly once (matches waitfuncs.c idiom). */
+#define UINT32_ACCESS_ONCE(var) ((uint32) (*((volatile uint32 *) &(var))))
+
+/*
+ * Make the calling backend the leader of a new lock group.
+ *
+ * BecomeLockGroupLeader() tolerates being called again by a leader, but it
+ * only Asserts that the caller is not a follower in another group.  This
+ * function is reachable from SQL, so report that misuse as an ERROR.
+ */
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader);
+
+Datum
+prockill_become_lock_group_leader(PG_FUNCTION_ARGS)
+{
+	if (MyProc->lockGroupLeader != NULL && MyProc->lockGroupLeader != MyProc)
+		elog(ERROR, "backend is already a member of another lock group");
+
+	BecomeLockGroupLeader();
+	PG_RETURN_VOID();
+}
+
+/*
+ * Make the calling backend join the lock group led by leader_pid.
+ *
+ * BecomeLockGroupMember() only Asserts that the caller is not the leader
+ * itself and is not already in a group; check both here so that a stray SQL
+ * call raises an ERROR rather than tripping an assertion.
+ */
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_member);
+
+Datum
+prockill_become_lock_group_member(PG_FUNCTION_ARGS)
+{
+	int			leader_pid = PG_GETARG_INT32(0);
+	PGPROC	   *leader;
+
+	if (MyProc->lockGroupLeader != NULL)
+		elog(ERROR, "backend is already part of a lock group");
+
+	leader = BackendPidGetProc(leader_pid);
+	if (leader == NULL)
+		elog(ERROR, "backend with PID %d not found", leader_pid);
+	if (leader == MyProc)
+		elog(ERROR, "backend cannot join its own lock group");
+
+	/* Returns false e.g. if the target is not (or no longer) a group leader. */
+	if (!BecomeLockGroupMember(leader, leader_pid))
+		elog(ERROR, "could not join lock group of backend %d", leader_pid);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Attach injection_wait to point_name, scoped to target_pid via
+ * INJ_CONDITION_PID.
+ *
+ * Must be called from a controller session that is not one of the victims.
+ * Using injection_points_attach() from the victim itself would register a
+ * before_shmem_exit(injection_points_cleanup) hook that fires before ProcKill
+ * (which is on_shmem_exit) and would detach the point before the victim ever
+ * reaches it.  Calling InjectionPointAttach() directly from a separate
+ * controller session leaves no such hook on the victims.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait);
+
+Datum
+prockill_attach_injection_wait(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			target_pid = PG_GETARG_INT32(1);
+	InjectionPointCondition cond = {
+		.type = INJ_CONDITION_PID,
+		.pid = target_pid,
+	};
+
+	/* The condition is handed over as the point's private data. */
+	InjectionPointAttach(name, "injection_points", "injection_wait",
+						 &cond, sizeof(cond));
+	PG_RETURN_VOID();
+}
+
+/*
+ * Return true if the backend with target_pid is currently waiting at the
+ * named injection point, by reading PGPROC->wait_event_info directly.
+ *
+ * At the injection points in ProcKill(), the standard observability surfaces
+ * (pg_stat_activity, BackendPidGetProc) are already unavailable: both
+ * pgstat_beshutdown_hook and RemoveProcFromArray run as on_shmem_exit
+ * callbacks before ProcKill.  The PGPROC slot itself remains intact until
+ * ProcKill zeroes proc->pid near its end, so a direct allProcs scan works.
+ */
+PG_FUNCTION_INFO_V1(prockill_backend_in_injection);
+
+Datum
+prockill_backend_in_injection(PG_FUNCTION_ARGS)
+{
+	int			target_pid = PG_GETARG_INT32(0);
+	char	   *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1));
+	uint32		n = ProcGlobal->allProcCount;
+
+	/* Released slots have pid 0; never report one of those as a match. */
+	if (target_pid <= 0)
+		PG_RETURN_BOOL(false);
+
+	for (uint32 i = 0; i < n; i++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[i];
+		const char *ev;
+
+		if (proc->pid != target_pid)
+			continue;
+
+		/* Unlocked read: good enough for a test that polls the result. */
+		ev = pgstat_get_wait_event(UINT32_ACCESS_ONCE(proc->wait_event_info));
+		PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0);
+	}
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/test/modules/injection_points/t/001_prockill_lockgroup.pl b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
new file mode 100644
index 00000000000..a2ead24cf10
--- /dev/null
+++ b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
@@ -0,0 +1,120 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Regression test for the ProcKill lock-group vs. procLatch recycle race.
+#
+# Two backends form a lock group, then are terminated concurrently.  The test
+# uses injection points placed inside ProcKill() to pause both victims there
+# and verify that a freshly forked backend can claim the recycled PGPROC slot
+# without hitting "latch already owned by PID ..." PANIC.
+#
+# Requires --enable-injection-points.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# Run a step that may legitimately fail once the server has crashed.  The
+# failure is reported with note() rather than dying, so that the ok() checks
+# at the end still run and decide the outcome.  Returns true on success.
+sub best_effort
+{
+	my ($what, $step) = @_;
+
+	return 1 if eval { $step->(); 1 };
+	note "$what failed: $@";
+	return 0;
+}
+
+my $node = PostgreSQL::Test::Cluster->new('prockill_race');
+$node->init;
+$node->append_conf('postgresql.conf',
+	q{shared_preload_libraries = 'injection_points'});
+$node->start;
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+# Form a two-backend lock group.
+my $leader = $node->background_psql('postgres');
+my $follower = $node->background_psql('postgres');
+
+$leader->query_safe('SELECT prockill_become_lock_group_leader()');
+my $leader_pid = $leader->query_safe('SELECT pg_backend_pid()');
+my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()');
+$leader_pid =~ s/\s+//g;
+$follower_pid =~ s/\s+//g;
+
+$follower->query_safe("SELECT prockill_become_lock_group_member($leader_pid)");
+
+# Attach PID-scoped injection waits from a throwaway controller session, not
+# from $leader/$follower.  injection_points_attach() via the victim would
+# register a before_shmem_exit cleanup that fires before ProcKill and detaches
+# the point mid-exit.  prockill_attach_injection_wait() calls
+# InjectionPointAttach() directly, leaving no such hook on the victims.
+$node->safe_psql('postgres', qq{
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-leader', $leader_pid);
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-follower', $follower_pid);
+});
+
+# Kill the leader and wait for it to pause inside ProcKill.
+# prockill_backend_in_injection() reads PGPROC->wait_event_info directly
+# because pg_stat_activity and ProcArray are already torn down before ProcKill.
+$node->safe_psql('postgres', "SELECT pg_terminate_backend($leader_pid)");
+my $leader_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($leader_pid, 'prockill-after-lockgroup-leader')"
+);
+
+# Kill the follower and wait for it similarly.  Best effort: if the fix has
+# regressed, the postmaster may have already PANICed.
+best_effort('terminating the follower',
+	sub {
+		$node->safe_psql('postgres',
+			"SELECT pg_terminate_backend($follower_pid)");
+	});
+my $follower_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($follower_pid, 'prockill-after-lockgroup-follower')"
+);
+
+# Release both victims.
+best_effort('waking up the victims',
+	sub {
+		$node->safe_psql('postgres',
+			q{SELECT injection_points_wakeup('prockill-after-lockgroup-follower');
+			SELECT injection_points_wakeup('prockill-after-lockgroup-leader');});
+	});
+
+# A new backend must be able to claim the recycled PGPROC slot without PANIC.
+my $select_ok = best_effort('connecting a new backend',
+	sub { $node->safe_psql('postgres', 'SELECT 1') });
+
+ok($leader_ok, 'leader reached its ProcKill injection point');
+ok($follower_ok, 'follower reached its ProcKill injection point');
+ok($select_ok,
+	'new backend claimed recycled PGPROC without latch-recycle PANIC');
+
+# Also look for the failure signature itself, so that a PANIC can be told
+# apart from any other reason for the steps above to fail.
+ok(!$node->log_contains(qr/latch already owned by PID/),
+	'server log has no "latch already owned" PANIC');
+
+best_effort('detaching the injection points',
+	sub {
+		$node->safe_psql('postgres',
+			q{SELECT injection_points_detach('prockill-after-lockgroup-leader');
+			SELECT injection_points_detach('prockill-after-lockgroup-follower');});
+	});
+
+best_effort('closing the leader session', sub { $leader->quit });
+best_effort('closing the follower session', sub { $follower->quit });
+$node->stop('fast', fail_ok => 1);
+
+done_testing();
--
2.50.1 (Apple Git-155)