v3-0001-Fix-bug-in-following-update-chain-when-locking-a-.patch
text/x-patch
Filename: v3-0001-Fix-bug-in-following-update-chain-when-locking-a-.patch
Type: text/x-patch
Part: 0
Message:
Re: Visibility bug in tuple lock
From e522298608362461c8348ed883633668ad9ff187 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Mon, 15 Dec 2025 18:44:23 +0200
Subject: [PATCH v3 1/1] Fix bug in following update chain when locking a heap
tuple
Author: Jasper Smit <jasper.smit@servicenow.com>
Discussion: https://www.postgresql.org/message-id/CAOG%2BRQ74x0q%3DkgBBQ%3DmezuvOeZBfSxM1qu_o0V28bwDz3dHxLw%40mail.gmail.com
---
src/backend/access/heap/heapam.c | 47 ++++++++----
src/test/modules/injection_points/Makefile | 3 +-
.../expected/heap_lock_update.out | 73 +++++++++++++++++++
src/test/modules/injection_points/meson.build | 1 +
.../specs/heap_lock_update.spec | 71 ++++++++++++++++++
5 files changed, 179 insertions(+), 16 deletions(-)
create mode 100644 src/test/modules/injection_points/expected/heap_lock_update.out
create mode 100644 src/test/modules/injection_points/specs/heap_lock_update.spec
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 225f9829f22..53b217ea50b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -85,8 +85,11 @@ static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
LockTupleMode mode, bool is_update,
TransactionId *result_xmax, uint16 *result_infomask,
uint16 *result_infomask2);
-static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
- const ItemPointerData *ctid, TransactionId xid,
+static TM_Result heap_lock_updated_tuple(Relation rel,
+ uint16 prior_infomask,
+ TransactionId prior_rawxmax,
+ const ItemPointerData *prior_ctid,
+ TransactionId xid,
LockTupleMode mode);
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
uint16 *new_infomask2);
@@ -4816,11 +4819,12 @@ l3:
* If there are updates, follow the update chain; bail out if
* that cannot be done.
*/
- if (follow_updates && updated)
+ if (follow_updates && updated && !ItemPointerEquals(&tuple->t_self, &t_ctid))
{
TM_Result res;
- res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ res = heap_lock_updated_tuple(relation,
+ infomask, xwait, &t_ctid,
GetCurrentTransactionId(),
mode);
if (res != TM_Ok)
@@ -5063,11 +5067,13 @@ l3:
}
/* if there are updates, follow the update chain */
- if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+ !ItemPointerEquals(&tuple->t_self, &t_ctid))
{
TM_Result res;
- res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ res = heap_lock_updated_tuple(relation,
+ infomask, xwait, &t_ctid,
GetCurrentTransactionId(),
mode);
if (res != TM_Ok)
@@ -5721,7 +5727,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
* version as well.
*/
static TM_Result
-heap_lock_updated_tuple_rec(Relation rel, const ItemPointerData *tid, TransactionId xid,
+heap_lock_updated_tuple_rec(Relation rel, TransactionId priorXmax,
+ const ItemPointerData *tid, TransactionId xid,
LockTupleMode mode)
{
TM_Result result;
@@ -5734,7 +5741,6 @@ heap_lock_updated_tuple_rec(Relation rel, const ItemPointerData *tid, Transactio
old_infomask2;
TransactionId xmax,
new_xmax;
- TransactionId priorXmax = InvalidTransactionId;
bool cleared_all_frozen = false;
bool pinned_desired_page;
Buffer vmbuffer = InvalidBuffer;
@@ -6048,7 +6054,10 @@ out_unlocked:
* Follow update chain when locking an updated tuple, acquiring locks (row
* marks) on the updated versions.
*
- * The initial tuple is assumed to be already locked.
+ * 'priorInfomask', 'priorRawXmax' and 'priorCtid' are the corresponding
+ * fields from the initial tuple. We will update the tuples starting from the
+ * one that 'priorCtid' points to. Locking the initial tuple where those
+ * values came from is the caller's responsibility.
*
* This function doesn't check visibility, it just unconditionally marks the
* tuple(s) as locked. If any tuple in the updated chain is being deleted
@@ -6066,16 +6075,22 @@ out_unlocked:
* levels, because that would lead to a serializability failure.
*/
static TM_Result
-heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ctid,
+heap_lock_updated_tuple(Relation rel,
+ uint16 prior_infomask,
+ TransactionId prior_raw_xmax,
+ const ItemPointerData *prior_ctid,
TransactionId xid, LockTupleMode mode)
{
+ INJECTION_POINT("heap_lock_updated_tuple", NULL);
+
/*
- * If the tuple has not been updated, or has moved into another partition
- * (effectively a delete) stop here.
+ * If the tuple has moved into another partition (effectively a delete)
+ * stop here.
*/
- if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
- !ItemPointerEquals(&tuple->t_self, ctid))
+ if (!ItemPointerIndicatesMovedPartitions(prior_ctid))
{
+ TransactionId prior_xmax;
+
/*
* If this is the first possibly-multixact-able operation in the
* current transaction, set my per-backend OldestMemberMXactId
@@ -6087,7 +6102,9 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ct
*/
MultiXactIdSetOldestMember();
- return heap_lock_updated_tuple_rec(rel, ctid, xid, mode);
+ prior_xmax = (prior_infomask & HEAP_XMAX_IS_MULTI) ?
+ MultiXactIdGetUpdateXid(prior_raw_xmax, prior_infomask) : prior_raw_xmax;
+ return heap_lock_updated_tuple_rec(rel, prior_xmax, prior_ctid, xid, mode);
}
/* nothing to lock */
diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index bfdb3f53377..cc9be6dcdd2 100644
--- a/src/test/modules/injection_points/Makefile
+++ b/src/test/modules/injection_points/Makefile
@@ -14,7 +14,8 @@ REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
ISOLATION = basic \
inplace \
- syscache-update-pruned
+ syscache-update-pruned \
+ heap_lock_update
# Temporarily disabled because of flakiness
#ISOLATION =+
diff --git a/src/test/modules/injection_points/expected/heap_lock_update.out b/src/test/modules/injection_points/expected/heap_lock_update.out
new file mode 100644
index 00000000000..9828e2b151b
--- /dev/null
+++ b/src/test/modules/injection_points/expected/heap_lock_update.out
@@ -0,0 +1,73 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert wake
+step s1begin: begin;
+step s1update: update t set id = 1000 where id = 1;
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: abort;
+step vacuum: VACUUM (TRUNCATE off);
+step reinsert:
+ insert into t values (453) returning ctid; -- Should be (2,1)
+ update t set id = 454 where id = 453 returning ctid;
+
+ctid
+-----
+(2,1)
+(1 row)
+
+ctid
+-----
+(2,2)
+(1 row)
+
+step wake:
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
+
+starting permutation: s1begin s1update s2lock s1abort vacuum reinsert_and_lock wake
+step s1begin: begin;
+step s1update: update t set id = 1000 where id = 1;
+step s2lock: select * from t where id = 1 for update; <waiting ...>
+step s1abort: abort;
+step vacuum: VACUUM (TRUNCATE off);
+step reinsert_and_lock:
+ begin;
+ insert into t values (453) returning ctid; -- Should be (2,1)
+ select ctid, * from t where id = 1 for update;
+ commit;
+ update t set id = 454 where id = 453 returning ctid;
+
+ctid
+-----
+(2,1)
+(1 row)
+
+ctid |id
+-----+--
+(0,1)| 1
+(1 row)
+
+ctid
+-----
+(2,2)
+(1 row)
+
+step wake:
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+ <waiting ...>
+step s2lock: <... completed>
+id
+--
+ 1
+(1 row)
+
+step wake: <... completed>
diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index 493e11053dc..c9186ae04d1 100644
--- a/src/test/modules/injection_points/meson.build
+++ b/src/test/modules/injection_points/meson.build
@@ -46,6 +46,7 @@ tests += {
'basic',
'inplace',
'syscache-update-pruned',
+ 'heap_lock_update',
# temporarily disabled because of flakiness
# 'index-concurrently-upsert',
# 'index-concurrently-upsert-predicate',
diff --git a/src/test/modules/injection_points/specs/heap_lock_update.spec b/src/test/modules/injection_points/specs/heap_lock_update.spec
new file mode 100644
index 00000000000..93887312815
--- /dev/null
+++ b/src/test/modules/injection_points/specs/heap_lock_update.spec
@@ -0,0 +1,71 @@
+# Test race condition in tuple locking
+#
+# This is a reproducer for the bug reported at:
+# https://www.postgresql.org/message-id/CAOG%2BRQ74x0q%3DkgBBQ%3DmezuvOeZBfSxM1qu_o0V28bwDz3dHxLw%40mail.gmail.com
+
+setup
+{
+ CREATE EXTENSION injection_points;
+
+ create table t (id int);
+ insert into t (select generate_series(1, 452));
+}
+teardown
+{
+ drop table t;
+ DROP EXTENSION injection_points;
+}
+
+session s1
+step s1begin { begin; }
+step s1update { update t set id = 1000 where id = 1; }
+step s1abort { abort; }
+step vacuum { VACUUM (TRUNCATE off); }
+step reinsert {
+ insert into t values (453) returning ctid; -- Should be (2,1)
+ update t set id = 454 where id = 453 returning ctid;
+}
+
+# Same as 'reinsert', but we also stamp the original tuple with the
+# same 'xmax' as the re-inserted one.
+step reinsert_and_lock {
+ begin;
+ insert into t values (453) returning ctid; -- Should be (2,1)
+ select ctid, * from t where id = 1 for update;
+ commit;
+ update t set id = 454 where id = 453 returning ctid;
+}
+
+step wake {
+ SELECT FROM injection_points_detach('heap_lock_updated_tuple');
+ SELECT FROM injection_points_wakeup('heap_lock_updated_tuple');
+}
+
+session s2
+setup {
+ SELECT FROM injection_points_set_local();
+ SELECT FROM injection_points_attach('heap_lock_updated_tuple', 'wait');
+}
+step s2lock { select * from t where id = 1 for update; }
+
+# Basic case
+permutation
+ s1begin
+ s1update
+ s2lock
+ s1abort
+ vacuum
+ reinsert
+ wake(s2lock)
+
+# Variant where the re-inserted tuple is also locked by the inserting
+# transaction. This failed an earlier version of the fix during
+# development.
+permutation
+ s1begin
+ s1update
+ s2lock
+ s1abort
+ vacuum
+ reinsert_and_lock
+ wake(s2lock)
--
2.47.3