v20240111-0007-decode-all-sequence-relfilenodes.patch
text/x-patch
Filename: v20240111-0007-decode-all-sequence-relfilenodes.patch
Type: text/x-patch
Part: 6
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v20240111-0007
Subject: decode all sequence relfilenodes
| File | + | − |
|---|---|---|
| src/backend/replication/logical/decode.c | 6 | 25 |
| src/backend/replication/logical/reorderbuffer.c | 23 | 3 |
From 8f1598ac7795d70a9baecc3492b2930dcd828f35 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Thu, 11 Jan 2024 15:48:44 +0100
Subject: [PATCH v20240111 7/7] decode all sequence relfilenodes
---
src/backend/replication/logical/decode.c | 31 ++++---------------
.../replication/logical/reorderbuffer.c | 26 ++++++++++++++--
2 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 95f224df4a7..a141f444772 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -1432,27 +1432,12 @@ seq_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
* change as transactional and queue it. Otherwise it needs to be treated
* as non-transactional, in which case we just send it to the plugin right
* away.
- *
- * XXX This may incorrectly identify some changes as non-transactional,
- * if the relfilenode was created before the snapshot switched to FULL
- * (so that smgr_decode skipped it, and it's not in the hash table).
- * But we still handle this correctly, because the snapshot can't be
- * in CONSISTENT state yet, so the following check will skip the change
- * too, and we won't try to apply it (which would fail due to unknown
- * relfilenode).
*/
transactional = ReorderBufferSequenceIsTransactional(ctx->reorder,
target_locator,
NULL);
- /*
- * Skip the change if already processed (per the snapshot).
- *
- * XXX It's important to skip all non-transactional changes with snapshot
- * that is not consistent yet. The sequence change may be incorrectly
- * identified as non-transactional (see above), and we must not attempt
- * to process it.
- */
+ /* Skip the change if already processed (per the snapshot). */
if (transactional &&
!SnapBuildProcessChange(builder, xid, buf->origptr))
return;
@@ -1516,11 +1501,15 @@ seq_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
* We only care about XLOG_SMGR_CREATE records for "our" database (logical
* decoding is restricted to a single database), and we do the filtering
* and skipping, as appropriate.
+ *
+ * Note that the relfilenode is decoded and added to the reorder buffer even
+ * before the snapshot switches to SNAPBUILD_FULL_SNAPSHOT. This is necessary
+ * so that seq_decode can correctly identify transactional changes when the
+ * state changes to FULL after the relfilenode is created.
*/
void
smgr_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- SnapBuild *builder = ctx->snapshot_builder;
XLogReaderState *r = buf->record;
TransactionId xid = XLogRecGetXid(r);
uint8 info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
@@ -1542,14 +1531,6 @@ smgr_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr);
- /*
- * If we don't have snapshot or we are just fast-forwarding, there is no
- * point in decoding relfilenode information.
- */
- if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
- ctx->fast_forward)
- return;
-
/* only interested in our database */
xlrec = (xl_smgr_create *) XLogRecGetData(r);
if (xlrec->rlocator.dbOid != ctx->slot->data.database)
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index acaa6dce8e4..017fd9c09d7 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -1280,10 +1280,30 @@ ReorderBufferAddRelFileLocator(ReorderBuffer *rb, TransactionId xid,
&found);
/*
- * We've just decoded creation of the relfilenode, so if we found it
- * in the hash table, something is wrong.
+ * We've just decoded creation of the relfilenode, but it's possible a
+ * the transaction aborted without triggering a reorderbuffer cleanup,
+ * and the relfilenode was allocated again. While quite unlikely (the
+ * hash table entry should be cleaned when processing RUNNING_XACTS,
+ * and we should not reuse the relfilenode this fast).
+ *
+ * So if we find an entry for the relfilenode, we simply point it to
+ * the new transaction, but we need to remove it from the hash table
+ * in the original transaction (otherwise it'd confuse cleanup).
*/
- Assert(!found);
+ if (found)
+ {
+ /*
+ * The entry must be pointing to a transaction, and there must be
+ * must be a hash table of relfilenodes.
+ */
+ Assert(entry->txn);
+ Assert(entry->txn->sequences_hash);
+
+ if (hash_search(entry->txn->sequences_hash,
+ (void *) &entry->rlocator,
+ HASH_REMOVE, NULL) == NULL)
+ elog(ERROR, "hash table of sequence relfilenode corrupted");
+ }
entry->txn = txn;
--
2.43.0