v15-0003-Refactor-code-for-in-core-local-sequences.patch
text/x-diff
Filename: v15-0003-Refactor-code-for-in-core-local-sequences.patch
Type: text/x-diff
Part: 2
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v15-0003
Subject: Refactor code for in-core "local" sequences
| File | + | − |
|---|---|---|
| src/backend/access/rmgrdesc/Makefile | 1 | 1 |
| src/backend/access/rmgrdesc/meson.build | 1 | 1 |
| src/backend/access/rmgrdesc/seqlocaldesc.c | 10 | 10 |
| src/backend/access/sequence/Makefile | 1 | 1 |
| src/backend/access/sequence/meson.build | 2 | 0 |
| src/backend/access/sequence/seqlocalam.c | 634 | 0 |
| src/backend/access/sequence/seqlocalxlog.c | 82 | 0 |
| src/backend/access/transam/rmgr.c | 1 | 0 |
| src/backend/commands/sequence.c | 28 | 606 |
| src/bin/pg_waldump/.gitignore | 1 | 1 |
| src/bin/pg_waldump/rmgrdesc.c | 1 | 0 |
| src/bin/pg_waldump/t/001_basic.pl | 1 | 1 |
| src/include/access/rmgrlist.h | 1 | 1 |
| src/include/access/seqlocalam.h | 58 | 0 |
From b25345d4a709b40cec04f39d28b0c1f510d249ac Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 30 Apr 2025 08:26:43 +0900
Subject: [PATCH v15 3/7] Refactor code for in-core "local" sequences
This commit restructures the code of in-core sequences into a new set of
files:
- seqdesc.c is renamed to seqlocaldesc.c.
- seqlocalxlog.c to store the code of the WAL replay logic.
- seqlocalam.c to store a set of routines called from sequence.c,
finishing the separation between the main sequence logic and the in-core
sequences.
- seqlocalam.h to store the AM-specific structures and routines related
to the in-core sequences.
WAL records are renamed to "SequenceLocal" with structures, variables
and file structures mapping to that.
---
src/include/access/rmgrlist.h | 2 +-
src/include/access/seqlocalam.h | 58 ++
src/backend/access/rmgrdesc/Makefile | 2 +-
src/backend/access/rmgrdesc/meson.build | 2 +-
.../rmgrdesc/{seqdesc.c => seqlocaldesc.c} | 20 +-
src/backend/access/sequence/Makefile | 2 +-
src/backend/access/sequence/meson.build | 2 +
src/backend/access/sequence/seqlocalam.c | 634 ++++++++++++++++++
src/backend/access/sequence/seqlocalxlog.c | 82 +++
src/backend/access/transam/rmgr.c | 1 +
src/backend/commands/sequence.c | 634 +-----------------
src/bin/pg_waldump/.gitignore | 2 +-
src/bin/pg_waldump/rmgrdesc.c | 1 +
src/bin/pg_waldump/t/001_basic.pl | 2 +-
14 files changed, 822 insertions(+), 622 deletions(-)
create mode 100644 src/include/access/seqlocalam.h
rename src/backend/access/rmgrdesc/{seqdesc.c => seqlocaldesc.c} (64%)
create mode 100644 src/backend/access/sequence/seqlocalam.c
create mode 100644 src/backend/access/sequence/seqlocalxlog.c
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 8e7fc9db8778..b942b25350bc 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -40,7 +40,7 @@ PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL)
PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL)
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL)
-PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask, NULL)
+PG_RMGR(RM_SEQ_LOCAL_ID, "SequenceLocal", seq_local_redo, seq_local_desc, seq_local_identify, NULL, NULL, seq_local_mask, NULL)
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL)
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL)
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL)
diff --git a/src/include/access/seqlocalam.h b/src/include/access/seqlocalam.h
new file mode 100644
index 000000000000..225fb9a2cbeb
--- /dev/null
+++ b/src/include/access/seqlocalam.h
@@ -0,0 +1,58 @@
+/*-------------------------------------------------------------------------
+ *
+ * seqlocalam.h
+ * Local sequence access method.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/seqlocalam.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SEQLOCALAM_H
+#define SEQLOCALAM_H
+
+#include "access/xlogreader.h"
+#include "storage/relfilelocator.h"
+#include "utils/rel.h"
+
+/* XLOG stuff */
+#define XLOG_SEQ_LOCAL_LOG 0x00
+
+typedef struct xl_seq_local_rec
+{
+ RelFileLocator locator; /* relation the record applies to */
+ /* sequence tuple data follows at the end of the record */
+} xl_seq_local_rec;
+
+/*
+ * The "special area" of a local sequence's buffer page looks like this.
+ */
+#define SEQ_LOCAL_MAGIC 0x1717
+
+typedef struct seq_local_magic
+{
+ uint32 magic; /* set to SEQ_LOCAL_MAGIC when the page is initialized */
+} seq_local_magic;
+
+extern void seq_local_redo(XLogReaderState *record);
+extern void seq_local_desc(StringInfo buf, XLogReaderState *record);
+extern const char *seq_local_identify(uint8 info);
+extern void seq_local_mask(char *page, BlockNumber blkno);
+
+/* access routines */
+extern int64 seq_local_nextval(Relation rel, int64 incby, int64 maxv,
+ int64 minv, int64 cache, bool cycle,
+ int64 *last);
+extern const char *seq_local_get_table_am(void);
+extern void seq_local_init(Relation rel, int64 last_value, bool is_called);
+extern void seq_local_setval(Relation rel, int64 next, bool iscalled);
+extern void seq_local_reset(Relation rel, int64 startv, bool is_called,
+ bool reset_state);
+extern void seq_local_get_state(Relation rel, int64 *last_value,
+ bool *is_called);
+extern void seq_local_change_persistence(Relation rel,
+ char newrelpersistence);
+
+#endif /* SEQLOCALAM_H */
diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile
index cd95eec37f14..e5900ed77af5 100644
--- a/src/backend/access/rmgrdesc/Makefile
+++ b/src/backend/access/rmgrdesc/Makefile
@@ -24,7 +24,7 @@ OBJS = \
relmapdesc.o \
replorigindesc.o \
rmgrdesc_utils.o \
- seqdesc.o \
+ seqlocaldesc.o \
smgrdesc.o \
spgdesc.o \
standbydesc.o \
diff --git a/src/backend/access/rmgrdesc/meson.build b/src/backend/access/rmgrdesc/meson.build
index 96c98e800c22..6b3b3e1e44c1 100644
--- a/src/backend/access/rmgrdesc/meson.build
+++ b/src/backend/access/rmgrdesc/meson.build
@@ -17,7 +17,7 @@ rmgr_desc_sources = files(
'relmapdesc.c',
'replorigindesc.c',
'rmgrdesc_utils.c',
- 'seqdesc.c',
+ 'seqlocaldesc.c',
'smgrdesc.c',
'spgdesc.c',
'standbydesc.c',
diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqlocaldesc.c
similarity index 64%
rename from src/backend/access/rmgrdesc/seqdesc.c
rename to src/backend/access/rmgrdesc/seqlocaldesc.c
index 0d289d77fcf7..6ffbcb2c4735 100644
--- a/src/backend/access/rmgrdesc/seqdesc.c
+++ b/src/backend/access/rmgrdesc/seqlocaldesc.c
@@ -1,44 +1,44 @@
/*-------------------------------------------------------------------------
*
- * seqdesc.c
- * rmgr descriptor routines for commands/sequence.c
+ * seqlocaldesc.c
+ * rmgr descriptor routines for access/sequence/seqlocalam.c
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * src/backend/access/rmgrdesc/seqdesc.c
+ * src/backend/access/rmgrdesc/seqlocaldesc.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "commands/sequence.h"
+#include "access/seqlocalam.h"
void
-seq_desc(StringInfo buf, XLogReaderState *record)
+seq_local_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
- xl_seq_rec *xlrec = (xl_seq_rec *) rec;
+ xl_seq_local_rec *xlrec = (xl_seq_local_rec *) rec;
- if (info == XLOG_SEQ_LOG)
+ if (info == XLOG_SEQ_LOCAL_LOG)
appendStringInfo(buf, "rel %u/%u/%u",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber);
}
const char *
-seq_identify(uint8 info)
+seq_local_identify(uint8 info)
{
const char *id = NULL;
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SEQ_LOG:
- id = "LOG";
+ case XLOG_SEQ_LOCAL_LOG:
+ id = "SEQ_LOCAL_LOG";
break;
}
diff --git a/src/backend/access/sequence/Makefile b/src/backend/access/sequence/Makefile
index 9f9d31f5425a..a15ceec1c0a0 100644
--- a/src/backend/access/sequence/Makefile
+++ b/src/backend/access/sequence/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/access/sequence
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = sequence.o
+OBJS = seqlocalam.o seqlocalxlog.o sequence.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/sequence/meson.build b/src/backend/access/sequence/meson.build
index ec9ab9b7e9db..8bc0e95e68c0 100644
--- a/src/backend/access/sequence/meson.build
+++ b/src/backend/access/sequence/meson.build
@@ -1,5 +1,7 @@
# Copyright (c) 2022-2025, PostgreSQL Global Development Group
backend_sources += files(
+ 'seqlocalam.c',
+ 'seqlocalxlog.c',
'sequence.c',
)
diff --git a/src/backend/access/sequence/seqlocalam.c b/src/backend/access/sequence/seqlocalam.c
new file mode 100644
index 000000000000..e019a6f5a95d
--- /dev/null
+++ b/src/backend/access/sequence/seqlocalam.c
@@ -0,0 +1,634 @@
+/*-------------------------------------------------------------------------
+ *
+ * seqlocalam.c
+ * Local sequence access manager
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/sequence/seqlocalam.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/multixact.h"
+#include "access/seqlocalam.h"
+#include "access/xact.h"
+#include "access/xloginsert.h"
+#include "access/xlogutils.h"
+#include "catalog/storage_xlog.h"
+#include "commands/tablecmds.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+
+
+/*
+ * We don't want to log each fetching of a value from a sequence,
+ * so we pre-log a few fetches in advance. In the event of
+ * crash we can lose (skip over) as many values as we pre-logged.
+ */
+#define SEQ_LOCAL_LOG_VALS 32
+
+/* Format of tuples stored in heap table associated to local sequences */
+typedef struct FormData_pg_seq_local_data
+{
+ int64 last_value;
+ int64 log_cnt;
+ bool is_called;
+} FormData_pg_seq_local_data;
+
+typedef FormData_pg_seq_local_data *Form_pg_seq_local_data;
+
+/*
+ * Columns of a local sequence relation
+ */
+#define SEQ_LOCAL_COL_LASTVAL 1
+#define SEQ_LOCAL_COL_LOG 2
+#define SEQ_LOCAL_COL_CALLED 3
+
+#define SEQ_LOCAL_COL_FIRSTCOL SEQ_LOCAL_COL_LASTVAL
+#define SEQ_LOCAL_COL_LASTCOL SEQ_LOCAL_COL_CALLED
+
+
+/*
+ * Prototypes of the static helpers working on the first page of a local
+ * sequence relation.  read_seq_tuple() locks the page buffer and finds
+ * the sequence tuple, while the fill_seq_*() routines initialize the
+ * first page of a relation fork with the tuple given by the caller.
+ */
+
+static Form_pg_seq_local_data read_seq_tuple(Relation rel,
+ Buffer *buf,
+ HeapTuple seqdatatuple);
+static void fill_seq_with_data(Relation rel, HeapTuple tuple);
+static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple,
+ ForkNumber forkNum);
+
+/*
+ * Read block 0 of an opened sequence relation, lock it exclusively and
+ * locate the sequence tuple stored at its first offset.
+ *
+ * *buf receives the pinned and exclusively-locked buffer, which the
+ * caller has to release.  *seqdatatuple (which should point to a local
+ * HeapTupleData) gets only its t_data and t_len fields set.  The return
+ * value points to the tuple's data payload; a bad magic number is an ERROR.
+ */
+static Form_pg_seq_local_data
+read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
+{
+ Page page;
+ ItemId lp;
+ seq_local_magic *sm;
+ Form_pg_seq_local_data seq;
+
+ *buf = ReadBuffer(rel, 0);
+ LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
+
+ page = BufferGetPage(*buf);
+ sm = (seq_local_magic *) PageGetSpecialPointer(page);
+
+ if (sm->magic != SEQ_LOCAL_MAGIC)
+ elog(ERROR, "bad magic number in sequence \"%s\": %08X",
+ RelationGetRelationName(rel), sm->magic);
+
+ lp = PageGetItemId(page, FirstOffsetNumber);
+ Assert(ItemIdIsNormal(lp));
+
+ /* Note we currently only bother to set these two fields of *seqdatatuple */
+ seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ seqdatatuple->t_len = ItemIdGetLength(lp);
+
+ /*
+ * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
+ * a sequence, which would leave a non-frozen XID in the sequence tuple's
+ * xmax, which eventually leads to clog access failures or worse. If we
+ * see this has happened, clean up after it. We treat this like a hint
+ * bit update, ie, don't bother to WAL-log it, since we can certainly do
+ * this again if the update gets lost.
+ */
+ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
+ {
+ HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+ seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
+ seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+ MarkBufferDirtyHint(*buf, true);
+ }
+
+ seq = (Form_pg_seq_local_data) GETSTRUCT(seqdatatuple);
+
+ return seq;
+}
+
+/*
+ * Initialize a sequence's relation with the specified tuple as content
+ *
+ * The main fork is always filled.  For an unlogged sequence, the init fork
+ * is also created, WAL-logged and filled with the same tuple.
+ */
+static void
+fill_seq_with_data(Relation rel, HeapTuple tuple)
+{
+ fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);
+
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
+ {
+ SMgrRelation srel;
+
+ srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
+ smgrcreate(srel, INIT_FORKNUM, false);
+ log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
+ fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
+ FlushRelationBuffers(rel);
+ smgrclose(srel);
+ }
+}
+
+/*
+ * Initialize block 0 of a sequence's relation fork with the specified tuple
+ */
+static void
+fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
+{
+ Buffer buf;
+ Page page;
+ seq_local_magic *sm;
+ OffsetNumber offnum;
+
+ /* Initialize first page of relation with special magic number */
+
+ buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
+ EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
+ Assert(BufferGetBlockNumber(buf) == 0);
+
+ page = BufferGetPage(buf);
+
+ PageInit(page, BufferGetPageSize(buf), sizeof(seq_local_magic));
+ sm = (seq_local_magic *) PageGetSpecialPointer(page);
+ sm->magic = SEQ_LOCAL_MAGIC;
+
+ /* Now insert sequence tuple */
+
+ /*
+ * Since VACUUM does not process sequences, we have to force the tuple to
+ * have xmin = FrozenTransactionId now. Otherwise it would become
+ * invisible to SELECTs after 2G transactions. It is okay to do this
+ * because if the current transaction aborts, no other xact will ever
+ * examine the sequence tuple anyway.
+ */
+ HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+ HeapTupleHeaderSetXminFrozen(tuple->t_data);
+ HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
+ HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+ ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+
+ /* check the comment above seq_local_nextval()'s equivalent call. */
+ if (RelationNeedsWAL(rel))
+ GetTopTransactionId();
+
+ START_CRIT_SECTION();
+
+ MarkBufferDirty(buf);
+
+ offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
+ InvalidOffsetNumber, false, false);
+ if (offnum != FirstOffsetNumber)
+ elog(ERROR, "failed to add sequence tuple to page");
+
+ /* XLOG stuff: the init fork is always logged, even without RelationNeedsWAL */
+ if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
+ {
+ xl_seq_local_rec xlrec;
+ XLogRecPtr recptr;
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ xlrec.locator = rel->rd_locator;
+
+ XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+ XLogRegisterData(tuple->t_data, tuple->t_len);
+
+ recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+}
+
+/*
+ * seq_local_nextval()
+ *
+ * Fetch up to "cache" values from a local sequence, based on the sequence
+ * configuration.  Returns the first value fetched, *last gets the last one.
+ */
+int64
+seq_local_nextval(Relation rel, int64 incby, int64 maxv,
+ int64 minv, int64 cache, bool cycle,
+ int64 *last)
+{
+ int64 result;
+ int64 fetch;
+ int64 next;
+ int64 rescnt = 0;
+ int64 log;
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+ Form_pg_seq_local_data seq;
+ Page page;
+ bool logit = false;
+
+ /* lock page buffer and read tuple */
+ seq = read_seq_tuple(rel, &buf, &seqdatatuple);
+ page = BufferGetPage(buf);
+
+ *last = next = result = seq->last_value;
+ fetch = cache;
+ log = seq->log_cnt;
+
+ if (!seq->is_called)
+ {
+ rescnt++; /* return last_value if not is_called */
+ fetch--;
+ }
+
+ /*
+ * Decide whether we should emit a WAL log record. If so, force up the
+ * fetch count to grab SEQ_LOCAL_LOG_VALS more values than we actually
+ * need to cache. (These will then be usable without logging.)
+ *
+ * If this is the first nextval after a checkpoint, we must force a new
+ * WAL record to be written anyway, else replay starting from the
+ * checkpoint would fail to advance the sequence past the logged values.
+ * In this case we may as well fetch extra values.
+ */
+ if (log < fetch || !seq->is_called)
+ {
+ /* forced log to satisfy local demand for values */
+ fetch = log = fetch + SEQ_LOCAL_LOG_VALS;
+ logit = true;
+ }
+ else
+ {
+ XLogRecPtr redoptr = GetRedoRecPtr();
+
+ if (PageGetLSN(page) <= redoptr)
+ {
+ /* last update of seq was before checkpoint */
+ fetch = log = fetch + SEQ_LOCAL_LOG_VALS;
+ logit = true;
+ }
+ }
+
+ while (fetch) /* try to fetch cache [+ log ] numbers */
+ {
+ /*
+ * Check MAXVALUE for ascending sequences and MINVALUE for descending
+ * sequences
+ */
+ if (incby > 0)
+ {
+ /* ascending sequence */
+ if ((maxv >= 0 && next > maxv - incby) ||
+ (maxv < 0 && next + incby > maxv))
+ {
+ if (rescnt > 0)
+ break; /* stop fetching */
+ if (!cycle)
+ ereport(ERROR,
+ (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+ errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")",
+ RelationGetRelationName(rel),
+ maxv)));
+ next = minv;
+ }
+ else
+ next += incby;
+ }
+ else
+ {
+ /* descending sequence */
+ if ((minv < 0 && next < minv - incby) ||
+ (minv >= 0 && next + incby < minv))
+ {
+ if (rescnt > 0)
+ break; /* stop fetching */
+ if (!cycle)
+ ereport(ERROR,
+ (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+ errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")",
+ RelationGetRelationName(rel),
+ minv)));
+ next = maxv;
+ }
+ else
+ next += incby;
+ }
+ fetch--;
+ if (rescnt < cache)
+ {
+ log--;
+ rescnt++;
+ *last = next;
+ if (rescnt == 1) /* if it's first result - */
+ result = next; /* it's what to return */
+ }
+ }
+
+ log -= fetch; /* adjust for any unfetched numbers */
+ Assert(log >= 0);
+
+ /*
+ * If something needs to be WAL logged, acquire an xid, so this
+ * transaction's commit will trigger a WAL flush and wait for syncrep.
+ * It's sufficient to ensure the toplevel transaction has an xid, no need
+ * to assign xids to subxacts, that'll already trigger an appropriate wait.
+ * (Have to do that here, so we're outside the critical section)
+ */
+ if (logit && RelationNeedsWAL(rel))
+ GetTopTransactionId();
+
+ /* ready to change the on-disk (or really, in-buffer) tuple */
+ START_CRIT_SECTION();
+
+ /*
+ * We must mark the buffer dirty before doing XLogInsert(); see notes in
+ * SyncOneBuffer(). However, we don't apply the desired changes just yet.
+ * This looks like a violation of the buffer update protocol, but it is in
+ * fact safe because we hold exclusive lock on the buffer. Any other
+ * process, including a checkpoint, that tries to examine the buffer
+ * contents will block until we release the lock, and then will see the
+ * final state that we install below.
+ */
+ MarkBufferDirty(buf);
+
+ /* XLOG stuff */
+ if (logit && RelationNeedsWAL(rel))
+ {
+ xl_seq_local_rec xlrec;
+ XLogRecPtr recptr;
+
+ /*
+ * We don't log the current state of the tuple, but rather the state
+ * as it would appear after "log" more fetches. This lets us skip
+ * that many future WAL records, at the cost that we lose those
+ * sequence values if we crash.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ /* set values that will be saved in xlog */
+ seq->last_value = next;
+ seq->is_called = true;
+ seq->log_cnt = 0;
+
+ xlrec.locator = rel->rd_locator;
+
+ XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+ XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
+
+ recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ /* Now update sequence tuple to the intended final state */
+ seq->last_value = *last; /* last fetched number */
+ seq->is_called = true;
+ seq->log_cnt = log; /* how much is logged */
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+
+ return result;
+}
+
+/*
+ * seq_local_get_table_am()
+ *
+ * Return the name of the table access method used by local sequences.
+ */
+const char *
+seq_local_get_table_am(void)
+{
+ return "heap";
+}
+
+/*
+ * seq_local_init()
+ *
+ * Add the sequence attributes (last_value, log_cnt, is_called) to the
+ * relation created for this sequence AM, then insert the initial tuple
+ * built from the last_value and is_called given by the caller, with
+ * log_cnt set to 0.  This is the first tuple inserted after the relation
+ * has been created, filling in its heap table.
+ */
+void
+seq_local_init(Relation rel, int64 last_value, bool is_called)
+{
+ Datum value[SEQ_LOCAL_COL_LASTCOL];
+ bool null[SEQ_LOCAL_COL_LASTCOL];
+ List *elts = NIL;
+ List *atcmds = NIL;
+ ListCell *lc;
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+
+ /*
+ * Build column definitions (and fill value[] and null[] for the tuple).
+ */
+ for (int i = SEQ_LOCAL_COL_FIRSTCOL; i <= SEQ_LOCAL_COL_LASTCOL; i++)
+ {
+ ColumnDef *coldef = NULL;
+
+ switch (i)
+ {
+ case SEQ_LOCAL_COL_LASTVAL:
+ coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid);
+ value[i - 1] = Int64GetDatumFast(last_value);
+ break;
+ case SEQ_LOCAL_COL_LOG:
+ coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid);
+ value[i - 1] = Int64GetDatum(0);
+ break;
+ case SEQ_LOCAL_COL_CALLED:
+ coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid);
+ value[i - 1] = BoolGetDatum(is_called);
+ break;
+ }
+
+ coldef->is_not_null = true;
+ null[i - 1] = false;
+ elts = lappend(elts, coldef);
+ }
+
+ /* Add all the attributes to the sequence */
+ foreach(lc, elts)
+ {
+ AlterTableCmd *atcmd;
+
+ atcmd = makeNode(AlterTableCmd);
+ atcmd->subtype = AT_AddColumnToSequence;
+ atcmd->def = (Node *) lfirst(lc);
+ atcmds = lappend(atcmds, atcmd);
+ }
+
+ /*
+ * No recursion needed. Note that EventTriggerAlterTableStart() should
+ * have been called.
+ */
+ AlterTableInternal(RelationGetRelid(rel), atcmds, false);
+ CommandCounterIncrement();
+
+ tupdesc = RelationGetDescr(rel);
+ tuple = heap_form_tuple(tupdesc, value, null);
+ fill_seq_with_data(rel, tuple);
+}
+
+/*
+ * seq_local_setval()
+ *
+ * Callback for setval(): overwrite last_value and is_called, reset log_cnt.
+ */
+void
+seq_local_setval(Relation rel, int64 next, bool iscalled)
+{
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+ Form_pg_seq_local_data seq;
+
+ /* lock page buffer and read tuple */
+ seq = read_seq_tuple(rel, &buf, &seqdatatuple);
+
+ /* ready to change the on-disk (or really, in-buffer) tuple */
+ START_CRIT_SECTION();
+ seq->last_value = next; /* last fetched number */
+ seq->is_called = iscalled;
+ seq->log_cnt = 0;
+
+ MarkBufferDirty(buf);
+
+ /* XLOG stuff */
+ if (RelationNeedsWAL(rel))
+ {
+ xl_seq_local_rec xlrec;
+ XLogRecPtr recptr;
+ Page page = BufferGetPage(buf);
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ xlrec.locator = rel->rd_locator;
+ XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec));
+ XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
+
+ recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+}
+
+/*
+ * seq_local_reset()
+ *
+ * Perform a hard reset on the local sequence: its tuple is copied, updated
+ * and written into a new relfilenumber.  log_cnt is reset if reset_state.
+ */
+void
+seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state)
+{
+ Form_pg_seq_local_data seq;
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+ HeapTuple tuple;
+
+ /* lock buffer page and read tuple */
+ (void) read_seq_tuple(rel, &buf, &seqdatatuple);
+
+ /*
+ * Copy the existing sequence tuple.
+ */
+ tuple = heap_copytuple(&seqdatatuple);
+
+ /* Now we're done with the old page */
+ UnlockReleaseBuffer(buf);
+
+ /*
+ * Modify the copied tuple to execute the restart (compare the RESTART
+ * action in AlterSequence)
+ */
+ seq = (Form_pg_seq_local_data) GETSTRUCT(tuple);
+ seq->last_value = startv;
+ seq->is_called = is_called;
+ if (reset_state)
+ seq->log_cnt = 0;
+
+ /*
+ * Create a new storage file for the sequence.
+ */
+ RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence);
+
+ /*
+ * Ensure sequence's relfrozenxid is at 0, since it won't contain any
+ * unfrozen XIDs. Same with relminmxid, since a sequence will never
+ * contain multixacts.
+ */
+ Assert(rel->rd_rel->relfrozenxid == InvalidTransactionId);
+ Assert(rel->rd_rel->relminmxid == InvalidMultiXactId);
+
+ /*
+ * Insert the modified tuple into the new storage file.
+ */
+ fill_seq_with_data(rel, tuple);
+}
+
+/*
+ * seq_local_get_state()
+ *
+ * Report the last_value and is_called currently stored for a local sequence.
+ */
+void
+seq_local_get_state(Relation rel, int64 *last_value, bool *is_called)
+{
+	HeapTupleData tupdata;
+	Buffer		seqbuf;
+	Form_pg_seq_local_data seqform;
+
+	/* fetch the sequence tuple, its buffer comes back locked */
+	seqform = read_seq_tuple(rel, &seqbuf, &tupdata);
+
+	*is_called = seqform->is_called;
+	*last_value = seqform->last_value;
+
+	UnlockReleaseBuffer(seqbuf);
+}
+
+/*
+ * seq_local_change_persistence()
+ *
+ * Rewrite the sequence into a new relfilenumber with newrelpersistence.
+ */
+void
+seq_local_change_persistence(Relation rel, char newrelpersistence)
+{
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+
+ (void) read_seq_tuple(rel, &buf, &seqdatatuple);
+ RelationSetNewRelfilenumber(rel, newrelpersistence);
+ fill_seq_with_data(rel, &seqdatatuple);
+ UnlockReleaseBuffer(buf);
+}
diff --git a/src/backend/access/sequence/seqlocalxlog.c b/src/backend/access/sequence/seqlocalxlog.c
new file mode 100644
index 000000000000..db0ad969dbf2
--- /dev/null
+++ b/src/backend/access/sequence/seqlocalxlog.c
@@ -0,0 +1,82 @@
+/*-------------------------------------------------------------------------
+ *
+ * seqlocalxlog.c
+ * WAL replay logic for local sequence access manager
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/sequence/seqlocalxlog.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/bufmask.h"
+#include "access/seqlocalam.h"
+#include "access/xlogutils.h"
+#include "storage/block.h"
+
+/*
+ * Mask LSN, checksum and unused space of a page for consistency checks.
+ */
+void
+seq_local_mask(char *page, BlockNumber blkno)
+{
+ mask_page_lsn_and_checksum(page);
+
+ mask_unused_space(page);
+}
+
+/*
+ * Replay a local sequence WAL record by rebuilding the sequence's page.
+ */
+void
+seq_local_redo(XLogReaderState *record)
+{
+	XLogRecPtr	lsn = record->EndRecPtr;
+	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+	Buffer		buffer;
+	Page		page;
+	Page		localpage;
+	char	   *item;
+	Size		itemsz;
+	xl_seq_local_rec *xlrec = (xl_seq_local_rec *) XLogRecGetData(record);
+	seq_local_magic *sm;
+
+	if (info != XLOG_SEQ_LOCAL_LOG)
+		elog(PANIC, "seq_local_redo: unknown op code %u", info);
+
+	buffer = XLogInitBufferForRedo(record, 0);
+	page = (Page) BufferGetPage(buffer);
+
+	/*
+	 * We always reinit the page.  However, since this WAL record type is also
+	 * used for updating sequences, it's possible that a hot-standby backend
+	 * is examining the page concurrently; so we mustn't transiently trash the
+	 * buffer.  The solution is to build the correct new page contents in
+	 * local workspace and then memcpy into the buffer.  Then only bytes that
+	 * are supposed to change will change, even transiently.  We must palloc
+	 * the local page for alignment reasons.
+	 */
+	localpage = (Page) palloc(BufferGetPageSize(buffer));
+	PageInit(localpage, BufferGetPageSize(buffer), sizeof(seq_local_magic));
+	sm = (seq_local_magic *) PageGetSpecialPointer(localpage);
+	sm->magic = SEQ_LOCAL_MAGIC;
+
+	item = (char *) xlrec + sizeof(xl_seq_local_rec);
+	itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_local_rec);
+
+	if (PageAddItem(localpage, (Item) item, itemsz,
+					FirstOffsetNumber, false, false) == InvalidOffsetNumber)
+		elog(PANIC, "seq_local_redo: failed to add item to page");
+
+	PageSetLSN(localpage, lsn);
+	memcpy(page, localpage, BufferGetPageSize(buffer));
+	MarkBufferDirty(buffer);
+	UnlockReleaseBuffer(buffer);
+	pfree(localpage);
+}
diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c
index 1b7499726eb0..cc92268937b1 100644
--- a/src/backend/access/transam/rmgr.c
+++ b/src/backend/access/transam/rmgr.c
@@ -27,6 +27,7 @@
#include "access/gistxlog.h"
#include "access/hash_xlog.h"
#include "access/heapam_xlog.h"
+#include "access/seqlocalam.h"
#include "access/multixact.h"
#include "access/nbtxlog.h"
#include "access/spgxlog.h"
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 20e8978c9346..15ea0e24970c 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -16,6 +16,7 @@
#include "access/bufmask.h"
#include "access/htup_details.h"
+#include "access/seqlocalam.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/sequence.h"
@@ -50,23 +51,6 @@
#include "utils/varlena.h"
-/*
- * We don't want to log each fetching of a value from a sequence,
- * so we pre-log a few fetches in advance. In the event of
- * crash we can lose (skip over) as many values as we pre-logged.
- */
-#define SEQ_LOG_VALS 32
-
-/*
- * The "special area" of a sequence's buffer page looks like this.
- */
-#define SEQ_MAGIC 0x1717
-
-typedef struct sequence_magic
-{
- uint32 magic;
-} sequence_magic;
-
/*
* We store a SeqTable item for every sequence we have touched in the current
* session. This is needed to hold onto nextval/currval state. (We can't
@@ -96,13 +80,9 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */
*/
static SeqTableData *last_used_seq = NULL;
-static void fill_seq_with_data(Relation rel, HeapTuple tuple);
-static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
static Relation lock_and_open_sequence(SeqTable seq);
static void create_seq_hashtable(void);
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
-static Form_pg_sequence_data read_seq_tuple(Relation rel,
- Buffer *buf, HeapTuple seqdatatuple);
static void init_params(ParseState *pstate, List *options, bool for_identity,
bool isInit,
Form_pg_sequence seqform,
@@ -134,14 +114,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
Relation rel;
HeapTuple tuple;
TupleDesc tupDesc;
- Datum value[SEQ_COL_LASTCOL];
- bool null[SEQ_COL_LASTCOL];
- List *elts = NIL;
- List *atcmds = NIL;
- ListCell *lc;
Datum pgs_values[Natts_pg_sequence];
bool pgs_nulls[Natts_pg_sequence];
- int i;
/*
* If if_not_exists was given and a relation with the same name already
@@ -174,35 +148,6 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
&seqform, &last_value, &reset_state, &is_called,
&need_seq_rewrite, &owned_by);
- /*
- * Create relation (and fill value[] and null[] for the tuple)
- */
- for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
- {
- ColumnDef *coldef = NULL;
-
- switch (i)
- {
- case SEQ_COL_LASTVAL:
- coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid);
- value[i - 1] = Int64GetDatumFast(last_value);
- break;
- case SEQ_COL_LOG:
- coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid);
- value[i - 1] = Int64GetDatum((int64) 0);
- break;
- case SEQ_COL_CALLED:
- coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid);
- value[i - 1] = BoolGetDatum(false);
- break;
- }
-
- coldef->is_not_null = true;
- null[i - 1] = false;
-
- elts = lappend(elts, coldef);
- }
-
stmt->relation = seq->sequence;
stmt->inhRelations = NIL;
stmt->constraints = NIL;
@@ -215,35 +160,20 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
*/
stmt->tableElts = NIL;
+ /*
+ * The initial relation has no attributes; these are added later via the
+ * "init" AM callback.
+ */
+ stmt->tableElts = NIL;
+
address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
seqoid = address.objectId;
Assert(seqoid != InvalidOid);
rel = sequence_open(seqoid, AccessExclusiveLock);
- /* Add all the attributes to the sequence */
- foreach(lc, elts)
- {
- AlterTableCmd *atcmd;
-
- atcmd = makeNode(AlterTableCmd);
- atcmd->subtype = AT_AddColumnToSequence;
- atcmd->def = (Node *) lfirst(lc);
- atcmds = lappend(atcmds, atcmd);
- }
-
- /*
- * No recursion needed. Note that EventTriggerAlterTableStart() should
- * have been called.
- */
- AlterTableInternal(RelationGetRelid(rel), atcmds, false);
- CommandCounterIncrement();
-
- tupDesc = RelationGetDescr(rel);
-
- /* now initialize the sequence's data */
- tuple = heap_form_tuple(tupDesc, value, null);
- fill_seq_with_data(rel, tuple);
+ /* now initialize the sequence table structure and its data */
+ seq_local_init(rel, last_value, is_called);
/* process OWNED BY if given */
if (owned_by)
@@ -292,10 +222,6 @@ ResetSequence(Oid seq_relid)
{
Relation seq_rel;
SeqTable elm;
- Form_pg_sequence_data seq;
- Buffer buf;
- HeapTupleData seqdatatuple;
- HeapTuple tuple;
HeapTuple pgstuple;
Form_pg_sequence pgsform;
int64 startv;
@@ -306,7 +232,6 @@ ResetSequence(Oid seq_relid)
* indeed a sequence.
*/
init_sequence(seq_relid, &elm, &seq_rel);
- (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple);
pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid));
if (!HeapTupleIsValid(pgstuple))
@@ -315,40 +240,8 @@ ResetSequence(Oid seq_relid)
startv = pgsform->seqstart;
ReleaseSysCache(pgstuple);
- /*
- * Copy the existing sequence tuple.
- */
- tuple = heap_copytuple(&seqdatatuple);
-
- /* Now we're done with the old page */
- UnlockReleaseBuffer(buf);
-
- /*
- * Modify the copied tuple to execute the restart (compare the RESTART
- * action in AlterSequence)
- */
- seq = (Form_pg_sequence_data) GETSTRUCT(tuple);
- seq->last_value = startv;
- seq->is_called = false;
- seq->log_cnt = 0;
-
- /*
- * Create a new storage file for the sequence.
- */
- RelationSetNewRelfilenumber(seq_rel, seq_rel->rd_rel->relpersistence);
-
- /*
- * Ensure sequence's relfrozenxid is at 0, since it won't contain any
- * unfrozen XIDs. Same with relminmxid, since a sequence will never
- * contain multixacts.
- */
- Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId);
- Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId);
-
- /*
- * Insert the modified tuple into the new storage file.
- */
- fill_seq_with_data(seq_rel, tuple);
+ /* Restart from the start value, not yet called, forcing a state reset. */
+ seq_local_reset(seq_rel, startv, false, true);
/* Clear local cache so that we don't think we have cached numbers */
/* Note that we do not change the currval() state */
@@ -357,106 +250,6 @@ ResetSequence(Oid seq_relid)
sequence_close(seq_rel, NoLock);
}
-/*
- * Initialize a sequence's relation with the specified tuple as content
- *
- * This handles unlogged sequences by writing to both the main and the init
- * fork as necessary.
- */
-static void
-fill_seq_with_data(Relation rel, HeapTuple tuple)
-{
- fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);
-
- if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
- {
- SMgrRelation srel;
-
- srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
- smgrcreate(srel, INIT_FORKNUM, false);
- log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
- fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
- FlushRelationBuffers(rel);
- smgrclose(srel);
- }
-}
-
-/*
- * Initialize a sequence's relation fork with the specified tuple as content
- */
-static void
-fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
-{
- Buffer buf;
- Page page;
- sequence_magic *sm;
- OffsetNumber offnum;
-
- /* Initialize first page of relation with special magic number */
-
- buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL,
- EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
- Assert(BufferGetBlockNumber(buf) == 0);
-
- page = BufferGetPage(buf);
-
- PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
- sm = (sequence_magic *) PageGetSpecialPointer(page);
- sm->magic = SEQ_MAGIC;
-
- /* Now insert sequence tuple */
-
- /*
- * Since VACUUM does not process sequences, we have to force the tuple to
- * have xmin = FrozenTransactionId now. Otherwise it would become
- * invisible to SELECTs after 2G transactions. It is okay to do this
- * because if the current transaction aborts, no other xact will ever
- * examine the sequence tuple anyway.
- */
- HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
- HeapTupleHeaderSetXminFrozen(tuple->t_data);
- HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
- tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
- ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
-
- /* check the comment above nextval_internal()'s equivalent call. */
- if (RelationNeedsWAL(rel))
- GetTopTransactionId();
-
- START_CRIT_SECTION();
-
- MarkBufferDirty(buf);
-
- offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
- InvalidOffsetNumber, false, false);
- if (offnum != FirstOffsetNumber)
- elog(ERROR, "failed to add sequence tuple to page");
-
- /* XLOG stuff */
- if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
- {
- xl_seq_rec xlrec;
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
-
- xlrec.locator = rel->rd_locator;
-
- XLogRegisterData(&xlrec, sizeof(xl_seq_rec));
- XLogRegisterData(tuple->t_data, tuple->t_len);
-
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
-
- PageSetLSN(page, recptr);
- }
-
- END_CRIT_SECTION();
-
- UnlockReleaseBuffer(buf);
-}
-
/*
* AlterSequence
*
@@ -468,10 +261,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
Oid relid;
SeqTable elm;
Relation seqrel;
- Buffer buf;
- HeapTupleData datatuple;
Form_pg_sequence seqform;
- Form_pg_sequence_data newdataform;
bool need_seq_rewrite;
List *owned_by;
ObjectAddress address;
@@ -480,7 +270,6 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
bool reset_state = false;
bool is_called;
int64 last_value;
- HeapTuple newdatatuple;
/* Open and lock sequence, and check for ownership along the way. */
relid = RangeVarGetRelidExtended(stmt->sequence,
@@ -507,16 +296,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
seqform = (Form_pg_sequence) GETSTRUCT(seqtuple);
- /* lock page buffer and read tuple into new sequence structure */
- (void) read_seq_tuple(seqrel, &buf, &datatuple);
-
- /* copy the existing sequence data tuple, so it can be modified locally */
- newdatatuple = heap_copytuple(&datatuple);
- newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple);
- last_value = newdataform->last_value;
- is_called = newdataform->is_called;
-
- UnlockReleaseBuffer(buf);
+ /* Read sequence data */
+ seq_local_get_state(seqrel, &last_value, &is_called);
/* Check and set new values */
init_params(pstate, stmt->options, stmt->for_identity, false,
@@ -526,32 +307,10 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
/* If needed, rewrite the sequence relation itself */
if (need_seq_rewrite)
{
- /* check the comment above nextval_internal()'s equivalent call. */
if (RelationNeedsWAL(seqrel))
GetTopTransactionId();
- /*
- * Create a new storage file for the sequence, making the state
- * changes transactional.
- */
- RelationSetNewRelfilenumber(seqrel, seqrel->rd_rel->relpersistence);
-
- /*
- * Ensure sequence's relfrozenxid is at 0, since it won't contain any
- * unfrozen XIDs. Same with relminmxid, since a sequence will never
- * contain multixacts.
- */
- Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId);
- Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId);
-
- /*
- * Insert the modified tuple into the new storage file.
- */
- newdataform->last_value = last_value;
- newdataform->is_called = is_called;
- if (reset_state)
- newdataform->log_cnt = 0;
- fill_seq_with_data(seqrel, newdatatuple);
+ seq_local_reset(seqrel, last_value, is_called, reset_state);
}
/* Clear local cache so that we don't think we have cached numbers */
@@ -580,8 +339,6 @@ SequenceChangePersistence(Oid relid, char newrelpersistence)
{
SeqTable elm;
Relation seqrel;
- Buffer buf;
- HeapTupleData seqdatatuple;
/*
* ALTER SEQUENCE acquires this lock earlier. If we're processing an
@@ -596,10 +353,7 @@ SequenceChangePersistence(Oid relid, char newrelpersistence)
if (RelationNeedsWAL(seqrel))
GetTopTransactionId();
- (void) read_seq_tuple(seqrel, &buf, &seqdatatuple);
- RelationSetNewRelfilenumber(seqrel, newrelpersistence);
- fill_seq_with_data(seqrel, &seqdatatuple);
- UnlockReleaseBuffer(buf);
+ seq_local_change_persistence(seqrel, newrelpersistence);
sequence_close(seqrel, NoLock);
}
@@ -662,24 +416,15 @@ nextval_internal(Oid relid, bool check_permissions)
{
SeqTable elm;
Relation seqrel;
- Buffer buf;
- Page page;
HeapTuple pgstuple;
Form_pg_sequence pgsform;
- HeapTupleData seqdatatuple;
- Form_pg_sequence_data seq;
int64 incby,
maxv,
minv,
cache,
- log,
- fetch,
last;
- int64 result,
- next,
- rescnt = 0;
+ int64 result;
bool cycle;
- bool logit = false;
/* open and lock sequence */
init_sequence(relid, &elm, &seqrel);
@@ -724,105 +469,9 @@ nextval_internal(Oid relid, bool check_permissions)
cycle = pgsform->seqcycle;
ReleaseSysCache(pgstuple);
- /* lock page buffer and read tuple */
- seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
- page = BufferGetPage(buf);
-
- last = next = result = seq->last_value;
- fetch = cache;
- log = seq->log_cnt;
-
- if (!seq->is_called)
- {
- rescnt++; /* return last_value if not is_called */
- fetch--;
- }
-
- /*
- * Decide whether we should emit a WAL log record. If so, force up the
- * fetch count to grab SEQ_LOG_VALS more values than we actually need to
- * cache. (These will then be usable without logging.)
- *
- * If this is the first nextval after a checkpoint, we must force a new
- * WAL record to be written anyway, else replay starting from the
- * checkpoint would fail to advance the sequence past the logged values.
- * In this case we may as well fetch extra values.
- */
- if (log < fetch || !seq->is_called)
- {
- /* forced log to satisfy local demand for values */
- fetch = log = fetch + SEQ_LOG_VALS;
- logit = true;
- }
- else
- {
- XLogRecPtr redoptr = GetRedoRecPtr();
-
- if (PageGetLSN(page) <= redoptr)
- {
- /* last update of seq was before checkpoint */
- fetch = log = fetch + SEQ_LOG_VALS;
- logit = true;
- }
- }
-
- while (fetch) /* try to fetch cache [+ log ] numbers */
- {
- /*
- * Check MAXVALUE for ascending sequences and MINVALUE for descending
- * sequences
- */
- if (incby > 0)
- {
- /* ascending sequence */
- if ((maxv >= 0 && next > maxv - incby) ||
- (maxv < 0 && next + incby > maxv))
- {
- if (rescnt > 0)
- break; /* stop fetching */
- if (!cycle)
- ereport(ERROR,
- (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
- errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")",
- RelationGetRelationName(seqrel),
- maxv)));
- next = minv;
- }
- else
- next += incby;
- }
- else
- {
- /* descending sequence */
- if ((minv < 0 && next < minv - incby) ||
- (minv >= 0 && next + incby < minv))
- {
- if (rescnt > 0)
- break; /* stop fetching */
- if (!cycle)
- ereport(ERROR,
- (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
- errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")",
- RelationGetRelationName(seqrel),
- minv)));
- next = maxv;
- }
- else
- next += incby;
- }
- fetch--;
- if (rescnt < cache)
- {
- log--;
- rescnt++;
- last = next;
- if (rescnt == 1) /* if it's first result - */
- result = next; /* it's what to return */
- }
- }
-
- log -= fetch; /* adjust for any unfetched numbers */
- Assert(log >= 0);
+ /* retrieve next value from the access method */
+ result = seq_local_nextval(seqrel, incby, maxv, minv, cache, cycle,
+ &last);
/* save info in local cache */
elm->increment = incby;
@@ -832,69 +481,6 @@ nextval_internal(Oid relid, bool check_permissions)
last_used_seq = elm;
- /*
- * If something needs to be WAL logged, acquire an xid, so this
- * transaction's commit will trigger a WAL flush and wait for syncrep.
- * It's sufficient to ensure the toplevel transaction has an xid, no need
- * to assign xids subxacts, that'll already trigger an appropriate wait.
- * (Have to do that here, so we're outside the critical section)
- */
- if (logit && RelationNeedsWAL(seqrel))
- GetTopTransactionId();
-
- /* ready to change the on-disk (or really, in-buffer) tuple */
- START_CRIT_SECTION();
-
- /*
- * We must mark the buffer dirty before doing XLogInsert(); see notes in
- * SyncOneBuffer(). However, we don't apply the desired changes just yet.
- * This looks like a violation of the buffer update protocol, but it is in
- * fact safe because we hold exclusive lock on the buffer. Any other
- * process, including a checkpoint, that tries to examine the buffer
- * contents will block until we release the lock, and then will see the
- * final state that we install below.
- */
- MarkBufferDirty(buf);
-
- /* XLOG stuff */
- if (logit && RelationNeedsWAL(seqrel))
- {
- xl_seq_rec xlrec;
- XLogRecPtr recptr;
-
- /*
- * We don't log the current state of the tuple, but rather the state
- * as it would appear after "log" more fetches. This lets us skip
- * that many future WAL records, at the cost that we lose those
- * sequence values if we crash.
- */
- XLogBeginInsert();
- XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
-
- /* set values that will be saved in xlog */
- seq->last_value = next;
- seq->is_called = true;
- seq->log_cnt = 0;
-
- xlrec.locator = seqrel->rd_locator;
-
- XLogRegisterData(&xlrec, sizeof(xl_seq_rec));
- XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
-
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
-
- PageSetLSN(page, recptr);
- }
-
- /* Now update sequence tuple to the intended final state */
- seq->last_value = last; /* last fetched number */
- seq->is_called = true;
- seq->log_cnt = log; /* how much is logged */
-
- END_CRIT_SECTION();
-
- UnlockReleaseBuffer(buf);
-
sequence_close(seqrel, NoLock);
return result;
@@ -984,9 +570,6 @@ do_setval(Oid relid, int64 next, bool iscalled)
{
SeqTable elm;
Relation seqrel;
- Buffer buf;
- HeapTupleData seqdatatuple;
- Form_pg_sequence_data seq;
HeapTuple pgstuple;
Form_pg_sequence pgsform;
int64 maxv,
@@ -1020,9 +603,6 @@ do_setval(Oid relid, int64 next, bool iscalled)
*/
PreventCommandIfParallelMode("setval()");
- /* lock page buffer and read tuple */
- seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
-
if ((next < minv) || (next > maxv))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
@@ -1044,37 +624,8 @@ do_setval(Oid relid, int64 next, bool iscalled)
if (RelationNeedsWAL(seqrel))
GetTopTransactionId();
- /* ready to change the on-disk (or really, in-buffer) tuple */
- START_CRIT_SECTION();
-
- seq->last_value = next; /* last fetched number */
- seq->is_called = iscalled;
- seq->log_cnt = 0;
-
- MarkBufferDirty(buf);
-
- /* XLOG stuff */
- if (RelationNeedsWAL(seqrel))
- {
- xl_seq_rec xlrec;
- XLogRecPtr recptr;
- Page page = BufferGetPage(buf);
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
-
- xlrec.locator = seqrel->rd_locator;
- XLogRegisterData(&xlrec, sizeof(xl_seq_rec));
- XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len);
-
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
-
- PageSetLSN(page, recptr);
- }
-
- END_CRIT_SECTION();
-
- UnlockReleaseBuffer(buf);
+ /* set the new value and is_called state via the access method */
+ seq_local_setval(seqrel, next, iscalled);
sequence_close(seqrel, NoLock);
}
@@ -1215,62 +766,6 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
}
-/*
- * Given an opened sequence relation, lock the page buffer and find the tuple
- *
- * *buf receives the reference to the pinned-and-ex-locked buffer
- * *seqdatatuple receives the reference to the sequence tuple proper
- * (this arg should point to a local variable of type HeapTupleData)
- *
- * Function's return value points to the data payload of the tuple
- */
-static Form_pg_sequence_data
-read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
-{
- Page page;
- ItemId lp;
- sequence_magic *sm;
- Form_pg_sequence_data seq;
-
- *buf = ReadBuffer(rel, 0);
- LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
-
- page = BufferGetPage(*buf);
- sm = (sequence_magic *) PageGetSpecialPointer(page);
-
- if (sm->magic != SEQ_MAGIC)
- elog(ERROR, "bad magic number in sequence \"%s\": %08X",
- RelationGetRelationName(rel), sm->magic);
-
- lp = PageGetItemId(page, FirstOffsetNumber);
- Assert(ItemIdIsNormal(lp));
-
- /* Note we currently only bother to set these two fields of *seqdatatuple */
- seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
- seqdatatuple->t_len = ItemIdGetLength(lp);
-
- /*
- * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
- * a sequence, which would leave a non-frozen XID in the sequence tuple's
- * xmax, which eventually leads to clog access failures or worse. If we
- * see this has happened, clean up after it. We treat this like a hint
- * bit update, ie, don't bother to WAL-log it, since we can certainly do
- * this again if the update gets lost.
- */
- Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
- if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
- {
- HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
- seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
- seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
- MarkBufferDirtyHint(*buf, true);
- }
-
- seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple);
-
- return seq;
-}
-
/*
* init_params: process the options list of CREATE or ALTER SEQUENCE, and
* store the values into appropriate fields of seqform, for changes that go
@@ -1855,16 +1350,13 @@ pg_get_sequence_data(PG_FUNCTION_ARGS)
!RELATION_IS_OTHER_TEMP(seqrel) &&
(RelationIsPermanent(seqrel) || !RecoveryInProgress()))
{
- Buffer buf;
- HeapTupleData seqtuple;
- Form_pg_sequence_data seq;
+ bool is_called;
+ int64 last_value;
- seq = read_seq_tuple(seqrel, &buf, &seqtuple);
+ seq_local_get_state(seqrel, &last_value, &is_called);
- values[0] = Int64GetDatum(seq->last_value);
- values[1] = BoolGetDatum(seq->is_called);
-
- UnlockReleaseBuffer(buf);
+ values[0] = Int64GetDatum(last_value);
+ values[1] = BoolGetDatum(is_called);
}
else
memset(isnull, true, sizeof(isnull));
@@ -1908,17 +1400,9 @@ pg_sequence_last_value(PG_FUNCTION_ARGS)
!RELATION_IS_OTHER_TEMP(seqrel) &&
(RelationIsPermanent(seqrel) || !RecoveryInProgress()))
{
- Buffer buf;
- HeapTupleData seqtuple;
- Form_pg_sequence_data seq;
-
- seq = read_seq_tuple(seqrel, &buf, &seqtuple);
-
- is_called = seq->is_called;
- result = seq->last_value;
-
- UnlockReleaseBuffer(buf);
+ seq_local_get_state(seqrel, &result, &is_called);
}
+
sequence_close(seqrel, NoLock);
if (is_called)
@@ -1927,57 +1411,6 @@ pg_sequence_last_value(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
}
-
-void
-seq_redo(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
- Buffer buffer;
- Page page;
- Page localpage;
- char *item;
- Size itemsz;
- xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
- sequence_magic *sm;
-
- if (info != XLOG_SEQ_LOG)
- elog(PANIC, "seq_redo: unknown op code %u", info);
-
- buffer = XLogInitBufferForRedo(record, 0);
- page = (Page) BufferGetPage(buffer);
-
- /*
- * We always reinit the page. However, since this WAL record type is also
- * used for updating sequences, it's possible that a hot-standby backend
- * is examining the page concurrently; so we mustn't transiently trash the
- * buffer. The solution is to build the correct new page contents in
- * local workspace and then memcpy into the buffer. Then only bytes that
- * are supposed to change will change, even transiently. We must palloc
- * the local page for alignment reasons.
- */
- localpage = (Page) palloc(BufferGetPageSize(buffer));
-
- PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic));
- sm = (sequence_magic *) PageGetSpecialPointer(localpage);
- sm->magic = SEQ_MAGIC;
-
- item = (char *) xlrec + sizeof(xl_seq_rec);
- itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);
-
- if (PageAddItem(localpage, (Item) item, itemsz,
- FirstOffsetNumber, false, false) == InvalidOffsetNumber)
- elog(PANIC, "seq_redo: failed to add item to page");
-
- PageSetLSN(localpage, lsn);
-
- memcpy(page, localpage, BufferGetPageSize(buffer));
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- pfree(localpage);
-}
-
/*
* Flush cached sequence information.
*/
@@ -1992,14 +1425,3 @@ ResetSequenceCaches(void)
last_used_seq = NULL;
}
-
-/*
- * Mask a Sequence page before performing consistency checks on it.
- */
-void
-seq_mask(char *page, BlockNumber blkno)
-{
- mask_page_lsn_and_checksum(page);
-
- mask_unused_space(page);
-}
diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore
index ec51f41c767e..8d1195de2637 100644
--- a/src/bin/pg_waldump/.gitignore
+++ b/src/bin/pg_waldump/.gitignore
@@ -10,13 +10,13 @@
/gistdesc.c
/hashdesc.c
/heapdesc.c
+/seqlocaldesc.c
/logicalmsgdesc.c
/mxactdesc.c
/nbtdesc.c
/relmapdesc.c
/replorigindesc.c
/rmgrdesc_utils.c
-/seqdesc.c
/smgrdesc.c
/spgdesc.c
/standbydesc.c
diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c
index fac509ed134e..2fcf9fc4392a 100644
--- a/src/bin/pg_waldump/rmgrdesc.c
+++ b/src/bin/pg_waldump/rmgrdesc.c
@@ -16,6 +16,7 @@
#include "access/gistxlog.h"
#include "access/hash_xlog.h"
#include "access/heapam_xlog.h"
+#include "access/seqlocalam.h"
#include "access/multixact.h"
#include "access/nbtxlog.h"
#include "access/rmgr.h"
diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl
index f26d75e01cfd..2495a28d26a1 100644
--- a/src/bin/pg_waldump/t/001_basic.pl
+++ b/src/bin/pg_waldump/t/001_basic.pl
@@ -67,7 +67,7 @@ Btree
Hash
Gin
Gist
-Sequence
+SequenceLocal
SPGist
BRIN
CommitTs
--
2.49.0