[PATCH v36 05/17] Prevent orphan storage files after server crash
Kyotaro Horiguchi <horikyota.ntt@gmail.com>
From: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
To:
Date: 2024-11-06T08:35:33Z
Lists: pgsql-hackers
When a server crashes during a transaction that creates tables, newly
created but unused storage files are not removed. This patch prevents
such orphan files by utilizing the UNDO log system for storage files.
---
src/backend/access/heap/heapam_handler.c | 22 +--
src/backend/access/rmgrdesc/Makefile | 1 +
src/backend/access/rmgrdesc/smgrundodesc.c | 62 ++++++
src/backend/access/rmgrdesc/undologdesc.c | 2 +
src/backend/access/transam/undolog.c | 1 +
src/backend/catalog/index.c | 4 +-
src/backend/catalog/storage.c | 212 +++++++++++++++++++--
src/backend/commands/sequence.c | 4 +-
src/backend/commands/tablecmds.c | 19 +-
src/backend/storage/buffer/bufmgr.c | 4 +-
src/backend/storage/file/reinit.c | 92 +++++++++
src/backend/storage/smgr/smgr.c | 9 +
src/include/access/rmgrlist.h | 2 +-
src/include/catalog/storage.h | 2 +
src/include/catalog/storage_ulog.h | 48 +++++
src/include/storage/reinit.h | 4 +
src/include/storage/smgr.h | 1 +
src/test/recovery/t/013_crash_restart.pl | 19 ++
18 files changed, 465 insertions(+), 43 deletions(-)
create mode 100644 src/backend/access/rmgrdesc/smgrundodesc.c
create mode 100644 src/include/catalog/storage_ulog.h
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 53f572f384b..239442f0cb2 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -611,8 +611,7 @@ heapam_relation_set_new_filelocator(Relation rel,
{
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
- smgrcreate(srel, INIT_FORKNUM, false);
- log_smgrcreate(newrlocator, INIT_FORKNUM);
+ RelationCreateFork(srel, INIT_FORKNUM, true, true);
}
smgrclose(srel);
@@ -656,16 +655,17 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
{
if (smgrexists(RelationGetSmgr(rel), forkNum))
{
- smgrcreate(dstrel, forkNum, false);
-
- /*
- * WAL log creation if the relation is persistent, or this is the
- * init fork of an unlogged relation.
- */
- if (RelationIsPermanent(rel) ||
+ bool wal_log = RelationIsPermanent(rel) |
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
- forkNum == INIT_FORKNUM))
- log_smgrcreate(newrlocator, forkNum);
+ forkNum == INIT_FORKNUM);
+
+ /*
+ * Usually, we don't use UNDO log for FSM or VM forks, as their
+ * creation is not transactional. However, we're currently copying
+ * the entire relation in a transactional manner, which requires
+ * after-crash cleanup.
+ */
+ RelationCreateFork(dstrel, forkNum, wal_log, true);
RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
rel->rd_rel->relpersistence);
}
diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile
index 542fd3d6a8e..fc4605bd30b 100644
--- a/src/backend/access/rmgrdesc/Makefile
+++ b/src/backend/access/rmgrdesc/Makefile
@@ -26,6 +26,7 @@ OBJS = \
rmgrdesc_utils.o \
seqdesc.o \
smgrdesc.o \
+ smgrundodesc.o \
spgdesc.o \
standbydesc.o \
tblspcdesc.o \
diff --git a/src/backend/access/rmgrdesc/smgrundodesc.c b/src/backend/access/rmgrdesc/smgrundodesc.c
new file mode 100644
index 00000000000..9939ef2b61d
--- /dev/null
+++ b/src/backend/access/rmgrdesc/smgrundodesc.c
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------
+ *
+ * smgrundodesc.c
+ * rmgr undolog descriptor routines for catalog/storage.c
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/rmgrdesc/smgrundodesc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "catalog/storage_ulog.h"
+#include "lib/stringinfo.h"
+
+/*
+ * smgr_undodesc
+ *		Append a description of a storage UNDO log record to buf.
+ *
+ * ULOG_SMGR_CREATE and ULOG_SMGR_PRESERVE records are printed the same way:
+ * tablespace/database/relfilenumber, fork number and backend.  Nothing is
+ * appended for any other record type.
+ */
+void
+smgr_undodesc(StringInfo buf, UndoLogRecord *record)
+{
+	/* strip the generic flag bits with the UNDO mask, as smgr_undo() does */
+	uint8		info = ULogRecGetInfo(record) & ~ULR_INFO_MASK;
+
+	if (info == ULOG_SMGR_CREATE)
+	{
+		ul_smgr_create *urec = (ul_smgr_create *) ULogRecGetData(record);
+
+		/* the locator fields are unsigned OIDs, hence %u rather than %d */
+		appendStringInfo(buf, ": %u/%u/%u, fork %d, backend %d",
+						 urec->rlocator.spcOid,
+						 urec->rlocator.dbOid,
+						 urec->rlocator.relNumber,
+						 urec->forknum, urec->backend);
+	}
+	else if (info == ULOG_SMGR_PRESERVE)
+	{
+		ul_smgr_preserve *urec = (ul_smgr_preserve *) ULogRecGetData(record);
+
+		/* the locator fields are unsigned OIDs, hence %u rather than %d */
+		appendStringInfo(buf, ": %u/%u/%u, fork %d, backend %d",
+						 urec->rlocator.spcOid,
+						 urec->rlocator.dbOid,
+						 urec->rlocator.relNumber,
+						 urec->forknum, urec->backend);
+	}
+}
+
+/*
+ * smgr_undoidentify
+ *		Return the symbolic name of a storage UNDO log record type, or NULL
+ *		if the type is not recognized.
+ */
+const char *
+smgr_undoidentify(uint8 info)
+{
+	const char *id = NULL;
+
+	/* strip the generic flag bits with the UNDO mask, as smgr_undo() does */
+	switch (info & ~ULR_INFO_MASK)
+	{
+		case ULOG_SMGR_CREATE:
+			id = "SMGRCREATE";
+			break;
+		case ULOG_SMGR_PRESERVE:
+			id = "SMGRPRESERVE";
+			break;
+	}
+
+	return id;
+}
diff --git a/src/backend/access/rmgrdesc/undologdesc.c b/src/backend/access/rmgrdesc/undologdesc.c
index e7559cdd33c..fa88705f99e 100644
--- a/src/backend/access/rmgrdesc/undologdesc.c
+++ b/src/backend/access/rmgrdesc/undologdesc.c
@@ -14,6 +14,8 @@
#include "postgres.h"
#include "access/undolog.h"
+#include "catalog/storage.h"
+#include "catalog/storage_ulog.h"
typedef struct UndoDescData
{
diff --git a/src/backend/access/transam/undolog.c b/src/backend/access/transam/undolog.c
index 196e02e652f..b2fdbfcd0f9 100644
--- a/src/backend/access/transam/undolog.c
+++ b/src/backend/access/transam/undolog.c
@@ -28,6 +28,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "lib/dshash.h"
+#include "catalog/storage_ulog.h"
#include "miscadmin.h"
#include "storage/fd.h"
#include "storage/procarray.h"
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 6976249e9e9..7613192e343 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3059,8 +3059,8 @@ index_build(Relation heapRelation,
if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
!smgrexists(RelationGetSmgr(indexRelation), INIT_FORKNUM))
{
- smgrcreate(RelationGetSmgr(indexRelation), INIT_FORKNUM, false);
- log_smgrcreate(&indexRelation->rd_locator, INIT_FORKNUM);
+ RelationCreateFork(RelationGetSmgr(indexRelation),
+ INIT_FORKNUM, true, true);
indexRelation->rd_indam->ambuildempty(indexRelation);
}
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 5b22cf10990..d546d169d34 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -19,13 +19,16 @@
#include "postgres.h"
+#include "access/undolog.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
+#include "catalog/storage_ulog.h"
#include "catalog/storage_xlog.h"
+#include "common/hashfn_unstable.h"
#include "miscadmin.h"
#include "storage/bulk_write.h"
#include "storage/freespace.h"
@@ -76,6 +79,14 @@ typedef struct PendingRelSync
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
static HTAB *pendingSyncHash = NULL;
+/* Storage for smgr_undo()/smgr_undoevent() */
+static RelFileLocator *rlocs = NULL;
+static int rlocs_cap = 0;
+static int rlocs_len = 0;
+
+/* local functions */
+static void ulog_smgrcreate(SMgrRelation srel, ForkNumber forkNum);
+static void ulog_smgrpreserve(RelFileLocator rloc, ForkNumber forkNum);
/*
* AddPendingSync
@@ -147,36 +158,54 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
}
srel = smgropen(rlocator, procNumber);
- smgrcreate(srel, MAIN_FORKNUM, false);
- if (needs_wal)
- log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
+ RelationCreateFork(srel, MAIN_FORKNUM, needs_wal, register_delete);
- /*
- * Add the relation to the list of stuff to delete at abort, if we are
- * asked to do so.
- */
- if (register_delete)
+ if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
+ {
+ Assert(procNumber == INVALID_PROC_NUMBER);
+ AddPendingSync(&rlocator);
+ }
+
+ return srel;
+}
+
+/*
+ * RelationCreateFork
+ * Create physical storage for a fork of a relation.
+ *
+ * This function creates a relation fork in a transactional manner. When
+ * undo_log is true, the creation is UNDO-logged so that in case of transaction
+ * aborts or server crashes later on, the fork will be removed. If the caller
+ * plans to remove the fork in another way, it should pass false. Additionally,
+ * it is WAL-logged if wal_log is true.
+ *
+ * The pending-delete entry takes both its locator and its backend from srel.
+ */
+void
+RelationCreateFork(SMgrRelation srel, ForkNumber forkNum,
+ bool wal_log, bool undo_log)
+{
+ /* Schedule the removal of this fork at abort if requested. */
+ if (undo_log)
{
PendingRelDelete *pending;
+ ulog_smgrcreate(srel, forkNum);
+
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
- pending->rlocator = rlocator;
- pending->procNumber = procNumber;
+ pending->rlocator = srel->smgr_rlocator.locator;
+ /* use the backend srel was opened with, as the replaced code did */
+ pending->procNumber = srel->smgr_rlocator.backend;
pending->atCommit = false; /* delete if abort */
pending->nestLevel = GetCurrentTransactionNestLevel();
pending->next = pendingDeletes;
pendingDeletes = pending;
}
- if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
- {
- Assert(procNumber == INVALID_PROC_NUMBER);
- AddPendingSync(&rlocator);
- }
+ /* WAL-log this creation if requested. */
+ if (wal_log)
+ log_smgrcreate(&srel->smgr_rlocator.locator, forkNum);
- return srel;
+ smgrcreate(srel, forkNum, false);
+}
/*
@@ -198,6 +227,35 @@ log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
}
+/*
+ * Write a ULOG_SMGR_CREATE record to the UNDO log.
+ *
+ * The record carries the locator and backend of srel together with forkNum.
+ * Declared static above, so the definition says so as well.
+ */
+static void
+ulog_smgrcreate(SMgrRelation srel, ForkNumber forkNum)
+{
+	ul_smgr_create ulrec;
+
+	ulrec.rlocator = srel->smgr_rlocator.locator;
+	ulrec.backend = srel->smgr_rlocator.backend;
+	ulrec.forknum = forkNum;
+	UndoLogWrite(RM_SMGR_ID, ULOG_SMGR_CREATE, &ulrec, sizeof(ulrec));
+}
+
+/*
+ * Write a ULOG_SMGR_PRESERVE record to the UNDO log.
+ *
+ * Only the MAIN fork may be named (asserted); the record always carries
+ * INVALID_PROC_NUMBER as its backend.  Declared static above, so the
+ * definition says so as well.
+ */
+static void
+ulog_smgrpreserve(RelFileLocator rloc, ForkNumber forkNum)
+{
+	ul_smgr_preserve ulrec;
+
+	Assert(forkNum == MAIN_FORKNUM);
+	ulrec.rlocator = rloc;
+	ulrec.backend = INVALID_PROC_NUMBER;
+	ulrec.forknum = forkNum;
+	UndoLogWrite(RM_SMGR_ID, ULOG_SMGR_PRESERVE, &ulrec, sizeof(ulrec));
+}
+
/*
* RelationDropStorage
* Schedule unlinking of physical storage at transaction commit.
@@ -253,6 +311,7 @@ RelationPreserveStorage(RelFileLocator rlocator, bool atCommit)
PendingRelDelete *pending;
PendingRelDelete *prev;
PendingRelDelete *next;
+ bool found = false;
prev = NULL;
for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -261,6 +320,8 @@ RelationPreserveStorage(RelFileLocator rlocator, bool atCommit)
if (RelFileLocatorEquals(rlocator, pending->rlocator)
&& pending->atCommit == atCommit)
{
+ found = true;
+
/* unlink and delete list entry */
if (prev)
prev->next = next;
@@ -275,6 +336,9 @@ RelationPreserveStorage(RelFileLocator rlocator, bool atCommit)
prev = pending;
}
}
+
+ if (found)
+ ulog_smgrpreserve(rlocator, MAIN_FORKNUM);
}
/*
@@ -1077,3 +1141,119 @@ smgr_redo(XLogReaderState *record)
else
elog(PANIC, "smgr_redo: unknown op code %u", info);
}
+
+/*
+ * smgr_undo
+ *		UNDO callback of the storage resource manager.
+ *
+ * In the ULOGCXT_CLEANUP context the records handed in are folded into
+ * rlocs[]: a ULOG_SMGR_CREATE record for the MAIN fork appends its relation,
+ * and a ULOG_SMGR_PRESERVE record removes every matching entry again.  What
+ * remains in rlocs[] is consumed by smgr_undoevent().  Nothing is done in the
+ * commit, abort and prepared contexts; any other context is a PANIC.  The
+ * redo and crashed arguments are not used here.
+ */
+void
+smgr_undo(UndoLogRecord *record, ULogContext cxt, bool redo, bool crashed)
+{
+	uint8		info;
+
+	Assert(CritSectionCount == 0);
+
+	if (cxt == ULOGCXT_CLEANUP)
+	{
+		Assert(record);
+		info = record->ul_info & ~ULR_INFO_MASK;
+
+		if (info == ULOG_SMGR_CREATE)
+		{
+			ul_smgr_create *ulrec = (ul_smgr_create *) ULogRecGetData(record);
+
+			/*
+			 * RelationCreateFork() UNDO-logs the creation of non-MAIN forks
+			 * too (e.g. INIT forks), so such records can legitimately show
+			 * up here and must not trip an assertion.  Only the MAIN fork
+			 * record registers the relation: rlocs[] is kept per relation,
+			 * not per fork, and a non-MAIN record must not re-add a relation
+			 * that a PRESERVE record has already taken out.
+			 */
+			if (ulrec->forknum != MAIN_FORKNUM)
+				return;
+
+			/* grow the array on demand: 32 entries first, then doubling */
+			if (rlocs_cap < rlocs_len + 1)
+			{
+				if (rlocs_cap == 0)
+				{
+					rlocs_cap = 32;
+					rlocs = palloc(sizeof(RelFileLocator) * rlocs_cap);
+				}
+				else
+				{
+					rlocs_cap *= 2;
+					rlocs = repalloc(rlocs, sizeof(RelFileLocator) * rlocs_cap);
+				}
+			}
+			rlocs[rlocs_len++] = ulrec->rlocator;
+		}
+		else if (info == ULOG_SMGR_PRESERVE)
+		{
+			ul_smgr_preserve *ulrec =
+				(ul_smgr_preserve *) ULogRecGetData(record);
+			int			j = 0;
+
+			/* compact rlocs[] in place, dropping entries for this relation */
+			for (int i = 0; i < rlocs_len; i++)
+			{
+				if (RelFileLocatorEquals(ulrec->rlocator, rlocs[i]))
+					continue;
+
+				if (i != j)
+					rlocs[j] = rlocs[i];
+				j++;
+			}
+
+			rlocs_len = j;
+		}
+		else
+			elog(PANIC, "smgr_undo: unknown op code %d", info);
+	}
+	else if (cxt == ULOGCXT_COMMIT || cxt == ULOGCXT_ABORT ||
+			 cxt == ULOGCXT_PREPARED)
+	{
+		/* nothing to do here */
+	}
+	else
+		elog(PANIC, "smgr_undo: unknown context code %u", cxt);
+}
+
+/*
+ * smgr_undoevent
+ *		UNDO event callback of the storage resource manager.
+ *
+ * At ULOGEVENT_XACTEND, drops the buffers and unlinks the MAIN, visibility
+ * map and free space map forks of every relation left in rlocs[] by
+ * smgr_undo(), then releases rlocs[].  ULOGEVENT_CLEANUP_INIT and
+ * ULOGEVENT_RECOVERY_END need no action; any other event is a PANIC.
+ */
+void
+smgr_undoevent(ULogEvent event)
+{
+	if (event == ULOGEVENT_XACTEND)
+	{
+		/*
+		 * Since the MAIN fork was created in this transaction, rollback
+		 * should remove all forks of this relation. Although we could
+		 * register an undo record individually for each fork, this may be
+		 * more complex because VM and FSM can be created
+		 * non-transactionally outside the transaction that created the
+		 * MAIN fork.
+		 *
+		 * The fork list is identical for every relation, so it is built
+		 * once here.  Appending to it on each loop iteration without
+		 * resetting the count would write past the end of these
+		 * three-element arrays from the second relation on.
+		 *
+		 * NOTE(review): INIT_FORKNUM is not in this list although
+		 * RelationCreateFork() UNDO-logs INIT fork creation -- confirm that
+		 * init forks are removed elsewhere.
+		 */
+		ForkNumber	forks[3] = {MAIN_FORKNUM, VISIBILITYMAP_FORKNUM, FSM_FORKNUM};
+		BlockNumber firstblocks[3] = {0};
+		const int	nforks = 3;
+
+		for (int i = 0; i < rlocs_len; i++)
+		{
+			SMgrRelation reln;
+
+			/*
+			 * Drop buffers, then the files. This can be improved by using
+			 * smgrdounlinkall(), but the simpler way is taken for now.
+			 */
+			reln = smgropen(rlocs[i], INVALID_PROC_NUMBER);
+			DropRelationBuffers(reln, forks, nforks, firstblocks);
+			for (int j = 0; j < nforks; j++)
+				smgrunlink(reln, forks[j], true);
+
+			smgrclose(reln);
+		}
+
+		if (rlocs)
+		{
+			pfree(rlocs);
+			rlocs = NULL;
+			rlocs_cap = rlocs_len = 0;
+		}
+	}
+	else if (event == ULOGEVENT_CLEANUP_INIT ||
+			 event == ULOGEVENT_RECOVERY_END)
+	{
+		/* Nothing to do */
+	}
+	else
+		elog(PANIC, "smgr_undoevent: unknown event code %u", event);
+}
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 0188e8bbd5b..be6afc7df52 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -31,6 +31,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_sequence.h"
#include "catalog/pg_type.h"
+#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "commands/defrem.h"
#include "commands/sequence.h"
@@ -344,8 +345,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
SMgrRelation srel;
srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER);
- smgrcreate(srel, INIT_FORKNUM, false);
- log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
+ RelationCreateFork(srel, INIT_FORKNUM, true, true);
fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
FlushRelationBuffers(rel);
smgrclose(srel);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 49374782625..b5766989d8e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -15965,16 +15965,17 @@ index_copy_data(Relation rel, RelFileLocator newrlocator)
{
if (smgrexists(RelationGetSmgr(rel), forkNum))
{
- smgrcreate(dstrel, forkNum, false);
-
- /*
- * WAL log creation if the relation is persistent, or this is the
- * init fork of an unlogged relation.
- */
- if (RelationIsPermanent(rel) ||
+ bool wal_log = RelationIsPermanent(rel) |
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
- forkNum == INIT_FORKNUM))
- log_smgrcreate(&newrlocator, forkNum);
+ forkNum == INIT_FORKNUM);
+
+ /*
+ * Usually, we don't use UNDO log for FSM or VM forks, as their
+ * creation is not transactional. However, we're currently copying
+ * the entire relation in a transactional manner, which requires
+ * after-crash cleanup.
+ */
+ RelationCreateFork(dstrel, forkNum, wal_log, true);
RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
rel->rd_rel->relpersistence);
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 2622221809c..1a9c794374f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4812,8 +4812,7 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator,
/*
* Create and copy all forks of the relation. During create database we
* have a separate cleanup mechanism which deletes complete database
- * directory. Therefore, each individual relation doesn't need to be
- * registered for cleanup.
+ * directory. Therefore, do not issue an UNDO log for this relation.
*/
RelationCreateStorage(dst_rlocator, relpersistence, false);
@@ -4827,6 +4826,7 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator,
{
if (smgrexists(src_rel, forkNum))
{
+ /* Use smgrcreate() directly as no UNDO log is required. */
smgrcreate(dst_rel, forkNum, false);
/*
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 01e267abf9b..d3a42d3f566 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -34,6 +34,39 @@ typedef struct
RelFileNumber relnumber; /* hash key */
} unlogged_relation_entry;
+static char **ignore_files = NULL;
+static int nignore_elems = 0;
+static int nignore_files = 0;
+
+/*
+ * reinit_ignore_file
+ *		Determine if the file should be ignored when resetting unlogged
+ *		relations.
+ *
+ * dirname and name are joined as "dirname/name" and compared against every
+ * path registered with ResetUnloggedRelationIgnore().  A registered path
+ * matches when the joined name is equal to it or continues with '_' right
+ * after it, i.e. the fork suffix is ignored.
+ *
+ * NOTE(review): names that continue with '.' (as segment files appear to)
+ * do not match -- confirm this is intended.
+ */
+static bool
+reinit_ignore_file(const char *dirname, const char *name)
+{
+	char		fnamebuf[MAXPGPATH];
+
+	if (nignore_files == 0)
+		return false;
+
+	/*
+	 * Build the path with snprintf(), which bounds the whole result and
+	 * always NUL-terminates it.  The size argument of strncat() limits only
+	 * the number of appended bytes, so it cannot cap the total length, and
+	 * strncpy() leaves the buffer unterminated when the source fills it.
+	 */
+	snprintf(fnamebuf, sizeof(fnamebuf), "%s/%s", dirname, name);
+
+	for (int i = 0; i < nignore_files; i++)
+	{
+		size_t		len = strlen(ignore_files[i]);
+
+		/* match ignoring fork part */
+		if (strncmp(fnamebuf, ignore_files[i], len) == 0 &&
+			(fnamebuf[len] == '\0' || fnamebuf[len] == '_'))
+			return true;
+	}
+
+	return false;
+}
+
/*
* Reset unlogged relations from before the last restart.
*
@@ -204,6 +237,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
&forkNum, &segno))
continue;
+ /* Skip anything that undo log suggested to ignore */
+ if (reinit_ignore_file(dbspacedirname, de->d_name))
+ continue;
+
/* Also skip it unless this is the init fork. */
if (forkNum != INIT_FORKNUM)
continue;
@@ -243,6 +280,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
&forkNum, &segno))
continue;
+ /* Skip anything that undo log suggested to ignore */
+ if (reinit_ignore_file(dbspacedirname, de->d_name))
+ continue;
+
/* We never remove the init fork. */
if (forkNum == INIT_FORKNUM)
continue;
@@ -294,6 +335,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
&forkNum, &segno))
continue;
+ /* Skip anything that undo log suggested to ignore */
+ if (reinit_ignore_file(dbspacedirname, de->d_name))
+ continue;
+
/* Also skip it unless this is the init fork. */
if (forkNum != INIT_FORKNUM)
continue;
@@ -337,6 +382,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
&forkNum, &segno))
continue;
+ /* Skip anything that undo log suggested to ignore */
+ if (reinit_ignore_file(dbspacedirname, de->d_name))
+ continue;
+
/* Also skip it unless this is the init fork. */
if (forkNum != INIT_FORKNUM)
continue;
@@ -366,6 +415,49 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
}
}
+/*
+ * ResetUnloggedRelationIgnore
+ *		Remember a relation whose files must be left alone while unlogged
+ *		relations are being reinitialized.
+ *
+ * The MAIN fork path of the given relation/backend pair is appended to the
+ * ignore list, which is enlarged on demand.
+ */
+void
+ResetUnloggedRelationIgnore(RelFileLocator rloc, ProcNumber backend)
+{
+	RelFileLocatorBackend target;
+
+	target.locator = rloc;
+	target.backend = backend;
+
+	/* Make room for one more entry: 16 slots at first, doubling afterwards. */
+	if (nignore_files >= nignore_elems)
+	{
+		if (ignore_files != NULL)
+		{
+			nignore_elems *= 2;
+			ignore_files = repalloc(ignore_files,
+									sizeof(char *) * nignore_elems);
+		}
+		else
+		{
+			nignore_elems = 16;
+			ignore_files = palloc(sizeof(char *) * nignore_elems);
+		}
+	}
+
+	ignore_files[nignore_files] = relpath(target, MAIN_FORKNUM);
+	nignore_files++;
+}
+
+/*
+ * ResetUnloggedRelationIgnoreClear
+ *		Clear the ignore list.
+ *
+ * Releases the registered path strings and the array holding them, and
+ * resets all three bookkeeping variables so that the list is empty and can
+ * be filled again from scratch.
+ */
+void
+ResetUnloggedRelationIgnoreClear(void)
+{
+	if (nignore_elems == 0)
+		return;
+
+	/* release the path strings stored by ResetUnloggedRelationIgnore() */
+	for (int i = 0; i < nignore_files; i++)
+		pfree(ignore_files[i]);
+
+	pfree(ignore_files);
+	ignore_files = NULL;
+	nignore_elems = 0;
+
+	/*
+	 * Forget the entries as well; otherwise reinit_ignore_file() would index
+	 * the NULL array, and the next registration would store past the end of
+	 * a freshly allocated one.
+	 */
+	nignore_files = 0;
+}
+
/*
* Basic parsing of putative relation filenames.
*
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 36ad34aa6ac..8a7654118fe 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -819,6 +819,15 @@ smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
}
+/*
+ * smgrunlink() -- unlink the storage file
+ *
+ * Forwards to the smgr_unlink callback of the storage manager selected by
+ * reln->smgr_which, passing the relation's locator, forknum and isRedo
+ * through unchanged.
+ */
+void
+smgrunlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+{
+ smgrsw[reln->smgr_which].smgr_unlink(reln->smgr_rlocator, forknum, isRedo);
+}
+
/*
* AtEOXact_SMgr
*
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 5909d87d599..b0c4e689950 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -27,7 +27,7 @@
/* symbol name, textual name, redo, desc, identify, startup, cleanup, mask, decode, undo, undo_desc, undo_identify, undo_event */
PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL, xlog_decode, NULL, NULL, NULL, NULL)
PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL, xact_decode, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL, NULL, smgr_undo, smgr_undodesc, smgr_undoidentify, smgr_undoevent)
PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 72ef3ee92c0..3451d6ac80c 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -25,6 +25,8 @@ extern PGDLLIMPORT int wal_skip_threshold;
extern SMgrRelation RelationCreateStorage(RelFileLocator rlocator,
char relpersistence,
bool register_delete);
+extern void RelationCreateFork(SMgrRelation srel, ForkNumber forkNum,
+ bool wal_log, bool undo_log);
extern void RelationDropStorage(Relation rel);
extern void RelationPreserveStorage(RelFileLocator rlocator, bool atCommit);
extern void RelationPreTruncate(Relation rel);
diff --git a/src/include/catalog/storage_ulog.h b/src/include/catalog/storage_ulog.h
new file mode 100644
index 00000000000..9568ab24cfb
--- /dev/null
+++ b/src/include/catalog/storage_ulog.h
@@ -0,0 +1,48 @@
+/*-------------------------------------------------------------------------
+ *
+ * storage_ulog.h
+ *	  prototypes for Undo Log support for backend/catalog/storage.c
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/storage_ulog.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STORAGE_ULOG_H
+#define STORAGE_ULOG_H
+
+#include "access/undolog.h"
+#include "storage/smgr.h"
+
+/* ULOG gives us high 4 bits (just following xlog) */
+#define ULOG_SMGR_CREATE	0x10
+#define ULOG_SMGR_PRESERVE	0x20
+
+/* undo log entry for storage file creation */
+typedef struct ul_smgr_create
+{
+	RelFileLocator rlocator;
+	ProcNumber	backend;
+	ForkNumber	forknum;
+} ul_smgr_create;
+
+typedef struct ul_smgr_preserve
+{
+	RelFileLocator rlocator;
+	ProcNumber	backend;
+	ForkNumber	forknum;
+} ul_smgr_preserve;
+
+extern void smgr_undo(UndoLogRecord *record, ULogContext cxt, bool redo,
+					  bool crashed);
+extern void smgr_undodesc(StringInfo buf, UndoLogRecord *record);
+extern const char *smgr_undoidentify(uint8 info);
+extern void smgr_undoevent(ULogEvent event);
+
+#define ULogRecGetData(record) ((char *) (record) + sizeof(UndoLogRecord))
+#define ULogRecGetInfo(record) ((record)->ul_info)
+
+#endif							/* STORAGE_ULOG_H */
diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h
index 1373d509df2..02bf55d3a6b 100644
--- a/src/include/storage/reinit.h
+++ b/src/include/storage/reinit.h
@@ -16,9 +16,13 @@
#define REINIT_H
#include "common/relpath.h"
+#include "storage/relfilelocator.h"
extern void ResetUnloggedRelations(int op);
+extern void ResetUnloggedRelationIgnore(RelFileLocator rloc,
+ ProcNumber backend);
+extern void ResetUnloggedRelationIgnoreClear(void);
extern bool parse_filename_for_nontemp_relation(const char *name,
RelFileNumber *relnumber,
ForkNumber *fork,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 63a186bd346..a2c15d6af90 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -110,6 +110,7 @@ extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
BlockNumber *nblocks);
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void smgrregistersync(SMgrRelation reln, ForkNumber forknum);
+extern void smgrunlink(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void AtEOXact_SMgr(void);
extern bool ProcessBarrierSmgrRelease(void);
diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl
index d5d24e31d90..4df88efeb3d 100644
--- a/src/test/recovery/t/013_crash_restart.pl
+++ b/src/test/recovery/t/013_crash_restart.pl
@@ -86,6 +86,23 @@ ok( pump_until(
$killme_stdout = '';
$killme_stderr = '';
+# Also, create a table whose storage should *not* survive.
+# NOTE(review): no BEGIN is visible in this hunk -- confirm the CREATE TABLE
+# runs inside the transaction that is still open when the server crashes.
+$killme_stdin .= q[
+CREATE TABLE should_not_survive (a int);
+SELECT pg_relation_filepath('should_not_survive');
+];
+ok( pump_until(
+		$killme, $psql_timeout, \$killme_stdout,
+		qr/base\/[[:digit:]\/]+[\r\n]$/m),
+	'created a table');
+
+# Strip the whole line ending.  The pattern above allows for "\r", which
+# chomp would leave at the end of the path and make the -e tests miss.
+(my $relfilerelpath = $killme_stdout) =~ s/[\r\n]+\z//;
+$killme_stdout = '';
+$killme_stderr = '';
+
+my $relfilepath = $node->data_dir . "/" . $relfilerelpath;
+ok( -e $relfilepath,
+	"storage file is created in xact that is going to crash");
# Start longrunning query in second session; its failure will signal that
# crash-restart has occurred. The initial wait for the trivial select is to
@@ -144,6 +161,8 @@ $killme->run();
($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
$monitor->run();
+# The storage file whose path was recorded earlier must no longer exist here.
+ok( ! -e $relfilepath,
+ "orphaned storage file is correctly removed");
# Acquire pid of new backend
$killme_stdin .= q[
--
2.43.5
----Next_Part(Fri_Dec_27_17_25_02_2024_357)--
Content-Type: Text/X-Patch; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="v36-0006-new-indexam-bit-for-unlogged-storage-compatibili.patch"