0001-Add-postgres-code-fix-needed-for-VCI.patch
application/octet-stream
Filename: 0001-Add-postgres-code-fix-needed-for-VCI.patch
Type: application/octet-stream
Part: 0
From 64eee1c00e14f8a44c787ab90dc00ca36c2fb6f6 Mon Sep 17 00:00:00 2001
From: Aya Iwata <iwata.aya@fujitsu.com>
Date: Fri, 9 May 2025 18:32:18 +0900
Subject: [PATCH 1/2] Add postgres code fix needed for VCI
---
src/backend/access/common/relation.c | 2 +
src/backend/access/common/reloptions.c | 51 ++++
src/backend/access/heap/heapam.c | 18 ++
src/backend/access/heap/heapam_handler.c | 4 +
src/backend/access/heap/heapam_visibility.c | 6 +
src/backend/access/transam/xact.c | 225 ++++++++++++++++
src/backend/access/transam/xlogfuncs.c | 1 +
src/backend/access/transam/xlogrecovery.c | 18 +-
src/backend/catalog/dependency.c | 7 +
src/backend/catalog/index.c | 27 +-
src/backend/commands/explain.c | 31 ++-
src/backend/commands/indexcmds.c | 109 ++++----
src/backend/commands/tablecmds.c | 125 +++++++++
src/backend/commands/vacuum.c | 16 ++
src/backend/executor/execAmi.c | 2 +
src/backend/executor/execExpr.c | 6 +-
src/backend/executor/execExprInterp.c | 36 +++
src/backend/executor/execGrouping.c | 17 +-
src/backend/executor/execProcnode.c | 1 +
src/backend/executor/execScan.c | 2 +-
src/backend/executor/execUtils.c | 2 +
src/backend/executor/nodeCustom.c | 16 ++
src/backend/executor/nodeModifyTable.c | 10 +
src/backend/executor/nodeSubplan.c | 6 +
src/backend/jit/llvm/llvmjit_expr.c | 12 +
src/backend/jit/llvm/llvmjit_types.c | 2 +
src/backend/nodes/gen_node_support.pl | 57 +++-
src/backend/nodes/params.c | 2 +-
src/backend/optimizer/path/allpaths.c | 17 ++
src/backend/optimizer/plan/createplan.c | 4 +-
src/backend/postmaster/autovacuum.c | 25 ++
src/backend/postmaster/bgworker.c | 109 ++++++++
src/backend/storage/ipc/procarray.c | 10 +-
src/backend/storage/lmgr/lock.c | 26 +-
src/backend/storage/lmgr/lwlock.c | 2 +
src/backend/utils/adt/timestamp.c | 18 +-
src/backend/utils/cache/relcache.c | 66 +++++
src/backend/utils/init/postinit.c | 2 +-
src/backend/utils/mmgr/mcxt.c | 41 ++-
src/backend/utils/sort/tuplesort.c | 261 +-----------------
src/backend/utils/time/combocid.c | 46 ++++
src/bin/pg_dump/common.c | 1 +
src/bin/pg_dump/pg_dump.c | 35 ++-
src/bin/pg_dump/pg_dump.h | 1 +
src/include/access/heapam.h | 4 +
src/include/access/reloptions.h | 3 +-
src/include/access/xact.h | 7 +
src/include/access/xlogrecovery.h | 3 +
src/include/catalog/dependency.h | 3 +
src/include/catalog/index.h | 2 +
src/include/commands/explain.h | 7 +
src/include/commands/tablecmds.h | 6 +
src/include/datatype/timestamp.h | 22 ++
src/include/executor/execExpr.h | 17 ++
src/include/executor/executor.h | 5 +-
src/include/executor/nodeModifyTable.h | 2 +
src/include/executor/nodeSubplan.h | 2 +
src/include/nodes/execnodes.h | 2 +
src/include/nodes/extensible.h | 4 +
src/include/nodes/memnodes.h | 3 +-
src/include/nodes/params.h | 6 +-
src/include/nodes/plannodes.h | 3 +
src/include/optimizer/planner.h | 2 +
src/include/postmaster/autovacuum.h | 3 +
src/include/postmaster/bgworker.h | 13 +
src/include/storage/itemptr.h | 3 +
src/include/storage/lwlock.h | 11 +
src/include/storage/standby.h | 1 +
src/include/utils/combocid.h | 4 +
src/include/utils/memutils.h | 4 +
src/include/utils/memutils_internal.h | 13 +-
src/include/utils/numeric.h | 2 +
src/include/utils/rel.h | 3 +
src/include/utils/relcache.h | 5 +
src/include/utils/snapshot.h | 10 +
src/include/utils/tuplesortstate.h | 285 ++++++++++++++++++++
src/tools/pgindent/typedefs.list | 133 +++++++++
77 files changed, 1677 insertions(+), 391 deletions(-)
create mode 100644 src/include/utils/tuplesortstate.h
diff --git a/src/backend/access/common/relation.c b/src/backend/access/common/relation.c
index 22c4cd5a256..80f9d947847 100644
--- a/src/backend/access/common/relation.c
+++ b/src/backend/access/common/relation.c
@@ -64,9 +64,11 @@ relation_open(Oid relationId, LOCKMODE lockmode)
* If we didn't get the lock ourselves, assert that caller holds one,
* except in bootstrap mode where no locks are used.
*/
+/*
Assert(lockmode != NoLock ||
IsBootstrapProcessingMode() ||
CheckRelationLockedByMe(r, AccessShareLock, true));
+*/
/* Make note that we've accessed a temporary relation */
if (RelationUsesLocalBuffers(r))
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..4057f6a1a63 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -35,6 +35,9 @@
#include "utils/memutils.h"
#include "utils/rel.h"
+/* TODO: Move under contrib/vci. */
+static void validateVciColumnIds(const char *values);
+
/*
* Contents of pg_class.reloptions
*
@@ -557,6 +560,34 @@ static relopt_enum enumRelOpts[] =
static relopt_string stringRelOpts[] =
{
+ /* TODO: Move under contrib/vci. */
+ {
+ /* TODO: fix comment */
+ {
+ "vci_column_ids",
+ "ID for searching columns that are (or will be) indexed by VCI from vci_columns table",
+ RELOPT_KIND_VCI,
+ AccessExclusiveLock
+ },
+ 0,
+ false,
+ validateVciColumnIds,
+ NULL,
+ ""
+ },
+ {
+ {
+ "vci_dropped_column_ids",
+ "ID for storing dropped columns in indexed table",
+ RELOPT_KIND_VCI,
+ AccessExclusiveLock
+ },
+ 0,
+ false,
+ validateVciColumnIds,
+ NULL,
+ ""
+ },
/* list terminator */
{{NULL}}
};
@@ -1907,6 +1938,12 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
offsetof(StdRdOptions, vacuum_truncate), offsetof(StdRdOptions, vacuum_truncate_set)},
{"vacuum_max_eager_freeze_failure_rate", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, vacuum_max_eager_freeze_failure_rate)}
+ ,
+ /* TODO: Move under contrib/vci */
+ {"vci_column_ids", RELOPT_TYPE_STRING,
+ offsetof(StdRdOptions, vci_column_ids_offset)},
+ {"vci_dropped_column_ids", RELOPT_TYPE_STRING,
+ offsetof(StdRdOptions, vci_dropped_column_ids_offset)}
};
return (bytea *) build_reloptions(reloptions, validate, kind,
@@ -2162,3 +2199,17 @@ AlterTableGetRelOptionsLockLevel(List *defList)
return lockmode;
}
+
+/*
+ * validateVciColumnIds
+ *		Placeholder validator for the "vci_column_ids" and
+ *		"vci_dropped_column_ids" string reloptions.
+ *
+ * A NULL value is rejected with ERRCODE_INVALID_PARAMETER_VALUE; any other
+ * value is accepted without inspection and merely reported at DEBUG2.
+ *
+ * TODO: Move under contrib/vci.
+ */
+static void
+validateVciColumnIds(const char *values)
+{
+	if (values != NULL)
+	{
+		/* Nothing is actually validated yet; just trace the value. */
+		elog(DEBUG2, "Empty process of validateVciColumnIds: %s", values);
+		return;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			 errmsg("Dummy validation for vci_column_ids and vci_dropped_column_ids")));
+}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c1a4de14a59..72703c915f1 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -54,6 +54,7 @@
#include "utils/spccache.h"
#include "utils/syscache.h"
+/*
+ * Optional hook, NULL when unset. heap_delete() and heap_update() call it
+ * just before returning TM_Ok, passing the relation being modified, the TID
+ * of the deleted / old tuple, and the xmin read from that tuple's header
+ * earlier in the function.
+ *
+ * NOTE(review): the first parameter is named indexRelation, but both callers
+ * pass their own "relation" argument -- confirm which relation is intended.
+ */
+void (*add_index_delete_hook) (Relation indexRelation, ItemPointer heap_tid, TransactionId xmin) = NULL;
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
@@ -343,6 +344,9 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
* results for a non-MVCC snapshot, the caller must hold some higher-level
* lock that ensures the interesting tuple(s) won't change.)
*/
+ if (keep_startblock)
+ goto skip_get_number_of_blocks;
+
if (scan->rs_base.rs_parallel != NULL)
{
bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel;
@@ -351,6 +355,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
else
scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_base.rs_rd);
+skip_get_number_of_blocks:
+
/*
* If the table is large relative to NBuffers, use a bulk-read access
* strategy and enable synchronized scanning (see syncscan.c). Although
@@ -2762,6 +2768,7 @@ heap_delete(Relation relation, ItemPointer tid,
bool all_visible_cleared = false;
HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
bool old_key_copied = false;
+ TransactionId old_xmin;
Assert(ItemPointerIsValid(tid));
@@ -3006,6 +3013,8 @@ l1:
xid, LockTupleExclusive, true,
&new_xmax, &new_infomask, &new_infomask2);
+ old_xmin = HeapTupleHeaderGetXmin(tp.t_data);
+
START_CRIT_SECTION();
/*
@@ -3151,6 +3160,9 @@ l1:
if (old_key_tuple != NULL && old_key_copied)
heap_freetuple(old_key_tuple);
+ if (add_index_delete_hook)
+ add_index_delete_hook(relation, tid, old_xmin);
+
return TM_Ok;
}
@@ -3253,6 +3265,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
infomask2_old_tuple,
infomask_new_tuple,
infomask2_new_tuple;
+ TransactionId old_xmin;
Assert(ItemPointerIsValid(otid));
@@ -3697,6 +3710,8 @@ l2:
&xmax_old_tuple, &infomask_old_tuple,
&infomask2_old_tuple);
+ old_xmin = HeapTupleHeaderGetRawXmin(oldtup.t_data);
+
/*
* And also prepare an Xmax value for the new copy of the tuple. If there
* was no xmax previously, or there was one but all lockers are now gone,
@@ -4180,6 +4195,9 @@ l2:
bms_free(modified_attrs);
bms_free(interesting_attrs);
+ if (add_index_delete_hook)
+ add_index_delete_hook(relation, otid, old_xmin);
+
return TM_Ok;
}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77..ea9d940eaa9 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -46,6 +46,9 @@
#include "utils/builtins.h"
#include "utils/rel.h"
+/*
+ * The heap tuple currently being processed by
+ * heapam_index_build_range_scan(). It is assigned there for each tuple,
+ * right before the heap-only-tuple check that precedes the callback
+ * invocation. Presumably read by an index build callback that needs the
+ * original tuple and not only values[] / isnull[] -- confirm against the
+ * consumer.
+ */
+HeapTuple IndexHeapTuple;
+
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@@ -1658,6 +1661,7 @@ heapam_index_build_range_scan(Relation heapRelation,
* some index AMs want to do further processing on the data first. So
* pass the values[] and isnull[] arrays, instead.
*/
+ IndexHeapTuple = heapTuple;
if (HeapTupleIsHeapOnly(heapTuple))
{
diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index 05f6946fe60..abb12264c62 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -78,6 +78,8 @@
#include "utils/builtins.h"
#include "utils/snapmgr.h"
+/*
+ * Optional visibility hook. HeapTupleSatisfiesVisibility() calls it for
+ * snapshots of type SNAPSHOT_VCI_WOS2ROS or SNAPSHOT_VCI_LOCALROS and returns
+ * its result. While the hook is unset, such snapshots fall through to the
+ * function's final "return false".
+ */
+bool (*add_snapshot_satisfies_hook) (HeapTuple tup, Snapshot snapshot, Buffer buffer);
+
/*
* SetHintBits()
@@ -1791,6 +1793,10 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
case SNAPSHOT_NON_VACUUMABLE:
return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
+ case SNAPSHOT_VCI_WOS2ROS:
+ case SNAPSHOT_VCI_LOCALROS:
+ if (add_snapshot_satisfies_hook)
+ return add_snapshot_satisfies_hook(htup, snapshot, buffer);
}
return false; /* keep compiler quiet */
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..7358a88d9cd 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -6444,3 +6444,228 @@ xact_redo(XLogReaderState *record)
else
elog(PANIC, "xact_redo: unknown op code %u", info);
}
+
+/*
+ * pef_save_transaction_state
+ *		Make a palloc'd deep copy of the current transaction state stack.
+ *
+ * Starting at CurrentTransactionState and following the parent links, every
+ * TransactionStateData is copied together with its childXids array.  The
+ * copies are chained through their own parent pointers in the same order as
+ * the originals.
+ *
+ * Returns a pointer to the copy of CurrentTransactionState (the head of the
+ * copied chain), or NULL if CurrentTransactionState is NULL.  Everything is
+ * allocated with palloc(), i.e. in the caller's current memory context.
+ */
+TransactionStateData *
+pef_save_transaction_state(void)
+{
+	TransactionStateData *res = NULL;
+	TransactionStateData *child = NULL;
+	TransactionStateData *src;
+
+	for (src = CurrentTransactionState; src != NULL; src = src->parent)
+	{
+		TransactionStateData *dst = palloc(sizeof(TransactionStateData));
+
+		memcpy(dst, src, sizeof(TransactionStateData));
+
+		/*
+		 * Give the copy a private childXids array holding exactly the
+		 * entries in use.  maxChildXids must describe that allocation, not
+		 * the source's, or a later append to the copy could write past the
+		 * end of the array.  Skip memcpy() when there is nothing to copy:
+		 * the source pointer may be NULL then, which memcpy() does not
+		 * permit even for a length of zero.
+		 */
+		if (src->nChildXids > 0)
+		{
+			Size		nbytes = sizeof(*src->childXids) * src->nChildXids;
+
+			dst->childXids = palloc(nbytes);
+			memcpy(dst->childXids, src->childXids, nbytes);
+		}
+		else
+			dst->childXids = NULL;
+		dst->maxChildXids = dst->nChildXids;
+
+		/* Hook the previously copied (child) level up to this copy. */
+		if (child)
+			child->parent = dst;
+		else
+			res = dst;
+		child = dst;
+	}
+
+	return res;
+}
+
+/*
+ * pef_copy_transaction_state
+ *		Recursively install a saved transaction state chain in this process.
+ *
+ * 'src' is one level of a chain such as pef_save_transaction_state() builds.
+ * The top level (the one without a parent) is copied over the static
+ * TopTransactionStateData; every other level gets a palloc'd copy.  Parents
+ * are rebuilt first, so that each level can create its memory context and
+ * resource owner beneath its parent's.  The top level additionally sets
+ * TopTransactionContext / TopTransactionResourceOwner and takes a fresh
+ * local transaction id, which it locks and advertises in MyProc.
+ *
+ * Returns the rebuilt state for 'src', or NULL if 'src' is NULL.
+ */
+static TransactionStateData *
+pef_copy_transaction_state(TransactionStateData *src)
+{
+	TransactionStateData *dst;
+
+	if (!src)
+		return NULL;
+
+	if (src->parent)
+		dst = (TransactionState) palloc(sizeof(TransactionStateData));
+	else
+		dst = &TopTransactionStateData;
+
+	memcpy(dst, src, sizeof(TransactionStateData));
+
+	/*
+	 * Give the copy a private childXids array holding exactly the entries in
+	 * use.  maxChildXids must describe that allocation, not the source's, or
+	 * a later append could write past the end of the array.  Skip memcpy()
+	 * when there is nothing to copy: the source pointer may be NULL then,
+	 * which memcpy() does not permit even for a length of zero.
+	 */
+	if (src->nChildXids > 0)
+	{
+		Size		nbytes = sizeof(*src->childXids) * src->nChildXids;
+
+		dst->childXids = palloc(nbytes);
+		memcpy(dst->childXids, src->childXids, nbytes);
+	}
+	else
+		dst->childXids = NULL;
+	dst->maxChildXids = dst->nChildXids;
+
+	/* Rebuild the ancestors first; their context/owner are needed below. */
+	dst->parent = pef_copy_transaction_state(src->parent);
+
+	if (dst->parent)
+	{
+		/* Subtransaction level: hang context and owner off the parent's. */
+		dst->curTransactionContext =
+			AllocSetContextCreate(dst->parent->curTransactionContext,
+								  "CurTransactionContext",
+								  ALLOCSET_DEFAULT_SIZES);
+		dst->curTransactionOwner =
+			ResourceOwnerCreate(dst->parent->curTransactionOwner, "SubTransaction");
+	}
+	else
+	{
+		VirtualTransactionId vxid;
+
+		/* We are in the main transaction state level */
+
+		/*
+		 * If we already have a BG Worker resource owner (possibly created for
+		 * initializing shm queues), use it.  Otherwise, create a new one
+		 * here.
+		 */
+		dst->curTransactionOwner =
+			(CurrentResourceOwner ? CurrentResourceOwner :
+			 ResourceOwnerCreate(NULL, "Background Worker"));
+
+		dst->curTransactionContext =
+			AllocSetContextCreate(TopMemoryContext,
+								  "TopTransactionContext",
+								  ALLOCSET_DEFAULT_SIZES);
+
+		/* Create a toplevel context and resource owner for the transaction. */
+		TopTransactionContext = dst->curTransactionContext;
+		TopTransactionResourceOwner = dst->curTransactionOwner;
+
+		/*
+		 * To initialize local transactionId, following code is taken from
+		 * StartTransaction().  We possibly need to bring in more code here.
+		 */
+
+		/*
+		 * Assign a new LocalTransactionId, and combine it with the proc
+		 * number to form a virtual transaction id.
+		 */
+		vxid.procNumber = MyProcNumber;
+		vxid.localTransactionId = GetNextLocalTransactionId();
+
+		/*
+		 * Lock the virtual transaction id before we announce it in the proc
+		 * array
+		 */
+		VirtualXactLockTableInsert(vxid);
+
+		/*
+		 * Advertise it in the proc array.  We assume assignment of
+		 * LocalTransactionID is atomic, and the proc number should be set
+		 * already.
+		 */
+		Assert(MyProc->vxid.procNumber == vxid.procNumber);
+		MyProc->vxid.lxid = vxid.localTransactionId;
+	}
+
+	return dst;
+}
+
+/*
+ * pef_restore_transaction_state
+ *		Rebuild a saved transaction state chain and make it current.
+ *
+ * 'src' points to the head TransactionStateData of a saved chain; it is
+ * passed as void * and cast to TransactionState here.  The chain is rebuilt
+ * with pef_copy_transaction_state(), after which CurrentTransactionState,
+ * CurTransactionContext, CurrentResourceOwner and CurTransactionResourceOwner
+ * are all pointed at the rebuilt head level.
+ *
+ * Switches to the rebuilt CurTransactionContext and returns the memory
+ * context that was current before the switch.  Raises an ERROR if 'src' is
+ * NULL.
+ */
+MemoryContext
+pef_restore_transaction_state(void *src)
+{
+	TransactionStateData *dst;
+
+	/*
+	 * pef_copy_transaction_state() maps NULL to NULL; fail cleanly here
+	 * rather than dereferencing a NULL state below.
+	 */
+	if (src == NULL)
+		elog(ERROR, "cannot restore transaction state: no saved state supplied");
+
+	dst = pef_copy_transaction_state((TransactionState) src);
+
+	CurrentTransactionState = dst;
+	CurTransactionContext = dst->curTransactionContext;
+	CurrentResourceOwner = dst->curTransactionOwner;
+	CurTransactionResourceOwner = dst->curTransactionOwner;
+
+	return MemoryContextSwitchTo(CurTransactionContext);
+}
+
+/*
+ * pef_cleanup_transaction_state()
+ * Tear down, at worker exit, the transaction state the worker restored.
+ *
+ * The restored state must not go through a regular commit or abort. The
+ * current state is therefore first reset to TRANS_DEFAULT / TBLOCK_DEFAULT
+ * with no parent. Then, with interrupts held, LWLocks and buffer locks are
+ * released and the end-of-transaction cleanup routines are run as for an
+ * abort (XACT_EVENT_ABORT callbacks, resource owner release, AtEOXact_*
+ * calls).
+ *
+ * NOTE(review): the call sequence looks modeled on AbortTransaction();
+ * confirm it is kept in step with that function.
+ */
+void
+pef_cleanup_transaction_state(void)
+{
+ TransactionState s = CurrentTransactionState;
+
+ /*
+ * We don't bother about cleaning up memory, because we know this is the
+ * end phase, and the worker is going to die. But we do want to reset the
+ * transaction state such that Abort or Commit won't be called.
+ */
+ s->state = TRANS_DEFAULT;
+ s->blockState = TBLOCK_DEFAULT;
+ s->parent = NULL;
+
+ /* Prevent cancel/die interrupt while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /* Make sure we have a valid memory context and resource owner */
+ AtAbort_Memory();
+ AtAbort_ResourceOwner();
+
+ /*
+ * Release any LW locks we might be holding as quickly as possible.
+ * (Regular locks, however, must be held till we finish aborting.)
+ * Releasing LW locks is critical since we might try to grab them again
+ * while cleaning up!
+ */
+ LWLockReleaseAll();
+
+ /* Clean up buffer content locks, too */
+ UnlockBuffers();
+
+ /*
+ * Post-abort cleanup. See notes in CommitTransaction() concerning
+ * ordering. We can skip all of it if the transaction failed before
+ * creating a resource owner.
+ */
+ if (TopTransactionResourceOwner != NULL)
+ {
+ /* Let registered transaction callbacks see this as an abort. */
+ CallXactCallbacks(XACT_EVENT_ABORT);
+
+ /* Resource owner release, phase 1 of 3: before locks. */
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_BEFORE_LOCKS,
+ false, true);
+ AtEOXact_Buffers(false);
+ AtEOXact_RelationCache(false);
+ AtEOXact_TypeCache();
+ AtEOXact_Inval(false);
+ smgrDoPendingDeletes(false);
+ AtEOXact_MultiXact();
+ /* Phases 2 and 3: the locks themselves, then everything after. */
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_LOCKS,
+ false, true);
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_AFTER_LOCKS,
+ false, true);
+
+ /* Remaining per-subsystem end-of-transaction cleanup. */
+ AtEOXact_GUC(false, 1);
+ AtEOXact_SPI(false);
+ AtEOXact_on_commit_actions(false);
+ AtEOXact_Namespace(false, false);
+ AtEOXact_SMgr();
+ AtEOXact_Files(false);
+ AtEOXact_ComboCid();
+ AtEOXact_HashTables(false);
+ AtEOXact_PgStat(false, true);
+ AtEOXact_LogicalRepWorkers(false);
+ pgstat_report_xact_timestamp(0);
+ }
+
+ /* Cleanup is done; interrupts may be serviced again. */
+ RESUME_INTERRUPTS();
+}
+
+/*
+ * pef_restore_start_timestamp
+ *		Overwrite the recorded transaction and statement start times.
+ *
+ * xactSt is stored into xactStartTimestamp and stmtSt into
+ * stmtStartTimestamp.  Nothing else is touched and nothing is returned.
+ */
+void
+pef_restore_start_timestamp(TimestampTz xactSt, TimestampTz stmtSt)
+{
+	stmtStartTimestamp = stmtSt;
+	xactStartTimestamp = xactSt;
+}
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 8c3090165f0..b0b02d7ac9d 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -607,6 +607,7 @@ pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
statestr = "not paused";
break;
case RECOVERY_PAUSE_REQUESTED:
+ case RECOVERY_VCI_PAUSE_REQUESTED:
statestr = "pause requested";
break;
case RECOVERY_PAUSED:
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 6ce979f2d8b..a10f63082d9 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -2947,7 +2947,8 @@ recoveryPausesHere(bool endOfRecovery)
ereport(LOG,
(errmsg("pausing at the end of recovery"),
errhint("Execute pg_wal_replay_resume() to promote.")));
- else
+ /* If pause requested by VCI, the log is not output. */
+ else if (GetRecoveryPauseState() != RECOVERY_VCI_PAUSE_REQUESTED)
ereport(LOG,
(errmsg("recovery has paused"),
errhint("Execute pg_wal_replay_resume() to continue.")));
@@ -3113,6 +3114,18 @@ SetRecoveryPause(bool recoveryPause)
ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV);
}
+/*
+ * SetVciRecoveryPause
+ * Request a recovery pause on behalf of VCI.
+ *
+ * While holding info_lck, moves recoveryPauseState from RECOVERY_NOT_PAUSED
+ * to RECOVERY_VCI_PAUSE_REQUESTED. Any other current state is left as is.
+ */
+void
+SetVciRecoveryPause(void)
+{
+ SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+
+ /* Only register the request if no pause is requested or in effect yet. */
+ if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
+ XLogRecoveryCtl->recoveryPauseState = RECOVERY_VCI_PAUSE_REQUESTED;
+
+ SpinLockRelease(&XLogRecoveryCtl->info_lck);
+}
+
/*
* Confirm the recovery pause by setting the recovery pause state to
* RECOVERY_PAUSED.
@@ -3122,7 +3135,8 @@ ConfirmRecoveryPaused(void)
{
/* If recovery pause is requested then set it paused */
SpinLockAcquire(&XLogRecoveryCtl->info_lck);
- if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
+ if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED ||
+ XLogRecoveryCtl->recoveryPauseState == RECOVERY_VCI_PAUSE_REQUESTED)
XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED;
SpinLockRelease(&XLogRecoveryCtl->info_lck);
}
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 18316a3968b..6255a01373b 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -85,6 +85,7 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+/*
+ * Optional hook, NULL when unset. doDeletion() calls it with the object and
+ * flags it was given, right after looking up the relation's relkind. If the
+ * hook returns true, doDeletion() breaks out of that branch without running
+ * the relkind-specific removal code that follows.
+ */
+bool (*add_drop_relation_hook) (const ObjectAddress *object, int flags) = NULL;
/*
* Deletion processing requires additional state for each ObjectAddress that
@@ -1357,6 +1358,12 @@ doDeletion(const ObjectAddress *object, int flags)
{
char relKind = get_rel_relkind(object->objectId);
+ if (add_drop_relation_hook)
+ {
+ if (add_drop_relation_hook(object, flags))
+ break;
+ }
+
if (relKind == RELKIND_INDEX ||
relKind == RELKIND_PARTITIONED_INDEX)
{
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 739a92bdcc1..cfdc85075fa 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -133,6 +133,7 @@ static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
+/*
+ * Optional hook, NULL when unset. reindex_index() calls it with the opened
+ * index relation. If the hook returns false, reindex_index() removes the
+ * index from the pending-reindex list, closes the index and heap relations
+ * while keeping their locks, and returns without rebuilding the index.
+ */
+bool (*add_reindex_index_hook) (Relation) = NULL;
/*
* relationHasPrimaryKey
@@ -342,12 +343,17 @@ ConstructTupleDescriptor(Relation heapRelation,
/* Simple index column */
const FormData_pg_attribute *from;
- Assert(atnum > 0); /* should've been caught above */
-
if (atnum > natts) /* safety check */
elog(ERROR, "invalid column number %d", atnum);
- from = TupleDescAttr(heapTupDesc,
- AttrNumberGetAttrOffset(atnum));
+ if (atnum > 0)
+ {
+ from = TupleDescAttr(heapTupDesc,
+ AttrNumberGetAttrOffset(atnum));
+ }
+ else
+ {
+ from = SystemAttributeDefinition(atnum);
+ }
to->atttypid = from->atttypid;
to->attlen = from->attlen;
@@ -3692,6 +3698,19 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
return;
}
+ if (add_reindex_index_hook)
+ {
+ if (!add_reindex_index_hook(iRel))
+ {
+ RemoveReindexPending(RelationGetRelid(iRel));
+
+ /* Close rels, but keep locks */
+ index_close(iRel, NoLock);
+ table_close(heapRelation, NoLock);
+ return;
+ }
+ }
+
if (progress)
pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
iRel->rd_rel->relam);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 786ee865f14..9a3d2ea844e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1223,6 +1223,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
((ForeignScan *) plan)->fs_base_relids);
break;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
*rels_used = bms_add_members(*rels_used,
((CustomScan *) plan)->custom_relids);
break;
@@ -1518,6 +1519,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
}
break;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
sname = "Custom Scan";
custom_name = ((CustomScan *) plan)->methods->CustomName;
if (custom_name)
@@ -1681,10 +1683,18 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainScanTarget((Scan *) plan, es);
break;
case T_ForeignScan:
- case T_CustomScan:
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
+ case T_CustomScan:
+ case T_CustomPlanMarkPos:
+ {
+ CustomScanState *css = (CustomScanState *) planstate;
+
+ if (css->methods->ExplainCustomPlanTargetRel)
+ css->methods->ExplainCustomPlanTargetRel(css, es);
+ }
+ break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
@@ -2156,6 +2166,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
show_foreignscan_info((ForeignScanState *) planstate, es);
break;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
{
CustomScanState *css = (CustomScanState *) planstate;
@@ -2416,6 +2427,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
"Subquery", NULL, es);
break;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
ExplainCustomChildren((CustomScanState *) planstate,
ancestors, es);
break;
@@ -4415,6 +4427,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_TidRangeScan:
case T_ForeignScan:
case T_CustomScan:
+ case T_CustomPlanMarkPos:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
@@ -4987,3 +5000,19 @@ ExplainFlushWorkersState(ExplainState *es)
pfree(wstate->worker_state_save);
pfree(wstate);
}
+
+/*
+ * ExplainPropertySortGroupKeys
+ *		Externally callable wrapper around show_sort_group_keys().
+ *
+ * planstate, qlabel, nkeys, keycols, ancestors and es are forwarded
+ * unchanged.  The argument following nkeys is always passed as 0 and the
+ * three arguments following keycols are always passed as NULL (presumably
+ * "no presorted keys" and "no sort operator / collation / nulls-first
+ * arrays" -- confirm against show_sort_group_keys()).
+ */
+void
+ExplainPropertySortGroupKeys(PlanState *planstate, const char *qlabel,
+							 int nkeys, AttrNumber *keycols,
+							 List *ancestors, ExplainState *es)
+{
+	show_sort_group_keys(planstate, qlabel, nkeys, 0, keycols,
+						 NULL, NULL, NULL, ancestors, es);
+}
+
+/*
+ * ExplainPropertyQual
+ * Externally callable wrapper around show_qual(); every argument is
+ * forwarded unchanged, in show_qual()'s own argument order.
+ */
+void
+ExplainPropertyQual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, ExplainState *es)
+{
+ show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
+}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 33c2106c17c..b112c4ec52d 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1098,71 +1098,74 @@ DefineIndex(Oid tableId,
}
}
-
- /*
- * We disallow indexes on system columns. They would not necessarily get
- * updated correctly, and they don't seem useful anyway.
- *
- * Also disallow virtual generated columns in indexes (use expression
- * index instead).
- */
- for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
- {
- AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
-
- if (attno < 0)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("index creation on system columns is not supported")));
-
-
- if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
- ereport(ERROR,
- errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- stmt->primary ?
- errmsg("primary keys on virtual generated columns are not supported") :
- stmt->isconstraint ?
- errmsg("unique constraints on virtual generated columns are not supported") :
- errmsg("indexes on virtual generated columns are not supported"));
- }
-
- /*
- * Also check for system and generated columns used in expressions or
- * predicates.
- */
- if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
+ /* Skip disallowing index creation of system columns for VCI access method */
+ if (strcmp(accessMethodName, "vci") != 0)
{
- Bitmapset *indexattrs = NULL;
- int j;
- pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
- pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
-
- for (int i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
+ /*
+ * We disallow indexes on system columns. They would not necessarily
+ * get updated correctly, and they don't seem useful anyway.
+ *
+ * Also disallow virtual generated columns in indexes (use expression
+ * index instead).
+ */
+ for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
- if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
- indexattrs))
+ AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
+
+ if (attno < 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("index creation on system columns is not supported")));
+
+ if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ stmt->primary ?
+ errmsg("primary keys on virtual generated columns are not supported") :
+ stmt->isconstraint ?
+ errmsg("unique constraints on virtual generated columns are not supported") :
+ errmsg("indexes on virtual generated columns are not supported"));
}
/*
- * XXX Virtual generated columns in index expressions or predicates
- * could be supported, but it needs support in
- * RelationGetIndexExpressions() and RelationGetIndexPredicate().
+ * Also check for system and generated columns used in expressions or
+ * predicates.
*/
- j = -1;
- while ((j = bms_next_member(indexattrs, j)) >= 0)
+ if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
{
- AttrNumber attno = j + FirstLowInvalidHeapAttributeNumber;
+ Bitmapset *indexattrs = NULL;
+ int j;
- if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- stmt->isconstraint ?
- errmsg("unique constraints on virtual generated columns are not supported") :
- errmsg("indexes on virtual generated columns are not supported")));
+ pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
+ pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
+
+ for (int i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
+ {
+ if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
+ indexattrs))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index creation on system columns is not supported")));
+ }
+
+ /*
+ * XXX Virtual generated columns in index expressions or
+ * predicates could be supported, but it needs support in
+ * RelationGetIndexExpressions() and RelationGetIndexPredicate().
+ */
+ j = -1;
+ while ((j = bms_next_member(indexattrs, j)) >= 0)
+ {
+ AttrNumber attno = j + FirstLowInvalidHeapAttributeNumber;
+
+ if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ stmt->isconstraint ?
+ errmsg("unique constraints on virtual generated columns are not supported") :
+ errmsg("indexes on virtual generated columns are not supported")));
+ }
}
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 54ad38247aa..7384bf3750d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -109,6 +109,10 @@
#include "utils/typcache.h"
#include "utils/usercontext.h"
+/*
+ * Optional hooks into ALTER TABLE processing; each is skipped while NULL.
+ *
+ * add_alter_tablespace_hook: called by ATExecSetTableSpace() right after the
+ * relation is opened. If it returns true, the relation is closed (keeping
+ * the lock) and ATExecSetTableSpace() returns without doing anything else.
+ *
+ * add_alter_table_change_owner_hook: called by ATExecChangeOwner() after
+ * change_owner_recurse_to_sequences(), with the relation's OID, its relkind
+ * and the new owner's OID.
+ *
+ * add_alter_table_change_schema_hook: called by
+ * AlterRelationNamespaceInternal() after the relation has been added to
+ * objsMoved, with the relation's OID, its relkind and the new namespace OID.
+ */
+bool (*add_alter_tablespace_hook) (Relation rel) = NULL;
+void (*add_alter_table_change_owner_hook) (Oid relOid, char relKind, Oid newOwnerId) = NULL;
+void (*add_alter_table_change_schema_hook) (Oid relOid, char relKind, Oid newNspOid) = NULL;
+
/*
* ON COMMIT action list
*/
@@ -16206,6 +16210,9 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock
/* If it has dependent sequences, recurse to change them too */
change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode);
+
+ if (add_alter_table_change_owner_hook)
+ add_alter_table_change_owner_hook(relationOid, tuple_class->relkind, newOwnerId);
}
InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0);
@@ -16755,6 +16762,112 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
}
table_close(pgclass, RowExclusiveLock);
+
+ /*
+ * Look up the index's access method, save the OID of its handler function
+ */
+ if (rel->rd_rel->relam)
+ {
+ Form_pg_am aform;
+ HeapTuple amtuple;
+
+ amtuple = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam));
+ if (!HeapTupleIsValid(amtuple))
+ elog(ERROR, "cache lookup failed for access method %u",
+ rel->rd_rel->relam);
+ aform = (Form_pg_am) GETSTRUCT(amtuple);
+
+ if (strcmp(NameStr(aform->amname), "vci") == 0)
+
+ /*
+ * if((rel->rd_am) && (strcmp(NameStr(rel->rd_am->amname), "vci")
+ * == 0))
+ */
+ {
+ Relation depRel,
+ viewRel;
+ Oid vci_relid,
+ viewrelid;
+ ScanKeyData key;
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ depRel = table_open(DependRelationId, AccessShareLock);
+ vci_relid = RelationGetRelid(rel);
+ ScanKeyInit(&key,
+ Anum_pg_depend_refobjid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(vci_relid));
+ scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+ NULL, 1, &key);
+ while (HeapTupleIsValid((tup = systable_getnext(scan))))
+ {
+ Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+ /* Retrieve objid of the internal function */
+ if (depform->classid == RelationRelationId)
+ {
+ viewrelid = depform->objid;
+ viewRel = table_open(viewrelid, AccessExclusiveLock);
+ pgclass = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(viewrelid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", viewrelid);
+
+ datum = (Datum) 0;
+
+ /* Generate new proposed reloptions (text array) */
+ newOptions = transformRelOptions(datum, defList, NULL, validnsps, false,
+ operation == AT_ResetRelOptions);
+
+ (void) view_reloptions(newOptions, true);
+
+ /*
+ * All we need do here is update the pg_class row; the new
+ * options will be propagated into relcaches during
+ * post-commit cache inval.
+ */
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ if (newOptions != (Datum) 0)
+ repl_val[Anum_pg_class_reloptions - 1] = newOptions;
+ else
+ repl_null[Anum_pg_class_reloptions - 1] = true;
+
+ repl_repl[Anum_pg_class_reloptions - 1] = true;
+
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
+ repl_val, repl_null, repl_repl);
+
+ /*
+ * simple_heap_update(pgclass, &newtuple->t_self,
+ * newtuple);
+ *
+ * CatalogUpdateIndexes(pgclass, newtuple);
+ */
+ /* Perform actual update */
+ CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);
+ InvokeObjectPostAlterHook(RelationRelationId, viewrelid, 0);
+
+ heap_freetuple(newtuple);
+
+ ReleaseSysCache(tuple);
+
+ table_close(pgclass, RowExclusiveLock);
+ table_close(viewRel, AccessExclusiveLock);
+
+ }
+ }
+
+ systable_endscan(scan);
+
+ table_close(depRel, AccessShareLock);
+ }
+
+ ReleaseSysCache(amtuple);
+ }
}
/*
@@ -16776,6 +16889,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
*/
rel = relation_open(tableOid, lockmode);
+ if (add_alter_tablespace_hook)
+ {
+ if (add_alter_tablespace_hook(rel))
+ {
+ relation_close(rel, NoLock);
+ return;
+ }
+ }
+
/* Check first if relation can be moved to new tablespace */
if (!CheckRelationTableSpaceMove(rel, newTableSpace))
{
@@ -19027,6 +19149,9 @@ AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
{
add_exact_object_address(&thisobj, objsMoved);
+ if (add_alter_table_change_schema_hook)
+ add_alter_table_change_schema_hook(relOid, classForm->relkind, newNspOid);
+
InvokeObjectPostAlterHook(RelationRelationId, relOid, 0);
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..1eb71c22ae4 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -129,6 +129,8 @@ static double compute_parallel_delay(void);
static VacOptValue get_vacoptval_from_boolean(DefElem *def);
static bool vac_tid_reaped(ItemPointer itemptr, void *state);
+bool (*add_skip_vacuum_hook) (Relation rel);
+
/*
* GUC check function to ensure GUC value specified is within the allowable
* range.
@@ -2123,6 +2125,20 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
return false;
}
+ /*
+ * Silently ignore if it's a VCI internal table.
+ */
+ if (add_skip_vacuum_hook)
+ {
+ if (add_skip_vacuum_hook(rel))
+ {
+ relation_close(rel, lmode);
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ return false;
+ }
+ }
+
/*
* Silently ignore tables that are temp tables of other backends ---
* trying to vacuum these will lead to great unhappiness, since their
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 1d0e8ad57b4..fff1b85ff59 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -437,6 +437,7 @@ ExecSupportsMarkRestore(Path *pathnode)
return true;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
if (castNode(CustomPath, pathnode)->flags & CUSTOMPATH_SUPPORT_MARK_RESTORE)
return true;
return false;
@@ -563,6 +564,7 @@ ExecSupportsBackwardScan(Plan *node)
return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan);
case T_CustomScan:
+ case T_CustomPlanMarkPos:
if (((CustomScan *) node)->flags & CUSTOMPATH_SUPPORT_BACKWARD_SCAN)
return true;
return false;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index f1569879b52..5c29ab337af 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -68,7 +68,6 @@ typedef struct ExprSetupInfo
List *multiexpr_subplans;
} ExprSetupInfo;
-static void ExecReadyExpr(ExprState *state);
static void ExecInitExprRec(Expr *node, ExprState *state,
Datum *resv, bool *resnull);
static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args,
@@ -77,7 +76,6 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args,
static void ExecInitSubPlanExpr(SubPlan *subplan,
ExprState *state,
Datum *resv, bool *resnull);
-static void ExecCreateExprSetupSteps(ExprState *state, Node *node);
static void ExecPushExprSetupSteps(ExprState *state, ExprSetupInfo *info);
static bool expr_setup_walker(Node *node, ExprSetupInfo *info);
static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
@@ -898,7 +896,7 @@ ExecCheck(ExprState *state, ExprContext *econtext)
* Therefore this should be used instead of directly calling
* ExecReadyInterpretedExpr().
*/
-static void
+void
ExecReadyExpr(ExprState *state)
{
if (jit_compile_expr(state))
@@ -2877,7 +2875,7 @@ ExecInitSubPlanExpr(SubPlan *subplan,
* Add expression steps performing setup that's needed before any of the
* main execution of the expression.
*/
-static void
+void
ExecCreateExprSetupSteps(ExprState *state, Node *node)
{
ExprSetupInfo info = {0, 0, 0, 0, 0, NIL};
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 8a72b5e70a4..f25a2370b33 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -188,6 +188,21 @@ static pg_attribute_always_inline void ExecAggPlainTransByRef(AggState *aggstate
int setno);
static char *ExecGetJsonValueItemString(JsonbValue *item, bool *resnull);
+ExprEvalVar_hook_type ExprEvalVar_hook = NULL;
+ExprEvalParam_hook_type ExprEvalParam_hook = NULL;
+/*
+ * VciExprEvalVarHook
+ *		Out-of-line entry point that forwards to ExprEvalVar_hook.
+ *
+ * The JIT expression compiler refers to this function by name for
+ * EEOP_VCI_VAR steps.  The hook must already be installed; this is only
+ * verified by an Assert.
+ */
+void
+VciExprEvalVarHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	Assert(ExprEvalVar_hook);
+
+	ExprEvalVar_hook(state, op, econtext);
+}
+/*
+ * VciExprEvalParamHook
+ *		Out-of-line entry point that forwards to ExprEvalParam_hook.
+ *
+ * The JIT expression compiler refers to this function by name for
+ * EEOP_VCI_PARAM_EXEC steps.  The hook must already be installed; this is
+ * only verified by an Assert.
+ */
+void
+VciExprEvalParamHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	Assert(ExprEvalParam_hook);
+
+	ExprEvalParam_hook(state, op, econtext);
+}
+
/*
* ScalarArrayOpExprHashEntry
* Hash table entry type used during EEOP_HASHED_SCALARARRAYOP
@@ -592,6 +607,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
&&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
&&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
&&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
+ &&CASE_EEOP_VCI_VAR,
+ &&CASE_EEOP_VCI_PARAM_EXEC,
&&CASE_EEOP_LAST
};
@@ -2265,6 +2282,25 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_NEXT();
}
+ EEO_CASE(EEOP_VCI_VAR)
+ {
+ /* TO-do */
+ Assert(ExprEvalVar_hook != NULL);
+ (*ExprEvalVar_hook) (state, op, econtext);
+
+ EEO_NEXT();
+ }
+
+ EEO_CASE(EEOP_VCI_PARAM_EXEC)
+ {
+ /* To-do */
+ Assert(ExprEvalParam_hook != NULL);
+ (*ExprEvalParam_hook) (state, op, econtext);
+
+ EEO_NEXT();
+ }
+
+
EEO_CASE(EEOP_LAST)
{
/* unreachable */
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 255bd795361..a9d212aaec6 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -485,18 +485,11 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
MemoryContextSwitchTo(hashtable->tablecxt);
- /*
- * Copy the first tuple into the table context, and request
- * additionalsize extra bytes before the allocation.
- *
- * The caller can get a pointer to the additional data with
- * TupleHashEntryGetAdditional(), and store arbitrary data there.
- * Placing both the tuple and additional data in the same
- * allocation avoids the need to store an extra pointer in
- * TupleHashEntryData or allocate an additional chunk.
- */
- entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,
- hashtable->additionalsize);
+ entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+ if (hashtable->additionalsize > 0)
+ entry->additional = palloc0(hashtable->additionalsize);
+ else
+ entry->additional = NULL;
}
}
else
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index f5f9cfbeead..60fd2a4e46a 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -287,6 +287,7 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
break;
case T_CustomScan:
+ case T_CustomPlanMarkPos:
result = (PlanState *) ExecInitCustomScan((CustomScan *) node,
estate, eflags);
break;
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 90726949a87..99f7c600520 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -139,7 +139,7 @@ ExecScanReScan(ScanState *node)
*/
if (IsA(node->ps.plan, ForeignScan))
relids = ((ForeignScan *) node->ps.plan)->fs_base_relids;
- else if (IsA(node->ps.plan, CustomScan))
+ else if (IsA(node->ps.plan, CustomScan) || IsA(node->ps.plan, CustomPlanMarkPos))
relids = ((CustomScan *) node->ps.plan)->custom_relids;
else
elog(ERROR, "unexpected scan node: %d",
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 772c86e70e9..6b8b08ebaf9 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -850,8 +850,10 @@ ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel)
* than the minimum.
*/
rel = table_open(rte->relid, NoLock);
+/*
Assert(rte->rellockmode == AccessShareLock ||
CheckRelationLockedByMe(rel, rte->rellockmode, false));
+*/
}
else
{
diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c
index ac2196b64c7..2eaef2f2d86 100644
--- a/src/backend/executor/nodeCustom.c
+++ b/src/backend/executor/nodeCustom.c
@@ -49,6 +49,22 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
css->ss.ps.state = estate;
css->ss.ps.ExecProcNode = ExecCustomScan;
+ if (strcmp(cscan->methods->CustomName, "VCI Scan") == 0 ||
+ strcmp(cscan->methods->CustomName, "VCI Sort") == 0 ||
+ strcmp(cscan->methods->CustomName, "VCI Aggregate") == 0 ||
+ strcmp(cscan->methods->CustomName, "VCI HashAggregate") == 0 ||
+ strcmp(cscan->methods->CustomName, "VCI GroupAggregate") == 0 ||
+ strcmp(cscan->methods->CustomName, "VCI Gather") == 0)
+ {
+ /*
+ * The callback of custom-scan provider applies the final
+ * initialization of the custom-scan-state node according to its
+ * logic.
+ */
+ css->methods->BeginCustomScan(css, estate, eflags);
+ return css;
+ }
+
/* create expression context for node */
ExecAssignExprContext(estate, &css->ss.ps);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 46d533b7288..258b60ca2de 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -70,6 +70,7 @@
#include "utils/rel.h"
#include "utils/snapmgr.h"
+List *(*add_should_index_insert_hook) (ResultRelInfo *, TupleTableSlot *, ItemPointer, EState *) = NULL;
typedef struct MTTargetRelLookup
{
@@ -2330,6 +2331,15 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
NULL, NIL,
(updateCxt->updateIndexes == TU_Summarizing));
+ /*
+ * VCI update hook
+ */
+ else if (resultRelInfo->ri_NumIndices > 0 && !updateCxt->updateIndexes)
+ {
+ if (add_should_index_insert_hook)
+ recheckIndexes = add_should_index_insert_hook(resultRelInfo, slot, &slot->tts_tid, context->estate);
+ }
+
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index f7f6fc2da0b..a957e5ce1a4 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -1334,3 +1334,9 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent)
parent->chgParam = bms_add_member(parent->chgParam, paramid);
}
}
+
+/*
+ * ExecSubPlanExternal
+ *		Thin wrapper: passes its arguments straight to ExecSubPlan() and
+ *		returns whatever that returns.
+ */
+Datum
+ExecSubPlanExternal(SubPlanState *node, ExprContext *econtext, bool *isNull)
+{
+	Datum		result;
+
+	result = ExecSubPlan(node, econtext, isNull);
+
+	return result;
+}
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 890bcb0b0a7..a312d98546b 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -2940,6 +2940,18 @@ llvm_compile_expr(ExprState *state)
LLVMBuildBr(b, opblocks[opno + 1]);
break;
+ case EEOP_VCI_VAR:
+ build_EvalXFunc(b, mod, "VciExprEvalVarHook",
+ v_state, op, v_econtext);
+ LLVMBuildBr(b, opblocks[opno + 1]);
+ break;
+
+ case EEOP_VCI_PARAM_EXEC:
+ build_EvalXFunc(b, mod, "VciExprEvalParamHook",
+ v_state, op, v_econtext);
+ LLVMBuildBr(b, opblocks[opno + 1]);
+ break;
+
case EEOP_LAST:
Assert(false);
break;
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c
index dbe0282e98f..6d6dc3993b9 100644
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -181,4 +181,6 @@ void *referenced_functions[] =
strlen,
varsize_any,
ExecInterpExprStillValid,
+ VciExprEvalParamHook,
+ VciExprEvalVarHook,
};
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..867701f9d36 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -153,6 +153,8 @@ my @extra_tags = qw(
AllocSetContext GenerationContext SlabContext BumpContext
TIDBitmap
WindowObjectData
+ CustomPlanMarkPos
+ SmcAllocSetContext
);
# This is a regular node, but we skip parsing it from its header file
@@ -679,10 +681,21 @@ foreach my $n (@node_types)
my $struct_no_equal = (elem $n, @no_equal);
next if $struct_no_copy && $struct_no_equal;
- print $cfs "\t\tcase T_${n}:\n"
- . "\t\t\tretval = _copy${n}(from);\n"
- . "\t\t\tbreak;\n"
- unless $struct_no_copy;
+ if($n eq 'CustomScan')
+ {
+ print $cfs "\t\tcase T_${n}:\n"
+ . "\t\tcase T_CustomPlanMarkPos:\n"
+ . "\t\t\tretval = _copy${n}(from);\n"
+ . "\t\t\tbreak;\n"
+ unless $struct_no_copy;
+ }
+ else
+ {
+ print $cfs "\t\tcase T_${n}:\n"
+ . "\t\t\tretval = _copy${n}(from);\n"
+ . "\t\t\tbreak;\n"
+ unless $struct_no_copy;
+ }
print $efs "\t\tcase T_${n}:\n"
. "\t\t\tretval = _equal${n}(a, b);\n"
@@ -691,13 +704,35 @@ foreach my $n (@node_types)
next if elem $n, @custom_copy_equal;
- print $cff "
-static $n *
-_copy${n}(const $n *from)
-{
-\t${n} *newnode = makeNode($n);
-
-" unless $struct_no_copy;
+ if ($n eq 'CustomScan')
+ {
+ print $cff "static $n *\n"
+ . "_copy${n}(const $n *from)\n"
+ . "{\n"
+ . "\tCustomScan *newnode;\n\n"
+ . "\tif (strcmp(from->methods->CustomName, \"VCI Scan\") == 0 ||
+ strcmp(from->methods->CustomName, \"VCI Sort\") == 0 ||
+ strcmp(from->methods->CustomName, \"VCI Aggregate\") == 0 ||
+ strcmp(from->methods->CustomName, \"VCI HashAggregate\") == 0 ||
+ strcmp(from->methods->CustomName, \"VCI GroupAggregate\") == 0 ||
+ strcmp(from->methods->CustomName, \"VCI Gather\") == 0)\n"
+ . "\t{\n"
+ . "\t\tnewnode = from->methods->CopyCustomPlan(from);\n"
+ . "\t}\n"
+ . "\telse\n"
+ . "\t\tnewnode = makeNode(CustomScan);\n\n"
+ . "\t((Node *) newnode)->type = nodeTag(from);\n\n" unless $struct_no_copy;
+ }
+ else
+ {
+ print $cff
+ "static $n *\n"
+ . "_copy${n}(const $n *from)\n"
+ . "{\n"
+ ."\t${n} *newnode = makeNode($n);\n\n"
+
+ unless $struct_no_copy;
+ }
print $eff "
static bool
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index ec5946c5777..219f344da02 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -25,11 +25,11 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+ExecInitParam_hook_type ExecInitParam_hook;
static void paramlist_parser_setup(ParseState *pstate, void *arg);
static Node *paramlist_param_ref(ParseState *pstate, ParamRef *pref);
-
/*
* Allocate and initialize a new ParamListInfo structure.
*
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 905250b3325..bc956a25904 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -47,6 +47,7 @@
#include "port/pg_bitutils.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
+#include "utils/guc.h"
/* Bitmask flags for pushdown_safety_info.unsafeFlags */
@@ -768,6 +769,8 @@ static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
+ char *isVCIEnabled;
+ bool is_partition = false;
/*
* We don't support pushing join clauses into the quals of a seqscan, but
@@ -793,6 +796,20 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/* If appropriate, consider parallel sequential scan */
if (rel->consider_parallel && required_outer == NULL)
create_plain_partial_paths(root, rel);
+ /*
+ * Call isRelHasVCIIndex() only after create_plain_partial_paths(): we
+ * want the regular (OSS) parallel scan to keep working on tables that
+ * have a VCI index, but we want to disable the other Gather-based plans
+ * (such as parallel_loop and parallel_agg) on those tables.
+ * Don't do this for partitioned tables or partitions, because parallel
+ * scans on partitioned tables require Gather plans.
+ */
+ if (isRelHasVCIIndex(rte->relid, &is_partition) && (bms_membership(root->all_baserels) == BMS_SINGLETON) &&
+ !is_partition)
+ {
+ isVCIEnabled = GetConfigOptionByName("vci.enable", NULL, false);
+ if (strcmp(isVCIEnabled, "on") == 0)
+ rel->consider_parallel = false;
+ }
/* Consider index scans */
create_index_paths(root, rel);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..269d80de9b6 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -175,8 +175,8 @@ static Node *fix_indexqual_clause(PlannerInfo *root,
static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
static List *order_qual_clauses(PlannerInfo *root, List *clauses);
+void copy_plan_costsize(Plan *dest, Plan *src);
static void copy_generic_path_info(Plan *dest, Path *src);
-static void copy_plan_costsize(Plan *dest, Plan *src);
static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
double limit_tuples);
static void label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
@@ -5460,7 +5460,7 @@ copy_generic_path_info(Plan *dest, Path *src)
* Copy cost and size info from a lower plan node to an inserted node.
* (Most callers alter the info after copying it.)
*/
-static void
+void
copy_plan_costsize(Plan *dest, Plan *src)
{
dest->disabled_nodes = src->disabled_nodes;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 16756152b71..2b5e9f8d632 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -139,6 +139,9 @@ int Log_autovacuum_min_duration = 600000;
#define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
#define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
+/* Flag to tell if we are in an autovacuum launcher process */
+static bool vci_am_autovacuum_launcher = false;
+
/*
* Variables to save the cost-related storage parameters for the current
* relation being vacuumed by this autovacuum worker. Using these, we can
@@ -361,6 +364,20 @@ static void check_av_worker_gucs(void);
* AUTOVACUUM LAUNCHER CODE
********************************************************************/
+/*
+ * vci_AutovacuumLauncherIAm
+ *		Set the process-local vci_am_autovacuum_launcher flag.
+ *
+ * This needs to be set from the outside, before InitProcess is called.
+ */
+void
+vci_AutovacuumLauncherIAm(void)
+{
+	vci_am_autovacuum_launcher = true;
+}
+/*
+ * vci_AutovacuumLauncherNotIAm
+ *		Clear the process-local vci_am_autovacuum_launcher flag.
+ */
+void
+vci_AutovacuumLauncherNotIAm(void)
+{
+	vci_am_autovacuum_launcher = false;
+}
+
/*
* Main entry point for the autovacuum launcher process.
*/
@@ -369,6 +386,8 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len)
{
sigjmp_buf local_sigjmp_buf;
+ vci_am_autovacuum_launcher = true;
+
Assert(startup_data_len == 0);
/* Release postmaster's working memory context */
@@ -3315,6 +3334,12 @@ autovac_init(void)
check_av_worker_gucs();
}
+/*
+ * vci_IsAutoVacuumLauncherProcess
+ *		Return the current value of the process-local
+ *		vci_am_autovacuum_launcher flag.
+ */
+bool
+vci_IsAutoVacuumLauncherProcess(void)
+{
+	bool		is_launcher = vci_am_autovacuum_launcher;
+
+	return is_launcher;
+}
+
/*
* AutoVacuumShmemSize
* Compute space needed for autovacuum-related shared memory
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 116ddf7b835..51e57eb8a18 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -356,6 +356,22 @@ BackgroundWorkerStateChange(bool allow_new_workers)
return;
}
+ /*
+ * Set shmem slot number, and initialize cancel flags.
+ */
+ rw->rw_worker.bgw_shmem_slot = slotno;
+
+ rw->rw_worker.bgw_cancel_databaseId = InvalidOid;
+ rw->rw_worker.bgw_cancel_flags = BGWORKER_CANCEL_NOACCEPT;
+
+ /*
+ * Update the contents in the shared memory also, these are used in
+ * EXEC_BACKEND (win32) case
+ */
+ slot->worker.bgw_shmem_slot = slotno;
+ slot->worker.bgw_cancel_databaseId = InvalidOid;
+ slot->worker.bgw_cancel_flags = BGWORKER_CANCEL_NOACCEPT;
+
/*
* Copy strings in a paranoid way. If shared memory is corrupted, the
* source data might not even be NUL-terminated.
@@ -1395,3 +1411,96 @@ GetBackgroundWorkerTypeByPid(pid_t pid)
return result;
}
+
+/*
+ * AcceptBackgroundWorkerCancel
+ *		Publish this background worker's cancel settings in its shmem slot.
+ *
+ * Stores 'databaseId' and 'cancel_flags' into the BackgroundWorkerSlot
+ * identified by MyBgworkerEntry->bgw_shmem_slot, which is where
+ * CancelBackgroundWorkers() looks for them.  Must be called from a
+ * background worker, i.e. MyBgworkerEntry must be set.
+ *
+ * Both fields are written while holding BackgroundWorkerLock exclusively.
+ * Each store on its own is atomic, but CancelBackgroundWorkers() reads the
+ * two fields as a pair under that same lock; without the lock (or an
+ * explicit memory barrier) it could observe the new flags together with a
+ * stale database OID.
+ */
+void
+AcceptBackgroundWorkerCancel(Oid databaseId, int cancel_flags)
+{
+	int			slotno;
+	BackgroundWorkerSlot *slot;
+
+	/* Get shmem slot number from BGW entry. */
+	Assert(MyBgworkerEntry);
+	slotno = MyBgworkerEntry->bgw_shmem_slot;
+
+	/* Get shmem slot address. */
+	Assert(slotno >= 0 && slotno < BackgroundWorkerData->total_slots);
+	slot = &BackgroundWorkerData->slot[slotno];
+
+	/* Set databaseId and cancel flags in the shmem slot as one unit. */
+	LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
+	slot->worker.bgw_cancel_databaseId = databaseId;
+	slot->worker.bgw_cancel_flags = cancel_flags;
+	LWLockRelease(BackgroundWorkerLock);
+}
+
+/*
+ * CancelBackgroundWorkers
+ *		Request termination of every in-use background worker slot whose
+ *		bgw_cancel_flags overlap 'cancel_flags' and whose
+ *		bgw_cancel_databaseId equals 'databaseId'.
+ *
+ * Matching slots get their 'terminate' flag set while BackgroundWorkerLock
+ * is held exclusively.  If at least one slot matched, the postmaster is
+ * signalled afterwards so that it notices the change to shared memory.
+ */
+void
+CancelBackgroundWorkers(Oid databaseId, int cancel_flags)
+{
+	bool		found_any = false;
+	int			i;
+
+	LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
+
+	for (i = 0; i < BackgroundWorkerData->total_slots; i++)
+	{
+		BackgroundWorkerSlot *candidate = &BackgroundWorkerData->slot[i];
+
+		/* Only slots that are currently in use are considered. */
+		if (!candidate->in_use)
+			continue;
+
+		/* The slot's cancel flags must overlap the requested ones... */
+		if ((candidate->worker.bgw_cancel_flags & cancel_flags) == 0)
+			continue;
+
+		/* ...and its cancel database must be the one we were given. */
+		if (candidate->worker.bgw_cancel_databaseId != databaseId)
+			continue;
+
+		/* Set terminate flag in shared memory. */
+		candidate->terminate = true;
+		found_any = true;
+	}
+
+	LWLockRelease(BackgroundWorkerLock);
+
+	/* Make sure the postmaster notices the change to shared memory. */
+	if (found_any)
+		SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
+}
+
+/*
+ * ScanBackgroundWorkers
+ *		Invoke 'scanner' once for every in-use background worker slot.
+ *
+ * The callback receives a pointer to the slot's BackgroundWorker struct,
+ * the slot's pid, and the caller-supplied 'data' pointer.
+ * BackgroundWorkerLock is held in shared mode for the whole scan,
+ * including while the callback runs.
+ */
+void
+ScanBackgroundWorkers(void (*scanner) (BackgroundWorker *worker, pid_t pid, void *data), void *data)
+{
+	int			i;
+
+	LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
+
+	for (i = 0; i < BackgroundWorkerData->total_slots; i++)
+	{
+		BackgroundWorkerSlot *cur = &BackgroundWorkerData->slot[i];
+
+		if (!cur->in_use)
+			continue;
+
+		(*scanner) (&cur->worker, cur->pid, data);
+	}
+
+	LWLockRelease(BackgroundWorkerLock);
+}
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e5b945a9ee3..29f1179cc49 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -65,6 +65,8 @@
#include "utils/rel.h"
#include "utils/snapmgr.h"
+#include "postmaster/bgworker.h"
+
#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
/* Our shared memory area */
@@ -1977,7 +1979,8 @@ GlobalVisHorizonKindForRel(Relation rel)
Assert(!rel ||
rel->rd_rel->relkind == RELKIND_RELATION ||
rel->rd_rel->relkind == RELKIND_MATVIEW ||
- rel->rd_rel->relkind == RELKIND_TOASTVALUE);
+ rel->rd_rel->relkind == RELKIND_TOASTVALUE ||
+ rel->rd_rel->relkind == RELKIND_INDEX);
if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
return VISHORIZON_SHARED;
@@ -3806,6 +3809,11 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
for (index = 0; index < nautovacs; index++)
(void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
+ /*
+ * Cancel background workers by admin commands.
+ */
+ CancelBackgroundWorkers(databaseId, BGWORKER_CANCEL_ADMIN_COMMANDS);
+
/* sleep, then try again */
pg_usleep(100 * 1000L); /* 100ms */
}
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..7c4d06e645e 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -39,6 +39,8 @@
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pg_trace.h"
+#include "pgstat.h"
+#include "postmaster/bgworker.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
@@ -810,8 +812,18 @@ LockAcquire(const LOCKTAG *locktag,
bool sessionLock,
bool dontWait)
{
- return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait,
- true, NULL, false);
+ /*
+ * Don't take the lock for VCI parallel workers; all other kinds of
+ * workers go through the normal flow.  VCI parallel workers are
+ * recognized by their background worker name, so if that name ever
+ * changes, the following code needs to be updated as well.
+ * FIXME: Try to use the community parallelism code, so that we don't need
+ * our own VCI parallel infrastructure.
+ */
+ if (AmBackgroundWorkerProcess() && strstr(MyBgworkerEntry->bgw_name, "backend="))
+ return LOCKACQUIRE_OK;
+ else
+ return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait,
+ true, NULL, false);
}
/*
@@ -2107,6 +2119,16 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
*/
if (!locallock || locallock->nLocks <= 0)
{
+ /*
+ * VCI parallel workers skip taking the lock in LockAcquire(), so they
+ * have nothing to release here; report success for them instead of
+ * warning.  All other kinds of workers go through the normal flow.
+ * VCI parallel workers are recognized by their background worker
+ * name, so if that name ever changes, the following code needs to be
+ * updated as well.  FIXME: Try to use the community parallelism
+ * code, so that we don't need our own VCI parallel infrastructure.
+ */
+ if (AmBackgroundWorkerProcess() && strstr(MyBgworkerEntry->bgw_name, "backend="))
+ return true;
+
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
return false;
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5148ef982e3..01843639049 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -245,6 +245,8 @@ static inline void LWLockReportWaitStart(LWLock *lock);
static inline void LWLockReportWaitEnd(void);
static const char *GetLWTrancheName(uint16 trancheId);
+LWLockKind Vci_lwlock_kind = -1;
+
#define T_NAME(lock) \
GetLWTrancheName((lock)->tranche)
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 347089b7626..6e9b827d9e5 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -24,6 +24,7 @@
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/int128.h"
+#include "datatype/timestamp.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
@@ -73,19 +74,6 @@ typedef struct
pg_tz *attimezone;
} generate_series_timestamptz_fctx;
-/*
- * The transition datatype for interval aggregates is declared as internal.
- * It's a pointer to an IntervalAggState allocated in the aggregate context.
- */
-typedef struct IntervalAggState
-{
- int64 N; /* count of finite intervals processed */
- Interval sumX; /* sum of finite intervals processed */
- /* These counts are *not* included in N! Use IA_TOTAL_COUNT() as needed */
- int64 pInfcount; /* count of +infinity intervals */
- int64 nInfcount; /* count of -infinity intervals */
-} IntervalAggState;
-
#define IA_TOTAL_COUNT(ia) \
((ia)->N + (ia)->pInfcount + (ia)->nInfcount)
@@ -3502,7 +3490,7 @@ interval_larger(PG_FUNCTION_ARGS)
PG_RETURN_INTERVAL_P(result);
}
-static void
+void
finite_interval_pl(const Interval *span1, const Interval *span2, Interval *result)
{
Assert(!INTERVAL_NOT_FINITE(span1));
@@ -3981,7 +3969,7 @@ in_range_interval_interval(PG_FUNCTION_ARGS)
* context. When the state data needs to be allocated in the current memory
* context, we use palloc0 directly e.g. interval_avg_deserialize().
*/
-static IntervalAggState *
+IntervalAggState *
makeIntervalAggState(FunctionCallInfo fcinfo)
{
IntervalAggState *state;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 68ff67de549..5cc8b69d9d1 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -92,6 +92,8 @@
#define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
+bool (*add_skip_vci_index_hook) (Relation rel) = NULL;
+
/*
* Whether to bother checking if relation cache memory needs to be freed
* eagerly. See also RelationBuildDesc() and pg_config_manual.h.
@@ -5386,6 +5388,16 @@ restart:
indexDesc = index_open(indexOid, AccessShareLock);
+ if (add_skip_vci_index_hook)
+ {
+ if (add_skip_vci_index_hook(indexDesc))
+ {
+ /* Skip if Index is VCI index */
+ index_close(indexDesc, AccessShareLock);
+ continue;
+ }
+ }
+
/*
* Extract index expressions and index predicate. Note: Don't use
* RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
@@ -6964,6 +6976,60 @@ unlink_initfile(const char *initfilename, int elevel)
}
}
+/*
+ * isRelHasVCIIndex
+ *		Report whether relation 'relid' has at least one index whose access
+ *		method is named "vci".
+ *
+ * *is_partition is set to true if the relation is a partition or a
+ * partitioned table, and to false otherwise.
+ *
+ * The relation and its indexes are opened with NoLock here, so this
+ * presumably relies on the caller already holding a suitable lock on the
+ * relation -- TODO confirm for all callers.
+ *
+ * Raises an ERROR if the pg_am entry of one of the indexes cannot be found.
+ */
+bool
+isRelHasVCIIndex(Oid relid, bool *is_partition)
+{
+	Relation	relation;
+	bool		hasVCI = false;
+
+	*is_partition = false;
+	relation = table_open(relid, NoLock);
+
+	if (relation->rd_rel->relispartition ||
+		relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		*is_partition = true;
+
+	if (relation->rd_rel->relhasindex)
+	{
+		List	   *indexoidlist = RelationGetIndexList(relation);
+		ListCell   *l;
+
+		foreach(l, indexoidlist)
+		{
+			Oid			indexoid = lfirst_oid(l);
+			Relation	indexRelation;
+			Oid			relam;
+			HeapTuple	amtuple;
+			Form_pg_am	aform;
+
+			indexRelation = index_open(indexoid, NoLock);
+			relam = indexRelation->rd_rel->relam;
+
+			amtuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relam));
+			if (!HeapTupleIsValid(amtuple))
+				elog(ERROR, "cache lookup failed for access method %u",
+					 relam);
+			aform = (Form_pg_am) GETSTRUCT(amtuple);
+
+			/* VCI indexes are identified by the name of their access method */
+			if (strcmp(NameStr(aform->amname), "vci") == 0)
+				hasVCI = true;
+
+			ReleaseSysCache(amtuple);
+			index_close(indexRelation, NoLock);
+
+			/* One match is enough; no need to look at the other indexes */
+			if (hasVCI)
+				break;
+		}
+
+		/* RelationGetIndexList() hands back a copy that we must free */
+		list_free(indexoidlist);
+	}
+
+	table_close(relation, NoLock);
+	return hasVCI;
+}
+
/*
* ResourceOwner callbacks
*/
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 28f09a27001..96481bec795 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -836,7 +836,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
before_shmem_exit(ShutdownPostgres, 0);
/* The autovacuum launcher is done here */
- if (AmAutoVacuumLauncherProcess())
+ if (vci_IsAutoVacuumLauncherProcess() || AmAutoVacuumLauncherProcess())
{
/* fill in the remainder of this entry in the PgBackendStatus array */
pgstat_bestart_final();
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 7d28ca706eb..5000bd0667e 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -48,7 +48,7 @@ static Size BogusGetChunkSpace(void *pointer);
[id].get_chunk_context = BogusGetChunkContext, \
[id].get_chunk_space = BogusGetChunkSpace
-static const MemoryContextMethods mcxt_methods[] = {
+static MemoryContextMethods mcxt_methods[] = {
/* aset.c */
[MCTX_ASET_ID].alloc = AllocSetAlloc,
[MCTX_ASET_ID].free_p = AllocSetFree,
@@ -348,7 +348,29 @@ BogusGetChunkSpace(void *pointer)
/*****************************************************************************
* EXPORTED ROUTINES *
*****************************************************************************/
+/*
+ * MemoryContextRegisterMethods
+ *		Install 'new_methods' as the mcxt_methods entry for 'method_id'.
+ *
+ * The existing entry is overwritten.  If 'old_methods' is not NULL, the
+ * previous entry is copied into it first; pass NULL when the old methods
+ * are not needed.
+ *
+ * Currently only MCTX_1_RESERVED_GLIBC_ID and MCTX_2_RESERVED_GLIBC_ID may
+ * be replaced; any other ID is rejected with a FATAL error.
+ */
+void
+MemoryContextRegisterMethods(int method_id,
+							 const MemoryContextMethods *new_methods,
+							 MemoryContextMethods *old_methods)
+{
+	bool		replaceable;
+
+	Assert(method_id >= 0 && method_id < (1 << MEMORY_CONTEXT_METHODID_BITS));
+
+	replaceable = (method_id == MCTX_1_RESERVED_GLIBC_ID ||
+				   method_id == MCTX_2_RESERVED_GLIBC_ID);
+	if (!replaceable)
+		elog(FATAL, "memory context method entry for \'%d\' cannot be overwritten", method_id);
+
+	/* Hand back the entry being replaced, if the caller asked for it. */
+	if (old_methods != NULL)
+		*old_methods = mcxt_methods[method_id];
+
+	mcxt_methods[method_id] = *new_methods;
+}
/*
* MemoryContextInit
@@ -1172,11 +1194,12 @@ MemoryContextCheck(MemoryContext context)
* so this can contain Assert but not elog/ereport.
*/
void
-MemoryContextCreate(MemoryContext node,
- NodeTag tag,
- MemoryContextMethodID method_id,
- MemoryContext parent,
- const char *name)
+MemoryContextCreateWithExLock(MemoryContext node,
+ NodeTag tag,
+ MemoryContextMethodID method_id,
+ MemoryContext parent,
+ const char *name,
+ LWLock *lock)
{
/* Creating new memory contexts is not allowed in a critical section */
Assert(CritSectionCount == 0);
@@ -1200,12 +1223,18 @@ MemoryContextCreate(MemoryContext node,
/* OK to link node into context tree */
if (parent)
{
+ if (lock)
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+
node->nextchild = parent->firstchild;
if (parent->firstchild != NULL)
parent->firstchild->prevchild = node;
parent->firstchild = node;
/* inherit allowInCritSection flag from parent */
node->allowInCritSection = parent->allowInCritSection;
+
+ if (lock)
+ LWLockRelease(lock);
}
else
{
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 65ab83fff8b..0f0747b5eea 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -109,6 +109,7 @@
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tuplesort.h"
+#include "utils/tuplesortstate.h"
/*
* Initial size of memtuples array. We're trying to select this size so that
@@ -127,40 +128,6 @@ bool trace_sort = false;
bool optimize_bounded_sort = true;
#endif
-
-/*
- * During merge, we use a pre-allocated set of fixed-size slots to hold
- * tuples. To avoid palloc/pfree overhead.
- *
- * Merge doesn't require a lot of memory, so we can afford to waste some,
- * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the
- * palloc() overhead is not significant anymore.
- *
- * 'nextfree' is valid when this chunk is in the free list. When in use, the
- * slot holds a tuple.
- */
-#define SLAB_SLOT_SIZE 1024
-
-typedef union SlabSlot
-{
- union SlabSlot *nextfree;
- char buffer[SLAB_SLOT_SIZE];
-} SlabSlot;
-
-/*
- * Possible states of a Tuplesort object. These denote the states that
- * persist between calls of Tuplesort routines.
- */
-typedef enum
-{
- TSS_INITIAL, /* Loading tuples; still within memory limit */
- TSS_BOUNDED, /* Loading tuples into bounded-size heap */
- TSS_BUILDRUNS, /* Loading tuples; writing to tape */
- TSS_SORTEDINMEM, /* Sort completed entirely in memory */
- TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */
- TSS_FINALMERGE, /* Performing final merge on-the-fly */
-} TupSortStatus;
-
/*
* Parameters for calculation of number of tapes to use --- see inittapes()
* and tuplesort_merge_order().
@@ -178,232 +145,6 @@ typedef enum
#define TAPE_BUFFER_OVERHEAD BLCKSZ
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
-
-/*
- * Private state of a Tuplesort operation.
- */
-struct Tuplesortstate
-{
- TuplesortPublic base;
- TupSortStatus status; /* enumerated value as shown above */
- bool bounded; /* did caller specify a maximum number of
- * tuples to return? */
- bool boundUsed; /* true if we made use of a bounded heap */
- int bound; /* if bounded, the maximum number of tuples */
- int64 tupleMem; /* memory consumed by individual tuples.
- * storing this separately from what we track
- * in availMem allows us to subtract the
- * memory consumed by all tuples when dumping
- * tuples to tape */
- int64 availMem; /* remaining memory available, in bytes */
- int64 allowedMem; /* total memory allowed, in bytes */
- int maxTapes; /* max number of input tapes to merge in each
- * pass */
- int64 maxSpace; /* maximum amount of space occupied among sort
- * of groups, either in-memory or on-disk */
- bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk
- * space, false when its value for in-memory
- * space */
- TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */
- LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */
-
- /*
- * This array holds the tuples now in sort memory. If we are in state
- * INITIAL, the tuples are in no particular order; if we are in state
- * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS
- * and FINALMERGE, the tuples are organized in "heap" order per Algorithm
- * H. In state SORTEDONTAPE, the array is not used.
- */
- SortTuple *memtuples; /* array of SortTuple structs */
- int memtupcount; /* number of tuples currently present */
- int memtupsize; /* allocated length of memtuples array */
- bool growmemtuples; /* memtuples' growth still underway? */
-
- /*
- * Memory for tuples is sometimes allocated using a simple slab allocator,
- * rather than with palloc(). Currently, we switch to slab allocation
- * when we start merging. Merging only needs to keep a small, fixed
- * number of tuples in memory at any time, so we can avoid the
- * palloc/pfree overhead by recycling a fixed number of fixed-size slots
- * to hold the tuples.
- *
- * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE
- * slots. The allocation is sized to have one slot per tape, plus one
- * additional slot. We need that many slots to hold all the tuples kept
- * in the heap during merge, plus the one we have last returned from the
- * sort, with tuplesort_gettuple.
- *
- * Initially, all the slots are kept in a linked list of free slots. When
- * a tuple is read from a tape, it is put to the next available slot, if
- * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd
- * instead.
- *
- * When we're done processing a tuple, we return the slot back to the free
- * list, or pfree() if it was palloc'd. We know that a tuple was
- * allocated from the slab, if its pointer value is between
- * slabMemoryBegin and -End.
- *
- * When the slab allocator is used, the USEMEM/LACKMEM mechanism of
- * tracking memory usage is not used.
- */
- bool slabAllocatorUsed;
-
- char *slabMemoryBegin; /* beginning of slab memory arena */
- char *slabMemoryEnd; /* end of slab memory arena */
- SlabSlot *slabFreeHead; /* head of free list */
-
- /* Memory used for input and output tape buffers. */
- size_t tape_buffer_mem;
-
- /*
- * When we return a tuple to the caller in tuplesort_gettuple_XXX, that
- * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE
- * modes), we remember the tuple in 'lastReturnedTuple', so that we can
- * recycle the memory on next gettuple call.
- */
- void *lastReturnedTuple;
-
- /*
- * While building initial runs, this is the current output run number.
- * Afterwards, it is the number of initial runs we made.
- */
- int currentRun;
-
- /*
- * Logical tapes, for merging.
- *
- * The initial runs are written in the output tapes. In each merge pass,
- * the output tapes of the previous pass become the input tapes, and new
- * output tapes are created as needed. When nInputTapes equals
- * nInputRuns, there is only one merge pass left.
- */
- LogicalTape **inputTapes;
- int nInputTapes;
- int nInputRuns;
-
- LogicalTape **outputTapes;
- int nOutputTapes;
- int nOutputRuns;
-
- LogicalTape *destTape; /* current output tape */
-
- /*
- * These variables are used after completion of sorting to keep track of
- * the next tuple to return. (In the tape case, the tape's current read
- * position is also critical state.)
- */
- LogicalTape *result_tape; /* actual tape of finished output */
- int current; /* array index (only used if SORTEDINMEM) */
- bool eof_reached; /* reached EOF (needed for cursors) */
-
- /* markpos_xxx holds marked position for mark and restore */
- int64 markpos_block; /* tape block# (only used if SORTEDONTAPE) */
- int markpos_offset; /* saved "current", or offset in tape block */
- bool markpos_eof; /* saved "eof_reached" */
-
- /*
- * These variables are used during parallel sorting.
- *
- * worker is our worker identifier. Follows the general convention that
- * -1 value relates to a leader tuplesort, and values >= 0 worker
- * tuplesorts. (-1 can also be a serial tuplesort.)
- *
- * shared is mutable shared memory state, which is used to coordinate
- * parallel sorts.
- *
- * nParticipants is the number of worker Tuplesortstates known by the
- * leader to have actually been launched, which implies that they must
- * finish a run that the leader needs to merge. Typically includes a
- * worker state held by the leader process itself. Set in the leader
- * Tuplesortstate only.
- */
- int worker;
- Sharedsort *shared;
- int nParticipants;
-
- /*
- * Additional state for managing "abbreviated key" sortsupport routines
- * (which currently may be used by all cases except the hash index case).
- * Tracks the intervals at which the optimization's effectiveness is
- * tested.
- */
- int64 abbrevNext; /* Tuple # at which to next check
- * applicability */
-
- /*
- * Resource snapshot for time of sort start.
- */
- PGRUsage ru_start;
-};
-
-/*
- * Private mutable state of tuplesort-parallel-operation. This is allocated
- * in shared memory.
- */
-struct Sharedsort
-{
- /* mutex protects all fields prior to tapes */
- slock_t mutex;
-
- /*
- * currentWorker generates ordinal identifier numbers for parallel sort
- * workers. These start from 0, and are always gapless.
- *
- * Workers increment workersFinished to indicate having finished. If this
- * is equal to state.nParticipants within the leader, leader is ready to
- * merge worker runs.
- */
- int currentWorker;
- int workersFinished;
-
- /* Temporary file space */
- SharedFileSet fileset;
-
- /* Size of tapes flexible array */
- int nTapes;
-
- /*
- * Tapes array used by workers to report back information needed by the
- * leader to concatenate all worker tapes into one for merging
- */
- TapeShare tapes[FLEXIBLE_ARRAY_MEMBER];
-};
-
-/*
- * Is the given tuple allocated from the slab memory arena?
- */
-#define IS_SLAB_SLOT(state, tuple) \
- ((char *) (tuple) >= (state)->slabMemoryBegin && \
- (char *) (tuple) < (state)->slabMemoryEnd)
-
-/*
- * Return the given tuple to the slab memory free list, or free it
- * if it was palloc'd.
- */
-#define RELEASE_SLAB_SLOT(state, tuple) \
- do { \
- SlabSlot *buf = (SlabSlot *) tuple; \
- \
- if (IS_SLAB_SLOT((state), buf)) \
- { \
- buf->nextfree = (state)->slabFreeHead; \
- (state)->slabFreeHead = buf; \
- } else \
- pfree(buf); \
- } while(0)
-
-#define REMOVEABBREV(state,stup,count) ((*(state)->base.removeabbrev) (state, stup, count))
-#define COMPARETUP(state,a,b) ((*(state)->base.comparetup) (a, b, state))
-#define WRITETUP(state,tape,stup) ((*(state)->base.writetup) (state, tape, stup))
-#define READTUP(state,stup,tape,len) ((*(state)->base.readtup) (state, stup, tape, len))
-#define FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)
-#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed)
-#define USEMEM(state,amt) ((state)->availMem -= (amt))
-#define FREEMEM(state,amt) ((state)->availMem += (amt))
-#define SERIAL(state) ((state)->shared == NULL)
-#define WORKER(state) ((state)->shared && (state)->worker != -1)
-#define LEADER(state) ((state)->shared && (state)->worker == -1)
-
/*
* NOTES about on-tape representation of tuples:
*
diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c
index 1e815571570..35995edad10 100644
--- a/src/backend/utils/time/combocid.c
+++ b/src/backend/utils/time/combocid.c
@@ -362,3 +362,49 @@ RestoreComboCIDState(char *comboCIDstate)
elog(ERROR, "unexpected command ID while restoring combo CIDs");
}
}
+
+/*
+ * pef_save_combocids -- return a palloc'd copy of the used part of comboCids.
+ */
+struct ComboCidKeyData *
+pef_save_combocids(void)
+{
+	Size		nbytes = sizeof(ComboCidKeyData) * usedComboCids;
+	struct ComboCidKeyData *save_addr = palloc(nbytes);
+
+	if (nbytes > 0)				/* never hand memcpy() an unset comboCids */
+		memcpy(save_addr, comboCids, nbytes);
+	return save_addr;
+}
+
+/*
+ * pef_get_num_combocids
+ *		Return usedComboCids, i.e. how many entries pef_save_combocids() copies.
+ */
+Size
+pef_get_num_combocids(void)
+{
+ return usedComboCids;
+}
+
+/*
+ * pef_restore_combocids
+ *		Re-register saved combo CID keys by calling GetComboCommandId() on each.
+ *
+ * addr is the start of the saved ComboCidKeyData list, num its element count.
+ */
+void
+pef_restore_combocids(struct ComboCidKeyData *addr, Size num)
+{
+	ComboCidKeyData *ccids = (ComboCidKeyData *) addr;
+	Size		i;				/* unsigned, to match 'num' */
+
+	for (i = 0; i < num; i++)
+	{
+		/*
+		 * GetComboCommandId() registers the key when it does not already
+		 * exist in the hash table; its return value is not needed here.
+		 */
+		GetComboCommandId(ccids[i].cmin, ccids[i].cmax);
+	}
+}
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index aa1589e3331..bad40767864 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -359,6 +359,7 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
AssignDumpId(&attachinfo->dobj);
attachinfo->dobj.name = pg_strdup(tblinfo[i].dobj.name);
attachinfo->dobj.namespace = tblinfo[i].dobj.namespace;
+ attachinfo->dobj.isvciview = false;
attachinfo->parentTbl = tblinfo[i].parents[0];
attachinfo->partitionTbl = &tblinfo[i];
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index e2e7975b34e..0a085a88ab7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -7337,6 +7337,8 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].dummy_view = false; /* might get set during sort */
tblinfo[i].postponed_def = false; /* might get set during sort */
+ tblinfo[i].dobj.isvciview = false;
+
/* Tables have data */
tblinfo[i].dobj.components |= DUMP_COMPONENT_DATA;
@@ -8388,6 +8390,17 @@ getRules(Archive *fout)
ruleinfo[i].ev_enabled = *(PQgetvalue(res, i, i_ev_enabled));
if (ruleinfo[i].ruletable)
{
+ /*
+ * Treat a (materialized) view whose name equals this rule's name as
+ * a VCI view; this is the same test vci_isVciRelation() applies.
+ */
+ if ((ruleinfo[i].ruletable->relkind == RELKIND_MATVIEW ||
+ ruleinfo[i].ruletable->relkind == RELKIND_VIEW) &&
+ !strcmp(ruleinfo[i].ruletable->dobj.name, ruleinfo[i].dobj.name))
+ {
+ ruleinfo[i].ruletable->dobj.isvciview = true;
+ }
+
/*
* If the table is a view or materialized view, force its ON
* SELECT rule to be sorted before the view itself --- this
@@ -16752,8 +16765,12 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
int j,
k;
- /* We had better have loaded per-column details about this table */
- Assert(tbinfo->interesting);
+	/* VCI views are never dumped, so bail out before doing any work */
+	if (tbinfo->dobj.isvciview)
+		return;
+
+	/* We had better have loaded per-column details about this table */
+	Assert(tbinfo->interesting);
qrelname = pg_strdup(fmtId(tbinfo->dobj.name));
qualrelname = pg_strdup(fmtQualifiedDumpable(tbinfo));
@@ -19743,12 +19757,27 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
case DO_FDW:
case DO_FOREIGN_SERVER:
case DO_TRANSFORM:
+
+ /*
+ * Do not add dependency for VCI VIEW to suppress dependency
+ * loop message
+ */
+ if (dobj->isvciview)
+ break;
/* Pre-data objects: must come before the pre-data boundary */
addObjectDependency(preDataBound, dobj->dumpId);
break;
+ case DO_LARGE_OBJECT:
+
+ /*
+ * Do not add dependency for VCI VIEW to suppress dependency
+ * loop message
+ */
+ if (dobj->isvciview)
+ break;
+ /* fallthrough */
case DO_TABLE_DATA:
case DO_SEQUENCE_SET:
- case DO_LARGE_OBJECT:
case DO_LARGE_OBJECT_DATA:
/* Data objects: must come between the boundaries */
addObjectDependency(dobj, preDataBound->dumpId);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 7417eab6aef..3eb3172b09f 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -159,6 +159,7 @@ typedef struct _dumpableObject
DumpId *dependencies; /* dumpIds of objects this one depends on */
int nDeps; /* number of valid dependencies */
int allocDeps; /* allocated size of dependencies[] */
+ bool isvciview; /* this table is vci view */
} DumpableObject;
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index e48fe434cd3..27870b7bc1b 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -395,6 +395,10 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
OffsetNumber *dead, int ndead,
OffsetNumber *unused, int nunused);
+extern PGDLLIMPORT void (*add_index_delete_hook) (Relation indexRelation, ItemPointer heap_tid, TransactionId xmin);
+extern PGDLLIMPORT bool (*add_snapshot_satisfies_hook) (HeapTuple htup, Snapshot snapshot, Buffer buffer);
+extern PGDLLIMPORT bool (*add_skip_vacuum_hook) (Relation rel);
+
/* in heap/vacuumlazy.c */
struct VacuumParams;
extern void heap_vacuum_rel(Relation rel,
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index dfbb4c85460..3891cbd98b9 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -51,8 +51,9 @@ typedef enum relopt_kind
RELOPT_KIND_VIEW = (1 << 9),
RELOPT_KIND_BRIN = (1 << 10),
RELOPT_KIND_PARTITIONED = (1 << 11),
+ RELOPT_KIND_VCI = (1 << 12),
/* if you add a new kind, make sure you update "last_default" too */
- RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED,
+ RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_VCI,
/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
RELOPT_KIND_MAX = (1 << 30)
} relopt_kind;
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index b2bc10ee041..20c28d8a554 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -533,4 +533,11 @@ extern void EnterParallelMode(void);
extern void ExitParallelMode(void);
extern bool IsInParallelMode(void);
+#ifndef FRONTEND
+extern struct TransactionStateData *pef_save_transaction_state(void);
+extern MemoryContext pef_restore_transaction_state(void *src);
+extern void pef_cleanup_transaction_state(void);
+extern void pef_restore_start_timestamp(TimestampTz xactSt, TimestampTz stmtSt);
+#endif							/* !FRONTEND */
+
#endif /* XACT_H */
diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h
index 91446303024..01da29ba8b8 100644
--- a/src/include/access/xlogrecovery.h
+++ b/src/include/access/xlogrecovery.h
@@ -46,6 +46,7 @@ typedef enum RecoveryPauseState
RECOVERY_NOT_PAUSED, /* pause not requested */
RECOVERY_PAUSE_REQUESTED, /* pause requested, but not yet paused */
RECOVERY_PAUSED, /* recovery is paused */
+ RECOVERY_VCI_PAUSE_REQUESTED, /* pause requested for VCI query */
} RecoveryPauseState;
/* User-settable GUC parameters */
@@ -151,6 +152,8 @@ extern void WakeupRecovery(void);
extern void StartupRequestWalReceiverRestart(void);
extern void XLogRequestWalReceiverReply(void);
+extern void SetVciRecoveryPause(void);
+
extern void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue);
extern void xlog_outdesc(StringInfo buf, XLogReaderState *record);
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 0ea7ccf5243..768057a395f 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -225,4 +225,7 @@ extern void shdepDropOwned(List *roleids, DropBehavior behavior);
extern void shdepReassignOwned(List *roleids, Oid newrole);
+/* vci index original hook*/
+extern PGDLLIMPORT bool (*add_drop_relation_hook) (const ObjectAddress *object, int flags);
+
#endif /* DEPENDENCY_H */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 4daa8bef5ee..2b87e629fb5 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -175,6 +175,8 @@ extern void RestoreReindexState(const void *reindexstate);
extern void IndexSetParentIndex(Relation partitionIdx, Oid parentOid);
+extern PGDLLIMPORT bool (*add_reindex_index_hook) (Relation);
+extern PGDLLIMPORT HeapTuple IndexHeapTuple;
/*
* itemptr_encode - Encode ItemPointer as int64/int8
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 03c5b3d73e5..ef09e6c5f3d 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -83,4 +83,11 @@ extern void ExplainQueryText(struct ExplainState *es, QueryDesc *queryDesc);
extern void ExplainQueryParameters(struct ExplainState *es,
ParamListInfo params, int maxlen);
+extern void ExplainPropertySortGroupKeys(PlanState *planstate, const char *qlabel,
+ int nkeys, AttrNumber *keycols,
+ List *ancestors, struct ExplainState *es);
+extern void ExplainPropertyQual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, struct ExplainState *es);
+
#endif /* EXPLAIN_H */
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index 6832470d387..0f44744e5fe 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -107,4 +107,10 @@ extern void RangeVarCallbackOwnsRelation(const RangeVar *relation,
extern bool PartConstraintImpliedByRelConstraint(Relation scanrel,
List *partConstraint);
+extern PGDLLIMPORT bool (*add_alter_tablespace_hook) (Relation rel);
+extern PGDLLIMPORT void (*add_alter_table_change_owner_hook) (Oid relOid,
+ char relKind, Oid newOwnerId);
+extern PGDLLIMPORT void (*add_alter_table_change_schema_hook) (Oid relOid,
+ char relKind, Oid newNspOid);
+
#endif /* TABLECMDS_H */
diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h
index a924d58aabe..44f8bf7106a 100644
--- a/src/include/datatype/timestamp.h
+++ b/src/include/datatype/timestamp.h
@@ -15,6 +15,11 @@
#ifndef DATATYPE_TIMESTAMP_H
#define DATATYPE_TIMESTAMP_H
+#ifndef FRONTEND
+#include "postgres.h"
+#include "fmgr.h"
+#endif
+
/*
* Timestamp represents absolute time.
*
@@ -87,6 +92,23 @@ struct pg_itm_in
int tm_year;
};
+#ifndef FRONTEND
+/*
+ * The transition datatype for interval aggregates is declared as internal.
+ * It's a pointer to an IntervalAggState allocated in the aggregate context.
+ */
+typedef struct IntervalAggState
+{
+ int64 N; /* count of finite intervals processed */
+ Interval sumX; /* sum of finite intervals processed */
+ /* These counts are *not* included in N! Use IA_TOTAL_COUNT() as needed */
+ int64 pInfcount; /* count of +infinity intervals */
+ int64 nInfcount; /* count of -infinity intervals */
+} IntervalAggState;
+
+extern IntervalAggState *makeIntervalAggState(FunctionCallInfo fcinfo);
+extern void finite_interval_pl(const Interval *span1, const Interval *span2, Interval *result);
+#endif
/* Limits on the "precision" option (typmod) for these data types */
#define MAX_TIMESTAMP_PRECISION 6
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 75366203706..1f52d5bf4ad 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -292,6 +292,8 @@ typedef enum ExprEvalOp
EEOP_AGG_ORDERED_TRANS_DATUM,
EEOP_AGG_ORDERED_TRANS_TUPLE,
+ EEOP_VCI_VAR,
+ EEOP_VCI_PARAM_EXEC,
/* non-existent operation, used e.g. to check array lengths */
EEOP_LAST
} ExprEvalOp;
@@ -338,6 +340,7 @@ typedef struct ExprEvalStep
/* but it's just the normal (negative) attr number for SYSVAR */
int attnum;
Oid vartype; /* type OID of variable */
+ PlanState *vci_parent_planstate;
VarReturningType varreturningtype; /* return old/new/default */
} var;
@@ -424,6 +427,7 @@ typedef struct ExprEvalStep
{
int paramid; /* numeric ID for parameter */
Oid paramtype; /* OID of parameter's datatype */
+ Plan *vci_parent_plan;
} param;
/* for EEOP_PARAM_CALLBACK */
@@ -902,6 +906,19 @@ extern void ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op,
ExprContext *econtext);
extern void ExecEvalSysVar(ExprState *state, ExprEvalStep *op,
ExprContext *econtext, TupleTableSlot *slot);
+extern void ExecCreateExprSetupSteps(ExprState *state, Node *node);
+extern void ExecInitExprSlots(ExprState *state, Node *node);
+extern void ExecReadyExpr(ExprState *state);
+
+typedef void (*ExprEvalVar_hook_type) (ExprState *state, ExprEvalStep *op,
+ ExprContext *econtext);
+extern PGDLLIMPORT ExprEvalVar_hook_type ExprEvalVar_hook;
+
+typedef void (*ExprEvalParam_hook_type) (ExprState *state, ExprEvalStep *op,
+ ExprContext *econtext);
+extern PGDLLIMPORT ExprEvalParam_hook_type ExprEvalParam_hook;
+extern void VciExprEvalVarHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
+extern void VciExprEvalParamHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup,
ExprContext *aggcontext);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index ae99407db89..929ffb94454 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -188,10 +188,7 @@ TupleHashEntryGetTuple(TupleHashEntry entry)
static inline void *
TupleHashEntryGetAdditional(TupleHashTable hashtable, TupleHashEntry entry)
{
- if (hashtable->additionalsize > 0)
- return (char *) entry->firstTuple - hashtable->additionalsize;
- else
- return NULL;
+ return entry->additional;
}
#endif
diff --git a/src/include/executor/nodeModifyTable.h b/src/include/executor/nodeModifyTable.h
index bf3b592e28f..b49d55220ce 100644
--- a/src/include/executor/nodeModifyTable.h
+++ b/src/include/executor/nodeModifyTable.h
@@ -27,6 +27,8 @@ extern ModifyTableState *ExecInitModifyTable(ModifyTable *node, EState *estate,
extern void ExecEndModifyTable(ModifyTableState *node);
extern void ExecReScanModifyTable(ModifyTableState *node);
+extern PGDLLIMPORT List *(*add_should_index_insert_hook) (ResultRelInfo *, TupleTableSlot *, ItemPointer, EState *);
+
extern void ExecInitMergeTupleSlots(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo);
diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h
index a1cafbcc694..b973450b784 100644
--- a/src/include/executor/nodeSubplan.h
+++ b/src/include/executor/nodeSubplan.h
@@ -26,4 +26,6 @@ extern void ExecSetParamPlan(SubPlanState *node, ExprContext *econtext);
extern void ExecSetParamPlanMulti(const Bitmapset *params, ExprContext *econtext);
+extern Datum ExecSubPlanExternal(SubPlanState *node, ExprContext *econtext, bool *isNull);
+
#endif /* NODESUBPLAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 5b6cadb5a6c..85c345be24c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -846,6 +846,7 @@ typedef struct TupleHashTableData *TupleHashTable;
typedef struct TupleHashEntryData
{
MinimalTuple firstTuple; /* copy of first tuple in this group */
+ void *additional; /* user data */
uint32 status; /* hash status */
uint32 hash; /* hash value (cached) */
} TupleHashEntryData;
@@ -2124,6 +2125,7 @@ typedef struct ForeignScanState
* the BeginCustomScan method.
* ----------------
*/
+struct LimitState;
struct CustomExecMethods;
typedef struct CustomScanState
diff --git a/src/include/nodes/extensible.h b/src/include/nodes/extensible.h
index 1129c4ba4b1..37b29cf770b 100644
--- a/src/include/nodes/extensible.h
+++ b/src/include/nodes/extensible.h
@@ -115,6 +115,7 @@ typedef struct CustomScanMethods
/* Create execution state (CustomScanState) from a CustomScan plan node */
Node *(*CreateCustomScanState) (CustomScan *cscan);
+ struct CustomScan *(*CopyCustomPlan) (const struct CustomScan *from);
} CustomScanMethods;
/*
@@ -155,6 +156,9 @@ typedef struct CustomExecMethods
void (*ExplainCustomScan) (CustomScanState *node,
List *ancestors,
ExplainState *es);
+ void (*SetBoundCustomScan) (const struct LimitState *limit,
+ struct CustomScanState *cps);
+ void (*ExplainCustomPlanTargetRel) (struct CustomScanState *node, struct ExplainState *es);
} CustomExecMethods;
extern void RegisterCustomScanMethods(const CustomScanMethods *methods);
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index 5807ef805bd..2dc040211e6 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -147,6 +147,7 @@ typedef struct MemoryContextData
(IsA((context), AllocSetContext) || \
IsA((context), SlabContext) || \
IsA((context), GenerationContext) || \
- IsA((context), BumpContext)))
+ IsA((context), BumpContext) || \
+ IsA((context), SmcAllocSetContext)))
#endif /* MEMNODES_H */
diff --git a/src/include/nodes/params.h b/src/include/nodes/params.h
index 4321ca6329b..ef69f2d6bba 100644
--- a/src/include/nodes/params.h
+++ b/src/include/nodes/params.h
@@ -19,7 +19,7 @@ struct Bitmapset;
struct ExprState;
struct Param;
struct ParseState;
-
+struct PlanState;
/*
* ParamListInfo
@@ -148,6 +148,8 @@ typedef struct ParamExecData
void *execPlan; /* should be "SubPlanState *" */
Datum value;
bool isnull;
+ bool noNeedLoadFromMain;
+ bool loadedFromMain;
} ParamExecData;
/* type of argument for ParamsErrorCallback */
@@ -166,5 +168,7 @@ extern ParamListInfo RestoreParamList(char **start_address);
extern char *BuildParamLogString(ParamListInfo params, char **knownTextValues,
int maxlen);
extern void ParamsErrorCallback(void *arg);
+typedef struct ExprState *(*ExecInitParam_hook_type) (struct Param *param, struct PlanState *parent);
+extern PGDLLIMPORT ExecInitParam_hook_type ExecInitParam_hook;
#endif /* PARAMS_H */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 658d76225e4..c104b65825e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -194,6 +194,9 @@ typedef struct Plan
/* OK to use as part of parallel plan? */
bool parallel_safe;
+ /* plan number (1-origin) in the Query */
+ AttrNumber plan_no;
+
/*
* information needed for asynchronous execution
*/
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 347c582a789..59c705379e3 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -37,6 +37,8 @@ typedef void (*create_upper_paths_hook_type) (PlannerInfo *root,
void *extra);
extern PGDLLIMPORT create_upper_paths_hook_type create_upper_paths_hook;
+extern void copy_plan_costsize(Plan *dest, Plan *src);
+
extern PlannedStmt *standard_planner(Query *parse, const char *query_string,
int cursorOptions,
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index e8135f41a1c..cdbac78aea6 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -51,6 +51,7 @@ extern PGDLLIMPORT int Log_autovacuum_min_duration;
/* Status inquiry functions */
extern bool AutoVacuumingActive(void);
+extern bool vci_IsAutoVacuumLauncherProcess(void);
/* called from postmaster at server startup */
extern void autovac_init(void);
@@ -63,6 +64,8 @@ pg_noreturn extern void AutoVacWorkerMain(const void *startup_data, size_t start
extern bool AutoVacuumRequestWork(AutoVacuumWorkItemType type,
Oid relationId, BlockNumber blkno);
+extern void vci_AutovacuumLauncherIAm(void);
+extern void vci_AutovacuumLauncherNotIAm(void);
/* shared memory stuff */
extern Size AutoVacuumShmemSize(void);
diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h
index 058667a47a0..a5fc9a2e420 100644
--- a/src/include/postmaster/bgworker.h
+++ b/src/include/postmaster/bgworker.h
@@ -68,6 +68,11 @@
#define BGWORKER_CLASS_PARALLEL 0x0010
/* add additional bgworker classes here */
+/*
+ * Flags for cancel by admin commands.
+ */
+#define BGWORKER_CANCEL_NOACCEPT 0x0000
+#define BGWORKER_CANCEL_ADMIN_COMMANDS 0x0001
typedef void (*bgworker_main_type) (Datum main_arg);
@@ -98,6 +103,9 @@ typedef struct BackgroundWorker
Datum bgw_main_arg;
char bgw_extra[BGW_EXTRALEN];
pid_t bgw_notify_pid; /* SIGUSR1 this backend on start/stop */
+ int bgw_shmem_slot; /* shmem slot ID */
+ Oid bgw_cancel_databaseId; /* cancel target */
+ int bgw_cancel_flags; /* cancel by admin commands */
} BackgroundWorker;
typedef enum BgwHandleStatus
@@ -161,4 +169,9 @@ extern void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, ui
extern void BackgroundWorkerBlockSignals(void);
extern void BackgroundWorkerUnblockSignals(void);
+/* Cancel background workers. */
+extern void AcceptBackgroundWorkerCancel(Oid databaseId, int cancel_flags);
+extern void CancelBackgroundWorkers(Oid databaseId, int cancel_flags);
+extern void ScanBackgroundWorkers(void (*scanner) (BackgroundWorker *worker, pid_t pid, void *data), void *data);
+
#endif /* BGWORKER_H */
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index 74b87a9114a..d97d1c5230b 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -46,6 +46,9 @@ typedef struct ItemPointerData
#endif
ItemPointerData;
+#define SizeOfIptrData \
+ (offsetof(ItemPointerData, ip_posid) + sizeof(OffsetNumber))
+
typedef ItemPointerData *ItemPointer;
/* ----------------
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 2b4cbda39a5..786a6404f70 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -233,4 +233,15 @@ typedef enum BuiltinTrancheIds
*/
typedef LWLock *LWLockId;
+typedef int LWLockKind;
+
+extern PGDLLIMPORT LWLockKind Vci_lwlock_kind;
+
+#define LWLockAcquireVci(_lock, _mode, _kind) \
+ do { \
+ Vci_lwlock_kind = (_kind); \
+ LWLockAcquire((_lock), (_mode)); \
+ Vci_lwlock_kind = -1; \
+ } while (0)
+
#endif /* LWLOCK_H */
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 24e2f5082bc..ed80f07bacf 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -24,6 +24,7 @@
extern PGDLLIMPORT int max_standby_archive_delay;
extern PGDLLIMPORT int max_standby_streaming_delay;
extern PGDLLIMPORT bool log_recovery_conflict_waits;
+extern PGDLLIMPORT int vacuum_defer_cleanup_age;
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h
index 4a2069b2869..b520e2d6627 100644
--- a/src/include/utils/combocid.h
+++ b/src/include/utils/combocid.h
@@ -24,5 +24,9 @@ extern void AtEOXact_ComboCid(void);
extern void RestoreComboCIDState(char *comboCIDstate);
extern void SerializeComboCIDState(Size maxsize, char *start_address);
extern Size EstimateComboCIDStateSpace(void);
+struct ComboCidKeyData;
+extern struct ComboCidKeyData *pef_save_combocids(void);
+extern Size pef_get_num_combocids(void);
+extern void pef_restore_combocids(struct ComboCidKeyData *addr, Size size);
#endif /* COMBOCID_H */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index c0987dca155..d22ab72c713 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -20,6 +20,7 @@
#include "nodes/memnodes.h"
#include "storage/condition_variable.h"
#include "storage/lmgr.h"
+#include "storage/lwlock.h"
#include "utils/dsa.h"
@@ -90,6 +91,9 @@ extern PGDLLIMPORT MemoryContext PortalContext;
/*
* Memory-context-type-independent functions in mcxt.c
*/
+extern void MemoryContextRegisterMethods(int method_id,
+ const MemoryContextMethods *new_methods,
+ MemoryContextMethods *old_methods);
extern void MemoryContextInit(void);
extern void MemoryContextReset(MemoryContext context);
extern void MemoryContextDelete(MemoryContext context);
diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h
index a6caa6335e3..31523ad1c7c 100644
--- a/src/include/utils/memutils_internal.h
+++ b/src/include/utils/memutils_internal.h
@@ -151,11 +151,14 @@ typedef enum MemoryContextMethodID
* context creation. It's intended to be called from context-type-
* specific creation routines, and noplace else.
*/
-extern void MemoryContextCreate(MemoryContext node,
- NodeTag tag,
- MemoryContextMethodID method_id,
- MemoryContext parent,
- const char *name);
+#define MemoryContextCreate(node, tag, method_id, parent, name) \
+ MemoryContextCreateWithExLock(node, tag, method_id, parent, name, NULL)
+extern void MemoryContextCreateWithExLock(MemoryContext node,
+ NodeTag tag,
+ MemoryContextMethodID method_id,
+ MemoryContext parent,
+ const char *name,
+ LWLock *lock);
extern void *MemoryContextAllocationFailure(MemoryContext context, Size size,
int flags);
diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h
index 9e79fc376cb..ac3dd0211a0 100644
--- a/src/include/utils/numeric.h
+++ b/src/include/utils/numeric.h
@@ -107,4 +107,6 @@ extern int64 numeric_int8_opt_error(Numeric num, bool *have_error);
extern Numeric random_numeric(pg_prng_state *state,
Numeric rmin, Numeric rmax);
+struct NumericVar;
+
#endif /* _PG_NUMERIC_H_ */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index b552359915f..ab1eb5b231d 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -354,6 +354,9 @@ typedef struct StdRdOptions
* to freeze. 0 if disabled, -1 if unspecified.
*/
double vacuum_max_eager_freeze_failure_rate;
+ int vci_column_ids_offset; /* TODO: Move under contrib/vci */
+ int vci_dropped_column_ids_offset; /* TODO: Move under
+ * contrib/vci */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 3561c6bef0b..7c2bcdfbd5f 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -61,6 +61,8 @@ extern List *RelationGetDummyIndexExpressions(Relation relation);
extern List *RelationGetIndexPredicate(Relation relation);
extern bytea **RelationGetIndexAttOptions(Relation relation, bool copy);
+extern bool isRelHasVCIIndex(Oid relid, bool *is_partition);
+
/*
* Which set of columns to return by RelationGetIndexAttrBitmap.
*/
@@ -160,4 +162,7 @@ extern PGDLLIMPORT bool criticalRelcachesBuilt;
/* should be used only by relcache.c and postinit.c */
extern PGDLLIMPORT bool criticalSharedRelcachesBuilt;
+/* VCI index original hook */
+extern PGDLLIMPORT bool (*add_skip_vci_index_hook) (Relation rel);
+
#endif /* RELCACHE_H */
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 0e546ec1497..18154d7d90c 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -112,6 +112,16 @@ typedef enum SnapshotType
* horizon to use.
*/
SNAPSHOT_NON_VACUUMABLE,
+
+ /*
+ * VCI WOS2ROS visibility (TODO: document the exact visibility rule)
+ */
+ SNAPSHOT_VCI_WOS2ROS,
+
+ /*
+ * VCI Local ROS visibility (TODO: document the exact visibility rule)
+ */
+ SNAPSHOT_VCI_LOCALROS,
} SnapshotType;
typedef struct SnapshotData *Snapshot;
diff --git a/src/include/utils/tuplesortstate.h b/src/include/utils/tuplesortstate.h
new file mode 100644
index 00000000000..492f7d56a02
--- /dev/null
+++ b/src/include/utils/tuplesortstate.h
@@ -0,0 +1,285 @@
+/*-------------------------------------------------------------------------
+ *
+ * tuplesortstate.h
+ * tuple sorting state structure.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/tuplesortstate.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TUPLESORTSTATE_H
+#define TUPLESORTSTATE_H
+
+#include "executor/executor.h"
+#include "utils/logtape.h"
+#include "utils/pg_rusage.h"
+#include "utils/sortsupport.h"
+#include "utils/tuplestore.h"
+
+/* Everything below was moved here from tuplesort.c for the VCI feature. */
+
+/*
+ * During merge, we use a pre-allocated set of fixed-size slots to hold
+ * tuples, to avoid palloc/pfree overhead.
+ *
+ * Merge doesn't require a lot of memory, so we can afford to waste some,
+ * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the
+ * palloc() overhead is not significant anymore.
+ *
+ * 'nextfree' is valid when this chunk is in the free list. When in use, the
+ * slot holds a tuple.
+ */
+#define SLAB_SLOT_SIZE 1024
+
+typedef union SlabSlot
+{
+ union SlabSlot *nextfree;
+ char buffer[SLAB_SLOT_SIZE];
+} SlabSlot;
+
+/*
+ * Possible states of a Tuplesort object. These denote the states that
+ * persist between calls of Tuplesort routines.
+ */
+typedef enum
+{
+ TSS_INITIAL, /* Loading tuples; still within memory limit */
+ TSS_BOUNDED, /* Loading tuples into bounded-size heap */
+ TSS_BUILDRUNS, /* Loading tuples; writing to tape */
+ TSS_SORTEDINMEM, /* Sort completed entirely in memory */
+ TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */
+ TSS_FINALMERGE, /* Performing final merge on-the-fly */
+} TupSortStatus;
+
+/*
+ * Private state of a Tuplesort operation.
+ */
+struct Tuplesortstate
+{
+ TuplesortPublic base;
+ TupSortStatus status; /* enumerated value as shown above */
+ bool bounded; /* did caller specify a maximum number of
+ * tuples to return? */
+ bool boundUsed; /* true if we made use of a bounded heap */
+ int bound; /* if bounded, the maximum number of tuples */
+ int64 tupleMem; /* memory consumed by individual tuples.
+ * storing this separately from what we track
+ * in availMem allows us to subtract the
+ * memory consumed by all tuples when dumping
+ * tuples to tape */
+ int64 availMem; /* remaining memory available, in bytes */
+ int64 allowedMem; /* total memory allowed, in bytes */
+ int maxTapes; /* max number of input tapes to merge in each
+ * pass */
+ int64 maxSpace; /* maximum amount of space occupied among sort
+ * of groups, either in-memory or on-disk */
+ bool isMaxSpaceDisk; /* true when maxSpace is the value for
+ * on-disk space, false when it is the value
+ * for in-memory space */
+ TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */
+ LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */
+
+ /*
+ * This array holds the tuples now in sort memory. If we are in state
+ * INITIAL, the tuples are in no particular order; if we are in state
+ * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS
+ * and FINALMERGE, the tuples are organized in "heap" order per Algorithm
+ * H. In state SORTEDONTAPE, the array is not used.
+ */
+ SortTuple *memtuples; /* array of SortTuple structs */
+ int memtupcount; /* number of tuples currently present */
+ int memtupsize; /* allocated length of memtuples array */
+ bool growmemtuples; /* memtuples' growth still underway? */
+
+ /*
+ * Memory for tuples is sometimes allocated using a simple slab allocator,
+ * rather than with palloc(). Currently, we switch to slab allocation
+ * when we start merging. Merging only needs to keep a small, fixed
+ * number of tuples in memory at any time, so we can avoid the
+ * palloc/pfree overhead by recycling a fixed number of fixed-size slots
+ * to hold the tuples.
+ *
+ * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE
+ * slots. The allocation is sized to have one slot per tape, plus one
+ * additional slot. We need that many slots to hold all the tuples kept
+ * in the heap during merge, plus the one we have last returned from the
+ * sort, with tuplesort_gettuple.
+ *
+ * Initially, all the slots are kept in a linked list of free slots. When
+ * a tuple is read from a tape, it is put to the next available slot, if
+ * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd
+ * instead.
+ *
+ * When we're done processing a tuple, we return the slot back to the free
+ * list, or pfree() if it was palloc'd. We know that a tuple was
+ * allocated from the slab if its pointer value lies between
+ * slabMemoryBegin and slabMemoryEnd.
+ *
+ * When the slab allocator is used, the USEMEM/LACKMEM mechanism of
+ * tracking memory usage is not used.
+ */
+ bool slabAllocatorUsed;
+
+ char *slabMemoryBegin; /* beginning of slab memory arena */
+ char *slabMemoryEnd; /* end of slab memory arena */
+ SlabSlot *slabFreeHead; /* head of free list */
+
+ /* Memory used for input and output tape buffers. */
+ size_t tape_buffer_mem;
+
+ /*
+ * When we return a tuple to the caller in tuplesort_gettuple_XXX, that
+ * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE
+ * modes), we remember the tuple in 'lastReturnedTuple', so that we can
+ * recycle the memory on next gettuple call.
+ */
+ void *lastReturnedTuple;
+
+ /*
+ * While building initial runs, this is the current output run number.
+ * Afterwards, it is the number of initial runs we made.
+ */
+ int currentRun;
+
+ /*
+ * Logical tapes, for merging.
+ *
+ * The initial runs are written in the output tapes. In each merge pass,
+ * the output tapes of the previous pass become the input tapes, and new
+ * output tapes are created as needed. When nInputTapes equals
+ * nInputRuns, there is only one merge pass left.
+ */
+ LogicalTape **inputTapes;
+ int nInputTapes;
+ int nInputRuns;
+
+ LogicalTape **outputTapes;
+ int nOutputTapes;
+ int nOutputRuns;
+
+ LogicalTape *destTape; /* current output tape */
+
+ /*
+ * These variables are used after completion of sorting to keep track of
+ * the next tuple to return. (In the tape case, the tape's current read
+ * position is also critical state.)
+ */
+ LogicalTape *result_tape; /* actual tape of finished output */
+ int current; /* array index (only used if SORTEDINMEM) */
+ bool eof_reached; /* reached EOF (needed for cursors) */
+
+ /* markpos_xxx holds marked position for mark and restore */
+ int64 markpos_block; /* tape block# (only used if SORTEDONTAPE) */
+ int markpos_offset; /* saved "current", or offset in tape block */
+ bool markpos_eof; /* saved "eof_reached" */
+
+ /*
+ * These variables are used during parallel sorting.
+ *
+ * worker is our worker identifier. Follows the general convention that
+ * -1 value relates to a leader tuplesort, and values >= 0 worker
+ * tuplesorts. (-1 can also be a serial tuplesort.)
+ *
+ * shared is mutable shared memory state, which is used to coordinate
+ * parallel sorts.
+ *
+ * nParticipants is the number of worker Tuplesortstates known by the
+ * leader to have actually been launched, which implies that they must
+ * finish a run that the leader needs to merge. Typically includes a
+ * worker state held by the leader process itself. Set in the leader
+ * Tuplesortstate only.
+ */
+ int worker;
+ Sharedsort *shared;
+ int nParticipants;
+
+ /*
+ * Additional state for managing "abbreviated key" sortsupport routines
+ * (which currently may be used by all cases except the hash index case).
+ * Tracks the intervals at which the optimization's effectiveness is
+ * tested.
+ */
+ int64 abbrevNext; /* Tuple # at which to next check
+ * applicability */
+
+ /*
+ * Resource snapshot for time of sort start.
+ */
+ PGRUsage ru_start;
+};
+
+extern bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
+ SortTuple *stup);
+
+/*
+ * Private mutable state of tuplesort-parallel-operation. This is allocated
+ * in shared memory.
+ */
+struct Sharedsort
+{
+ /* mutex protects all fields prior to tapes */
+ slock_t mutex;
+
+ /*
+ * currentWorker generates ordinal identifier numbers for parallel sort
+ * workers. These start from 0, and are always gapless.
+ *
+ * Workers increment workersFinished to indicate having finished. If this
+ * is equal to state.nParticipants within the leader, leader is ready to
+ * merge worker runs.
+ */
+ int currentWorker;
+ int workersFinished;
+
+ /* Temporary file space */
+ SharedFileSet fileset;
+
+ /* Size of tapes flexible array */
+ int nTapes;
+
+ /*
+ * Tapes array used by workers to report back information needed by the
+ * leader to concatenate all worker tapes into one for merging
+ */
+ TapeShare tapes[FLEXIBLE_ARRAY_MEMBER];
+};
+
+/*
+ * Is the given tuple allocated from the slab memory arena?
+ */
+#define IS_SLAB_SLOT(state, tuple) \
+ ((char *) (tuple) >= (state)->slabMemoryBegin && \
+ (char *) (tuple) < (state)->slabMemoryEnd)
+
+/*
+ * Return the given tuple to the slab memory free list, or pfree() it if it
+ * was palloc'd.  'tuple' is evaluated once, 'state' possibly several times.
+ */
+#define RELEASE_SLAB_SLOT(state, tuple) \
+ do { \
+ SlabSlot *slab_slot_ = (SlabSlot *) (tuple); \
+ \
+ if (IS_SLAB_SLOT((state), slab_slot_)) \
+ { \
+ slab_slot_->nextfree = (state)->slabFreeHead; \
+ (state)->slabFreeHead = slab_slot_; \
+ } else \
+ pfree(slab_slot_); \
+ } while(0)
+
+#define REMOVEABBREV(state,stup,count) ((*(state)->base.removeabbrev) (state, stup, count))
+#define COMPARETUP(state,a,b) ((*(state)->base.comparetup) (a, b, state))
+#define WRITETUP(state,tape,stup) ((*(state)->base.writetup) (state, tape, stup))
+#define READTUP(state,stup,tape,len) ((*(state)->base.readtup) (state, stup, tape, len))
+#define FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0)
+#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed)
+#define USEMEM(state,amt) ((state)->availMem -= (amt))
+#define FREEMEM(state,amt) ((state)->availMem += (amt))
+#define SERIAL(state) ((state)->shared == NULL)
+#define WORKER(state) ((state)->shared && (state)->worker != -1)
+#define LEADER(state) ((state)->shared && (state)->worker == -1)
+
+#endif /* TUPLESORTSTATE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9ea573fae21..71fe57acc87 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4309,3 +4309,136 @@ zic_t
ExplainExtensionOption
ExplainOptionHandler
overexplain_options
+
+vci_applicable_udf_template
+vci_aggtranstype_kind
+AggrefTransInfo
+vci_search_vci_scan_context_t
+vci_table_info_t
+vci_gather_used_attrs_t
+vci_renumber_attrs_t
+father_gather_plans
+FuncExprinfo
+VciScalarArrayOpExprHashEntry
+VciScalarArrayOpExprHashTable
+VciAggStatePerAggData
+VciAggStatePerGroupData
+RosChunkBuffer
+RosChunkStorage
+vci_PointerAndLength
+vci_PackBitsEncoder
+vci_PackBitsDecoder
+vcis_c_extent_t
+vcis_c_common_dict_t
+vcis_column_meta_t
+vcis_common_dict_t
+vci_ColumnRelations
+vci_fetch_placeholder_t
+vci_index_placeholder_t
+vci_plan_info_t
+vci_query_context_t
+vci_vp_item_id
+VciVPExecOp_func
+VciVPNode
+VciVPContext
+VciTupleHotHashTableData
+VciTupleHotHashTable
+VciProjectionInfoSlot
+VciProjectionInfo
+VciPlan
+VciPlanState
+VciScan
+VciScanState
+VciSort
+VciSortState
+VciAgg
+VciAggStatePerAgg
+VciAggStatePerGroup
+VciAggState
+VciAdvanceAggref_Func
+VciGather
+VciGatherState
+VciVarState
+VciParamState
+vci_initexpr_t
+vci_topmost_plan_cb_t
+vci_mutator_t
+VciCopyDatumFunc
+vci_agg_trans_copy_funcs
+vci_CSQueryContextData
+vci_CSQueryContext
+vci_seq_scan_buffer_t
+vci_CSFetchContextData
+vci_CSFetchContext
+vci_minmax_t
+vci_extent_status_t
+vci_read_vector_status_t
+vci_virtual_tuples_column_info_t
+vci_virtual_tuples_t
+vcis_free_space_t
+VciGucStruct
+VciShmemStruct
+vci_id_t
+vci_memory_entry_t
+vci_memory_entries_t
+vci_inner_plan_type_t
+vci_plan_compat_t
+vci_plan_attr_t
+vci_param_exec_type_t
+vci_param_exec_attr_t
+vci_subplan_type_t
+vci_subplan_attr_t
+vci_rewrite_plan_context_t
+vci_devstat_t
+vci_memory_entry_list_t
+vci_devload_t
+vci_dev_t
+vci_update_info_t
+vci_tid_array_t
+vci_blk_array_t
+vci_RosCommandContext
+vci_target_extent_info_t
+vci_workerslot_t
+vci_ros_command_t
+vci_offset_in_extent_t
+vci_MainRelVar
+vci_MainRelHeaderInfo
+vcis_m_column_t
+vcis_m_extent_t
+vci_wmrv_t
+vcis_attribute_type_t
+vcis_compression_type_t
+vcis_extent_type_t
+vcis_tidcrid_item_type_t
+vcis_dict_type_t
+vcis_tid_crid_op_type_t
+vci_RelationPair
+vci_local_delete_list
+vci_local_ros_t
+vci_DictInfo
+vci_meta_item_scanner_t
+vci_wosros_conv_worker_arg_t
+vci_special_udf_info_t
+vcis_Crid
+vcis_tidcrid_meta_item_t
+vcis_tidcrid_meta_t
+vcis_tidcrid_pagetag_t
+vcis_tidcrid_leaf_t
+vcis_tidcrid_trunk_t
+vcis_tidcrid_pair_item_t
+vcis_tidcrid_pair_list_t
+vci_TidCridUpdateListContext
+vci_TidCridRelations
+VciTableScanPolicy
+VciScanMode
+VciFetchPos
+vci_RebuildCommand
+mountpoint_dev_pair_t
+vci_CmpInfo
+CopyCommandInfo
+CEKind
+WosKind
+vci_tid_tid_xid64_t
+message_on_worker_exit_t
+vci_MergeTidCridUpdateListContext
+node_info_t
--
2.39.3