From 64eee1c00e14f8a44c787ab90dc00ca36c2fb6f6 Mon Sep 17 00:00:00 2001 From: Aya Iwata Date: Fri, 9 May 2025 18:32:18 +0900 Subject: [PATCH 1/2] Add postgres code fix needed for VCI --- src/backend/access/common/relation.c | 2 + src/backend/access/common/reloptions.c | 51 ++++ src/backend/access/heap/heapam.c | 18 ++ src/backend/access/heap/heapam_handler.c | 4 + src/backend/access/heap/heapam_visibility.c | 6 + src/backend/access/transam/xact.c | 225 ++++++++++++++++ src/backend/access/transam/xlogfuncs.c | 1 + src/backend/access/transam/xlogrecovery.c | 18 +- src/backend/catalog/dependency.c | 7 + src/backend/catalog/index.c | 27 +- src/backend/commands/explain.c | 31 ++- src/backend/commands/indexcmds.c | 109 ++++---- src/backend/commands/tablecmds.c | 125 +++++++++ src/backend/commands/vacuum.c | 16 ++ src/backend/executor/execAmi.c | 2 + src/backend/executor/execExpr.c | 6 +- src/backend/executor/execExprInterp.c | 36 +++ src/backend/executor/execGrouping.c | 17 +- src/backend/executor/execProcnode.c | 1 + src/backend/executor/execScan.c | 2 +- src/backend/executor/execUtils.c | 2 + src/backend/executor/nodeCustom.c | 16 ++ src/backend/executor/nodeModifyTable.c | 10 + src/backend/executor/nodeSubplan.c | 6 + src/backend/jit/llvm/llvmjit_expr.c | 12 + src/backend/jit/llvm/llvmjit_types.c | 2 + src/backend/nodes/gen_node_support.pl | 57 +++- src/backend/nodes/params.c | 2 +- src/backend/optimizer/path/allpaths.c | 17 ++ src/backend/optimizer/plan/createplan.c | 4 +- src/backend/postmaster/autovacuum.c | 25 ++ src/backend/postmaster/bgworker.c | 109 ++++++++ src/backend/storage/ipc/procarray.c | 10 +- src/backend/storage/lmgr/lock.c | 26 +- src/backend/storage/lmgr/lwlock.c | 2 + src/backend/utils/adt/timestamp.c | 18 +- src/backend/utils/cache/relcache.c | 66 +++++ src/backend/utils/init/postinit.c | 2 +- src/backend/utils/mmgr/mcxt.c | 41 ++- src/backend/utils/sort/tuplesort.c | 261 +----------------- src/backend/utils/time/combocid.c | 46 ++++ 
src/bin/pg_dump/common.c | 1 + src/bin/pg_dump/pg_dump.c | 35 ++- src/bin/pg_dump/pg_dump.h | 1 + src/include/access/heapam.h | 4 + src/include/access/reloptions.h | 3 +- src/include/access/xact.h | 7 + src/include/access/xlogrecovery.h | 3 + src/include/catalog/dependency.h | 3 + src/include/catalog/index.h | 2 + src/include/commands/explain.h | 7 + src/include/commands/tablecmds.h | 6 + src/include/datatype/timestamp.h | 22 ++ src/include/executor/execExpr.h | 17 ++ src/include/executor/executor.h | 5 +- src/include/executor/nodeModifyTable.h | 2 + src/include/executor/nodeSubplan.h | 2 + src/include/nodes/execnodes.h | 2 + src/include/nodes/extensible.h | 4 + src/include/nodes/memnodes.h | 3 +- src/include/nodes/params.h | 6 +- src/include/nodes/plannodes.h | 3 + src/include/optimizer/planner.h | 2 + src/include/postmaster/autovacuum.h | 3 + src/include/postmaster/bgworker.h | 13 + src/include/storage/itemptr.h | 3 + src/include/storage/lwlock.h | 11 + src/include/storage/standby.h | 1 + src/include/utils/combocid.h | 4 + src/include/utils/memutils.h | 4 + src/include/utils/memutils_internal.h | 13 +- src/include/utils/numeric.h | 2 + src/include/utils/rel.h | 3 + src/include/utils/relcache.h | 5 + src/include/utils/snapshot.h | 10 + src/include/utils/tuplesortstate.h | 285 ++++++++++++++++++++ src/tools/pgindent/typedefs.list | 133 +++++++++ 77 files changed, 1677 insertions(+), 391 deletions(-) create mode 100644 src/include/utils/tuplesortstate.h diff --git a/src/backend/access/common/relation.c b/src/backend/access/common/relation.c index 22c4cd5a256..80f9d947847 100644 --- a/src/backend/access/common/relation.c +++ b/src/backend/access/common/relation.c @@ -64,9 +64,11 @@ relation_open(Oid relationId, LOCKMODE lockmode) * If we didn't get the lock ourselves, assert that caller holds one, * except in bootstrap mode where no locks are used. 
*/ +/* Assert(lockmode != NoLock || IsBootstrapProcessingMode() || CheckRelationLockedByMe(r, AccessShareLock, true)); +*/ /* Make note that we've accessed a temporary relation */ if (RelationUsesLocalBuffers(r)) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46c1dce222d..4057f6a1a63 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -35,6 +35,9 @@ #include "utils/memutils.h" #include "utils/rel.h" +/* TODO: Move under contrib/vci. */ +static void validateVciColumnIds(const char *values); + /* * Contents of pg_class.reloptions * @@ -557,6 +560,34 @@ static relopt_enum enumRelOpts[] = static relopt_string stringRelOpts[] = { + /* TODO: Move under contrib/vci. */ + { + /* TODO: fix comment */ + { + "vci_column_ids", + "ID for searching columns that are (or will be) indexed by VCI from vci_columns table", + RELOPT_KIND_VCI, + AccessExclusiveLock + }, + 0, + false, + validateVciColumnIds, + NULL, + "" + }, + { + { + "vci_dropped_column_ids", + "ID for storing dropped columns in indexed table", + RELOPT_KIND_VCI, + AccessExclusiveLock + }, + 0, + false, + validateVciColumnIds, + NULL, + "" + }, /* list terminator */ {{NULL}} }; @@ -1907,6 +1938,12 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, vacuum_truncate), offsetof(StdRdOptions, vacuum_truncate_set)}, {"vacuum_max_eager_freeze_failure_rate", RELOPT_TYPE_REAL, offsetof(StdRdOptions, vacuum_max_eager_freeze_failure_rate)} + , + /* TODO: Move under contrib/vci */ + {"vci_column_ids", RELOPT_TYPE_STRING, + offsetof(StdRdOptions, vci_column_ids_offset)}, + {"vci_dropped_column_ids", RELOPT_TYPE_STRING, + offsetof(StdRdOptions, vci_dropped_column_ids_offset)} }; return (bytea *) build_reloptions(reloptions, validate, kind, @@ -2162,3 +2199,17 @@ AlterTableGetRelOptionsLockLevel(List *defList) return lockmode; } + +/* TODO: Move under contrib/vci. 
*/ +static void +validateVciColumnIds(const char *values) +{ + if (values == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Dummy validation for vci_column_ids and vci_dropped_column_ids"))); + + } + elog(DEBUG2, "Empty process of validateVciColumnIds: %s", values); +} diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index c1a4de14a59..72703c915f1 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -54,6 +54,7 @@ #include "utils/spccache.h" #include "utils/syscache.h" +void (*add_index_delete_hook) (Relation indexRelation, ItemPointer heap_tid, TransactionId xmin) = NULL; static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); @@ -343,6 +344,9 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * results for a non-MVCC snapshot, the caller must hold some higher-level * lock that ensures the interesting tuple(s) won't change.) */ + if (keep_startblock) + goto skip_get_number_of_blocks; + if (scan->rs_base.rs_parallel != NULL) { bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel; @@ -351,6 +355,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) else scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_base.rs_rd); +skip_get_number_of_blocks: + /* * If the table is large relative to NBuffers, use a bulk-read access * strategy and enable synchronized scanning (see syncscan.c). 
Although @@ -2762,6 +2768,7 @@ heap_delete(Relation relation, ItemPointer tid, bool all_visible_cleared = false; HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */ bool old_key_copied = false; + TransactionId old_xmin; Assert(ItemPointerIsValid(tid)); @@ -3006,6 +3013,8 @@ l1: xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); + old_xmin = HeapTupleHeaderGetXmin(tp.t_data); + START_CRIT_SECTION(); /* @@ -3151,6 +3160,9 @@ l1: if (old_key_tuple != NULL && old_key_copied) heap_freetuple(old_key_tuple); + if (add_index_delete_hook) + add_index_delete_hook(relation, tid, old_xmin); + return TM_Ok; } @@ -3253,6 +3265,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, infomask2_old_tuple, infomask_new_tuple, infomask2_new_tuple; + TransactionId old_xmin; Assert(ItemPointerIsValid(otid)); @@ -3697,6 +3710,8 @@ l2: &xmax_old_tuple, &infomask_old_tuple, &infomask2_old_tuple); + old_xmin = HeapTupleHeaderGetRawXmin(oldtup.t_data); + /* * And also prepare an Xmax value for the new copy of the tuple. 
If there * was no xmax previously, or there was one but all lockers are now gone, @@ -4180,6 +4195,9 @@ l2: bms_free(modified_attrs); bms_free(interesting_attrs); + if (add_index_delete_hook) + add_index_delete_hook(relation, otid, old_xmin); + return TM_Ok; } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ac082fefa77..ea9d940eaa9 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -46,6 +46,9 @@ #include "utils/builtins.h" #include "utils/rel.h" +/* Preserve the original heap tuple that is passed to callback in heapam_index_build_range_scan() */ +HeapTuple IndexHeapTuple; + static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate); @@ -1658,6 +1661,7 @@ heapam_index_build_range_scan(Relation heapRelation, * some index AMs want to do further processing on the data first. So * pass the values[] and isnull[] arrays, instead. 
*/ + IndexHeapTuple = heapTuple; if (HeapTupleIsHeapOnly(heapTuple)) { diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 05f6946fe60..abb12264c62 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -78,6 +78,9 @@ #include "utils/builtins.h" #include "utils/snapmgr.h" +/* Hook consulted by HeapTupleSatisfiesVisibility() for the VCI snapshot types */ +bool (*add_snapshot_satisfies_hook) (HeapTuple tup, Snapshot snapshot, Buffer buffer) = NULL; + /* * SetHintBits() @@ -1791,6 +1794,11 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer) return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer); case SNAPSHOT_NON_VACUUMABLE: return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer); + case SNAPSHOT_VCI_WOS2ROS: + case SNAPSHOT_VCI_LOCALROS: + if (add_snapshot_satisfies_hook) + return add_snapshot_satisfies_hook(htup, snapshot, buffer); + break; /* no hook installed: report not visible below */ } return false; /* keep compiler quiet */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f765..7358a88d9cd 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -6444,3 +6444,249 @@ xact_redo(XLogReaderState *record) else elog(PANIC, "xact_redo: unknown op code %u", info); } + +/** + * Save the current transaction state stack. + * Walks the parent links from CurrentTransactionState and deep-copies each + * level, including its childXids array, into palloc'd memory. + * + * @return Pointer to the copy of the current (innermost) level, or NULL if there is no current state. 
+ */ +TransactionStateData * +pef_save_transaction_state(void) +{ + + TransactionStateData *dst; + TransactionStateData *src; + TransactionStateData *child; + TransactionStateData *res = NULL; + + src = CurrentTransactionState; + child = NULL; + while (src) + { + dst = palloc(sizeof(TransactionStateData)); + + if (child) + child->parent = dst; + else + res = dst; + + memcpy(dst, src, sizeof(TransactionStateData)); + + /* Deep-copy childXids; the source array may be NULL when empty, and the copy's capacity is exactly nChildXids. */ + if (src->nChildXids > 0) + { + dst->childXids = palloc(sizeof(*src->childXids) * src->nChildXids); + memcpy(dst->childXids, src->childXids, sizeof(*src->childXids) * src->nChildXids); + } + else + dst->childXids = NULL; + dst->maxChildXids = dst->nChildXids; + + child = dst; + src = src->parent; + } + + return res; +} + +/* + * Rebuild a live transaction state stack from a saved copy, parents first. + * The top level is copied into TopTransactionStateData; every level gets its + * own memory context and resource owner (the top level reuses + * CurrentResourceOwner when one exists). + */ +static TransactionStateData * +pef_copy_transaction_state(TransactionStateData *src) +{ + TransactionStateData *dst; + + if (!src) + return NULL; + + if (src->parent) + dst = (TransactionState) palloc(sizeof(TransactionStateData)); + else + dst = &TopTransactionStateData; + + memcpy(dst, src, sizeof(TransactionStateData)); + + /* Deep-copy childXids; the source array may be NULL when empty, and the copy's capacity is exactly nChildXids. 
*/ + if (src->nChildXids > 0) + { + dst->childXids = palloc(sizeof(*src->childXids) * src->nChildXids); + memcpy(dst->childXids, src->childXids, sizeof(*src->childXids) * src->nChildXids); + } + else + dst->childXids = NULL; + dst->maxChildXids = dst->nChildXids; + dst->parent = pef_copy_transaction_state(src->parent); + + if (dst->parent) + { + dst->curTransactionContext = + AllocSetContextCreate(dst->parent->curTransactionContext, + "CurTransactionContext", + ALLOCSET_DEFAULT_SIZES); + dst->curTransactionOwner = + ResourceOwnerCreate(dst->parent->curTransactionOwner, "SubTransaction"); + } + else + { + VirtualTransactionId vxid; + + /* We are in the main transaction state level */ + + /* + * If we already have a BG Worker resource owner (possibly created for + * initializing shm queues), use it. Otherwise, create a new one + * here. + */ + dst->curTransactionOwner = + (CurrentResourceOwner ? 
CurrentResourceOwner : + ResourceOwnerCreate(NULL, "Background Worker")); + + dst->curTransactionContext = + AllocSetContextCreate(TopMemoryContext, + "TopTransactionContext", + ALLOCSET_DEFAULT_SIZES); + + /* Create a toplevel context and resource owner for the transaction. */ + TopTransactionContext = dst->curTransactionContext; + TopTransactionResourceOwner = dst->curTransactionOwner; + + /* + * To initialize local transactionId, following code is taken from + * StartTransaction(). We possibly need to bring in more code here. + */ + + /* + * Assign a new LocalTransactionId, and combine it with the backendId + * to form a virtual transaction id. + */ + vxid.procNumber = MyProcNumber; + vxid.localTransactionId = GetNextLocalTransactionId(); + + /* + * Lock the virtual transaction id before we announce it in the proc + * array + */ + VirtualXactLockTableInsert(vxid); + + /* + * Advertise it in the proc array. We assume assignment of + * LocalTransactionID is atomic, and the backendId should be set + * already. + */ + Assert(MyProc->vxid.procNumber == vxid.procNumber); + MyProc->vxid.lxid = vxid.localTransactionId; + } + + return dst; +} + +MemoryContext +pef_restore_transaction_state(void *src) +{ + TransactionStateData *dst; + + dst = pef_copy_transaction_state((TransactionState) src); + + CurrentTransactionState = dst; + CurTransactionContext = dst->curTransactionContext; + CurrentResourceOwner = dst->curTransactionOwner; + CurTransactionResourceOwner = dst->curTransactionOwner; + + return MemoryContextSwitchTo(CurTransactionContext); + +} + +/* + * pef_cleanup_transaction_state() + * During exit, cleanup the transaction state restored by the worker. We don't + * want to commit or abort using this transaction state; instead, we need to do + * this cleanup. 
+ */ +void +pef_cleanup_transaction_state(void) +{ + TransactionState s = CurrentTransactionState; + + /* + * We don't bother about cleaning up memory, because we know this is the + * end phase, and the worker is going to die. But we do want to reset the + * transaction state such that Abort or Commit won't be called. + */ + s->state = TRANS_DEFAULT; + s->blockState = TBLOCK_DEFAULT; + s->parent = NULL; + + /* Prevent cancel/die interrupt while cleaning up */ + HOLD_INTERRUPTS(); + + /* Make sure we have a valid memory context and resource owner */ + AtAbort_Memory(); + AtAbort_ResourceOwner(); + + /* + * Release any LW locks we might be holding as quickly as possible. + * (Regular locks, however, must be held till we finish aborting.) + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! + */ + LWLockReleaseAll(); + + /* Clean up buffer content locks, too */ + UnlockBuffers(); + + /* + * Post-abort cleanup. See notes in CommitTransaction() concerning + * ordering. We can skip all of it if the transaction failed before + * creating a resource owner. 
+ */ + if (TopTransactionResourceOwner != NULL) + { + CallXactCallbacks(XACT_EVENT_ABORT); + + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_BEFORE_LOCKS, + false, true); + AtEOXact_Buffers(false); + AtEOXact_RelationCache(false); + AtEOXact_TypeCache(); + AtEOXact_Inval(false); + smgrDoPendingDeletes(false); + AtEOXact_MultiXact(); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_LOCKS, + false, true); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_AFTER_LOCKS, + false, true); + + AtEOXact_GUC(false, 1); + AtEOXact_SPI(false); + AtEOXact_on_commit_actions(false); + AtEOXact_Namespace(false, false); + AtEOXact_SMgr(); + AtEOXact_Files(false); + AtEOXact_ComboCid(); + AtEOXact_HashTables(false); + AtEOXact_PgStat(false, true); + AtEOXact_LogicalRepWorkers(false); + pgstat_report_xact_timestamp(0); + } + + RESUME_INTERRUPTS(); +} + +/** + * Restore the saved transaction and statement start timestamps. + * @param[in] xactSt, stmtSt values to store in xactStartTimestamp and stmtStartTimestamp 
+ */ +void +pef_restore_start_timestamp(TimestampTz xactSt, TimestampTz stmtSt) +{ + xactStartTimestamp = xactSt; + stmtStartTimestamp = stmtSt; +} diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 8c3090165f0..b0b02d7ac9d 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -607,6 +607,7 @@ pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS) statestr = "not paused"; break; case RECOVERY_PAUSE_REQUESTED: + case RECOVERY_VCI_PAUSE_REQUESTED: statestr = "pause requested"; break; case RECOVERY_PAUSED: diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6ce979f2d8b..a10f63082d9 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -2947,7 +2947,8 @@ recoveryPausesHere(bool endOfRecovery) ereport(LOG, (errmsg("pausing at the end of recovery"), errhint("Execute pg_wal_replay_resume() to promote."))); - else + /* If pause requested by VCI, the log is not output. */ + else if (GetRecoveryPauseState() != RECOVERY_VCI_PAUSE_REQUESTED) ereport(LOG, (errmsg("recovery has paused"), errhint("Execute pg_wal_replay_resume() to continue."))); @@ -3113,6 +3114,18 @@ SetRecoveryPause(bool recoveryPause) ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV); } +/* Set the recovery pause requested for VCI. */ +void +SetVciRecoveryPause(void) +{ + SpinLockAcquire(&XLogRecoveryCtl->info_lck); + + if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED) + XLogRecoveryCtl->recoveryPauseState = RECOVERY_VCI_PAUSE_REQUESTED; + + SpinLockRelease(&XLogRecoveryCtl->info_lck); +} + /* * Confirm the recovery pause by setting the recovery pause state to * RECOVERY_PAUSED. 
@@ -3122,7 +3135,8 @@ ConfirmRecoveryPaused(void) { /* If recovery pause is requested then set it paused */ SpinLockAcquire(&XLogRecoveryCtl->info_lck); - if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED) + if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED || + XLogRecoveryCtl->recoveryPauseState == RECOVERY_VCI_PAUSE_REQUESTED) XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED; SpinLockRelease(&XLogRecoveryCtl->info_lck); } diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 18316a3968b..6255a01373b 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -85,6 +85,7 @@ #include "utils/lsyscache.h" #include "utils/syscache.h" +bool (*add_drop_relation_hook) (const ObjectAddress *object, int flags) = NULL; /* * Deletion processing requires additional state for each ObjectAddress that @@ -1357,6 +1358,12 @@ doDeletion(const ObjectAddress *object, int flags) { char relKind = get_rel_relkind(object->objectId); + if (add_drop_relation_hook) + { + if (add_drop_relation_hook(object, flags)) + break; + } + if (relKind == RELKIND_INDEX || relKind == RELKIND_PARTITIONED_INDEX) { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 739a92bdcc1..cfdc85075fa 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -133,6 +133,7 @@ static void ResetReindexProcessing(void); static void SetReindexPending(List *indexes); static void RemoveReindexPending(Oid indexOid); +bool (*add_reindex_index_hook) (Relation) = NULL; /* * relationHasPrimaryKey @@ -342,12 +343,17 @@ ConstructTupleDescriptor(Relation heapRelation, /* Simple index column */ const FormData_pg_attribute *from; - Assert(atnum > 0); /* should've been caught above */ - if (atnum > natts) /* safety check */ elog(ERROR, "invalid column number %d", atnum); - from = TupleDescAttr(heapTupDesc, - AttrNumberGetAttrOffset(atnum)); + if (atnum > 0) + { + from = 
TupleDescAttr(heapTupDesc, + AttrNumberGetAttrOffset(atnum)); + } + else + { + from = SystemAttributeDefinition(atnum); + } to->atttypid = from->atttypid; to->attlen = from->attlen; @@ -3692,6 +3698,19 @@ reindex_index(const ReindexStmt *stmt, Oid indexId, return; } + if (add_reindex_index_hook) + { + if (!add_reindex_index_hook(iRel)) + { + RemoveReindexPending(RelationGetRelid(iRel)); + + /* Close rels, but keep locks */ + index_close(iRel, NoLock); + table_close(heapRelation, NoLock); + return; + } + } + if (progress) pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID, iRel->rd_rel->relam); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 786ee865f14..9a3d2ea844e 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1223,6 +1223,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) ((ForeignScan *) plan)->fs_base_relids); break; case T_CustomScan: + case T_CustomPlanMarkPos: *rels_used = bms_add_members(*rels_used, ((CustomScan *) plan)->custom_relids); break; @@ -1518,6 +1519,7 @@ ExplainNode(PlanState *planstate, List *ancestors, } break; case T_CustomScan: + case T_CustomPlanMarkPos: sname = "Custom Scan"; custom_name = ((CustomScan *) plan)->methods->CustomName; if (custom_name) @@ -1681,10 +1683,24 @@ ExplainNode(PlanState *planstate, List *ancestors, ExplainScanTarget((Scan *) plan, es); break; case T_ForeignScan: - case T_CustomScan: if (((Scan *) plan)->scanrelid > 0) ExplainScanTarget((Scan *) plan, es); break; + case T_CustomScan: + case T_CustomPlanMarkPos: + { + CustomScanState *css = (CustomScanState *) planstate; + + /* + * Let the provider print its own target relation; otherwise fall + * back to the standard scan-target output used before this hook. 
+ */ + if (css->methods->ExplainCustomPlanTargetRel) + css->methods->ExplainCustomPlanTargetRel(css, es); + else if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget((Scan *) plan, es); + } + break; case T_IndexScan: { IndexScan *indexscan = (IndexScan *) plan; @@ -2156,6 +2172,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_foreignscan_info((ForeignScanState *) planstate, es); break; case T_CustomScan: + case 
T_CustomPlanMarkPos: { CustomScanState *css = (CustomScanState *) planstate; @@ -2416,6 +2433,7 @@ ExplainNode(PlanState *planstate, List *ancestors, "Subquery", NULL, es); break; case T_CustomScan: + case T_CustomPlanMarkPos: ExplainCustomChildren((CustomScanState *) planstate, ancestors, es); break; @@ -4415,6 +4433,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) case T_TidRangeScan: case T_ForeignScan: case T_CustomScan: + case T_CustomPlanMarkPos: case T_ModifyTable: /* Assert it's on a real relation */ Assert(rte->rtekind == RTE_RELATION); @@ -4987,3 +5006,29 @@ ExplainFlushWorkersState(ExplainState *es) pfree(wstate->worker_state_save); pfree(wstate); } + +/* + * ExplainPropertySortGroupKeys - + * exported wrapper around show_sort_group_keys(), passing 0 and NULL for + * the arguments this interface does not expose + */ +void +ExplainPropertySortGroupKeys(PlanState *planstate, const char *qlabel, + int nkeys, AttrNumber *keycols, + List *ancestors, ExplainState *es) +{ + show_sort_group_keys(planstate, qlabel, nkeys, 0, keycols, + NULL, NULL, NULL, ancestors, es); +} + +/* + * ExplainPropertyQual - + * exported wrapper around show_qual() + */ +void +ExplainPropertyQual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) +{ + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 33c2106c17c..b112c4ec52d 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1098,71 +1098,74 @@ DefineIndex(Oid tableId, } } - - /* - * We disallow indexes on system columns. They would not necessarily get - * updated correctly, and they don't seem useful anyway. - * - * Also disallow virtual generated columns in indexes (use expression - * index instead). 
- */ - for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++) - { - AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i]; - - if (attno < 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("index creation on system columns is not supported"))); - - - if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) - ereport(ERROR, - errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - stmt->primary ? - errmsg("primary keys on virtual generated columns are not supported") : - stmt->isconstraint ? - errmsg("unique constraints on virtual generated columns are not supported") : - errmsg("indexes on virtual generated columns are not supported")); - } - - /* - * Also check for system and generated columns used in expressions or - * predicates. - */ - if (indexInfo->ii_Expressions || indexInfo->ii_Predicate) + /* Skip disallowing index creation of system columns for VCI access method */ + if (strcmp(accessMethodName, "vci") != 0) { - Bitmapset *indexattrs = NULL; - int j; - pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); - pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); - - for (int i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) + /* + * We disallow indexes on system columns. They would not necessarily + * get updated correctly, and they don't seem useful anyway. + * + * Also disallow virtual generated columns in indexes (use expression + * index instead). + */ + for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++) { - if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, - indexattrs)) + AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i]; + + if (attno < 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("index creation on system columns is not supported"))); + + if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + stmt->primary ? 
+ errmsg("primary keys on virtual generated columns are not supported") : + stmt->isconstraint ? + errmsg("unique constraints on virtual generated columns are not supported") : + errmsg("indexes on virtual generated columns are not supported")); } /* - * XXX Virtual generated columns in index expressions or predicates - * could be supported, but it needs support in - * RelationGetIndexExpressions() and RelationGetIndexPredicate(). + * Also check for system and generated columns used in expressions or + * predicates. */ - j = -1; - while ((j = bms_next_member(indexattrs, j)) >= 0) + if (indexInfo->ii_Expressions || indexInfo->ii_Predicate) { - AttrNumber attno = j + FirstLowInvalidHeapAttributeNumber; + Bitmapset *indexattrs = NULL; + int j; - if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - stmt->isconstraint ? - errmsg("unique constraints on virtual generated columns are not supported") : - errmsg("indexes on virtual generated columns are not supported"))); + pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); + pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); + + for (int i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) + { + if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, + indexattrs)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index creation on system columns is not supported"))); + } + + /* + * XXX Virtual generated columns in index expressions or + * predicates could be supported, but it needs support in + * RelationGetIndexExpressions() and RelationGetIndexPredicate(). 
+ */ + j = -1; + while ((j = bms_next_member(indexattrs, j)) >= 0) + { + AttrNumber attno = j + FirstLowInvalidHeapAttributeNumber; + + if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + stmt->isconstraint ? + errmsg("unique constraints on virtual generated columns are not supported") : + errmsg("indexes on virtual generated columns are not supported"))); + } } } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 54ad38247aa..7384bf3750d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -109,6 +109,10 @@ #include "utils/typcache.h" #include "utils/usercontext.h" +bool (*add_alter_tablespace_hook) (Relation rel) = NULL; +void (*add_alter_table_change_owner_hook) (Oid relOid, char relKind, Oid newOwnerId) = NULL; +void (*add_alter_table_change_schema_hook) (Oid relOid, char relKind, Oid newNspOid) = NULL; + /* * ON COMMIT action list */ @@ -16206,6 +16210,9 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock /* If it has dependent sequences, recurse to change them too */ change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode); + + if (add_alter_table_change_owner_hook) + add_alter_table_change_owner_hook(relationOid, tuple_class->relkind, newOwnerId); } InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0); @@ -16755,6 +16762,112 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, } table_close(pgclass, RowExclusiveLock); + + /* + * Look up the index's access method, save the OID of its handler function + */ + if (rel->rd_rel->relam) + { + Form_pg_am aform; + HeapTuple amtuple; + + amtuple = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam)); + if (!HeapTupleIsValid(amtuple)) + elog(ERROR, "cache lookup failed for access method %u", + rel->rd_rel->relam); + aform = (Form_pg_am) GETSTRUCT(amtuple); + 
+ if (strcmp(NameStr(aform->amname), "vci") == 0) + + /* + * if((rel->rd_am) && (strcmp(NameStr(rel->rd_am->amname), "vci") + * == 0)) + */ + { + Relation depRel, + viewRel; + Oid vci_relid, + viewrelid; + ScanKeyData key; + SysScanDesc scan; + HeapTuple tup; + + depRel = table_open(DependRelationId, AccessShareLock); + vci_relid = RelationGetRelid(rel); + ScanKeyInit(&key, + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(vci_relid)); + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 1, &key); + while (HeapTupleIsValid((tup = systable_getnext(scan)))) + { + Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup); + + /* Retrieve objid of the internal function */ + if (depform->classid == RelationRelationId) + { + viewrelid = depform->objid; + viewRel = table_open(viewrelid, AccessExclusiveLock); + pgclass = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(viewrelid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", viewrelid); + + datum = (Datum) 0; + + /* Generate new proposed reloptions (text array) */ + newOptions = transformRelOptions(datum, defList, NULL, validnsps, false, + operation == AT_ResetRelOptions); + + (void) view_reloptions(newOptions, true); + + /* + * All we need do here is update the pg_class row; the new + * options will be propagated into relcaches during + * post-commit cache inval. 
+ */ + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + if (newOptions != (Datum) 0) + repl_val[Anum_pg_class_reloptions - 1] = newOptions; + else + repl_null[Anum_pg_class_reloptions - 1] = true; + + repl_repl[Anum_pg_class_reloptions - 1] = true; + + newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass), + repl_val, repl_null, repl_repl); + + /* + * simple_heap_update(pgclass, &newtuple->t_self, + * newtuple); + * + * CatalogUpdateIndexes(pgclass, newtuple); + */ + /* Perform actual update */ + CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple); + InvokeObjectPostAlterHook(RelationRelationId, viewrelid, 0); + + heap_freetuple(newtuple); + + ReleaseSysCache(tuple); + + table_close(pgclass, RowExclusiveLock); + table_close(viewRel, AccessExclusiveLock); + + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + } + + ReleaseSysCache(amtuple); + } } /* @@ -16776,6 +16889,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) */ rel = relation_open(tableOid, lockmode); + if (add_alter_tablespace_hook) + { + if (add_alter_tablespace_hook(rel)) + { + relation_close(rel, NoLock); + return; + } + } + /* Check first if relation can be moved to new tablespace */ if (!CheckRelationTableSpaceMove(rel, newTableSpace)) { @@ -19027,6 +19149,9 @@ AlterRelationNamespaceInternal(Relation classRel, Oid relOid, { add_exact_object_address(&thisobj, objsMoved); + if (add_alter_table_change_schema_hook) + add_alter_table_change_schema_hook(relOid, classForm->relkind, newNspOid); + InvokeObjectPostAlterHook(RelationRelationId, relOid, 0); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 33a33bf6b1c..1eb71c22ae4 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -129,6 +129,8 @@ static double compute_parallel_delay(void); static VacOptValue get_vacoptval_from_boolean(DefElem 
*def); static bool vac_tid_reaped(ItemPointer itemptr, void *state); +bool (*add_skip_vacuum_hook) (Relation rel); + /* * GUC check function to ensure GUC value specified is within the allowable * range. @@ -2123,6 +2125,20 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, return false; } + /* + * Silently ignore if it's a VCI internal table. + */ + if (add_skip_vacuum_hook) + { + if (add_skip_vacuum_hook(rel)) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + } + /* * Silently ignore tables that are temp tables of other backends --- * trying to vacuum these will lead to great unhappiness, since their diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 1d0e8ad57b4..fff1b85ff59 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -437,6 +437,7 @@ ExecSupportsMarkRestore(Path *pathnode) return true; case T_CustomScan: + case T_CustomPlanMarkPos: if (castNode(CustomPath, pathnode)->flags & CUSTOMPATH_SUPPORT_MARK_RESTORE) return true; return false; @@ -563,6 +564,7 @@ ExecSupportsBackwardScan(Plan *node) return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan); case T_CustomScan: + case T_CustomPlanMarkPos: if (((CustomScan *) node)->flags & CUSTOMPATH_SUPPORT_BACKWARD_SCAN) return true; return false; diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index f1569879b52..5c29ab337af 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -68,7 +68,6 @@ typedef struct ExprSetupInfo List *multiexpr_subplans; } ExprSetupInfo; -static void ExecReadyExpr(ExprState *state); static void ExecInitExprRec(Expr *node, ExprState *state, Datum *resv, bool *resnull); static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, @@ -77,7 +76,6 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, static void ExecInitSubPlanExpr(SubPlan *subplan, 
ExprState *state, Datum *resv, bool *resnull); -static void ExecCreateExprSetupSteps(ExprState *state, Node *node); static void ExecPushExprSetupSteps(ExprState *state, ExprSetupInfo *info); static bool expr_setup_walker(Node *node, ExprSetupInfo *info); static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op); @@ -898,7 +896,7 @@ ExecCheck(ExprState *state, ExprContext *econtext) * Therefore this should be used instead of directly calling * ExecReadyInterpretedExpr(). */ -static void +void ExecReadyExpr(ExprState *state) { if (jit_compile_expr(state)) @@ -2877,7 +2875,7 @@ ExecInitSubPlanExpr(SubPlan *subplan, * Add expression steps performing setup that's needed before any of the * main execution of the expression. */ -static void +void ExecCreateExprSetupSteps(ExprState *state, Node *node) { ExprSetupInfo info = {0, 0, 0, 0, 0, NIL}; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 8a72b5e70a4..f25a2370b33 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -188,6 +188,21 @@ static pg_attribute_always_inline void ExecAggPlainTransByRef(AggState *aggstate int setno); static char *ExecGetJsonValueItemString(JsonbValue *item, bool *resnull); +ExprEvalVar_hook_type ExprEvalVar_hook = NULL; +ExprEvalParam_hook_type ExprEvalParam_hook = NULL; +void +VciExprEvalVarHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext) +{ + Assert(ExprEvalVar_hook != NULL); + (*ExprEvalVar_hook) (state, op, econtext); +} +void +VciExprEvalParamHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext) +{ + Assert(ExprEvalParam_hook != NULL); + (*ExprEvalParam_hook) (state, op, econtext); +} + /* * ScalarArrayOpExprHashEntry * Hash table entry type used during EEOP_HASHED_SCALARARRAYOP @@ -592,6 +607,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI, &&CASE_EEOP_AGG_ORDERED_TRANS_DATUM, 
&&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE, + &&CASE_EEOP_VCI_VAR, + &&CASE_EEOP_VCI_PARAM_EXEC, &&CASE_EEOP_LAST }; @@ -2265,6 +2282,25 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + EEO_CASE(EEOP_VCI_VAR) + { + /* Evaluation is delegated to ExprEvalVar_hook, which must be set */ + Assert(ExprEvalVar_hook != NULL); + (*ExprEvalVar_hook) (state, op, econtext); + + EEO_NEXT(); + } + + EEO_CASE(EEOP_VCI_PARAM_EXEC) + { + /* Evaluation is delegated to ExprEvalParam_hook, which must be set */ + Assert(ExprEvalParam_hook != NULL); + (*ExprEvalParam_hook) (state, op, econtext); + + EEO_NEXT(); + } + + + EEO_CASE(EEOP_LAST) { /* unreachable */ diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 255bd795361..a9d212aaec6 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -485,18 +485,11 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, MemoryContextSwitchTo(hashtable->tablecxt); - /* - * Copy the first tuple into the table context, and request - * additionalsize extra bytes before the allocation. - * - * The caller can get a pointer to the additional data with - * TupleHashEntryGetAdditional(), and store arbitrary data there. - * Placing both the tuple and additional data in the same - * allocation avoids the need to store an extra pointer in - * TupleHashEntryData or allocate an additional chunk.
- */ - entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot, - hashtable->additionalsize); + entry->firstTuple = ExecCopySlotMinimalTuple(slot); + if (hashtable->additionalsize > 0) + entry->additional = palloc0(hashtable->additionalsize); + else + entry->additional = NULL; } } else diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f5f9cfbeead..60fd2a4e46a 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -287,6 +287,7 @@ ExecInitNode(Plan *node, EState *estate, int eflags) break; case T_CustomScan: + case T_CustomPlanMarkPos: result = (PlanState *) ExecInitCustomScan((CustomScan *) node, estate, eflags); break; diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index 90726949a87..99f7c600520 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -139,7 +139,7 @@ ExecScanReScan(ScanState *node) */ if (IsA(node->ps.plan, ForeignScan)) relids = ((ForeignScan *) node->ps.plan)->fs_base_relids; - else if (IsA(node->ps.plan, CustomScan)) + else if (IsA(node->ps.plan, CustomScan) || IsA(node->ps.plan, CustomPlanMarkPos)) relids = ((CustomScan *) node->ps.plan)->custom_relids; else elog(ERROR, "unexpected scan node: %d", diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 772c86e70e9..6b8b08ebaf9 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -850,8 +850,10 @@ ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel) * than the minimum. 
*/ rel = table_open(rte->relid, NoLock); +/* NOTE(review): this patch disables the upstream lock-ownership assertion below; presumably because VCI workers bypass LockAcquire -- confirm before merging: Assert(rte->rellockmode == AccessShareLock || CheckRelationLockedByMe(rel, rte->rellockmode, false)); +*/ } else { diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c index ac2196b64c7..2eaef2f2d86 100644 --- a/src/backend/executor/nodeCustom.c +++ b/src/backend/executor/nodeCustom.c @@ -49,6 +49,22 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags) css->ss.ps.state = estate; css->ss.ps.ExecProcNode = ExecCustomScan; + if (strcmp(cscan->methods->CustomName, "VCI Scan") == 0 || + strcmp(cscan->methods->CustomName, "VCI Sort") == 0 || + strcmp(cscan->methods->CustomName, "VCI Aggregate") == 0 || + strcmp(cscan->methods->CustomName, "VCI HashAggregate") == 0 || + strcmp(cscan->methods->CustomName, "VCI GroupAggregate") == 0 || + strcmp(cscan->methods->CustomName, "VCI Gather") == 0) + { + /* + * The callback of custom-scan provider applies the final + * initialization of the custom-scan-state node according to its + * logic.
+ */ + css->methods->BeginCustomScan(css, estate, eflags); + return css; + } + /* create expression context for node */ ExecAssignExprContext(estate, &css->ss.ps); diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 46d533b7288..258b60ca2de 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -70,6 +70,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" +List *(*add_should_index_insert_hook) (ResultRelInfo *, TupleTableSlot *, ItemPointer, EState *) = NULL; typedef struct MTTargetRelLookup { @@ -2330,6 +2331,15 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, NULL, NIL, (updateCxt->updateIndexes == TU_Summarizing)); + /* + * VCI update hook + */ + else if (resultRelInfo->ri_NumIndices > 0 && !updateCxt->updateIndexes) + { + if (add_should_index_insert_hook) + recheckIndexes = add_should_index_insert_hook(resultRelInfo, slot, &slot->tts_tid, context->estate); + } + /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(context->estate, resultRelInfo, NULL, NULL, diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index f7f6fc2da0b..a957e5ce1a4 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -1334,3 +1334,9 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent) parent->chgParam = bms_add_member(parent->chgParam, paramid); } } + +Datum +ExecSubPlanExternal(SubPlanState *node, ExprContext *econtext, bool *isNull) +{ + return ExecSubPlan(node, econtext, isNull); +} diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 890bcb0b0a7..a312d98546b 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -2940,6 +2940,18 @@ llvm_compile_expr(ExprState *state) LLVMBuildBr(b, opblocks[opno + 1]); break; + case EEOP_VCI_VAR: + build_EvalXFunc(b, mod, "VciExprEvalVarHook", + v_state, op, 
v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + + case EEOP_VCI_PARAM_EXEC: + build_EvalXFunc(b, mod, "VciExprEvalParamHook", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + case EEOP_LAST: Assert(false); break; diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index dbe0282e98f..6d6dc3993b9 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -181,4 +181,6 @@ void *referenced_functions[] = strlen, varsize_any, ExecInterpExprStillValid, + VciExprEvalParamHook, + VciExprEvalVarHook, }; diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f760..867701f9d36 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -153,6 +153,8 @@ my @extra_tags = qw( AllocSetContext GenerationContext SlabContext BumpContext TIDBitmap WindowObjectData + CustomPlanMarkPos + SmcAllocSetContext ); # This is a regular node, but we skip parsing it from its header file @@ -679,10 +681,21 @@ foreach my $n (@node_types) my $struct_no_equal = (elem $n, @no_equal); next if $struct_no_copy && $struct_no_equal; - print $cfs "\t\tcase T_${n}:\n" - . "\t\t\tretval = _copy${n}(from);\n" - . "\t\t\tbreak;\n" - unless $struct_no_copy; + if($n eq 'CustomScan') + { + print $cfs "\t\tcase T_${n}:\n" + . "\t\tcase T_CustomPlanMarkPos:\n" + . "\t\t\tretval = _copy${n}(from);\n" + . "\t\t\tbreak;\n" + unless $struct_no_copy; + } + else + { + print $cfs "\t\tcase T_${n}:\n" + . "\t\t\tretval = _copy${n}(from);\n" + . "\t\t\tbreak;\n" + unless $struct_no_copy; + } print $efs "\t\tcase T_${n}:\n" . 
"\t\t\tretval = _equal${n}(a, b);\n" @@ -691,13 +704,35 @@ foreach my $n (@node_types) next if elem $n, @custom_copy_equal; - print $cff " -static $n * -_copy${n}(const $n *from) -{ -\t${n} *newnode = makeNode($n); - -" unless $struct_no_copy; + if ($n eq 'CustomScan') + { + print $cff "static $n *\n" + . "_copy${n}(const $n *from)\n" + . "{\n" + . "\tCustomScan *newnode;\n\n" + . "\tif (strcmp(from->methods->CustomName, \"VCI Scan\") == 0 || + strcmp(from->methods->CustomName, \"VCI Sort\") == 0 || + strcmp(from->methods->CustomName, \"VCI Aggregate\") == 0 || + strcmp(from->methods->CustomName, \"VCI HashAggregate\") == 0 || + strcmp(from->methods->CustomName, \"VCI GroupAggregate\") == 0 || + strcmp(from->methods->CustomName, \"VCI Gather\") == 0)\n" + . "\t{\n" + . "\t\tnewnode = from->methods->CopyCustomPlan(from);\n" + . "\t}\n" + . "\telse\n" + . "\t\tnewnode = makeNode(CustomScan);\n\n" + . "\t((Node *) newnode)->type = nodeTag(from);\n\n" unless $struct_no_copy; + } + else + { + print $cff + "static $n *\n" + . "_copy${n}(const $n *from)\n" + . "{\n" + ."\t${n} *newnode = makeNode($n);\n\n" + + unless $struct_no_copy; + } print $eff " static bool diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c index ec5946c5777..219f344da02 100644 --- a/src/backend/nodes/params.c +++ b/src/backend/nodes/params.c @@ -25,11 +25,11 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" +ExecInitParam_hook_type ExecInitParam_hook; static void paramlist_parser_setup(ParseState *pstate, void *arg); static Node *paramlist_param_ref(ParseState *pstate, ParamRef *pref); - /* * Allocate and initialize a new ParamListInfo structure. 
* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 905250b3325..bc956a25904 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -47,6 +47,7 @@ #include "port/pg_bitutils.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +#include "utils/guc.h" /* Bitmask flags for pushdown_safety_info.unsafeFlags */ @@ -768,6 +769,8 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { Relids required_outer; + char *isVCIEnabled; + bool is_partition = false; /* * We don't support pushing join clauses into the quals of a seqscan, but @@ -793,6 +796,20 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* If appropriate, consider parallel sequential scan */ if (rel->consider_parallel && required_outer == NULL) create_plain_partial_paths(root, rel); + /**** + * Putting the isRelHasVCIIndex after the create_plain_partial_paths because + * want to enable oss parallelscan working on VCI tables but disable other + * gather plan like parallel_loop,parallel_agg working on VCI tables. 
+ * Don't do this for partitioned tables or partitions as parallelscans on partitioned + * tables require gather plans + */ + if ((bms_membership(root->all_baserels) == BMS_SINGLETON) && isRelHasVCIIndex(rte->relid, &is_partition) && + !is_partition) + { + isVCIEnabled = GetConfigOptionByName("vci.enable", NULL, true); /* missing_ok: NULL if the GUC is not registered */ + if (isVCIEnabled != NULL && strcmp(isVCIEnabled, "on") == 0) + rel->consider_parallel = false; + } /* Consider index scans */ create_index_paths(root, rel); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 4ad30b7627e..269d80de9b6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -175,8 +175,8 @@ static Node *fix_indexqual_clause(PlannerInfo *root, static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); static List *get_switched_clauses(List *clauses, Relids outerrelids); static List *order_qual_clauses(PlannerInfo *root, List *clauses); +void copy_plan_costsize(Plan *dest, Plan *src); static void copy_generic_path_info(Plan *dest, Path *src); -static void copy_plan_costsize(Plan *dest, Plan *src); static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples); static void label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan, @@ -5460,7 +5460,7 @@ copy_generic_path_info(Plan *dest, Path *src) * Copy cost and size info from a lower plan node to an inserted node. * (Most callers alter the info after copying it.)
*/ -static void +void copy_plan_costsize(Plan *dest, Plan *src) { dest->disabled_nodes = src->disabled_nodes; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 16756152b71..2b5e9f8d632 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -139,6 +139,9 @@ int Log_autovacuum_min_duration = 600000; #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */ #define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */ +/* Flags to tell if we are in an autovacuum process */ +static bool vci_am_autovacuum_launcher = false; + /* * Variables to save the cost-related storage parameters for the current * relation being vacuumed by this autovacuum worker. Using these, we can @@ -361,6 +364,20 @@ static void check_av_worker_gucs(void); * AUTOVACUUM LAUNCHER CODE ********************************************************************/ +/* + * We need this set from the outside, before InitProcess is called + */ +void +vci_AutovacuumLauncherIAm(void) +{ + vci_am_autovacuum_launcher = true; +} +void +vci_AutovacuumLauncherNotIAm(void) +{ + vci_am_autovacuum_launcher = false; +} + /* * Main entry point for the autovacuum launcher process. 
*/ @@ -369,6 +386,8 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len) { sigjmp_buf local_sigjmp_buf; + vci_am_autovacuum_launcher = true; + Assert(startup_data_len == 0); /* Release postmaster's working memory context */ @@ -3315,6 +3334,12 @@ autovac_init(void) check_av_worker_gucs(); } +bool +vci_IsAutoVacuumLauncherProcess(void) +{ + return vci_am_autovacuum_launcher; +} + /* * AutoVacuumShmemSize * Compute space needed for autovacuum-related shared memory diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 116ddf7b835..51e57eb8a18 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -356,6 +356,22 @@ BackgroundWorkerStateChange(bool allow_new_workers) return; } + /* + * Set shmem slot number, and initialize cancel flags. + */ + rw->rw_worker.bgw_shmem_slot = slotno; + + rw->rw_worker.bgw_cancel_databaseId = InvalidOid; + rw->rw_worker.bgw_cancel_flags = BGWORKER_CANCEL_NOACCEPT; + + /* + * Update the contents in the shared memory also, these are used in + * EXEC_BACKEND (win32) case + */ + slot->worker.bgw_shmem_slot = slotno; + slot->worker.bgw_cancel_databaseId = InvalidOid; + slot->worker.bgw_cancel_flags = BGWORKER_CANCEL_NOACCEPT; + /* * Copy strings in a paranoid way. If shared memory is corrupted, the * source data might not even be NUL-terminated. @@ -1395,3 +1411,96 @@ GetBackgroundWorkerTypeByPid(pid_t pid) return result; } + +/* + * Accept background worker cancel: store databaseId and cancel_flags in the + * calling worker's shared-memory slot, where CancelBackgroundWorkers() reads them. + */ +void +AcceptBackgroundWorkerCancel(Oid databaseId, int cancel_flags) +{ + int slotno; + BackgroundWorkerSlot *slot; + + /* Get shmem slot number from BGW entry. */ + Assert(MyBgworkerEntry); + slotno = MyBgworkerEntry->bgw_shmem_slot; + + /* Get shmem slot address. */ + Assert(slotno >= 0 && slotno < BackgroundWorkerData->total_slots); + slot = &BackgroundWorkerData->slot[slotno]; + + /* Set cancel flags and databaseId to shmem slot. */ + /* 1st, set databaseId.
*/ + slot->worker.bgw_cancel_databaseId = databaseId; + /* 2nd, set cancel flags. */ + slot->worker.bgw_cancel_flags = cancel_flags; + + /* + * This operation doesn't need LOCK, because 'bgw_cancel_flags' is 32bit + * value. + */ +} + +/* + * Cancel background workers. + */ +void +CancelBackgroundWorkers(Oid databaseId, int cancel_flags) +{ + int slotno; + bool signal_postmaster = false; + + LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE); + + for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno) + { + BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; + + /* Check worker slot. */ + if (slot->in_use) + { + /* 1st, check cancel flags. */ + if (slot->worker.bgw_cancel_flags & cancel_flags) + { + /* 2nd, compare databaseId. */ + if (slot->worker.bgw_cancel_databaseId == databaseId) + { + /* + * Set terminate flag in shared memory, unless slot has + * been reused. + */ + slot->terminate = true; + signal_postmaster = true; + } + } + } + } + + LWLockRelease(BackgroundWorkerLock); + + /* Make sure the postmaster notices the change to shared memory. */ + if (signal_postmaster) + SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE); +} + +/* + * Scan background workers list via scanner. 
+ */ +void +ScanBackgroundWorkers(void (*scanner) (BackgroundWorker *worker, pid_t pid, void *data), void *data) +{ + int slotno; + + LWLockAcquire(BackgroundWorkerLock, LW_SHARED); + + for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno) + { + BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; + + if (slot->in_use) + scanner(&slot->worker, slot->pid, data); + } + + LWLockRelease(BackgroundWorkerLock); +} diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index e5b945a9ee3..29f1179cc49 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -65,6 +65,8 @@ #include "utils/rel.h" #include "utils/snapmgr.h" +#include "postmaster/bgworker.h" + #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) /* Our shared memory area */ @@ -1977,7 +1979,8 @@ GlobalVisHorizonKindForRel(Relation rel) Assert(!rel || rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_MATVIEW || - rel->rd_rel->relkind == RELKIND_TOASTVALUE); + rel->rd_rel->relkind == RELKIND_TOASTVALUE || + rel->rd_rel->relkind == RELKIND_INDEX); if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress()) return VISHORIZON_SHARED; @@ -3806,6 +3809,11 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) for (index = 0; index < nautovacs; index++) (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */ + /* + * Cancel background workers by admin commands. 
+ */ + CancelBackgroundWorkers(databaseId, BGWORKER_CANCEL_ADMIN_COMMANDS); + /* sleep, then try again */ pg_usleep(100 * 1000L); /* 100ms */ } diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 86b06b9223f..7c4d06e645e 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -39,6 +39,8 @@ #include "access/xlogutils.h" #include "miscadmin.h" #include "pg_trace.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" @@ -810,8 +812,18 @@ LockAcquire(const LOCKTAG *locktag, bool sessionLock, bool dontWait) { - return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait, - true, NULL, false); + /* + * Don't lock for VCI parallel workers and other type of workers should go + * in normal flow, In case if there is any change in background worker + * name for VCI parallel workers, the following code also needs an update. + * FIXME: Try to use the community parallelism code, so that we don't need + * our own VCI parallel infrastructure. + */ + if (AmBackgroundWorkerProcess() && strstr(MyBgworkerEntry->bgw_name, "backend=")) + return LOCKACQUIRE_OK; + else + return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait, + true, NULL, false); } /* @@ -2107,6 +2119,16 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) */ if (!locallock || locallock->nLocks <= 0) { + /* + * Don't lock for VCI parallel workers and other type of workers + * should go in normal flow, In case if there is any change in + * background worker name for VCI parallel workers, the following code + * also needs an update. FIXME: Try to use the community parallelism + * code, so that we don't need our own VCI parallel infrastructure. 
+ */ + if (AmBackgroundWorkerProcess() && strstr(MyBgworkerEntry->bgw_name, "backend=")) + return true; + elog(WARNING, "you don't own a lock of type %s", lockMethodTable->lockModeNames[lockmode]); return false; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 5148ef982e3..01843639049 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -245,6 +245,8 @@ static inline void LWLockReportWaitStart(LWLock *lock); static inline void LWLockReportWaitEnd(void); static const char *GetLWTrancheName(uint16 trancheId); +LWLockKind Vci_lwlock_kind = -1; + #define T_NAME(lock) \ GetLWTrancheName((lock)->tranche) diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 347089b7626..6e9b827d9e5 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -24,6 +24,7 @@ #include "catalog/pg_type.h" #include "common/int.h" #include "common/int128.h" +#include "datatype/timestamp.h" #include "funcapi.h" #include "libpq/pqformat.h" #include "miscadmin.h" @@ -73,19 +74,6 @@ typedef struct pg_tz *attimezone; } generate_series_timestamptz_fctx; -/* - * The transition datatype for interval aggregates is declared as internal. - * It's a pointer to an IntervalAggState allocated in the aggregate context. - */ -typedef struct IntervalAggState -{ - int64 N; /* count of finite intervals processed */ - Interval sumX; /* sum of finite intervals processed */ - /* These counts are *not* included in N! 
Use IA_TOTAL_COUNT() as needed */ - int64 pInfcount; /* count of +infinity intervals */ - int64 nInfcount; /* count of -infinity intervals */ -} IntervalAggState; - #define IA_TOTAL_COUNT(ia) \ ((ia)->N + (ia)->pInfcount + (ia)->nInfcount) @@ -3502,7 +3490,7 @@ interval_larger(PG_FUNCTION_ARGS) PG_RETURN_INTERVAL_P(result); } -static void +void finite_interval_pl(const Interval *span1, const Interval *span2, Interval *result) { Assert(!INTERVAL_NOT_FINITE(span1)); @@ -3981,7 +3969,7 @@ in_range_interval_interval(PG_FUNCTION_ARGS) * context. When the state data needs to be allocated in the current memory * context, we use palloc0 directly e.g. interval_avg_deserialize(). */ -static IntervalAggState * +IntervalAggState * makeIntervalAggState(FunctionCallInfo fcinfo) { IntervalAggState *state; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 68ff67de549..5cc8b69d9d1 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -92,6 +92,8 @@ #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */ +bool (*add_skip_vci_index_hook) (Relation rel) = NULL; + /* * Whether to bother checking if relation cache memory needs to be freed * eagerly. See also RelationBuildDesc() and pg_config_manual.h. @@ -5386,6 +5388,16 @@ restart: indexDesc = index_open(indexOid, AccessShareLock); + if (add_skip_vci_index_hook) + { + if (add_skip_vci_index_hook(indexDesc)) + { + /* Skip if Index is VCI index */ + index_close(indexDesc, AccessShareLock); + continue; + } + } + /* * Extract index expressions and index predicate. 
Note: Don't use * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because @@ -6964,6 +6976,72 @@ unlink_initfile(const char *initfilename, int elevel) } } +/* + * isRelHasVCIIndex + * Report whether relation 'relid' has an index whose access method is + * named "vci". + * + * *is_partition is set to true if the relation is a partition or a + * partitioned table, and to false otherwise. Relation and indexes are + * opened with NoLock; presumably the caller already holds suitable locks. + */ +bool +isRelHasVCIIndex(Oid relid, bool *is_partition) +{ + ListCell *l; + Relation relation; + + bool hasVCI = false; + + *is_partition = false; + relation = table_open(relid, NoLock); + + if ((relation->rd_rel->relispartition == true) || relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + *is_partition = true; + + if (relation->rd_rel->relhasindex) + { + List *indexoidlist; + + indexoidlist = RelationGetIndexList(relation); + + foreach(l, indexoidlist) + { + Oid relam; + Oid indexoid = lfirst_oid(l); + Relation indexRelation; + Form_pg_am aform; + HeapTuple amtuple; + + indexRelation = index_open(indexoid, NoLock); + relam = indexRelation->rd_rel->relam; + + amtuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relam)); + if (!HeapTupleIsValid(amtuple)) + elog(ERROR, "cache lookup failed for access method %u", + relam); + aform = (Form_pg_am) GETSTRUCT(amtuple); + + if (strcmp(NameStr(aform->amname), "vci") == 0) + { + hasVCI = true; + } + + ReleaseSysCache(amtuple); + index_close(indexRelation, NoLock); + + if (hasVCI) + break; + } + + /* RelationGetIndexList returns a palloc'd copy; release it. */ + list_free(indexoidlist); + } + + table_close(relation, NoLock); + return hasVCI; +} + /* * ResourceOwner callbacks */ diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 28f09a27001..96481bec795 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -836,7 +836,7 @@ InitPostgres(const char *in_dbname, Oid dboid, before_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ - if (AmAutoVacuumLauncherProcess()) + if (vci_IsAutoVacuumLauncherProcess() || AmAutoVacuumLauncherProcess()) { /* fill in the remainder of this entry in the PgBackendStatus array */ pgstat_bestart_final(); diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 7d28ca706eb..5000bd0667e 100644 ---
a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -48,7 +48,7 @@ static Size BogusGetChunkSpace(void *pointer); [id].get_chunk_context = BogusGetChunkContext, \ [id].get_chunk_space = BogusGetChunkSpace -static const MemoryContextMethods mcxt_methods[] = { +static MemoryContextMethods mcxt_methods[] = { /* aset.c */ [MCTX_ASET_ID].alloc = AllocSetAlloc, [MCTX_ASET_ID].free_p = AllocSetFree, @@ -348,7 +348,29 @@ BogusGetChunkSpace(void *pointer) /***************************************************************************** * EXPORTED ROUTINES * *****************************************************************************/ +/* + * MemoryContextRegisterMethods + * Register methods to mcxt_methods with specified ID. + * + * This overwrites the existing mcxt_methods entry; the previous methods are + * copied to *old_methods, or discarded if old_methods is NULL. + * Only MCTX_1_RESERVED_GLIBC_ID and MCTX_2_RESERVED_GLIBC_ID may be registered; any other ID raises FATAL. + */ +void +MemoryContextRegisterMethods(int method_id, + const MemoryContextMethods *new_methods, + MemoryContextMethods *old_methods) +{ + Assert(0 <= method_id && method_id < (1 << MEMORY_CONTEXT_METHODID_BITS)); + if (method_id != MCTX_1_RESERVED_GLIBC_ID && method_id != MCTX_2_RESERVED_GLIBC_ID) + elog(FATAL, "memory context method entry for \'%d\' cannot be overwritten", method_id); + + if (old_methods) + *old_methods = mcxt_methods[method_id]; + + mcxt_methods[method_id] = *new_methods; +} /* * MemoryContextInit @@ -1172,11 +1194,12 @@ MemoryContextCheck(MemoryContext context) * so this can contain Assert but not elog/ereport.
*/ void -MemoryContextCreate(MemoryContext node, - NodeTag tag, - MemoryContextMethodID method_id, - MemoryContext parent, - const char *name) +MemoryContextCreateWithExLock(MemoryContext node, + NodeTag tag, + MemoryContextMethodID method_id, + MemoryContext parent, + const char *name, + LWLock *lock) { /* Creating new memory contexts is not allowed in a critical section */ Assert(CritSectionCount == 0); @@ -1200,12 +1223,18 @@ MemoryContextCreate(MemoryContext node, /* OK to link node into context tree */ if (parent) { + if (lock) + LWLockAcquire(lock, LW_EXCLUSIVE); + node->nextchild = parent->firstchild; if (parent->firstchild != NULL) parent->firstchild->prevchild = node; parent->firstchild = node; /* inherit allowInCritSection flag from parent */ node->allowInCritSection = parent->allowInCritSection; + + if (lock) + LWLockRelease(lock); } else { diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 65ab83fff8b..0f0747b5eea 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -109,6 +109,7 @@ #include "utils/memutils.h" #include "utils/pg_rusage.h" #include "utils/tuplesort.h" +#include "utils/tuplesortstate.h" /* * Initial size of memtuples array. We're trying to select this size so that @@ -127,40 +128,6 @@ bool trace_sort = false; bool optimize_bounded_sort = true; #endif - -/* - * During merge, we use a pre-allocated set of fixed-size slots to hold - * tuples. To avoid palloc/pfree overhead. - * - * Merge doesn't require a lot of memory, so we can afford to waste some, - * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the - * palloc() overhead is not significant anymore. - * - * 'nextfree' is valid when this chunk is in the free list. When in use, the - * slot holds a tuple. - */ -#define SLAB_SLOT_SIZE 1024 - -typedef union SlabSlot -{ - union SlabSlot *nextfree; - char buffer[SLAB_SLOT_SIZE]; -} SlabSlot; - -/* - * Possible states of a Tuplesort object. 
These denote the states that - * persist between calls of Tuplesort routines. - */ -typedef enum -{ - TSS_INITIAL, /* Loading tuples; still within memory limit */ - TSS_BOUNDED, /* Loading tuples into bounded-size heap */ - TSS_BUILDRUNS, /* Loading tuples; writing to tape */ - TSS_SORTEDINMEM, /* Sort completed entirely in memory */ - TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ - TSS_FINALMERGE, /* Performing final merge on-the-fly */ -} TupSortStatus; - /* * Parameters for calculation of number of tapes to use --- see inittapes() * and tuplesort_merge_order(). @@ -178,232 +145,6 @@ typedef enum #define TAPE_BUFFER_OVERHEAD BLCKSZ #define MERGE_BUFFER_SIZE (BLCKSZ * 32) - -/* - * Private state of a Tuplesort operation. - */ -struct Tuplesortstate -{ - TuplesortPublic base; - TupSortStatus status; /* enumerated value as shown above */ - bool bounded; /* did caller specify a maximum number of - * tuples to return? */ - bool boundUsed; /* true if we made use of a bounded heap */ - int bound; /* if bounded, the maximum number of tuples */ - int64 tupleMem; /* memory consumed by individual tuples. - * storing this separately from what we track - * in availMem allows us to subtract the - * memory consumed by all tuples when dumping - * tuples to tape */ - int64 availMem; /* remaining memory available, in bytes */ - int64 allowedMem; /* total memory allowed, in bytes */ - int maxTapes; /* max number of input tapes to merge in each - * pass */ - int64 maxSpace; /* maximum amount of space occupied among sort - * of groups, either in-memory or on-disk */ - bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk - * space, false when its value for in-memory - * space */ - TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ - LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ - - /* - * This array holds the tuples now in sort memory. 
If we are in state - * INITIAL, the tuples are in no particular order; if we are in state - * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS - * and FINALMERGE, the tuples are organized in "heap" order per Algorithm - * H. In state SORTEDONTAPE, the array is not used. - */ - SortTuple *memtuples; /* array of SortTuple structs */ - int memtupcount; /* number of tuples currently present */ - int memtupsize; /* allocated length of memtuples array */ - bool growmemtuples; /* memtuples' growth still underway? */ - - /* - * Memory for tuples is sometimes allocated using a simple slab allocator, - * rather than with palloc(). Currently, we switch to slab allocation - * when we start merging. Merging only needs to keep a small, fixed - * number of tuples in memory at any time, so we can avoid the - * palloc/pfree overhead by recycling a fixed number of fixed-size slots - * to hold the tuples. - * - * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE - * slots. The allocation is sized to have one slot per tape, plus one - * additional slot. We need that many slots to hold all the tuples kept - * in the heap during merge, plus the one we have last returned from the - * sort, with tuplesort_gettuple. - * - * Initially, all the slots are kept in a linked list of free slots. When - * a tuple is read from a tape, it is put to the next available slot, if - * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd - * instead. - * - * When we're done processing a tuple, we return the slot back to the free - * list, or pfree() if it was palloc'd. We know that a tuple was - * allocated from the slab, if its pointer value is between - * slabMemoryBegin and -End. - * - * When the slab allocator is used, the USEMEM/LACKMEM mechanism of - * tracking memory usage is not used. 
- */ - bool slabAllocatorUsed; - - char *slabMemoryBegin; /* beginning of slab memory arena */ - char *slabMemoryEnd; /* end of slab memory arena */ - SlabSlot *slabFreeHead; /* head of free list */ - - /* Memory used for input and output tape buffers. */ - size_t tape_buffer_mem; - - /* - * When we return a tuple to the caller in tuplesort_gettuple_XXX, that - * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE - * modes), we remember the tuple in 'lastReturnedTuple', so that we can - * recycle the memory on next gettuple call. - */ - void *lastReturnedTuple; - - /* - * While building initial runs, this is the current output run number. - * Afterwards, it is the number of initial runs we made. - */ - int currentRun; - - /* - * Logical tapes, for merging. - * - * The initial runs are written in the output tapes. In each merge pass, - * the output tapes of the previous pass become the input tapes, and new - * output tapes are created as needed. When nInputTapes equals - * nInputRuns, there is only one merge pass left. - */ - LogicalTape **inputTapes; - int nInputTapes; - int nInputRuns; - - LogicalTape **outputTapes; - int nOutputTapes; - int nOutputRuns; - - LogicalTape *destTape; /* current output tape */ - - /* - * These variables are used after completion of sorting to keep track of - * the next tuple to return. (In the tape case, the tape's current read - * position is also critical state.) - */ - LogicalTape *result_tape; /* actual tape of finished output */ - int current; /* array index (only used if SORTEDINMEM) */ - bool eof_reached; /* reached EOF (needed for cursors) */ - - /* markpos_xxx holds marked position for mark and restore */ - int64 markpos_block; /* tape block# (only used if SORTEDONTAPE) */ - int markpos_offset; /* saved "current", or offset in tape block */ - bool markpos_eof; /* saved "eof_reached" */ - - /* - * These variables are used during parallel sorting. - * - * worker is our worker identifier. 
Follows the general convention that - * -1 value relates to a leader tuplesort, and values >= 0 worker - * tuplesorts. (-1 can also be a serial tuplesort.) - * - * shared is mutable shared memory state, which is used to coordinate - * parallel sorts. - * - * nParticipants is the number of worker Tuplesortstates known by the - * leader to have actually been launched, which implies that they must - * finish a run that the leader needs to merge. Typically includes a - * worker state held by the leader process itself. Set in the leader - * Tuplesortstate only. - */ - int worker; - Sharedsort *shared; - int nParticipants; - - /* - * Additional state for managing "abbreviated key" sortsupport routines - * (which currently may be used by all cases except the hash index case). - * Tracks the intervals at which the optimization's effectiveness is - * tested. - */ - int64 abbrevNext; /* Tuple # at which to next check - * applicability */ - - /* - * Resource snapshot for time of sort start. - */ - PGRUsage ru_start; -}; - -/* - * Private mutable state of tuplesort-parallel-operation. This is allocated - * in shared memory. - */ -struct Sharedsort -{ - /* mutex protects all fields prior to tapes */ - slock_t mutex; - - /* - * currentWorker generates ordinal identifier numbers for parallel sort - * workers. These start from 0, and are always gapless. - * - * Workers increment workersFinished to indicate having finished. If this - * is equal to state.nParticipants within the leader, leader is ready to - * merge worker runs. - */ - int currentWorker; - int workersFinished; - - /* Temporary file space */ - SharedFileSet fileset; - - /* Size of tapes flexible array */ - int nTapes; - - /* - * Tapes array used by workers to report back information needed by the - * leader to concatenate all worker tapes into one for merging - */ - TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; -}; - -/* - * Is the given tuple allocated from the slab memory arena? 
- */ -#define IS_SLAB_SLOT(state, tuple) \ - ((char *) (tuple) >= (state)->slabMemoryBegin && \ - (char *) (tuple) < (state)->slabMemoryEnd) - -/* - * Return the given tuple to the slab memory free list, or free it - * if it was palloc'd. - */ -#define RELEASE_SLAB_SLOT(state, tuple) \ - do { \ - SlabSlot *buf = (SlabSlot *) tuple; \ - \ - if (IS_SLAB_SLOT((state), buf)) \ - { \ - buf->nextfree = (state)->slabFreeHead; \ - (state)->slabFreeHead = buf; \ - } else \ - pfree(buf); \ - } while(0) - -#define REMOVEABBREV(state,stup,count) ((*(state)->base.removeabbrev) (state, stup, count)) -#define COMPARETUP(state,a,b) ((*(state)->base.comparetup) (a, b, state)) -#define WRITETUP(state,tape,stup) ((*(state)->base.writetup) (state, tape, stup)) -#define READTUP(state,stup,tape,len) ((*(state)->base.readtup) (state, stup, tape, len)) -#define FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0) -#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) -#define USEMEM(state,amt) ((state)->availMem -= (amt)) -#define FREEMEM(state,amt) ((state)->availMem += (amt)) -#define SERIAL(state) ((state)->shared == NULL) -#define WORKER(state) ((state)->shared && (state)->worker != -1) -#define LEADER(state) ((state)->shared && (state)->worker == -1) - /* * NOTES about on-tape representation of tuples: * diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 1e815571570..35995edad10 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -362,3 +362,49 @@ RestoreComboCIDState(char *comboCIDstate) elog(ERROR, "unexpected command ID while restoring combo CIDs"); } } + +/** + * Function for saving combocids. 
+ * @return the start address for ComboCidKeyData list + */ +struct ComboCidKeyData * +pef_save_combocids(void) +{ + struct ComboCidKeyData *save_addr; + + save_addr = palloc(sizeof(ComboCidKeyData) * usedComboCids); + memcpy(save_addr, comboCids, sizeof(ComboCidKeyData) * usedComboCids); + return save_addr; +} + +/** + * Function for getting the number of elements of ComboCidKeyData list + * @return the number of elements of ComboCidKeyData list + */ +Size +pef_get_num_combocids(void) +{ + return usedComboCids; +} + +/** + * Function for restoring combocids. + * @param[in] addr start address of the ComboCidKeyData list + * @param[in] the number of the elements of ComboCidKeyData list + */ +void +pef_restore_combocids(struct ComboCidKeyData *addr, Size num) +{ + int i; + ComboCidKeyData *ccids; + + ccids = (ComboCidKeyData *) addr; + for (i = 0; i < num; i++) + { + /* + * GetComboCommandId() register the ccidkey when the key does not + * exist in hash table + */ + GetComboCommandId(ccids[i].cmin, ccids[i].cmax); + } +} diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index aa1589e3331..bad40767864 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -359,6 +359,7 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables, AssignDumpId(&attachinfo->dobj); attachinfo->dobj.name = pg_strdup(tblinfo[i].dobj.name); attachinfo->dobj.namespace = tblinfo[i].dobj.namespace; + attachinfo->dobj.isvciview = false; attachinfo->parentTbl = tblinfo[i].parents[0]; attachinfo->partitionTbl = &tblinfo[i]; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e2e7975b34e..0a085a88ab7 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -7337,6 +7337,8 @@ getTables(Archive *fout, int *numTables) tblinfo[i].dummy_view = false; /* might get set during sort */ tblinfo[i].postponed_def = false; /* might get set during sort */ + tblinfo[i].dobj.isvciview = false; + /* Tables have data */ 
tblinfo[i].dobj.components |= DUMP_COMPONENT_DATA; @@ -8388,6 +8390,17 @@ getRules(Archive *fout) ruleinfo[i].ev_enabled = *(PQgetvalue(res, i, i_ev_enabled)); if (ruleinfo[i].ruletable) { + /* + * isvciview is set to true when the table is VCI VIEW. The + * judgement is done by the logic in vci_isVciRelation function + */ + if ((ruleinfo[i].ruletable->relkind == RELKIND_MATVIEW || + ruleinfo[i].ruletable->relkind == RELKIND_VIEW) && + !strcmp(ruleinfo[i].ruletable->dobj.name, ruleinfo[i].dobj.name)) + { + ruleinfo[i].ruletable->dobj.isvciview = true; + } + /* * If the table is a view or materialized view, force its ON * SELECT rule to be sorted before the view itself --- this @@ -16752,8 +16765,9 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) int j, k; - /* We had better have loaded per-column details about this table */ - Assert(tbinfo->interesting); + /* Do not dump VCI VIEW relations */ + if (tbinfo->dobj.isvciview) + return; qrelname = pg_strdup(fmtId(tbinfo->dobj.name)); qualrelname = pg_strdup(fmtQualifiedDumpable(tbinfo)); @@ -19743,12 +19757,27 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, case DO_FDW: case DO_FOREIGN_SERVER: case DO_TRANSFORM: + + /* + * Do not add dependency for VCI VIEW to suppress dependency + * loop message + */ + if (dobj->isvciview) + break; /* Pre-data objects: must come before the pre-data boundary */ addObjectDependency(preDataBound, dobj->dumpId); break; + case DO_LARGE_OBJECT: + + /* + * Do not add dependency for VCI VIEW to suppress dependency + * loop message + */ + if (dobj->isvciview) + break; + /* fallthrough */ case DO_TABLE_DATA: case DO_SEQUENCE_SET: - case DO_LARGE_OBJECT: case DO_LARGE_OBJECT_DATA: /* Data objects: must come between the boundaries */ addObjectDependency(dobj, preDataBound->dumpId); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 7417eab6aef..3eb3172b09f 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -159,6 +159,7 @@ 
typedef struct _dumpableObject DumpId *dependencies; /* dumpIds of objects this one depends on */ int nDeps; /* number of valid dependencies */ int allocDeps; /* allocated size of dependencies[] */ + bool isvciview; /* this table is vci view */ } DumpableObject; /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index e48fe434cd3..27870b7bc1b 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -395,6 +395,10 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused); +extern PGDLLIMPORT void (*add_index_delete_hook) (Relation indexRelation, ItemPointer heap_tid, TransactionId xmin); +extern PGDLLIMPORT bool (*add_snapshot_satisfies_hook) (HeapTuple htup, Snapshot snapshot, Buffer buffer); +extern PGDLLIMPORT bool (*add_skip_vacuum_hook) (Relation rel); + /* in heap/vacuumlazy.c */ struct VacuumParams; extern void heap_vacuum_rel(Relation rel, diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index dfbb4c85460..3891cbd98b9 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -51,8 +51,9 @@ typedef enum relopt_kind RELOPT_KIND_VIEW = (1 << 9), RELOPT_KIND_BRIN = (1 << 10), RELOPT_KIND_PARTITIONED = (1 << 11), + RELOPT_KIND_VCI = (1 << 12), /* if you add a new kind, make sure you update "last_default" too */ - RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED, + RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_VCI, /* some compilers treat enums as signed ints, so we can't use 1 << 31 */ RELOPT_KIND_MAX = (1 << 30) } relopt_kind; diff --git a/src/include/access/xact.h b/src/include/access/xact.h index b2bc10ee041..20c28d8a554 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -533,4 +533,11 @@ extern void EnterParallelMode(void); extern void ExitParallelMode(void); extern bool IsInParallelMode(void); +#ifndef FRONTEND +struct TransactionStateData 
*pef_save_transaction_state(void); +MemoryContext pef_restore_transaction_state(void *src); +void pef_cleanup_transaction_state(void); +void pef_restore_start_timestamp(TimestampTz xactSt, TimestampTz stmtSt); +#endif + #endif /* XACT_H */ diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index 91446303024..01da29ba8b8 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -46,6 +46,7 @@ typedef enum RecoveryPauseState RECOVERY_NOT_PAUSED, /* pause not requested */ RECOVERY_PAUSE_REQUESTED, /* pause requested, but not yet paused */ RECOVERY_PAUSED, /* recovery is paused */ + RECOVERY_VCI_PAUSE_REQUESTED, /* pause requested for VCI query */ } RecoveryPauseState; /* User-settable GUC parameters */ @@ -151,6 +152,8 @@ extern void WakeupRecovery(void); extern void StartupRequestWalReceiverRestart(void); extern void XLogRequestWalReceiverReply(void); +extern void SetVciRecoveryPause(void); + extern void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue); extern void xlog_outdesc(StringInfo buf, XLogReaderState *record); diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 0ea7ccf5243..768057a395f 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -225,4 +225,7 @@ extern void shdepDropOwned(List *roleids, DropBehavior behavior); extern void shdepReassignOwned(List *roleids, Oid newrole); +/* vci index original hook*/ +extern PGDLLIMPORT bool (*add_drop_relation_hook) (const ObjectAddress *object, int flags); + #endif /* DEPENDENCY_H */ diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 4daa8bef5ee..2b87e629fb5 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -175,6 +175,8 @@ extern void RestoreReindexState(const void *reindexstate); extern void IndexSetParentIndex(Relation partitionIdx, Oid parentOid); +extern PGDLLIMPORT bool 
(*add_reindex_index_hook) (Relation); +extern PGDLLIMPORT HeapTuple IndexHeapTuple; /* * itemptr_encode - Encode ItemPointer as int64/int8 diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 03c5b3d73e5..ef09e6c5f3d 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -83,4 +83,11 @@ extern void ExplainQueryText(struct ExplainState *es, QueryDesc *queryDesc); extern void ExplainQueryParameters(struct ExplainState *es, ParamListInfo params, int maxlen); +extern void ExplainPropertySortGroupKeys(PlanState *planstate, const char *qlabel, + int nkeys, AttrNumber *keycols, + List *ancestors, struct ExplainState *es); +extern void ExplainPropertyQual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, struct ExplainState *es); + #endif /* EXPLAIN_H */ diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 6832470d387..0f44744e5fe 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -107,4 +107,10 @@ extern void RangeVarCallbackOwnsRelation(const RangeVar *relation, extern bool PartConstraintImpliedByRelConstraint(Relation scanrel, List *partConstraint); +extern PGDLLIMPORT bool (*add_alter_tablespace_hook) (Relation rel); +extern PGDLLIMPORT void (*add_alter_table_change_owner_hook) (Oid relOid, + char relKind, Oid newOwnerId); +extern PGDLLIMPORT void (*add_alter_table_change_schema_hook) (Oid relOid, + char relKind, Oid newNspOid); + #endif /* TABLECMDS_H */ diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h index a924d58aabe..44f8bf7106a 100644 --- a/src/include/datatype/timestamp.h +++ b/src/include/datatype/timestamp.h @@ -15,6 +15,11 @@ #ifndef DATATYPE_TIMESTAMP_H #define DATATYPE_TIMESTAMP_H +#ifndef FRONTEND +#include "postgres.h" +#include "fmgr.h" +#endif + /* * Timestamp represents absolute time. 
* @@ -87,6 +92,23 @@ struct pg_itm_in int tm_year; }; +#ifndef FRONTEND +/* + * The transition datatype for interval aggregates is declared as internal. + * It's a pointer to an IntervalAggState allocated in the aggregate context. + */ +typedef struct IntervalAggState +{ + int64 N; /* count of finite intervals processed */ + Interval sumX; /* sum of finite intervals processed */ + /* These counts are *not* included in N! Use IA_TOTAL_COUNT() as needed */ + int64 pInfcount; /* count of +infinity intervals */ + int64 nInfcount; /* count of -infinity intervals */ +} IntervalAggState; + +extern IntervalAggState *makeIntervalAggState(FunctionCallInfo fcinfo); +extern void finite_interval_pl(const Interval *span1, const Interval *span2, Interval *result); +#endif /* Limits on the "precision" option (typmod) for these data types */ #define MAX_TIMESTAMP_PRECISION 6 diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 75366203706..1f52d5bf4ad 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -292,6 +292,8 @@ typedef enum ExprEvalOp EEOP_AGG_ORDERED_TRANS_DATUM, EEOP_AGG_ORDERED_TRANS_TUPLE, + EEOP_VCI_VAR, + EEOP_VCI_PARAM_EXEC, /* non-existent operation, used e.g. 
to check array lengths */ EEOP_LAST } ExprEvalOp; @@ -338,6 +340,7 @@ typedef struct ExprEvalStep /* but it's just the normal (negative) attr number for SYSVAR */ int attnum; Oid vartype; /* type OID of variable */ + PlanState *vci_parent_planstate; VarReturningType varreturningtype; /* return old/new/default */ } var; @@ -424,6 +427,7 @@ typedef struct ExprEvalStep { int paramid; /* numeric ID for parameter */ Oid paramtype; /* OID of parameter's datatype */ + Plan *vci_parent_plan; } param; /* for EEOP_PARAM_CALLBACK */ @@ -902,6 +906,19 @@ extern void ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext, TupleTableSlot *slot); +extern void ExecCreateExprSetupSteps(ExprState *state, Node *node); +extern void ExecInitExprSlots(ExprState *state, Node *node); +extern void ExecReadyExpr(ExprState *state); + +typedef void (*ExprEvalVar_hook_type) (ExprState *state, ExprEvalStep *op, + ExprContext *econtext); +extern PGDLLIMPORT ExprEvalVar_hook_type ExprEvalVar_hook; + +typedef void (*ExprEvalParam_hook_type) (ExprState *state, ExprEvalStep *op, + ExprContext *econtext); +extern PGDLLIMPORT ExprEvalParam_hook_type ExprEvalParam_hook; +extern void VciExprEvalVarHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext); +extern void VciExprEvalParamHook(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup, ExprContext *aggcontext); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index ae99407db89..929ffb94454 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -188,10 +188,7 @@ TupleHashEntryGetTuple(TupleHashEntry entry) static inline void * TupleHashEntryGetAdditional(TupleHashTable hashtable, TupleHashEntry entry) { - if (hashtable->additionalsize > 0) - return (char *) 
entry->firstTuple - hashtable->additionalsize; - else - return NULL; + return entry->additional; } #endif diff --git a/src/include/executor/nodeModifyTable.h b/src/include/executor/nodeModifyTable.h index bf3b592e28f..b49d55220ce 100644 --- a/src/include/executor/nodeModifyTable.h +++ b/src/include/executor/nodeModifyTable.h @@ -27,6 +27,8 @@ extern ModifyTableState *ExecInitModifyTable(ModifyTable *node, EState *estate, extern void ExecEndModifyTable(ModifyTableState *node); extern void ExecReScanModifyTable(ModifyTableState *node); +extern PGDLLIMPORT List *(*add_should_index_insert_hook) (ResultRelInfo *, TupleTableSlot *, ItemPointer, EState *); + extern void ExecInitMergeTupleSlots(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo); diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h index a1cafbcc694..b973450b784 100644 --- a/src/include/executor/nodeSubplan.h +++ b/src/include/executor/nodeSubplan.h @@ -26,4 +26,6 @@ extern void ExecSetParamPlan(SubPlanState *node, ExprContext *econtext); extern void ExecSetParamPlanMulti(const Bitmapset *params, ExprContext *econtext); +extern Datum ExecSubPlanExternal(SubPlanState *node, ExprContext *econtext, bool *isNull); + #endif /* NODESUBPLAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5b6cadb5a6c..85c345be24c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -846,6 +846,7 @@ typedef struct TupleHashTableData *TupleHashTable; typedef struct TupleHashEntryData { MinimalTuple firstTuple; /* copy of first tuple in this group */ + void *additional; /* user data */ uint32 status; /* hash status */ uint32 hash; /* hash value (cached) */ } TupleHashEntryData; @@ -2124,6 +2125,7 @@ typedef struct ForeignScanState * the BeginCustomScan method. 
* ---------------- */ +struct LimitState; struct CustomExecMethods; typedef struct CustomScanState diff --git a/src/include/nodes/extensible.h b/src/include/nodes/extensible.h index 1129c4ba4b1..37b29cf770b 100644 --- a/src/include/nodes/extensible.h +++ b/src/include/nodes/extensible.h @@ -115,6 +115,7 @@ typedef struct CustomScanMethods /* Create execution state (CustomScanState) from a CustomScan plan node */ Node *(*CreateCustomScanState) (CustomScan *cscan); + struct CustomScan *(*CopyCustomPlan) (const struct CustomScan *from); } CustomScanMethods; /* @@ -155,6 +156,9 @@ typedef struct CustomExecMethods void (*ExplainCustomScan) (CustomScanState *node, List *ancestors, ExplainState *es); + void (*SetBoundCustomScan) (const struct LimitState *limit, + struct CustomScanState *cps); + void (*ExplainCustomPlanTargetRel) (struct CustomScanState *node, struct ExplainState *es); } CustomExecMethods; extern void RegisterCustomScanMethods(const CustomScanMethods *methods); diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h index 5807ef805bd..2dc040211e6 100644 --- a/src/include/nodes/memnodes.h +++ b/src/include/nodes/memnodes.h @@ -147,6 +147,7 @@ typedef struct MemoryContextData (IsA((context), AllocSetContext) || \ IsA((context), SlabContext) || \ IsA((context), GenerationContext) || \ - IsA((context), BumpContext))) + IsA((context), BumpContext) || \ + IsA((context), SmcAllocSetContext))) #endif /* MEMNODES_H */ diff --git a/src/include/nodes/params.h b/src/include/nodes/params.h index 4321ca6329b..ef69f2d6bba 100644 --- a/src/include/nodes/params.h +++ b/src/include/nodes/params.h @@ -19,7 +19,7 @@ struct Bitmapset; struct ExprState; struct Param; struct ParseState; - +struct PlanState; /* * ParamListInfo @@ -148,6 +148,8 @@ typedef struct ParamExecData void *execPlan; /* should be "SubPlanState *" */ Datum value; bool isnull; + bool noNeedLoadFromMain; + bool loadedFromMain; } ParamExecData; /* type of argument for ParamsErrorCallback */ @@ 
-166,5 +168,7 @@ extern ParamListInfo RestoreParamList(char **start_address); extern char *BuildParamLogString(ParamListInfo params, char **knownTextValues, int maxlen); extern void ParamsErrorCallback(void *arg); +typedef struct ExprState *(*ExecInitParam_hook_type) (struct Param *param, struct PlanState *parent); +extern PGDLLIMPORT ExecInitParam_hook_type ExecInitParam_hook; #endif /* PARAMS_H */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 658d76225e4..c104b65825e 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -194,6 +194,9 @@ typedef struct Plan /* OK to use as part of parallel plan? */ bool parallel_safe; + /* plan number (1-origin) in the Query */ + AttrNumber plan_no; + /* * information needed for asynchronous execution */ diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 347c582a789..59c705379e3 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -37,6 +37,8 @@ typedef void (*create_upper_paths_hook_type) (PlannerInfo *root, void *extra); extern PGDLLIMPORT create_upper_paths_hook_type create_upper_paths_hook; +extern void copy_plan_costsize(Plan *dest, Plan *src); + extern PlannedStmt *standard_planner(Query *parse, const char *query_string, int cursorOptions, diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index e8135f41a1c..cdbac78aea6 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -51,6 +51,7 @@ extern PGDLLIMPORT int Log_autovacuum_min_duration; /* Status inquiry functions */ extern bool AutoVacuumingActive(void); +extern bool vci_IsAutoVacuumLauncherProcess(void); /* called from postmaster at server startup */ extern void autovac_init(void); @@ -63,6 +64,8 @@ pg_noreturn extern void AutoVacWorkerMain(const void *startup_data, size_t start extern bool AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId, BlockNumber 
blkno); +extern void vci_AutovacuumLauncherIAm(void); +extern void vci_AutovacuumLauncherNotIAm(void); /* shared memory stuff */ extern Size AutoVacuumShmemSize(void); diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h index 058667a47a0..a5fc9a2e420 100644 --- a/src/include/postmaster/bgworker.h +++ b/src/include/postmaster/bgworker.h @@ -68,6 +68,11 @@ #define BGWORKER_CLASS_PARALLEL 0x0010 /* add additional bgworker classes here */ +/* + * Flags for cancel by admin commands. + */ +#define BGWORKER_CANCEL_NOACCEPT 0x0000 +#define BGWORKER_CANCEL_ADMIN_COMMANDS 0x0001 typedef void (*bgworker_main_type) (Datum main_arg); @@ -98,6 +103,9 @@ typedef struct BackgroundWorker Datum bgw_main_arg; char bgw_extra[BGW_EXTRALEN]; pid_t bgw_notify_pid; /* SIGUSR1 this backend on start/stop */ + int bgw_shmem_slot; /* shmem slot ID */ + Oid bgw_cancel_databaseId; /* cancel target */ + int bgw_cancel_flags; /* cancel by admin commands */ } BackgroundWorker; typedef enum BgwHandleStatus @@ -161,4 +169,9 @@ extern void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, ui extern void BackgroundWorkerBlockSignals(void); extern void BackgroundWorkerUnblockSignals(void); +/* Cancel background workers. 
*/ +extern void AcceptBackgroundWorkerCancel(Oid databaseId, int cancel_flags); +extern void CancelBackgroundWorkers(Oid databaseId, int cancel_flags); +extern void ScanBackgroundWorkers(void (*scanner) (BackgroundWorker *worker, pid_t pid, void *data), void *data); + #endif /* BGWORKER_H */ diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h index 74b87a9114a..d97d1c5230b 100644 --- a/src/include/storage/itemptr.h +++ b/src/include/storage/itemptr.h @@ -46,6 +46,9 @@ typedef struct ItemPointerData #endif ItemPointerData; +#define SizeOfIptrData \ + (offsetof(ItemPointerData, ip_posid) + sizeof(OffsetNumber)) + typedef ItemPointerData *ItemPointer; /* ---------------- diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 2b4cbda39a5..786a6404f70 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -233,4 +233,15 @@ typedef enum BuiltinTrancheIds */ typedef LWLock *LWLockId; +typedef int LWLockKind; + +extern PGDLLIMPORT LWLockKind Vci_lwlock_kind; + +#define LWLockAcquireVci(_lock, _mode, _kind) \ + do { \ + Vci_lwlock_kind = (_kind); \ + LWLockAcquire((_lock), (_mode)); \ + Vci_lwlock_kind = -1; \ + } while (0) + #endif /* LWLOCK_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 24e2f5082bc..ed80f07bacf 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -24,6 +24,7 @@ extern PGDLLIMPORT int max_standby_archive_delay; extern PGDLLIMPORT int max_standby_streaming_delay; extern PGDLLIMPORT bool log_recovery_conflict_waits; +extern PGDLLIMPORT int vacuum_defer_cleanup_age; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h index 4a2069b2869..b520e2d6627 100644 --- a/src/include/utils/combocid.h +++ b/src/include/utils/combocid.h @@ -24,5 +24,9 @@ extern void AtEOXact_ComboCid(void); extern void 
RestoreComboCIDState(char *comboCIDstate); extern void SerializeComboCIDState(Size maxsize, char *start_address); extern Size EstimateComboCIDStateSpace(void); +struct ComboCidKeyData; +extern struct ComboCidKeyData *pef_save_combocids(void); +extern Size pef_get_num_combocids(void); +extern void pef_restore_combocids(struct ComboCidKeyData *addr, Size num); #endif /* COMBOCID_H */ diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index c0987dca155..d22ab72c713 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -20,6 +20,7 @@ #include "nodes/memnodes.h" #include "storage/condition_variable.h" #include "storage/lmgr.h" +#include "storage/lwlock.h" #include "utils/dsa.h" @@ -90,6 +91,9 @@ extern PGDLLIMPORT MemoryContext PortalContext; /* * Memory-context-type-independent functions in mcxt.c */ +extern void MemoryContextRegisterMethods(int method_id, + const MemoryContextMethods *new_methods, + MemoryContextMethods *old_methods); extern void MemoryContextInit(void); extern void MemoryContextReset(MemoryContext context); extern void MemoryContextDelete(MemoryContext context); diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h index a6caa6335e3..31523ad1c7c 100644 --- a/src/include/utils/memutils_internal.h +++ b/src/include/utils/memutils_internal.h @@ -151,11 +151,14 @@ typedef enum MemoryContextMethodID * context creation. It's intended to be called from context-type- * specific creation routines, and noplace else.
*/ -extern void MemoryContextCreate(MemoryContext node, - NodeTag tag, - MemoryContextMethodID method_id, - MemoryContext parent, - const char *name); +#define MemoryContextCreate(node, tag, method_id, parent, name) \ + MemoryContextCreateWithExLock(node, tag, method_id, parent, name, NULL) +extern void MemoryContextCreateWithExLock(MemoryContext node, + NodeTag tag, + MemoryContextMethodID method_id, + MemoryContext parent, + const char *name, + LWLock *lock); extern void *MemoryContextAllocationFailure(MemoryContext context, Size size, int flags); diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h index 9e79fc376cb..ac3dd0211a0 100644 --- a/src/include/utils/numeric.h +++ b/src/include/utils/numeric.h @@ -107,4 +107,6 @@ extern int64 numeric_int8_opt_error(Numeric num, bool *have_error); extern Numeric random_numeric(pg_prng_state *state, Numeric rmin, Numeric rmax); +struct NumericVar; + #endif /* _PG_NUMERIC_H_ */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index b552359915f..ab1eb5b231d 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -354,6 +354,9 @@ typedef struct StdRdOptions * to freeze. 0 if disabled, -1 if unspecified. */ double vacuum_max_eager_freeze_failure_rate; + int vci_column_ids_offset; /* TODO: Move under contrib/vci */ + int vci_dropped_column_ids_offset; /* TODO: Move under + * contrib/vci */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 3561c6bef0b..7c2bcdfbd5f 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -61,6 +61,8 @@ extern List *RelationGetDummyIndexExpressions(Relation relation); extern List *RelationGetIndexPredicate(Relation relation); extern bytea **RelationGetIndexAttOptions(Relation relation, bool copy); +extern bool isRelHasVCIIndex(Oid relid, bool *is_partition); + /* * Which set of columns to return by RelationGetIndexAttrBitmap. 
*/ @@ -160,4 +162,7 @@ extern PGDLLIMPORT bool criticalRelcachesBuilt; /* should be used only by relcache.c and postinit.c */ extern PGDLLIMPORT bool criticalSharedRelcachesBuilt; +/* vci index original hook*/ +extern PGDLLIMPORT bool (*add_skip_vci_index_hook) (Relation rel); + #endif /* RELCACHE_H */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 0e546ec1497..18154d7d90c 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -112,6 +112,16 @@ typedef enum SnapshotType * horizon to use. */ SNAPSHOT_NON_VACUUMABLE, + + /* + * VCI WOS2ROS visible + */ + SNAPSHOT_VCI_WOS2ROS, + + /* + * VCI Local ROS visible + */ + SNAPSHOT_VCI_LOCALROS } SnapshotType; typedef struct SnapshotData *Snapshot; diff --git a/src/include/utils/tuplesortstate.h b/src/include/utils/tuplesortstate.h new file mode 100644 index 00000000000..492f7d56a02 --- /dev/null +++ b/src/include/utils/tuplesortstate.h @@ -0,0 +1,285 @@ +/*------------------------------------------------------------------------- + * + * tuplesortstate.h + * tuple sorting state structure. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/tuplesortstate.h + * + *------------------------------------------------------------------------- + */ +#ifndef TUPLESORTSTATE_H +#define TUPLESORTSTATE_H + +#include "executor/executor.h" +#include "utils/logtape.h" +#include "utils/pg_rusage.h" +#include "utils/sortsupport.h" +#include "utils/tuplestore.h" + +/* Following are all moved from tuplesort.c to here for VCI Feature. */ + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples. To avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. 
+ * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. + */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE, /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TuplesortPublic base; + TupSortStatus status; /* enumerated value as shown above */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + int64 tupleMem; /* memory consumed by individual tuples. + * storing this separately from what we track + * in availMem allows us to subtract the + * memory consumed by all tuples when dumping + * tuples to tape */ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* max number of input tapes to merge in each + * pass */ + int64 maxSpace; /* maximum amount of space occupied among sort + * of groups, either in-memory or on-disk */ + bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk + * space, false when its value for in-memory + * space */ + TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * This array holds the tuples now in sort memory. 
If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. 
+ */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Memory used for input and output tape buffers. */ + size_t tape_buffer_mem; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Logical tapes, for merging. + * + * The initial runs are written in the output tapes. In each merge pass, + * the output tapes of the previous pass become the input tapes, and new + * output tapes are created as needed. When nInputTapes equals + * nInputRuns, there is only one merge pass left. + */ + LogicalTape **inputTapes; + int nInputTapes; + int nInputRuns; + + LogicalTape **outputTapes; + int nOutputTapes; + int nOutputRuns; + + LogicalTape *destTape; /* current output tape */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + LogicalTape *result_tape; /* actual tape of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + int64 markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. 
Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run that the leader needs to merge. Typically includes a + * worker state held by the leader process itself. Set in the leader + * Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * Resource snapshot for time of sort start. + */ + PGRUsage ru_start; +}; + +extern bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup); + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. + */ +struct Sharedsort +{ + /* mutex protects all fields prior to tapes */ + slock_t mutex; + + /* + * currentWorker generates ordinal identifier numbers for parallel sort + * workers. These start from 0, and are always gapless. + * + * Workers increment workersFinished to indicate having finished. If this + * is equal to state.nParticipants within the leader, leader is ready to + * merge worker runs. 
+ */ + int currentWorker; + int workersFinished; + + /* Temporary file space */ + SharedFileSet fileset; + + /* Size of tapes flexible array */ + int nTapes; + + /* + * Tapes array used by workers to report back information needed by the + * leader to concatenate all worker tapes into one for merging + */ + TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. + */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define REMOVEABBREV(state,stup,count) ((*(state)->base.removeabbrev) (state, stup, count)) +#define COMPARETUP(state,a,b) ((*(state)->base.comparetup) (a, b, state)) +#define WRITETUP(state,tape,stup) ((*(state)->base.writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->base.readtup) (state, stup, tape, len)) +#define FREESTATE(state) ((state)->base.freestate ? 
(*(state)->base.freestate) (state) : (void) 0) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) +#define SERIAL(state) ((state)->shared == NULL) +#define WORKER(state) ((state)->shared && (state)->worker != -1) +#define LEADER(state) ((state)->shared && (state)->worker == -1) + +#endif diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9ea573fae21..71fe57acc87 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4309,3 +4309,136 @@ zic_t ExplainExtensionOption ExplainOptionHandler overexplain_options + +vci_applicable_udf_template +vci_aggtranstype_kind +AggrefTransInfo +vci_search_vci_scan_context_t +vci_table_info_t +vci_gather_used_attrs_t +vci_renumber_attrs_t +father_gather_plans +FuncExprinfo +VciScalarArrayOpExprHashEntry +VciScalarArrayOpExprHashTable +VciAggStatePerAggData +VciAggStatePerGroupData +RosChunkBuffer +RosChunkStorage +vci_PointerAndLength +vci_PackBitsEncoder +vci_PackBitsDecoder +vcis_c_extent_t +vcis_c_common_dict_t +vcis_column_meta_t +vcis_common_dict_t +vci_ColumnRelations +vci_fetch_placeholder_t +vci_index_placeholder_t +vci_plan_info_t +vci_query_context_t +vci_vp_item_id +VciVPExecOp_func +VciVPNode +VciVPContext +VciTupleHotHashTableData +VciTupleHotHashTable +VciProjectionInfoSlot +VciProjectionInfo +VciPlan +VciPlanState +VciScan +VciScanState +VciSort +VciSortState +VciAgg +VciAggStatePerAgg +VciAggStatePerGroup +VciAggState +VciAdvanceAggref_Func +VciGather +VciGatherState +VciVarState +VciParamState +vci_initexpr_t +vci_topmost_plan_cb_t +vci_mutator_t +VciCopyDatumFunc +vci_agg_trans_copy_funcs +vci_CSQueryContextData +vci_CSQueryContext +vci_seq_scan_buffer_t +vci_CSFetchContextData +vci_CSFetchContext +vci_minmax_t +vci_extent_status_t +vci_read_vector_status_t +vci_virtual_tuples_column_info_t 
+vci_virtual_tuples_t +vcis_free_space_t +VciGucStruct +VciShmemStruct +vci_id_t +vci_memory_entry_t +vci_memory_entries_t +vci_inner_plan_type_t +vci_plan_compat_t +vci_plan_attr_t +vci_param_exec_type_t +vci_param_exec_attr_t +vci_subplan_type_t +vci_subplan_attr_t +vci_rewrite_plan_context_t +vci_devstat_t +vci_memory_entry_list_t +vci_devload_t +vci_dev_t +vci_update_info_t +vci_tid_array_t +vci_blk_array_t +vci_RosCommandContext +vci_target_extent_info_t +vci_workerslot_t +vci_ros_command_t +vci_offset_in_extent_t +vci_MainRelVar +vci_MainRelHeaderInfo +vcis_m_column_t +vcis_m_extent_t +vci_wmrv_t +vcis_attribute_type_t +vcis_compression_type_t +vcis_extent_type_t +vcis_tidcrid_item_type_t +vcis_dict_type_t +vcis_tid_crid_op_type_t +vci_RelationPair +vci_local_delete_list +vci_local_ros_t +vci_DictInfo +vci_meta_item_scanner_t +vci_wosros_conv_worker_arg_t +vci_special_udf_info_t +vcis_Crid +vcis_tidcrid_meta_item_t +vcis_tidcrid_meta_t +vcis_tidcrid_pagetag_t +vcis_tidcrid_leaf_t +vcis_tidcrid_trunk_t +vcis_tidcrid_pair_item_t +vcis_tidcrid_pair_list_t +vci_TidCridUpdateListContext +vci_TidCridRelations +VciTableScanPolicy +VciScanMode +VciFetchPos +vci_RebuildCommand +mountpoint_dev_pair_t +vci_CmpInfo +CopyCommandInfo +CEKind +WosKind +vci_tid_tid_xid64_t +message_on_worker_exit_t +vci_MergeTidCridUpdateListContext +node_info_t \ No newline at end of file -- 2.39.3