v20251223-0004-VCI-main-part3.patch
application/octet-stream
Filename: v20251223-0004-VCI-main-part3.patch
Type: application/octet-stream
Part: 3
From 063c8cc9db40a76581515db12f50432b97725abb Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Tue, 23 Dec 2025 15:25:30 +1100
Subject: [PATCH v20251223] VCI - main - part3
---
contrib/vci/include/vci_supported_oid.h | 34 +
contrib/vci/storage/Makefile | 8 +-
contrib/vci/storage/meson.build | 8 +-
contrib/vci/storage/vci_index.c | 2142 +++++++++++++++++++++++++++++++
contrib/vci/storage/vci_internal_view.c | 663 ++++++++++
contrib/vci/storage/vci_tidcrid.c | 1774 +++++++++++++++++++++++++
contrib/vci/storage/vci_wos.c | 263 ++++
7 files changed, 4884 insertions(+), 8 deletions(-)
create mode 100644 contrib/vci/include/vci_supported_oid.h
create mode 100644 contrib/vci/storage/vci_index.c
create mode 100644 contrib/vci/storage/vci_internal_view.c
create mode 100644 contrib/vci/storage/vci_tidcrid.c
create mode 100644 contrib/vci/storage/vci_wos.c
diff --git a/contrib/vci/include/vci_supported_oid.h b/contrib/vci/include/vci_supported_oid.h
new file mode 100644
index 0000000..504de68
--- /dev/null
+++ b/contrib/vci/include/vci_supported_oid.h
@@ -0,0 +1,34 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_supported_oid.h
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/include/vci_supported_oid.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef VCI_SUPPORTED_OID_H
+#define VCI_SUPPORTED_OID_H
+
+#include "utils/snapshot.h"
+
+#define VCI_MAX_APPLICABLE_UDFS (32)
+
+typedef struct
+{
+ int num_applicable_udfs;
+ Oid applicable_udfs[VCI_MAX_APPLICABLE_UDFS];
+ Oid vci_runs_in_plan_funcoid;
+ Oid vci_always_return_true_funcoid;
+} vci_special_udf_info_t;
+
+extern vci_special_udf_info_t vci_special_udf_info;
+
+extern bool vci_is_supported_type(Oid oid);
+extern bool vci_is_supported_function(Oid oid);
+extern void vci_register_applicable_udf(Snapshot snapshot);
+
+#endif /* VCI_SUPPORTED_OID_H */
diff --git a/contrib/vci/storage/Makefile b/contrib/vci/storage/Makefile
index 2ea8365..364a944 100644
--- a/contrib/vci/storage/Makefile
+++ b/contrib/vci/storage/Makefile
@@ -6,15 +6,15 @@ SUBOBJS = \
# vci_columns_data.o \
# vci_fetch.o \
# vci_freelist.o \
-# vci_index.o \
-# vci_internal_view.o \
+ vci_index.o \
+ vci_internal_view.o \
# vci_low_utils.o \
# vci_memory_entry.o \
vci_ros.o \
vci_ros_command.o \
vci_ros_daemon.o \
-# vci_tidcrid.o \
-# vci_wos.o \
+ vci_tidcrid.o \
+ vci_wos.o \
# vci_xact.o
EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)
diff --git a/contrib/vci/storage/meson.build b/contrib/vci/storage/meson.build
index fefe15b..87fa17a 100644
--- a/contrib/vci/storage/meson.build
+++ b/contrib/vci/storage/meson.build
@@ -6,14 +6,14 @@ vci_storage_sources = files(
# 'vci_columns_data.c',
# 'vci_fetch.c',
# 'vci_freelist.c',
-# 'vci_index.c',
-# 'vci_internal_view.c',
+ 'vci_index.c',
+ 'vci_internal_view.c',
# 'vci_low_utils.c',
# 'vci_memory_entry.c',
'vci_ros.c',
'vci_ros_command.c',
'vci_ros_daemon.c',
-# 'vci_tidcrid.c',
-# 'vci_wos.c',
+ 'vci_tidcrid.c',
+ 'vci_wos.c',
# 'vci_xact.c',
)
diff --git a/contrib/vci/storage/vci_index.c b/contrib/vci/storage/vci_index.c
new file mode 100644
index 0000000..7dbaa31
--- /dev/null
+++ b/contrib/vci/storage/vci_index.c
@@ -0,0 +1,2142 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_index.c
+ * Index Access Method
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/storage/vci_index.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam_xlog.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/reloptions.h"
+#include "access/sysattr.h"
+#include "access/toast_compression.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "access/xloginsert.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_rewrite.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/tablecmds.h"
+#include "executor/executor.h"
+#include "executor/nodeModifyTable.h"
+#include "executor/spi.h"
+#include "fmgr.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "nodes/execnodes.h"
+#include "nodes/makefuncs.h"
+#include "nodes/pathnodes.h"
+#include "access/relation.h"
+#include "port.h"
+#include "rewrite/rewriteDefine.h"
+#include "rewrite/rewriteRemove.h"
+#include "rewrite/rewriteSupport.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/predicate.h"
+#include "storage/smgr.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+#include "vci.h"
+#include "vci_columns.h"
+#include "vci_columns_data.h"
+
+#include "vci_mem.h"
+#include "vci_ros.h"
+#include "vci_ros_command.h"
+#include "vci_ros_daemon.h"
+#include "vci_supported_oid.h"
+#include "vci_tidcrid.h"
+#include "vci_wos.h"
+#include "vci_xact.h"
+
+#ifdef WIN32
+#define __func__ __FUNCTION__
+#endif
+
+#ifdef HAVE_DESIGNATED_INITIALIZERS
+#define SFINIT(f, ...) f = __VA_ARGS__
+#else
+#define SFINIT(f, ...) __VA_ARGS__
+#endif
+
+/**
+ * Data Relation
+ */
+#define VCI_RELTYPE_DATA ('d')
+
+/**
+ * Meta Relation
+ */
+#define VCI_RELTYPE_META ('m')
+
+/**
+ * WOS Relation
+ */
+#define VCI_RELTYPE_WOS ('W')
+
+/**
+ * ROS Relation
+ */
+#define VCI_RELTYPE_ROS ('R')
+
+/**
+ * TIDCRID Relation
+ */
+#define VCI_RELTYPE_TIDCRID ('T')
+
+/* local functions */
+static TupleDesc get_tuple_desc_for_build(Relation heapRel, Relation indexRel, bool isctid);
+static IndexBuildResult *vci_inner_build(Relation, Relation, IndexInfo *);
+static void vci_inner_buildempty(Relation indexRelation);
+static bool vci_inner_insert(Relation, ItemPointer);
+static bool vci_inner_insert_in_copy(Relation, ItemPointer);
+static IndexBulkDeleteResult *vci_inner_vacuumcleanup(IndexVacuumInfo *, IndexBulkDeleteResult *);
+static void vci_modify_column_information(bool isctid, Relation indexRel, Relation heapRel);
+
+IndexBuildResult *vci_build(Relation heap, Relation index, IndexInfo *indexInfo);
+void vci_buildempty(Relation index);
+bool vci_insert(Relation indexRel, Datum *values, bool *isnull,
+ ItemPointer heap_tid, Relation heapRel,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ struct IndexInfo *indexInfo);
+IndexBulkDeleteResult *vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+ IndexBulkDeleteCallback callback, void *callback_state);
+IndexBulkDeleteResult *vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats);
+void vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages);
+int vci_gettreeheight(Relation rel);
+bytea *vci_options(Datum reloptions, bool validate);
+IndexScanDesc vci_beginscan(Relation rel, int nkeys, int norderbys);
+void vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+ ScanKey orderbys, int norderbys);
+bool vci_validate(Oid opclassoid);
+void vci_endscan(IndexScanDesc scan);
+void vci_markpos(IndexScanDesc scan);
+void vci_restrpos(IndexScanDesc scan);
+
+static char relNameBuf[NAMEDATALEN];
+
+static bool copy_with_freeze_option;
+
+bool
+vci_isVciAdditionalRelation(Relation rel)
+{
+ return vci_isVciAdditionalRelationTuple(rel->rd_id, rel->rd_rel);
+}
+
+bool
+vci_isVciAdditionalRelationTuple(Oid reloid, Form_pg_class reltuple)
+{
+ int idField;
+ int columnField;
+ char suffixField;
+ int numMatched;
+
+ /* VCI internal relations are created with relkind MATVIEW. */
+ if (reltuple->relkind != RELKIND_MATVIEW)
+ return false;
+
+ /* The name must yield all three fields of the internal name template. */
+ numMatched = sscanf(NameStr(reltuple->relname), VCI_INTERNAL_RELATION_TEMPLATE,
+ &idField, &columnField, &suffixField);
+
+ return (numMatched == 3);
+}
+
+/* custom index */
+
+IndexBuildResult *
+vci_build(Relation heapRel, Relation indexRel, IndexInfo *indexInfo)
+{
+ IndexBuildResult *buildResult;
+ vci_id_t id;
+
+ /*
+ * Refuse to build while full_page_writes is off. A plain CREATE INDEX
+ * (no rebuild command in progress) gets the bare error; a rebuild via
+ * TRUNCATE or VACUUM FULL additionally hints at dropping the index.
+ */
+ if (!fullPageWrites)
+ {
+ bool isRebuild = (vci_rebuild_command != vcirc_invalid);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING),
+ isRebuild ?
+ errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel)) : 0));
+ }
+
+ buildResult = vci_inner_build(heapRel, indexRel, indexInfo);
+
+ /* Touch the memory entry identified by (index OID, current database). */
+ id.oid = RelationGetRelid(indexRel);
+ id.dbid = MyDatabaseId;
+ vci_TouchMemoryEntry(&id, get_rel_tablespace(indexRel->rd_id));
+
+ return buildResult;
+}
+
+void
+vci_buildempty(Relation indexRel)
+{
+ vci_inner_buildempty(indexRel);
+
+ return;
+}
+
+/* for COPY command */
+#define EXTENT_LIST_UNIT_EXTENSION (1024)
+
+typedef struct CopyCommandInfo
+{
+ TransactionId xid;
+ CommandId cid;
+ uint64 numAppendedRows;
+ uint32 *extentList;
+ uint32 numFilledExtent;
+ uint32 numAllocatedExtent;
+} CopyCommandInfo;
+
+static CopyCommandInfo copyInfo = {
+ SFINIT(xid, InvalidTransactionId),
+ SFINIT(cid, InvalidCommandId),
+ SFINIT(numAppendedRows, 0),
+ SFINIT(extentList, NULL),
+ SFINIT(numFilledExtent, 0),
+ SFINIT(numAllocatedExtent, 0)
+};
+static vci_RosCommandContext copyConvContext;
+
+bool
+vci_insert(Relation indexRel, Datum *values, bool *isnull,
+ ItemPointer heap_tid, Relation heapRel,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ struct IndexInfo *indexInfo)
+{
+ TransactionId curXid = GetCurrentTransactionId();
+ CommandId curCid = GetCurrentCommandId(false);
+
+ Assert(TransactionIdIsValid(curXid));
+ Assert(InvalidCommandId != curCid);
+
+ if (!fullPageWrites)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING),
+ errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel))));
+
+ /* Touch the index's memory entry when a heap page's first item arrives. */
+ if (ItemPointerGetOffsetNumber(heap_tid) == FirstOffsetNumber)
+ {
+ vci_id_t id;
+
+ id.oid = RelationGetRelid(indexRel);
+ id.dbid = MyDatabaseId;
+ vci_TouchMemoryEntry(&id, get_rel_tablespace(indexRel->rd_id));
+ }
+
+ /*
+ * Inserts issued by the transaction/command recorded in copyInfo take the
+ * COPY path; only heap_tid is forwarded, values and isnull are not used.
+ */
+ if (TransactionIdEquals(curXid, copyInfo.xid) && (curCid == copyInfo.cid))
+ return vci_inner_insert_in_copy(indexRel, heap_tid); /* LCOV_EXCL_LINE */
+
+ return vci_inner_insert(indexRel, heap_tid);
+}
+
+/**
+ * vci_bulkdelete -- no-op: hands back the caller's stats pointer unchanged.
+ */
+IndexBulkDeleteResult *
+vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+ IndexBulkDeleteCallback callback, void *callback_state)
+{
+ return stats;
+}
+
+/*
+ * vci_vacuumcleanup -- run VCI cleanup unless this is an ANALYZE-only pass.
+ * The caller's stats pointer is returned as-is in either case.
+ */
+IndexBulkDeleteResult *
+vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+ /* The inner routine's own return value is discarded. */
+ if (!info->analyze_only)
+ (void) vci_inner_vacuumcleanup(info, stats);
+
+ return stats;
+}
+
+/*
+ * vci_costestimate -- cost estimator for the VCI index access method.
+ *
+ * Always reports the worst possible cost: startup and total cost are
+ * DBL_MAX, selectivity is 1.0, correlation is 0.0 and the page count is
+ * MaxBlockNumber. In addition path->path.disabled_nodes is set to INT_MAX.
+ * The out parameters are filled in unconditionally; root and loop_count
+ * are not consulted.
+ */
+void
+vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ /* always return worst cost value */
+ *indexStartupCost = DBL_MAX;
+ *indexTotalCost = DBL_MAX;
+ *indexSelectivity = 1.0;
+ *indexCorrelation = 0.0;
+ *indexPages = MaxBlockNumber;
+
+ /*
+ * Disabled nodes are also a cost metric (see Commit e222534), so set a
+ * high value to ensure an Index Scan will not be chosen.
+ */
+ path->path.disabled_nodes = INT_MAX;
+
+ return;
+}
+
+int
+vci_gettreeheight(Relation rel)
+{
+ /* The reported height is always zero; rel is not examined. */
+ const int height = 0;
+
+ return height;
+}
+
+bytea *
+vci_options(Datum reloptions, bool validate)
+{
+ return NULL;
+}
+
+bool
+vci_validate(Oid opclassoid)
+{
+ /* Accepts every operator class; opclassoid is not inspected. */
+ return true;
+}
+
+/* LCOV_EXCL_START */
+IndexScanDesc
+vci_beginscan(Relation rel, int nkeys, int norderbys)
+{
+ IndexScanDesc result;
+
+ /*
+ * Index scans are not expected on a VCI index (vci_costestimate reports
+ * the worst possible cost), so reaching this callback raises PANIC.
+ */
+
+ result = NULL;
+
+ ereport(PANIC,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("unexpected index access method call : \"%s\" ", __func__)));
+
+ return result;
+}
+
+void
+vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+ ScanKey orderbys, int norderbys)
+{
+ /*
+ * A no-op: unlike the other scan callbacks in this file, rescan does not
+ * raise PANIC and simply ignores all of its arguments.
+ * NOTE(review): confirm that silently returning here, rather than
+ * erroring out like vci_beginscan, is intended.
+ */
+
+ /* nothing to do */
+ return;
+}
+
+void
+vci_endscan(IndexScanDesc scan)
+{
+ /*
+ * Not expected to be called for a VCI index; reaching here raises PANIC.
+ */
+
+ ereport(PANIC,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("unexpected index access method call : \"%s\" ", __func__)));
+
+ /* not reached once the PANIC above has been raised */
+ return;
+}
+
+void
+vci_markpos(IndexScanDesc scan)
+{
+ /*
+ * Not expected to be called for a VCI index; reaching here raises PANIC.
+ */
+ ereport(PANIC,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("unexpected index access method call : \"%s\" ", __func__)));
+
+ /* not reached once the PANIC above has been raised */
+ return;
+}
+
+void
+vci_restrpos(IndexScanDesc scan)
+{
+ /*
+ * Not expected to be called for a VCI index; reaching here raises PANIC.
+ */
+ ereport(PANIC,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("unexpected index access method call : \"%s\" ", __func__)));
+
+ /* not reached once the PANIC above has been raised */
+ return;
+}
+
+/* LCOV_EXCL_STOP */
+
+/* --body-- */
+
+/*
+ * vci_create_relation -- create (or reset) one VCI internal relation.
+ *
+ * If a relation named rel_identifier already exists in the namespace of
+ * indexRel, it is assigned a new relfilenumber and its OID is returned.
+ * Otherwise a new heap relation of kind MATVIEW is created with a tuple
+ * descriptor selected by vci_reltype, entered into pg_class and
+ * pg_attribute, and made internally dependent on indexRel.
+ *
+ * Returns the OID of the relation. indexInfo is currently unused.
+ */
+static Oid
+vci_create_relation(const char *rel_identifier, Relation indexRel, IndexInfo *indexInfo, char vci_reltype)
+{
+ int natts;
+
+ /* system catalog relation id */
+ Relation pg_class;
+ Relation pg_attr;
+
+ /* new rel, oid, tupdesc */
+ Relation new_rel;
+ Oid new_oid;
+ TupleDesc new_tupdesc;
+
+ /* attributes */
+ Oid ownerid = GetUserId();
+
+ char relkind = RELKIND_MATVIEW;
+
+ Oid new_type_oid = InvalidOid;
+ Oid reloftypeid = InvalidOid;
+ CatalogIndexState indstate;
+
+ char relname[NAMEDATALEN]; /* max 64 characters */
+ Oid reltablespace;
+ Oid relnamespace;
+ char relpersistence;
+ Oid accessmtd;
+
+ /* variables for pg_class */
+ Form_pg_class new_rel_reltup;
+
+ RangeVar *relrv;
+
+ /* Insert pg_depend table */
+ ObjectAddress oaIndex;
+ ObjectAddress oaNewRel;
+
+ relnamespace = indexRel->rd_rel->relnamespace;
+ reltablespace = indexRel->rd_rel->reltablespace;
+ relpersistence = indexRel->rd_rel->relpersistence;
+ accessmtd = HEAP_TABLE_AM_OID;
+
+ /* strlcpy always NUL-terminates, truncating an over-long identifier */
+ memset(relname, 0, sizeof(relname));
+ strlcpy(relname, rel_identifier, sizeof(relname));
+
+ relrv = makeRangeVar(get_namespace_name(relnamespace), relname, -1);
+ new_oid = RangeVarGetRelid(relrv, AccessShareLock, true);
+
+ if (OidIsValid(new_oid))
+ {
+ new_rel = relation_open(new_oid, AccessExclusiveLock);
+ RelationSetNewRelfilenumber(new_rel, new_rel->rd_rel->relpersistence);
+
+ /* NOTE: no init fork is created here, even for unlogged relations. */
+
+ relation_close(new_rel, NoLock); /* do not unlock till end of xact */
+
+ return new_oid;
+ }
+
+ /* No such relation yet: create it, starting from pg_class */
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ /* 4.6.1 get new Oid for new relation */
+
+ new_oid = GetNewRelFileNumber(reltablespace, pg_class, relpersistence);
+
+ /*
+ * TODO: Is the following call meaningful? Its result is discarded, so
+ * should we remove it?
+ */
+ get_user_default_acl(OBJECT_TABLE, ownerid, relnamespace);
+
+ /* 4.6.1.2 build the tuple descriptor for the requested relation type */
+ switch (vci_reltype)
+ {
+ /* WOS */
+ case VCI_RELTYPE_WOS:
+ natts = 2;
+ new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */
+ TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "original_tid", TIDOID, -1, 0);
+ TupleDescInitEntry(new_tupdesc, (AttrNumber) 2, "xid", INT8OID, -1, 0);
+ break;
+
+ /* ROS */
+ case VCI_RELTYPE_ROS:
+ natts = 1;
+ new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */
+ TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0);
+ break;
+
+ /* TID-CRID */
+ case VCI_RELTYPE_TIDCRID:
+ natts = 1;
+ new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */
+ TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0);
+ break;
+
+ /* LCOV_EXCL_START */
+ default:
+ elog(ERROR, "unexpected vci_reltype");
+ break;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /*
+ * Create the relcache entry (mostly dummy at this point) and the physical
+ * disk file. (If we fail further down, it's the smgr's responsibility to
+ * remove the disk file again.)
+ */
+ new_rel = RelationBuildLocalRelation(relname,
+ relnamespace,
+ new_tupdesc,
+ new_oid,
+ accessmtd,
+ new_oid, /* relfilenumber */
+ reltablespace,
+ false, /* shared_relation */
+ false, /* mapped_relation */
+ relpersistence,
+ relkind);
+
+ /* 4.6.1.3 create new storage for new relation */
+ RelationCreateStorage(new_rel->rd_locator, relpersistence, true);
+
+ Assert(new_oid == RelationGetRelid(new_rel));
+
+ /* 4.6.1.4 add new entry into pg_class */
+ new_rel_reltup = new_rel->rd_rel;
+ new_rel_reltup->relpages = 0;
+ new_rel_reltup->reltuples = -1;
+ new_rel_reltup->relallvisible = 0;
+ new_rel_reltup->relfrozenxid = RecentXmin;
+ new_rel_reltup->relminmxid = GetOldestMultiXactId();
+ new_rel_reltup->relowner = ownerid;
+ new_rel_reltup->reltype = new_type_oid;
+ new_rel_reltup->reloftype = reloftypeid;
+
+ /*
+ * Flag the VCI internal relation MATVIEW as already populated.
+ *
+ * Users are not supposed to be querying these internal relations, but
+ * just in case they do, setting 'relispopulated' prevents an error saying
+ * the view has not been populated, hinting a "REFRESH MATERIALIZED VIEW"
+ * is needed. That hint only causes confusion, since the REFRESH is
+ * disallowed for VCI internal relations.
+ */
+ new_rel_reltup->relispopulated = true;
+
+ /*
+ * @see
+ * https://www.postgresql.jp/document/9.4/html/catalog-pg-rewrite.html
+ */
+ new_rel_reltup->relhasrules = true;
+
+ new_rel->rd_att->tdtypeid = new_type_oid;
+
+ InsertPgClassTuple(pg_class, new_rel, new_oid, (Datum) 0, (Datum) 0);
+
+ /*
+ * 4.6.1.5 now add tuples to pg_attribute for the attributes in our new
+ * relation; open pg_attribute and its indexes first.
+ */
+ pg_attr = table_open(AttributeRelationId, RowExclusiveLock);
+ indstate = CatalogOpenIndexes(pg_attr);
+
+ /*
+ * Force plain storage and no compression method on every attribute
+ * before the pg_attribute rows are inserted.
+ */
+ for (int i = 0; i < natts; i++)
+ {
+ Form_pg_attribute attrs;
+
+ /* [TODO] Make sure these are OK? */
+ new_tupdesc->compact_attrs[i].attcacheoff = -1;
+ attrs = TupleDescAttr(new_tupdesc, i);
+ attrs->attstorage = TYPSTORAGE_PLAIN;
+ attrs->attcompression = InvalidCompressionMethod;
+ }
+ InsertPgAttributeTuples(pg_attr, new_tupdesc, new_oid, NULL, indstate);
+
+ /*
+ * clean up pg_attribute
+ */
+ CatalogCloseIndexes(indstate);
+ table_close(pg_attr, RowExclusiveLock);
+
+ /*
+ * VCI internal relations are dependent on the parent index.
+ */
+ ObjectAddressSet(oaIndex, RelationRelationId, indexRel->rd_id);
+ ObjectAddressSet(oaNewRel, RelationRelationId, new_oid);
+ recordDependencyOn(&oaNewRel, &oaIndex, DEPENDENCY_INTERNAL);
+
+ table_close(new_rel, NoLock); /* do not unlock till end of xact */
+ table_close(pg_class, RowExclusiveLock);
+
+ return new_oid;
+}
+
+static char *
+GenRelName(Relation rel, int16 columnId, char suffix)
+{
+ /* Result lives in the static relNameBuf; the next call overwrites it. */
+ snprintf(relNameBuf, sizeof(relNameBuf), VCI_INTERNAL_RELATION_TEMPLATE,
+ RelationGetRelid(rel), (columnId & 0xFFFF), suffix);
+ return relNameBuf;
+}
+
+static void
+CheckIndexedRelationKind(Relation rel)
+{
+ Form_pg_class classForm = rel->rd_rel;
+
+ /* Materialized views and temporary tables cannot carry a VCI index. */
+ if (classForm->relkind == RELKIND_MATVIEW)
+ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support index on materialized view", VCI_STRING)));
+ if (classForm->relpersistence == RELPERSISTENCE_TEMP)
+ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support index on temporary table", VCI_STRING)));
+}
+
+/*
+ * Reject index definitions that VCI cannot handle (ERROR on the first hit).
+ */
+static void
+CheckIndexInfo(IndexInfo *indexInfo, Relation indexRel)
+{
+ /* check Concurrent option first. */
+ if (indexInfo->ii_Concurrent)
+ /* LCOV_EXCL_START */
+ elog(PANIC, "should not reach here");
+ /* LCOV_EXCL_STOP */
+
+ if (indexInfo->ii_Predicate != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support partial-index", VCI_STRING)));
+
+ if (indexInfo->ii_Expressions != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support to CREATE INDEX on the expression", VCI_STRING)));
+
+ if (indexInfo->ii_ExclusionOps != NULL ||
+ indexInfo->ii_ExclusionProcs != NULL ||
+ indexInfo->ii_ExclusionStrats != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support EXCLUDE clause", VCI_STRING)));
+
+ for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ {
+ for (int j = i + 1; j < indexInfo->ii_NumIndexAttrs; j++)
+ {
+ /* look the name up by index position i, not by heap attnum */
+ if (indexInfo->ii_IndexAttrNumbers[i] == indexInfo->ii_IndexAttrNumbers[j])
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("duplicated columns in vci index creation: %s",
+ NameStr(TupleDescAttr(RelationGetDescr(indexRel), i)->attname)),
+ errhint("duplicated columns are specified")));
+ }
+ }
+}
+
+static void
+CheckIndexColumnTypes(TupleDesc tupdesc, bool *isctid)
+{
+ *isctid = false;
+
+ for (int i = 0; i < tupdesc->natts; i++)
+ {
+ Oid typeoid = TupleDescAttr(tupdesc, i)->atttypid;
+
+ /*
+ * In general, the type 'tid' is not supported. However, a column
+ * named 'ctid' (which exists in all tables) is accepted as a dummy
+ * column, provided it is the only index column. In this case, the
+ * real columns should be registered in the 'vci_column_ids' option.
+ */
+ if (!vci_is_supported_type(typeoid))
+ {
+ if (strcmp(NameStr(TupleDescAttr(tupdesc, i)->attname), "ctid") != 0)
+ {
+ HeapTuple tuple;
+ Form_pg_type typetuple;
+
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for type %u", typeoid);
+
+ typetuple = (Form_pg_type) GETSTRUCT(tuple);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("data type %s is not supported for access method \"%s\"",
+ NameStr(typetuple->typname), VCI_STRING)));
+
+ ReleaseSysCache(tuple); /* not reached: the ERROR above does not return */
+ }
+ else if (tupdesc->natts != 1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use \"ctid\" column with other columns")));
+ }
+ *isctid = true;
+ }
+ }
+}
+
+/* TODO - is this function needed? hasoption is hard-wired to false, so only the isctid case can raise an error here. */
+static void
+CheckColumnReloptions(Relation indexRel, bool isctid)
+{
+ char *ids = NULL;
+ bool hasoption = false;
+
+ if (hasoption)
+ ereport(DEBUG2,
+ (errmsg_internal("vci_column_ids: %s", ids)));
+
+ if (isctid == hasoption)
+ return;
+ else if (isctid && !hasoption)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("The \"vci_column_ids\" option is required when \"ctid\" column is specified")));
+ else if (!isctid && hasoption)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("The \"vci_column_ids\" option cannot be used without \"ctid\" column")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg_internal("unrecognized state in vci_inner_build: isctid = %d, ids = %p", isctid, ids)));
+}
+
+/*
+ * get_tuple_desc_for_build -- create TupleDesc for build.
+ *
+ * The VCI can be created by 2 interfaces. The first interface is the same as
+ * for an ordinary index (Ex. CREATE INDEX idx ON table USING vci(c1, c2)). The
+ * second interface is the original function 'vci_create' (SELECT vci_create
+ * ('idx', 'table', ARRAY['c1', 'c2'])). It generates SQL such as 'CREATE
+ * INDEX idx ON table USING vci(ctid) WITH (vci_column_ids = '1,2')'. The
+ * following code distinguishes these 2 cases.
+ *
+ * XXX - function vci_create is not implemented by this OSS patch, so the ctid
+ * form is rejected with an ERROR; this code may be simplified further.
+ */
+static TupleDesc
+get_tuple_desc_for_build(Relation heapRel, Relation indexRel, bool isctid)
+{
+ if (isctid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("\"ctid\" column is specified")));
+
+ return RelationGetDescr(indexRel);
+}
+
+static void
+vci_modify_column_information(bool isctid, Relation indexRel, Relation heapRel)
+{
+ if (!isctid)
+ return;
+
+ /*
+ * XXX. The code which previously existed below here is now removed. It
+ * relied on vci_MakeDroppedColumnBitmap which asserted
+ * vci_IsExtendedToMoreThan32Columns, and that is no longer possible since
+ * the vci_create() function is not supported by this OSS patch.
+ */
+}
+
+static IndexBuildResult *
+vci_inner_build(Relation heapRel, Relation indexRel, IndexInfo *indexInfo)
+{
+ IndexBuildResult *result;
+ Oid oid;
+
+ vci_MainRelHeaderInfo *vmr_info;
+
+ TupleDesc tupdesc;
+ bool isctid;
+
+ /* for checking type after getting 'real' TupleDesc. */
+ bool dummy_isctid;
+
+ uint32 offsetToExtentInfo;
+
+ double reltuples = -1;
+
+ CheckIndexedRelationKind(heapRel);
+ CheckIndexInfo(indexInfo, indexRel);
+ CheckIndexColumnTypes(RelationGetDescr(indexRel), &isctid);
+ CheckColumnReloptions(indexRel, isctid);
+
+ vci_modify_column_information(isctid, indexRel, heapRel);
+
+ /* create VCI main relation */
+ vmr_info = palloc0_object(vci_MainRelHeaderInfo);
+ vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_ros_conv_build);
+
+ if (RelationGetNumberOfBlocks(indexRel) != 0)
+ elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(indexRel));
+
+ /* create blank page * VCI_NUM_MAIN_REL_HEADER_PAGES */
+ vci_PreparePagesWithOneItemIfNecessary(indexRel,
+ lengthof(vmr_info->buffer) - 1);
+
+ vci_KeepMainRelHeaderWithoutVersionCheck(vmr_info);
+
+ /* write ROS format version */
+ vci_SetMainRelVar(vmr_info, vcimrv_ros_version_major, 0,
+ VCI_ROS_VERSION_MAJOR);
+ vci_SetMainRelVar(vmr_info, vcimrv_ros_version_minor, 0,
+ VCI_ROS_VERSION_MINOR);
+
+ /* create WOS relations */
+ /* register WOS relation's OID to VCI Main relation */
+
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DATA_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS);
+ vci_SetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0, oid);
+
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_WHITEOUT_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS);
+ vci_SetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0, oid);
+
+ /* create ROS relations */
+
+ /* TID */
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_data_oid, 0, oid);
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_meta_oid, 0, oid);
+
+ /* Null */
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_NULL, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_null_data_oid, 0, oid);
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_NULL, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_null_meta_oid, 0, oid);
+
+ /* Delete Vector */
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_delete_data_oid, 0, oid);
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ vci_SetMainRelVar(vmr_info, vcimrv_delete_meta_oid, 0, oid);
+
+ /* Column Stores */
+ tupdesc = get_tuple_desc_for_build(heapRel, indexRel, isctid);
+ CheckIndexColumnTypes(tupdesc, &dummy_isctid);
+
+ /*
+ * When using 'vci_create', PostgreSQL registers only the 'ctid' column
+ * as a dependency object, so self-registration is required in that case.
+ *
+ * Note: tupdesc->attrs[i]->attnum is not a heap attribute number; it is
+ * a sequential number within the index.
+ */
+ if (isctid)
+ {
+ ObjectAddress myself,
+ referenced;
+ TupleDesc heapTupleDesc;
+ ObjectAddresses *addrs;
+
+ heapTupleDesc = RelationGetDescr(heapRel);
+
+ addrs = new_object_addresses();
+
+ ObjectAddressSet(myself, RelationRelationId, RelationGetRelid(indexRel));
+ ObjectAddressSet(referenced, RelationRelationId, RelationGetRelid(heapRel));
+
+ for (int i = 0; i < tupdesc->natts; i++)
+ {
+ referenced.objectSubId = vci_GetAttNum(heapTupleDesc,
+ NameStr(TupleDescAttr(tupdesc, i)->attname));
+
+ add_exact_object_address(&referenced, addrs);
+ }
+
+ record_object_address_dependencies(&myself, addrs, DEPENDENCY_AUTO);
+ free_object_addresses(addrs);
+ }
+
+ vci_SetMainRelVar(vmr_info, vcimrv_num_columns, 0, tupdesc->natts);
+ for (int i = 0; i < tupdesc->natts; i++)
+ {
+ Oid column_store_oid;
+ Oid column_meta_oid;
+ vcis_m_column_t *columnPointer;
+
+ column_store_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS);
+ column_meta_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS);
+
+ /* set ROS column pointer */
+ columnPointer = vci_GetMColumn(vmr_info, i);
+
+ columnPointer->meta_oid = column_meta_oid;
+ columnPointer->data_oid = column_store_oid;
+ columnPointer->max_columns_size = vci_GetColumnWorstSize(TupleDescAttr(tupdesc, i));
+ if (TupleDescAttr(tupdesc, i)->attlen == -1)
+ {
+ columnPointer->comp_type = vcis_compression_type_variable_raw;
+ }
+ else if (TupleDescAttr(tupdesc, i)->attlen > 0)
+ {
+ columnPointer->comp_type = vcis_compression_type_fixed_raw;
+ }
+ else
+ {
+ Assert(false);
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("unexpected attribute length")));
+ }
+ /* put default extent(free_page) to each columns */
+ }
+ vci_SetMainRelVar(vmr_info, vcimrv_num_nullable_columns, 0,
+ vci_GetNumberOfNullableColumn(tupdesc));
+ vci_SetMainRelVar(vmr_info, vcimrv_null_width_in_byte, 0,
+ (vci_GetNumberOfNullableColumn(tupdesc) + BITS_PER_BYTE - 1) /
+ BITS_PER_BYTE);
+
+ /* create TID-CRID relations */
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_TIDCRID);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_meta_oid, 0, oid);
+
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_TIDCRID);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_data_oid, 0, oid);
+
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '0'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_0, 0, oid);
+
+ oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '1'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_1, 0, oid);
+
+ /* other variables */
+ vci_SetMainRelVar(vmr_info, vcimrv_column_info_offset, 0, vcimrv_column_info - VCI_MIN_PAGE_HEADER);
+
+ offsetToExtentInfo = (vci_MRVGetBlockNumber(vcimrv_extent_info) * VCI_MAX_PAGE_SPACE) +
+ vci_MRVGetOffset(vcimrv_extent_info) - VCI_MIN_PAGE_HEADER;
+
+ vci_SetMainRelVar(vmr_info, vcimrv_extent_info_offset, 0, offsetToExtentInfo);
+ vci_SetMainRelVar(vmr_info, vcimrv_size_mr, 0, offsetToExtentInfo);
+ vci_SetMainRelVar(vmr_info, vcimrv_size_mr_old, 0, offsetToExtentInfo);
+
+ vci_SetMainRelVar(vmr_info, vcimrv_current_ros_version, 0, FrozenTransactionId);
+ vci_SetMainRelVar(vmr_info, vcimrv_last_ros_version, 0, FrozenTransactionId);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel, 0, 0);
+ vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel_old, 0, 0);
+
+ vci_SetMainRelVar(vmr_info, vcimrv_xid_generation, 0, 1); /* xid generation starts
+ * from 1 */
+ vci_SetMainRelVar(vmr_info, vcimrv_xid_gen_update_xid, 0, GetCurrentTransactionId());
+
+ vci_SetMainRelVar(vmr_info, vcimrv_ros_command, 0, vci_rc_invalid);
+ vci_SetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0, 0);
+
+ vci_SetMainRelVar(vmr_info, vcimrv_num_extents, 0, 0);
+ vci_SetMainRelVar(vmr_info, vcimrv_num_extents_old, 0, 0);
+
+ /* flush */
+ vci_WriteMainRelVar(vmr_info, vci_wmrv_all);
+
+ /* initialize meta data relations and data relations */
+ vci_InitializeColumnRelations(vmr_info, tupdesc, heapRel);
+
+ /* initialize meta data relations and data relations */
+ vci_InitializeTidCridUpdateLists(vmr_info);
+ vci_InitializeTidCridTree(vmr_info);
+
+ /* unlock */
+ vci_ReleaseMainRelHeader(vmr_info);
+ pfree(vmr_info);
+
+ /* convert data in the relations */
+ if (vcirc_truncate != vci_rebuild_command &&
+ indexRel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
+ reltuples = vci_ConvertWos2RosForBuild(indexRel,
+ VciGuc.maintenance_work_mem * (Size) 1024, indexInfo);
+
+ /*
+ * create statistics for return to caller
+ */
+ result = palloc0_object(IndexBuildResult);
+ result->heap_tuples = reltuples;
+ result->index_tuples = -1;
+
+ return result;
+}
+
+/*
+ * vci_putInitPage
+ *		Write one page into block 'blkno' of the INIT_FORK of relation 'oid'.
+ *
+ * If 'page' is non-NULL, its contents are written.  If 'page' is NULL, block
+ * 'blkno' of the relation is read with ReadBuffer() and that content is
+ * copied instead.  The INIT_FORK is created first if it does not exist yet
+ * (the VCI main relation may already have one, the other relations may not).
+ *
+ * The page is always staged in a local, I/O-aligned block before it is
+ * checksummed, written and WAL-logged:
+ *	- PageSetChecksumInplace() and log_newpage() both modify the page they
+ *	  are given; doing that directly on a shared buffer that is only
+ *	  share-locked is not allowed.
+ *	- smgrwrite() expects an I/O-aligned buffer, which a plain palloc'd page
+ *	  supplied by the caller does not guarantee.
+ * As a consequence the caller's 'page' is left unmodified.
+ *
+ * The relation is opened and closed with AccessExclusiveLock.
+ */
+static void
+vci_putInitPage(Oid oid, Page page, BlockNumber blkno)
+{
+	Relation	rel;
+	PGIOAlignedBlock initBlock;
+	Page		initPage = (Page) initBlock.data;
+
+	rel = relation_open(oid, AccessExclusiveLock);
+
+	/*
+	 * If there is no INIT_FORK, create it. VCI Main Relation may have, but
+	 * others may not have.
+	 */
+	if (!smgrexists(RelationGetSmgr(rel), INIT_FORKNUM))
+		smgrcreate(RelationGetSmgr(rel), INIT_FORKNUM, false);
+
+	if (page != NULL)
+		memcpy(initPage, page, BLCKSZ);
+	else
+	{
+		Buffer		buffer = ReadBuffer(rel, blkno);
+
+		/* take a consistent snapshot of the page, then let the buffer go */
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+		memcpy(initPage, BufferGetPage(buffer), BLCKSZ);
+		UnlockReleaseBuffer(buffer);
+	}
+
+	PageSetChecksumInplace(initPage, blkno);
+	smgrwrite(RelationGetSmgr(rel), INIT_FORKNUM, blkno,
+			  (char *) initPage, true);
+
+	/* use the relcache copy of the locator rather than touching rd_smgr */
+	if (XLogIsNeeded())
+		log_newpage(&rel->rd_locator, INIT_FORKNUM,
+					blkno, initPage, false);
+
+	/* smgrwrite() was told to skip fsync, so sync the fork explicitly */
+	smgrimmedsync(RelationGetSmgr(rel), INIT_FORKNUM);
+
+	relation_close(rel, AccessExclusiveLock);
+}
+
+/*
+ * vci_inner_buildempty
+ *		Fill the INIT_FORK of a VCI index and of the relations it refers to.
+ *
+ * The OIDs of the auxiliary relations are read from the main relation
+ * header.  Both WOS relations receive a freshly initialised blank page; for
+ * every other relation block 0 is handed to vci_putInitPage() with a NULL
+ * page, which makes it copy the existing block.  Finally every block held
+ * in the header's buffer array is copied for the main relation itself.
+ *
+ * Unless the current rebuild command is vcirc_truncate, the WOS is then
+ * converted into ROS via vci_ConvertWos2RosForBuild().
+ */
+static void
+vci_inner_buildempty(Relation indexRel)
+{
+	vci_MainRelHeaderInfo headerData;
+	vci_MainRelHeaderInfo *vmr_info = &headerData;
+	Page		blankPage;
+	TupleDesc	buildDesc;
+	Relation	heapRel;
+	bool		isctid;
+
+	/* only written by the second type check on the 'real' TupleDesc */
+	bool		isctidRecheck;
+
+	CheckIndexColumnTypes(RelationGetDescr(indexRel), &isctid);
+
+	/* pin the VCI main relation header */
+	vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_ros_conv_build);
+	vci_KeepMainRelHeader(vmr_info);
+
+	/*
+	 * WOS relations: store a blank page rather than a copy of block 0,
+	 * because ambuild may already have inserted data into the WOS.
+	 */
+	blankPage = (Page) palloc(BLCKSZ);
+	PageInit(blankPage, BLCKSZ, 0);
+
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0),
+					blankPage, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0),
+					blankPage, 0);
+
+	pfree(blankPage);
+
+	/* meta relations of the TID, NULL and delete vectors */
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_meta_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_null_meta_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_delete_meta_oid, 0),
+					NULL, 0);
+
+	/* data relations of the TID, NULL and delete vectors */
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_data_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_null_data_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_delete_data_oid, 0),
+					NULL, 0);
+
+	/* column store: one meta and one data relation per indexed column */
+	heapRel = table_open(indexRel->rd_index->indrelid, AccessShareLock);
+	buildDesc = get_tuple_desc_for_build(heapRel, indexRel, isctid);
+	table_close(heapRel, AccessShareLock);
+
+	CheckIndexColumnTypes(buildDesc, &isctidRecheck);
+
+	for (int col = 0; col < buildDesc->natts; col++)
+	{
+		vcis_m_column_t *mColumn = vci_GetMColumn(vmr_info, col);
+
+		vci_putInitPage(mColumn->meta_oid, NULL, 0);
+		vci_putInitPage(mColumn->data_oid, NULL, 0);
+	}
+
+	/* TID-CRID tree and both TID-CRID update lists */
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_meta_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_data_oid, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_0, 0),
+					NULL, 0);
+	vci_putInitPage(vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_1, 0),
+					NULL, 0);
+
+	/* copy the current content of the VCI main relation into its INIT_FORK */
+	for (BlockNumber blkno = 0; blkno < lengthof(headerData.buffer); blkno++)
+		vci_putInitPage(RelationGetRelid(indexRel), NULL, blkno);
+
+	vci_ReleaseMainRelHeader(vmr_info);
+
+	if (vci_rebuild_command != vcirc_truncate)
+	{
+		/* extract index key information from the index's pg_index info */
+		IndexInfo  *indexInfo = BuildIndexInfo(indexRel);
+
+		vci_ConvertWos2RosForBuild(indexRel,
+								   VciGuc.maintenance_work_mem * (Size) 1024,
+								   indexInfo);
+	}
+}
+
+/* LCOV_EXCL_START */
+/*
+ * vci_set_copy_transaction_and_command_id
+ *		Reset the file-level copyInfo state for a new COPY command.
+ *
+ * Stores the given transaction and command IDs and clears the row counter
+ * and the extent list bookkeeping.  The extent list must already be empty
+ * (asserted).
+ */
+void
+vci_set_copy_transaction_and_command_id(TransactionId xid, CommandId cid)
+{
+	/* a previous COPY must not have left an extent list behind */
+	Assert(NULL == copyInfo.extentList);
+	Assert(0 == copyInfo.numAllocatedExtent);
+
+	/* identity of the COPY command */
+	copyInfo.xid = xid;
+	copyInfo.cid = cid;
+
+	/* nothing appended, nothing allocated yet */
+	copyInfo.numAppendedRows = 0;
+	copyInfo.numFilledExtent = 0;
+	copyInfo.numAllocatedExtent = 0;
+	copyInfo.extentList = NULL;
+}
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * vci_inner_insert
+ *		Record a heap tuple's TID in the Data WOS of a VCI index.
+ *
+ * indexRel: the VCI main (index) relation.
+ * heap_tid: TID of the heap tuple being indexed.
+ *
+ * A two-column tuple (heap TID, 64-bit xid from vci_GenerateXid64()) is
+ * inserted with heap_insert() into the relation whose OID is stored in the
+ * main relation variable vcimrv_data_wos_oid.  When copy_with_freeze_option
+ * is set the tuple is inserted with HEAP_INSERT_FROZEN.
+ *
+ * Always returns false.
+ */
+static bool
+vci_inner_insert(Relation indexRel, ItemPointer heap_tid)
+{
+	TransactionId xid = GetCurrentTransactionId();
+	TupleDesc	tdesc;
+	HeapTuple	htup;
+	int			options = 0;
+
+	Oid			data_wos_oid;
+	Relation	data_wos_rel;
+
+	Datum		new_values[2];
+	bool		new_isnull[2];
+
+	vci_MainRelHeaderInfo *vmr_info;
+
+	/* get Data WOS relation from vci main rel */
+	vmr_info = palloc0_object(vci_MainRelHeaderInfo);
+	vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_insert);
+	vci_KeepMainRelHeader(vmr_info);
+	data_wos_oid = (Oid) vci_GetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0);
+
+	data_wos_rel = table_open(data_wos_oid, RowExclusiveLock);
+
+	/* get tuple desc */
+	tdesc = RelationGetDescr(data_wos_rel);
+
+	/* create new tuple for insert */
+	new_values[0] = ItemPointerGetDatum(heap_tid);
+	new_values[1] = Int64GetDatum(vci_GenerateXid64(xid, vmr_info));
+	new_isnull[0] = false;
+	new_isnull[1] = false;
+	htup = heap_form_tuple(tdesc, new_values, new_isnull);
+
+	/* insert (+ WAL) */
+	if (copy_with_freeze_option)
+		options |= HEAP_INSERT_FROZEN;
+
+	heap_insert(data_wos_rel, htup, GetCurrentCommandId(true), options, NULL);
+
+	heap_freetuple(htup);
+	table_close(data_wos_rel, RowExclusiveLock);
+
+	/* unlock */
+	vci_ReleaseMainRelHeader(vmr_info);
+
+	/*
+	 * This function runs once per inserted row, so free the header info
+	 * instead of leaving it to the surrounding memory context.
+	 */
+	pfree(vmr_info);
+
+	return false;
+}
+
+/* LCOV_EXCL_START */
+/*
+ * WriteOneExtentForCopy
+ *		Flush the rows buffered in copyConvContext.storage into one new ROS
+ *		extent of the given VCI index.
+ *
+ * The index is locked with ShareUpdateExclusiveLock for the duration of the
+ * write.  The ID of the extent that is written is taken from
+ * vci_GetFreeExtentId() and appended to copyInfo.extentList, which is grown
+ * in steps of EXTENT_LIST_UNIT_EXTENSION.  After the extent and the main
+ * relation header have been written, the chunk storage is reset.
+ */
+static void
+WriteOneExtentForCopy(Relation indexRel)
+{
+	const LOCKMODE lockmode = ShareUpdateExclusiveLock;
+
+	LockRelation(indexRel, lockmode);
+	vci_InitMainRelHeaderInfo(&(copyConvContext.info),
+							  indexRel, vci_rc_copy_command);
+	vci_KeepMainRelHeader(&(copyConvContext.info));
+
+	/* obtain target extent ID */
+	copyConvContext.extentId = vci_GetFreeExtentId(&(copyConvContext.info));
+
+	/* make room for one more entry in the extent list */
+	if (copyInfo.numAllocatedExtent <= copyInfo.numFilledExtent)
+	{
+		copyInfo.numAllocatedExtent += EXTENT_LIST_UNIT_EXTENSION;
+
+		/*
+		 * repalloc() does not accept a NULL pointer, and the list starts
+		 * out as NULL, so the first allocation has to use palloc.
+		 *
+		 * NOTE(review): the list is allocated in CurrentMemoryContext;
+		 * confirm that this context lives as long as the COPY command.
+		 */
+		if (copyInfo.extentList == NULL)
+			copyInfo.extentList = palloc_array(uint32,
+											   copyInfo.numAllocatedExtent);
+		else
+			copyInfo.extentList = repalloc_array(copyInfo.extentList, uint32,
+												 copyInfo.numAllocatedExtent);
+	}
+
+	/*
+	 * Store at the first free slot, then advance the fill count.  A
+	 * pre-increment here would leave slot 0 unused and could write one
+	 * element past the allocation checked above.
+	 */
+	copyInfo.extentList[copyInfo.numFilledExtent++] =
+		copyConvContext.extentId;
+
+	/* write one extent into ROS */
+	vci_AddTidCridUpdateList(&(copyConvContext.info),
+							 &(copyConvContext.storage),
+							 copyConvContext.extentId);
+	vci_WriteOneExtent(&(copyConvContext.info),
+					   &(copyConvContext.storage),
+					   copyConvContext.extentId,
+					   InvalidTransactionId,
+					   copyConvContext.xid,
+					   copyConvContext.xid);
+
+	/* write header of the main relation */
+	vci_WriteMainRelVar(&(copyConvContext.info),
+						vci_wmrv_update);
+	UnlockRelation(indexRel, lockmode);
+	vci_ReleaseMainRelInCommandContext(&copyConvContext);
+
+	vci_ResetRosChunkStorage(&(copyConvContext.storage));
+}
+
+/*
+ * vci_inner_insert_in_copy
+ *		Append one heap tuple to the ROS chunk buffer used by COPY.
+ *
+ * indexRel: the VCI main (index) relation.
+ * heap_tid: TID of the heap tuple that was just inserted.
+ *
+ * While copyInfo.numAppendedRows is still 0 the COPY command context is
+ * set up first: the command context is initialised, recovery is run if
+ * necessary, the extent info for COPY is written, and the counter of
+ * unterminated COPY commands in the main relation header is incremented.
+ * NOTE(review): numAppendedRows is not advanced in this function;
+ * presumably a caller does that -- confirm.
+ *
+ * The tuple is then read straight from its heap page and handed to
+ * vci_FillOneRowInRosChunkBuffer().  A full chunk buffer is registered in
+ * the chunk storage, and once the storage holds numRowsToConvert rows one
+ * extent is written out.
+ *
+ * Always returns false.
+ */
+static bool
+vci_inner_insert_in_copy(Relation indexRel, ItemPointer heap_tid)
+{
+	vci_MainRelHeaderInfo *vmr_info = &(copyConvContext.info);
+
+	if (0 == copyInfo.numAppendedRows)
+	{
+		uint32		val;
+
+		vci_InitRosCommandContext0(&copyConvContext, indexRel,
+								   vci_rc_copy_command);
+		vci_RecoverOneVCIIfNecessary(vmr_info);
+
+		vci_InitRosCommandContext1(&copyConvContext,
+								   VciGuc.maintenance_work_mem * INT64CONST(1024),
+								   VCI_NUM_ROWS_IN_EXTENT, 0,
+								   false);
+		vci_ResetRosChunkStorage(&(copyConvContext.storage));
+
+		vci_WriteExtentInfoInMainRosForCopyInit(vmr_info,
+												copyConvContext.extentId,
+												copyConvContext.xid);
+
+		/* increment number of copy commands */
+		val = vci_GetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0);
+		++val;
+		vci_SetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0, val);
+
+		vci_SetMainRelVar(vmr_info, vcimrv_ros_command, 0, vci_rc_copy_command);
+
+		/* flush */
+		vci_WriteMainRelVar(vmr_info, vci_wmrv_update);
+
+		/* unlock */
+		vci_ReleaseMainRelInCommandContext(&copyConvContext);
+
+		/* close heap relation */
+		vci_CloseHeapRelInCommandContext(&copyConvContext);
+	}
+
+	{
+		Relation	rel = table_open(copyConvContext.heapOid, AccessShareLock);
+		Buffer		buffer = ReadBuffer(rel, ItemPointerGetBlockNumber(heap_tid));
+		Page		page = BufferGetPage(buffer);
+		ItemId		lp = PageGetItemId(page, ItemPointerGetOffsetNumber(heap_tid));
+		HeapTupleData tupleData;
+		HeapTuple	tuple = &tupleData;
+
+		Assert(ItemIdIsNormal(lp));
+
+		/*
+		 * Build a HeapTuple that points directly into the pinned page.
+		 * NOTE(review): the page is read with a pin only, no content lock.
+		 */
+		tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+		tuple->t_len = ItemIdGetLength(lp);
+		tuple->t_tableOid = RelationGetRelid(rel);
+		tuple->t_self = *heap_tid;
+
+		vci_FillOneRowInRosChunkBuffer(&(copyConvContext.buffer),
+									   &(copyConvContext.info),
+									   &tuple->t_self,
+									   tuple,
+									   copyConvContext.indxColumnIdList,
+									   copyConvContext.heapAttrNumList,
+									   vci_GetTupleDescr(vmr_info));
+
+		/*
+		 * The tuple is not referenced below, so drop the pin taken by
+		 * ReadBuffer(); otherwise one pin is leaked per copied row.
+		 */
+		ReleaseBuffer(buffer);
+
+		if (copyConvContext.buffer.numRowsAtOnce <= copyConvContext.buffer.numFilled)
+			vci_RegisterChunkBuffer(&(copyConvContext.storage),
+									&(copyConvContext.buffer));
+
+		if (copyConvContext.numRowsToConvert <= copyConvContext.storage.numTotalRows)
+		{
+			Assert(copyConvContext.numRowsToConvert == copyConvContext.storage.numTotalRows);
+			WriteOneExtentForCopy(indexRel);
+		}
+
+		table_close(rel, AccessShareLock);
+	}
+
+	return false;
+}
+
+/*
+ * vci_FinalizeCopyCommand
+ *		Finish a COPY command: write out any rows still buffered in the
+ *		chunk storage as a last extent, then tear down the command context.
+ */
+void
+vci_FinalizeCopyCommand(void)
+{
+	if (0 < copyConvContext.storage.numTotalRows)
+	{
+		/*
+		 * indexOid names the VCI index itself, so it must be opened as an
+		 * index; table_open() raises an error for index relations.
+		 */
+		Relation	rel = index_open(copyConvContext.indexOid, RowExclusiveLock);
+
+		WriteOneExtentForCopy(rel);
+		index_close(rel, RowExclusiveLock);
+	}
+
+	vci_FinRosCommandContext(&copyConvContext, false);
+}
+
+/*
+ * vci_inner_vacuumcleanup
+ *		Run vci_VacuumRos() on the index while holding
+ *		ShareUpdateExclusiveLock on it.
+ *
+ * 'stats' is not used, and no statistics are produced: the function always
+ * returns NULL.
+ */
+static IndexBulkDeleteResult *
+vci_inner_vacuumcleanup(IndexVacuumInfo *info,
+						IndexBulkDeleteResult *stats)
+{
+	const LOCKMODE vacuumLock = ShareUpdateExclusiveLock;
+	Relation	indexRel;
+
+	elog(DEBUG2, "%s is called.", __func__);
+
+	indexRel = info->index;
+
+	LockRelation(indexRel, vacuumLock);
+	vci_VacuumRos(indexRel, info);
+	UnlockRelation(indexRel, vacuumLock);
+
+	return NULL;
+}
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * vci_add_index_delete
+ *		Record the deletion of a heap tuple in every VCI index of heapRel.
+ *
+ * heapRel:  the heap relation the tuple belongs to.
+ * heap_tid: TID of the deleted tuple.
+ * xmin:     xmin of the deleted tuple.
+ *
+ * For each VCI index on the heap, a two-column tuple (heap TID, 64-bit xid)
+ * is inserted into that index's Whiteout WOS.  The xid is built from 'xmin',
+ * or from the current transaction ID when 'xmin' is FrozenTransactionId.
+ *
+ * Raises an error if a VCI index is found while full_page_writes is off.
+ */
+void
+vci_add_index_delete(Relation heapRel, const ItemPointerData *heap_tid, TransactionId xmin)
+{
+	List	   *indexOids;
+	ListCell   *cell;
+
+	/* Fast path if definitely no indexes */
+	if (!RelationGetForm(heapRel)->relhasindex)
+		return;
+
+	/* cached list of index OIDs */
+	indexOids = RelationGetIndexList(heapRel);
+
+	foreach(cell, indexOids)
+	{
+		Relation	indexRel;
+		Relation	whiteoutRel;
+		Oid			whiteoutOid;
+		TransactionId baseXid;
+		Datum		values[2];
+		bool		nulls[2];
+		HeapTuple	whiteoutTup;
+		vci_MainRelHeaderInfo headerData;
+		vci_MainRelHeaderInfo *vmr_info = &headerData;
+
+		/* only VCI indexes are of interest */
+		indexRel = index_open(lfirst_oid(cell), RowExclusiveLock);
+		if (!isVciIndexRelation(indexRel))
+		{
+			index_close(indexRel, RowExclusiveLock);
+			continue;
+		}
+
+		if (!fullPageWrites)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING),
+					 errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel))));
+
+		vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_delete);
+		vci_KeepMainRelHeader(vmr_info);
+
+		/* open the Whiteout WOS named in the main relation header */
+		whiteoutOid = (Oid) vci_GetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0);
+		whiteoutRel = table_open(whiteoutOid, RowExclusiveLock);
+
+		/* @see generateXidDiff() in vci_ros_command.c */
+		baseXid = TransactionIdEquals(xmin, FrozenTransactionId)
+			? GetCurrentTransactionId()
+			: xmin;
+
+		/* form the (TID, xid64) tuple */
+		values[0] = ItemPointerGetDatum(heap_tid);
+		values[1] = Int64GetDatum(vci_GenerateXid64(baseXid, vmr_info));
+		nulls[0] = false;
+		nulls[1] = false;
+		whiteoutTup = heap_form_tuple(RelationGetDescr(whiteoutRel),
+									  values, nulls);
+
+		/* insert TID into Whiteout WOS */
+		simple_heap_insert(whiteoutRel, whiteoutTup);
+		heap_freetuple(whiteoutTup);
+		table_close(whiteoutRel, RowExclusiveLock);
+
+		/* release the main relation header, then the index */
+		vci_ReleaseMainRelHeader(vmr_info);
+		index_close(indexRel, RowExclusiveLock);
+	}
+
+	list_free(indexOids);
+}
+
+/*
+ * vci_add_should_index_insert
+ *		Call index_insert() for every VCI index of the result relation.
+ *
+ * resultRelInfo: supplies the heap relation and its opened indexes.
+ * slot:          the tuple that was inserted into the heap.
+ * tupleid:       TID of that heap tuple.
+ * estate:        executor state, used for predicates and index expressions.
+ *
+ * Indexes that are not VCI indexes, that are not ready for inserts, or
+ * whose partial-index predicate is not satisfied by the tuple are skipped.
+ *
+ * Always returns NIL.
+ */
+List *
+vci_add_should_index_insert(ResultRelInfo *resultRelInfo,
+							TupleTableSlot *slot,
+							ItemPointer tupleid,
+							EState *estate)
+{
+	int			nindexes = resultRelInfo->ri_NumIndices;
+	RelationPtr indexRels = resultRelInfo->ri_IndexRelationDescs;
+	IndexInfo **indexInfos = resultRelInfo->ri_IndexRelationInfo;
+	Relation	heapRel = resultRelInfo->ri_RelationDesc;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+
+	/*
+	 * Predicates and index expressions are evaluated in the EState's
+	 * per-tuple context (created here if it does not exist yet), with the
+	 * tuple under test installed as the scan tuple.
+	 */
+	econtext = GetPerTupleExprContext(estate);
+	econtext->ecxt_scantuple = slot;
+
+	for (int idx = 0; idx < nindexes; idx++)
+	{
+		Relation	indexRel = indexRels[idx];
+		IndexInfo  *indexInfo;
+
+		/* nothing to do for unopened or non-VCI indexes */
+		if (indexRel == NULL)
+			continue;
+		if (!isVciIndexRelation(indexRel))
+			continue;
+
+		indexInfo = indexInfos[idx];
+
+		/* index is not accepting inserts */
+		if (!indexInfo->ii_ReadyForInserts)
+			continue;
+
+		/* partial index: skip the tuple unless the predicate holds */
+		if (indexInfo->ii_Predicate != NIL)
+		{
+			/*
+			 * Build the predicate state on first use (in the estate's
+			 * per-query context) and cache it in the IndexInfo.
+			 */
+			if (indexInfo->ii_PredicateState == NULL)
+				indexInfo->ii_PredicateState =
+					ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+			if (!ExecQual(indexInfo->ii_PredicateState, econtext))
+				continue;
+		}
+
+		/* compute the index column values for this tuple */
+		FormIndexDatum(indexInfo, slot, estate, values, isnull);
+
+		index_insert(indexRel,	/* index relation */
+					 values,	/* array of index Datums */
+					 isnull,	/* null flags */
+					 tupleid,	/* tid of heap tuple */
+					 heapRel,	/* heap relation */
+					 UNIQUE_CHECK_NO,	/* it is ignored in VCI */
+					 false,		/* 'logically unchanged index' hint */
+					 indexInfo);	/* index AM may need this */
+	}
+
+	return NIL;
+}
+
+/*
+ * vci_add_drop_column
+ *		Column-level part of the drop hook (object->objectSubId != 0).
+ *
+ * Always returns false.  Outside of ALTER TABLE, and for system columns
+ * (objectSubId < 0), it returns immediately.  Otherwise the owning relation
+ * is opened and closed again under AccessExclusiveLock; nothing else is
+ * done with it.  'flags' is not used.
+ */
+static bool
+vci_add_drop_column(const ObjectAddress *object, int flags)
+{
+	Relation	ownerRel;
+
+	if (vci_rebuild_command != vcirc_alter_table)
+		return false;
+
+	Assert(object->objectSubId != 0);
+
+	/*
+	 * A negative objectSubId denotes a system column.  That happens only
+	 * when the OID column is modified, which is checked elsewhere, so there
+	 * is nothing to do here.
+	 */
+	if (object->objectSubId < 0)
+		return false;
+
+	/*
+	 * Open and close the relation.  The result is the same whether or not
+	 * it is a plain table (RELKIND_RELATION): close it and return false.
+	 */
+	ownerRel = relation_open(object->objectId, AccessExclusiveLock);
+	relation_close(ownerRel, AccessExclusiveLock);
+
+	return false;
+}
+
+/*
+ * vci_add_drop_relation
+ *		Drop hook for VCI indexes and the relations that belong to them.
+ *
+ * object: address of the object being dropped.
+ * flags:  PERFORM_DELETION_* flags of the deletion.
+ *
+ * Returns true when the object was handled here, false when it is not a
+ * VCI object.
+ *
+ * Column drops (objectSubId != 0) are forwarded to vci_add_drop_column().
+ * A VCI index is dropped with index_drop() and its memory entry is freed.
+ * A VCI additional relation has its storage, relcache entry, statistics,
+ * rewrite rule and catalog rows removed.  While an ALTER TABLE is running,
+ * dropping a VCI index raises an error and dropping an additional relation
+ * is skipped (returns true without removing anything).
+ *
+ * NOTE(review): a concurrent deletion reaching this code ends in
+ * elog(PANIC); confirm that such requests are rejected earlier.
+ */
+bool
+vci_add_drop_relation(const ObjectAddress *object, int flags)
+{
+	Relation	rel;
+	Oid			ruleId;
+	Oid			oid = object->objectId;
+	char		relKind = get_rel_relkind(oid);
+	bool		concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY)
+							  == PERFORM_DELETION_CONCURRENTLY);
+	bool		concurrent_lock_mode = ((flags & PERFORM_DELETION_CONCURRENT_LOCK) != 0);
+	vci_id_t	vciid;
+
+	if (object->objectSubId != 0)
+		return vci_add_drop_column(object, flags);
+
+	if (relKind == RELKIND_INDEX)
+	{
+		rel = relation_open(oid, AccessExclusiveLock);
+
+		if (!isVciIndexRelation(rel))
+		{
+			relation_close(rel, NoLock);
+			return false;
+		}
+
+		/*
+		 * Deletion of a VCI index by the ALTER TABLE command is not
+		 * supported.
+		 *
+		 * The error is raised only for the VCI main relation so that no
+		 * unnecessary messages are produced.  It is raised while the
+		 * relation is still open, because the hint reads the relation name
+		 * from the relcache entry.
+		 */
+		if (vci_rebuild_command == vcirc_alter_table)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter table because the table is indexed by VCI"),
+					 errhint("You must drop index \"%s\" before using this command.", RelationGetRelationName(rel))));
+		}
+
+		/* keep the lock, drop only the relcache reference */
+		relation_close(rel, NoLock);
+
+		if (concurrent)
+			elog(PANIC, "should not reach here");
+
+		index_drop(oid, concurrent, concurrent_lock_mode);
+
+		vciid.oid = oid;
+		vciid.dbid = MyDatabaseId;
+		vci_freeMemoryEntry(&vciid);
+	}
+	else
+	{
+		rel = relation_open(oid, AccessExclusiveLock);
+
+		if (!vci_isVciAdditionalRelation(rel))
+		{
+			relation_close(rel, NoLock);
+			return false;
+		}
+
+		/*
+		 * Deletion of a VCI index by the ALTER TABLE command is not
+		 * supported.
+		 *
+		 * The error is raised only for the VCI main relation so that no
+		 * unnecessary messages are produced.  For an additional relation,
+		 * return true so that the post-processing does not continue.
+		 */
+		if (vci_rebuild_command == vcirc_alter_table)
+		{
+			relation_close(rel, NoLock);
+			return true;
+		}
+
+		if (concurrent)
+			elog(PANIC, "should not reach here");
+
+		/* 2.1 Is relation used? */
+		CheckTableNotInUse(rel, "DROP TABLE");
+		CheckTableForSerializableConflictIn(rel);
+
+		ruleId = get_rewrite_oid(oid, rel->rd_rel->relname.data, true);
+
+		/* 2.2 Drop relation storage */
+		RelationDropStorage(rel);
+
+		relation_close(rel, NoLock);
+		remove_on_commit_action(oid);
+
+		/* 2.3 release relation cache */
+		RelationForgetRelation(oid);
+
+		/* 2.4 remove statistic info */
+		RemoveStatistics(oid, 0);
+
+		/* 2.5 remove pg_rewrite entry */
+		if (OidIsValid(ruleId))
+			RemoveRewriteRuleById(ruleId);
+
+		/* 2.6 remove pg_attribute entries */
+		DeleteAttributeTuples(oid);
+
+		/* 2.7 remove pg_class entry */
+		DeleteRelationTuple(oid);
+	}
+
+	return true;
+}
+
+/*
+ * vci_add_reindex_index
+ *		Decide whether the caller's remaining reindex work should go on.
+ *
+ * Returns true when the caller should continue after this hook, false when
+ * the remaining work must be cancelled.  For an index that is not a VCI
+ * index the answer is always true.  For a VCI index it depends on
+ * vci_rebuild_command:
+ *	- vcirc_reindex:      false
+ *	- vcirc_alter_table:  raises an error
+ *	- vcirc_truncate, vcirc_cluster, vcirc_vacuum_full: true
+ *	- anything else:      raises an error
+ */
+bool
+vci_add_reindex_index(Relation indexRel)
+{
+	/* not a VCI relation: let the caller carry on */
+	if (!isVciIndexRelation(indexRel))
+		return true;
+
+	switch (vci_rebuild_command)
+	{
+		case vcirc_reindex:
+			/* called by the command REINDEX except REINDEX INDEX */
+			return false;
+
+		case vcirc_alter_table:
+			/* ALTER TABLE on columns indexed by a VCI index does not work */
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter table because the table is indexed by VCI"),
+					 errhint("You must drop index \"%s\" before using this command.", RelationGetRelationName(indexRel))));
+			/* not reached; remaining work of reindex_index() is cancelled */
+			return false;
+
+		case vcirc_truncate:
+
+			/*
+			 * reindex_index called by the TRUNCATE command.  In this case
+			 * the other relations for VCI must be dropped before
+			 * RelationSetNewRelfilenumber(indexRel,...).
+			 */
+			return true;
+
+		case vcirc_cluster:
+		case vcirc_vacuum_full:
+			/* called by the command CLUSTER or VACUUM FULL */
+			return true;
+
+		default:
+			elog(ERROR, "unexpected vci_RebuildCommand");
+			break;
+	}
+
+	/* not reached: elog(ERROR) does not return */
+	return false;
+}
+
+/*
+ * vci_add_skip_vci_index
+ *		Report whether indexRel is a VCI index.
+ */
+bool
+vci_add_skip_vci_index(Relation indexRel)
+{
+	bool		isVci = isVciIndexRelation(indexRel);
+
+	return isVci;
+}
+
+/*
+ * vci_add_alter_tablespace
+ *		Reject ALTER INDEX SET TABLESPACE for VCI indexes.
+ *
+ * Returns false for an index that is not a VCI index.  For a VCI index an
+ * error is raised, so the 'true' result is never actually delivered.
+ */
+bool
+vci_add_alter_tablespace(Relation indexRel)
+{
+	if (!isVciIndexRelation(indexRel))
+		return false;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("ALTER INDEX SET TABLESPACE is not supported for VCI"),
+			 errhint("DROP INDEX and CREATE INDEX instead")));
+
+	/* not reached */
+	return true;
+}
+
+/*
+ * GetNumberOfBlocksFromOid
+ *		Return the number of blocks of the relation with the given OID.
+ *
+ * The relation is opened and closed with AccessShareLock.
+ */
+static uint32
+GetNumberOfBlocksFromOid(Oid oid)
+{
+	Relation	target;
+	uint32		nblocks;
+
+	target = relation_open(oid, AccessShareLock);
+	nblocks = RelationGetNumberOfBlocks(target);
+	relation_close(target, AccessShareLock);
+
+	return nblocks;
+}
+
+/*
+ * GetNumDBPagesOfVCIElement
+ *		Return the number of blocks used by one element of a VCI index.
+ *
+ * attrType: kind of element.
+ * index:    index of the element within its kind.
+ * info:     main relation header of the VCI index.
+ *
+ * For vcis_attribute_type_main this is the block count of the main
+ * relation itself.  For every other kind, the OIDs of the element's data
+ * relation and (where there is one) meta relation are looked up, and the
+ * block counts of the valid ones are added together.
+ */
+static int64
+GetNumDBPagesOfVCIElement(vcis_attribute_type_t attrType,
+						  int index,
+						  vci_MainRelHeaderInfo *info)
+{
+#ifdef USE_ASSERT_CHECKING
+	int			numColumns = vci_GetMainRelVar(info, vcimrv_num_columns, 0);
+#endif							/* #ifdef USE_ASSERT_CHECKING */
+	Oid			metaOid = InvalidOid;
+	Oid			dataOid = InvalidOid;
+	int64		dataBlocks;
+	int64		metaBlocks;
+
+	Assert((0 <= attrType) && (attrType < num_vcis_attribute_type));
+	Assert((0 <= index) && (index < vci_GetNumIndexForAttributeType(attrType, numColumns)));
+
+	switch (attrType)
+	{
+		case vcis_attribute_type_main:
+			/* the main relation is measured directly */
+			return RelationGetNumberOfBlocks(info->rel);
+
+		/* elements with a data relation only */
+		case vcis_attribute_type_data_wos:
+			dataOid = vci_GetMainRelVar(info, vcimrv_data_wos_oid, 0);
+			break;
+		case vcis_attribute_type_whiteout_wos:
+			dataOid = vci_GetMainRelVar(info, vcimrv_whiteout_wos_oid, 0);
+			break;
+		case vcis_attribute_type_tid_crid_update:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, index);
+			break;
+
+		/* elements with a data and a meta relation */
+		case vcis_attribute_type_tid_crid:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_crid_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_tid_crid_meta_oid, 0);
+			break;
+		case vcis_attribute_type_delete_vec:
+			dataOid = vci_GetMainRelVar(info, vcimrv_delete_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_delete_meta_oid, 0);
+			break;
+		case vcis_attribute_type_null_vec:
+			dataOid = vci_GetMainRelVar(info, vcimrv_null_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_null_meta_oid, 0);
+			break;
+		case vcis_attribute_type_tid:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_tid_meta_oid, 0);
+			break;
+		case vcis_attribute_type_pgsql:
+			{
+				vcis_m_column_t *columnEntry = vci_GetMColumn(info, index);
+
+				dataOid = columnEntry->data_oid;
+				metaOid = columnEntry->meta_oid;
+				break;
+			}
+
+		default:
+			elog(ERROR, "internal error. invalid attribute type");
+	}
+
+	/* count the data relation first, then the meta relation */
+	dataBlocks = OidIsValid(dataOid) ? (int64) GetNumberOfBlocksFromOid(dataOid) : 0;
+	metaBlocks = OidIsValid(metaOid) ? (int64) GetNumberOfBlocksFromOid(metaOid) : 0;
+
+	return dataBlocks + metaBlocks;
+}
+
+/*
+ * vci_index_size
+ *		SQL-callable: total size in bytes of a VCI index.
+ *
+ * Argument 0 is the (optionally schema-qualified) name of the index as
+ * text.  The result is the sum of the block counts of every element
+ * enumerated by vci_GetSumOfAttributeIndices(), multiplied by BLCKSZ.
+ *
+ * Raises an error if the number of arguments is not 1 or if the named
+ * relation is not a VCI index.
+ */
+PG_FUNCTION_INFO_V1(vci_index_size);
+Datum
+vci_index_size(PG_FUNCTION_ARGS)
+{
+	Relation	rel;
+	RangeVar   *relrv;
+	text	   *relname;
+	uint32		numColumns;
+	uint32		numEntries;
+	int64		result = 0;
+	vci_MainRelHeaderInfo infoData;
+	vci_MainRelHeaderInfo *info = &infoData;
+	LOCKMODE	lockmode = AccessShareLock;
+
+	/* validate the argument count before touching any argument */
+	if (PG_NARGS() != 1)
+		ereport(ERROR,
+				(errmsg("vci_index_size requires 1 argument")));
+
+	/* textToQualifiedNameList() copes with packed varlena input */
+	relname = PG_GETARG_TEXT_PP(0);
+
+	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+	rel = relation_openrv(relrv, lockmode);
+	if (!isVciIndexRelation(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("only VCI index is supported")));
+
+	vci_InitMainRelHeaderInfo(info, rel, vci_rc_probe);
+	vci_KeepMainRelHeader(info);
+	numColumns = vci_GetMainRelVar(info, vcimrv_num_columns, 0);
+	numEntries = vci_GetSumOfAttributeIndices(numColumns);
+
+	/* visit every element of the index and add up its block count */
+	for (uint32 aId = 0; aId < numEntries; ++aId)
+	{
+		vcis_attribute_type_t attrType;
+		int			index;
+
+		vci_GetAttrTypeAndIndexFromSumOfIndices(&attrType,
+												&index,
+												numColumns,
+												aId);
+		result += GetNumDBPagesOfVCIElement(attrType, index, info);
+	}
+
+	vci_ReleaseMainRelHeader(info);
+	relation_close(rel, lockmode);
+
+	PG_RETURN_INT64(result * BLCKSZ);
+}
+
+/*
+ * vci_process_utility
+ *		ProcessUtility hook of VCI.
+ *
+ * Before the statement is executed:
+ *	- vci_check_prohibited_operation() inspects it and reports whether it
+ *	  creates the VCI extension; if so vci_is_in_vci_create_extension is
+ *	  set for the duration of the statement;
+ *	- vci_rebuild_command is set according to the statement's node tag;
+ *	- copy_with_freeze_option is set for a COPY whose "freeze" option is
+ *	  true.
+ *
+ * The statement is then passed to the previous hook, or to
+ * standard_ProcessUtility() when there is none.
+ *
+ * Whether the statement succeeds or fails, vci_rebuild_command and
+ * copy_with_freeze_option are cleared again and
+ * vci_is_in_vci_create_extension is restored.  In particular the freeze
+ * flag must not outlive the COPY, because vci_inner_insert() consults it
+ * for every later insert.
+ */
+
+void
+vci_process_utility(PlannedStmt *pstmt,
+					const char *queryString,
+					bool readOnlyTree,
+					ProcessUtilityContext context,
+					ParamListInfo params,
+					QueryEnvironment *queryEnv,
+					DestReceiver *dest,
+					QueryCompletion *qc)
+{
+	bool		creating_vci_extension = false;
+	volatile bool saved_vci_is_in_vci_create_extension;
+
+	Node	   *parseTree = pstmt->utilityStmt;
+
+	vci_check_prohibited_operation(parseTree, &creating_vci_extension);
+
+	saved_vci_is_in_vci_create_extension = vci_is_in_vci_create_extension;
+
+	if (creating_vci_extension)
+		vci_is_in_vci_create_extension = true;
+
+	vci_rebuild_command = vcirc_invalid;
+	copy_with_freeze_option = false;
+
+#define UNUSE_COPY_INSERT
+
+	switch (nodeTag(parseTree))
+	{
+			/* check if the statement is a "COPY table FROM ..." statement */
+		case T_CopyStmt:
+			{
+				CopyStmt   *stmt;
+				ListCell   *lc;
+
+#ifndef UNUSE_COPY_INSERT
+				TransactionId xid = GetCurrentTransactionId();
+				CommandId	cid = GetCurrentCommandId(false);
+
+				Assert(TransactionIdIsValid(xid));
+				Assert(InvalidCommandId != cid);
+				vci_set_copy_transaction_and_command_id(xid, cid);
+#endif							/* #ifndef UNUSE_COPY_INSERT */
+
+				stmt = (CopyStmt *) parseTree;
+
+				/* remember whether COPY was given a true "freeze" option */
+				foreach(lc, stmt->options)
+				{
+					DefElem    *defel = (DefElem *) lfirst(lc);
+
+					if (strcmp(defel->defname, "freeze") == 0)
+					{
+						if (defGetBoolean(defel))
+						{
+							copy_with_freeze_option = true;
+							break;
+						}
+					}
+				}
+			}
+			break;
+
+			/* check if the statement is a TRUNCATE for VCI Indexed table */
+		case T_TruncateStmt:
+			vci_rebuild_command = vcirc_truncate;
+			break;
+
+			/* check if the statement is a REINDEX for VCI Indexed table */
+		case T_ReindexStmt:
+			vci_rebuild_command = vcirc_reindex;
+			break;
+
+			/* check if the statement is an ALTER TABLE for VCI Indexed table */
+		case T_AlterTableStmt:
+			vci_rebuild_command = vcirc_alter_table;
+			break;
+
+			/*
+			 * check if the statement is a VACUUM for VCI Indexed table (every
+			 * VACUUM statement is tagged vcirc_vacuum_full here)
+			 */
+		case T_VacuumStmt:
+			vci_rebuild_command = vcirc_vacuum_full;
+			break;
+
+			/* check if the statement is a CLUSTER for VCI Indexed table */
+		case T_ClusterStmt:
+			vci_rebuild_command = vcirc_cluster;
+			break;
+
+		default:
+			break;
+	}
+
+	PG_TRY();
+	{
+		if (process_utility_prev != NULL)
+			process_utility_prev(pstmt, queryString, readOnlyTree,
+								 context, params, queryEnv,
+								 dest, qc);
+		else
+			standard_ProcessUtility(pstmt, queryString, readOnlyTree,
+									context, params, queryEnv,
+									dest, qc);
+	}
+	PG_FINALLY();
+	{
+		/*
+		 * Runs on success and on error alike, so that no per-statement
+		 * state leaks into later statements of the session.
+		 */
+		vci_rebuild_command = vcirc_invalid;
+		copy_with_freeze_option = false;
+		vci_is_in_vci_create_extension = saved_vci_is_in_vci_create_extension;
+	}
+	PG_END_TRY();
+
+#ifndef UNUSE_COPY_INSERT
+	/* check if the statement is a "COPY table FROM ..." statement */
+	if (nodeTag(parseTree) == T_CopyStmt)
+		vci_FinalizeCopyCommand();
+#endif							/* #ifndef UNUSE_COPY_INSERT */
+}
+
+/*
+ * vci_handler
+ *		Index access method handler of VCI: builds and returns the
+ *		IndexAmRoutine describing the access method's properties and
+ *		callbacks.
+ */
+PG_FUNCTION_INFO_V1(vci_handler);
+
+Datum
+vci_handler(PG_FUNCTION_ARGS)
+{
+	IndexAmRoutine *routine = makeNode(IndexAmRoutine);
+
+	/* strategy / support procedure counts and key type */
+	routine->amstrategies = 1;
+	routine->amsupport = 0;
+	routine->amoptsprocnum = 0;
+	routine->amkeytype = InvalidOid;
+	routine->amparallelvacuumoptions = VACUUM_OPTION_NO_PARALLEL;
+
+	/* the only capability switched on is multi-column support */
+	routine->amcanmulticol = true;
+
+	/* every other capability flag is off */
+	routine->amcanorder = false;
+	routine->amcanorderbyop = false;
+	routine->amcanhash = false;
+	routine->amconsistentequality = false;
+	routine->amconsistentordering = false;
+	routine->amcanbackward = false;
+	routine->amcanunique = false;
+	routine->amoptionalkey = false;
+	routine->amsearcharray = false;
+	routine->amsearchnulls = false;
+	routine->amstorage = false;
+	routine->amclusterable = false;
+	routine->ampredlocks = false;
+	routine->amcanparallel = false;
+	routine->amcanbuildparallel = false;
+	routine->amcaninclude = false;
+	routine->amusemaintenanceworkmem = false;
+	routine->amsummarizing = false;
+
+	/* build, insert and vacuum callbacks */
+	routine->ambuild = vci_build;
+	routine->ambuildempty = vci_buildempty;
+	routine->aminsert = vci_insert;
+	routine->aminsertcleanup = NULL;
+	routine->ambulkdelete = vci_bulkdelete;
+	routine->amvacuumcleanup = vci_vacuumcleanup;
+
+	/* planner, option and validation callbacks */
+	routine->amcanreturn = NULL;
+	routine->amcostestimate = vci_costestimate;
+	routine->amgettreeheight = vci_gettreeheight;
+	routine->amoptions = vci_options;
+	routine->amproperty = NULL;
+	routine->ambuildphasename = NULL;
+	routine->amvalidate = vci_validate;
+	routine->amadjustmembers = NULL;
+
+	/* scan callbacks; neither amgettuple nor amgetbitmap is provided */
+	routine->ambeginscan = vci_beginscan;
+	routine->amrescan = vci_rescan;
+	routine->amgettuple = NULL;
+	routine->amgetbitmap = NULL;
+	routine->amendscan = vci_endscan;
+	routine->ammarkpos = vci_markpos;
+	routine->amrestrpos = vci_restrpos;
+
+	/* no parallel scan support */
+	routine->amestimateparallelscan = NULL;
+	routine->aminitparallelscan = NULL;
+	routine->amparallelrescan = NULL;
+
+	/* no strategy <-> compare type translation */
+	routine->amtranslatestrategy = NULL;
+	routine->amtranslatecmptype = NULL;
+
+	PG_RETURN_POINTER(routine);
+}
diff --git a/contrib/vci/storage/vci_internal_view.c b/contrib/vci/storage/vci_internal_view.c
new file mode 100644
index 0000000..d5422d7
--- /dev/null
+++ b/contrib/vci/storage/vci_internal_view.c
@@ -0,0 +1,663 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_internal_view.c
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/storage/vci_internal_view.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/skey.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h" /* for RangeVarGetRelid */
+#include "catalog/pg_am.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_namespace.h"
+#include "commands/tablecmds.h"
+#include "commands/defrem.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "nodes/primnodes.h"
+#include "storage/lock.h"
+#include "utils/acl.h"
+#include "utils/fmgroids.h" /* for F_OIDEQ */
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/syscache.h"
+
+#include "vci.h"
+
+#include "vci_ros.h"
+
+bool vci_is_in_vci_create_extension;
+
+static List *make_dependent_view_list(Oid relOid);
+static void change_owner_or_schema_of_internal_view_list(List *internal_view_oid_list, Oid newOid, bool is_owner);
+static void check_prohibited_operation_for_extension(const char *extname);
+static void check_prohibited_operation_for_access_method(const char *amname);
+static void check_prohibited_operation_for_range_var(RangeVar *rel);
+static void check_prohibited_operation_for_object(ObjectType objtype, Node *object);
+static void check_prohibited_operation_for_relation(Relation rel);
+static bool is_vci_access_method(Oid accessMethodObjectId);
+
+void
+vci_alter_table_change_owner(Oid relOid, char relKind, Oid newOwnerId)
+{
+ List *view_oid_list = NIL;
+ /* Give the VCI additional relations that depend on index relOid the owner newOwnerId */
+ if (relKind != RELKIND_INDEX)
+ return;
+
+ view_oid_list = make_dependent_view_list(relOid);
+ /* No dependent relations recorded for relOid: nothing to update */
+ if (view_oid_list == NIL)
+ return;
+ /* is_owner = true selects the relowner (and relacl) update rather than relnamespace */
+ change_owner_or_schema_of_internal_view_list(view_oid_list, newOwnerId, true);
+
+ list_free(view_oid_list);
+}
+
+void
+vci_alter_table_change_schema(Oid relOid, char relKind, Oid newNspOid)
+{
+ List *view_oid_list = NIL;
+ /* Move the VCI additional relations that depend on index relOid into namespace newNspOid */
+ if (relKind != RELKIND_INDEX)
+ return;
+
+ view_oid_list = make_dependent_view_list(relOid);
+ /* No dependent relations recorded for relOid: nothing to update */
+ if (view_oid_list == NIL)
+ return;
+ /* is_owner = false selects the relnamespace update rather than relowner */
+ change_owner_or_schema_of_internal_view_list(view_oid_list, newNspOid, false);
+
+ list_free(view_oid_list);
+}
+
+static List *
+make_dependent_view_list(Oid relOid)
+{
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+	List	   *view_oid_list = NIL;
+
+	depRel = table_open(DependRelationId, AccessShareLock);
+	/* Find every pg_depend row whose referenced object is relation relOid */
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relOid));
+
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+
+		Assert(pg_depend->refclassid == RelationRelationId);
+		Assert(pg_depend->refobjid == relOid);
+
+		/* Keep only dependents that are pg_class entries themselves and */
+		/* that reference relOid as a whole (refobjsubid == 0), not a column */
+		if (pg_depend->classid != RelationRelationId ||
+			pg_depend->refobjsubid != 0)
+			continue;
+
+		view_oid_list = lappend_oid(view_oid_list, pg_depend->objid);
+	}
+
+	systable_endscan(depScan);
+
+	table_close(depRel, AccessShareLock);
+	/* Returns the collected OIDs (NIL if none); the callers list_free() the list */
+	return view_oid_list;
+}
+
+static void
+change_owner_or_schema_of_internal_view_list(List *view_oid_list, Oid newOid, bool is_owner)
+{
+ ListCell *lc;
+ /* For each listed VCI additional relation, set relowner (is_owner) or relnamespace to newOid */
+ foreach(lc, view_oid_list)
+ {
+ Oid childRelOid = lfirst_oid(lc);
+ Relation class_rel;
+ HeapTuple tuple;
+ Form_pg_class tuple_class;
+
+ class_rel = table_open(RelationRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(childRelOid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", childRelOid);
+
+ tuple_class = (Form_pg_class) GETSTRUCT(tuple);
+ /* Listed relations that are not VCI additional relations are left untouched */
+ if (vci_isVciAdditionalRelationTuple(childRelOid, tuple_class))
+ {
+ Datum repl_val[Natts_pg_class];
+ bool repl_null[Natts_pg_class];
+ bool repl_repl[Natts_pg_class];
+ Acl *newAcl;
+ Datum aclDatum;
+ bool isNull;
+ HeapTuple newtuple;
+
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+ /* repl_val[] is only filled in for the columns flagged in repl_repl[] */
+ if (is_owner)
+ {
+ repl_repl[Anum_pg_class_relowner - 1] = true;
+ repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOid);
+
+ aclDatum = SysCacheGetAttr(RELOID, tuple,
+ Anum_pg_class_relacl,
+ &isNull);
+ if (!isNull)
+ {
+ newAcl = aclnewowner(DatumGetAclP(aclDatum),
+ tuple_class->relowner, newOid);
+ repl_repl[Anum_pg_class_relacl - 1] = true;
+ repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
+ }
+ }
+ else
+ {
+ repl_repl[Anum_pg_class_relnamespace - 1] = true;
+ repl_val[Anum_pg_class_relnamespace - 1] = ObjectIdGetDatum(newOid);
+ }
+ /* Build a modified copy of the cached tuple and write that copy to pg_class */
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple);
+
+ heap_freetuple(newtuple);
+ }
+
+ ReleaseSysCache(tuple);
+ table_close(class_rel, RowExclusiveLock);
+ }
+}
+
+/*
+ * Raise ERROR if utility statement parseTree is prohibited by VCI; sets
+ * *creating_vci_extension to true (only) for CREATE EXTENSION VCI_STRING.
+ */
+void
+vci_check_prohibited_operation(Node *parseTree, bool *creating_vci_extension)
+{
+	switch (nodeTag(parseTree))
+	{
+		case T_CreateExtensionStmt:
+			{
+				CreateExtensionStmt *stmt = (CreateExtensionStmt *) parseTree;
+
+				if (strcmp(stmt->extname, VCI_STRING) == 0)
+				{
+					ListCell   *lc;
+
+					foreach(lc, stmt->options)
+					{
+						DefElem    *defel = (DefElem *) lfirst(lc);
+
+						if (strcmp(defel->defname, "schema") == 0
+							&& get_namespace_oid(defGetString(defel), false) != PG_PUBLIC_NAMESPACE)
+						{
+							ereport(ERROR,
+									(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+									 errmsg("extension \"%s\" cannot specify a schema name", VCI_STRING)));
+						}
+					}
+
+					*creating_vci_extension = true;
+				}
+			}
+			break;
+
+		case T_AlterExtensionStmt:
+			check_prohibited_operation_for_extension(((AlterExtensionStmt *) parseTree)->extname);
+			break;
+
+		case T_AlterExtensionContentsStmt:
+			check_prohibited_operation_for_extension(((AlterExtensionContentsStmt *) parseTree)->extname);
+			break;
+
+		case T_ViewStmt:		/* CREATE (OR REPLACE) VIEW */
+			check_prohibited_operation_for_range_var(((ViewStmt *) parseTree)->view);
+			break;
+
+		case T_AlterTableStmt:	/* ALTER VIEW */
+			check_prohibited_operation_for_range_var(((AlterTableStmt *) parseTree)->relation);
+			break;
+
+		case T_RuleStmt:		/* CREATE RULE */
+			check_prohibited_operation_for_range_var(((RuleStmt *) parseTree)->relation);
+			break;
+
+		case T_CreateTrigStmt:	/* CREATE TRIGGER */
+			check_prohibited_operation_for_range_var(((CreateTrigStmt *) parseTree)->relation);
+			break;
+
+		case T_GrantStmt:		/* only explicit table targets are checked */
+			{
+				GrantStmt  *stmt = (GrantStmt *) parseTree;
+
+				if ((stmt->targtype == ACL_TARGET_OBJECT) && (stmt->objtype == OBJECT_TABLE))
+				{
+					ListCell   *lc;
+
+					foreach(lc, stmt->objects)
+						check_prohibited_operation_for_range_var((RangeVar *) lfirst(lc));
+				}
+			}
+			break;
+
+		case T_GrantRoleStmt:	/* nothing to check */
+			break;
+
+		case T_CreateOpClassStmt:	/* allowed while vci_is_in_vci_create_extension */
+			if (!vci_is_in_vci_create_extension)
+				check_prohibited_operation_for_access_method(((CreateOpClassStmt *) parseTree)->amname);
+			break;
+
+		case T_CreateOpFamilyStmt:
+			if (!vci_is_in_vci_create_extension)
+				check_prohibited_operation_for_access_method(((CreateOpFamilyStmt *) parseTree)->amname);
+			break;
+
+		case T_AlterOpFamilyStmt:
+			if (!vci_is_in_vci_create_extension)
+				check_prohibited_operation_for_access_method(((AlterOpFamilyStmt *) parseTree)->amname);
+			break;
+
+		case T_ReindexStmt:		/* only REINDEX INDEX is checked */
+			{
+				ReindexStmt *stmt = (ReindexStmt *) parseTree;
+				Relation	rel;
+
+				if (stmt->kind != REINDEX_OBJECT_INDEX)
+					break;
+
+				rel = relation_openrv_extended(stmt->relation, AccessShareLock, true);
+
+				if (rel == NULL)
+					break;
+
+				if (isVciIndexRelation(rel))
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("REINDEX is not supported for VCI"),
+							 errhint("DROP INDEX and CREATE INDEX instead")));
+
+				relation_close(rel, AccessShareLock);
+			}
+			break;
+
+		case T_ClusterStmt:
+			{
+				ClusterStmt *stmt = (ClusterStmt *) parseTree;
+				Relation	rel;
+
+				/*
+				 * Do nothing, if CLUSTER command issued without relation
+				 * name. As this command will only cluster previously
+				 * clustered tables, VCI indexed tables will not be clustered
+				 * anyways
+				 */
+				if (stmt->relation == NULL)
+					break;
+
+				rel = relation_openrv_extended(stmt->relation, AccessShareLock, true);
+
+				if (rel == NULL)
+					break;
+
+				if (RelationGetForm(rel)->relhasindex)
+				{
+					List	   *indexoidlist;
+					ListCell   *lc;
+
+					indexoidlist = RelationGetIndexList(rel);
+
+					foreach(lc, indexoidlist)
+					{
+						Oid			indexOid = lfirst_oid(lc);
+						Relation	indexRel;
+
+						indexRel = index_open(indexOid, AccessShareLock);
+
+						if (isVciIndexRelation(indexRel))
+							ereport(ERROR,
+									(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+									 errmsg("cannot cluster tables including %s index(es)", VCI_STRING),
+									 errhint("Use DROP INDEX %s first", RelationGetRelationName(indexRel))));
+
+						index_close(indexRel, AccessShareLock);
+					}
+				}
+
+				relation_close(rel, AccessShareLock);
+			}
+			break;
+
+		case T_CommentStmt:		/* COMMENT */
+			{
+				CommentStmt *stmt = (CommentStmt *) parseTree;
+
+				if (stmt->objtype == OBJECT_MATVIEW)
+					check_prohibited_operation_for_object(stmt->objtype, stmt->object);
+			}
+			break;
+
+		case T_SecLabelStmt:	/* SECURITY LABEL */
+			{
+				SecLabelStmt *stmt = (SecLabelStmt *) parseTree;
+
+				if (stmt->objtype == OBJECT_MATVIEW)
+					check_prohibited_operation_for_object(stmt->objtype, stmt->object);
+			}
+			break;
+
+		case T_RenameStmt:
+			{
+				RenameStmt *stmt = (RenameStmt *) parseTree;
+
+				switch (stmt->renameType)
+				{
+					case OBJECT_MATVIEW:
+						check_prohibited_operation_for_range_var(stmt->relation);
+						break;
+
+					case OBJECT_OPCLASS:
+					case OBJECT_OPFAMILY:
+						check_prohibited_operation_for_object(stmt->renameType, stmt->object);
+						break;
+					default:
+						break;
+				}
+			}
+			break;
+
+		case T_AlterObjectSchemaStmt:
+			{
+				AlterObjectSchemaStmt *stmt = (AlterObjectSchemaStmt *) parseTree;
+
+				switch (stmt->objectType)
+				{
+					case OBJECT_MATVIEW:
+						check_prohibited_operation_for_range_var(stmt->relation);
+						break;
+
+					case OBJECT_EXTENSION:
+					case OBJECT_OPCLASS:
+					case OBJECT_OPFAMILY:
+						check_prohibited_operation_for_object(stmt->objectType, stmt->object);
+						break;
+
+					default:
+						break;
+				}
+			}
+			break;
+
+		case T_AlterOwnerStmt:
+			{
+				AlterOwnerStmt *stmt = (AlterOwnerStmt *) parseTree;
+
+				switch (stmt->objectType)
+				{
+					case OBJECT_OPCLASS:
+					case OBJECT_OPFAMILY:
+						check_prohibited_operation_for_object(stmt->objectType, stmt->object);
+						break;
+
+					default:
+						break;
+				}
+			}
+			break;
+
+		case T_IndexStmt:		/* CREATE INDEX ... CONCURRENTLY */
+			{
+				IndexStmt  *stmt = (IndexStmt *) parseTree;
+
+				if (strcmp(stmt->accessMethod, VCI_STRING) == 0)
+				{
+					if (stmt->concurrent)
+					{
+						ereport(ERROR,
+								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								 errmsg("access method \"%s\" does not support concurrent index build", VCI_STRING),
+								 errhint("Use DROP INDEX to remove an vci index and try again without CONCURRENTLY option")));
+					}
+				}
+			}
+			break;
+
+		case T_DropStmt:		/* DROP INDEX CONCURRENTLY */
+			{
+				DropStmt   *stmt = (DropStmt *) parseTree;
+
+				if (stmt->removeType == OBJECT_INDEX)
+				{
+					ListCell   *lc;
+
+					if (stmt->concurrent)
+					{
+						foreach(lc, stmt->objects)
+						{
+							RangeVar   *range_var = makeRangeVarFromNameList((List *) lfirst(lc));
+							Relation	relation;
+
+							relation = relation_openrv_extended(range_var, AccessShareLock, true);
+
+							if (relation == NULL)
+								continue;	/* not found: still check the remaining names */
+
+							if (isVciIndexRelation(relation))
+								ereport(ERROR,
+										(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+										 errmsg("access method \"%s\" does not support concurrent index drop", VCI_STRING),
+										 errhint("Try again without CONCURRENTLY option")));
+
+							relation_close(relation, AccessShareLock);
+						}
+					}
+				}
+			}
+			break;
+
+		case T_RefreshMatViewStmt:	/* REFRESH MATERIALIZED VIEW */
+			check_prohibited_operation_for_range_var(((RefreshMatViewStmt *) parseTree)->relation);
+			break;
+
+		default:
+			break;
+	}
+}
+
+static void
+check_prohibited_operation_for_extension(const char *extname)
+{
+ if (strcmp(extname, VCI_STRING) == 0) /* ERROR only when the named extension is VCI_STRING */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("extension \"%s\" prohibits this operation", VCI_STRING)));
+}
+
+static void
+check_prohibited_operation_for_access_method(const char *amname)
+{
+ if (strcmp(amname, VCI_STRING) == 0) /* ERROR only when the named access method is VCI_STRING */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("extension \"%s\" prohibits this operation on access method \"%s\"",
+ VCI_STRING, VCI_STRING)));
+}
+
+static void
+check_prohibited_operation_for_range_var(RangeVar *range_var)
+{
+ Relation rel;
+ /* missing_ok = true: a relation that cannot be found yields NULL and is skipped */
+ rel = relation_openrv_extended(range_var, AccessShareLock, true);
+
+ if (rel == NULL)
+ return;
+ /* Raises ERROR when rel is a VCI additional relation */
+ check_prohibited_operation_for_relation(rel);
+
+ relation_close(rel, AccessShareLock);
+}
+
+static void
+check_prohibited_operation_for_object(ObjectType objtype, Node *object)
+{
+ switch (objtype)
+ {
+ case OBJECT_EXTENSION:
+ check_prohibited_operation_for_extension(strVal(object));
+ break;
+
+ case OBJECT_MATVIEW:
+ case OBJECT_OPCLASS:
+ case OBJECT_OPFAMILY:
+ {
+ ObjectAddress address;
+ Relation relation = NULL;
+
+ address = get_object_address(objtype, object, &relation, AccessShareLock, true);
+ /* missing_ok = true: an object that was not found has an invalid OID; skip it */
+ if (!OidIsValid(address.objectId))
+ goto done;
+
+ switch (objtype)
+ {
+ case OBJECT_MATVIEW:
+ check_prohibited_operation_for_relation(relation);
+ break;
+
+ case OBJECT_OPCLASS:
+ {
+ Relation opclass_rel;
+ HeapTuple opclass_tuple;
+ Form_pg_opclass opclass_form;
+
+ opclass_rel = table_open(OperatorClassRelationId, AccessShareLock);
+
+ opclass_tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(address.objectId));
+ if (!HeapTupleIsValid(opclass_tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for opclass %u", address.objectId);
+
+ opclass_form = (Form_pg_opclass) GETSTRUCT(opclass_tuple);
+ /* ERROR if the operator class belongs to the VCI access method */
+ if (is_vci_access_method(opclass_form->opcmethod))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("extension \"%s\" prohibits this operation on operation class \"%s\"",
+ VCI_STRING, NameStr(opclass_form->opcname))));
+
+ ReleaseSysCache(opclass_tuple);
+ table_close(opclass_rel, AccessShareLock);
+ }
+ break;
+
+ case OBJECT_OPFAMILY:
+ {
+ Relation opfamily_rel;
+ HeapTuple opfamily_tuple;
+ Form_pg_opfamily opfamily_form;
+
+ opfamily_rel = table_open(OperatorFamilyRelationId, AccessShareLock);
+
+ opfamily_tuple = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(address.objectId));
+ if (!HeapTupleIsValid(opfamily_tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for opfamily %u", address.objectId);
+
+ opfamily_form = (Form_pg_opfamily) GETSTRUCT(opfamily_tuple);
+ /* ERROR if the operator family belongs to the VCI access method */
+ if (is_vci_access_method(opfamily_form->opfmethod))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("extension \"%s\" prohibits this operation on operation family \"%s\"",
+ VCI_STRING, NameStr(opfamily_form->opfname))));
+
+ ReleaseSysCache(opfamily_tuple);
+ table_close(opfamily_rel, AccessShareLock);
+ }
+ break;
+
+ default:
+ break;
+ }
+ /* relation stays NULL unless get_object_address() handed back an open relation */
+ done:
+ if (relation != NULL)
+ relation_close(relation, AccessShareLock);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+check_prohibited_operation_for_relation(Relation rel)
+{
+ if (vci_isVciAdditionalRelation(rel)) /* ERROR only for VCI additional relations; others pass */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("extension \"%s\" prohibits this operation on view \"%s\"",
+ VCI_STRING, NameStr(rel->rd_rel->relname))));
+}
+
+static bool
+is_vci_access_method(Oid accessMethodObjectId)
+{
+ HeapTuple amtuple;
+ bool result = false;
+ Form_pg_am amform;
+ /* Returns true iff the pg_am entry with this OID is named VCI_STRING */
+ amtuple = SearchSysCache1(AMOID,
+ ObjectIdGetDatum(accessMethodObjectId));
+
+ if (!HeapTupleIsValid(amtuple))
+ {
+ elog(WARNING,
+ "cache lookup failed for access method %u", accessMethodObjectId);
+ /* A failed lookup only warns and is reported as "not VCI" */
+ return false;
+ }
+
+ amform = (Form_pg_am) GETSTRUCT(amtuple);
+
+ if (strcmp(NameStr(amform->amname), VCI_STRING) == 0)
+ result = true;
+
+ ReleaseSysCache(amtuple);
+
+ return result;
+}
diff --git a/contrib/vci/storage/vci_tidcrid.c b/contrib/vci/storage/vci_tidcrid.c
new file mode 100644
index 0000000..8174b47
--- /dev/null
+++ b/contrib/vci/storage/vci_tidcrid.c
@@ -0,0 +1,1774 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_tidcrid.c
+ * TIDCRID update list and TIDCRID Tree relation handlings
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/storage/vci_tidcrid.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <stdlib.h>
+
+#include "catalog/storage.h"
+#include "utils/tuplesort.h"
+
+#include "vci.h"
+#include "vci_freelist.h"
+#include "vci_ros.h"
+#include "vci_tidcrid.h"
+
+/*
+ * Add TID-CRID tree page to the free list if number of free items exceeds
+ * VCI_TID_CRID_FREESPACE_THRESHOLD
+ */
+#define VCI_TID_CRID_FREESPACE_THRESHOLD (4)
+
+/*
+ * Dummy column id for the main relation
+ */
+#define VCI_TID_CRID_COLID_DUMMY ((int16) 1)
+
+#define VCI_TID_CRID_RECOVERY_CURRENT_VAL (InvalidOffsetNumber)
+
+static void InitializeTidCridUpdateList(Oid relOid);
+
+static void WriteTidCridUpdateList(vci_MainRelHeaderInfo *info, int sel, bool (*callback) (vcis_tidcrid_pair_item_t *item, void *data), void *data);
+static void SampleTidCridUpdateList(Relation rel, uint64 count, vcis_tidcrid_pair_list_t *dest);
+
+static vcis_tidcrid_meta_t *vci_GetTidCridMeta(vci_TidCridRelations *relPair);
+static vcis_tidcrid_pagetag_t *vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk);
+static void GetTidCridMetaItemPosition(BlockNumber *blockNumber, uint32 *offset, BlockNumber blkNum);
+static vcis_tidcrid_meta_item_t *vci_GetTidCridMetaItem(vci_TidCridRelations *relPair, BlockNumber blkNum);
+static char *vci_GetTidCridTreeNode(vci_TidCridRelations *relPair, ItemPointer trunkPtr, int64 leafNo, ItemPointer retPtr);
+
+static void RemoveLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo);
+static void AddNewLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo);
+
+static uint64 SearchFromTidCridTree(vci_MainRelHeaderInfo *info, ItemPointer tId);
+
+static uint64 SearchCridFromTidCridUpdateListContext(vci_TidCridUpdateListContext *context, ItemPointer tId);
+static uint64 SearchCridInBlockRange(vci_TidCridUpdateListContext *context, ItemPointer tId, BlockNumber start, BlockNumber end);
+static uint64 SearchCridInBlock(vci_TidCridUpdateListContext *context, ItemPointer tId, vcis_tidcrid_pair_item_t *array, int first, int last);
+
+static OffsetNumber FindFreeItem(vci_TidCridRelations *relPair, BlockNumber freeBlk);
+
+static void SetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber bit);
+static void UnsetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber bit);
+
+static void WriteRecoveryRecordForTidCridTrunk(vci_TidCridRelations *relPair, BlockNumber origBlkno, BlockNumber trunkBlkno, OffsetNumber trunkOffset);
+static void WriteRecoveryRecordForTidCridLeaf(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo, BlockNumber leafBlkno, OffsetNumber leafOffset);
+static void WriteRecoveryRecordForTidCridCommon(vci_TidCridRelations *relPair, vcis_tid_crid_op_type_t operation, BlockNumber targetBlkno, uint32 targetInfo, BlockNumber freeBlkno, OffsetNumber freeOffset);
+
+/**
+ * Macros that view the data area of a Page (past VCI_MIN_PAGE_HEADER) as a vcis_tidcrid_pair_list_t / vcis_tidcrid_pair_item_t pointer.
+ */
+#define vci_GetTidCridPairListT(page) \
+ ((vcis_tidcrid_pair_list_t *) &((page)[VCI_MIN_PAGE_HEADER]))
+
+#define vci_GetTidCridPairItemT(page) \
+ ((vcis_tidcrid_pair_item_t *) &((page)[VCI_MIN_PAGE_HEADER]))
+
+#define ROUND_UP(value, size) ((((value) + (size) - 1) / (size)) * (size))
+
+/*
+ * Reset the TID-CRID update list stored in relation relOid to empty by writing num = 0 into its header page
+ */
+static void
+InitializeTidCridUpdateList(Oid relOid)
+{
+ Relation rel = table_open(relOid, ShareLock);
+ Buffer buffer;
+ Page page;
+ vcis_tidcrid_pair_list_t *pairList;
+ BlockNumber blockNumber = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID;
+
+ Assert(offsetof(vcis_tidcrid_pair_list_t, body) == VCI_TID_CRID_UPDATE_PAGE_SPACE);
+ /* Make sure the header page exists, then set only the entry count under an exclusive lock */
+ vci_PreparePagesWithOneItemIfNecessary(rel, blockNumber);
+ buffer = ReadBuffer(rel, blockNumber);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ pairList = vci_GetTidCridPairListT(page);
+ pairList->num = 0;
+
+ vci_WriteOneItemPage(rel, buffer);
+ UnlockReleaseBuffer(buffer);
+ table_close(rel, ShareLock);
+}
+
+/*
+ * Initialize both TID-CRID update list relations whose OIDs are stored in the main relation info
+ */
+void
+vci_InitializeTidCridUpdateLists(vci_MainRelHeaderInfo *info)
+{
+ Oid oid;
+
+ oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, 0);
+ InitializeTidCridUpdateList(oid);
+ oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_1, 0);
+ InitializeTidCridUpdateList(oid);
+}
+
+/*
+ * Initialize the TID-CRID tree: format and fill the first page of both its meta and data relations
+ */
+void
+vci_InitializeTidCridTree(vci_MainRelHeaderInfo *info)
+{
+ LOCKMODE lockmode = ShareLock;
+
+ vci_TidCridRelations relPairData = {0};
+ vci_TidCridRelations *relPair = &relPairData;
+ vcis_tidcrid_meta_t *tidcridMeta;
+ vcis_tidcrid_pagetag_t *tidcridTag;
+
+ vci_OpenTidCridRelations(relPair, info, lockmode);
+
+ /* --- Meta --- */
+
+ vci_FormatPageWithOneItem(relPair->meta,
+ VCI_TID_CRID_DATA_FIRST_PAGE_ID);
+ /* vci_GetTidCridMeta() stores the pinned meta buffer in relPair->bufMeta */
+ tidcridMeta = vci_GetTidCridMeta(relPair);
+ LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+
+ tidcridMeta->free_page_begin_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+ tidcridMeta->free_page_begin_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+ tidcridMeta->free_page_end_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+ tidcridMeta->free_page_end_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID;
+ tidcridMeta->free_page_prev_id = InvalidBlockNumber;
+ tidcridMeta->free_page_next_id = InvalidBlockNumber;
+ tidcridMeta->num_free_pages = 1;
+ tidcridMeta->num_free_pages_old = 1;
+ tidcridMeta->num_free_page_blocks = 1;
+ tidcridMeta->num_free_page_blocks_old = 1;
+
+ tidcridMeta->num = 0;
+ tidcridMeta->num_old = 0;
+ tidcridMeta->free_block_number = 1;
+ tidcridMeta->offset = offsetof(vcis_tidcrid_meta_t, body);
+
+ /* XXX: should the first meta item be explicitly marked invalid here? */
+
+ vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+ UnlockReleaseBuffer(relPair->bufMeta);
+
+ /* --- Data --- */
+
+ vci_FormatPageWithItems(relPair->data,
+ VCI_TID_CRID_DATA_FIRST_PAGE_ID,
+ VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE);
+ /* vci_GetTidCridTag() stores the pinned data buffer in relPair->bufData */
+ tidcridTag = vci_GetTidCridTag(relPair, VCI_TID_CRID_DATA_FIRST_PAGE_ID);
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+ tidcridTag->size = MaxBlockNumber;
+ tidcridTag->type = vcis_tidcrid_type_pagetag;
+ tidcridTag->prev_pos = InvalidBlockNumber;
+ tidcridTag->next_pos = InvalidBlockNumber;
+
+ tidcridTag->num = 0;
+
+ /* One item is already taken by the metadata, so subtract it from free_size */
+ tidcridTag->free_size = VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - 1;
+ tidcridTag->bitmap = 0x1;
+
+ vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+ UnlockReleaseBuffer(relPair->bufData);
+
+ vci_CloseTidCridRelations(relPair, lockmode);
+}
+
+/* **************************************
+ * TID CRID Update List Functions
+ * *************************************
+ */
+
+/*
+ * Open TID-CRID update list sel (0 or 1) and load its header.
+ *
+ * Returns a palloc'd context; release it with vci_CloseTidCridUpdateList().
+ */
+vci_TidCridUpdateListContext *
+vci_OpenTidCridUpdateList(vci_MainRelHeaderInfo *info, int sel)
+{
+	Oid			oid;
+	Buffer		buffer;
+	Page		page;
+	BlockNumber blkno;
+	vcis_tidcrid_pair_list_t *src;
+	vci_TidCridUpdateListContext *context;
+
+	context = palloc0_object(vci_TidCridUpdateListContext);
+
+	Assert((0 <= sel) && (sel < 2));
+	oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, sel);
+
+	context->info = info;
+	context->rel = table_open(oid, AccessShareLock);
+
+	blkno = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID;
+
+	buffer = vci_ReadBufferWithPageInit(context->rel, blkno);
+
+	page = BufferGetPage(buffer);
+	src = vci_GetTidCridPairListT(page);
+
+	/* Copy everything needed from the page while the buffer is still pinned */
+	MemCpy(&context->header, src, offsetof(vcis_tidcrid_pair_list_t, body));
+	context->count = src->num;
+
+	/* src points into the buffer page and must not be used past this point */
+	ReleaseBuffer(buffer);
+
+	/* Calculate number of blocks in TID-CRID Update List */
+	context->nblocks =
+		VCI_TID_CRID_UPDATE_BODY_PAGE_ID + ROUND_UP(context->count, VCI_TID_CRID_UPDATE_PAGE_ITEMS) / VCI_TID_CRID_UPDATE_PAGE_ITEMS;
+
+	return context;
+}
+
+/*
+ * Close the relation held by a TID-CRID update list context and free the context itself
+ */
+void
+vci_CloseTidCridUpdateList(vci_TidCridUpdateListContext *context)
+{
+ table_close(context->rel, AccessShareLock);
+
+ pfree(context);
+}
+
+/*
+ * Copy the item area (VCI_TID_CRID_UPDATE_PAGE_SPACE bytes) of block blkno of the update list into array
+ */
+void
+vci_ReadOneBlockFromTidCridUpdateList(vci_TidCridUpdateListContext *context, BlockNumber blkno, vcis_tidcrid_pair_item_t *array)
+{
+ Buffer buffer;
+ Page page;
+
+ buffer = vci_ReadBufferWithPageInit(context->rel, blkno);
+ page = BufferGetPage(buffer);
+ MemCpy(array, &page[VCI_MIN_PAGE_HEADER], VCI_TID_CRID_UPDATE_PAGE_SPACE);
+ ReleaseBuffer(buffer);
+}
+
+/*
+ * Get the number of entries (header field num) of TID-CRID update list sel (0 or 1)
+ */
+int32
+vci_GetTidCridUpdateListLength(vci_MainRelHeaderInfo *info, int sel)
+{
+ Oid oid;
+ Relation rel;
+ Buffer buffer;
+ Page page;
+ vcis_tidcrid_pair_list_t *src;
+ int32 length;
+ BlockNumber blockNumber;
+
+ Assert((0 <= sel) && (sel < 2));
+ oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, sel);
+ rel = table_open(oid, AccessShareLock);
+
+ blockNumber = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID;
+ buffer = vci_ReadBufferWithPageInit(rel, blockNumber);
+ page = BufferGetPage(buffer);
+ /* The count is copied out before the buffer is released */
+ src = vci_GetTidCridPairListT(page);
+ length = src->num;
+ ReleaseBuffer(buffer);
+
+ table_close(rel, AccessShareLock);
+
+ return length;
+}
+
+/*
+ * Rewrite TID-CRID update list sel from the items produced by callback, which returns false once exhausted
+ */
+static void
+WriteTidCridUpdateList(vci_MainRelHeaderInfo *info,
+ int sel,
+ bool (*callback) (vcis_tidcrid_pair_item_t *item, void *data),
+ void *data)
+{
+ Oid oid;
+ Relation rel;
+ BlockNumber blockNumber;
+ vcis_tidcrid_pair_item_t *array;
+ Page page;
+ Buffer buffer;
+ bool is_terminated = false;
+ vcis_tidcrid_pair_list_t tidcrid_pair_list = {0};
+ uint64 count = 0;
+
+ array = palloc_array(vcis_tidcrid_pair_item_t, VCI_TID_CRID_UPDATE_PAGE_ITEMS);
+
+ Assert((0 <= sel) && (sel < 2));
+ oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, sel);
+ rel = table_open(oid, AccessExclusiveLock);
+ /* Discard the previous contents; the list is rebuilt from block 0 */
+ RelationTruncate(rel, 0);
+
+ vci_PreparePagesWithOneItemIfNecessary(rel, VCI_TID_CRID_UPDATE_HEADER_PAGE_ID);
+
+ blockNumber = VCI_TID_CRID_UPDATE_BODY_PAGE_ID;
+
+ while (!is_terminated)
+ {
+ int count_in_page = 0;
+
+ for (int i = 0; i < VCI_TID_CRID_UPDATE_PAGE_ITEMS; i++)
+ {
+ if (!callback(&array[i], data))
+ {
+ is_terminated = true;
+ break;
+ }
+
+ count_in_page++;
+ }
+ /* NOTE(review): a full page of bytes is copied even when count_in_page is below capacity */
+ if (count_in_page > 0)
+ {
+ vci_PreparePagesWithOneItemIfNecessary(rel, blockNumber);
+ buffer = ReadBuffer(rel, blockNumber);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ MemCpy(&page[VCI_MIN_PAGE_HEADER], array, VCI_TID_CRID_UPDATE_PAGE_SPACE);
+ vci_WriteOneItemPage(rel, buffer);
+ UnlockReleaseBuffer(buffer);
+
+ blockNumber++;
+ count += count_in_page;
+ }
+ }
+
+ /* Write the header block: the total count plus, when non-empty, the sampled TIDs */
+ tidcrid_pair_list.num = count;
+
+ if (count > 0)
+ SampleTidCridUpdateList(rel, count, &tidcrid_pair_list);
+
+ buffer = vci_ReadBufferWithPageInit(rel, VCI_TID_CRID_UPDATE_HEADER_PAGE_ID);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ MemCpy(&page[VCI_MIN_PAGE_HEADER], &tidcrid_pair_list, offsetof(vcis_tidcrid_pair_list_t, body));
+ vci_WriteOneItemPage(rel, buffer);
+ UnlockReleaseBuffer(buffer);
+
+ table_close(rel, AccessExclusiveLock);
+ /* Store sel in the main relation variable vcimrv_tid_crid_diff_sel */
+ vci_SetMainRelVar(info, vcimrv_tid_crid_diff_sel, 0, sel);
+
+ pfree(array);
+}
+
+static void
+SampleTidCridUpdateList(Relation rel, uint64 count, vcis_tidcrid_pair_list_t *dest)
+{
+ BlockNumber nblocks;
+ BlockNumber blkno;
+ Buffer buffer;
+ Page page;
+ /* Fill dest->sample_tids with the first TID of every blocks_per_samp-th body block, then the last TID */
+ nblocks = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + ROUND_UP(count, VCI_TID_CRID_UPDATE_PAGE_ITEMS) / VCI_TID_CRID_UPDATE_PAGE_ITEMS;
+
+ dest->blocks_per_samp =
+ ROUND_UP(nblocks - 1 /* Except the header */ , VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES) / VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES;
+ /* NOTE(review): dest->num_samples is only incremented here, so it must be 0 on entry */
+ blkno = VCI_TID_CRID_UPDATE_BODY_PAGE_ID;
+
+ while (blkno < nblocks)
+ {
+ buffer = vci_ReadBufferWithPageInit(rel, blkno);
+ page = BufferGetPage(buffer);
+
+ Assert(dest->num_samples < VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES);
+
+ dest->sample_tids[dest->num_samples++] = vci_GetTidCridPairItemT(page)[0].page_item_id;
+
+ ReleaseBuffer(buffer);
+
+ blkno += dest->blocks_per_samp;
+ }
+
+ /* Append the TID of the very last entry (index count - 1) of the list */
+ buffer = vci_ReadBufferWithPageInit(rel, nblocks - 1);
+ page = BufferGetPage(buffer);
+
+ dest->sample_tids[dest->num_samples++] = vci_GetTidCridPairItemT(page)[(count - 1) % VCI_TID_CRID_UPDATE_PAGE_ITEMS].page_item_id;
+
+ ReleaseBuffer(buffer);
+
+ /* Drop the final entry again if it equals the sample just before it */
+ if (ItemPointerEquals(&dest->sample_tids[dest->num_samples - 1],
+ &dest->sample_tids[dest->num_samples - 2]))
+ dest->num_samples--;
+}
+
+/* **************************************
+ * TID CRID Tree Functions
+ * *************************************
+ */
+
+/*
+ * Open the meta and data relations of the TID-CRID tree with lockmode and store them in rel
+ *
+ * Caller must release via vci_CloseTidCridRelations()
+ */
+void
+vci_OpenTidCridRelations(vci_TidCridRelations *rel,
+ vci_MainRelHeaderInfo *info,
+ LOCKMODE lockmode)
+{
+ rel->meta = table_open(vci_GetMainRelVar(info, vcimrv_tid_crid_meta_oid, 0), lockmode);
+ rel->data = table_open(vci_GetMainRelVar(info, vcimrv_tid_crid_data_oid, 0), lockmode);
+ /* Also remember which main relation info these relations belong to */
+ rel->info = info;
+}
+
+/*
+ * Close the data and meta relations held in rel, each only if valid; a NULL rel is a no-op
+ */
+void
+vci_CloseTidCridRelations(vci_TidCridRelations *rel, LOCKMODE lockmode)
+{
+ if (rel)
+ {
+ if (RelationIsValid(rel->data))
+ table_close(rel->data, lockmode);
+ if (RelationIsValid(rel->meta))
+ table_close(rel->meta, lockmode);
+ }
+}
+
+#define vci_GetTidCridMetaT(page) \
+ ((vcis_tidcrid_meta_t *)& ((page)[VCI_MIN_PAGE_HEADER]))
+
+/*
+ * Read the meta header page into relPair->bufMeta and return a pointer into that page (buffer is not released here)
+ */
+static vcis_tidcrid_meta_t *
+vci_GetTidCridMeta(vci_TidCridRelations *relPair)
+{
+ Page page;
+
+ relPair->bufMeta = vci_ReadBufferWithPageInit(relPair->meta, VCI_COLUMN_META_HEADER_PAGE_ID);
+ page = BufferGetPage(relPair->bufMeta);
+
+ return vci_GetTidCridMetaT(page);
+}
+
+/*
+ * Read data block blk into relPair->bufData and return its page tag (payload of item VCI_TID_CRID_PAGETAG_ITEM_ID)
+ */
+static vcis_tidcrid_pagetag_t *
+vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk)
+{
+ Page page;
+ HeapTupleHeader htup;
+
+ relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, blk);
+ page = BufferGetPage(relPair->bufData);
+
+ htup = (HeapTupleHeader) PageGetItem(page,
+ PageGetItemId(page, VCI_TID_CRID_PAGETAG_ITEM_ID));
+ /* The tag starts right after the tuple header (t_hoff bytes); the buffer is not released here */
+ return (vcis_tidcrid_pagetag_t *) ((char *) htup + htup->t_hoff);
+}
+
+/*
+ * Compute where meta item number blkNum lives in the meta relation:
+ * *blockNumber receives the page, *offset the byte offset from the page start
+ */
+static void
+GetTidCridMetaItemPosition(BlockNumber *blockNumber,
+ uint32 *offset,
+ BlockNumber blkNum)
+{
+ const int maxTidCridMetaItemInFirstPage =
+ (VCI_MAX_PAGE_SPACE - offsetof(vcis_tidcrid_meta_t, body)) / sizeof(vcis_tidcrid_meta_item_t);
+ const int maxTidCridMetaItem = VCI_MAX_PAGE_SPACE / sizeof(vcis_tidcrid_meta_item_t);
+ /* Page 0 holds fewer items because the vcis_tidcrid_meta_t header precedes its body */
+ Assert(blockNumber);
+ Assert(offset);
+
+ if (blkNum < maxTidCridMetaItemInFirstPage)
+ {
+ *blockNumber = 0;
+ *offset = VCI_MIN_PAGE_HEADER + offsetof(vcis_tidcrid_meta_t, body) +
+ (blkNum * sizeof(vcis_tidcrid_meta_item_t));
+ }
+ else
+ {
+ int32 blkNumRem = blkNum - maxTidCridMetaItemInFirstPage;
+ /* Later pages hold items only; the final += 1 accounts for page 0 */
+ *blockNumber = blkNumRem / maxTidCridMetaItem;
+ blkNumRem -= *blockNumber * maxTidCridMetaItem;
+ *blockNumber += 1;
+ *offset = VCI_MIN_PAGE_HEADER +
+ (blkNumRem * sizeof(vcis_tidcrid_meta_item_t));
+ }
+}
+
+/*
+ * Return a pointer to the meta-relation entry for block blkNum.
+ *
+ * The target meta page is formatted with vci_FormatPageWithOneItem() when it
+ * lies at or beyond the current end of the relation; otherwise it is handed
+ * to vci_PreparePagesWithOneItemIfNecessary().  The page's buffer is stored
+ * in relPair->bufMeta and is not released here.
+ */
+static vcis_tidcrid_meta_item_t *
+vci_GetTidCridMetaItem(vci_TidCridRelations *relPair, BlockNumber blkNum)
+{
+	BlockNumber metaBlocks = RelationGetNumberOfBlocks(relPair->meta);
+	BlockNumber metaBlk;
+	uint32		byteOffset;
+	char	   *pageBytes;
+
+	GetTidCridMetaItemPosition(&metaBlk, &byteOffset, blkNum);
+
+	if (metaBlk < metaBlocks)
+		vci_PreparePagesWithOneItemIfNecessary(relPair->meta, metaBlk);
+	else
+		vci_FormatPageWithOneItem(relPair->meta, metaBlk);
+
+	relPair->bufMeta = ReadBuffer(relPair->meta, metaBlk);
+	pageBytes = (char *) BufferGetPage(relPair->bufMeta);
+
+	return (vcis_tidcrid_meta_item_t *) (pageBytes + byteOffset);
+}
+
+/*
+ * Return a pointer to a node (trunk or leaf) of the TID-CRID tree.
+ *
+ * trunkPtr locates the trunk node.  When leafNo is VCI_TID_CRID_TRUNKNODE the
+ * trunk itself is returned and retPtr must be NULL.  Otherwise leafNo selects
+ * an entry of trunk->leaf_item: if that entry is a valid item pointer the
+ * leaf payload is returned and, when retPtr is not NULL, the leaf's item
+ * pointer is copied to *retPtr; if it is not valid, NULL is returned.
+ *
+ * Buffer handling: on a non-NULL return the buffer of the returned node is
+ * left in relPair->bufData, unreleased, for the caller to release.  On a NULL
+ * return the trunk buffer has already been released.
+ */
+static char *
+vci_GetTidCridTreeNode(vci_TidCridRelations *relPair, ItemPointer trunkPtr, int64 leafNo,
+ ItemPointer retPtr)
+{
+ Page page;
+ HeapTupleHeader htup;
+ vcis_tidcrid_trunk_t *trunk;
+ ItemPointerData leafPtrData;
+ ItemPointer leafPtr = &leafPtrData;
+
+ Assert(ItemPointerIsValid(trunkPtr));
+
+ /* Read the trunk node; its payload starts t_hoff bytes into the tuple */
+ relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(trunkPtr));
+ page = BufferGetPage(relPair->bufData);
+ htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, ItemPointerGetOffsetNumber(trunkPtr)));
+ trunk = (vcis_tidcrid_trunk_t *) ((char *) htup + htup->t_hoff);
+
+ if (leafNo == VCI_TID_CRID_TRUNKNODE)
+ {
+ Assert(retPtr == NULL);
+
+ /* The trunk buffer stays in relPair->bufData for the caller */
+ return (char *) trunk;
+ }
+
+ Assert(leafNo >= 0 && leafNo < VCI_TID_CRID_LEAF_CAPACITY);
+ leafPtrData = trunk->leaf_item[leafNo]; /* copy */
+
+ /* The leaf pointer was copied out, so the trunk buffer can be released */
+ ReleaseBuffer(relPair->bufData);
+
+ if (ItemPointerIsValid(leafPtr))
+ {
+ vcis_tidcrid_pagetag_t *tag PG_USED_FOR_ASSERTS_ONLY;
+
+ /* The tag is read only to assert that the leaf's bit is set in it */
+ tag = vci_GetTidCridTag(relPair, ItemPointerGetBlockNumber(leafPtr));
+
+ Assert(tag->bitmap & (1U << (ItemPointerGetOffsetNumber(leafPtr) - 1)));
+
+ ReleaseBuffer(relPair->bufData);
+
+ /* Read the leaf's block again, this time to return the leaf item */
+ relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(leafPtr));
+ page = BufferGetPage(relPair->bufData);
+ htup = (HeapTupleHeader) PageGetItem(page,
+ PageGetItemId(page, ItemPointerGetOffsetNumber(leafPtr)));
+
+ if (retPtr)
+ *retPtr = leafPtrData;
+
+ return (char *) htup + htup->t_hoff;
+ }
+
+ /* No leaf is attached at leafNo */
+ return NULL;
+}
+
+/*
+ * Remove leaf number leafNo from the trunk node at trunkPtr.
+ *
+ * Steps, in order: look up the leaf's item pointer, write a recovery record
+ * for the leaf, clear the leaf's bit in the tag of its page, then clear the
+ * leaf's bit and item pointer in the trunk node.
+ */
+static void
+RemoveLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo)
+{
+ vcis_tidcrid_leaf_t *leaf PG_USED_FOR_ASSERTS_ONLY;
+ vcis_tidcrid_trunk_t *trunk;
+
+ ItemPointerData leafPtrData;
+ ItemPointer leafPtr = &leafPtrData;
+
+ /*
+ * Only the leaf's location (leafPtr) is used below; the leaf pointer
+ * itself is checked by the Assert and its buffer is released right away.
+ */
+ leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr,
+ leafNo, leafPtr);
+ ReleaseBuffer(relPair->bufData);
+ Assert(leaf);
+
+ /* Write recovery record */
+ WriteRecoveryRecordForTidCridLeaf(relPair, trunkPtr, leafNo,
+ ItemPointerGetBlockNumber(leafPtr),
+ ItemPointerGetOffsetNumber(leafPtr));
+
+ /* Clear the leaf's bit in the tag of the page that holds it */
+ UnsetFreeSpaceBitmap(relPair,
+ ItemPointerGetBlockNumber(leafPtr),
+ ItemPointerGetOffsetNumber(leafPtr));
+
+ /* Remove from the trunk node */
+ trunk = (vcis_tidcrid_trunk_t *)
+ vci_GetTidCridTreeNode(relPair, trunkPtr, VCI_TID_CRID_TRUNKNODE, NULL);
+ Assert(trunk->type == vcis_tidcrid_type_trunk);
+ Assert((trunk->bitmap & (UINT64CONST(1) << leafNo)) != 0);
+
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+ trunk->bitmap &= ~(UINT64CONST(1) << leafNo);
+ MemSet(&trunk->leaf_item[leafNo], 0, sizeof(ItemPointerData));
+
+ vci_WriteItem(relPair->data, relPair->bufData,
+ ItemPointerGetOffsetNumber(trunkPtr));
+ UnlockReleaseBuffer(relPair->bufData);
+}
+
+/*
+ * Add a new, empty leaf node as leaf number leafNo of the trunk at trunkPtr.
+ *
+ * A free item is looked for first in the trunk's own page and, failing that,
+ * in a block obtained from vci_FindFreeSpaceForExtent().  A recovery record
+ * is written before the trunk is linked to the new leaf; after that the page
+ * tag bit is set and the leaf is initialized with an empty bitmap and every
+ * CRID set to VCI_INVALID_CRID.
+ */
+static void
+AddNewLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo)
+{
+ Page page;
+ HeapTupleHeader htup;
+ BlockNumber freeBlk;
+ OffsetNumber newOffset;
+ vcis_tidcrid_leaf_t *leaf;
+ vcis_tidcrid_trunk_t *trunk;
+ vcis_tidcrid_pagetag_t *tag;
+
+ ItemPointerData leafPtrData;
+ ItemPointer leafPtr = &leafPtrData;
+
+ /* Firstly search from the same page as trunk */
+ tag = vci_GetTidCridTag(relPair, ItemPointerGetBlockNumber(trunkPtr));
+ Assert(tag->type == vcis_tidcrid_type_pagetag);
+ /* Lowest clear bit of the tag bitmap, converted to a one-origin offset */
+ newOffset = vci_GetLowestBit(~tag->bitmap) + 1;
+ ReleaseBuffer(relPair->bufData);
+
+ if (newOffset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE)
+ {
+ /* Free space is found */
+ freeBlk = ItemPointerGetBlockNumber(trunkPtr);
+ }
+ else
+ {
+ /* The trunk's page has no free item: take one from another block */
+ freeBlk = vci_FindFreeSpaceForExtent((vci_ColumnRelations *) relPair, 1);
+ newOffset = FindFreeItem(relPair, freeBlk);
+ }
+
+ /*
+ * Passing VCI_TID_CRID_RECOVERY_CURRENT_VAL as the leaf offset makes the
+ * record a leaf-add record carrying the page tag's current bitmap.
+ */
+ WriteRecoveryRecordForTidCridLeaf(relPair, trunkPtr, leafNo, freeBlk, VCI_TID_CRID_RECOVERY_CURRENT_VAL);
+
+ ItemPointerSet(leafPtr, freeBlk, newOffset);
+
+ /* Connect to the leaf from the trunk */
+ trunk = (vcis_tidcrid_trunk_t *)
+ vci_GetTidCridTreeNode(relPair, trunkPtr, VCI_TID_CRID_TRUNKNODE, NULL);
+ Assert(trunk->type == vcis_tidcrid_type_trunk);
+ Assert((trunk->bitmap & (UINT64CONST(1) << leafNo)) == 0);
+
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+ trunk->bitmap |= (UINT64CONST(1) << leafNo);
+ trunk->leaf_item[leafNo] = *leafPtr;
+
+ vci_WriteItem(relPair->data, relPair->bufData,
+ ItemPointerGetOffsetNumber(trunkPtr));
+ UnlockReleaseBuffer(relPair->bufData);
+
+ /* Write a tag to the page */
+ SetFreeSpaceBitmap(relPair, freeBlk, newOffset);
+
+ /* Locate the new leaf item and initialize it under an exclusive lock */
+ relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(leafPtr));
+ page = BufferGetPage(relPair->bufData);
+ htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, ItemPointerGetOffsetNumber(leafPtr)));
+
+ leaf = (vcis_tidcrid_leaf_t *) ((char *) htup + htup->t_hoff);
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+ leaf->type = vcis_tidcrid_type_leaf;
+ leaf->bitmap = UINT64CONST(0);
+
+ for (int i = 0; i < VCI_TID_CRID_LEAF_CAPACITY; i++)
+ {
+ leaf->crid[i] = vci_GetCridFromUint64(VCI_INVALID_CRID);
+ }
+
+ vci_WriteItem(relPair->data, relPair->bufData, newOffset);
+ UnlockReleaseBuffer(relPair->bufData);
+}
+
+/*
+ * Fetch the item pointer of the subtree (trunk node) registered for block
+ * blkOrig of the original relation.
+ *
+ * *retPtr is filled from the meta-relation entry of blkOrig and may therefore
+ * be an invalid item pointer.  When it is valid, the tag of the referenced
+ * page is read to assert that the item's bit is set.  Both buffers used here
+ * are released before returning.
+ */
+void
+vci_GetTidCridSubTree(vci_TidCridRelations *relPair, BlockNumber blkOrig,
+					  ItemPointer retPtr)
+{
+	vcis_tidcrid_meta_item_t *entry = vci_GetTidCridMetaItem(relPair, blkOrig);
+
+	ItemPointerSet(retPtr, entry->block_number, entry->item_id);
+
+	if (ItemPointerIsValid(retPtr))
+	{
+		vcis_tidcrid_pagetag_t *pageTag PG_USED_FOR_ASSERTS_ONLY;
+
+		pageTag = vci_GetTidCridTag(relPair, entry->block_number);
+
+		Assert((pageTag->bitmap & (UINT64CONST(1) << (entry->item_id - 1))) != 0);
+
+		ReleaseBuffer(relPair->bufData);
+	}
+
+	ReleaseBuffer(relPair->bufMeta);
+}
+
+/*
+ * Create a new, empty trunk node for block blkOrig of the original relation.
+ *
+ * A free item is picked from a block returned by
+ * vci_FindFreeSpaceForExtent(), a trunk recovery record is written, the meta
+ * entry of blkOrig is pointed at the new item, the page tag bit is set, and
+ * finally the trunk is initialized with an empty bitmap and zeroed leaf
+ * pointers.
+ *
+ * retPtr receives the item pointer of the new trunk and must not be NULL.
+ */
+void
+vci_CreateTidCridSubTree(vci_TidCridRelations *relPair, BlockNumber blkOrig,
+ ItemPointer retPtr)
+{
+ BlockNumber freeBlk;
+ OffsetNumber newOffset;
+
+ vcis_tidcrid_trunk_t *trunk;
+ vcis_tidcrid_meta_item_t *metaItem;
+
+ Assert(retPtr);
+
+ /* Find the free page from the list */
+ freeBlk = vci_FindFreeSpaceForExtent((vci_ColumnRelations *) relPair, 1);
+
+ /* Find the free item from the free page */
+ newOffset = FindFreeItem(relPair, freeBlk);
+
+ /* The record is written before any page is changed below */
+ WriteRecoveryRecordForTidCridTrunk(relPair, blkOrig, freeBlk, VCI_TID_CRID_RECOVERY_CURRENT_VAL);
+
+ /* Set ItemPointer to the meta relation item */
+ metaItem = vci_GetTidCridMetaItem(relPair, blkOrig);
+ LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+ metaItem->block_number = freeBlk;
+ metaItem->item_id = newOffset;
+
+ vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+ UnlockReleaseBuffer(relPair->bufMeta);
+
+ /* Write a tag in the page */
+ SetFreeSpaceBitmap(relPair, freeBlk, newOffset);
+
+ /* Fetch the new trunk item and initialize it under an exclusive lock */
+ ItemPointerSet(retPtr, freeBlk, newOffset);
+ trunk = (vcis_tidcrid_trunk_t *)
+ vci_GetTidCridTreeNode(relPair, retPtr, VCI_TID_CRID_TRUNKNODE, NULL);
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+ Assert(trunk);
+
+ trunk->type = vcis_tidcrid_type_trunk;
+ trunk->bitmap = UINT64CONST(0);
+
+ MemSet((trunk->leaf_item), 0, sizeof(trunk->leaf_item));
+
+ vci_WriteItem(relPair->data, relPair->bufData, newOffset);
+ UnlockReleaseBuffer(relPair->bufData);
+}
+
+/*
+ * Apply a list of TID-CRID pairs to the subtree rooted at trunkPtr.
+ *
+ * The zero-origin offset number of each TID is split into upper bits, which
+ * select the leaf within the trunk, and lower bits, which select the slot
+ * within the leaf.  Consecutive list entries that map to the same leaf are
+ * written under a single exclusive lock of that leaf's buffer.  A missing
+ * leaf is created first.  An entry whose CRID is VCI_INVALID_CRID clears the
+ * slot's bit in the leaf bitmap; any other CRID sets it.  A leaf that had
+ * bits set before the update and has none afterwards is removed from the
+ * trunk.
+ *
+ * @param[in] relPair  opened TID-CRID relations
+ * @param[in] trunkPtr item pointer of the trunk node
+ * @param[in] newItems pairs to apply
+ */
+void
+vci_UpdateTidCridSubTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr,
+						 vcis_tidcrid_pair_list_t *newItems)
+{
+	for (int i = 0; i < newItems->num; i++)
+	{
+		vcis_tidcrid_leaf_t *leaf;
+		ItemPointerData leafPtrData;
+		ItemPointer leafPtr = &leafPtrData;
+		int			prevBitCount = 0;
+		bool		leafBecameEmpty;
+		uint32		offset = ItemPointerGetOffsetNumber(&newItems->body[i].page_item_id) - 1;
+		int8		itemIdUpperBits;
+
+		/* Extract upper bits from item_id */
+		itemIdUpperBits = (offset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+			((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+		Assert(itemIdUpperBits < VCI_TID_CRID_LEAF_CAPACITY);
+
+		leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr,
+															  itemIdUpperBits, leafPtr);
+		if (leaf == NULL)
+		{
+			/* No leaf yet for these upper bits: create it, then fetch it */
+			AddNewLeafTidCridTree(relPair, trunkPtr, itemIdUpperBits);
+			leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr,
+																  itemIdUpperBits, leafPtr);
+		}
+
+		LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+		prevBitCount = vci_GetBitCount(leaf->bitmap);
+
+		/* Consume every following entry that belongs to the same leaf */
+		for (; i < newItems->num; i++)
+		{
+			uint32		innerOffset = ItemPointerGetOffsetNumber(&newItems->body[i].page_item_id) - 1;
+			int8		innerItemIdUpperBits;
+			int8		itemIdLowerBits;
+
+			/* Extract upper bits from item_id */
+			innerItemIdUpperBits = (innerOffset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+				((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+			if (itemIdUpperBits != innerItemIdUpperBits)
+			{
+				/* Different leaf: step back so the outer loop revisits it */
+				i--;
+				break;
+			}
+
+			/* Extract lower bits from item_id */
+			itemIdLowerBits = innerOffset & ((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+			leaf->crid[itemIdLowerBits] = newItems->body[i].crid;
+
+			if (vci_GetUint64FromCrid(leaf->crid[itemIdLowerBits]) == VCI_INVALID_CRID)
+				leaf->bitmap &= ~(UINT64CONST(1) << itemIdLowerBits);
+			else
+				leaf->bitmap |= UINT64CONST(1) << itemIdLowerBits;
+		}
+
+		vci_WriteItem(relPair->data, relPair->bufData,
+					  ItemPointerGetOffsetNumber(leafPtr));
+
+		/*
+		 * Decide about removal while the buffer is still pinned and locked:
+		 * leaf points into the page and must not be dereferenced once the
+		 * buffer has been released.
+		 */
+		leafBecameEmpty = (prevBitCount != 0 && leaf->bitmap == 0);
+
+		UnlockReleaseBuffer(relPair->bufData);
+
+		if (leafBecameEmpty)
+			RemoveLeafTidCridTree(relPair, trunkPtr, itemIdUpperBits);
+	}
+}
+
+/*
+ * Convert a TID to a CRID using the TID-CRID tree.
+ *
+ * The relations are opened with AccessShareLock for the lookup and closed
+ * again before returning.
+ *
+ * Returns the CRID stored for the given TID, or VCI_INVALID_CRID when the
+ * block has no subtree or the subtree has no leaf covering the TID.
+ */
+static uint64
+SearchFromTidCridTree(vci_MainRelHeaderInfo *info, ItemPointer tId)
+{
+	const LOCKMODE lockmode = AccessShareLock;
+	const BlockNumber heapBlk = ItemPointerGetBlockNumber(tId);
+	const uint32 zeroBasedOffset = ItemPointerGetOffsetNumber(tId) - 1;
+
+	/* Lower bits pick the slot inside a leaf, upper bits pick the leaf */
+	const int8	slotInLeaf = zeroBasedOffset & ((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+	const int8	leafInTrunk = (zeroBasedOffset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+		((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+	uint64		crid = VCI_INVALID_CRID;
+	ItemPointerData trunkTid;
+	vci_TidCridRelations rels;
+
+	vci_OpenTidCridRelations(&rels, info, lockmode);
+	vci_GetTidCridSubTree(&rels, heapBlk, &trunkTid);
+
+	if (ItemPointerIsValid(&trunkTid))
+	{
+		vcis_tidcrid_leaf_t *leaf;
+
+		leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(&rels, &trunkTid, leafInTrunk, NULL);
+		if (leaf != NULL)
+		{
+			crid = vci_GetUint64FromCrid(leaf->crid[slotInLeaf]);
+			ReleaseBuffer(rels.bufData);
+		}
+	}
+
+	vci_CloseTidCridRelations(&rels, lockmode);
+
+	return crid;
+}
+
+/*
+ * Convert a TID to a CRID.
+ *
+ * The TID-CRID update list is consulted first (only when it has entries).
+ * When it yields VCI_MOVED_CRID, or was not consulted, the TID-CRID tree is
+ * searched instead.
+ *
+ * @param[in] context context for the TID-CRID update list
+ * @param[in] tId target tid
+ * @param[out] fromTree set to true if the result came from the tree, false
+ * otherwise; may be NULL
+ *
+ * Returns the CRID found; the tree search returns VCI_INVALID_CRID when it
+ * finds nothing.
+ */
+uint64
+vci_GetCridFromTid(vci_TidCridUpdateListContext *context, ItemPointer tId, bool *fromTree)
+{
+	uint64		crid;
+	bool		usedTree;
+
+	crid = (context->count > 0)
+		? SearchCridFromTidCridUpdateListContext(context, tId)
+		: VCI_MOVED_CRID;
+
+	usedTree = (crid == VCI_MOVED_CRID);
+	if (usedTree)
+		crid = SearchFromTidCridTree(context->info, tId);
+
+	if (fromTree != NULL)
+		*fromTree = usedTree;
+
+	return crid;
+}
+
+/*
+ * Search a TID in the TID-CRID update list.
+ *
+ * The sample TIDs in the list header are binary-searched to narrow the lookup
+ * to a range of body blocks, which is then searched by
+ * SearchCridInBlockRange().
+ *
+ * Returns VCI_MOVED_CRID when tId lies outside the range spanned by the first
+ * and last sample; otherwise whatever SearchCridInBlockRange() returns.
+ */
+static uint64
+SearchCridFromTidCridUpdateListContext(vci_TidCridUpdateListContext *context, ItemPointer tId)
+{
+	int			lo;
+	int			hi;
+	BlockNumber firstBlk;
+	BlockNumber lastBlk;
+
+	/* tId sorts before the first sample */
+	if (ItemPointerCompare(tId, &context->header.sample_tids[0]) < 0)
+		return VCI_MOVED_CRID;
+
+	/* tId sorts after the last sample */
+	if (ItemPointerCompare(&context->header.sample_tids[context->header.num_samples - 1], tId) < 0)
+		return VCI_MOVED_CRID;
+
+	lo = 0;
+	hi = context->header.num_samples - 1;
+
+	/* Narrow [lo, hi] until the samples are adjacent or an exact hit is found */
+	while (hi - lo > 1)
+	{
+		int			mid = (lo + hi) / 2;
+		int			cmp = ItemPointerCompare(tId, &context->header.sample_tids[mid]);
+
+		if (cmp == 0)
+			lo = hi = mid;
+		else if (cmp < 0)
+			hi = mid;
+		else
+			lo = mid;
+	}
+
+	/* Each sample stands for blocks_per_samp consecutive body blocks */
+	firstBlk = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + lo * context->header.blocks_per_samp;
+	lastBlk = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + hi * context->header.blocks_per_samp + context->header.blocks_per_samp - 1;
+
+	/* Clamp both ends to the last existing block */
+	if (context->nblocks <= firstBlk)
+		firstBlk = context->nblocks - 1;
+
+	if (context->nblocks <= lastBlk)
+		lastBlk = context->nblocks - 1;
+
+	return SearchCridInBlockRange(context, tId, firstBlk, lastBlk);
+}
+
+/*
+ * Binary-search the blocks [start, end] of the TID-CRID update list for tId.
+ *
+ * Each probed block is compared against its first and last pair.  The last
+ * block of the relation may be partly filled, so its last index is derived
+ * from context->count.
+ *
+ * Returns the result of SearchCridInBlock() for the block whose range covers
+ * tId, or VCI_MOVED_CRID when the range has shrunk to one block that does
+ * not cover it.
+ */
+static uint64
+SearchCridInBlockRange(vci_TidCridUpdateListContext *context,
+					   ItemPointer tId,
+					   BlockNumber start, BlockNumber end /* inclusive */ )
+{
+	for (;;)
+	{
+		BlockNumber mid = (start + end) / 2;
+		int			firstIdx = 0;
+		int			lastIdx;
+		Buffer		buf;
+		Page		bufPage;
+		vcis_tidcrid_pair_item_t *pairs;
+		bool		belowBlock;
+		bool		aboveBlock;
+		bool		done = false;
+		uint64		crid = VCI_MOVED_CRID;
+
+		if (mid < context->nblocks - 1)
+			lastIdx = VCI_TID_CRID_UPDATE_PAGE_ITEMS - 1;
+		else
+			lastIdx = (context->count - 1) % VCI_TID_CRID_UPDATE_PAGE_ITEMS;
+
+		buf = vci_ReadBufferWithPageInit(context->rel, mid);
+		bufPage = BufferGetPage(buf);
+		pairs = vci_GetTidCridPairItemT(bufPage);
+
+		belowBlock = (ItemPointerCompare(tId, &pairs[firstIdx].page_item_id) < 0);
+		aboveBlock = (ItemPointerCompare(&pairs[lastIdx].page_item_id, tId) < 0);
+
+		if ((start == end) && (belowBlock || aboveBlock))
+		{
+			/* Single candidate block left and it does not cover tId */
+			done = true;
+		}
+		else if (belowBlock)
+		{
+			end = mid;
+		}
+		else if (aboveBlock)
+		{
+			start = mid + 1;
+		}
+		else
+		{
+			crid = SearchCridInBlock(context, tId, pairs, firstIdx, lastIdx);
+			done = true;
+		}
+
+		ReleaseBuffer(buf);
+
+		if (done)
+			return crid;
+	}
+}
+
+/*
+ * Binary-search array[first..last] (both inclusive) of one update-list block
+ * for tId.
+ *
+ * Returns the CRID paired with tId, or VCI_MOVED_CRID when tId is not in the
+ * range.  The context argument is not used.
+ */
+static uint64
+SearchCridInBlock(vci_TidCridUpdateListContext *context,
+				  ItemPointer tId,
+				  vcis_tidcrid_pair_item_t *array,
+				  int first, int last /* inclusive */ )
+{
+	int			lo = first;
+	int			hi = last;
+
+	while (hi - lo > 1)
+	{
+		int			mid = (lo + hi) / 2;
+		int			cmp = ItemPointerCompare(&array[mid].page_item_id, tId);
+
+		if (cmp == 0)
+			return vci_GetUint64FromCrid(array[mid].crid);
+
+		if (cmp < 0)
+			lo = mid;			/* array[mid] < tId */
+		else
+			hi = mid;			/* array[mid] > tId */
+	}
+
+	/* At most two candidates remain */
+	if (ItemPointerEquals(&array[lo].page_item_id, tId))
+		return vci_GetUint64FromCrid(array[lo].crid);
+
+	if (ItemPointerEquals(&array[hi].page_item_id, tId))
+		return vci_GetUint64FromCrid(array[hi].crid);
+
+	return VCI_MOVED_CRID;
+}
+
+/*
+ * Find a free item in block freeBlk of the TID-CRID tree data relation.
+ *
+ * The page tag's bitmap is scanned for its lowest clear bit.  If bit 0 of the
+ * bitmap is clear the tag is treated as uninitialized and is initialized
+ * here first, with bit 0 set.
+ *
+ * Returns the one-origin offset of the free item.  The tag's buffer is
+ * released before returning.
+ */
+static OffsetNumber
+FindFreeItem(vci_TidCridRelations *relPair, BlockNumber freeBlk)
+{
+ vcis_tidcrid_pagetag_t *tag;
+ OffsetNumber newOffset;
+
+ tag = vci_GetTidCridTag(relPair, freeBlk);
+ Assert(tag->type == vcis_tidcrid_type_pagetag);
+
+ /*
+ * Initialize if not done yet.
+ *
+ * NOTE(review): these stores change the page without LockBuffer() and
+ * without a vci_WriteItem() call in this function -- confirm that a later
+ * tag write by the caller is what persists them.
+ */
+ if ((tag->bitmap & 1) == 0)
+ {
+ tag->num = 0;
+ tag->free_size = VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - 1;
+ tag->bitmap = 0x1;
+ }
+
+ /* Lowest clear bit, converted from zero-origin bit to one-origin offset */
+ newOffset = vci_GetLowestBit(~tag->bitmap) + 1; /* LSB = 0 */
+
+ Assert((newOffset >= 1) && (newOffset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE));
+ ReleaseBuffer(relPair->bufData);
+
+ return newOffset;
+}
+
+/*
+ * Set the bit for item 'offset' (one-origin) in the page tag of block blk.
+ *
+ * When the update leaves exactly VCI_TID_CRID_FREESPACE_THRESHOLD clear bits
+ * out of VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE, the block is also removed from
+ * the free space link list and the tag's size/prev_pos/next_pos are reset.
+ */
+static void
+SetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber offset)
+{
+ vcis_tidcrid_pagetag_t *tag = vci_GetTidCridTag(relPair, blk);
+ uint32 bit = offset - 1; /* one-origin -> zero-origin */
+ uint32 nextBitmap;
+
+ Assert((offset >= 1) && (offset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE));
+ Assert((tag->bitmap & (uint32) (1U << bit)) == 0);
+
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+ /* Computed now, stored into the tag only at the end of the function */
+ nextBitmap = tag->bitmap | (uint32) (1U << bit);
+
+ /*
+ * Remove from the free space list when the number of clear bits after
+ * this update equals the threshold
+ */
+ if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - vci_GetBitCount(nextBitmap) ==
+ VCI_TID_CRID_FREESPACE_THRESHOLD)
+ {
+ vcis_free_space_t *FS;
+
+ /* Release once to pass relPair to vci_RemoveFreeSpaceFromLinkList */
+ UnlockReleaseBuffer(relPair->bufData);
+
+ /* Record the free space entry for recovery before unlinking it */
+ FS = vci_GetFreeSpace((vci_RelationPair *) relPair, blk);
+ vci_WriteRecoveryRecordForFreeSpace(relPair,
+ VCI_TID_CRID_COLID_DUMMY,
+ VCI_INVALID_DICTIONARY_ID,
+ blk,
+ FS);
+ ReleaseBuffer(relPair->bufData);
+
+ vci_RemoveFreeSpaceFromLinkList((vci_ColumnRelations *) relPair, blk, 1);
+
+ /* Adjust size and positions */
+ tag = vci_GetTidCridTag(relPair, blk);
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+ tag->size = 1;
+ tag->prev_pos = blk;
+ tag->next_pos = blk;
+ }
+
+ /* On both paths the tag's buffer is held with an exclusive lock here */
+ tag->bitmap = nextBitmap;
+ vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+ UnlockReleaseBuffer(relPair->bufData);
+
+}
+
+/*
+ * Clear the bit for item 'offset' (one-origin) in the page tag of block blk.
+ *
+ * When the bitmap had exactly VCI_TID_CRID_FREESPACE_THRESHOLD clear bits out
+ * of VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE before this bit was cleared, the
+ * block is turned into a free space entry, a free-space recovery record is
+ * written, and the entry is appended to the free space link list.
+ */
+static void
+UnsetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber offset)
+{
+	vcis_tidcrid_pagetag_t *tag = vci_GetTidCridTag(relPair, blk);
+	int			bit = offset - 1;	/* one-origin -> zero-origin */
+	uint32		clearedBitmap;
+
+	Assert((offset >= 1) && (offset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE));
+	Assert((tag->bitmap & (uint32) (1U << bit)) != 0);
+
+	LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+	tag->bitmap &= ~(uint32) (1U << bit);
+
+	/*
+	 * Keep a private copy of the new bitmap: tag points into the page and
+	 * must not be dereferenced after the buffer has been released.
+	 */
+	clearedBitmap = tag->bitmap;
+
+	vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+	UnlockReleaseBuffer(relPair->bufData);
+
+	/* "+ 1" restores the bit count from before the bit was cleared */
+	if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - (vci_GetBitCount(clearedBitmap) + 1) ==
+		VCI_TID_CRID_FREESPACE_THRESHOLD)
+	{
+		vcis_free_space_t newFS;
+		BlockNumber newFSBlockNumber;
+
+		vci_MakeFreeSpace((vci_ColumnRelations *) relPair, blk, &newFSBlockNumber, &newFS, false);
+		Assert(newFSBlockNumber == blk);
+
+		vci_WriteRecoveryRecordForFreeSpace(relPair,
+											VCI_TID_CRID_COLID_DUMMY,
+											VCI_INVALID_DICTIONARY_ID,
+											newFSBlockNumber,
+											&newFS);
+
+		vci_AppendFreeSpaceToLinkList((vci_ColumnRelations *) relPair,
+									  newFSBlockNumber,
+									  newFS.prev_pos,
+									  newFS.next_pos,
+									  newFS.size);
+	}
+}
+
+/*
+ * Write a recovery record for the creation of a trunk node.
+ *
+ * origBlkno is recorded as the target block; trunk records carry no target
+ * info, so 0 is recorded for it.
+ */
+static void
+WriteRecoveryRecordForTidCridTrunk(vci_TidCridRelations *relPair, BlockNumber origBlkno, BlockNumber trunkBlkno, OffsetNumber trunkOffset)
+{
+	const uint32 noTargetInfo = 0;
+
+	WriteRecoveryRecordForTidCridCommon(relPair, vcis_tid_crid_op_trunk,
+										origBlkno, noTargetInfo,
+										trunkBlkno, trunkOffset);
+}
+
+/*
+ * Write a recovery record for adding or removing a leaf node.
+ *
+ * The record is a leaf-add record when leafOffset is
+ * VCI_TID_CRID_RECOVERY_CURRENT_VAL and a leaf-remove record otherwise.
+ * The trunk's block number is recorded as the target block; the target info
+ * packs the trunk's offset number into the lower 16 bits and leafNo into the
+ * upper 16 bits.
+ */
+static void
+WriteRecoveryRecordForTidCridLeaf(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo, BlockNumber leafBlkno, OffsetNumber leafOffset)
+{
+	const vcis_tid_crid_op_type_t operation =
+		(leafOffset == VCI_TID_CRID_RECOVERY_CURRENT_VAL)
+		? vcis_tid_crid_op_leaf_add
+		: vcis_tid_crid_op_leaf_remove;
+	const OffsetNumber trunkOffset = ItemPointerGetOffsetNumber(trunkPtr);
+	uint32		packedInfo;
+
+	Assert((trunkOffset <= 0xFFFF) && (leafNo <= 0xFFFF));
+	packedInfo = (trunkOffset & 0xFFFF) | ((leafNo & 0xFFFF) << 16);
+
+	WriteRecoveryRecordForTidCridCommon(relPair, operation,
+										ItemPointerGetBlockNumber(trunkPtr), packedInfo,
+										leafBlkno, leafOffset);
+}
+
+/*
+ * Write a recovery record while updating TID-CRID tree
+ *
+ * The record is stored in variables of the main relation: the operation, the
+ * target block number and info, the block freeBlkno, and a bitmap derived
+ * from the page tag of freeBlkno.  When freeOffset is
+ * VCI_TID_CRID_RECOVERY_CURRENT_VAL the tag's current bitmap is recorded;
+ * otherwise the current bitmap with the bit of freeOffset cleared is
+ * recorded.  The working column id is set to VCI_INVALID_COLUMN_ID.
+ */
+static void
+WriteRecoveryRecordForTidCridCommon(vci_TidCridRelations *relPair, vcis_tid_crid_op_type_t operation, BlockNumber targetBlkno, uint32 targetInfo, BlockNumber freeBlkno, OffsetNumber freeOffset)
+{
+ vcis_tidcrid_pagetag_t *tag;
+ uint32 tag_bitmap;
+
+ /*
+ * 1. Obtain the bitmap to be written to the main relation
+ */
+ tag = vci_GetTidCridTag(relPair, freeBlkno);
+ Assert(tag->type == vcis_tidcrid_type_pagetag);
+ LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+ /* The tag is only read here; it is not modified */
+ if (freeOffset == VCI_TID_CRID_RECOVERY_CURRENT_VAL)
+ tag_bitmap = tag->bitmap;
+ else
+ tag_bitmap = tag->bitmap & ~(UINT64CONST(1) << (freeOffset - 1));
+
+ UnlockReleaseBuffer(relPair->bufData);
+
+ /* 2. Write the information to the main relation */
+ Assert(relPair->info);
+
+ vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_operation, 0, operation);
+ vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_target_blocknumber, 0, targetBlkno);
+ vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_target_info, 0, targetInfo);
+ vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_free_blocknumber, 0, freeBlkno);
+ vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_tag_bitmap, 0, tag_bitmap);
+ vci_SetMainRelVar(relPair->info, vcimrv_working_column_id, 0, VCI_INVALID_COLUMN_ID);
+ vci_WriteMainRelVar(relPair->info, vci_wmrv_update);
+}
+
+/*
+ * Initialize recovery record for the TID-CRID
+ *
+ * Sets the recorded operation to vcis_tid_crid_op_none and the working column
+ * id to VCI_INVALID_COLUMN_ID.  Only vci_SetMainRelVar() is called here;
+ * vci_WriteMainRelVar() is not.
+ */
+void
+vci_InitRecoveryRecordForTidCrid(vci_MainRelHeaderInfo *info)
+{
+ /* No TID-CRID tree operation is pending */
+ vci_SetMainRelVar(info, vcimrv_tid_crid_operation, 0, vcis_tid_crid_op_none);
+
+ vci_SetMainRelVar(info, vcimrv_working_column_id, 0, VCI_INVALID_COLUMN_ID);
+}
+
+/*
+ * Recover the TID-CRID tree from the last recorded tree operation.
+ *
+ * Nothing is done when the recorded operation is vcis_tid_crid_op_none.
+ * Otherwise the page tag bitmap saved in the recovery record is written back
+ * to the recorded block, and then:
+ *   - for a trunk operation, the meta entry of the target block is reset to
+ *     an invalid item pointer;
+ *   - for a leaf add or remove operation, the leaf's bit and item pointer
+ *     are cleared in the trunk node.
+ *
+ * The relations stay open until every page has been updated.
+ *
+ * @param[in] info main relation
+ */
+void
+vci_RecoveryTidCrid(vci_MainRelHeaderInfo *info)
+{
+	LOCKMODE	lockmode = RowExclusiveLock;
+
+	vci_TidCridRelations relPairData = {0};
+	vci_TidCridRelations *relPair = &relPairData;
+
+	vcis_tidcrid_pagetag_t *tag;
+	vcis_tid_crid_op_type_t operation;
+	BlockNumber targetBlkno;
+	uint32		targetInfo;
+	BlockNumber freeBlkno;
+	uint32		tag_bitmap;
+
+	operation = vci_GetMainRelVar(info, vcimrv_tid_crid_operation, 0);
+	targetBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_target_blocknumber, 0);
+	targetInfo = vci_GetMainRelVar(info, vcimrv_tid_crid_target_info, 0);
+	freeBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_free_blocknumber, 0);
+	tag_bitmap = vci_GetMainRelVar(info, vcimrv_tid_crid_tag_bitmap, 0);
+
+	if (operation == vcis_tid_crid_op_none)
+		return;
+
+	Assert(BlockNumberIsValid(freeBlkno));
+	vci_OpenTidCridRelations(relPair, info, lockmode);
+
+	/* Restore the page tag bitmap saved in the recovery record */
+	tag = vci_GetTidCridTag(relPair, freeBlkno);
+	Assert(tag->type == vcis_tidcrid_type_pagetag);
+	LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+	tag->bitmap = tag_bitmap;
+	vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+	UnlockReleaseBuffer(relPair->bufData);
+
+	switch (operation)
+	{
+		case vcis_tid_crid_op_trunk:
+			{
+				vcis_tidcrid_meta_item_t *metaItem;
+
+				/* Detach the trunk from the meta entry of the target block */
+				metaItem = vci_GetTidCridMetaItem(relPair, targetBlkno);
+				LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+				metaItem->block_number = InvalidBlockNumber;
+				metaItem->item_id = InvalidOffsetNumber;
+
+				vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+				UnlockReleaseBuffer(relPair->bufMeta);
+			}
+			break;
+
+		case vcis_tid_crid_op_leaf_add:
+		case vcis_tid_crid_op_leaf_remove:
+			{
+				vcis_tidcrid_trunk_t *trunk;
+				ItemPointerData trunkItem;
+				uint32		leafNo;
+
+				/*
+				 * For leaf operations targetBlkno is the block number of
+				 * the trunk, and the lower 16 bits of targetInfo are the
+				 * trunk's offset number.
+				 */
+				ItemPointerSet(&trunkItem, targetBlkno, (targetInfo & 0xFFFF));
+
+				/*
+				 * The upper 16 bits of targetInfo are the leafNo in the
+				 * trunk.
+				 */
+				leafNo = targetInfo >> 16;
+
+				trunk = (vcis_tidcrid_trunk_t *)
+					vci_GetTidCridTreeNode(relPair, &trunkItem, VCI_TID_CRID_TRUNKNODE, NULL);
+
+				LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+				trunk->bitmap &= ~(UINT64CONST(1) << leafNo);
+				MemSet(&trunk->leaf_item[leafNo], 0, sizeof(ItemPointerData));
+
+				vci_WriteItem(relPair->data, relPair->bufData,
+							  ItemPointerGetOffsetNumber(&trunkItem));
+				UnlockReleaseBuffer(relPair->bufData);
+			}
+			break;
+
+		default:
+			break;
+	}
+
+	/*
+	 * Close only now: the switch above still reads and writes pages of both
+	 * relations through relPair.
+	 */
+	vci_CloseTidCridRelations(relPair, lockmode);
+}
+
+/*
+ * Recover the free list of the TID-CRID tree relation
+ *
+ * Two cases are told apart by the working column id stored in the main
+ * relation:
+ *   - it is not VCI_INVALID_COLUMN_ID: the column meta counters are restored
+ *     from their *_old copies and the recorded free space entry is appended
+ *     to the free link list again;
+ *   - it is VCI_INVALID_COLUMN_ID: only a recorded leaf-remove operation may
+ *     need work, namely re-creating the free space entry of the recorded
+ *     block when the recorded bitmap meets the threshold condition.
+ */
+void
+vci_RecoveryFreeSpaceForTidCrid(vci_MainRelHeaderInfo *info)
+{
+ LOCKMODE lockmode = RowExclusiveLock;
+
+ int16 colId;
+ vci_ColumnRelations relPairData = {0};
+ vci_ColumnRelations *relPair = &relPairData;
+ vcis_column_meta_t *columnMeta;
+
+ BlockNumber startBlockNumber;
+ BlockNumber prevFreeBlockNumber;
+ BlockNumber nextFreeBlockNumber;
+ uint32 oldSize;
+
+ vci_OpenTidCridRelations(relPair, info, lockmode);
+
+ /* get last working column */
+ colId = vci_GetMainRelVar(info, vcimrv_working_column_id, 0);
+
+ if (colId != VCI_INVALID_COLUMN_ID)
+ {
+ /* get column rel set */
+ columnMeta = vci_GetColumnMeta(&relPair->bufMeta, relPair->meta);
+ LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+
+ /* restore from old fields */
+ columnMeta->num_extents = columnMeta->num_extents_old;
+ columnMeta->num_free_pages = columnMeta->num_free_pages_old;
+ columnMeta->num_free_page_blocks = columnMeta->num_free_page_blocks_old;
+
+ /* read free link list recovery information */
+ startBlockNumber = columnMeta->new_data_head;
+ prevFreeBlockNumber = columnMeta->free_page_prev_id;
+ nextFreeBlockNumber = columnMeta->free_page_next_id;
+ oldSize = columnMeta->free_page_old_size;
+
+ vci_WriteColumnMetaDataHeader(relPair->meta, relPair->bufMeta);
+ UnlockReleaseBuffer(relPair->bufMeta);
+
+ /* Recover the free link list */
+
+ vci_AppendFreeSpaceToLinkList(relPair, startBlockNumber, prevFreeBlockNumber,
+ nextFreeBlockNumber, oldSize);
+ }
+ else
+ {
+ /*
+ * Connect to the free list if the previous crash happened before the
+ * leaf was removed from the trunk.
+ */
+ vcis_tid_crid_op_type_t operation;
+ BlockNumber freeBlkno;
+ uint32 tag_bitmap;
+ vcis_free_space_t newFS;
+ BlockNumber newFSBlockNumber;
+
+ operation = vci_GetMainRelVar(info, vcimrv_tid_crid_operation, 0);
+ freeBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_free_blocknumber, 0);
+ tag_bitmap = vci_GetMainRelVar(info, vcimrv_tid_crid_tag_bitmap, 0);
+
+ switch (operation)
+ {
+ /* Nothing to do for these operations */
+ case vcis_tid_crid_op_none:
+ case vcis_tid_crid_op_trunk:
+ case vcis_tid_crid_op_leaf_add:
+ break;
+
+ case vcis_tid_crid_op_leaf_remove:
+ /* Same threshold test as UnsetFreeSpaceBitmap(), on the recorded bitmap */
+ if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - (vci_GetBitCount(tag_bitmap) + 1) ==
+ VCI_TID_CRID_FREESPACE_THRESHOLD)
+ {
+ vci_MakeFreeSpace((vci_ColumnRelations *) relPair, freeBlkno, &newFSBlockNumber, &newFS, false);
+ Assert(newFSBlockNumber == freeBlkno);
+
+ vci_WriteRecoveryRecordForFreeSpace(relPair,
+ VCI_TID_CRID_COLID_DUMMY,
+ VCI_INVALID_DICTIONARY_ID,
+ newFSBlockNumber,
+ &newFS);
+
+ vci_AppendFreeSpaceToLinkList((vci_ColumnRelations *) relPair,
+ newFSBlockNumber,
+ newFS.prev_pos,
+ newFS.next_pos,
+ newFS.size);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ vci_CloseTidCridRelations(relPair, lockmode);
+}
+
+/*
+ * qsort() comparator ordering TID-CRID pairs by the 64-bit value that
+ * vci_GetTid64FromItemPointer() yields for their TIDs, ascending.
+ */
+static int
+CmpTidCridPairbyTID(const void *pa, const void *pb)
+{
+	vcis_tidcrid_pair_item_t *lhs = (vcis_tidcrid_pair_item_t *) pa;
+	vcis_tidcrid_pair_item_t *rhs = (vcis_tidcrid_pair_item_t *) pb;
+	uint64		lhsKey = vci_GetTid64FromItemPointer(&lhs->page_item_id);
+	uint64		rhsKey = vci_GetTid64FromItemPointer(&rhs->page_item_id);
+
+	if (lhsKey < rhsKey)
+		return -1;
+	if (rhsKey < lhsKey)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Build a TID-sorted array of TID-CRID pairs for the rows held in src.
+ *
+ * Rows are visited chunk by chunk in storage order and given consecutive
+ * CRIDs starting at vci_CalcCrid64(extentId, 0).  The array is allocated with
+ * room for src->numTotalRows entries and sorted by TID before it is
+ * returned.  It is not freed here.
+ */
+static vcis_tidcrid_pair_item_t *
+CreateTidCridUpdateListFromRosChunkStorage(RosChunkStorage *src,
+										   int32 extentId)
+{
+	vcis_tidcrid_pair_item_t *pairs;
+	vcis_tidcrid_pair_item_t pair;
+	int			numPairs = 0;
+	uint64		nextCrid = vci_CalcCrid64(extentId, 0);
+
+	Assert(VCI_FIRST_NORMAL_EXTENT_ID <= extentId);
+	pairs = palloc_array(vcis_tidcrid_pair_item_t, src->numTotalRows);
+
+	for (int chunkId = 0; chunkId < src->numFilled; ++chunkId)
+	{
+		RosChunkBuffer *chunk = src->chunk[chunkId];
+
+		for (uint32 lId = 0; lId < chunk->numFilled; ++lId)
+		{
+			/* tidData is indexed in units of sizeof(ItemPointerData) */
+			pair.page_item_id = *(ItemPointerData *)
+				&(chunk->tidData[lId * sizeof(ItemPointerData)]);
+			pair.crid = vci_GetCridFromUint64(nextCrid);
+			nextCrid++;
+
+			pairs[numPairs] = pair;
+			numPairs++;
+		}
+	}
+
+	qsort(pairs, numPairs, sizeof(vcis_tidcrid_pair_item_t), CmpTidCridPairbyTID);
+
+	return pairs;
+}
+
+/*
+ * State passed to MergeTidCridUpdateListCallback.
+ *
+ * The callback merges an existing update list ("old list") with one source
+ * of additional entries.  The additional entries come from addList1 when it
+ * is not NULL, and from addList2 otherwise.
+ */
+typedef struct
+{
+ /*
+ * oldList: base list for the merge
+ */
+
+ /*
+ * Context for TID-CRID Update List
+ */
+ vci_TidCridUpdateListContext *oldListContext;
+
+ /*
+ * Current position in old list (index of the next entry to output)
+ */
+ uint64 oldListContextIndex;
+
+ /*
+ * Copy of one block read from oldListContext
+ */
+ vcis_tidcrid_pair_item_t oldListInBlock[VCI_TID_CRID_UPDATE_PAGE_ITEMS];
+
+ /*
+ * Block number currently held in oldListInBlock
+ */
+ BlockNumber prevOldListContextBlkno;
+
+ /*
+ * addList1: entries to merge into oldList, held as an in-memory array
+ * (used instead of addList2)
+ */
+
+ /*
+ * Array of TID-CRID pairs
+ */
+ vcis_tidcrid_pair_item_t *addList1;
+
+ /*
+ * Number of entries in addList1
+ */
+ int32 numAddList1;
+
+ /*
+ * Current position in addList1
+ */
+ int32 addList1Index;
+
+ /*
+ * addList2: entries to merge into oldList, read as TIDs from a tuplesort
+ * (used instead of addList1).  Every TID is paired with addList2Crid.
+ * addList2CurrentTid holds the TID fetched but not yet output (invalid
+ * when none is pending); addList2Terminated is set once the tuplesort is
+ * exhausted.
+ */
+ Tuplesortstate *addList2;
+ ItemPointerData addList2CurrentTid;
+ vcis_Crid addList2Crid;
+ bool addList2Terminated;
+
+} vci_MergeTidCridUpdateListContext;
+
+/*
+ * Callback function passed to WriteTidCridUpdateList()
+ *
+ * Merges oldList with addList1 or addList2 and outputs entries in TID order,
+ * one per call.
+ *
+ * Each call looks at the next pending entry of the old list and of the add
+ * list and stores the one with the smaller TID in *item.  When both TIDs are
+ * equal the add-list entry wins, unless its CRID is VCI_MOVED_CRID, in which
+ * case both entries are dropped and the next ones are examined.
+ *
+ * Returns true when *item was filled, false when both lists are exhausted.
+ */
+static bool
+MergeTidCridUpdateListCallback(vcis_tidcrid_pair_item_t *item, void *data)
+{
+ vci_MergeTidCridUpdateListContext *mergeContext = (vci_MergeTidCridUpdateListContext *) data;
+ bool old_entry_valid;
+ bool add_entry_valid;
+ vcis_tidcrid_pair_item_t old_item,
+ add_item;
+
+retry:
+ old_entry_valid = false;
+ add_entry_valid = false;
+
+ /* Step 1: peek at the next add-list entry, if any */
+ if (mergeContext->addList1)
+ {
+ /* addList1 */
+ if (mergeContext->addList1Index < mergeContext->numAddList1)
+ {
+ add_item = mergeContext->addList1[mergeContext->addList1Index];
+ add_entry_valid = true;
+ }
+ }
+ else
+ {
+ /* addList2 */
+ if (!mergeContext->addList2Terminated)
+ {
+ /* Fetch a new TID only when no fetched TID is still pending */
+ if (!ItemPointerIsValid(&mergeContext->addList2CurrentTid))
+ {
+ Datum value;
+ bool isnull;
+
+ if (tuplesort_getdatum(mergeContext->addList2, true, true, &value, &isnull, NULL))
+ {
+ mergeContext->addList2CurrentTid = *DatumGetItemPointer(value);
+ }
+ else
+ {
+ mergeContext->addList2Terminated = true;
+ goto get_old_list;
+ }
+ }
+
+ add_item.page_item_id = mergeContext->addList2CurrentTid;
+ add_item.crid = mergeContext->addList2Crid;
+
+ add_entry_valid = true;
+ }
+ }
+
+ /* Step 2: peek at the next old-list entry, if any */
+get_old_list:
+ if (mergeContext->oldListContextIndex < mergeContext->oldListContext->count)
+ {
+ BlockNumber blkno;
+
+ blkno = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + mergeContext->oldListContextIndex / VCI_TID_CRID_UPDATE_PAGE_ITEMS;
+
+ /* Read the block only when the position moved to a different block */
+ if (blkno != mergeContext->prevOldListContextBlkno)
+ {
+ vci_ReadOneBlockFromTidCridUpdateList(mergeContext->oldListContext, blkno, mergeContext->oldListInBlock);
+ mergeContext->prevOldListContextBlkno = blkno;
+ }
+
+ old_item = mergeContext->oldListInBlock[mergeContext->oldListContextIndex % VCI_TID_CRID_UPDATE_PAGE_ITEMS];
+
+ old_entry_valid = true;
+ }
+
+ /*
+ * Step 3: output one entry and advance the list(s) it came from.  The
+ * add list is advanced by bumping addList1Index and invalidating
+ * addList2CurrentTid together; only the one in use has any effect.
+ */
+ if (old_entry_valid && add_entry_valid)
+ {
+ int32 res = ItemPointerCompare(&old_item.page_item_id, &add_item.page_item_id);
+
+ if (res == 0)
+ {
+ /*
+ * Same TID in both lists: keep the add-list entry
+ */
+ mergeContext->oldListContextIndex++;
+ mergeContext->addList1Index++;
+ ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+ /* A VCI_MOVED_CRID add entry drops both entries from the output */
+ if (vci_GetUint64FromCrid(add_item.crid) == VCI_MOVED_CRID)
+ goto retry;
+
+ *item = add_item;
+ }
+ else if (res < 0)
+ {
+ mergeContext->oldListContextIndex++;
+
+ *item = old_item;
+ }
+ else
+ {
+ mergeContext->addList1Index++;
+ ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+ Assert(vci_GetUint64FromCrid(add_item.crid) != VCI_MOVED_CRID);
+
+ *item = add_item;
+ }
+
+ return true;
+ }
+ else if (old_entry_valid)
+ {
+ mergeContext->oldListContextIndex++;
+
+ *item = old_item;
+
+ return true;
+ }
+ else if (add_entry_valid)
+ {
+ mergeContext->addList1Index++;
+ ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+ Assert(vci_GetUint64FromCrid(add_item.crid) != VCI_MOVED_CRID);
+
+ *item = add_item;
+
+ return true;
+ }
+ else
+ {
+ /* Both lists are exhausted */
+ return false;
+ }
+}
+
+/*
+ * Add the TID-CRID pairs of one extent to the TID-CRID Update List
+ *
+ * The currently selected update list is merged with the pairs built from src
+ * and the result is written to the other list (selector flipped between 0
+ * and 1).
+ *
+ * @param[in] info main relation
+ * @param[in] src extent to be added
+ * @param[in] extentId extent id to be added
+ */
+void
+vci_AddTidCridUpdateList(vci_MainRelHeaderInfo *info,
+						 RosChunkStorage *src,
+						 int32 extentId)
+{
+	const uint32 currentSel = vci_GetMainRelVar(info, vcimrv_tid_crid_diff_sel, 0);
+	const uint32 targetSel = 1 ^ currentSel;
+	vci_MergeTidCridUpdateListContext merge = {0};
+
+	Assert(VCI_FIRST_NORMAL_EXTENT_ID <= extentId);
+
+	/* No block of the old list has been read yet */
+	merge.prevOldListContextBlkno = InvalidBlockNumber;
+	merge.numAddList1 = src->numTotalRows;
+
+	merge.oldListContext = vci_OpenTidCridUpdateList(info, currentSel);
+	merge.addList1 = CreateTidCridUpdateListFromRosChunkStorage(src, extentId);
+
+	WriteTidCridUpdateList(info, targetSel, MergeTidCridUpdateListCallback, &merge);
+
+	pfree(merge.addList1);
+	vci_CloseTidCridUpdateList(merge.oldListContext);
+}
+
+/*
+ * Merge the update list selected by oldSel with the TIDs of newList and
+ * write the result to the update list selected by newSel.
+ *
+ * Every TID read from newList is paired with the single CRID given in crid.
+ * newList is only read here; it is not ended by this function.
+ */
+void
+vci_MergeAndWriteTidCridUpdateList(vci_MainRelHeaderInfo *info,
+								   int newSel, int oldSel,
+								   Tuplesortstate *newList, vcis_Crid crid)
+{
+	vci_MergeTidCridUpdateListContext merge = {0};
+
+	/* addList1 stays NULL, so the callback takes its entries from addList2 */
+	merge.addList2 = newList;
+	merge.addList2Crid = crid;
+	ItemPointerSetInvalid(&merge.addList2CurrentTid);
+
+	/* No block of the old list has been read yet */
+	merge.prevOldListContextBlkno = InvalidBlockNumber;
+
+	merge.oldListContext = vci_OpenTidCridUpdateList(info, oldSel);
+
+	WriteTidCridUpdateList(info, newSel, MergeTidCridUpdateListCallback, &merge);
+
+	vci_CloseTidCridUpdateList(merge.oldListContext);
+}
diff --git a/contrib/vci/storage/vci_wos.c b/contrib/vci/storage/vci_wos.c
new file mode 100644
index 0000000..7739b4c
--- /dev/null
+++ b/contrib/vci/storage/vci_wos.c
@@ -0,0 +1,263 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_wos.c
+ * Manipulate WOS
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/vci/storage/vci_wos.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <stdint.h>
+
+#include "access/heapam_xlog.h"
+#include "access/relscan.h"
+#include "access/visibilitymap.h"
+#include "access/xact.h"
+#include "c.h"
+#include "miscadmin.h"
+#include "storage/ipc.h"
+#include "storage/procarray.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+#include "vci.h"
+
+#include "vci_mem.h"
+#include "vci_ros.h"
+#include "vci_wos.h"
+#include "vci_xact.h"
+
+bool HeapTupleSatisfiesWos2Ros(HeapTuple htup, Snapshot snapshot, Buffer buffer);
+bool HeapTupleSatisfiesLocalRos(HeapTuple htup, Snapshot snapshot, Buffer buffer);
+static bool IsXmaxHasCommitted(HeapTuple htup);
+
+/* One-entry cache of IsXmaxHasCommitted(); reset by vci_GetSnapshotFor*() */
+static struct
+{
+ TransactionId xid;
+ bool committed;
+} cachedTransactionInfo;
+
+/*
+ * vci_GetSnapshotForWos2Ros
+ *
+ * Returns the snapshot used for the WOS->ROS and WOS->Delete vector
+ * conversions: the result of vci_GetCurrentSnapshot(), tagged as
+ * SNAPSHOT_VCI_WOS2ROS.
+ *
+ * WOS entries are created when CRUD commands are executed, and their
+ * visibility in WOS is checked with the normal snapshot.
+ *
+ * ROS control commands can remove WOS entries, and the result can be seen by
+ * everyone as soon as the command is done.
+ *
+ * Caller must call PopActiveSnapshot() afterward.
+ */
+Snapshot
+vci_GetSnapshotForWos2Ros(void)
+{
+	Snapshot	wos2ros = vci_GetCurrentSnapshot();
+
+	/* Tag the snapshot with the VCI-specific type */
+	wos2ros->snapshot_type = SNAPSHOT_VCI_WOS2ROS;
+
+	/* Invalidate the one-entry cache used by IsXmaxHasCommitted() */
+	cachedTransactionInfo.xid = InvalidTransactionId;
+
+	return wos2ros;
+}
+
+/*
+ * Returns true when htup is visible to snapshot under plain MVCC rules and
+ * IsXmaxHasCommitted() does not report it as removed.
+ *
+ * snapshot->snapshot_type is SNAPSHOT_MVCC only while the check runs.
+ */
+bool
+HeapTupleSatisfiesWos2Ros(HeapTuple htup, Snapshot snapshot, Buffer buffer)
+{
+	SnapshotType vciType = snapshot->snapshot_type;
+	bool		mvccVisible;
+
+	snapshot->snapshot_type = SNAPSHOT_MVCC;
+	mvccVisible = HeapTupleSatisfiesVisibility(htup, snapshot, buffer);
+	snapshot->snapshot_type = vciType;
+
+	if (!mvccVisible)
+		return false;
+
+	/* MVCC-visible: accept unless its xmax is reported as committed */
+	return !IsXmaxHasCommitted(htup);
+}
+
+static TransactionId exclusiveTransactionId; /* set by vci_GetSnapshotForLocalRos() */
+
+/*
+ * vci_GetSnapshotForLocalRos
+ *
+ * Creates a snapshot which is used for local ROS conversion
+ *
+ * @param[in] inclusive_xid Visible xid regardless of the MVCC snapshot
+ * @param[in] exclusive_xid Invisible xid regardless of the MVCC snapshot
+ *
+ * Mostly the same as vci_GetSnapshotForWos2Ros(), but the results of ROS
+ * control commands sometimes cannot be seen through MVCC, because the
+ * transaction creating the local ROS may overlap with ROS control commands.
+ */
+Snapshot
+vci_GetSnapshotForLocalRos(TransactionId inclusive_xid, TransactionId exclusive_xid)
+{
+	Snapshot	localRos = vci_GetCurrentSnapshot();
+
+	localRos->snapshot_type = SNAPSHOT_VCI_LOCALROS;
+
+	/* Drop the first occurrence of inclusive_xid from the xip array */
+	if (TransactionIdIsValid(inclusive_xid))
+	{
+		uint32		pos = 0;
+
+		while (pos < localRos->xcnt &&
+			   !TransactionIdEquals(localRos->xip[pos], inclusive_xid))
+			pos++;
+
+		if (pos < localRos->xcnt)
+		{
+			memmove(&localRos->xip[pos], &localRos->xip[pos + 1],
+					(localRos->xcnt - pos - 1) * sizeof(TransactionId));
+			localRos->xcnt--;
+		}
+	}
+
+	/* Remembered for HeapTupleSatisfiesLocalRos() */
+	exclusiveTransactionId = exclusive_xid;
+
+	/* Clean up the cache */
+	cachedTransactionInfo.xid = InvalidTransactionId;
+
+	return localRos;
+}
+
+/*
+ * Returns true when htup is visible to snapshot under plain MVCC rules and
+ * either IsXmaxHasCommitted() does not report it as removed, or its raw xmax
+ * equals exclusiveTransactionId (set by vci_GetSnapshotForLocalRos()).
+ *
+ * snapshot->snapshot_type is SNAPSHOT_MVCC only while
+ * HeapTupleSatisfiesVisibility() runs; the VCI specific type is put back
+ * before returning.
+ */
+bool
+HeapTupleSatisfiesLocalRos(HeapTuple htup, Snapshot snapshot, Buffer buffer)
+{
+	SnapshotType vciType = snapshot->snapshot_type;
+	bool		mvccVisible;
+
+	/* Store away the VCI specific type and check for MVCC visibility */
+	snapshot->snapshot_type = SNAPSHOT_MVCC;
+	mvccVisible = HeapTupleSatisfiesVisibility(htup, snapshot, buffer);
+	snapshot->snapshot_type = vciType;
+
+	if (!mvccVisible)
+		return false;
+
+	if (!IsXmaxHasCommitted(htup))
+		return true;
+
+	/* Reported as removed: visible only if xmax is the exclusive xid */
+	return TransactionIdEquals(HeapTupleHeaderGetRawXmax(htup->t_data),
+							   exclusiveTransactionId);
+}
+
+/*
+ * Checks whether the htup has been removed: true when HEAP_XMAX_COMMITTED is
+ * set or vci_transaction_get_type() classifies xmax as VCI_XACT_SELF or
+ * VCI_XACT_DID_COMMIT.  An xmax that only locks the row is not a removal.
+ */
+static bool
+IsXmaxHasCommitted(HeapTuple htup)
+{
+	uint16		infomask = htup->t_data->t_infomask;
+	TransactionId xmax;
+	bool		result = false;
+
+	/* A locker-only xmax (row lock) leaves the tuple in place */
+	if (HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+		return false;
+	if (infomask & HEAP_XMAX_COMMITTED)
+		return true;
+
+	xmax = HeapTupleHeaderGetRawXmax(htup->t_data);
+	if (!TransactionIdIsValid(xmax) || (infomask & HEAP_XMAX_INVALID))
+		return false;
+
+	if (TransactionIdEquals(xmax, cachedTransactionInfo.xid))
+		return cachedTransactionInfo.committed;
+
+	switch (vci_transaction_get_type(xmax))
+	{
+		case VCI_XACT_SELF:
+		case VCI_XACT_DID_COMMIT:
+			result = true;
+			break;
+		default:
+			break;
+	}
+
+	cachedTransactionInfo.xid = xmax;
+	cachedTransactionInfo.committed = result;
+	return result;
+}
+
+/**
+ * @brief Estimates the number of items in a heap relation from the size of
+ * the first tuple returned by a scan and the number of pages, assuming that
+ * all entries have the same size and that there are no HOT chains.
+ *
+ * @param[in] oid Oid of relation.
+ * @return estimated number of items in the relation; 0 when oid is invalid
+ * or the scan returns no tuple.
+ */
+uint64
+vci_EstimateNumEntriesInHeapRelation(Oid oid)
+{
+	Relation	heapRel;
+	TableScanDesc heapScan;
+	HeapTuple	firstTuple;
+	uint64		estimate = 0;
+
+	if (!OidIsValid(oid))
+		return 0;
+
+	heapRel = table_open(oid, AccessShareLock);
+	heapScan = table_beginscan(heapRel, GetActiveSnapshot(), 0, NULL);
+	/* Clear the SO_ALLOW_PAGEMODE flag on this scan */
+	heapScan->rs_flags &= ~SO_ALLOW_PAGEMODE;
+
+	/* Only the first tuple is fetched; it serves as the size sample */
+	firstTuple = heap_getnext(heapScan, ForwardScanDirection);
+	if (firstTuple != NULL)
+	{
+		BlockNumber numAllVisible;
+		uint64		perPage = (BLCKSZ - SizeOfPageHeaderData) /
+			(firstTuple->t_len + sizeof(ItemIdData));
+
+		/*
+		 * Pages counted by visibilitymap_count() as all-visible are left
+		 * out; the remaining pages are assumed to be completely filled.
+		 */
+		visibilitymap_count(heapRel, &numAllVisible, NULL);
+		estimate = (RelationGetNumberOfBlocks(heapRel) - numAllVisible) * perPage;
+	}
+
+	table_endscan(heapScan);
+	table_close(heapRel, AccessShareLock);
+
+	return estimate;
+}
--
1.8.3.1