v20251223-0007-VCI-main-part6.patch

application/octet-stream

Filename: v20251223-0007-VCI-main-part6.patch
Type: application/octet-stream
Part: 5
Message: Re: [WIP]Vertical Clustered Index (columnar store extension) - take2
From 73635624b2b752351f23683b92df9cf7d0fa547b Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Tue, 23 Dec 2025 15:50:10 +1100
Subject: [PATCH v20251223] VCI - main - part6

---
 contrib/vci/Makefile       |   6 +-
 contrib/vci/meson.build    |   6 +-
 contrib/vci/vci_main.c     | 183 ++++++++++++++++++
 contrib/vci/vci_read_guc.c | 466 +++++++++++++++++++++++++++++++++++++++++++++
 contrib/vci/vci_shmem.c    | 206 ++++++++++++++++++++
 5 files changed, 861 insertions(+), 6 deletions(-)
 create mode 100644 contrib/vci/vci_main.c
 create mode 100644 contrib/vci/vci_read_guc.c
 create mode 100644 contrib/vci/vci_shmem.c

diff --git a/contrib/vci/Makefile b/contrib/vci/Makefile
index 9e31650..1e02ebb 100644
--- a/contrib/vci/Makefile
+++ b/contrib/vci/Makefile
@@ -3,9 +3,9 @@
 MODULE_big = vci
 
 OBJS = \
-#	vci_main.o \
-#	vci_read_guc.o \
-#	vci_shmem.o \
+	vci_main.o \
+	vci_read_guc.o \
+	vci_shmem.o \
 	vci_supported_funcs.o \
 	vci_supported_types.o
 SUBDIRS = \
diff --git a/contrib/vci/meson.build b/contrib/vci/meson.build
index 7560c1b..130075b 100644
--- a/contrib/vci/meson.build
+++ b/contrib/vci/meson.build
@@ -6,9 +6,9 @@ subdir('utils')
 
 
 vci_sources = files(
-#        'vci_main.c',
-#        'vci_read_guc.c',
-#        'vci_shmem.c',
+        'vci_main.c',
+        'vci_read_guc.c',
+        'vci_shmem.c',
         'vci_supported_funcs.c',
         'vci_supported_types.c',
 )
diff --git a/contrib/vci/vci_main.c b/contrib/vci/vci_main.c
new file mode 100644
index 0000000..564e4a2
--- /dev/null
+++ b/contrib/vci/vci_main.c
@@ -0,0 +1,183 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_main.c
+ *	  VCI main file
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		contrib/vci/vci_main.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam_xlog.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/dependency.h"
+#include "catalog/index.h"
+#include "commands/tablecmds.h"
+#include "common/file_utils.h"
+#include "executor/nodeModifyTable.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/execnodes.h"
+#include "nodes/pg_list.h"
+#include "storage/ipc.h"
+#include "storage/smgr.h"
+#include "utils/guc.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/varlena.h"
+
+#include "vci.h"
+#include "vci_mem.h"
+#include "vci_ros.h"
+#include "vci_ros_daemon.h"
+
+static void vci_xact_callback(XactEvent event, void *arg);
+static void vci_subxact_callback(SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg);
+
+PG_MODULE_MAGIC_EXT(
+					.name = "vci",
+					.version = PG_VERSION
+);
+
+/* Command which re-indexes VCI; starts as vcirc_invalid. */
+vci_RebuildCommand vci_rebuild_command = vcirc_invalid;
+
+/*
+ * Previous ProcessUtility_hook, saved by _PG_init() before installing ours.
+ */
+ProcessUtility_hook_type process_utility_prev = NULL;
+
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+static void vci_shmem_request(void);
+
+/*
+ * _PG_init: module entry point, called when the library is loaded.  Defines
+ * the VCI GUCs, installs hooks/callbacks and registers the ROS control daemon.
+ */
+void
+_PG_init(void)
+{
+	pg_bindtextdomain(TEXTDOMAIN);
+
+	if (!process_shared_preload_libraries_in_progress)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("\"%s\" must be registered in shared_preload_libraries", VCI_STRING)));
+		return;					/* LCOV_EXCL_LINE */
+	}
+
+	vci_read_guc_variables();
+
+	/* Outside a postmaster environment, VCI and its daemon are forced off. */
+	if (!IsPostmasterEnvironment)
+	{
+		VciGuc.enable = false;
+		VciGuc.enable_ros_control_daemon = false;
+	}
+
+	vci_setup_shmem();
+	vci_setup_executor_hook();
+
+	/* register process utility hook, saving the previous one */
+	process_utility_prev = ProcessUtility_hook;
+	ProcessUtility_hook = vci_process_utility;
+
+	/* register functions to the custom hooks */
+	add_index_delete_hook = vci_add_index_delete;
+	add_should_index_insert_hook = vci_add_should_index_insert;
+	add_drop_relation_hook = vci_add_drop_relation;
+	add_reindex_index_hook = vci_add_reindex_index;
+	add_skip_vci_index_hook = vci_add_skip_vci_index;
+	add_alter_tablespace_hook = vci_add_alter_tablespace;
+	add_alter_table_change_owner_hook = vci_alter_table_change_owner;
+	add_alter_table_change_schema_hook = vci_alter_table_change_schema;
+	add_snapshot_satisfies_hook = VCITupleSatisfiesVisibility;
+	add_skip_vacuum_hook = vci_isVciAdditionalRelation;
+
+	/* Postmaster: register the ROS daemon.  Otherwise: set up VCI memory now. */
+	if (IsPostmasterEnvironment)
+	{
+		if (!IsUnderPostmaster)
+		{
+#ifdef WIN32
+			struct stat st;
+			char	   *dir_name = "base/" PG_TEMP_FILES_DIR;
+
+			if (stat(dir_name, &st) == 0)
+			{
+				if (!S_ISDIR(st.st_mode))
+					ereport(ERROR,
+							(errcode_for_file_access(),
+							 errmsg("\"%s\" is not directory", dir_name)));
+			}
+			else
+			{
+				if (errno == ENOENT)
+				{
+					if (mkdir(dir_name, S_IRWXU) < 0)
+						ereport(ERROR,
+								(errcode_for_file_access(),
+								 errmsg("could not create directory \"%s\": %m",
+										dir_name)));
+				}
+				else
+					ereport(ERROR,
+							(errcode_for_file_access(),
+							 errmsg("could not stat directory \"%s\": %m",
+									dir_name)));
+			}
+#endif
+
+			/* Register ROS Control Daemon */
+			vci_ROS_control_daemon_setup();
+		}
+	}
+	else
+		vci_shmem_startup_routine();	/* NOTE(review): runs before vci_shmem_request is installed below -- confirm the LWLock tranches exist */
+
+	RegisterXactCallback(vci_xact_callback, NULL);
+	RegisterSubXactCallback(vci_subxact_callback, NULL);
+
+	/* chain into shmem_request_hook to request VCI's named LWLock tranches */
+	prev_shmem_request_hook = shmem_request_hook;
+	shmem_request_hook = vci_shmem_request;
+}
+
+static void
+vci_shmem_request(void)
+{
+	/* Named LWLock tranches used by VCI; one lock is requested for each. */
+	static const char *const tranche_names[] = {"VciStandbyExec", "VciIOLoad",
+	"VciMemoryEntries", "VciQueryContext", "VciMntpoint2dev"};
+
+	if (prev_shmem_request_hook != NULL)	/* chain to the previous hook */
+		prev_shmem_request_hook();
+
+	for (int i = 0; i < (int) lengthof(tranche_names); i++)
+		RequestNamedLWLockTranche(tranche_names[i], 1);
+}
+
+/*
+ * Xact callback for COMMIT/ABORT/PREPARE operations; "arg" is unused.
+ */
+static void
+vci_xact_callback(XactEvent event, void *arg)
+{
+	vci_xact_change_handler(event);
+}
+
+/*
+ * Subxact callback; only "event" and "mySubid" are passed on to VCI.
+ */
+static void
+vci_subxact_callback(SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg)
+{
+	vci_subxact_change_handler(event, mySubid);
+}
diff --git a/contrib/vci/vci_read_guc.c b/contrib/vci/vci_read_guc.c
new file mode 100644
index 0000000..4622af5
--- /dev/null
+++ b/contrib/vci/vci_read_guc.c
@@ -0,0 +1,466 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_read_guc.c
+ *	  GUC parameter settings
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		contrib/vci/vci_read_guc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "miscadmin.h"
+#include "postmaster/postmaster.h"
+#include "storage/procnumber.h"
+#include "utils/guc.h"
+#include "utils/guc_tables.h"
+#include "utils/palloc.h"
+
+#include "vci.h"
+
+#include "vci_executor.h"
+#include "vci_mem.h"
+
+/* GUC parameter holder */
+VciGucStruct VciGuc;
+
+static void check_max_worker_processes(void);
+
+static const struct config_enum_entry table_scan_policy_options[] = {
+	/* vci.table_scan_policy values: {name, value, hidden}; "column only" is a hidden synonym */
+	{"column store only", VCI_TABLE_SCAN_POLICY_COLUMN_ONLY, false},
+	{"column only", VCI_TABLE_SCAN_POLICY_COLUMN_ONLY, true},
+	{"none", VCI_TABLE_SCAN_POLICY_NONE, false},
+	{NULL, 0, false}
+};
+
+/*
+ * VCI GUC definitions, using the same designated-initializer format as
+ * src/backend/utils/guc_tables.inc.c; vci_read_guc_variables() walks this.
+ */
+static struct config_generic VciConfigureNames[] =
+{
+	/*
+	 * Bool GUCs
+	 */
+
+	/* for internal use */
+	{
+		.name = "vci.enable",
+		.context = PGC_USERSET,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Enables VCI."),
+		.flags = GUC_NOT_IN_SAMPLE,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.log_query",
+		.context = PGC_USERSET,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Logs information when a query fails to be executed by VCI."),
+		.flags = GUC_NOT_IN_SAMPLE,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.log_query,
+			.boot_val = false,
+		}
+	},
+
+	{
+		.name = "vci.enable_seqscan",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace sequential-scan plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_seqscan,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_indexscan",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace index-scan plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_indexscan,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_bitmapheapscan",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace bitmap-scan plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_bitmapheapscan,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_sort",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace sort plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_sort,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_hashagg",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace hashed aggregation plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_hashagg,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_sortagg",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace sorted aggregation plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_sortagg,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_plainagg",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace plain aggregation plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_plainagg,
+			.boot_val = true,
+		}
+	},
+
+	{
+		.name = "vci.enable_hashjoin",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace hash join plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_hashjoin,
+			.boot_val = false,
+		}
+	},
+
+	{
+		.name = "vci.enable_nestloop",
+		.context = PGC_USERSET,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Enables VCI planner to replace nested-loop plans."),
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_nestloop,
+			.boot_val = false,
+		}
+	},
+
+	{
+		.name = "vci.enable_ros_control_daemon",
+		.context = PGC_POSTMASTER,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Enables the VCI ROS Control Daemon."),
+		.flags = GUC_NOT_IN_SAMPLE,
+		.vartype = PGC_BOOL,
+		._bool = {
+			.variable = &VciGuc.enable_ros_control_daemon,
+			.boot_val = false,
+		}
+	},
+
+	/*
+	 * Int GUCs
+	 */
+
+	{
+		.name = "vci.cost_threshold",
+		.context = PGC_USERSET,
+		.flags = GUC_NOT_IN_SAMPLE,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Sets the threshold CPU load beyond which the VCI control worker is stopped."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.cost_threshold,
+			.boot_val = 18000,
+			.min = 0,
+			.max = INT_MAX,
+		}
+	},
+
+	{
+		.name = "vci.maintenance_work_mem",
+		.context = PGC_SIGHUP,
+		.flags = GUC_NOT_IN_SAMPLE | GUC_UNIT_KB,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Sets the maximum memory to be used by each control worker for VCI control operations."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.maintenance_work_mem,
+			.boot_val = 256 * 1024,
+			.min = 1024,
+			.max = MAX_KILOBYTES,
+		},
+	},
+
+	/* **************************************** */
+	/* ROS Control Daemon/Worker configurations */
+	/* **************************************** */
+
+	/* Daemon setup */
+
+	{
+		.name = "vci.control_max_workers",
+		.context = PGC_POSTMASTER,
+		.flags = GUC_NOT_IN_SAMPLE,
+		.group = RESOURCES_IO,
+		.short_desc = gettext_noop("Sets the maximum number of simultaneously running VCI control worker processes."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.control_max_workers,
+			.boot_val = 8,
+			.min = 1,
+			.max = MAX_BACKENDS,
+		}
+	},
+
+	{
+		.name = "vci.control_naptime",
+		.context = PGC_SIGHUP,
+		.flags = GUC_NOT_IN_SAMPLE | GUC_UNIT_S,
+		.group = RESOURCES_IO,
+		.short_desc = gettext_noop("Time to sleep between VCI control worker runs."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.control_naptime,
+			.boot_val = 1,
+			.min = 1,
+			.max = INT_MAX / 1000,
+		}
+	},
+
+	/* Worker : ROS control command thresholds  */
+
+	{
+		.name = "vci.wosros_conv_threshold",
+		.context = PGC_SIGHUP,
+		.flags = GUC_NOT_IN_SAMPLE,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Sets the threshold of Data WOS rows to execute WOS->ROS conversion."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.wosros_conv_threshold,
+			.boot_val = 256 * 1024,
+			.min = 1,
+			.max = INT_MAX,
+		}
+	},
+
+	{
+		.name = "vci.cdr_threshold",
+		.context = PGC_SIGHUP,
+		.flags = GUC_NOT_IN_SAMPLE,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Sets the threshold of deleted rows in ROS to execute collect-deleted-rows command."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.cdr_threshold,
+			.boot_val = 128 * 1024,
+			.min = 1,
+			.max = INT_MAX,
+		}
+	},
+
+	/******************************************/
+	/* Custom Plan Execution                  */
+	/******************************************/
+
+	{
+		.name = "vci.max_local_ros",
+		.context = PGC_USERSET,
+		.flags = GUC_NOT_IN_SAMPLE |  GUC_UNIT_KB,
+		.group = RESOURCES_MEM,
+		.short_desc = gettext_noop("Sets the maximum local ROS memory."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.max_local_ros_size,
+			.boot_val = 64 * 1024,
+			.min = 64 * 1024,
+			.max = INT_MAX,	/* NOTE(review): vci.maintenance_work_mem uses MAX_KILOBYTES -- confirm */
+		}
+	},
+
+	{
+		.name = "vci.table_rows_threshold",
+		.context = PGC_USERSET,
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.group = DEVELOPER_OPTIONS,
+		.short_desc = gettext_noop("Sets the threshold of table rows to execute VCI Scan."),
+		.vartype = PGC_INT,
+		._int = {
+			.variable = &VciGuc.table_rows_threshold,
+			.boot_val = VCI_MAX_FETCHING_ROWS,
+			.min = 0,
+			.max = INT_MAX,
+		}
+	},
+
+	/*
+	 * Enum GUCs
+	 */
+
+	{
+		.name = "vci.table_scan_policy",
+		.short_desc = gettext_noop("Sets the policy that a scan node reads from the column store table(VCI index) or the row store table(original)."),
+		.group = DEVELOPER_OPTIONS,
+		.context = PGC_USERSET,
+		.flags = GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL,
+		.vartype = PGC_ENUM,
+		._enum = {
+			.variable = &VciGuc.table_scan_policy,
+			.boot_val = VCI_TABLE_SCAN_POLICY_COLUMN_ONLY,
+			.options = table_scan_policy_options,
+		}
+	},
+
+};
+
+/*
+ * Register the VCI GUCs, or just load boot values without a postmaster.
+ */
+void
+vci_read_guc_variables(void)
+{
+	/*
+	 * TODO: Raise warnings or set parameters to default, when the specified
+	 * value is out-of-range.
+	 */
+
+	/* FIXME: Add initial value to pass Assert() */
+	VciGuc.table_scan_policy = VCI_TABLE_SCAN_POLICY_COLUMN_ONLY;
+
+	for (int idx = 0; idx < (int) lengthof(VciConfigureNames); idx++)
+	{
+		struct config_generic *guc = &VciConfigureNames[idx];
+
+		switch (guc->vartype)
+		{
+			case PGC_BOOL:
+				if (!IsPostmasterEnvironment)
+					*(guc->_bool.variable) = guc->_bool.boot_val;
+				else
+					DefineCustomBoolVariable(guc->name,
+											 guc->short_desc,
+											 guc->long_desc,
+											 guc->_bool.variable,
+											 guc->_bool.boot_val,
+											 guc->context,
+											 guc->flags,
+											 guc->_bool.check_hook,
+											 guc->_bool.assign_hook,
+											 guc->_bool.show_hook);
+				break;
+
+			case PGC_INT:
+				if (!IsPostmasterEnvironment)
+					*(guc->_int.variable) = guc->_int.boot_val;
+				else
+					DefineCustomIntVariable(guc->name,
+											guc->short_desc,
+											guc->long_desc,
+											guc->_int.variable,
+											guc->_int.boot_val,
+											guc->_int.min,
+											guc->_int.max,
+											guc->context,
+											guc->flags,
+											guc->_int.check_hook,
+											guc->_int.assign_hook,
+											guc->_int.show_hook);
+				break;
+
+			case PGC_ENUM:
+				if (!IsPostmasterEnvironment)
+					*(guc->_enum.variable) = guc->_enum.boot_val;
+				else
+					DefineCustomEnumVariable(guc->name,
+											 guc->short_desc,
+											 guc->long_desc,
+											 guc->_enum.variable,
+											 guc->_enum.boot_val,
+											 guc->_enum.options,
+											 guc->context,
+											 guc->flags,
+											 guc->_enum.check_hook,
+											 guc->_enum.assign_hook,
+											 guc->_enum.show_hook);
+				break;
+
+			default:
+				elog(ERROR, "Unexpected VCI GUC variable type");
+		}
+	}
+
+	VciGuc.have_loaded_postgresql_conf = true;
+
+	check_max_worker_processes();
+}
+
+/*
+ * Raise an ERROR unless max_worker_processes is large enough for the
+ * background processes VCI wants to run.
+ */
+static void
+check_max_worker_processes(void)
+{
+	/* ROS control daemon + its workers, then the parallel control daemon */
+	int			required = (1 + VciGuc.control_max_workers) + 1;
+
+	/* cap the requirement at MAX_BACKENDS */
+	required = Min(required, MAX_BACKENDS);
+
+	if (max_worker_processes >= required)
+		return;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+			 errmsg(VCI_STRING " needs to set at least %d to \"max_worker_processes\"",
+					required)));
+}
diff --git a/contrib/vci/vci_shmem.c b/contrib/vci/vci_shmem.c
new file mode 100644
index 0000000..5383446
--- /dev/null
+++ b/contrib/vci/vci_shmem.c
@@ -0,0 +1,206 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_shmem.c
+ *	  Managing shared memory
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		contrib/vci/vci_shmem.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "storage/ipc.h"
+
+#include "vci.h"
+#include "vci_mem.h"
+#include "vci_ros_daemon.h"
+
+/*
+ * Pointer to fixed-position shared memory area
+ */
+VciShmemStruct *VciShmemAddr;
+
+/* Saved hook value */
+static shmem_startup_hook_type shmem_startup_prev = NULL;
+
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+static void vci_shmem_request(void);
+
+/*
+ * Install VCI's shmem_request_hook and shmem_startup_hook (saving previous)
+ */
+void
+vci_setup_shmem(void)
+{
+	Assert(VciGuc.have_loaded_postgresql_conf);	/* set by vci_read_guc_variables() */
+
+	prev_shmem_request_hook = shmem_request_hook;
+	shmem_request_hook = vci_shmem_request;
+
+	shmem_startup_prev = shmem_startup_hook;
+	shmem_startup_hook = vci_shmem_startup_routine;
+}
+
+/*
+ * Reserve shared memory for all areas vci_shmem_startup_routine() creates.
+ */
+static void
+vci_shmem_request(void)
+{
+	if (prev_shmem_request_hook)
+		prev_shmem_request_hook();
+
+	RequestAddinShmemSpace(sizeof(VciShmemStruct));
+	RequestAddinShmemSpace(vci_GetSizeOfMemoryEntries());
+
+	/* One argument slot per control worker (vci.control_max_workers >= 1) */
+	RequestAddinShmemSpace(sizeof(vci_wosros_conv_worker_arg_t) * VciGuc.control_max_workers);
+
+	/*
+	 * The + 1 is for wos->ros conversion of vci assigned to unmonitored
+	 * devices. vci_devload_t is allocated per device. It monitors and stores
+	 * the IO load, holds a set of vci on the device, and is used to determine
+	 * which vci to perform wos->ros conversion. Since only a fixed number of
+	 * vci_devload_t type values are prepared, if more devices are added and
+	 * exceed the number, they fall outside the management scope for wos->ros
+	 * conversion. To ensure that no vci is left without wos->ros conversion,
+	 * a vci_devload_t type value is prepared to store the set of vci on
+	 * devices outside the management scope, and it is handled like other
+	 * vci_devload_t values to convert vci. This is the area allocated by + 1.
+	 * In order to be treated in the program similarly to the device being
+	 * monitored, the value used to determine whether to convert vci on that
+	 * device should be set appropriately as a device for conversion. Such
+	 * devices are processed as if they are collectively one device, so the
+	 * conversion frequency becomes lower.
+	 */
+	RequestAddinShmemSpace(sizeof(vci_devload_t) * (VciGuc.max_devices + 1));
+	RequestAddinShmemSpace(sizeof(vci_memory_entry_list_t) * (VciGuc.max_devices + 1));
+}
+
+/*
+ * Initialize the device-load bookkeeping kept in VciShmemAddr.
+ */
+static void
+vci_Initialize_devload_info(void)
+{
+	vci_memory_entry_list_t *queue = NULL;
+	vci_devload_t *unmonitored = NULL;
+	dlist_node *free_node;
+
+	LWLockAcquire(VciShmemAddr->io_load_lock, LW_EXCLUSIVE);
+
+	/* OSS has no loop for monitored devices: only queue [0] goes on the free list */
+	Assert(VciShmemAddr->max_devices == 0);
+	dlist_init(&VciShmemAddr->free_memory_entry_queue_list);
+	queue = &VciShmemAddr->memory_entry_queue_array[0];
+	dlist_init(&queue->head);
+	dlist_push_tail(&VciShmemAddr->free_memory_entry_queue_list, &queue->link);
+
+	/* The pseudo "unmonitored" device takes slot [0] and a queue from the free list */
+	unmonitored = &VciShmemAddr->devload_array[0];
+	strcpy(unmonitored->devname, VCI_PSEUDO_UNMONITORED_DEVICE);
+	free_node = dlist_pop_head_node(&VciShmemAddr->free_memory_entry_queue_list);
+	unmonitored->memory_entry_queue = dlist_container(vci_memory_entry_list_t, link, free_node);
+
+	/* OSS has just 1 devload_info */
+	VciShmemAddr->num_devload_info = 1;
+
+	LWLockRelease(VciShmemAddr->io_load_lock);
+}
+
+/*
+ * Set up VCI's shared state: in shared memory under a postmaster, otherwise
+ * in zeroed process-local memory (out-of-memory there is reported as FATAL).
+ */
+void
+vci_shmem_startup_routine(void)
+{
+	bool		found;
+
+	VciGuc.max_devices = 0;
+
+	if (IsPostmasterEnvironment)
+	{
+		if (shmem_startup_prev)
+			shmem_startup_prev();
+
+		VciShmemAddr =
+			(VciShmemStruct *) ShmemInitStruct("vci: shared memory", sizeof(VciShmemStruct), &found);
+		Assert(VciShmemAddr != NULL);
+
+#ifdef WIN32
+		if (IsUnderPostmaster)
+		{
+			/* The remaining setup is only needed in the postmaster, so a
+			 * child process stops here.
+			 */
+			return;
+		}
+#endif
+
+		/*
+		 * Prepare one vci_wosros_conv_worker_arg_t per control worker.  This
+		 * area is used to pass parameters from the ROS daemon to the worker
+		 * that actually does the conversion.  Note: the minimum value of
+		 * control_max_workers is 1, so the allocation size is never 0.
+		 * "found" is not consulted for any of these areas.
+		 */
+		VciShmemAddr->worker_args_array =
+			ShmemInitStruct("vci: arguments for workers ", sizeof(vci_wosros_conv_worker_arg_t) * VciGuc.control_max_workers, &found);
+		Assert(VciShmemAddr->worker_args_array != NULL);
+
+		VciShmemAddr->memory_entries =
+			ShmemInitStruct("vci: memory entries", vci_GetSizeOfMemoryEntries(), &found);
+		Assert(VciShmemAddr->memory_entries != NULL);
+
+		/*
+		 * + 1 for non-monitored devices: ramfs and the ones that cannot be
+		 * observed because of space limitation
+		 */
+		VciShmemAddr->devload_array =
+			ShmemInitStruct("vci: io load watch", sizeof(vci_devload_t) * (VciGuc.max_devices + 1), &found);
+		Assert(VciShmemAddr->devload_array != NULL);
+		VciShmemAddr->memory_entry_queue_array =
+			ShmemInitStruct("vci: memory entry queue", sizeof(vci_memory_entry_list_t) * (VciGuc.max_devices + 1), &found);
+		Assert(VciShmemAddr->memory_entry_queue_array != NULL);
+	}
+	else
+	{
+		/* No shared memory here: use zeroed process-local allocations. */
+		VciShmemAddr = calloc(1, sizeof(VciShmemStruct));
+		if (VciShmemAddr == NULL)
+			ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+		VciShmemAddr->worker_args_array = calloc(VciGuc.control_max_workers, sizeof(vci_wosros_conv_worker_arg_t));
+		VciShmemAddr->memory_entries = calloc(1, vci_GetSizeOfMemoryEntries());
+		VciShmemAddr->devload_array = calloc(VciGuc.max_devices + 1, sizeof(vci_devload_t));
+		VciShmemAddr->memory_entry_queue_array = calloc(VciGuc.max_devices + 1, sizeof(vci_memory_entry_list_t));
+		if (VciShmemAddr->worker_args_array == NULL || VciShmemAddr->memory_entries == NULL ||
+			VciShmemAddr->devload_array == NULL || VciShmemAddr->memory_entry_queue_array == NULL)
+			ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+	}
+
+	/* Standby server execution control */
+	VciShmemAddr->standby_exec_loc = &(GetNamedLWLockTranche("VciStandbyExec"))->lock;
+
+	/*
+	 * Set the number of monitorable devices and initialize lock for IO load
+	 * monitoring
+	 */
+	VciShmemAddr->max_devices = VciGuc.max_devices;
+	VciShmemAddr->io_load_lock = &(GetNamedLWLockTranche("VciIOLoad"))->lock;
+
+	/* Additional LWLocks Initialization */
+	VciShmemAddr->vci_memory_entries_lock = &(GetNamedLWLockTranche("VciMemoryEntries"))->lock;
+	VciShmemAddr->vci_query_context_lock = &(GetNamedLWLockTranche("VciQueryContext"))->lock;
+	VciShmemAddr->vci_mnt_point2dev_lock = &(GetNamedLWLockTranche("VciMntpoint2dev"))->lock;
+
+	/* initialize the lists of vci_devload_t */
+	vci_Initialize_devload_info();
+
+	/* Initialize vci-memory-entries */
+	vci_InitMemoryEntries();
+}
-- 
1.8.3.1