v3-0003-dshash-add-partition-scoped-sequential-scan.patch
application/x-patch
Filename: v3-0003-dshash-add-partition-scoped-sequential-scan.patch
Type: application/x-patch
Part: 7
From bf08305059d33b65953a424243d452ac90f00786 Mon Sep 17 00:00:00 2001
From: Sami Imseih <samimseih@gmail.com>
Date: Wed, 27 May 2026 10:02:55 -0500
Subject: [PATCH v3 3/5] dshash: add partition-scoped sequential scan
Add dshash_seq_init_partition(), which restricts a sequential scan to
a single partition. This allows multiple backends to scan a dshash
table in parallel by dividing partitions among them, which a future
commit will use to implement parallel clock-sweep eviction.
Also move the DSHASH_NUM_PARTITIONS macros to dshash.h so that callers
can determine the partition count.
Rename dshash_seq_status.nbuckets to endbucket, since the field
represents the exclusive upper-bound bucket index at which the scan
terminates, not a count. This is clearer now that partition-scoped
scans set it to the first bucket of the next partition rather than
the total number of buckets.
---
src/backend/lib/dshash.c | 44 +++++++++++++++++++++++++++++-----------
src/include/lib/dshash.h | 14 ++++++++++++-
2 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c
index 1999989c14f..01e67856f43 100644
--- a/src/backend/lib/dshash.c
+++ b/src/backend/lib/dshash.c
@@ -52,15 +52,6 @@ struct dshash_table_item
/* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
};
-/*
- * The number of partitions for locking purposes. This is set to match
- * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
- * the buffer pool must be good enough for any other purpose. This could
- * become a runtime parameter in future.
- */
-#define DSHASH_NUM_PARTITIONS_LOG2 7
-#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
-
/* A magic value used to identify our hash tables. */
#define DSHASH_MAGIC 0x75ff6a20
@@ -661,13 +652,42 @@ dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
{
status->hash_table = hash_table;
status->curbucket = 0;
- status->nbuckets = 0;
+ status->endbucket = 0;
status->curitem = NULL;
status->pnextitem = InvalidDsaPointer;
status->curpartition = -1;
status->exclusive = exclusive;
}
+/*
+ * Initialize a sequential scan restricted to a single partition.
+ *
+ * Only entries in the specified partition are visited. The caller must
+ * ensure that 0 <= partition < DSHASH_NUM_PARTITIONS.
+ */
+void
+dshash_seq_init_partition(dshash_seq_status *status, dshash_table *hash_table,
+ bool exclusive, int partition)
+{
+ Assert(partition >= 0 && partition < DSHASH_NUM_PARTITIONS);
+
+ status->hash_table = hash_table;
+ status->curitem = NULL;
+ status->pnextitem = InvalidDsaPointer;
+ status->exclusive = exclusive;
+
+ LWLockAcquire(PARTITION_LOCK(hash_table, partition),
+ exclusive ? LW_EXCLUSIVE : LW_SHARED);
+ ensure_valid_bucket_pointers(hash_table);
+
+ status->curpartition = partition;
+ status->endbucket =
+ BUCKET_INDEX_FOR_PARTITION(partition + 1, hash_table->size_log2);
+ /* Set to one before first bucket as the seq scan will ++curbucket */
+ status->curbucket =
+ BUCKET_INDEX_FOR_PARTITION(partition, hash_table->size_log2) - 1;
+}
+
/*
* Returns the next element.
*
@@ -701,7 +721,7 @@ dshash_seq_next(dshash_seq_status *status)
ensure_valid_bucket_pointers(status->hash_table);
- status->nbuckets =
+ status->endbucket =
NUM_BUCKETS(status->hash_table->control->size_log2);
next_item_pointer = status->hash_table->buckets[status->curbucket];
}
@@ -717,7 +737,7 @@ dshash_seq_next(dshash_seq_status *status)
{
int next_partition;
- if (++status->curbucket >= status->nbuckets)
+ if (++status->curbucket >= status->endbucket)
{
/* all buckets have been scanned. finish. */
return NULL;
diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h
index 64b758b381b..655b025b996 100644
--- a/src/include/lib/dshash.h
+++ b/src/include/lib/dshash.h
@@ -73,7 +73,7 @@ typedef struct dshash_seq_status
{
dshash_table *hash_table; /* dshash table working on */
int curbucket; /* bucket number we are at */
- int nbuckets; /* total number of buckets in the dshash */
+ int endbucket; /* first bucket beyond scan range */
dshash_table_item *curitem; /* item we are currently at */
dsa_pointer pnextitem; /* dsa-pointer to the next item */
int curpartition; /* partition number we are at */
@@ -109,9 +109,21 @@ extern void dshash_release_lock(dshash_table *hash_table, void *entry);
#define dshash_find_or_insert(hash_table, key, found) \
dshash_find_or_insert_extended(hash_table, key, found, 0)
+/*
+ * The number of partitions for locking purposes. This is set to match
+ * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
+ * the buffer pool must be good enough for any other purpose. This could
+ * become a runtime parameter in future.
+ */
+#define DSHASH_NUM_PARTITIONS_LOG2 7
+#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
+
/* seq scan support */
extern void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
bool exclusive);
+extern void dshash_seq_init_partition(dshash_seq_status *status,
+ dshash_table *hash_table,
+ bool exclusive, int partition);
extern void *dshash_seq_next(dshash_seq_status *status);
extern void dshash_seq_term(dshash_seq_status *status);
extern void dshash_delete_current(dshash_seq_status *status);
--
2.50.1 (Apple Git-155)