v6-0001-ANALYZE-use-cursor-eviction-for-distinct-value-tr.patch
text/x-patch
Filename: v6-0001-ANALYZE-use-cursor-eviction-for-distinct-value-tr.patch
Type: text/x-patch
Part: 1
From df78d0bad95eb1572826a61bde6f4d6a7e1b639d Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdokimov@tantorlabs.com>
Date: Sun, 3 May 2026 16:49:11 +0300
Subject: [PATCH v6 1/2] ANALYZE: use cursor eviction for distinct-value
tracking
compute_distinct_stats() currently maintains the count=1 region by
shifting entries in track[] whenever a new singleton is inserted.
That makes each replacement O(n) in the size of the singleton region.
Replace that with a round-robin cursor over the count=1 region. This
preserves FIFO eviction order for singleton candidates while making
singleton replacement O(1) and avoiding the repeated array shifts
needed by the old scheme.
---
src/backend/commands/analyze.c | 79 ++++++++++++++++++++++++++--------
1 file changed, 60 insertions(+), 19 deletions(-)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 020a5919b84..262adf41b24 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -56,7 +56,6 @@
#include "utils/syscache.h"
#include "utils/timestamp.h"
-
/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
@@ -2108,10 +2107,11 @@ compute_trivial_stats(VacAttrStatsP stats,
*
* The most common values are determined by brute force: we keep a list
* of previously seen values, ordered by number of times seen, as we scan
- * the samples. A newly seen value is inserted just after the last
- * multiply-seen value, causing the bottommost (oldest) singly-seen value
- * to drop off the list. The accuracy of this method, and also its cost,
- * depend mainly on the length of the list we are willing to keep.
+ * the samples. Newly seen values are appended to the list, and when it's
+ * full we replace the oldest singly-seen value (FIFO) using a round-robin
+ * cursor (clock hand) over the count=1 region. This avoids repeatedly
+ * shifting the count=1 region, and when hashing is enabled, avoids having
+ * to update a large number of hash->index mappings.
*/
static void
compute_distinct_stats(VacAttrStatsP stats,
@@ -2138,6 +2138,8 @@ compute_distinct_stats(VacAttrStatsP stats,
int track_cnt,
track_max;
int num_mcv = stats->attstattarget;
+ int firstcount1 = 0, /* index of first singleton in track[] */
+ c1_cursor = 0; /* next singleton to evict (FIFO) */
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
/*
@@ -2156,7 +2158,7 @@ compute_distinct_stats(VacAttrStatsP stats,
Datum value;
bool isnull;
bool match;
- int firstcount1,
+ int match_index,
j;
vacuum_delay_point(true);
@@ -2213,6 +2215,7 @@ compute_distinct_stats(VacAttrStatsP stats,
value, track[j].value)))
{
match = true;
+ match_index = j;
break;
}
if (j < firstcount1 && track[j].count == 1)
@@ -2221,31 +2224,69 @@ compute_distinct_stats(VacAttrStatsP stats,
if (match)
{
+ bool was_count1;
+
/* Found a match */
- track[j].count++;
+ was_count1 = (track[match_index].count == 1);
+ track[match_index].count++;
/* This value may now need to "bubble up" in the track list */
- while (j > 0 && track[j].count > track[j - 1].count)
+ for (j = match_index; j > 0 && track[j].count > track[j - 1].count; j--)
{
swapDatum(track[j].value, track[j - 1].value);
swapInt(track[j].count, track[j - 1].count);
- j--;
}
+
+ /*
+ * If a singleton was promoted, the singleton region shrinks by
+ * one element, so advance its boundary.
+ */
+ if (was_count1)
+ {
+ firstcount1++;
+
+ /*
+ * If the cursor was pointing into the singleton region,
+ * advance it so it still refers to a valid singleton.
+ */
+ if (c1_cursor >= firstcount1 - 1)
+ {
+ c1_cursor++;
+ if (c1_cursor >= track_cnt)
+ c1_cursor = firstcount1;
+ }
+ }
+
+ /* Final safety: keep cursor inside singleton region */
+ if (c1_cursor < firstcount1 || c1_cursor >= track_cnt)
+ c1_cursor = firstcount1;
}
else
{
- /* No match. Insert at head of count-1 list */
+ int insert_index;
+
+ /*
+ * No match. Track the value if we still have room; otherwise
+ * evict the oldest singleton from the count=1 region.
+ */
if (track_cnt < track_max)
- track_cnt++;
- for (j = track_cnt - 1; j > firstcount1; j--)
- {
- track[j].value = track[j - 1].value;
- track[j].count = track[j - 1].count;
- }
- if (firstcount1 < track_cnt)
+ insert_index = track_cnt++;
+ else if (firstcount1 < track_cnt)
{
- track[firstcount1].value = value;
- track[firstcount1].count = 1;
+ /*
+ * Use c1_cursor as a round-robin cursor over the count=1
+ * region. Keep it on a current singleton before evicting.
+ */
+ if (c1_cursor < firstcount1 || c1_cursor >= track_cnt)
+ c1_cursor = firstcount1;
+ insert_index = c1_cursor++;
+ if (c1_cursor >= track_cnt)
+ c1_cursor = firstcount1;
}
+ else
+ continue;
+
+ track[insert_index].value = value;
+ track[insert_index].count = 1;
}
}
--
2.34.1