v1-0001-Support-loser-tree-for-k-way-merge.patch
application/octet-stream
Filename: v1-0001-Support-loser-tree-for-k-way-merge.patch
Type: application/octet-stream
Part: 0
Message:
Support loser tree for k-way merge
From 2981406dbf5bb738273b3ff4c26854ce7d715dd1 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Wed, 3 Dec 2025 18:58:28 +0800
Subject: [PATCH v1] Support loser tree for k-way merge.
The loser tree usually has fewer comparisons than the heap during
k-way merge.
---
src/backend/utils/misc/guc_parameters.dat | 7 +
src/backend/utils/sort/tuplesort.c | 160 +++++++++++++++++++++-
src/include/utils/guc.h | 1 +
3 files changed, 167 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 3b9d8349078..123ece8ea75 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -882,6 +882,13 @@
boot_val => 'true',
},
+{ name => 'enable_loser_tree', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD',
+ short_desc => 'Enables loser tree for k-way merge.',
+ flags => 'GUC_NOT_IN_SAMPLE',
+ variable => 'enable_loser_tree',
+ boot_val => 'false',
+},
+
{ name => 'enable_material', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD',
short_desc => 'Enables the planner\'s use of materialization.',
flags => 'GUC_EXPLAIN',
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 5d4411dc33f..381f5909094 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -122,6 +122,9 @@
/* GUC variables */
bool trace_sort = false;
+bool enable_loser_tree = false;
+
+#define LOSER_TREE_EOF -1
#ifdef DEBUG_BOUNDED_SORT
bool optimize_bounded_sort = true;
@@ -219,6 +222,9 @@ struct Tuplesortstate
int memtupsize; /* allocated length of memtuples array */
bool growmemtuples; /* memtuples' growth still underway? */
+ bool useLoserTree; /* use loser tree for k-way merge if true */
+ int *losers; /* array of losers, losers[0] is the winner */
+
/*
* Memory for tuples is sometimes allocated using a simple slab allocator,
* rather than with palloc(). Currently, we switch to slab allocation
@@ -468,6 +474,8 @@ static void tuplesort_sort_memtuples(Tuplesortstate *state);
static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple);
static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple);
static void tuplesort_heap_delete_top(Tuplesortstate *state);
+static void tuplesort_loser_tree_build(Tuplesortstate *state);
+static void tuplesort_loser_tree_adjust(Tuplesortstate *state, int start);
static void reversedirection(Tuplesortstate *state);
static unsigned int getlen(LogicalTape *tape, bool eofOK);
static void markrunend(LogicalTape *tape);
@@ -681,6 +689,8 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt)
state->base.sortopt = sortopt;
state->base.tuples = true;
state->abbrevNext = 10;
+ state->useLoserTree = enable_loser_tree;
+ state->losers = NULL;
/*
* workMem is forced to be at least 64KB, the current minimum valid value
@@ -1647,6 +1657,37 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
state->lastReturnedTuple = NULL;
}
+ if (state->useLoserTree && state->memtupcount > 0)
+ {
+ int i = state->losers[0];
+ int start = i + state->memtupcount;
+ int srcTapeIndex = state->memtuples[i].srctape;
+ LogicalTape *srcTape = state->inputTapes[srcTapeIndex];
+ SortTuple newtup;
+
+ *stup = state->memtuples[i];
+
+ state->lastReturnedTuple = stup->tuple;
+
+ if (mergereadnext(state, srcTape, &newtup))
+ {
+ newtup.srctape = srcTapeIndex;
+ state->memtuples[i] = newtup;
+ }
+ else
+ {
+ state->losers[start] = LOSER_TREE_EOF;
+ state->nInputRuns--;
+ LogicalTapeClose(srcTape);
+ }
+ tuplesort_loser_tree_adjust(state, start);
+
+ if (state->losers[0] == LOSER_TREE_EOF)
+ state->memtupcount = 0;
+
+ return true;
+ }
+
/*
* This code should match the inner loop of mergeonerun().
*/
@@ -2206,6 +2247,41 @@ mergeonerun(Tuplesortstate *state)
Assert(state->slabAllocatorUsed);
+ if (state->useLoserTree && state->memtupcount > 0)
+ {
+ while (state->losers[0] != LOSER_TREE_EOF)
+ {
+ int i = state->losers[0];
+ int start = i + state->memtupcount;
+ SortTuple stup;
+
+ /* write the tuple to destTape */
+ srcTapeIndex = state->memtuples[i].srctape;
+ srcTape = state->inputTapes[srcTapeIndex];
+ WRITETUP(state, state->destTape, &state->memtuples[i]);
+
+ /* recycle the slot of the tuple we just wrote out, for the next read */
+ if (state->memtuples[i].tuple)
+ RELEASE_SLAB_SLOT(state, state->memtuples[i].tuple);
+
+ if (mergereadnext(state, srcTape, &stup))
+ {
+ stup.srctape = srcTapeIndex;
+ state->memtuples[i] = stup;
+ }
+ else
+ {
+ state->losers[start] = LOSER_TREE_EOF;
+ state->nInputRuns--;
+ }
+
+ tuplesort_loser_tree_adjust(state, start);
+ }
+ state->memtupcount = 0;
+ markrunend(state->destTape);
+ return;
+ }
+
/*
* Execute merge by repeatedly extracting lowest tuple in heap, writing it
* out, and replacing it with next tuple from same tape (if there is
@@ -2270,9 +2346,16 @@ beginmerge(Tuplesortstate *state)
if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup))
{
tup.srctape = srcTapeIndex;
- tuplesort_heap_insert(state, &tup);
+
+ if (state->useLoserTree)
+ state->memtuples[state->memtupcount++] = tup;
+ else
+ tuplesort_heap_insert(state, &tup);
}
}
+
+ if (state->useLoserTree && state->memtupcount > 0)
+ tuplesort_loser_tree_build(state);
}
/*
@@ -2823,6 +2906,81 @@ tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
memtuples[i] = *tuple;
}
+static void
+tuplesort_loser_tree_build(Tuplesortstate *state)
+{
+ int k = state->memtupcount; /* k-way */
+ int winner[MAXORDER * 2];
+ int i;
+
+ Assert(k > 0 && k <= MAXORDER);
+
+ if (state->losers == NULL)
+ state->losers = (int *) MemoryContextAlloc(state->base.maincontext,
+ MAXORDER * 2 * sizeof(int));
+
+ for (i = k; i < 2 * k; i++)
+ {
+ winner[i] = i - k;
+ state->losers[i] = i - k;
+ }
+
+ for (i = k - 1; i >= 1; i--)
+ {
+ int l = i * 2;
+ int r = l + 1;
+
+ if (COMPARETUP(state,
+ &state->memtuples[winner[l]],
+ &state->memtuples[winner[r]]) < 0)
+ {
+ winner[i] = winner[l];
+ state->losers[i] = winner[r];
+ }
+ else
+ {
+ winner[i] = winner[r];
+ state->losers[i] = winner[l];
+ }
+ }
+
+ /* set the final winner to state->losers[0] */
+ state->losers[0] = winner[1];
+}
+
+static void
+tuplesort_loser_tree_adjust(Tuplesortstate *state, int start)
+{
+ int winner;
+ int parent;
+
+ Assert(state->losers != NULL);
+ Assert(start >= state->memtupcount && start < state->memtupcount * 2);
+
+ winner = state->losers[start];
+
+ for (parent = start / 2; parent > 0; parent /= 2)
+ {
+ int loser = state->losers[parent];
+
+ /* eof is always the loser */
+ if (loser == LOSER_TREE_EOF)
+ continue;
+
+ if (winner == LOSER_TREE_EOF ||
+ COMPARETUP(state,
+ &state->memtuples[winner],
+ &state->memtuples[loser]) > 0)
+ {
+ state->losers[parent] = winner;
+ winner = loser;
+ }
+ }
+
+ /* set the final winner to state->losers[0] */
+ state->losers[0] = winner;
+}
+
/*
* Function to reverse the sort direction from its current state
*
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index f21ec37da89..8c22eed716b 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -324,6 +324,7 @@ extern PGDLLIMPORT int tcp_user_timeout;
extern PGDLLIMPORT char *role_string;
extern PGDLLIMPORT bool in_hot_standby_guc;
extern PGDLLIMPORT bool trace_sort;
+extern PGDLLIMPORT bool enable_loser_tree;
#ifdef DEBUG_BOUNDED_SORT
extern PGDLLIMPORT bool optimize_bounded_sort;
--
2.34.1