nocfbot-0009-Add-fixed-length-group-absorption-for-RPR.txt
text/plain
Filename: nocfbot-0009-Add-fixed-length-group-absorption-for-RPR.txt
Type: text/plain
Part: 8
Message:
Re: Row pattern recognition
From 5d9be742c6266d8fbff887a0577c37743d429690 Mon Sep 17 00:00:00 2001
From: Henson Choi <assam258@gmail.com>
Date: Sat, 4 Apr 2026 21:47:01 +0900
Subject: [PATCH 09/40] Add fixed-length group absorption for RPR
Extend context absorption to unbounded groups with fixed-length
children (min == max, recursively). Patterns like (A B{2})+ or
((A (B C){2}){2})+ are now absorbable, equivalent to unrolling to
{1,1} VARs at compile time without actually unrolling.
isFixedLengthChildren() recursively verifies min == max for all
children including nested subgroups, extending the existing Case 2
in isUnboundedStart().
Absorption comparison in nfa_states_covered requires states to be at
an ABSORBABLE judgment point, where count-dominance is guaranteed.
The inline advance in nfa_match is generalized to advance bounded
VARs within the absorbable region through END chains to reach the
judgment point.
Fix isAbsorbable propagation in nfa_advance_var and nfa_advance_end
exit paths, where reusing a state object skipped recomputation.
Mark VAR elements in the DFS visited bitmap at nfa_add_state_unique
instead of at nfa_advance_state entry, so that loop-back through ALT
to the same VAR is not incorrectly blocked by cycle detection.
---
src/backend/executor/execRPR.c | 110 ++++++--
src/backend/optimizer/plan/rpr.c | 136 +++++++--
src/test/regress/expected/rpr_base.out | 83 +++++-
src/test/regress/expected/rpr_explain.out | 206 +++++++++++++-
src/test/regress/expected/rpr_nfa.out | 321 ++++++++++++++++++++++
src/test/regress/sql/rpr_base.sql | 36 +++
src/test/regress/sql/rpr_explain.sql | 114 ++++++++
src/test/regress/sql/rpr_nfa.sql | 168 +++++++++++
8 files changed, 1111 insertions(+), 63 deletions(-)
diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c
index 7d0f8fd401c..aec1057e1b2 100644
--- a/src/backend/executor/execRPR.c
+++ b/src/backend/executor/execRPR.c
@@ -1760,6 +1760,10 @@ nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *
RPRNFAState *s;
RPRNFAState *tail = NULL;
+ /* Mark VAR in visited before duplicate check to prevent DFS loops */
+ winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |=
+ ((bitmapword) 1 << BITNUM(state->elemIdx));
+
/* Check for duplicate and find tail */
for (s = ctx->states; s != NULL; s = s->next)
{
@@ -2033,6 +2037,14 @@ nfa_states_covered(RPRPattern *pattern, RPRNFAContext *older, RPRNFAContext *new
elem = &pattern->elements[newerState->elemIdx];
depth = elem->depth;
+ /*
+ * Only compare at absorption judgment points (RPR_ELEM_ABSORBABLE).
+ * Judgment points are where count-dominance guarantees the newer
+ * context's future matches are a subset of the older's.
+ */
+ if (!RPRElemIsAbsorbable(elem))
+ return false;
+
for (olderState = older->states; olderState != NULL; olderState = olderState->next)
{
CHECK_FOR_INTERRUPTS();
@@ -2175,9 +2187,10 @@ nfa_eval_var_match(WindowAggState *winstate, RPRPatternElement *elem,
* - not matched: remove state (exit alternatives already exist from
* previous advance when count >= min was satisfied)
*
- * For simple VARs (min=max=1) followed by END:
- * - Advance to END and update group count before absorb phase
- * - This ensures absorption can compare states by group completion
+ * For VARs that reached max count followed by END:
+ * - Advance through END chain to reach absorption judgment point
+ * - Only deterministic exits (count >= max, max != INF) are handled
+ * - Chains through END elements while count >= max (must-exit path)
*
* Non-VAR elements (ALT, END, FIN) are kept as-is for advance phase.
*/
@@ -2191,9 +2204,9 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched)
RPRNFAState *nextState;
/*
- * Evaluate VAR elements against current row. For simple VARs with END
- * next, advance to END and update group count inline so absorb phase can
- * compare states properly.
+ * Evaluate VAR elements against current row. For VARs that reach max
+ * count with END next, advance through END chain inline so absorb phase
+ * can compare states at judgment points.
*/
for (state = ctx->states; state != NULL; state = nextState)
{
@@ -2223,34 +2236,61 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched)
state->counts[depth] = count;
/*
- * For simple VAR (min=max=1) with END next, advance to END
- * and update group count inline. This keeps state in place,
- * preserving lexical order.
+ * For VAR at max count with END next, advance through END
+ * chain to reach the absorption judgment point. Only
+ * deterministic exits (count >= max, max finite) are handled;
+ * unbounded VARs stay for advance phase.
*/
- if (elem->min == 1 && elem->max == 1 &&
+ if (RPRElemIsAbsorbableBranch(elem) &&
+ !RPRElemIsAbsorbable(elem) &&
+ count >= elem->max &&
RPRElemIsEnd(&elements[elem->next]))
{
RPRPatternElement *endElem = &elements[elem->next];
int endDepth = endElem->depth;
int32 endCount = state->counts[endDepth];
- Assert(count == 1);
-
- /* Increment group count with overflow protection */
+ /* Increment group count */
if (endCount < RPR_COUNT_MAX)
endCount++;
-
- /*
- * END's max can never be exceeded here because
- * nfa_advance_end only loops when count < max, so
- * endCount entering inline advance is at most max-1, and
- * incrementing yields at most max.
- */
Assert(endElem->max == RPR_QUANTITY_INF ||
endCount <= endElem->max);
state->elemIdx = elem->next;
state->counts[endDepth] = endCount;
+
+ /*
+ * Chain through END elements within the absorbable region
+ * (ABSORBABLE_BRANCH) until reaching the judgment point
+ * (ABSORBABLE). Continue only on must-exit path (count
+ * >= max) with END next.
+ */
+ while (RPRElemIsAbsorbableBranch(endElem) &&
+ !RPRElemIsAbsorbable(endElem) &&
+ endCount >= endElem->max &&
+ RPRElemIsEnd(&elements[endElem->next]))
+ {
+ RPRPatternElement *outerEnd = &elements[endElem->next];
+ int outerDepth = outerEnd->depth;
+ int32 outerCount = state->counts[outerDepth];
+
+ /* Reset exited group's count */
+ state->counts[endDepth] = 0;
+
+ /* Increment outer group count */
+ if (outerCount < RPR_COUNT_MAX)
+ outerCount++;
+ Assert(outerEnd->max == RPR_QUANTITY_INF ||
+ outerCount <= outerEnd->max);
+
+ state->elemIdx = endElem->next;
+ state->counts[outerDepth] = outerCount;
+
+ /* Advance to next END in chain */
+ endElem = outerEnd;
+ endDepth = outerDepth;
+ endCount = outerCount;
+ }
}
/* else: stay at VAR for advance phase */
}
@@ -2468,6 +2508,10 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx,
state->elemIdx = elem->next;
nextElem = &elements[state->elemIdx];
+ /* Update isAbsorbable for target element (monotonic) */
+ state->isAbsorbable = state->isAbsorbable &&
+ RPRElemIsAbsorbableBranch(nextElem);
+
/* END->END: increment outer END's count */
if (RPRElemIsEnd(nextElem) && state->counts[nextElem->depth] < RPR_COUNT_MAX)
state->counts[nextElem->depth]++;
@@ -2621,6 +2665,13 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx,
state->elemIdx = elem->next;
nextElem = &elements[state->elemIdx];
+ /*
+ * Update isAbsorbable for target element (monotonic: AND
+ * preserves false)
+ */
+ state->isAbsorbable = state->isAbsorbable &&
+ RPRElemIsAbsorbableBranch(nextElem);
+
/*
* When exiting directly to an outer END, increment its iteration
* count. Simple VARs (min=max=1) handle this via inline advance
@@ -2650,6 +2701,13 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx,
state->elemIdx = elem->next;
nextElem = &elements[state->elemIdx];
+ /*
+ * Update isAbsorbable for target element (monotonic: AND preserves
+ * false)
+ */
+ state->isAbsorbable = state->isAbsorbable &&
+ RPRElemIsAbsorbableBranch(nextElem);
+
/* See comment above: increment outer END count for quantified VARs */
if (RPRElemIsEnd(nextElem))
{
@@ -2686,11 +2744,19 @@ nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx,
nfa_state_free(winstate, state);
return;
}
- winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |=
- ((bitmapword) 1 << BITNUM(state->elemIdx));
elem = &pattern->elements[state->elemIdx];
+ /*
+ * Mark epsilon elements (END, ALT, BEGIN, FIN) in visited to prevent
+ * infinite epsilon cycles. VAR elements are marked later when added to
+ * the state list (nfa_add_state_unique), allowing legitimate loop-back to
+ * the same VAR in a new iteration.
+ */
+ if (!RPRElemIsVar(elem))
+ winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |=
+ ((bitmapword) 1 << BITNUM(state->elemIdx));
+
switch (elem->varId)
{
case RPR_VARID_FIN:
diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c
index 2728a0b9fca..c0e9d134aa9 100644
--- a/src/backend/optimizer/plan/rpr.c
+++ b/src/backend/optimizer/plan/rpr.c
@@ -92,6 +92,8 @@ static bool fillRPRPattern(RPRPatternNode *node, RPRPattern *pat,
static void finalizeRPRPattern(RPRPattern *result);
/* Forward declarations - context absorption */
+static bool isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx,
+ RPRDepth scopeDepth);
static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx);
static void computeAbsorbabilityRecursive(RPRPattern *pattern,
RPRElemIdx startIdx,
@@ -1524,6 +1526,70 @@ finalizeRPRPattern(RPRPattern *result)
*-------------------------------------------------------------------------
*/
+/*
+ * isFixedLengthChildren
+ * Check if all children at scopeDepth have fixed-length quantifiers
+ * (min == max), recursively for nested subgroups.
+ *
+ * A fixed-length group is semantically equivalent to unrolling each child
+ * to {1,1} copies, which is the existing Case 2 already proven correct
+ * for absorption. This check recognizes fixed-length groups at compile
+ * time without actually unrolling them.
+ *
+ * Traverses the flat element array starting at idx. For VAR elements,
+ * checks min == max. For BEGIN elements (nested subgroups), recurses
+ * into the subgroup and also checks the subgroup's END quantifier.
+ * ALT elements are rejected (alternation inside absorbable group is
+ * not supported).
+ *
+ * Returns true if all children are fixed-length, stopping at the END
+ * element at scopeDepth - 1.
+ */
+static bool
+isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, RPRDepth scopeDepth)
+{
+ RPRPatternElement *e = &pattern->elements[idx];
+
+ check_stack_depth();
+
+ while (e->depth == scopeDepth)
+ {
+ if (RPRElemIsVar(e))
+ {
+ if (e->min != e->max)
+ return false;
+ }
+ else if (RPRElemIsBegin(e))
+ {
+ RPRElemIdx childIdx = e->next;
+
+ /* Recurse into subgroup children at scopeDepth + 1 */
+ if (!isFixedLengthChildren(pattern, childIdx, scopeDepth + 1))
+ return false;
+
+ /* Advance past the subgroup to its END element */
+ e = &pattern->elements[e->next];
+ while (e->depth > scopeDepth)
+ e = &pattern->elements[e->next];
+
+ /* e is now the END at scopeDepth; check its quantifier */
+ Assert(RPRElemIsEnd(e) && e->depth == scopeDepth);
+ if (e->min != e->max)
+ return false;
+ }
+ else
+ {
+ /* ALT inside group: not supported for absorption */
+ return false;
+ }
+
+ Assert(e->next != RPR_ELEMIDX_INVALID);
+ e = &pattern->elements[e->next];
+ }
+
+ return true;
+}
+
/*
* isUnboundedStart
* Check if the element at idx starts an unbounded greedy sequence.
@@ -1533,29 +1599,31 @@ finalizeRPRPattern(RPRPattern *result)
* - Greedy (not reluctant)
* - At the start of current scope
*
- * Algorithm:
- * - Traverse elements within current scope (parentDepth to startDepth)
- * - For GROUP: must be unbounded greedy AND contain only simple {1,1} VARs
- * - Sets ABSORBABLE and ABSORBABLE_BRANCH flags on matching elements
- *
* Two cases are handled:
* 1. Simple VAR: A+ B C - A has max=INF, gets both flags
- * 2. Group: (A B)+ C - END has max=INF, all children are {1,1} VARs
- * A,B,END get ABSORBABLE_BRANCH, only END gets ABSORBABLE
+ * 2. Unbounded GROUP with fixed-length children: (A B{2})+ C
+ * All children must have min == max (recursively for nested subgroups).
+ * This is equivalent to unrolling to {1,1} VARs, e.g., (A B B)+ C.
+ * All elements within the group get ABSORBABLE_BRANCH.
+ * Only the unbounded END gets ABSORBABLE (judgment point).
+ * Examples:
+ * (A B{2})+ C - B{2} has min==max, step=3
+ * (A (B C){2} D)+ E - nested {2} subgroup, step=6
+ * ((A (B C){2}){2})+ - doubly nested {2}, step=10
+ * (A ((B C{3}){2} D){2} E)+ F - deep nesting, step=20
*
* Returns false for patterns where absorption cannot work:
* - A B+ (unbounded not at start)
* - A+? B (reluctant quantifier)
* - (A | B)+ (ALT inside group)
- * - (A B+)+ (unbounded element inside group)
- * - ((A B)+ C)+ (nested unbounded groups)
+ * - (A B+)+ (variable-length element inside group)
+ * - (A B{2,5})+ (min != max inside group)
*/
static bool
isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx)
{
RPRPatternElement *elem = &pattern->elements[idx];
RPRDepth startDepth = elem->depth;
- RPRPatternElement *nextElem;
RPRPatternElement *e;
/* Case 1: Simple unbounded VAR at start (greedy only) */
@@ -1568,21 +1636,19 @@ isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx)
}
/*
- * Case 2: Unbounded GROUP - traverse siblings at startDepth and check if
- * they're all simple {1,1} VARs, then check if END at startDepth - 1 is
- * unbounded greedy.
+ * Case 2: Unbounded GROUP with fixed-length children. Each child must
+ * have min == max (recursively for nested subgroups), ensuring a fixed
+ * step size per iteration so that count-dominance holds.
*/
- for (e = elem; e->depth == startDepth; e = nextElem)
- {
- /* Must be simple {1,1} VAR */
- if (!RPRElemIsVar(e) || e->min != 1 || e->max != 1)
- return false;
+ if (!isFixedLengthChildren(pattern, idx, startDepth))
+ return false;
- Assert(e->next != RPR_ELEMIDX_INVALID);
- nextElem = &pattern->elements[e->next];
- }
+ /* Find the END element at startDepth - 1 */
+ e = &pattern->elements[idx];
+ while (e->depth >= startDepth)
+ e = &pattern->elements[e->next];
- /* Now e should be END at startDepth - 1 */
+ /* END must be unbounded greedy */
if (e->depth == startDepth - 1 &&
RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF &&
!RPRElemIsReluctant(e))
@@ -1590,7 +1656,8 @@ isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx)
Assert(e->jump == idx); /* END points back to first child */
/* Set ABSORBABLE_BRANCH on all children, ABSORBABLE on END only */
- for (e = elem; !RPRElemIsEnd(e); e = &pattern->elements[e->next])
+ for (e = elem; !RPRElemIsEnd(e) || e->depth >= startDepth;
+ e = &pattern->elements[e->next])
e->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE;
return true;
@@ -1654,12 +1721,25 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx,
}
else if (RPRElemIsBegin(elem))
{
- /* BEGIN: skip to first child and check that */
- computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable);
-
- /* Mark BEGIN element if contents are absorbable */
- if (*hasAbsorbable)
+ /*
+ * BEGIN: first try to treat this BEGIN's children as an unbounded
+ * group directly (handles nested fixed-length groups like ((A{2}
+ * B{3}){2})+). If that fails, skip to first child and recurse as
+ * before.
+ */
+ if (isUnboundedStart(pattern, elem->next))
+ {
+ *hasAbsorbable = true;
elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ }
+ else
+ {
+ computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable);
+
+ /* Mark BEGIN element if contents are absorbable */
+ if (*hasAbsorbable)
+ elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ }
}
else
{
diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out
index 3168468d0ae..7452cf1b3ab 100644
--- a/src/test/regress/expected/rpr_base.out
+++ b/src/test/regress/expected/rpr_base.out
@@ -3312,7 +3312,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
-------------------------------------------------------------------------------
WindowAgg
Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
- Pattern: (a{2}){2,}
+ Pattern: (a{2}'){2,}"
-> Sort
Sort Key: id
-> Seq Scan on rpr_plan
@@ -4095,6 +4095,87 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
-> Seq Scan on rpr_plan
(6 rows)
+-- Fixed-length group absorbable: (A{2} B{3})+ -> (a{2}' b{3}'){2,}"
+-- All children have min == max, equivalent to unrolling to {1,1}
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A{2} B{3})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ WindowAgg
+ Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a{2}' b{3}')+"
+ -> Sort
+ Sort Key: id
+ -> Seq Scan on rpr_plan
+(6 rows)
+
+-- Nested fixed-length group: (A (B C){2} D)+ -> absorbable
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A (B C){2} D)+)
+ DEFINE A AS val <= 20, B AS val <= 40, C AS val <= 60, D AS val > 60);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ WindowAgg
+ Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a' (b' c'){2}' d')+"
+ -> Sort
+ Sort Key: id
+ -> Seq Scan on rpr_plan
+(6 rows)
+
+-- Nested fixed-length with inner quantifier: ((A{2} B{3}){2})+ -> absorbable
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN (((A{2} B{3}){2})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ WindowAgg
+ Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: ((a{2}' b{3}'){2}')+"
+ -> Sort
+ Sort Key: id
+ -> Seq Scan on rpr_plan
+(6 rows)
+
+-- Non-absorbable fixed-length: (A B{2,5})+ -> no markers (min != max)
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B{2,5})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ WindowAgg
+ Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a b{2,5})+
+ -> Sort
+ Sort Key: id
+ -> Seq Scan on rpr_plan
+(6 rows)
+
+-- Non-absorbable fixed-length: (A B?)+ -> no markers (min != max)
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B?)+)
+ DEFINE A AS val <= 50, B AS val > 50);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ WindowAgg
+ Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a b?)+
+ -> Sort
+ Sort Key: id
+ -> Seq Scan on rpr_plan
+(6 rows)
+
-- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers)
EXPLAIN (COSTS OFF)
SELECT COUNT(*) OVER w FROM rpr_plan
diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out
index 79cbc246039..560f21f44c2 100644
--- a/src/test/regress/expected/rpr_explain.out
+++ b/src/test/regress/expected/rpr_explain.out
@@ -462,10 +462,10 @@ WINDOW w AS (
Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
Pattern: (a' b')+"
Storage: Memory Maximum Storage: NkB
- NFA States: 3 peak, 91 total, 0 merged
- NFA Contexts: 2 peak, 61 total, 0 pruned
+ NFA States: 4 peak, 91 total, 0 merged
+ NFA Contexts: 3 peak, 61 total, 0 pruned
NFA: 1 matched (len 60/60/60.0), 0 mismatched
- NFA: 29 absorbed (len 1/1/1.0), 30 skipped (len 1/1/1.0)
+ NFA: 29 absorbed (len 2/2/2.0), 30 skipped (len 1/1/1.0)
-> Function Scan on generate_series s (actual rows=60.00 loops=1)
(9 rows)
@@ -904,6 +904,188 @@ WINDOW w AS (
-> Function Scan on generate_series s (actual rows=100.00 loops=1)
(9 rows)
+-- Fixed-length group absorption: (A B B)+ C
+-- B B merged to B{2}; absorbable with fixed-length check
+-- step_size=3 (A + B + B); v % 7 cycle gives 2 iterations per match
+CREATE VIEW rpr_ev20b AS
+SELECT count(*) OVER w
+FROM generate_series(1, 70) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+ C)
+ DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20b'), E'\n')) AS line WHERE line ~ 'PATTERN';
+ line
+-------------------------
+ PATTERN ((a b b)+ c)
+(1 row)
+
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 70) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+ C)
+ DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0
+);');
+ rpr_explain_filter
+----------------------------------------------------------------------
+ WindowAgg (actual rows=70.00 loops=1)
+ Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a' b{2}')+" c
+ Storage: Memory Maximum Storage: NkB
+ NFA States: 3 peak, 91 total, 0 merged
+ NFA Contexts: 4 peak, 71 total, 40 pruned
+ NFA: 10 matched (len 7/7/7.0), 0 mismatched
+ NFA: 10 absorbed (len 3/3/3.0), 10 skipped (len 1/1/1.0)
+ -> Function Scan on generate_series s (actual rows=70.00 loops=1)
+(9 rows)
+
+-- Nested fixed-length group absorption: (A (B C){2} D)+ E
+-- step_size = 1 + (1+1)*2 + 1 = 6; v % 13 cycle gives 2 iterations + E
+CREATE VIEW rpr_ev20c AS
+SELECT count(*) OVER w
+FROM generate_series(1, 65) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10),
+ C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12),
+ E AS v % 13 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20c'), E'\n')) AS line WHERE line ~ 'PATTERN';
+ line
+--------------------------------
+ PATTERN ((a (b c){2} d)+ e)
+(1 row)
+
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 65) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10),
+ C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12),
+ E AS v % 13 = 0
+);');
+ rpr_explain_filter
+----------------------------------------------------------------------
+ WindowAgg (actual rows=65.00 loops=1)
+ Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a' (b' c'){2}' d')+" e
+ Storage: Memory Maximum Storage: NkB
+ NFA States: 3 peak, 76 total, 0 merged
+ NFA Contexts: 4 peak, 66 total, 50 pruned
+ NFA: 5 matched (len 13/13/13.0), 0 mismatched
+ NFA: 5 absorbed (len 6/6/6.0), 5 skipped (len 1/1/1.0)
+ -> Function Scan on generate_series s (actual rows=65.00 loops=1)
+(9 rows)
+
+-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F
+-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; v % 41 cycle gives 2 iterations + F
+CREATE VIEW rpr_ev20d AS
+SELECT count(*) OVER w
+FROM generate_series(1, 82) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE A AS v % 41 IN (1, 21),
+ B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35),
+ C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38),
+ D AS v % 41 IN (10, 19, 30, 39),
+ E AS v % 41 IN (20, 40),
+ F AS v % 41 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20d'), E'\n')) AS line WHERE line ~ 'PATTERN';
+ line
+-------------------------------------------
+ PATTERN ((a ((b c c c){2} d){2} e)+ f)
+(1 row)
+
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 82) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE A AS v % 41 IN (1, 21),
+ B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35),
+ C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38),
+ D AS v % 41 IN (10, 19, 30, 39),
+ E AS v % 41 IN (20, 40),
+ F AS v % 41 = 0
+);');
+ rpr_explain_filter
+----------------------------------------------------------------------
+ WindowAgg (actual rows=82.00 loops=1)
+ Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: (a' ((b' c{3}'){2}' d'){2}' e')+" f
+ Storage: Memory Maximum Storage: NkB
+ NFA States: 3 peak, 87 total, 0 merged
+ NFA Contexts: 4 peak, 83 total, 76 pruned
+ NFA: 2 matched (len 41/41/41.0), 0 mismatched
+ NFA: 2 absorbed (len 20/20/20.0), 2 skipped (len 1/1/1.0)
+ -> Function Scan on generate_series s (actual rows=82.00 loops=1)
+(9 rows)
+
+-- 3-level END chain absorption: ((A (B C){2}){2})+
+-- step_size = (1 + (1+1)*2) * 2 = 10; v % 21 cycle gives 2 iterations
+-- END chain: END(BC{2}) -> END(A..{2}) -> END(+, ABSORBABLE)
+CREATE VIEW rpr_ev20e AS
+SELECT count(*) OVER w
+FROM generate_series(1, 42) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE A AS v % 21 IN (1, 6, 11, 16),
+ B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19),
+ C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20)
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20e'), E'\n')) AS line WHERE line ~ 'PATTERN';
+ line
+---------------------------------
+ PATTERN (((a (b c){2}){2})+)
+(1 row)
+
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 42) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE A AS v % 21 IN (1, 6, 11, 16),
+ B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19),
+ C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20)
+);');
+ rpr_explain_filter
+----------------------------------------------------------------------
+ WindowAgg (actual rows=42.00 loops=1)
+ Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
+ Pattern: ((a' (b' c'){2}'){2}')+"
+ Storage: Memory Maximum Storage: NkB
+ NFA States: 5 peak, 47 total, 0 merged
+ NFA Contexts: 5 peak, 43 total, 30 pruned
+ NFA: 2 matched (len 20/20/20.0), 0 mismatched
+ NFA: 2 absorbed (len 10/10/10.0), 8 skipped (len 1/5/3.0)
+ -> Function Scan on generate_series s (actual rows=42.00 loops=1)
+(9 rows)
+
-- ============================================================
-- Match Length Statistics Tests
-- ============================================================
@@ -1894,10 +2076,10 @@ WINDOW w AS (
Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
Pattern: (a' b' c')+"
Storage: Memory Maximum Storage: NkB
- NFA States: 3 peak, 81 total, 0 merged
- NFA Contexts: 3 peak, 61 total, 20 pruned
+ NFA States: 4 peak, 81 total, 0 merged
+ NFA Contexts: 4 peak, 61 total, 20 pruned
NFA: 1 matched (len 60/60/60.0), 0 mismatched
- NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0)
+ NFA: 19 absorbed (len 3/3/3.0), 20 skipped (len 1/1/1.0)
-> Function Scan on generate_series s (actual rows=60.00 loops=1)
(9 rows)
@@ -2461,7 +2643,7 @@ WINDOW w AS (
NFA States: 4 peak, 102 total, 0 merged
NFA Contexts: 2 peak, 41 total, 10 pruned
NFA: 10 matched (len 3/3/3.0), 0 mismatched
- NFA: 20 absorbed (len 1/1/1.0), 0 skipped
+ NFA: 10 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0)
-> Function Scan on generate_series s (actual rows=40.00 loops=1)
(9 rows)
@@ -3158,10 +3340,10 @@ WINDOW w AS (
Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
Pattern: (a' b')+"
Storage: Memory Maximum Storage: NkB
- NFA States: 3 peak, 61 total, 0 merged
- NFA Contexts: 2 peak, 41 total, 0 pruned
+ NFA States: 4 peak, 61 total, 0 merged
+ NFA Contexts: 3 peak, 41 total, 0 pruned
NFA: 1 matched (len 40/40/40.0), 0 mismatched
- NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0)
+ NFA: 19 absorbed (len 2/2/2.0), 20 skipped (len 1/1/1.0)
-> Function Scan on generate_series s (actual rows=40.00 loops=1)
(9 rows)
@@ -3234,12 +3416,12 @@ WINDOW w AS (
----------------------------------------------------------------------
WindowAgg (actual rows=60.00 loops=1)
Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
- Pattern: ((a b){2})+
+ Pattern: ((a' b'){2}')+"
Storage: Memory Maximum Storage: NkB
NFA States: 5 peak, 76 total, 0 merged
NFA Contexts: 4 peak, 61 total, 15 pruned
NFA: 1 matched (len 60/60/60.0), 0 mismatched
- NFA: 0 absorbed, 44 skipped (len 1/4/2.3)
+ NFA: 14 absorbed (len 4/4/4.0), 30 skipped (len 1/2/1.5)
-> Function Scan on generate_series s (actual rows=60.00 loops=1)
(9 rows)
diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out
index 7b5a17fb671..250f7f131b1 100644
--- a/src/test/regress/expected/rpr_nfa.out
+++ b/src/test/regress/expected/rpr_nfa.out
@@ -447,6 +447,327 @@ WINDOW w AS (
7 | {X} | |
(7 rows)
+-- Fixed-length group absorption: (A B{2})+ C
+-- B{2} has min == max, equivalent to unrolling to (A B B)+ C
+WITH test_absorb_fixedlen AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['B']),
+ (4, ARRAY['A']),
+ (5, ARRAY['B']),
+ (6, ARRAY['B']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['B']),
+ (10, ARRAY['C']),
+ (11, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_fixedlen
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B{2})+ C)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags)
+);
+ id | flags | match_start | match_end
+----+-------+-------------+-----------
+ 1 | {A} | 1 | 10
+ 2 | {B} | |
+ 3 | {B} | |
+ 4 | {A} | |
+ 5 | {B} | |
+ 6 | {B} | |
+ 7 | {A} | |
+ 8 | {B} | |
+ 9 | {B} | |
+ 10 | {C} | |
+ 11 | {X} | |
+(11 rows)
+
+-- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+
+-- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check
+WITH test_absorb_consecutive AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['B']),
+ (4, ARRAY['A']),
+ (5, ARRAY['B']),
+ (6, ARRAY['B']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['B']),
+ (10, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_consecutive
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags)
+);
+ id | flags | match_start | match_end
+----+-------+-------------+-----------
+ 1 | {A} | 1 | 9
+ 2 | {B} | |
+ 3 | {B} | |
+ 4 | {A} | |
+ 5 | {B} | |
+ 6 | {B} | |
+ 7 | {A} | |
+ 8 | {B} | |
+ 9 | {B} | |
+ 10 | {X} | |
+(10 rows)
+
+-- Nested fixed-length group absorption: (A (B C){2} D)+ E
+-- Inner group {2} has min == max; absorbable via recursive check
+-- step_size = 1 + (1+1)*2 + 1 = 6
+WITH test_absorb_nested_fixedlen AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['C']),
+ (4, ARRAY['B']),
+ (5, ARRAY['C']),
+ (6, ARRAY['D']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['C']),
+ (10, ARRAY['B']),
+ (11, ARRAY['C']),
+ (12, ARRAY['D']),
+ (13, ARRAY['E']),
+ (14, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_nested_fixedlen
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags),
+ D AS 'D' = ANY(flags),
+ E AS 'E' = ANY(flags)
+);
+ id | flags | match_start | match_end
+----+-------+-------------+-----------
+ 1 | {A} | 1 | 13
+ 2 | {B} | |
+ 3 | {C} | |
+ 4 | {B} | |
+ 5 | {C} | |
+ 6 | {D} | |
+ 7 | {A} | |
+ 8 | {B} | |
+ 9 | {C} | |
+ 10 | {B} | |
+ 11 | {C} | |
+ 12 | {D} | |
+ 13 | {E} | |
+ 14 | {X} | |
+(14 rows)
+
+-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F
+-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; 2 iterations + F = 41 rows
+WITH test_absorb_doubly_nested AS (
+ SELECT v AS id, ARRAY[
+ CASE
+ WHEN v % 41 IN (1, 21) THEN 'A'
+ WHEN v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35) THEN 'B'
+ WHEN v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38) THEN 'C'
+ WHEN v % 41 IN (10, 19, 30, 39) THEN 'D'
+ WHEN v % 41 IN (20, 40) THEN 'E'
+ WHEN v % 41 = 0 THEN 'F'
+ ELSE 'X'
+ END
+ ] AS flags
+ FROM generate_series(1, 82) AS s(v)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_doubly_nested
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags),
+ D AS 'D' = ANY(flags),
+ E AS 'E' = ANY(flags),
+ F AS 'F' = ANY(flags)
+);
+ id | flags | match_start | match_end
+----+-------+-------------+-----------
+ 1 | {A} | 1 | 41
+ 2 | {B} | |
+ 3 | {C} | |
+ 4 | {C} | |
+ 5 | {C} | |
+ 6 | {B} | |
+ 7 | {C} | |
+ 8 | {C} | |
+ 9 | {C} | |
+ 10 | {D} | |
+ 11 | {B} | |
+ 12 | {C} | |
+ 13 | {C} | |
+ 14 | {C} | |
+ 15 | {B} | |
+ 16 | {C} | |
+ 17 | {C} | |
+ 18 | {C} | |
+ 19 | {D} | |
+ 20 | {E} | |
+ 21 | {A} | |
+ 22 | {B} | |
+ 23 | {C} | |
+ 24 | {C} | |
+ 25 | {C} | |
+ 26 | {B} | |
+ 27 | {C} | |
+ 28 | {C} | |
+ 29 | {C} | |
+ 30 | {D} | |
+ 31 | {B} | |
+ 32 | {C} | |
+ 33 | {C} | |
+ 34 | {C} | |
+ 35 | {B} | |
+ 36 | {C} | |
+ 37 | {C} | |
+ 38 | {C} | |
+ 39 | {D} | |
+ 40 | {E} | |
+ 41 | {F} | |
+ 42 | {A} | 42 | 82
+ 43 | {B} | |
+ 44 | {C} | |
+ 45 | {C} | |
+ 46 | {C} | |
+ 47 | {B} | |
+ 48 | {C} | |
+ 49 | {C} | |
+ 50 | {C} | |
+ 51 | {D} | |
+ 52 | {B} | |
+ 53 | {C} | |
+ 54 | {C} | |
+ 55 | {C} | |
+ 56 | {B} | |
+ 57 | {C} | |
+ 58 | {C} | |
+ 59 | {C} | |
+ 60 | {D} | |
+ 61 | {E} | |
+ 62 | {A} | |
+ 63 | {B} | |
+ 64 | {C} | |
+ 65 | {C} | |
+ 66 | {C} | |
+ 67 | {B} | |
+ 68 | {C} | |
+ 69 | {C} | |
+ 70 | {C} | |
+ 71 | {D} | |
+ 72 | {B} | |
+ 73 | {C} | |
+ 74 | {C} | |
+ 75 | {C} | |
+ 76 | {B} | |
+ 77 | {C} | |
+ 78 | {C} | |
+ 79 | {C} | |
+ 80 | {D} | |
+ 81 | {E} | |
+ 82 | {F} | |
+(82 rows)
+
+-- 3-level END chain: ((A (B C){2}){2})+
+-- Tests END(BC{2}) -> END(A..{2}) -> END(+) chaining
+-- 2 iterations of +, each 10 rows: (A B C B C)(A B C B C)
+WITH test_absorb_3level_end AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']), -- 1st + iter, 1st {2}, A
+ (2, ARRAY['B']),
+ (3, ARRAY['C']),
+ (4, ARRAY['B']),
+ (5, ARRAY['C']), -- 1st (BC){2} done
+ (6, ARRAY['A']), -- 1st + iter, 2nd {2}, A
+ (7, ARRAY['B']),
+ (8, ARRAY['C']),
+ (9, ARRAY['B']),
+ (10, ARRAY['C']), -- 2nd (BC){2} done, 1st {2} done, 1st + iter done
+ (11, ARRAY['A']), -- 2nd + iter, 1st {2}, A
+ (12, ARRAY['B']),
+ (13, ARRAY['C']),
+ (14, ARRAY['B']),
+ (15, ARRAY['C']),
+ (16, ARRAY['A']), -- 2nd + iter, 2nd {2}, A
+ (17, ARRAY['B']),
+ (18, ARRAY['C']),
+ (19, ARRAY['B']),
+ (20, ARRAY['C']), -- 2nd + iter done
+ (21, ARRAY['X']) -- no match, + ends
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_3level_end
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags)
+);
+ id | flags | match_start | match_end
+----+-------+-------------+-----------
+ 1 | {A} | 1 | 20
+ 2 | {B} | |
+ 3 | {C} | |
+ 4 | {B} | |
+ 5 | {C} | |
+ 6 | {A} | |
+ 7 | {B} | |
+ 8 | {C} | |
+ 9 | {B} | |
+ 10 | {C} | |
+ 11 | {A} | |
+ 12 | {B} | |
+ 13 | {C} | |
+ 14 | {B} | |
+ 15 | {C} | |
+ 16 | {A} | |
+ 17 | {B} | |
+ 18 | {C} | |
+ 19 | {B} | |
+ 20 | {C} | |
+ 21 | {X} | |
+(21 rows)
+
-- Multiple unbounded: A+ B+ (first element unbounded enables absorption)
WITH test_multi_unbounded AS (
SELECT * FROM (VALUES
diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql
index cf6c062ae85..8c23c7598a3 100644
--- a/src/test/regress/sql/rpr_base.sql
+++ b/src/test/regress/sql/rpr_base.sql
@@ -2600,6 +2600,42 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ B | A B)*)
DEFINE A AS val <= 50, B AS val > 50);
+-- Fixed-length group absorbable: (A{2} B{3})+ -> (a{2}' b{3}'){2,}"
+-- All children have min == max, equivalent to unrolling to {1,1}
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A{2} B{3})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+
+-- Nested fixed-length group: (A (B C){2} D)+ -> absorbable
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A (B C){2} D)+)
+ DEFINE A AS val <= 20, B AS val <= 40, C AS val <= 60, D AS val > 60);
+
+-- Nested fixed-length with inner quantifier: ((A{2} B{3}){2})+ -> absorbable
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN (((A{2} B{3}){2})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+
+-- Non-absorbable fixed-length: (A B{2,5})+ -> no markers (min != max)
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B{2,5})+)
+ DEFINE A AS val <= 50, B AS val > 50);
+
+-- Non-absorbable fixed-length: (A B?)+ -> no markers (min != max)
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) OVER w FROM rpr_plan
+WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B?)+)
+ DEFINE A AS val <= 50, B AS val > 50);
+
-- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers)
EXPLAIN (COSTS OFF)
SELECT COUNT(*) OVER w FROM rpr_plan
diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql
index 93e06b0cbdf..237f0366631 100644
--- a/src/test/regress/sql/rpr_explain.sql
+++ b/src/test/regress/sql/rpr_explain.sql
@@ -578,6 +578,120 @@ WINDOW w AS (
DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0
);');
+-- Fixed-length group absorption: (A B B)+ C
+-- B B merged to B{2}; absorbable with fixed-length check
+-- step_size=3 (A + B + B); v % 7 cycle gives 2 iterations per match
+CREATE VIEW rpr_ev20b AS
+SELECT count(*) OVER w
+FROM generate_series(1, 70) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+ C)
+ DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20b'), E'\n')) AS line WHERE line ~ 'PATTERN';
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 70) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+ C)
+ DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0
+);');
+
+-- Nested fixed-length group absorption: (A (B C){2} D)+ E
+-- step_size = 1 + (1+1)*2 + 1 = 6; v % 13 cycle gives 2 iterations + E
+CREATE VIEW rpr_ev20c AS
+SELECT count(*) OVER w
+FROM generate_series(1, 65) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10),
+ C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12),
+ E AS v % 13 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20c'), E'\n')) AS line WHERE line ~ 'PATTERN';
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 65) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10),
+ C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12),
+ E AS v % 13 = 0
+);');
+
+-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F
+-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; v % 41 cycle gives 2 iterations + F
+CREATE VIEW rpr_ev20d AS
+SELECT count(*) OVER w
+FROM generate_series(1, 82) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE A AS v % 41 IN (1, 21),
+ B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35),
+ C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38),
+ D AS v % 41 IN (10, 19, 30, 39),
+ E AS v % 41 IN (20, 40),
+ F AS v % 41 = 0
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20d'), E'\n')) AS line WHERE line ~ 'PATTERN';
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 82) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE A AS v % 41 IN (1, 21),
+ B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35),
+ C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38),
+ D AS v % 41 IN (10, 19, 30, 39),
+ E AS v % 41 IN (20, 40),
+ F AS v % 41 = 0
+);');
+
+-- 3-level END chain absorption: ((A (B C){2}){2})+
+-- step_size = (1 + (1+1)*2) * 2 = 10; v % 21 cycle gives 2 iterations
+-- END chain: END(BC{2}) -> END(A..{2}) -> END(+, ABSORBABLE)
+CREATE VIEW rpr_ev20e AS
+SELECT count(*) OVER w
+FROM generate_series(1, 42) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE A AS v % 21 IN (1, 6, 11, 16),
+ B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19),
+ C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20)
+);
+SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20e'), E'\n')) AS line WHERE line ~ 'PATTERN';
+SELECT rpr_explain_filter('
+EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF)
+SELECT count(*) OVER w
+FROM generate_series(1, 42) AS s(v)
+WINDOW w AS (
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE A AS v % 21 IN (1, 6, 11, 16),
+ B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19),
+ C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20)
+);');
+
-- ============================================================
-- Match Length Statistics Tests
-- ============================================================
diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql
index 5edcb3357e6..aaa7b44f789 100644
--- a/src/test/regress/sql/rpr_nfa.sql
+++ b/src/test/regress/sql/rpr_nfa.sql
@@ -346,6 +346,174 @@ WINDOW w AS (
B AS 'B' = ANY(flags)
);
+-- Fixed-length group absorption: (A B{2})+ C
+-- B{2} has min == max, equivalent to unrolling to (A B B)+ C
+WITH test_absorb_fixedlen AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['B']),
+ (4, ARRAY['A']),
+ (5, ARRAY['B']),
+ (6, ARRAY['B']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['B']),
+ (10, ARRAY['C']),
+ (11, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_fixedlen
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B{2})+ C)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags)
+);
+
+-- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+
+-- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check
+WITH test_absorb_consecutive AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['B']),
+ (4, ARRAY['A']),
+ (5, ARRAY['B']),
+ (6, ARRAY['B']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['B']),
+ (10, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_consecutive
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A B B)+)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags)
+);
+
+-- Nested fixed-length group absorption: (A (B C){2} D)+ E
+-- Inner group {2} has min == max; absorbable via recursive check
+-- step_size = 1 + (1+1)*2 + 1 = 6
+WITH test_absorb_nested_fixedlen AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']),
+ (2, ARRAY['B']),
+ (3, ARRAY['C']),
+ (4, ARRAY['B']),
+ (5, ARRAY['C']),
+ (6, ARRAY['D']),
+ (7, ARRAY['A']),
+ (8, ARRAY['B']),
+ (9, ARRAY['C']),
+ (10, ARRAY['B']),
+ (11, ARRAY['C']),
+ (12, ARRAY['D']),
+ (13, ARRAY['E']),
+ (14, ARRAY['X'])
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_nested_fixedlen
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A (B C){2} D)+ E)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags),
+ D AS 'D' = ANY(flags),
+ E AS 'E' = ANY(flags)
+);
+
+-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F
+-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; 2 iterations + F = 41 rows
+WITH test_absorb_doubly_nested AS (
+ SELECT v AS id, ARRAY[
+ CASE
+ WHEN v % 41 IN (1, 21) THEN 'A'
+ WHEN v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35) THEN 'B'
+ WHEN v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18,
+ 23,24,25, 27,28,29, 32,33,34, 36,37,38) THEN 'C'
+ WHEN v % 41 IN (10, 19, 30, 39) THEN 'D'
+ WHEN v % 41 IN (20, 40) THEN 'E'
+ WHEN v % 41 = 0 THEN 'F'
+ ELSE 'X'
+ END
+ ] AS flags
+ FROM generate_series(1, 82) AS s(v)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_doubly_nested
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN ((A ((B C C C){2} D){2} E)+ F)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags),
+ D AS 'D' = ANY(flags),
+ E AS 'E' = ANY(flags),
+ F AS 'F' = ANY(flags)
+);
+
+-- 3-level END chain: ((A (B C){2}){2})+
+-- Tests END(BC{2}) -> END(A..{2}) -> END(+) chaining
+-- 2 iterations of +, each 10 rows: (A B C B C)(A B C B C)
+WITH test_absorb_3level_end AS (
+ SELECT * FROM (VALUES
+ (1, ARRAY['A']), -- 1st + iter, 1st {2}, A
+ (2, ARRAY['B']),
+ (3, ARRAY['C']),
+ (4, ARRAY['B']),
+ (5, ARRAY['C']), -- 1st (BC){2} done
+ (6, ARRAY['A']), -- 1st + iter, 2nd {2}, A
+ (7, ARRAY['B']),
+ (8, ARRAY['C']),
+ (9, ARRAY['B']),
+ (10, ARRAY['C']), -- 2nd (BC){2} done, 1st {2} done, 1st + iter done
+ (11, ARRAY['A']), -- 2nd + iter, 1st {2}, A
+ (12, ARRAY['B']),
+ (13, ARRAY['C']),
+ (14, ARRAY['B']),
+ (15, ARRAY['C']),
+ (16, ARRAY['A']), -- 2nd + iter, 2nd {2}, A
+ (17, ARRAY['B']),
+ (18, ARRAY['C']),
+ (19, ARRAY['B']),
+ (20, ARRAY['C']), -- 2nd + iter done
+ (21, ARRAY['X']) -- no match, + ends
+ ) AS t(id, flags)
+)
+SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end
+FROM test_absorb_3level_end
+WINDOW w AS (
+ ORDER BY id
+ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (((A (B C){2}){2})+)
+ DEFINE
+ A AS 'A' = ANY(flags),
+ B AS 'B' = ANY(flags),
+ C AS 'C' = ANY(flags)
+);
+
-- Multiple unbounded: A+ B+ (first element unbounded enables absorption)
WITH test_multi_unbounded AS (
SELECT * FROM (VALUES
--
2.50.1 (Apple Git-155)