v47-0004-Row-pattern-recognition-patch-planner.patch
application/octet-stream
Filename: v47-0004-Row-pattern-recognition-patch-planner.patch
Type: application/octet-stream
Part: 3
Message:
Re: Row pattern recognition
From 1a9f209676a9f2095bce03411380296b59729b10 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Sat, 2 May 2026 13:40:29 +0900
Subject: [PATCH v47 4/9] Row pattern recognition patch (planner).
---
src/backend/nodes/nodeFuncs.c | 42 +
src/backend/optimizer/path/allpaths.c | 79 +
src/backend/optimizer/path/costsize.c | 29 +
src/backend/optimizer/plan/Makefile | 1 +
src/backend/optimizer/plan/createplan.c | 417 ++++-
src/backend/optimizer/plan/meson.build | 1 +
src/backend/optimizer/plan/planner.c | 17 +-
src/backend/optimizer/plan/rpr.c | 1993 +++++++++++++++++++++
src/backend/optimizer/plan/setrefs.c | 29 +-
src/backend/optimizer/prep/prepjointree.c | 9 +
src/include/nodes/plannodes.h | 103 ++
src/include/optimizer/rpr.h | 65 +
12 files changed, 2781 insertions(+), 4 deletions(-)
create mode 100644 src/backend/optimizer/plan/rpr.c
create mode 100644 src/include/optimizer/rpr.h
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index f968ac68314..651d049cfa2 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -69,6 +69,9 @@ exprType(const Node *expr)
case T_MergeSupportFunc:
type = ((const MergeSupportFunc *) expr)->msftype;
break;
+ case T_RPRNavExpr:
+ type = ((const RPRNavExpr *) expr)->resulttype;
+ break;
case T_SubscriptingRef:
type = ((const SubscriptingRef *) expr)->refrestype;
break;
@@ -853,6 +856,9 @@ exprCollation(const Node *expr)
case T_MergeSupportFunc:
coll = ((const MergeSupportFunc *) expr)->msfcollid;
break;
+ case T_RPRNavExpr:
+ coll = ((const RPRNavExpr *) expr)->resultcollid;
+ break;
case T_SubscriptingRef:
coll = ((const SubscriptingRef *) expr)->refcollid;
break;
@@ -1162,6 +1168,9 @@ exprSetCollation(Node *expr, Oid collation)
case T_MergeSupportFunc:
((MergeSupportFunc *) expr)->msfcollid = collation;
break;
+ case T_RPRNavExpr:
+ ((RPRNavExpr *) expr)->resultcollid = collation;
+ break;
case T_SubscriptingRef:
((SubscriptingRef *) expr)->refcollid = collation;
break;
@@ -1437,6 +1446,9 @@ exprLocation(const Node *expr)
case T_MergeSupportFunc:
loc = ((const MergeSupportFunc *) expr)->location;
break;
+ case T_RPRNavExpr:
+ loc = ((const RPRNavExpr *) expr)->location;
+ break;
case T_SubscriptingRef:
/* just use container argument's location */
loc = exprLocation((Node *) ((const SubscriptingRef *) expr)->refexpr);
@@ -2198,6 +2210,18 @@ expression_tree_walker_impl(Node *node,
return true;
}
break;
+ case T_RPRNavExpr:
+ {
+ RPRNavExpr *expr = (RPRNavExpr *) node;
+
+ if (WALK(expr->arg))
+ return true;
+ if (expr->offset_arg && WALK(expr->offset_arg))
+ return true;
+ if (expr->compound_offset_arg && WALK(expr->compound_offset_arg))
+ return true;
+ }
+ break;
case T_SubscriptingRef:
{
SubscriptingRef *sbsref = (SubscriptingRef *) node;
@@ -2431,6 +2455,8 @@ expression_tree_walker_impl(Node *node,
return true;
if (WALK(wc->endOffset))
return true;
+ if (WALK(wc->defineClause))
+ return true;
}
break;
case T_CTECycleClause:
@@ -2823,6 +2849,8 @@ query_tree_walker_impl(Query *query,
return true;
if (WALK(wc->endOffset))
return true;
+ if (WALK(wc->defineClause))
+ return true;
}
}
@@ -3143,6 +3171,18 @@ expression_tree_mutator_impl(Node *node,
return (Node *) newnode;
}
break;
+ case T_RPRNavExpr:
+ {
+ RPRNavExpr *nav = (RPRNavExpr *) node;
+ RPRNavExpr *newnode;
+
+ FLATCOPY(newnode, nav, RPRNavExpr);
+ MUTATE(newnode->arg, nav->arg, Expr *);
+ MUTATE(newnode->offset_arg, nav->offset_arg, Expr *);
+ MUTATE(newnode->compound_offset_arg, nav->compound_offset_arg, Expr *);
+ return (Node *) newnode;
+ }
+ break;
case T_SubscriptingRef:
{
SubscriptingRef *sbsref = (SubscriptingRef *) node;
@@ -3565,6 +3605,7 @@ expression_tree_mutator_impl(Node *node,
MUTATE(newnode->orderClause, wc->orderClause, List *);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
MUTATE(newnode->endOffset, wc->endOffset, Node *);
+ MUTATE(newnode->defineClause, wc->defineClause, List *);
return (Node *) newnode;
}
break;
@@ -3914,6 +3955,7 @@ query_tree_mutator_impl(Query *query,
FLATCOPY(newnode, wc, WindowClause);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
MUTATE(newnode->endOffset, wc->endOffset, Node *);
+ MUTATE(newnode->defineClause, wc->defineClause, List *);
resultlist = lappend(resultlist, (Node *) newnode);
}
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 61093f222a1..470029e42e0 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2463,6 +2463,17 @@ find_window_run_conditions(Query *subquery, AttrNumber attno,
wclause = (WindowClause *) list_nth(subquery->windowClause,
wfunc->winref - 1);
+ /*
+ * If a DEFINE clause exists, we cannot push down a run condition. In the
+ * case, a window partition (or frame) is divided into multiple reduced
+ * frames and each frame should be evaluated to the end of the partition
+ * (or full frame end). This means we cannot apply the run condition
+ * optimization because it stops evaluation window functions in certain
+ * cases.
+ */
+ if (wclause->defineClause != NIL)
+ return false;
+
req.type = T_SupportRequestWFuncMonotonic;
req.window_func = wfunc;
req.window_clause = wclause;
@@ -4739,6 +4750,74 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
if (contain_volatile_functions(texpr))
continue;
+ /*
+ * If any RPR (Row Pattern Recognition) window clause references this
+ * column in its DEFINE clause, don't remove it. The DEFINE
+ * expression needs these columns in the tuplestore slot for pattern
+ * matching evaluation, even if the outer query doesn't reference
+ * them.
+ */
+ if (IsA(texpr, Var))
+ {
+ Var *var = (Var *) texpr;
+ ListCell *wlc;
+ bool needed_by_define = false;
+
+ foreach(wlc, subquery->windowClause)
+ {
+ WindowClause *wc = lfirst_node(WindowClause, wlc);
+
+ if (wc->defineClause != NIL)
+ {
+ List *vars = pull_var_clause((Node *) wc->defineClause, 0);
+ ListCell *vlc;
+
+ foreach(vlc, vars)
+ {
+ Var *dvar = (Var *) lfirst(vlc);
+
+ if (dvar->varattno == var->varattno)
+ {
+ needed_by_define = true;
+ break;
+ }
+ }
+ list_free(vars);
+ if (needed_by_define)
+ break;
+ }
+ }
+ if (needed_by_define)
+ continue;
+ }
+
+ /*
+ * If it's a window function referencing a window clause with RPR,
+ * don't remove it. Even when the window function result is unused by
+ * the outer query, the RPR pattern matching (frame reduction via
+ * DEFINE/PATTERN) must still execute. Replacing this with NULL would
+ * leave no active window functions for the WindowClause, causing the
+ * planner to omit the WindowAgg node entirely.
+ */
+ if (IsA(texpr, WindowFunc))
+ {
+ WindowFunc *wfunc = (WindowFunc *) texpr;
+ ListCell *wlc;
+
+ foreach(wlc, subquery->windowClause)
+ {
+ WindowClause *wc = lfirst_node(WindowClause, wlc);
+
+ if (wc->winref == wfunc->winref &&
+ wc->defineClause != NIL)
+ {
+ break;
+ }
+ }
+ if (wlc != NULL)
+ continue;
+ }
+
/*
* OK, we don't need it. Replace the expression with a NULL constant.
* Preserve the exposed type of the expression, in case something
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1c575e56ff6..b38cad9f121 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -104,6 +104,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/plancat.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/rpr.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
@@ -3228,7 +3229,35 @@ cost_windowagg(Path *path, PlannerInfo *root,
* many rows the window function will fetch, it's hard to do better. In
* any case, it's a good estimate for all the built-in window functions,
* so we'll just do this for now.
+ *
+ * Moreover, if row pattern recognition is used, we charge the DEFINE
+ * expressions once per tuple for each variable that appears in PATTERN.
*/
+ if (winclause->rpPattern)
+ {
+ List *pattern_vars;
+ ListCell *lc2;
+ QualCost defcosts;
+
+ pattern_vars = collectPatternVariables(winclause->rpPattern);
+
+ foreach(lc2, pattern_vars)
+ {
+ char *ptname = strVal(lfirst(lc2));
+
+ foreach_node(TargetEntry, def, winclause->defineClause)
+ {
+ if (!strcmp(ptname, def->resname))
+ {
+ cost_qual_eval_node(&defcosts, (Node *) def->expr, root);
+ startup_cost += defcosts.startup;
+ total_cost += defcosts.per_tuple * input_tuples;
+ }
+ }
+ }
+ list_free_deep(pattern_vars);
+ }
+
foreach(lc, windowFuncs)
{
WindowFunc *wfunc = lfirst_node(WindowFunc, lc);
diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile
index 80ef162e484..7e0167789d8 100644
--- a/src/backend/optimizer/plan/Makefile
+++ b/src/backend/optimizer/plan/Makefile
@@ -19,6 +19,7 @@ OBJS = \
planagg.o \
planmain.o \
planner.o \
+ rpr.o \
setrefs.o \
subselect.o
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index de6a183da79..52205cc7159 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -16,6 +16,7 @@
*/
#include "postgres.h"
+#include "common/int.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "catalog/pg_class.h"
@@ -36,6 +37,7 @@
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/subselect.h"
+#include "optimizer/rpr.h"
#include "optimizer/tlist.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
@@ -288,7 +290,11 @@ static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
- List *runCondition, List *qual, bool topWindow,
+ List *runCondition, RPSkipTo rpSkipTo,
+ RPRPattern *compiledPattern,
+ List *defineClause,
+ Bitmapset *defineMatchStartDependent,
+ List *qual, bool topWindow,
Plan *lefttree);
static Group *make_group(List *tlist, List *qual, int numGroupCols,
AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
@@ -2457,6 +2463,363 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path)
return plan;
}
+/*
+ * NavOffsetContext - context for compute_nav_offsets walker.
+ *
+ * Collects both backward reach (PREV, LAST-with-offset, compound
+ * PREV_LAST/NEXT_LAST) and forward-from-match-start reach (FIRST,
+ * compound PREV_FIRST/NEXT_FIRST) in a single tree walk.
+ */
+typedef struct NavOffsetContext
+{
+ int64 maxOffset; /* max PREV/LAST backward offset (>= 0) */
+ bool maxNeedsEval; /* non-constant PREV/LAST offset found */
+ bool maxOverflow; /* constant offset overflow detected */
+ int64 firstOffset; /* min FIRST offset (may be negative for
+ * PREV_FIRST) */
+ bool hasFirst; /* any FIRST node found */
+ bool firstNeedsEval; /* non-constant FIRST offset found */
+} NavOffsetContext;
+
+/*
+ * Helper: extract constant offset from an expression, handling NULL/negative.
+ * If expr is NULL, returns defaultOffset.
+ * Returns true if constant, false if non-constant (Param, cast, etc.).
+ */
+static bool
+extract_const_offset(Expr *expr, int64 defaultOffset, int64 *result)
+{
+ if (expr == NULL)
+ {
+ *result = defaultOffset;
+ return true;
+ }
+
+ if (IsA(expr, Const))
+ {
+ Const *c = (Const *) expr;
+
+ if (c->constisnull)
+ *result = 0; /* runtime error; safe placeholder */
+ else
+ {
+ *result = DatumGetInt64(c->constvalue);
+ if (*result < 0)
+ *result = 0; /* runtime error; safe placeholder */
+ }
+ return true;
+ }
+
+ return false; /* non-constant */
+}
+
+/*
+ * nav_offset_walker
+ * Expression tree walker for compute_nav_offsets.
+ *
+ * For each RPRNavExpr found, extract its constant offset(s) and update the
+ * NavOffsetContext with the maximum backward reach (maxOffset) and minimum
+ * forward reach (firstOffset). Handles simple navigation (PREV, NEXT,
+ * FIRST, LAST) and compound forms (PREV_FIRST, NEXT_FIRST, PREV_LAST,
+ * NEXT_LAST) by combining inner and outer offsets.
+ *
+ * Non-constant offsets set maxNeedsEval or firstNeedsEval. Overflow sets
+ * maxOverflow or firstOverflow for RETAIN_ALL fallback.
+ */
+static bool
+nav_offset_walker(Node *node, void *ctx)
+{
+ NavOffsetContext *context = (NavOffsetContext *) ctx;
+
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, RPRNavExpr))
+ {
+ RPRNavExpr *nav = (RPRNavExpr *) node;
+
+ /*
+ * Simple PREV(v, N) and LAST(v, N): backward reach from currentpos.
+ * LAST without offset = currentpos, no backward reach. NEXT: forward
+ * only, irrelevant for trim.
+ */
+ if (nav->kind == RPR_NAV_PREV ||
+ (nav->kind == RPR_NAV_LAST && nav->offset_arg != NULL))
+ {
+ if (!context->maxNeedsEval)
+ {
+ int64 offset;
+
+ if (extract_const_offset(nav->offset_arg, 1, &offset))
+ context->maxOffset = Max(context->maxOffset, offset);
+ else
+ context->maxNeedsEval = true;
+ }
+ }
+
+ /*
+ * Simple FIRST(v, N): forward reach from match_start. Smaller N means
+ * older rows needed.
+ */
+ if (nav->kind == RPR_NAV_FIRST)
+ {
+ context->hasFirst = true;
+
+ if (!context->firstNeedsEval)
+ {
+ int64 offset;
+
+ if (extract_const_offset(nav->offset_arg, 0, &offset))
+ context->firstOffset = Min(context->firstOffset, offset);
+ else
+ context->firstNeedsEval = true;
+ }
+ }
+
+ /*
+ * Compound PREV_LAST / NEXT_LAST: base = currentpos. PREV_LAST(v, N,
+ * M): target = currentpos - N - M -> lookback = N + M NEXT_LAST(v, N,
+ * M): target = currentpos - N + M -> lookback = max(N - M, 0)
+ */
+ if (nav->kind == RPR_NAV_PREV_LAST ||
+ nav->kind == RPR_NAV_NEXT_LAST)
+ {
+ if (!context->maxNeedsEval)
+ {
+ int64 inner,
+ outer,
+ combined;
+
+ if (extract_const_offset(nav->offset_arg, 0, &inner) &&
+ extract_const_offset(nav->compound_offset_arg, 1, &outer))
+ {
+ if (nav->kind == RPR_NAV_PREV_LAST)
+ {
+ if (pg_add_s64_overflow(inner, outer, &combined))
+ {
+ context->maxOverflow = true;
+ return false;
+ }
+ }
+ else
+ combined = (inner > outer) ? inner - outer : 0;
+
+ context->maxOffset = Max(context->maxOffset, combined);
+ }
+ else
+ context->maxNeedsEval = true;
+ }
+ }
+
+ /*
+ * Compound PREV_FIRST / NEXT_FIRST: base = match_start. PREV_FIRST(v,
+ * N, M): target = match_start + N - M NEXT_FIRST(v, N, M): target =
+ * match_start + N + M The combined offset (N+/-M) from match_start
+ * can be negative, meaning rows before match_start are needed.
+ */
+ if (nav->kind == RPR_NAV_PREV_FIRST ||
+ nav->kind == RPR_NAV_NEXT_FIRST)
+ {
+ context->hasFirst = true;
+
+ if (!context->firstNeedsEval)
+ {
+ int64 inner,
+ outer,
+ combined;
+
+ if (extract_const_offset(nav->offset_arg, 0, &inner) &&
+ extract_const_offset(nav->compound_offset_arg, 1, &outer))
+ {
+ if (nav->kind == RPR_NAV_PREV_FIRST)
+ {
+ /*
+ * combined = inner - outer. Both are non-negative,
+ * so the result >= -INT64_MAX, which cannot underflow
+ * int64. No overflow check needed.
+ */
+ combined = inner - outer;
+ }
+ else
+ {
+ /*
+ * NEXT_FIRST: combined = inner + outer. This can
+ * overflow, but the result is always >= 0, so it
+ * never updates firstOffset (which tracks the
+ * minimum). Clamp to INT64_MAX on overflow.
+ */
+ if (pg_add_s64_overflow(inner, outer, &combined))
+ combined = INT64_MAX;
+ }
+
+ context->firstOffset = Min(context->firstOffset, combined);
+ }
+ else
+ context->firstNeedsEval = true;
+ }
+ }
+
+ /* Don't walk into RPRNavExpr children */
+ return false;
+ }
+
+ return expression_tree_walker(node, nav_offset_walker, ctx);
+}
+
+/*
+ * compute_nav_offsets
+ * Compute navigation offsets for tuplestore trim in a single pass.
+ *
+ * Walks all DEFINE clause expressions once, computing:
+ * - maxOffset: max backward reach from PREV, LAST-with-offset,
+ * compound PREV_LAST/NEXT_LAST
+ * - hasFirst/firstOffset: min forward-from-match-start reach from
+ * FIRST, compound PREV_FIRST/NEXT_FIRST
+ */
+static void
+compute_nav_offsets(List *defineClause,
+ RPRNavOffsetKind *maxKind, int64 *maxResult,
+ bool *hasFirst,
+ RPRNavOffsetKind *firstKind, int64 *firstResult)
+{
+ NavOffsetContext ctx;
+ ListCell *lc;
+
+ ctx.maxOffset = 0;
+ ctx.maxNeedsEval = false;
+ ctx.maxOverflow = false;
+ ctx.firstOffset = INT64_MAX; /* sentinel: no FIRST found yet */
+ ctx.hasFirst = false;
+ ctx.firstNeedsEval = false;
+
+ foreach(lc, defineClause)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lc);
+
+ nav_offset_walker((Node *) te->expr, &ctx);
+ }
+
+ /* Max backward offset */
+ if (ctx.maxOverflow)
+ {
+ *maxKind = RPR_NAV_OFFSET_RETAIN_ALL;
+ *maxResult = 0;
+ }
+ else if (ctx.maxNeedsEval)
+ {
+ *maxKind = RPR_NAV_OFFSET_NEEDS_EVAL;
+ *maxResult = 0;
+ }
+ else
+ {
+ *maxKind = RPR_NAV_OFFSET_FIXED;
+ *maxResult = ctx.maxOffset;
+ }
+
+ /* First offset (can be negative for compound PREV_FIRST) */
+ *hasFirst = ctx.hasFirst;
+ if (ctx.hasFirst)
+ {
+ if (ctx.firstNeedsEval)
+ {
+ *firstKind = RPR_NAV_OFFSET_NEEDS_EVAL;
+ *firstResult = 0;
+ }
+ else if (ctx.firstOffset == INT64_MAX)
+ {
+ *firstKind = RPR_NAV_OFFSET_FIXED;
+ *firstResult = 0; /* only implicit FIRST(v) */
+ }
+ else
+ {
+ *firstKind = RPR_NAV_OFFSET_FIXED;
+ *firstResult = ctx.firstOffset; /* may be negative */
+ }
+ }
+ else
+ {
+ *firstKind = RPR_NAV_OFFSET_FIXED;
+ *firstResult = 0;
+ }
+}
+
+/*
+ * has_match_start_dependency
+ * Check if an expression tree contains navigation that depends on
+ * match_start: FIRST, LAST-with-offset, or compound PREV_FIRST/
+ * NEXT_FIRST/PREV_LAST/NEXT_LAST with offset. Such expressions
+ * require per-context re-evaluation during NFA processing.
+ *
+ * LAST without offset always resolves to currentpos and is
+ * match_start-independent.
+ */
+static bool
+has_match_start_dependency(Node *node, void *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, RPRNavExpr))
+ {
+ RPRNavExpr *nav = (RPRNavExpr *) node;
+
+ if (nav->kind == RPR_NAV_FIRST)
+ return true;
+ if (nav->kind == RPR_NAV_LAST && nav->offset_arg != NULL)
+ return true;
+
+ /* Compound kinds with FIRST base depend on match_start */
+ if (nav->kind == RPR_NAV_PREV_FIRST ||
+ nav->kind == RPR_NAV_NEXT_FIRST)
+ return true;
+
+ /*
+ * PREV_LAST/NEXT_LAST: inner is LAST, which uses currentpos.
+ * match_start-dependent only if inner has offset (clamped to
+ * match_start).
+ */
+ if ((nav->kind == RPR_NAV_PREV_LAST ||
+ nav->kind == RPR_NAV_NEXT_LAST) &&
+ nav->offset_arg != NULL)
+ return true;
+
+ /* Check children (arg may contain further nav expressions) */
+ return has_match_start_dependency((Node *) nav->arg, context);
+ }
+
+ return expression_tree_walker(node, has_match_start_dependency, NULL);
+}
+
+/*
+ * compute_match_start_dependent
+ * Build a Bitmapset of DEFINE variable indices whose expressions
+ * depend on match_start (contain FIRST, LAST-with-offset, or
+ * compound PREV_FIRST/NEXT_FIRST/PREV_LAST/NEXT_LAST with offset).
+ *
+ * Variables in this set require per-context re-evaluation during NFA
+ * processing, because different contexts may have different match_start
+ * values.
+ */
+static Bitmapset *
+compute_match_start_dependent(List *defineClause)
+{
+ Bitmapset *result = NULL;
+ ListCell *lc;
+ int varIdx = 0;
+
+ foreach(lc, defineClause)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lc);
+
+ if (has_match_start_dependency((Node *) te->expr, NULL))
+ result = bms_add_member(result, varIdx);
+
+ varIdx++;
+ }
+
+ return result;
+}
+
/*
* create_windowagg_plan
*
@@ -2481,6 +2844,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
Oid *ordOperators;
Oid *ordCollations;
ListCell *lc;
+ List *defineVariableList = NIL;
+ List *filteredDefineClause = NIL;
+ RPRPattern *compiledPattern = NULL;
+ Bitmapset *matchStartDependent = NULL;
+
/*
* Choice of tlist here is motivated by the fact that WindowAgg will be
@@ -2531,6 +2899,28 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
ordNumCols++;
}
+ /* Build RPR pattern and defineVariableList */
+ if (wc->rpPattern)
+ {
+ /*
+ * Build defineVariableList from defineClause. The parser already
+ * rejects DEFINE variables not used in PATTERN, so no filtering is
+ * needed.
+ */
+ buildDefineVariableList(wc->defineClause, &defineVariableList);
+ filteredDefineClause = wc->defineClause;
+
+ /* Identify match_start-dependent DEFINE variables */
+ matchStartDependent = compute_match_start_dependent(wc->defineClause);
+
+ /* Compile and optimize RPR patterns */
+ compiledPattern = buildRPRPattern(wc->rpPattern,
+ defineVariableList,
+ wc->rpSkipTo,
+ wc->frameOptions,
+ !bms_is_empty(matchStartDependent));
+ }
+
/* And finally we can make the WindowAgg node */
plan = make_windowagg(tlist,
wc,
@@ -2543,6 +2933,10 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
ordOperators,
ordCollations,
best_path->runCondition,
+ wc->rpSkipTo,
+ compiledPattern,
+ filteredDefineClause,
+ matchStartDependent,
best_path->qual,
best_path->topwindow,
subplan);
@@ -6613,7 +7007,11 @@ static WindowAgg *
make_windowagg(List *tlist, WindowClause *wc,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
- List *runCondition, List *qual, bool topWindow, Plan *lefttree)
+ List *runCondition, RPSkipTo rpSkipTo,
+ RPRPattern *compiledPattern,
+ List *defineClause,
+ Bitmapset *defineMatchStartDependent,
+ List *qual, bool topWindow, Plan *lefttree)
{
WindowAgg *node = makeNode(WindowAgg);
Plan *plan = &node->plan;
@@ -6640,6 +7038,21 @@ make_windowagg(List *tlist, WindowClause *wc,
node->inRangeAsc = wc->inRangeAsc;
node->inRangeNullsFirst = wc->inRangeNullsFirst;
node->topWindow = topWindow;
+ node->rpSkipTo = rpSkipTo;
+
+ /* Store compiled pattern for NFA execution */
+ node->rpPattern = compiledPattern;
+
+ node->defineClause = defineClause;
+
+ /* Store pre-computed match_start dependency bitmapset */
+ node->defineMatchStartDependent = defineMatchStartDependent;
+
+ /* Compute nav offsets for tuplestore trim optimization */
+ compute_nav_offsets(defineClause,
+ &node->navMaxOffsetKind, &node->navMaxOffset,
+ &node->hasFirstNav,
+ &node->navFirstOffsetKind, &node->navFirstOffset);
plan->targetlist = tlist;
plan->lefttree = lefttree;
diff --git a/src/backend/optimizer/plan/meson.build b/src/backend/optimizer/plan/meson.build
index c565b2adbcc..5b2381cd774 100644
--- a/src/backend/optimizer/plan/meson.build
+++ b/src/backend/optimizer/plan/meson.build
@@ -7,6 +7,7 @@ backend_sources += files(
'planagg.c',
'planmain.c',
'planner.c',
+ 'rpr.c',
'setrefs.c',
'subselect.c',
)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 933dcbf5004..703c6ab7432 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1053,6 +1053,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name,
EXPRKIND_LIMIT);
wc->endOffset = preprocess_expression(root, wc->endOffset,
EXPRKIND_LIMIT);
+ wc->defineClause = (List *) preprocess_expression(root,
+ (Node *) wc->defineClause,
+ EXPRKIND_TARGET);
}
parse->limitOffset = preprocess_expression(root, parse->limitOffset,
@@ -6112,6 +6115,14 @@ optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
if (wflists->windowFuncs[wc->winref] == NIL)
continue;
+ /*
+ * If a DEFINE clause exists, do not let support functions replace the
+ * frame with a non-RPR-compatible one. RPR windows require ROWS
+ * BETWEEN CURRENT ROW AND ...
+ */
+ if (wc->defineClause != NIL)
+ continue;
+
foreach(lc2, wflists->windowFuncs[wc->winref])
{
SupportRequestOptimizeWindowClause req;
@@ -6195,7 +6206,11 @@ optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
equal(wc->orderClause, existing_wc->orderClause) &&
wc->frameOptions == existing_wc->frameOptions &&
equal(wc->startOffset, existing_wc->startOffset) &&
- equal(wc->endOffset, existing_wc->endOffset))
+ equal(wc->endOffset, existing_wc->endOffset) &&
+ wc->rpSkipTo == existing_wc->rpSkipTo &&
+ wc->initial == existing_wc->initial &&
+ equal(wc->defineClause, existing_wc->defineClause) &&
+ equal(wc->rpPattern, existing_wc->rpPattern))
{
ListCell *lc4;
diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c
new file mode 100644
index 00000000000..2543170c374
--- /dev/null
+++ b/src/backend/optimizer/plan/rpr.c
@@ -0,0 +1,1993 @@
+/*-------------------------------------------------------------------------
+ *
+ * rpr.c
+ * Row Pattern Recognition pattern compilation for planner
+ *
+ * This file contains functions for optimizing RPR pattern AST and
+ * compiling it to a flat element array for NFA execution by WindowAgg.
+ *
+ * Key components:
+ * 1. Pattern Optimization: Simplifies patterns before compilation
+ * (e.g., flatten nested SEQ/ALT, merge consecutive vars)
+ * 2. Pattern Compilation: Converts AST to flat element array for NFA
+ * 3. Absorption Analysis: Computes flags for O(n^2)->O(n) optimization
+ *
+ * Context Absorption Optimization:
+ * When a pattern starts with a greedy unbounded element (e.g., A+ or (A B)+),
+ * newer contexts cannot produce longer matches than older contexts.
+ * By absorbing (eliminating) redundant newer contexts, we reduce
+ * complexity from O(n^2) to O(n) for patterns like A+ B.
+ *
+ * The absorption analysis uses two element flags:
+ * - RPR_ELEM_ABSORBABLE: marks WHERE to compare (judgment point)
+ * - RPR_ELEM_ABSORBABLE_BRANCH: marks the absorbable region
+ *
+ * See computeAbsorbability() and the detailed comments before
+ * isUnboundedStart() for the full design explanation.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/plan/rpr.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/rpr.h"
+
+/* Forward declarations - pattern comparison */
+static bool rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b);
+static bool rprPatternChildrenEqual(List *a, List *b);
+
+/* Forward declarations - pattern optimization (shared) */
+static RPRPatternNode *tryUnwrapSingleChild(RPRPatternNode *pattern);
+
+/* Forward declarations - SEQ optimization */
+static List *flattenSeqChildren(List *children);
+static List *mergeConsecutiveVars(List *children);
+static List *mergeConsecutiveGroups(List *children);
+static List *mergeConsecutiveAlts(List *children);
+static List *mergeGroupPrefixSuffix(List *children);
+static RPRPatternNode *optimizeSeqPattern(RPRPatternNode *pattern);
+
+/* Forward declarations - ALT optimization */
+static List *flattenAltChildren(List *children);
+static List *removeDuplicateAlternatives(List *children);
+static RPRPatternNode *optimizeAltPattern(RPRPatternNode *pattern);
+
+/* Forward declarations - GROUP optimization */
+static RPRPatternNode *tryMultiplyQuantifiers(RPRPatternNode *pattern);
+static RPRPatternNode *tryUnwrapGroup(RPRPatternNode *pattern);
+static RPRPatternNode *optimizeGroupPattern(RPRPatternNode *pattern);
+
+/* Forward declarations - optimization dispatcher */
+static RPRPatternNode *optimizeRPRPattern(RPRPatternNode *pattern);
+
+/* Forward declarations - pattern compilation */
+static int collectDefineVariables(List *defineVariableList, char **varNames);
+static void scanRPRPatternRecursive(RPRPatternNode *node, char **varNames,
+ int *numVars, int *numElements,
+ RPRDepth depth, RPRDepth *maxDepth);
+static void scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars,
+ int *numElements, RPRDepth *maxDepth);
+static RPRPattern *allocateRPRPattern(int numVars, int numElements,
+ RPRDepth maxDepth, char **varNamesStack);
+static RPRVarId getVarIdFromPattern(RPRPattern *pat, const char *varName);
+static bool fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat,
+ int *idx, RPRDepth depth);
+static bool fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat,
+ int *idx, RPRDepth depth);
+static bool fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat,
+ int *idx, RPRDepth depth);
+static bool fillRPRPattern(RPRPatternNode *node, RPRPattern *pat,
+ int *idx, RPRDepth depth);
+static void finalizeRPRPattern(RPRPattern *result);
+
+/* Forward declarations - context absorption */
+static bool isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx,
+ RPRDepth scopeDepth);
+static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx);
+static void computeAbsorbabilityRecursive(RPRPattern *pattern,
+ RPRElemIdx startIdx,
+ bool *hasAbsorbable);
+static void computeAbsorbability(RPRPattern *pattern);
+
+/*
+ * rprPatternEqual
+ * Compare two RPRPatternNode trees for equality.
+ *
+ * Returns true if the trees are structurally identical.
+ */
+static bool
+rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b)
+{
+ /* Pattern nodes in children lists must never be NULL */
+ Assert(a != NULL && b != NULL);
+
+ /* Must have same node type and quantifiers */
+ if (a->nodeType != b->nodeType)
+ return false;
+ if (a->min != b->min || a->max != b->max)
+ return false;
+ if (a->reluctant != b->reluctant)
+ return false;
+
+ switch (a->nodeType)
+ {
+ case RPR_PATTERN_VAR:
+ return strcmp(a->varName, b->varName) == 0;
+
+ case RPR_PATTERN_SEQ:
+ case RPR_PATTERN_ALT:
+ case RPR_PATTERN_GROUP:
+ return rprPatternChildrenEqual(a->children, b->children);
+ }
+
+ return false; /* keep compiler quiet */
+}
+
+/*
+ * rprPatternChildrenEqual
+ * Compare children lists of two pattern nodes for equality.
+ *
+ * Returns true if the children lists are structurally identical.
+ */
+static bool
+rprPatternChildrenEqual(List *a, List *b)
+{
+ ListCell *lca,
+ *lcb;
+
+ if (list_length(a) != list_length(b))
+ return false;
+
+ forboth(lca, a, lcb, b)
+ {
+ if (!rprPatternEqual((RPRPatternNode *) lfirst(lca),
+ (RPRPatternNode *) lfirst(lcb)))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * tryUnwrapSingleChild
+ * Try to unwrap pattern node with single child.
+ *
+ * Examples (internal node representation):
+ * SEQ[A] -> A (single-element sequence becomes the element)
+ * ALT[A] -> A (single-alternative becomes the alternative)
+ *
+ * If pattern has exactly one child, return the child directly.
+ * Otherwise returns the pattern unchanged.
+ * Used by both SEQ and ALT optimization.
+ */
+static RPRPatternNode *
+tryUnwrapSingleChild(RPRPatternNode *pattern)
+{
+ if (list_length(pattern->children) == 1)
+ return (RPRPatternNode *) linitial(pattern->children);
+
+ return pattern;
+}
+
+/*
+ * flattenSeqChildren
+ * Recursively optimize children and flatten nested SEQ.
+ *
+ * Example:
+ * SEQ(A, SEQ(B, C)) -> SEQ(A, B, C)
+ *
+ * Returns a new list with optimized children, with nested SEQ children
+ * flattened into the parent list.
+ */
+static List *
+flattenSeqChildren(List *children)
+{
+ ListCell *lc;
+ List *newChildren = NIL;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+ RPRPatternNode *opt = optimizeRPRPattern(child);
+
+ /* GROUP{1,1} should have been unwrapped by optimizeGroupPattern */
+ Assert(!(opt->nodeType == RPR_PATTERN_GROUP &&
+ opt->min == 1 && opt->max == 1 && opt->reluctant == false));
+
+ if (opt->nodeType == RPR_PATTERN_SEQ)
+ {
+ newChildren = list_concat(newChildren,
+ list_copy(opt->children));
+ }
+ else
+ {
+ newChildren = lappend(newChildren, opt);
+ }
+ }
+
+ return newChildren;
+}
+
+/*
+ * mergeConsecutiveVars
+ * Merge consecutive identical VAR nodes.
+ *
+ * Examples:
+ * A{m1,M1} A{m2,M2} -> A{m1+m2, M1+M2} where INF + x = INF.
+ *
+ * Only merges non-reluctant VAR nodes with the same variable name.
+ */
+static List *
+mergeConsecutiveVars(List *children)
+{
+ ListCell *lc;
+ List *mergedChildren = NIL;
+ RPRPatternNode *prev = NULL;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+
+ if (child->nodeType == RPR_PATTERN_VAR && child->reluctant == false)
+ {
+ /* ----------------------
+ * Can merge consecutive VAR nodes if:
+ * 1. Same variable name
+ * 2. No min overflow: prev->min + child->min <= INF
+ * 3. No max overflow: prev->max + child->max <= INF (or either is INF)
+ */
+ if (prev != NULL &&
+ strcmp(prev->varName, child->varName) == 0 &&
+ prev->min <= RPR_QUANTITY_INF - child->min &&
+ (prev->max <= RPR_QUANTITY_INF - child->max ||
+ prev->max == RPR_QUANTITY_INF ||
+ child->max == RPR_QUANTITY_INF))
+ {
+ /*
+ * Merge: accumulate min/max into prev. prev is guaranteed to
+ * be a non-reluctant VAR by the outer condition.
+ */
+ Assert(prev->nodeType == RPR_PATTERN_VAR && prev->reluctant == false);
+
+ prev->min += child->min;
+
+ if (prev->max == RPR_QUANTITY_INF ||
+ child->max == RPR_QUANTITY_INF)
+ prev->max = RPR_QUANTITY_INF;
+ else
+ prev->max += child->max;
+ }
+ else
+ {
+ /* Flush previous and start new */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+ prev = child;
+ }
+ }
+ else
+ {
+ /* Non-mergeable - flush previous */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+ mergedChildren = lappend(mergedChildren, child);
+ prev = NULL;
+ }
+ }
+
+ /* Flush remaining */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+
+ return mergedChildren;
+}
+
+/*
+ * mergeConsecutiveGroups
+ * Merge consecutive identical GROUP nodes.
+ *
+ * Example:
+ * (A B)+ (A B)+ -> (A B){2,}
+ *
+ * Only merges non-reluctant GROUP nodes with identical children.
+ */
+static List *
+mergeConsecutiveGroups(List *children)
+{
+ ListCell *lc;
+ List *mergedChildren = NIL;
+ RPRPatternNode *prev = NULL;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+
+ if (child->nodeType == RPR_PATTERN_GROUP && child->reluctant == false)
+ {
+ /* ----------------------
+ * Can merge consecutive GROUP nodes if:
+ * 1. Identical children
+ * 2. No min overflow: prev->min + child->min <= INF
+ * 3. No max overflow: prev->max + child->max <= INF (or either is INF)
+ */
+ if (prev != NULL &&
+ rprPatternChildrenEqual(prev->children, child->children) &&
+ prev->min <= RPR_QUANTITY_INF - child->min &&
+ (prev->max <= RPR_QUANTITY_INF - child->max ||
+ prev->max == RPR_QUANTITY_INF ||
+ child->max == RPR_QUANTITY_INF))
+ {
+ /*
+ * Merge: accumulate min/max into prev. prev is guaranteed to
+ * be a non-reluctant GROUP by the outer condition.
+ */
+ Assert(prev->nodeType == RPR_PATTERN_GROUP && prev->reluctant == false);
+
+ prev->min += child->min;
+
+ if (prev->max == RPR_QUANTITY_INF ||
+ child->max == RPR_QUANTITY_INF)
+ prev->max = RPR_QUANTITY_INF;
+ else
+ prev->max += child->max;
+ }
+ else
+ {
+ /* Flush previous and start new */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+ prev = child;
+ }
+ }
+ else
+ {
+ /* Non-mergeable - flush previous */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+ mergedChildren = lappend(mergedChildren, child);
+ prev = NULL;
+ }
+ }
+
+ /* Flush remaining */
+ if (prev != NULL)
+ mergedChildren = lappend(mergedChildren, prev);
+
+ return mergedChildren;
+}
+
+/*
+ * mergeConsecutiveAlts
+ * Merge consecutive identical ALT nodes into a GROUP.
+ *
+ * Example:
+ * (A | B) (A | B) (A | B) -> (A | B){3}
+ *
+ * After GROUP{1,1} unwrap, bare alternations like (A | B) become ALT nodes
+ * in the SEQ. This step detects consecutive identical ALT nodes and wraps
+ * them in a GROUP with the appropriate quantifier.
+ */
+static List *
+mergeConsecutiveAlts(List *children)
+{
+ ListCell *lc;
+ List *mergedChildren = NIL;
+ RPRPatternNode *prev = NULL;
+ int count = 0;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+
+ if (child->nodeType == RPR_PATTERN_ALT && child->reluctant == false)
+ {
+ if (prev != NULL &&
+ rprPatternChildrenEqual(prev->children, child->children))
+ {
+ /* Same ALT as prev - accumulate */
+ count++;
+ }
+ else
+ {
+ /* Different ALT or first ALT - flush previous */
+ if (prev != NULL)
+ {
+ if (count > 1)
+ {
+ /* Wrap in GROUP{count,count}(ALT) */
+ RPRPatternNode *group = makeNode(RPRPatternNode);
+
+ group->nodeType = RPR_PATTERN_GROUP;
+ group->min = count;
+ group->max = count;
+ group->reluctant = false;
+ group->reluctant_location = -1;
+ group->location = -1;
+ group->children = list_make1(prev);
+ mergedChildren = lappend(mergedChildren, group);
+ }
+ else
+ mergedChildren = lappend(mergedChildren, prev);
+ }
+ prev = child;
+ count = 1;
+ }
+ }
+ else
+ {
+ /* Non-ALT - flush previous */
+ if (prev != NULL)
+ {
+ if (count > 1)
+ {
+ RPRPatternNode *group = makeNode(RPRPatternNode);
+
+ group->nodeType = RPR_PATTERN_GROUP;
+ group->min = count;
+ group->max = count;
+ group->reluctant = false;
+ group->reluctant_location = -1;
+ group->location = -1;
+ group->children = list_make1(prev);
+ mergedChildren = lappend(mergedChildren, group);
+ }
+ else
+ mergedChildren = lappend(mergedChildren, prev);
+ }
+ mergedChildren = lappend(mergedChildren, child);
+ prev = NULL;
+ count = 0;
+ }
+ }
+
+ /* Flush remaining */
+ if (prev != NULL)
+ {
+ if (count > 1)
+ {
+ RPRPatternNode *group = makeNode(RPRPatternNode);
+
+ group->nodeType = RPR_PATTERN_GROUP;
+ group->min = count;
+ group->max = count;
+ group->reluctant = false;
+ group->reluctant_location = -1;
+ group->location = -1;
+ group->children = list_make1(prev);
+ mergedChildren = lappend(mergedChildren, group);
+ }
+ else
+ mergedChildren = lappend(mergedChildren, prev);
+ }
+
+ return mergedChildren;
+}
+
+/*
+ * mergeGroupPrefixSuffix
+ * Merge sequence prefix/suffix into GROUP with matching children.
+ *
+ * When a GROUP's children appear as a prefix before and/or suffix after
+ * the GROUP in a SEQ, absorb them by incrementing the GROUP's quantifier.
+ * This runs iteratively: A B A B (A B)+ A B -> (A B){5,}.
+ *
+ * Algorithm:
+ * For each GROUP encountered in the sequence:
+ * 1. PREFIX phase: compare the last N elements already in the result
+ * list against the GROUP's children. On match, remove them from
+ * result and increment the GROUP's min/max. Repeat until no match.
+ * 2. SUFFIX phase: compare the next N elements in the input against
+ * the GROUP's children. On match, skip them (via skipUntil) and
+ * increment min/max. Repeat until no match.
+ *
+ * Examples:
+ * A B (A B)+ -> (A B){2,}
+ * (A B)+ A B -> (A B){2,}
+ * A B (A B)+ A B -> (A B){3,}
+ */
+static List *
+mergeGroupPrefixSuffix(List *children)
+{
+ List *result = NIL;
+ int numChildren = list_length(children);
+ int i;
+ int skipUntil = -1; /* skip suffix elements already absorbed */
+
+ for (i = 0; i < numChildren; i++)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) list_nth(children, i);
+
+ /*
+ * The suffix absorption logic below adjusts i to skip absorbed
+ * elements, ensuring we never revisit them. Verify this invariant.
+ */
+ Assert(i >= skipUntil);
+
+ /*
+ * If this is a GROUP, see if preceding/following elements match its
+ * children. GROUP's content may be wrapped in a SEQ - unwrap for
+ * comparison.
+ */
+ if (child->nodeType == RPR_PATTERN_GROUP && child->reluctant == false)
+ {
+ List *groupContent = child->children;
+ int groupChildCount;
+ int prefixLen = list_length(result);
+ List *trimmed;
+
+ /*
+ * If GROUP has single SEQ child, compare with SEQ's children.
+ * e.g., (A B)+ internally contains sequence A B; compare against
+ * that.
+ */
+ if (list_length(groupContent) == 1)
+ {
+ RPRPatternNode *inner = (RPRPatternNode *) linitial(groupContent);
+
+ if (inner->nodeType == RPR_PATTERN_SEQ)
+ groupContent = inner->children;
+ }
+
+ groupChildCount = list_length(groupContent);
+
+ /*
+ * PREFIX MERGE: Check if preceding elements match. Keep absorbing
+ * as long as we have matching prefixes.
+ */
+ while (prefixLen >= groupChildCount && groupChildCount > 0)
+ {
+ List *prefixElements = NIL;
+ int j;
+
+ /* Extract last groupChildCount elements from prefix */
+ for (j = prefixLen - groupChildCount; j < prefixLen; j++)
+ {
+ prefixElements = lappend(prefixElements,
+ list_nth(result, j));
+ }
+
+ /* Compare with GROUP's (possibly unwrapped) children */
+ if (rprPatternChildrenEqual(prefixElements, groupContent) &&
+ child->min < RPR_QUANTITY_INF - 1 &&
+ (child->max == RPR_QUANTITY_INF ||
+ child->max < RPR_QUANTITY_INF - 1))
+ {
+ /*
+ * Match! Merge by incrementing GROUP's quantifier. Remove
+ * the prefix elements from output.
+ */
+ child->min += 1;
+ if (child->max != RPR_QUANTITY_INF)
+ child->max += 1;
+
+ /* Rebuild result without matched prefix */
+ trimmed = NIL;
+ for (j = 0; j < prefixLen - groupChildCount; j++)
+ {
+ trimmed = lappend(trimmed,
+ list_nth(result, j));
+ }
+ result = trimmed;
+ prefixLen = list_length(result);
+ }
+ else
+ {
+ list_free(prefixElements);
+ break;
+ }
+
+ list_free(prefixElements);
+ }
+
+ /*
+ * SUFFIX MERGE: Check if following elements match. Keep absorbing
+ * as long as we have matching suffixes.
+ */
+ while (i + groupChildCount < numChildren && groupChildCount > 0)
+ {
+ List *suffixElements = NIL;
+ int j;
+ int suffixStart = i + 1;
+
+ /* suffixStart always >= skipUntil after i adjustment */
+ Assert(skipUntil <= suffixStart);
+
+ /* Extract next groupChildCount elements as suffix */
+ for (j = 0; j < groupChildCount; j++)
+ {
+ int idx = suffixStart + j;
+
+ /* while condition guarantees idx < numChildren */
+ Assert(idx < numChildren);
+ suffixElements = lappend(suffixElements,
+ list_nth(children, idx));
+ }
+
+ /* Compare with GROUP's children */
+ if (list_length(suffixElements) == groupChildCount &&
+ rprPatternChildrenEqual(suffixElements, groupContent) &&
+ child->min < RPR_QUANTITY_INF - 1 &&
+ (child->max == RPR_QUANTITY_INF ||
+ child->max < RPR_QUANTITY_INF - 1))
+ {
+ /*
+ * Match! Absorb suffix by incrementing quantifier and
+ * skipping.
+ */
+ child->min += 1;
+ if (child->max != RPR_QUANTITY_INF)
+ child->max += 1;
+ skipUntil = suffixStart + groupChildCount;
+
+ /*
+ * Update i to continue suffix check after absorbed
+ * elements
+ */
+ i = skipUntil - 1;
+ }
+ else
+ {
+ list_free(suffixElements);
+ break;
+ }
+
+ list_free(suffixElements);
+ }
+ }
+
+ result = lappend(result, child);
+ }
+
+ return result;
+}
+
+/*
+ * optimizeSeqPattern
+ * Optimize SEQ pattern node.
+ *
+ * Optimizations:
+ * 1. Flatten nested SEQ and GROUP{1,1}
+ * 2. Merge consecutive identical VAR nodes
+ * 3. Merge consecutive identical GROUP nodes
+ * 4. Merge consecutive identical ALT nodes into GROUP
+ * 5. Merge prefix/suffix into GROUP with matching children
+ * 6. Unwrap single-item SEQ
+ */
+static RPRPatternNode *
+optimizeSeqPattern(RPRPatternNode *pattern)
+{
+ /* Recursively optimize children and flatten nested SEQ/GROUP{1,1} */
+ pattern->children = flattenSeqChildren(pattern->children);
+
+ /* Merge consecutive identical VAR nodes */
+ pattern->children = mergeConsecutiveVars(pattern->children);
+
+ /* Merge consecutive identical GROUP nodes */
+ pattern->children = mergeConsecutiveGroups(pattern->children);
+
+ /* Merge consecutive identical ALT nodes into GROUP */
+ pattern->children = mergeConsecutiveAlts(pattern->children);
+
+ /* Merge prefix/suffix into GROUP with matching children */
+ pattern->children = mergeGroupPrefixSuffix(pattern->children);
+
+ /* Unwrap single-item SEQ */
+ return tryUnwrapSingleChild(pattern);
+}
+
+/*
+ * flattenAltChildren
+ * Recursively optimize children and flatten nested ALT nodes.
+ *
+ * Example:
+ * (A | (B | C)) -> (A | B | C)
+ *
+ * Returns a new list with optimized children, with nested ALT children
+ * flattened into the parent list.
+ */
+static List *
+flattenAltChildren(List *children)
+{
+ ListCell *lc;
+ List *newChildren = NIL;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+ RPRPatternNode *opt = optimizeRPRPattern(child);
+
+ if (opt->nodeType == RPR_PATTERN_ALT)
+ newChildren = list_concat(newChildren, list_copy(opt->children));
+ else
+ newChildren = lappend(newChildren, opt);
+ }
+
+ return newChildren;
+}
+
+/*
+ * removeDuplicateAlternatives
+ * Remove duplicate alternatives from a list.
+ *
+ * Examples:
+ * (A | B | A) -> (A | B)
+ * (X | Y | X | Z | Y) -> (X | Y | Z)
+ *
+ * Returns a new list with only unique children (first occurrence kept).
+ */
+static List *
+removeDuplicateAlternatives(List *children)
+{
+ ListCell *lc;
+ ListCell *lc2;
+ List *uniqueChildren = NIL;
+
+ foreach(lc, children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+ bool isDuplicate = false;
+
+ foreach(lc2, uniqueChildren)
+ {
+ if (rprPatternEqual((RPRPatternNode *) lfirst(lc2), child))
+ {
+ isDuplicate = true;
+ break;
+ }
+ }
+
+ if (!isDuplicate)
+ uniqueChildren = lappend(uniqueChildren, child);
+ }
+
+ return uniqueChildren;
+}
+
+/*
+ * optimizeAltPattern
+ * Optimize ALT pattern node.
+ *
+ * Optimizations:
+ * 1. Flatten nested ALT
+ * 2. Remove duplicate alternatives
+ * 3. Unwrap single-item ALT
+ */
+static RPRPatternNode *
+optimizeAltPattern(RPRPatternNode *pattern)
+{
+ /* Recursively optimize children and flatten nested ALT */
+ pattern->children = flattenAltChildren(pattern->children);
+
+ /* Remove duplicate alternatives */
+ pattern->children = removeDuplicateAlternatives(pattern->children);
+
+ /* Unwrap single-item ALT */
+ return tryUnwrapSingleChild(pattern);
+}
+
+/*
+ * tryMultiplyQuantifiers
+ * Try to multiply quantifiers.
+ *
+ * Multiplication is SAFE when:
+ * 1. Both unbounded: (A*)* -> A*, (A+)+ -> A+
+ * 2. Outer exact: (A{m,n}){k} -> A{m*k, n*k}
+ * 3. Outer range + child {1,1}: (A){2,} -> A{2,}
+ *
+ * Multiplication is NOT safe when:
+ * - Only child unbounded: (A+){3} has different semantics
+ * - Outer range + child not {1,1}: gaps possible
+ * e.g., (A{2}){2,3} yields 4,6 only (not 4,5,6)
+ *
+ * Returns the child node with multiplied quantifiers if successful,
+ * otherwise returns the original pattern unchanged.
+ */
+static RPRPatternNode *
+tryMultiplyQuantifiers(RPRPatternNode *pattern)
+{
+ RPRPatternNode *child;
+ int64 new_min_64;
+ int64 new_max_64;
+
+ /* Parser always creates GROUP with exactly one child */
+ Assert(list_length(pattern->children) == 1);
+
+ if (pattern->reluctant)
+ return pattern;
+
+ child = (RPRPatternNode *) linitial(pattern->children);
+
+ if ((child->nodeType != RPR_PATTERN_VAR &&
+ child->nodeType != RPR_PATTERN_GROUP) ||
+ child->reluctant)
+ return pattern;
+
+ /* Case 1: Both unbounded - (A*)* -> A*, (A+)+ -> A+ */
+ if (child->max == RPR_QUANTITY_INF && pattern->max == RPR_QUANTITY_INF)
+ {
+ new_min_64 = (int64) child->min * pattern->min;
+ if (new_min_64 >= RPR_QUANTITY_INF)
+ return pattern; /* overflow, skip optimization */
+
+ child->min = (int) new_min_64;
+ child->max = RPR_QUANTITY_INF;
+ return child;
+ }
+
+ /*----------
+ * Case 2: Outer exact (min == max): (A{2,3}){4} -> A{8,12}.
+ * Safe because every iteration produces the same range.
+ *
+ * Case 3: Child {1,1}: (A){2,5} -> A{2,5}.
+ * Safe because the child contributes exactly one per
+ * iteration, so the outer range maps directly.
+ *
+ * Unsafe example: (A{2}){2,3} produces counts 4 or 6 only, not the full
+ * range 4..6, so we cannot flatten when child has a non-trivial range AND
+ * outer is also a range.
+ *----------
+ */
+ if (child->max != RPR_QUANTITY_INF &&
+ (pattern->min == pattern->max ||
+ (child->min == 1 && child->max == 1)))
+ {
+ new_min_64 = (int64) pattern->min * child->min;
+ if (new_min_64 >= RPR_QUANTITY_INF)
+ return pattern;
+
+ /* Outer unbounded: result is unbounded regardless of child */
+ if (pattern->max == RPR_QUANTITY_INF)
+ new_max_64 = RPR_QUANTITY_INF;
+ else
+ {
+ new_max_64 = (int64) pattern->max * child->max;
+
+ if (new_max_64 >= RPR_QUANTITY_INF)
+ return pattern;
+ }
+
+ child->min = (int) new_min_64;
+ child->max = (int) new_max_64;
+ return child;
+ }
+
+ /* Not safe to multiply */
+ return pattern;
+}
+
+/*
+ * tryUnwrapGroup
+ * Try to unwrap GROUP{1,1} node.
+ *
+ * Examples:
+ * (A){1,1} -> A
+ * (A B){1,1} -> SEQ(A, B) (unwraps the inner SEQ)
+ * (A)? -> A? (propagate quantifier to single VAR child)
+ * (A)+? -> A+? (propagate quantifier including reluctant)
+ *
+ * If GROUP has min=1, max=1, return the child directly (reluctant on
+ * {1,1} is meaningless). If GROUP has a single VAR child with default
+ * quantifier {1,1}, propagate the GROUP's quantifier to the child and
+ * unwrap. Otherwise returns the pattern unchanged.
+ *
+ * Note: Parser always creates GROUP with exactly one child via list_make1().
+ */
+static RPRPatternNode *
+tryUnwrapGroup(RPRPatternNode *pattern)
+{
+ RPRPatternNode *child;
+
+ /* Parser always creates GROUP with single child */
+ Assert(list_length(pattern->children) == 1);
+
+ child = (RPRPatternNode *) linitial(pattern->children);
+
+ /* GROUP{1,1}: unwrap directly (reluctant on {1,1} is meaningless) */
+ if (pattern->min == 1 && pattern->max == 1)
+ return child;
+
+ /*
+ * Single VAR child with default {1,1}: propagate GROUP's quantifier to
+ * the child and unwrap. E.g., (A)?? -> A??, (A)+? -> A+?
+ */
+ if (child->nodeType == RPR_PATTERN_VAR &&
+ child->min == 1 && child->max == 1 && child->reluctant == false)
+ {
+ child->min = pattern->min;
+ child->max = pattern->max;
+ child->reluctant = pattern->reluctant;
+ child->reluctant_location = pattern->reluctant_location;
+ return child;
+ }
+
+ return pattern;
+}
+
+/*
+ * optimizeGroupPattern
+ * Optimize GROUP pattern node.
+ *
+ * Optimizations:
+ * 1. Quantifier multiplication: (A{m}){n} -> A{m*n}
+ * 2. Unwrap GROUP{1,1}
+ */
+static RPRPatternNode *
+optimizeGroupPattern(RPRPatternNode *pattern)
+{
+ ListCell *lc;
+ List *newChildren;
+ RPRPatternNode *result;
+
+ /* Recursively optimize children */
+ newChildren = NIL;
+ foreach(lc, pattern->children)
+ {
+ RPRPatternNode *child = (RPRPatternNode *) lfirst(lc);
+
+ newChildren = lappend(newChildren, optimizeRPRPattern(child));
+ }
+ pattern->children = newChildren;
+
+ /* Try quantifier multiplication */
+ result = tryMultiplyQuantifiers(pattern);
+ if (result != pattern)
+ return result;
+
+ /* Try unwrapping GROUP{1,1} */
+ return tryUnwrapGroup(pattern);
+}
+
+/*
+ * optimizeRPRPattern
+ * Optimize RPRPatternNode tree (dispatcher).
+ *
+ * Dispatches to type-specific optimization functions.
+ * Returns the optimized pattern (may be a different node).
+ */
+static RPRPatternNode *
+optimizeRPRPattern(RPRPatternNode *pattern)
+{
+ /* Pattern nodes from parser are never NULL */
+ Assert(pattern != NULL);
+
+ check_stack_depth();
+
+ switch (pattern->nodeType)
+ {
+ case RPR_PATTERN_VAR:
+ return pattern;
+ case RPR_PATTERN_SEQ:
+ return optimizeSeqPattern(pattern);
+ case RPR_PATTERN_ALT:
+ return optimizeAltPattern(pattern);
+ case RPR_PATTERN_GROUP:
+ return optimizeGroupPattern(pattern);
+ }
+
+ return pattern; /* keep compiler quiet */
+}
+
+/*
+ * collectDefineVariables
+ * Collect variable names from DEFINE clause.
+ *
+ * Populates varNames array with variable names in DEFINE order.
+ * This ensures varId == defineIdx, eliminating runtime mapping.
+ * Returns the number of variables collected.
+ */
+static int
+collectDefineVariables(List *defineVariableList, char **varNames)
+{
+ ListCell *lc;
+ int numVars = 0;
+
+ foreach(lc, defineVariableList)
+ {
+ /* Parser already checked this limit in transformDefineClause */
+ Assert(numVars < RPR_VARID_MAX);
+
+ varNames[numVars++] = strVal(lfirst(lc));
+ }
+
+ return numVars;
+}
+
+/*
+ * scanRPRPatternRecursive
+ * Recursively scan pattern AST (pass 1 internal).
+ *
+ * Collects unique variable names and counts elements while tracking depth.
+ * Variables from DEFINE clause are already in varNames; this adds any
+ * additional variables found in the pattern.
+ */
+static void
+scanRPRPatternRecursive(RPRPatternNode *node, char **varNames, int *numVars,
+ int *numElements, RPRDepth depth, RPRDepth *maxDepth)
+{
+ ListCell *lc;
+ int i;
+
+ /* Pattern nodes from parser are never NULL */
+ Assert(node != NULL);
+
+ check_stack_depth();
+
+ /* Check recursion depth limit before overflow occurs */
+ if (depth >= RPR_DEPTH_MAX)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("pattern nesting too deep"),
+ errdetail("Pattern nesting depth %d exceeds maximum %d.",
+ depth, RPR_DEPTH_MAX - 1)));
+
+ /* Track maximum depth */
+ if (depth > *maxDepth)
+ *maxDepth = depth;
+
+ switch (node->nodeType)
+ {
+ case RPR_PATTERN_VAR:
+ /* Count element */
+ (*numElements)++;
+
+ /* Collect variable name if not already present */
+ for (i = 0; i < *numVars; i++)
+ {
+ if (strcmp(varNames[i], node->varName) == 0)
+ return; /* Already have this variable */
+ }
+
+ /*
+ * Variable not in DEFINE clause - this is valid per SQL standard.
+ * Such variables are implicitly TRUE. Add to varNames so they get
+ * a varId >= defineVariableList length, which executor treats as
+ * TRUE.
+ */
+ Assert(*numVars < RPR_VARID_MAX);
+ varNames[(*numVars)++] = node->varName;
+ break;
+
+ case RPR_PATTERN_SEQ:
+ /* Sequence: just recurse into children */
+ foreach(lc, node->children)
+ {
+ scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames,
+ numVars, numElements, depth, maxDepth);
+ }
+ break;
+
+ case RPR_PATTERN_GROUP:
+ /* Add BEGIN element if group has non-trivial quantifier */
+ if (node->min != 1 || node->max != 1)
+ (*numElements)++;
+
+ /* Recurse into children at increased depth */
+ foreach(lc, node->children)
+ {
+ scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames,
+ numVars, numElements, depth + 1, maxDepth);
+ }
+
+ /* Add END element if group has non-trivial quantifier */
+ if (node->min != 1 || node->max != 1)
+ (*numElements)++;
+ break;
+
+ case RPR_PATTERN_ALT:
+ /* Count ALT start element */
+ (*numElements)++;
+
+ /* Recurse into children at increased depth */
+ foreach(lc, node->children)
+ {
+ scanRPRPatternRecursive((RPRPatternNode *) lfirst(lc), varNames,
+ numVars, numElements, depth + 1, maxDepth);
+ }
+ break;
+ }
+}
+
+/*
+ * scanRPRPattern
+ * Scan pattern AST (pass 1 entry point).
+ *
+ * Collects unique variable names (appending to those from DEFINE clause),
+ * counts total elements (including FIN marker), and tracks maximum depth.
+ * Reports error if element count exceeds RPR_ELEMIDX_MAX.
+ */
+static void
+scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars,
+ int *numElements, RPRDepth *maxDepth)
+{
+ *numElements = 0;
+ *maxDepth = 0;
+
+ scanRPRPatternRecursive(node, varNames, numVars, numElements, 0, maxDepth);
+
+ (*numElements)++; /* +1 for FIN marker */
+
+ if (*numElements > RPR_ELEMIDX_MAX)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("pattern too complex"),
+ errdetail("Pattern has %d elements, maximum is %d.",
+ *numElements, RPR_ELEMIDX_MAX)));
+}
+
+/*
+ * allocateRPRPattern
+ * Allocate and initialize RPRPattern structure.
+ *
+ * Creates the pattern structure, copies variable names, and allocates
+ * the elements array. The elements array is zero-initialized.
+ */
+static RPRPattern *
+allocateRPRPattern(int numVars, int numElements, RPRDepth maxDepth,
+ char **varNamesStack)
+{
+ RPRPattern *result;
+ int i;
+
+ result = makeNode(RPRPattern);
+ result->numVars = numVars;
+ result->maxDepth = maxDepth + 1; /* +1: depth is 0-based */
+ result->numElements = numElements;
+
+ /* Copy varNames (pattern must have at least one variable) */
+ Assert(numVars > 0);
+ result->varNames = palloc(numVars * sizeof(char *));
+ for (i = 0; i < numVars; i++)
+ result->varNames[i] = pstrdup(varNamesStack[i]);
+
+ /* Allocate elements array (zero-init for reserved fields) */
+ Assert(numElements >= 2);
+ result->elements = palloc0(numElements * sizeof(RPRPatternElement));
+
+ return result;
+}
+
+/*
+ * getVarIdFromPattern
+ * Get variable ID for a variable name from RPRPattern.
+ *
+ * Returns the index of the variable in the varNames array.
+ */
+static RPRVarId
+getVarIdFromPattern(RPRPattern *pat, const char *varName)
+{
+ for (int i = 0; i < pat->numVars; i++)
+ {
+ if (strcmp(pat->varNames[i], varName) == 0)
+ return (RPRVarId) i;
+ }
+
+ /* Should not happen - variable should already be collected */
+ elog(ERROR, "pattern variable \"%s\" not found", varName);
+ pg_unreachable();
+}
+
+/*
+ * fillRPRPatternVar
+ * Fill a VAR pattern element.
+ *
+ * Returns true if this VAR is nullable (can match zero rows).
+ */
+static bool
+fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth)
+{
+ RPRPatternElement *elem = &pat->elements[*idx];
+
+ memset(elem, 0, sizeof(RPRPatternElement));
+ elem->varId = getVarIdFromPattern(pat, node->varName);
+ elem->depth = depth;
+ elem->min = node->min;
+ elem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max;
+ Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF &&
+ elem->max >= 1 &&
+ (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max));
+ elem->next = RPR_ELEMIDX_INVALID;
+ elem->jump = RPR_ELEMIDX_INVALID;
+ if (node->reluctant)
+ elem->flags |= RPR_ELEM_RELUCTANT;
+ (*idx)++;
+
+ return (node->min == 0);
+}
+
+/*
+ * fillRPRPatternGroup
+ * Fill a GROUP pattern and its children.
+ *
+ * Creates elements for group content at increased depth, plus BEGIN/END
+ * marker pair if the group has a non-trivial quantifier (not {1,1}).
+ *
+ * Element layout for (A B){2,3}:
+ *
+ * [BEGIN] [A] [B] [END] [next element...]
+ * | | ^
+ * | +-- jump --+ (loop back to first child)
+ * +---- jump -------------------+ (skip to after END)
+ *
+ * BEGIN.jump points past END (skip path when count >= max or min == 0).
+ * END.jump points to the first child (loop-back path).
+ * BEGIN.next and END.next are set later by finalizeRPRPattern().
+ *
+ * Returns true if this group is nullable. A group is nullable when its
+ * min is 0 (can be skipped entirely) or its body is nullable (every path
+ * through the body can match zero rows).
+ */
+static bool
+fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth)
+{
+ ListCell *lc;
+ int groupStartIdx = *idx;
+ int beginIdx = -1;
+ bool bodyNullable = true;
+
+ /* Add BEGIN marker if group has non-trivial quantifier */
+ if (node->min != 1 || node->max != 1)
+ {
+ RPRPatternElement *elem = &pat->elements[*idx];
+
+ beginIdx = *idx;
+ memset(elem, 0, sizeof(RPRPatternElement));
+ elem->varId = RPR_VARID_BEGIN;
+ elem->depth = depth;
+ elem->min = node->min;
+ elem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max;
+ Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF &&
+ elem->max >= 1 &&
+ (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max));
+ elem->next = RPR_ELEMIDX_INVALID; /* set by finalize */
+ elem->jump = RPR_ELEMIDX_INVALID; /* set after END */
+ if (node->reluctant)
+ elem->flags |= RPR_ELEM_RELUCTANT;
+ (*idx)++;
+ groupStartIdx = *idx; /* children start after BEGIN */
+ }
+
+ foreach(lc, node->children)
+ {
+ if (!fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth + 1))
+ bodyNullable = false;
+ }
+
+ /* Add group end marker if group has non-trivial quantifier */
+ if (node->min != 1 || node->max != 1)
+ {
+ RPRPatternElement *beginElem = &pat->elements[beginIdx];
+ RPRPatternElement *endElem = &pat->elements[*idx];
+
+ memset(endElem, 0, sizeof(RPRPatternElement));
+ endElem->varId = RPR_VARID_END;
+ endElem->depth = depth;
+ endElem->min = node->min;
+ endElem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max;
+ Assert(endElem->min >= 0 && endElem->min < RPR_QUANTITY_INF &&
+ endElem->max >= 1 &&
+ (endElem->max == RPR_QUANTITY_INF || endElem->min <= endElem->max));
+ endElem->next = RPR_ELEMIDX_INVALID;
+ endElem->jump = groupStartIdx; /* loop to first child */
+ if (node->reluctant)
+ endElem->flags |= RPR_ELEM_RELUCTANT;
+
+ /*
+ * If the group body is nullable (all paths can match empty), mark the
+ * END element so that nfa_advance_end can fast-forward the iteration
+ * count to min when reached via empty-match skip paths.
+ */
+ if (bodyNullable)
+ endElem->flags |= RPR_ELEM_EMPTY_LOOP;
+
+ (*idx)++;
+
+ /* Set BEGIN skip pointer (next is set by finalize) */
+ beginElem->jump = *idx; /* skip: go to after END */
+ }
+
+ return (node->min == 0 || bodyNullable);
+}
+
+/*
+ * fillRPRPatternAlt
+ * Fill an ALT pattern and its alternatives.
+ *
+ * Creates ALT_START marker, fills each alternative at increased depth,
+ * sets jump pointers for backtracking, and next pointers for successful paths.
+ *
+ * Returns true if any branch is nullable (OR semantics: one nullable
+ * branch suffices for the alternation to produce an empty match).
+ */
+static bool
+fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth)
+{
+ ListCell *lc;
+ ListCell *lc2;
+ RPRPatternElement *elem;
+ List *altBranchStarts = NIL;
+ List *altEndPositions = NIL;
+ int afterAltIdx;
+ bool anyNullable = false;
+
+ /* Add alternation start marker */
+ elem = &pat->elements[*idx];
+ memset(elem, 0, sizeof(RPRPatternElement));
+ elem->varId = RPR_VARID_ALT;
+ elem->depth = depth;
+ elem->min = 1;
+ elem->max = 1;
+ elem->next = RPR_ELEMIDX_INVALID;
+ elem->jump = RPR_ELEMIDX_INVALID;
+ (*idx)++;
+
+ /* Fill each alternative */
+ foreach(lc, node->children)
+ {
+ RPRPatternNode *alt = (RPRPatternNode *) lfirst(lc);
+ int branchStart = *idx;
+
+ altBranchStarts = lappend_int(altBranchStarts, branchStart);
+ if (fillRPRPattern(alt, pat, idx, depth + 1))
+ anyNullable = true;
+ altEndPositions = lappend_int(altEndPositions, *idx - 1);
+ }
+
+ /* Set jump on first element of each alternative to next alternative */
+ foreach(lc, altBranchStarts)
+ {
+ int firstElemIdx = lfirst_int(lc);
+
+ if (lnext(altBranchStarts, lc) != NULL)
+ pat->elements[firstElemIdx].jump = lfirst_int(lnext(altBranchStarts, lc));
+ }
+
+ /* Set next on last element of each alternative to after the alternation */
+ afterAltIdx = *idx;
+ lc2 = list_head(altBranchStarts);
+
+ foreach(lc, altEndPositions)
+ {
+ int endPos = lfirst_int(lc);
+ int branchStart = lfirst_int(lc2);
+
+ if (pat->elements[endPos].next != RPR_ELEMIDX_INVALID)
+ {
+ /*
+ * An inner ALT already set next on this element. Redirect all
+ * elements in this branch that share the same target to point to
+ * after this ALT instead.
+ */
+ int oldTarget = pat->elements[endPos].next;
+ int j;
+
+ for (j = branchStart; j <= endPos; j++)
+ {
+ if (pat->elements[j].next == oldTarget)
+ pat->elements[j].next = afterAltIdx;
+ }
+ }
+ else
+ {
+ pat->elements[endPos].next = afterAltIdx;
+ }
+
+ lc2 = lnext(altBranchStarts, lc2);
+ }
+
+ list_free(altBranchStarts);
+ list_free(altEndPositions);
+
+ return anyNullable;
+}
+
+/*
+ * fillRPRPattern
+ * Fill pattern elements array from AST (pass 2).
+ *
+ * Recursively traverses AST and populates pre-allocated elements array.
+ * Dispatches to type-specific fill functions.
+ *
+ * Returns true if the pattern is nullable (can match zero rows).
+ * For SEQ nodes, all children must be nullable (AND).
+ */
+static bool
+fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth)
+{
+ ListCell *lc;
+ bool allNullable = true;
+
+ /* Pattern nodes from parser are never NULL */
+ Assert(node != NULL);
+
+ check_stack_depth();
+
+ switch (node->nodeType)
+ {
+ case RPR_PATTERN_SEQ:
+ foreach(lc, node->children)
+ {
+ if (!fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth))
+ allNullable = false;
+ }
+ return allNullable;
+
+ case RPR_PATTERN_VAR:
+ return fillRPRPatternVar(node, pat, idx, depth);
+
+ case RPR_PATTERN_GROUP:
+ return fillRPRPatternGroup(node, pat, idx, depth);
+
+ case RPR_PATTERN_ALT:
+ return fillRPRPatternAlt(node, pat, idx, depth);
+ }
+
+ return false; /* unreachable */
+}
+
+/*
+ * finalizeRPRPattern
+ * Finalize pattern structure after filling elements.
+ *
+ * This performs:
+ * 1. Initialize absorption flag to false
+ * 2. Set up next pointers for sequential flow
+ * 3. Add FIN marker at the end
+ */
+static void
+finalizeRPRPattern(RPRPattern *result)
+{
+ int finIdx = result->numElements - 1;
+ int i;
+ RPRPatternElement *finElem;
+
+ /* Initialize absorption flag */
+ result->isAbsorbable = false;
+
+ /* Set up next pointers for elements that don't have one */
+ for (i = 0; i < finIdx; i++)
+ {
+ RPRPatternElement *elem = &result->elements[i];
+
+ if (elem->next == RPR_ELEMIDX_INVALID)
+ elem->next = (i < finIdx - 1) ? i + 1 : finIdx;
+
+ /* Verify quantifier range is valid */
+ Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF &&
+ elem->max >= 1 &&
+ (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max));
+ }
+
+ /* Add FIN marker at the end */
+ finElem = &result->elements[finIdx];
+ memset(finElem, 0, sizeof(RPRPatternElement));
+ finElem->varId = RPR_VARID_FIN;
+ finElem->depth = 0;
+ finElem->min = 1;
+ finElem->max = 1;
+ finElem->next = RPR_ELEMIDX_INVALID;
+ finElem->jump = RPR_ELEMIDX_INVALID;
+}
+
+/*-------------------------------------------------------------------------
+ * CONTEXT ABSORPTION: TWO-FLAG DESIGN
+ *-------------------------------------------------------------------------
+ *
+ * Context absorption eliminates redundant match searches by absorbing newer
+ * contexts that cannot produce longer matches than older contexts. This
+ * achieves O(n^2) -> O(n) performance improvement for patterns like A+ B.
+ *
+ * Core Insight:
+ * For pattern A+ B, if Ctx1 starts at row 0 and Ctx2 starts at row 1,
+ * both matching A continuously, Ctx1 will always have more A matches.
+ * When B finally appears, Ctx1's match (0 to current) is always longer
+ * than Ctx2's match (1 to current). So Ctx2 can be safely eliminated.
+ *
+ * Two Flags:
+ * 1. RPR_ELEM_ABSORBABLE - "Absorption judgment point"
+ * WHERE contexts can be compared for absorption.
+ * - Simple unbounded VAR (A+): the VAR element itself
+ * - Unbounded GROUP ((A B)+): the END element only
+ *
+ * 2. RPR_ELEM_ABSORBABLE_BRANCH - "Absorbable region marker"
+ * ALL elements within the absorbable region.
+ * - Used for tracking state.isAbsorbable at runtime
+ * - States leaving this region become non-absorbable permanently
+ *
+ * Why Two Flags?
+ * For pattern "(A B)+", contexts at different positions (one at A,
+ * another at B) cannot be compared - they must synchronize at END.
+ *
+ * Example: "(A B)+" with input A B A B A B...
+ * Row 0 (A): Ctx1 starts, matches A
+ * Row 1 (B): Ctx1 matches B -> END (count=1)
+ * Row 2 (A): Ctx1 loops to A, Ctx2 starts at A
+ * Row 3 (B): Ctx1 at END (count=2), Ctx2 at END (count=1)
+ * -> Both at END, comparable! Ctx1 absorbs Ctx2.
+ *
+ * Contexts synchronize at END every group-length rows. Therefore:
+ * - ABSORBABLE marks END as judgment point (where to compare)
+ * - ABSORBABLE_BRANCH keeps state.isAbsorbable=true through A->B->END
+ *
+ * Pattern Examples:
+ *
+ * Pattern: A+ B
+ * Element 0 (A): ABSORBABLE | ABSORBABLE_BRANCH <- judgment point
+ * Element 1 (B): (none)
+ * -> Compare at A every row. When contexts move to B, absorption stops.
+ *
+ * Pattern: (A B)+ C
+ * Element 0 (A): ABSORBABLE_BRANCH
+ * Element 1 (B): ABSORBABLE_BRANCH
+ * Element 2 (END): ABSORBABLE | ABSORBABLE_BRANCH <- judgment point
+ * Element 3 (C): (none)
+ * -> Compare at END every 2 rows. When contexts move to C, absorption stops.
+ *
+ * Pattern: (A+ B+)+ C
+ * Element 0 (A): ABSORBABLE | ABSORBABLE_BRANCH <- only first A+ flagged
+ * Element 1 (B): (none)
+ * Element 2 (END): (none)
+ * Element 3 (C): (none)
+ * -> Only first unbounded portion (A+) gets flags. Absorption happens
+ * at A during first iteration. After moving to B+, absorption stops.
+ *
+ * First Unbounded Portion Strategy:
+ * The algorithm only flags the FIRST unbounded portion starting from
+ * element 0. This is sufficient because:
+ * - Absorption in first portion already achieves O(n) complexity
+ * - Later portions have different synchronization characteristics
+ * - Nested unbounded patterns are too complex for simple absorption
+ * - Complex patterns (nested groups, etc.) naturally die from mismatch
+ *
+ * Runtime Usage (in nodeWindowAgg.c):
+ * - state.isAbsorbable = (previous && elem.ABSORBABLE_BRANCH)
+ * - Monotonic: once false, stays false (cannot re-enter region)
+ * - context.hasAbsorbableState: can absorb others (>=1 absorbable state)
+ * - context.allStatesAbsorbable: can be absorbed (ALL states absorbable)
+ * - Absorption check: if Ctx1.hasAbsorbable && Ctx2.allAbsorbable,
+ * compare counts at same elemIdx, absorb if Ctx1.count >= Ctx2.count
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * isFixedLengthChildren
+ * Check if all children at scopeDepth have fixed-length quantifiers
+ * (min == max), recursively for nested subgroups.
+ *
+ * A fixed-length group is semantically equivalent to unrolling each child
+ * to {1,1} copies, which is the existing Case 2 already proven correct
+ * for absorption. This check recognizes fixed-length groups at compile
+ * time without actually unrolling them.
+ *
+ * Traverses the flat element array starting at idx. For VAR elements,
+ * checks min == max. For BEGIN elements (nested subgroups), recurses
+ * into the subgroup and also checks the subgroup's END quantifier.
+ * ALT elements are rejected (alternation inside absorbable group is
+ * not supported).
+ *
+ * Returns true if all children are fixed-length, stopping at the END
+ * element at scopeDepth - 1.
+ */
+static bool
+isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, RPRDepth scopeDepth)
+{
+ RPRPatternElement *e = &pattern->elements[idx];
+
+ check_stack_depth();
+
+ while (e->depth == scopeDepth)
+ {
+ if (RPRElemIsVar(e))
+ {
+ if (e->min != e->max)
+ return false;
+ }
+ else if (RPRElemIsBegin(e))
+ {
+ RPRElemIdx childIdx = e->next;
+
+ /* Recurse into subgroup children at scopeDepth + 1 */
+ if (!isFixedLengthChildren(pattern, childIdx, scopeDepth + 1))
+ return false;
+
+ /* Advance past the subgroup to its END element */
+ e = &pattern->elements[e->next];
+ while (e->depth > scopeDepth)
+ e = &pattern->elements[e->next];
+
+ /* e is now the END at scopeDepth; check its quantifier */
+ Assert(RPRElemIsEnd(e) && e->depth == scopeDepth);
+ if (e->min != e->max)
+ return false;
+ }
+ else
+ {
+ /* ALT inside group: not supported for absorption */
+ return false;
+ }
+
+ Assert(e->next != RPR_ELEMIDX_INVALID);
+ e = &pattern->elements[e->next];
+ }
+
+ return true;
+}
+
+/*
+ * isUnboundedStart
+ * Check if the element at idx starts an unbounded greedy sequence.
+ *
+ * For context absorption to work, the sequence starting at idx must be:
+ * - Unbounded (max = infinity)
+ * - Greedy (not reluctant)
+ * - At the start of current scope
+ *
+ * Two cases are handled:
+ * 1. Simple VAR: A+ B C - A has max=INF, gets both flags
+ * 2. Unbounded GROUP with fixed-length children: (A B{2})+ C
+ * All children must have min == max (recursively for nested subgroups).
+ * This is equivalent to unrolling to {1,1} VARs, e.g., (A B B)+ C.
+ * All elements within the group get ABSORBABLE_BRANCH.
+ * Only the unbounded END gets ABSORBABLE (judgment point).
+ * Examples:
+ * (A B{2})+ C - B{2} has min==max, step=3
+ * (A (B C){2} D)+ E - nested {2} subgroup, step=6
+ * ((A (B C){2}){2})+ - doubly nested {2}, step=10
+ * (A ((B C{3}){2} D){2} E)+ F - deep nesting, step=20
+ *
+ * Returns false for patterns where absorption cannot work:
+ * - A B+ (unbounded not at start)
+ * - A+? B (reluctant quantifier)
+ * - (A | B)+ (ALT inside group)
+ * - (A B+)+ (variable-length element inside group)
+ * - (A B{2,5})+ (min != max inside group)
+ */
+static bool
+isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx)
+{
+ RPRPatternElement *elem = &pattern->elements[idx];
+ RPRDepth startDepth = elem->depth;
+ RPRPatternElement *e;
+
+ /* Case 1: Simple unbounded VAR at start (greedy only) */
+ if (RPRElemIsVar(elem) && elem->max == RPR_QUANTITY_INF &&
+ !RPRElemIsReluctant(elem))
+ {
+ /* Set both flags on first element */
+ elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE;
+ return true;
+ }
+
+ /*
+ * Case 2: Unbounded GROUP with fixed-length children. Each child must
+ * have min == max (recursively for nested subgroups), ensuring a fixed
+ * step size per iteration so that count-dominance holds.
+ */
+ if (!isFixedLengthChildren(pattern, idx, startDepth))
+ return false;
+
+ /* Find the END element at startDepth - 1 */
+ e = &pattern->elements[idx];
+ while (e->depth >= startDepth)
+ e = &pattern->elements[e->next];
+
+ /* END must be unbounded greedy */
+ if (e->depth == startDepth - 1 &&
+ RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF &&
+ !RPRElemIsReluctant(e))
+ {
+ Assert(e->jump == idx); /* END points back to first child */
+
+ /* Set ABSORBABLE_BRANCH on all children, ABSORBABLE on END only */
+ for (e = elem; !RPRElemIsEnd(e) || e->depth >= startDepth;
+ e = &pattern->elements[e->next])
+ e->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * computeAbsorbabilityRecursive
+ * Recursively check absorbability starting from given index.
+ *
+ * If the element at startIdx is ALT, recursively checks each branch independently.
+ * Each branch gets its own absorbability status, and if any branch is absorbable,
+ * the ALT element itself is marked with RPR_ELEM_ABSORBABLE_BRANCH.
+ *
+ * If BEGIN, skips to first child.
+ *
+ * Otherwise (VAR), checks if the element starts an unbounded sequence via
+ * isUnboundedStart.
+ */
+static void
+computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx,
+ bool *hasAbsorbable)
+{
+ RPRPatternElement *elem = &pattern->elements[startIdx];
+
+ check_stack_depth();
+
+ if (RPRElemIsAlt(elem))
+ {
+ /* ALT: recursively check each branch */
+ RPRElemIdx branchIdx = elem->next;
+ RPRPatternElement *branchFirst;
+ bool branchAbsorbable;
+
+ while (branchIdx != RPR_ELEMIDX_INVALID)
+ {
+ branchAbsorbable = false;
+
+ Assert(branchIdx < pattern->numElements);
+ branchFirst = &pattern->elements[branchIdx];
+
+ /* Stop if element is outside ALT scope (not a branch) */
+ if (branchFirst->depth <= elem->depth)
+ break;
+
+ /* Recursively check this branch */
+ computeAbsorbabilityRecursive(pattern, branchIdx, &branchAbsorbable);
+ if (branchAbsorbable)
+ {
+ *hasAbsorbable = true;
+ }
+
+ branchIdx = branchFirst->jump;
+ }
+
+ /* Mark ALT element if any branch is absorbable */
+ if (*hasAbsorbable)
+ elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ }
+ else if (RPRElemIsBegin(elem))
+ {
+ /*
+ * BEGIN: first try to treat this BEGIN's children as an unbounded
+ * group directly (handles nested fixed-length groups like ((A{2}
+ * B{3}){2})+). If that fails, skip to first child and recurse as
+ * before.
+ */
+ if (isUnboundedStart(pattern, elem->next))
+ {
+ *hasAbsorbable = true;
+ elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ }
+ else
+ {
+ computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable);
+
+ /* Mark BEGIN element if contents are absorbable */
+ if (*hasAbsorbable)
+ elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH;
+ }
+ }
+ else
+ {
+ /* Should never reach END - structural invariant of pattern AST */
+ Assert(!RPRElemIsEnd(elem));
+
+ /* Non-ALT, non-BEGIN: check if unbounded start */
+ if (isUnboundedStart(pattern, startIdx))
+ {
+ *hasAbsorbable = true;
+ }
+ }
+}
+
+/*
+ * computeAbsorbability
+ * Determine if pattern supports context absorption optimization.
+ *
+ * Context absorption eliminates redundant match searches by absorbing
+ * newer contexts that cannot produce longer matches than older contexts.
+ * This achieves O(n^2) -> O(n) performance improvement.
+ *
+ * Only greedy unbounded quantifiers at pattern start can be absorbable.
+ * Reluctant quantifiers are excluded because they don't maintain monotonic
+ * decrease property required for safe absorption.
+ *
+ * This function sets two flags:
+ * RPR_ELEM_ABSORBABLE: Absorption judgment point
+ * - Simple unbounded VAR: the VAR itself (e.g., A in A+)
+ * - Unbounded GROUP: the END element (e.g., END in (A B)+)
+ * RPR_ELEM_ABSORBABLE_BRANCH: All elements in absorbable region
+ * - All elements within the same scope as unbounded start
+ *
+ * Examples:
+ * A+ B C - absorbable (A gets both flags)
+ * (A B)+ C - absorbable (A,B,END get BRANCH, END gets ABSORBABLE)
+ * A B+ - NOT absorbable (unbounded not at start)
+ * A+? B C - NOT absorbable (reluctant quantifier)
+ * (A+ B+)+ - only first A+ on first iteration (nested unbounded not supported)
+ * A+ | B+ - both branches absorbable independently
+ * A+ | C D - only A+ branch absorbable (C D branch not absorbable)
+ * ((A+ B) | C) D - nested ALT: A+ branch is absorbable
+ */
+static void
+computeAbsorbability(RPRPattern *pattern)
+{
+ bool hasAbsorbable = false;
+
+ /* Parser always produces at least one element + FIN */
+ Assert(pattern->numElements >= 2);
+
+ /* Start recursion from first element */
+ computeAbsorbabilityRecursive(pattern, 0, &hasAbsorbable);
+ pattern->isAbsorbable = hasAbsorbable;
+}
+
+/*
+ * collectPatternVariablesRecursive
+ * Recursively collect variable names from pattern AST.
+ */
+static void
+collectPatternVariablesRecursive(RPRPatternNode *node, List **varNames)
+{
+ ListCell *lc;
+
+ Assert(node != NULL);
+
+ check_stack_depth();
+
+ switch (node->nodeType)
+ {
+ case RPR_PATTERN_VAR:
+ /* Add variable if not already in list */
+ foreach(lc, *varNames)
+ {
+ if (strcmp(strVal(lfirst(lc)), node->varName) == 0)
+ return; /* Already collected */
+ }
+ *varNames = lappend(*varNames, makeString(pstrdup(node->varName)));
+ break;
+
+ case RPR_PATTERN_SEQ:
+ case RPR_PATTERN_ALT:
+ case RPR_PATTERN_GROUP:
+ foreach(lc, node->children)
+ {
+ collectPatternVariablesRecursive((RPRPatternNode *) lfirst(lc),
+ varNames);
+ }
+ break;
+ }
+}
+
+/*
+ * collectPatternVariables
+ * Collect unique variable names used in PATTERN clause.
+ *
+ * Returns a list of String nodes containing variable names.
+ * Used to filter defineClause before varId assignment.
+ */
+List *
+collectPatternVariables(RPRPatternNode *pattern)
+{
+ List *varNames = NIL;
+
+ /* Caller ensures pattern is not NULL */
+ Assert(pattern != NULL);
+
+ collectPatternVariablesRecursive(pattern, &varNames);
+ return varNames;
+}
+
+/*
+ * buildDefineVariableList
+ * Build defineVariableList from defineClause.
+ *
+ * The parser already ensures that all DEFINE variables appear in PATTERN,
+ * so no filtering is needed here.
+ *
+ * Sets *defineVariableList to list of variable names (String nodes).
+ */
+void
+buildDefineVariableList(List *defineClause, List **defineVariableList)
+{
+ ListCell *lc;
+
+ *defineVariableList = NIL;
+
+ foreach(lc, defineClause)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lc);
+
+ *defineVariableList = lappend(*defineVariableList,
+ makeString(pstrdup(te->resname)));
+ }
+}
+
+/*
+ * buildRPRPattern
+ * Compile pattern AST to flat bytecode array.
+ *
+ * Compilation phases:
+ * 1. Optimize AST (flatten, merge, deduplicate)
+ * 2. Scan: collect variables, count elements (pass 1)
+ * 3. Allocate result structure
+ * 4. Fill elements from AST (pass 2)
+ * 5. Finalize pattern structure
+ * 6. Compute context absorption eligibility
+ *
+ * Called from createplan.c during plan creation.
+ */
+RPRPattern *
+buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList,
+ RPSkipTo rpSkipTo, int frameOptions,
+ bool hasMatchStartDependent)
+{
+ RPRPattern *result;
+ RPRPatternNode *optimized;
+ char *varNamesStack[RPR_VARID_MAX];
+ int numVars;
+ int numElements;
+ RPRDepth maxDepth;
+ int idx;
+ bool hasLimitedFrame;
+
+ /* Caller must check for NULL pattern before calling */
+ Assert(pattern != NULL);
+
+ /* Optimize the pattern tree */
+ optimized = optimizeRPRPattern(copyObject(pattern));
+
+ /* Collect variable names from DEFINE clause */
+ numVars = collectDefineVariables(defineVariableList, varNamesStack);
+
+ /* Scan pattern: collect variables, count elements, validate limits */
+ scanRPRPattern(optimized, varNamesStack, &numVars, &numElements, &maxDepth);
+
+ /* Allocate result structure */
+ result = allocateRPRPattern(numVars, numElements, maxDepth, varNamesStack);
+
+ /* Fill elements (pass 2) */
+ idx = 0;
+ fillRPRPattern(optimized, result, &idx, 0);
+
+ /* Finalize: set up next pointers, flags, and add FIN marker */
+ finalizeRPRPattern(result);
+
+ /*
+ * Compute context absorption eligibility. Absorption requires both
+ * structural absorbability and runtime conditions. Check runtime
+ * conditions first to avoid unnecessary pattern analysis.
+ *
+ * Runtime conditions for absorption:
+ *
+ * 1. SKIP TO PAST LAST ROW required (not SKIP TO NEXT ROW): With NEXT
+ * ROW, after each match the search resumes from the next row, so contexts
+ * are immediately discarded. No redundant contexts accumulate, making
+ * absorption unnecessary.
+ *
+ * 2. Unbounded frame end required (not ROWS with bounded end): With a
+ * bounded frame (e.g., ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING),
+ * matches may be truncated at frame boundaries. This changes the
+ * absorption semantics - older contexts don't necessarily produce longer
+ * matches when frame limits apply differently to each context.
+ */
+ hasLimitedFrame = (frameOptions & FRAMEOPTION_ROWS) &&
+ !(frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING);
+
+ if (rpSkipTo == ST_PAST_LAST_ROW && !hasLimitedFrame &&
+ !hasMatchStartDependent)
+ {
+ /* Runtime conditions met - check structural absorbability */
+ computeAbsorbability(result);
+ }
+
+ return result;
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index ff0e875f2a2..813a326bd78 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -214,7 +214,6 @@ static List *set_windowagg_runcondition_references(PlannerInfo *root,
static void record_elided_node(PlannerGlobal *glob, int plan_node_id,
NodeTag elided_type, Bitmapset *relids);
-
/*****************************************************************************
*
* SUBPLAN REFERENCES
@@ -2633,6 +2632,34 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
NRM_EQUAL,
NUM_EXEC_QUAL(plan));
+ /*
+ * Replace an expression tree in each DEFINE clause so that all Var
+ * nodes's varno refers to OUTER_VAR.
+ */
+ if (IsA(plan, WindowAgg))
+ {
+ WindowAgg *wplan = (WindowAgg *) plan;
+
+ if (wplan->defineClause != NIL)
+ {
+ foreach(l, wplan->defineClause)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ tle = flatCopyTargetEntry(tle);
+ tle->expr = (Expr *)
+ fix_upper_expr(root,
+ (Node *) tle->expr,
+ subplan_itlist,
+ OUTER_VAR,
+ rtoffset,
+ NRM_EQUAL,
+ NUM_EXEC_QUAL(plan));
+ lfirst(l) = tle;
+ }
+ }
+ }
+
pfree(subplan_itlist);
}
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 4424fdbe906..02898a9106b 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -2593,6 +2593,15 @@ perform_pullup_replace_vars(PlannerInfo *root,
parse->returningList = (List *)
pullup_replace_vars((Node *) parse->returningList, rvcontext);
+ foreach(lc, parse->windowClause)
+ {
+ WindowClause *wc = lfirst_node(WindowClause, lc);
+
+ if (wc->defineClause != NIL)
+ wc->defineClause = (List *)
+ pullup_replace_vars((Node *) wc->defineClause, rvcontext);
+ }
+
if (parse->onConflict)
{
parse->onConflict->onConflictSet = (List *)
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 14a1dfed2b9..fd4bdf2cb31 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -18,6 +18,7 @@
#include "access/stratnum.h"
#include "nodes/bitmapset.h"
#include "nodes/lockoptions.h"
+#include "nodes/parsenodes.h"
#include "nodes/primnodes.h"
@@ -1244,6 +1245,70 @@ typedef struct Agg
List *chain;
} Agg;
+/* ----------------
+ * Row Pattern Recognition compiled pattern types
+ * ----------------
+ */
+
+/* Type definitions for RPR pattern elements */
+typedef uint8 RPRVarId; /* pattern variable ID */
+typedef uint8 RPRElemFlags; /* element flags */
+typedef uint8 RPRDepth; /* group nesting depth */
+typedef int32 RPRQuantity; /* quantifier min/max */
+typedef int16 RPRElemIdx; /* element array index */
+
+/*
+ * RPRPatternElement - flat element for NFA pattern matching (16 bytes)
+ *
+ * Layout optimized for alignment (no padding holes):
+ * varId(1) + depth(1) + flags(1) + reserved(1) + min(4) + max(4) + next(2) + jump(2)
+ */
+typedef struct RPRPatternElement
+{
+ RPRVarId varId; /* variable ID, or special value for control */
+ RPRDepth depth; /* group nesting depth */
+ RPRElemFlags flags; /* flags (reluctant, etc.) */
+ uint8 reserved; /* reserved padding byte */
+ RPRQuantity min; /* quantifier minimum */
+ RPRQuantity max; /* quantifier maximum */
+ RPRElemIdx next; /* next element index */
+ RPRElemIdx jump; /* jump target (for ALT/GROUP) */
+} RPRPatternElement;
+
+/*
+ * RPRPattern - compiled pattern for NFA execution
+ *
+ * Requires custom copy/out/read functions due to elements array.
+ */
+typedef struct RPRPattern
+{
+ pg_node_attr(custom_copy_equal, custom_read_write)
+
+ NodeTag type; /* T_RPRPattern */
+ int numVars; /* number of pattern variables */
+ char **varNames; /* array of variable names (DEFINE order
+ * first) */
+ RPRDepth maxDepth; /* maximum group nesting depth */
+ int numElements; /* number of elements */
+ RPRPatternElement *elements; /* array of pattern elements */
+
+ /*----------------
+ * Context absorption optimization.
+ *
+ * Absorption is only safe when later matches are guaranteed to be
+ * suffixes of earlier matches. This requires simple pattern structure:
+ *
+ * Case 1: No ALT, single unbounded element (A+, (A B)+)
+ * Case 2: Top-level ALT with each branch being single unbounded (A+ | B+)
+ *
+ * Complex patterns like A B (A B)+ could theoretically be transformed to
+ * (A B){2,} for absorption, but this changes lexical order and is not
+ * implemented. Similarly, (A|B)+ cannot be absorbed because different
+ * start positions produce different match contents (not suffix relation).
+ */
+ bool isAbsorbable; /* true if pattern supports context absorption */
+} RPRPattern;
+
/* ----------------
* window aggregate node
* ----------------
@@ -1314,6 +1379,44 @@ typedef struct WindowAgg
/* nulls sort first for in_range tests? */
bool inRangeNullsFirst;
+ /* Row Pattern Recognition AFTER MATCH SKIP clause */
+ RPSkipTo rpSkipTo; /* Row Pattern Skip To type */
+
+ /* Compiled Row Pattern for NFA execution */
+ struct RPRPattern *rpPattern;
+
+ /* Row Pattern DEFINE clause (list of TargetEntry) */
+ List *defineClause;
+
+ /*
+ * Bitmapset of DEFINE variable indices whose expressions depend on
+ * match_start (contain FIRST, LAST-with-offset, or compound
+ * PREV_FIRST/NEXT_FIRST/PREV_LAST/NEXT_LAST with offset). Variables in
+ * this set require per-context re-evaluation during NFA processing.
+ */
+ Bitmapset *defineMatchStartDependent;
+
+ /*
+ * Navigation offset status and values for tuplestore mark optimization.
+ * See RPRNavOffsetKind in nodes/parsenodes.h.
+ *
+ * navMaxOffset: maximum backward reach from currentpos (contributed by
+ * PREV, LAST-with-offset, compound PREV_LAST/NEXT_LAST). Only valid when
+ * navMaxOffsetKind == RPR_NAV_OFFSET_FIXED.
+ *
+ * navFirstOffset: minimum forward offset from match_start (contributed by
+ * FIRST, compound PREV_FIRST/NEXT_FIRST). Can be negative for compound
+ * PREV_FIRST. Only valid when navFirstOffsetKind == RPR_NAV_OFFSET_FIXED
+ * and hasFirstNav == true.
+ */
+ RPRNavOffsetKind navMaxOffsetKind;
+ int64 navMaxOffset;
+
+ /* true if FIRST-based navigation (FIRST, PREV_FIRST, NEXT_FIRST) is used */
+ bool hasFirstNav;
+ RPRNavOffsetKind navFirstOffsetKind;
+ int64 navFirstOffset;
+
/*
* false for all apart from the WindowAgg that's closest to the root of
* the plan
diff --git a/src/include/optimizer/rpr.h b/src/include/optimizer/rpr.h
new file mode 100644
index 00000000000..0a14cfad79b
--- /dev/null
+++ b/src/include/optimizer/rpr.h
@@ -0,0 +1,65 @@
+/*-------------------------------------------------------------------------
+ *
+ * rpr.h
+ * Row Pattern Recognition pattern compilation for planner
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/optimizer/rpr.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef OPTIMIZER_RPR_H
+#define OPTIMIZER_RPR_H
+
+#include "nodes/parsenodes.h"
+#include "nodes/plannodes.h"
+
+/* Limits and special values */
+/*
+ * Maximum number of unique pattern variables (varId 0 to RPR_VARID_MAX - 1).
+ * Values from RPR_VARID_BEGIN (252) onward are reserved for control elements.
+ */
+#define RPR_VARID_MAX 251
+#define RPR_QUANTITY_INF INT32_MAX /* unbounded quantifier */
+#define RPR_COUNT_MAX INT32_MAX /* max runtime count (NFA state) */
+#define RPR_ELEMIDX_MAX INT16_MAX /* max pattern elements */
+#define RPR_ELEMIDX_INVALID ((RPRElemIdx) -1) /* invalid index */
+#define RPR_DEPTH_MAX (UINT8_MAX - 1) /* max pattern nesting depth: 254 */
+#define RPR_DEPTH_NONE UINT8_MAX /* no enclosing group (top-level) */
+
+/* Special varId values for control elements (252-255) */
+#define RPR_VARID_BEGIN ((RPRVarId) 252) /* group begin */
+#define RPR_VARID_END ((RPRVarId) 253) /* group end */
+#define RPR_VARID_ALT ((RPRVarId) 254) /* alternation start */
+#define RPR_VARID_FIN ((RPRVarId) 255) /* pattern finish */
+
+/* Element flags */
+#define RPR_ELEM_RELUCTANT 0x01 /* reluctant (non-greedy)
+ * quantifier */
+#define RPR_ELEM_EMPTY_LOOP 0x02 /* END: group body can produce
+ * empty match */
+#define RPR_ELEM_ABSORBABLE_BRANCH 0x04 /* element in absorbable region */
+#define RPR_ELEM_ABSORBABLE 0x08 /* absorption judgment point */
+
+/* Accessor macros for RPRPatternElement */
+#define RPRElemIsReluctant(e) (((e)->flags & RPR_ELEM_RELUCTANT) != 0)
+#define RPRElemCanEmptyLoop(e) (((e)->flags & RPR_ELEM_EMPTY_LOOP) != 0)
+#define RPRElemIsAbsorbableBranch(e) (((e)->flags & RPR_ELEM_ABSORBABLE_BRANCH) != 0)
+#define RPRElemIsAbsorbable(e) (((e)->flags & RPR_ELEM_ABSORBABLE) != 0)
+#define RPRElemIsVar(e) ((e)->varId <= RPR_VARID_MAX)
+#define RPRElemIsBegin(e) ((e)->varId == RPR_VARID_BEGIN)
+#define RPRElemIsEnd(e) ((e)->varId == RPR_VARID_END)
+#define RPRElemIsAlt(e) ((e)->varId == RPR_VARID_ALT)
+#define RPRElemIsFin(e) ((e)->varId == RPR_VARID_FIN)
+#define RPRElemCanSkip(e) ((e)->min == 0)
+
+extern List *collectPatternVariables(RPRPatternNode *pattern);
+extern void buildDefineVariableList(List *defineClause,
+ List **defineVariableList);
+extern RPRPattern *buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList,
+ RPSkipTo rpSkipTo, int frameOptions,
+ bool hasMatchStartDependent);
+
+#endif /* OPTIMIZER_RPR_H */
--
2.43.0