nocfbot-0018-Clarify-execRPR.c-comments-and-tighten-an-Assert-.txt

text/plain

Filename: nocfbot-0018-Clarify-execRPR.c-comments-and-tighten-an-Assert-.txt
Type: text/plain
Part: 18
Message: Re: Row pattern recognition
From 71ee26f3954ebebb9c22aa2d279544edaa0a4d4f Mon Sep 17 00:00:00 2001
From: Henson Choi <assam258@gmail.com>
Date: Wed, 27 May 2026 13:44:55 +0900
Subject: [PATCH 18/26] Clarify execRPR.c comments and tighten an Assert per
 Jian He's review

- nfa_advance_var: add Assert that elem->next is within bounds, as
  any reachable VAR's next pointer must be a valid index.
- nfa_advance: document the state->next reset point as the boundary
  contract for the epsilon-expansion DFS, naming the other linking
  site (nfa_add_state_unique) as the pair.
- nfa_add_state_unique: back-reference the asymmetric visited-marking
  scheme, whose primary explanation sits in nfa_advance_state.
- nfa_states_equal: rewrite the compareDepth comment to explain both
  the +1 slot arithmetic and why deeper slots are excluded.
- nfa_advance_begin: replace the misleading "Greedy: enter first,
  skip second" label with one that covers both greedy-optional and
  non-nullable cases.
- nfa_advance_alt: explain when the depth break actually fires
  (quantified-group last branch) and why <= is the correct relation.
- ExecRPRFinalizeAllContexts: reframe as the partition-end
  classification policy holder, enumerating the three context shapes
  that survive into Finalize and how each is handled.
---
 src/backend/executor/execRPR.c | 75 ++++++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 7 deletions(-)

diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c
index e1caa7bb528..261e1209744 100644
--- a/src/backend/executor/execRPR.c
+++ b/src/backend/executor/execRPR.c
@@ -311,9 +311,19 @@ nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2)
 	if (s1->elemIdx != s2->elemIdx)
 		return false;
 
-	/* Compare counts up to current element's depth */
+	/*
+	 * Compare counts up to current element's depth.  Two states sharing
+	 * elemIdx are equivalent iff every enclosing-or-current depth count
+	 * matches.
+	 *
+	 * The +1 is the slot arithmetic: comparing through depth N requires
+	 * counts[0..N], i.e., N+1 entries.  Deeper slots (counts[d] with d >
+	 * elem->depth) are excluded because they hold scratch state from inner
+	 * groups that gets zeroed on re-entry (see END loop-back in
+	 * nfa_advance_end), and so must not participate in equivalence judgment.
+	 */
 	elem = &pattern->elements[s1->elemIdx];
-	compareDepth = elem->depth + 1; /* depth 0 needs 1 count, etc. */
+	compareDepth = elem->depth + 1;
 
 	if (memcmp(s1->counts, s2->counts, sizeof(int32) * compareDepth) != 0)
 		return false;
@@ -334,7 +344,12 @@ nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *
 	RPRNFAState *s;
 	RPRNFAState *tail = NULL;
 
-	/* Mark VAR in visited before duplicate check to prevent DFS loops */
+	/*
+	 * Mark VAR in visited before duplicate check to prevent DFS loops. This
+	 * is the deferred half of the asymmetric visited-marking scheme; see
+	 * nfa_advance_state for the non-VAR (END/ALT/BEGIN/FIN) half and the
+	 * rationale for the asymmetry.
+	 */
 	nfa_mark_visited(winstate, state->elemIdx);
 
 	/* Check for duplicate and find tail */
@@ -950,7 +965,16 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx,
 		RPRPatternElement *altElem = &elements[altIdx];
 		RPRNFAState *newState;
 
-		/* Stop if element is outside ALT scope (not a branch) */
+		/*
+		 * Stop if element is outside ALT scope (not a branch).  The check
+		 * fires when the last branch is a quantified group whose BEGIN.jump
+		 * (set by fillRPRPatternGroup) is preserved -- not overridden by
+		 * fillRPRPatternAlt, which only links non-last branch heads -- and
+		 * leads to a post-ALT element.  Other branch shapes terminate the
+		 * walk earlier via altIdx = RPR_ELEMIDX_INVALID.  Use <=, not <: the
+		 * post-ALT element may sit at the same depth as the ALT when the ALT
+		 * has a sibling at that level.
+		 */
 		if (altElem->depth <= elem->depth)
 			break;
 
@@ -1016,7 +1040,12 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx,
 	}
 	else
 	{
-		/* Greedy: enter first, skip second */
+		/*
+		 * Greedy-or-non-nullable: route to the first child.  For optional
+		 * groups (skipState != NULL, greedy min=0) additionally create the
+		 * skip path; for non-nullable groups (skipState == NULL, min>0) the
+		 * skip-path action is suppressed by the guard below.
+		 */
 		state->elemIdx = elem->next;
 		nfa_route_to_elem(winstate, ctx, state,
 						  &elements[state->elemIdx], currentPos);
@@ -1205,6 +1234,9 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx,
 	/* After a successful match, count >= 1, so at least one must be true */
 	Assert(canLoop || canExit);
 
+	/* elem->next must be a valid index for any reachable VAR */
+	Assert(elem->next >= 0 && elem->next < pattern->numElements);
+
 	if (canLoop && canExit)
 	{
 		/*
@@ -1428,6 +1460,15 @@ nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos)
 
 		state = states;
 		states = states->next;
+
+		/*
+		 * Boundary contract: state->next is reset to NULL here, before
+		 * crossing into nfa_advance_state's epsilon-expansion DFS.  The inner
+		 * branches (nfa_advance_var, nfa_advance_begin/end/alt) treat
+		 * state->next as already-NULL and don't reset it themselves; the
+		 * other linking site is nfa_add_state_unique, which sets it when
+		 * appending to ctx->states.
+		 */
 		state->next = NULL;
 
 		nfa_advance_state(winstate, ctx, state, currentPos);
@@ -1779,8 +1820,28 @@ ExecRPRCleanupDeadContexts(WindowAggState *winstate, RPRNFAContext *excludeCtx)
 /*
  * ExecRPRFinalizeAllContexts
  *
- * Finalize all active contexts when partition ends.
- * Match with NULL to force mismatch, then advance to process epsilon transitions.
+ * Partition-end classification policy: kill any VAR states still pursuing
+ * when rows run out, so cleanup sees a uniform ctx->states == NULL across
+ * every context.  By the time this runs, all genuine FIN reaches have
+ * already been recorded in-flight; three shapes survive here:
+ *
+ *   - Pure pursuit (matchedState == NULL): VAR states waiting for input
+ *     that never arrives (e.g., A+ B mid-pattern at partition end).
+ *   - Empty-match candidate + pursuit (matchedState != NULL,
+ *     matchEndRow < matchStartRow): initial-advance FIN-via-skip recorded
+ *     an empty match while VAR states are still chasing a longer one
+ *     (e.g., greedy A*).
+ *   - Real match + pursuit (matchedState != NULL,
+ *     matchEndRow >= matchStartRow): a match has been recorded and VAR
+ *     states are still looping for a longer one.
+ *
+ * Killing the VAR reclassifies the first two as failures in cleanup
+ * (otherwise they linger without contributing to stats).  The third is
+ * stat-neutral -- cleanup skips it either way -- but goes through the
+ * same uniform path so partition-end classification stays centralized.
+ *
+ * Implementation: nfa_match with NULL forces VAR mismatch; nfa_advance
+ * then drains any remaining epsilon transitions.
  */
 void
 ExecRPRFinalizeAllContexts(WindowAggState *winstate, int64 lastPos)
-- 
2.50.1 (Apple Git-155)