v35-0001-Row-pattern-recognition-patch-for-raw-parser.patch

application/octet-stream

Filename: v35-0001-Row-pattern-recognition-patch-for-raw-parser.patch
Type: application/octet-stream
Part: 0
Message: Re: Row pattern recognition
From 3048f88fd4bffecddcdec394843e5da6b13a22f6 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Tue, 18 Nov 2025 11:22:15 +0900
Subject: [PATCH v35 1/8] Row pattern recognition patch for raw parser.

---
 src/backend/parser/gram.y       | 123 +++++++++++++++++++++++++++-----
 src/include/nodes/parsenodes.h  |  47 ++++++++++++
 src/include/parser/kwlist.h     |   5 ++
 src/include/parser/parse_node.h |   1 +
 4 files changed, 160 insertions(+), 16 deletions(-)

diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c3a0a354a9c..fccc26964a0 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -679,6 +679,14 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				json_object_constructor_null_clause_opt
 				json_array_constructor_null_clause_opt
 
+%type <target>	row_pattern_definition
+%type <node>	opt_row_pattern_common_syntax
+				row_pattern_term
+%type <list>	row_pattern_definition_list
+				row_pattern
+%type <ival>	opt_row_pattern_skip_to
+%type <boolean>	opt_row_pattern_initial_or_seek
+
 /*
  * Non-keyword token types.  These are hard-wired into the "flex" lexer.
  * They must be listed first so that their numeric codes do not depend on
@@ -721,7 +729,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
 
 	DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
-	DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC
+	DEFERRABLE DEFERRED DEFINE DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC
 	DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P
 	DOUBLE_P DROP
 
@@ -737,7 +745,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	HANDLER HAVING HEADER_P HOLD HOUR_P
 
 	IDENTITY_P IF_P IGNORE_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
-	INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
+	INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIAL INITIALLY INLINE_P
 	INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
 	INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
 
@@ -762,8 +770,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	ORDER ORDINALITY OTHERS OUT_P OUTER_P
 	OVER OVERLAPS OVERLAY OVERRIDING OWNED OWNER
 
-	PARALLEL PARAMETER PARSER PARTIAL PARTITION PASSING PASSWORD PATH
-	PERIOD PLACING PLAN PLANS POLICY
+	PARALLEL PARAMETER PARSER PARTIAL PARTITION PASSING PASSWORD PAST PATH
+	PATTERN_P PERIOD PLACING PLAN PLANS POLICY
 	POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY
 	PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROCEDURES PROGRAM PUBLICATION
 
@@ -774,7 +782,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	RESET RESPECT_P RESTART RESTRICT RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP
 	ROUTINE ROUTINES ROW ROWS RULE
 
-	SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SELECT
+	SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SEEK SELECT
 	SEQUENCE SEQUENCES
 	SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW
 	SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SOURCE SQL_P STABLE STANDALONE_P
@@ -857,8 +865,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  * reference point for a precedence level that we can assign to other
  * keywords that lack a natural precedence level.
  *
- * We need to do this for PARTITION, RANGE, ROWS, and GROUPS to support
- * opt_existing_window_name (see comment there).
+ * We need to do this for PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL,
+ * SEEK, and PATTERN_P to support opt_existing_window_name (see comment there).
  *
  * The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING
  * are even messier: since UNBOUNDED is an unreserved keyword (per spec!),
@@ -888,6 +896,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %nonassoc	UNBOUNDED NESTED /* ideally would have same precedence as IDENT */
 %nonassoc	IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP
 			SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH
+			AFTER INITIAL SEEK PATTERN_P
 %left		Op OPERATOR		/* multi-character ops and user-defined operators */
 %left		'+' '-'
 %left		'*' '/' '%'
@@ -16568,7 +16577,8 @@ over_clause: OVER window_specification
 		;
 
 window_specification: '(' opt_existing_window_name opt_partition_clause
-						opt_sort_clause opt_frame_clause ')'
+						opt_sort_clause opt_frame_clause
+						opt_row_pattern_common_syntax ')'
 				{
 					WindowDef  *n = makeNode(WindowDef);
 
@@ -16580,20 +16590,21 @@ window_specification: '(' opt_existing_window_name opt_partition_clause
 					n->frameOptions = $5->frameOptions;
 					n->startOffset = $5->startOffset;
 					n->endOffset = $5->endOffset;
+					n->rpCommonSyntax = (RPCommonSyntax *)$6;
 					n->location = @1;
 					$$ = n;
 				}
 		;
 
 /*
- * If we see PARTITION, RANGE, ROWS or GROUPS as the first token after the '('
- * of a window_specification, we want the assumption to be that there is
- * no existing_window_name; but those keywords are unreserved and so could
- * be ColIds.  We fix this by making them have the same precedence as IDENT
- * and giving the empty production here a slightly higher precedence, so
- * that the shift/reduce conflict is resolved in favor of reducing the rule.
- * These keywords are thus precluded from being an existing_window_name but
- * are not reserved for any other purpose.
+ * If we see PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, SEEK or PATTERN_P
+ * as the first token after the '(' of a window_specification, we want the
+ * assumption to be that there is no existing_window_name; but those keywords
+ * are unreserved and so could be ColIds.  We fix this by making them have the
+ * same precedence as IDENT and giving the empty production here a slightly
+ * higher precedence, so that the shift/reduce conflict is resolved in favor
+ * of reducing the rule.  These keywords are thus precluded from being an
+ * existing_window_name but are not reserved for any other purpose.
  */
 opt_existing_window_name: ColId						{ $$ = $1; }
 			| /*EMPTY*/				%prec Op		{ $$ = NULL; }
@@ -16762,6 +16773,76 @@ opt_window_exclusion_clause:
 			| /*EMPTY*/				{ $$ = 0; }
 		;
 
+opt_row_pattern_common_syntax:	/* optional tail of window_specification */
+opt_row_pattern_skip_to opt_row_pattern_initial_or_seek
+				PATTERN_P '(' row_pattern ')'
+				DEFINE row_pattern_definition_list
+			{
+				RPCommonSyntax *n = makeNode(RPCommonSyntax);
+				n->rpSkipTo = $1;	/* AFTER MATCH SKIP mode */
+				n->initial = $2;	/* INITIAL (true) vs. SEEK */
+				n->rpPatterns = $5;	/* list built by row_pattern */
+				n->rpDefs = $8;		/* list of ResTarget from DEFINE */
+				$$ = (Node *) n;
+			}
+			| /*EMPTY*/		{ $$ = NULL; }	/* no PATTERN ... DEFINE given */
+		;
+
+opt_row_pattern_skip_to:	/* yields an RPSkipTo value as <ival> */
+			AFTER MATCH SKIP TO NEXT ROW
+				{
+					$$ = ST_NEXT_ROW;
+				}
+			| AFTER MATCH SKIP PAST LAST_P ROW
+				{
+					$$ = ST_PAST_LAST_ROW;
+				}
+			| /*EMPTY*/		/* AFTER MATCH omitted: same as SKIP PAST LAST ROW */
+				{
+					$$ = ST_PAST_LAST_ROW;
+				}
+		;
+
+opt_row_pattern_initial_or_seek:	/* true means INITIAL */
+			INITIAL		{ $$ = true; }
+			| SEEK
+				{	/* parsed only so it can be rejected with a specific error */
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("SEEK is not supported"),
+							 errhint("Use INITIAL instead."),
+							 parser_errposition(@1)));
+				}	/* $$ is not set here; presumably ereport(ERROR) never returns */
+			| /*EMPTY*/		{ $$ = true; }	/* INITIAL is the default */
+		;
+
+row_pattern:	/* one or more terms, written side by side with no separator */
+			row_pattern_term					{ $$ = list_make1($1); }
+			| row_pattern row_pattern_term		{ $$ = lappend($1, $2); }
+		;
+
+row_pattern_term:	/* A_Expr args: quantifier ("" if none), variable as String */
+			ColId	{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "", (Node *)makeString($1), NULL, @1); }
+			| ColId '*'	{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "*", (Node *)makeString($1), NULL, @1); }
+			| ColId '+'	{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", (Node *)makeString($1), NULL, @1); }
+			| ColId '?'	{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "?", (Node *)makeString($1), NULL, @1); }
+		;
+
+row_pattern_definition_list:	/* one or more definitions, comma-separated */
+			row_pattern_definition										{ $$ = list_make1($1); }
+			| row_pattern_definition_list ',' row_pattern_definition	{ $$ = lappend($1, $3); }
+		;
+
+row_pattern_definition:	/* one DEFINE entry, returned as a ResTarget */
+			ColId AS a_expr
+				{
+					$$ = makeNode(ResTarget);
+					$$->name = $1;	/* pattern variable name */
+					$$->indirection = NIL;
+					$$->val = (Node *) $3;	/* defining expression */
+					$$->location = @1;
+				}
+		;
 
 /*
  * Supporting nonterminals for expressions.
@@ -17966,6 +18047,7 @@ unreserved_keyword:
 			| INDEXES
 			| INHERIT
 			| INHERITS
+			| INITIAL
 			| INLINE_P
 			| INPUT_P
 			| INSENSITIVE
@@ -18040,7 +18122,9 @@ unreserved_keyword:
 			| PARTITION
 			| PASSING
 			| PASSWORD
+			| PAST
 			| PATH
+			| PATTERN_P
 			| PERIOD
 			| PLAN
 			| PLANS
@@ -18094,6 +18178,7 @@ unreserved_keyword:
 			| SEARCH
 			| SECOND_P
 			| SECURITY
+			| SEEK
 			| SEQUENCE
 			| SEQUENCES
 			| SERIALIZABLE
@@ -18317,6 +18402,7 @@ reserved_keyword:
 			| CURRENT_USER
 			| DEFAULT
 			| DEFERRABLE
+			| DEFINE
 			| DESC
 			| DISTINCT
 			| DO
@@ -18480,6 +18566,7 @@ bare_label_keyword:
 			| DEFAULTS
 			| DEFERRABLE
 			| DEFERRED
+			| DEFINE
 			| DEFINER
 			| DELETE_P
 			| DELIMITER
@@ -18558,6 +18645,7 @@ bare_label_keyword:
 			| INDEXES
 			| INHERIT
 			| INHERITS
+			| INITIAL
 			| INITIALLY
 			| INLINE_P
 			| INNER_P
@@ -18670,7 +18758,9 @@ bare_label_keyword:
 			| PARTITION
 			| PASSING
 			| PASSWORD
+			| PAST
 			| PATH
+			| PATTERN_P
 			| PERIOD
 			| PLACING
 			| PLAN
@@ -18729,6 +18819,7 @@ bare_label_keyword:
 			| SCROLL
 			| SEARCH
 			| SECURITY
+			| SEEK
 			| SELECT
 			| SEQUENCE
 			| SEQUENCES
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index d14294a4ece..9e17abbaec5 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -578,6 +578,31 @@ typedef struct SortBy
 	ParseLoc	location;		/* operator location, or -1 if none/unknown */
 } SortBy;
 
+/*
+ * AFTER MATCH SKIP modes of the row pattern common syntax
+ */
+typedef enum RPSkipTo
+{
+	ST_NONE,					/* unset; not produced by the grammar */
+	ST_NEXT_ROW,				/* SKIP TO NEXT ROW */
+	ST_PAST_LAST_ROW,			/* SKIP PAST LAST ROW (grammar default) */
+} RPSkipTo;
+
+/*
+ * RPCommonSyntax - raw representation of row pattern common syntax
+ * (AFTER MATCH SKIP, INITIAL/SEEK, PATTERN and DEFINE of a window definition)
+ */
+typedef struct RPCommonSyntax
+{
+	NodeTag		type;
+	RPSkipTo	rpSkipTo;		/* Row Pattern AFTER MATCH SKIP type */
+	bool		initial;		/* true if <row pattern initial or seek> is
+								 * initial (also when omitted) */
+	List	   *rpPatterns;		/* PATTERN terms in order (list of A_Expr) */
+	List	   *rpDefs;			/* row pattern definitions clause (list of
+								 * ResTarget, one per DEFINE entry) */
+} RPCommonSyntax;
+
 /*
  * WindowDef - raw representation of WINDOW and OVER clauses
  *
@@ -593,6 +618,7 @@ typedef struct WindowDef
 	char	   *refname;		/* referenced window name, if any */
 	List	   *partitionClause;	/* PARTITION BY expression list */
 	List	   *orderClause;	/* ORDER BY (list of SortBy) */
+	RPCommonSyntax *rpCommonSyntax; /* row pattern common syntax */
 	int			frameOptions;	/* frame_clause options, see below */
 	Node	   *startOffset;	/* expression for starting bound, if any */
 	Node	   *endOffset;		/* expression for ending bound, if any */
@@ -1561,6 +1587,11 @@ typedef struct GroupingSet
  * the orderClause might or might not be copied (see copiedOrder); the framing
  * options are never copied, per spec.
  *
+ * "defineClause" is the Row Pattern Recognition DEFINE clause (list of
+ * TargetEntry); TargetEntry.resname holds the row pattern definition
+ * variable name.  "patternVariable" and "patternRegexp" represent the
+ * PATTERN clause.
+ *
  * The information relevant for the query jumbling is the partition clause
  * type and its bounds.
  */
@@ -1590,6 +1621,22 @@ typedef struct WindowClause
 	Index		winref;			/* ID referenced by window functions */
 	/* did we copy orderClause from refname? */
 	bool		copiedOrder pg_node_attr(query_jumble_ignore);
+	/* Row Pattern AFTER MATCH SKIP clause */
+	RPSkipTo	rpSkipTo;		/* Row Pattern Skip To type */
+	bool		initial;		/* true if <row pattern initial or seek> is
+								 * initial */
+	/* Row Pattern DEFINE clause (list of TargetEntry) */
+	List	   *defineClause;
+	/* Row Pattern DEFINE variable initial names (list of String) */
+	List	   *defineInitial;
+	/* Row Pattern PATTERN variable name (list of String) */
+	List	   *patternVariable;
+
+	/*
+	 * Row Pattern PATTERN regular expression quantifiers ('*', '+', '?' or
+	 * '' for none; list of String)
+	 */
+	List	   *patternRegexp;
 } WindowClause;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 5d4fe27ef96..7c60b9b44a8 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -129,6 +129,7 @@ PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("deferrable", DEFERRABLE, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("define", DEFINE, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("delete", DELETE_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD, BARE_LABEL)
@@ -217,6 +218,7 @@ PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("initial", INITIAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL)
@@ -341,7 +343,9 @@ PG_KEYWORD("partial", PARTIAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("past", PAST, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("path", PATH, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("pattern", PATTERN_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("period", PERIOD, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("plan", PLAN, UNRESERVED_KEYWORD, BARE_LABEL)
@@ -404,6 +408,7 @@ PG_KEYWORD("scroll", SCROLL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("search", SEARCH, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("second", SECOND_P, UNRESERVED_KEYWORD, AS_LABEL)
 PG_KEYWORD("security", SECURITY, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("seek", SEEK, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("select", SELECT, RESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("sequence", SEQUENCE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("sequences", SEQUENCES, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index f7d07c84542..d286a8b7783 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -51,6 +51,7 @@ typedef enum ParseExprKind
 	EXPR_KIND_WINDOW_FRAME_RANGE,	/* window frame clause with RANGE */
 	EXPR_KIND_WINDOW_FRAME_ROWS,	/* window frame clause with ROWS */
 	EXPR_KIND_WINDOW_FRAME_GROUPS,	/* window frame clause with GROUPS */
+	EXPR_KIND_RPR_DEFINE,		/* DEFINE */
 	EXPR_KIND_SELECT_TARGET,	/* SELECT target list item */
 	EXPR_KIND_INSERT_TARGET,	/* INSERT target list item */
 	EXPR_KIND_UPDATE_SOURCE,	/* UPDATE assignment source item */
-- 
2.43.0