From b2562bad97c2bfd2b9b649f3df9ee35a46b724f9 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 23 Dec 2025 22:21:47 +0900 Subject: [PATCH v36 1/8] Row pattern recognition patch for raw parser. --- src/backend/parser/gram.y | 137 ++++++++++++++++++++++++++++---- src/include/nodes/parsenodes.h | 47 +++++++++++ src/include/parser/kwlist.h | 5 ++ src/include/parser/parse_node.h | 1 + 4 files changed, 174 insertions(+), 16 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 28f4e11e30f..99c27dc178c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -682,6 +682,14 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); json_object_constructor_null_clause_opt json_array_constructor_null_clause_opt +%type row_pattern_definition +%type opt_row_pattern_common_syntax + row_pattern_term +%type row_pattern_definition_list + row_pattern +%type opt_row_pattern_skip_to +%type opt_row_pattern_initial_or_seek + /* * Non-keyword token types. These are hard-wired into the "flex" lexer. * They must be listed first so that their numeric codes do not depend on @@ -724,7 +732,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS - DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC + DEFERRABLE DEFERRED DEFINE DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP @@ -740,7 +748,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P IDENTITY_P IF_P IGNORE_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE - INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P + INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIAL INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -765,8 +773,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ORDER ORDINALITY OTHERS OUT_P OUTER_P OVER OVERLAPS OVERLAY OVERRIDING OWNED OWNER - PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PATH - PERIOD PLACING PLAN PLANS POLICY + PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PAST PATH + PATTERN_P PERIOD PLACING PLAN PLANS POLICY POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROCEDURES PROGRAM PUBLICATION @@ -777,7 +785,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RESET RESPECT_P RESTART RESTRICT RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROUTINE ROUTINES ROW ROWS RULE - SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SELECT + SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SEEK SELECT SEQUENCE SEQUENCES SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SPLIT SOURCE SQL_P STABLE STANDALONE_P @@ -860,8 +868,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); * reference point for a precedence level that we can assign to other * keywords that lack a natural precedence level. * - * We need to do this for PARTITION, RANGE, ROWS, and GROUPS to support - * opt_existing_window_name (see comment there). + * We need to do this for PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, + * SEEK, PATTERN_P to support opt_existing_window_name (see comment there). * * The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING * are even messier: since UNBOUNDED is an unreserved keyword (per spec!), @@ -891,6 +899,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */ %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH + AFTER INITIAL SEEK PATTERN_P %left Op OPERATOR /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' @@ -16620,7 +16629,8 @@ over_clause: OVER window_specification ; window_specification: '(' opt_existing_window_name opt_partition_clause - opt_sort_clause opt_frame_clause ')' + opt_sort_clause opt_frame_clause + opt_row_pattern_common_syntax ')' { WindowDef *n = makeNode(WindowDef); @@ -16632,20 +16642,21 @@ window_specification: '(' opt_existing_window_name opt_partition_clause n->frameOptions = $5->frameOptions; n->startOffset = $5->startOffset; n->endOffset = $5->endOffset; + n->rpCommonSyntax = (RPCommonSyntax *)$6; n->location = @1; $$ = n; } ; /* - * If we see PARTITION, RANGE, ROWS or GROUPS as the first token after the '(' - * of a window_specification, we want the assumption to be that there is - * no existing_window_name; but those keywords are unreserved and so could - * be ColIds. We fix this by making them have the same precedence as IDENT - * and giving the empty production here a slightly higher precedence, so - * that the shift/reduce conflict is resolved in favor of reducing the rule. - * These keywords are thus precluded from being an existing_window_name but - * are not reserved for any other purpose. + * If we see PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, SEEK or PATTERN_P + * as the first token after the '(' of a window_specification, we want the + * assumption to be that there is no existing_window_name; but those keywords + * are unreserved and so could be ColIds. We fix this by making them have the + * same precedence as IDENT and giving the empty production here a slightly + * higher precedence, so that the shift/reduce conflict is resolved in favor + * of reducing the rule. These keywords are thus precluded from being an + * existing_window_name but are not reserved for any other purpose. */ opt_existing_window_name: ColId { $$ = $1; } | /*EMPTY*/ %prec Op { $$ = NULL; } @@ -16814,6 +16825,90 @@ opt_window_exclusion_clause: | /*EMPTY*/ { $$ = 0; } ; +opt_row_pattern_common_syntax: +opt_row_pattern_skip_to opt_row_pattern_initial_or_seek + PATTERN_P '(' row_pattern ')' + DEFINE row_pattern_definition_list + { + RPCommonSyntax *n = makeNode(RPCommonSyntax); + n->rpSkipTo = $1; + n->initial = $2; + n->rpPatterns = $5; + n->rpDefs = $8; + $$ = (Node *) n; + } + | /*EMPTY*/ { $$ = NULL; } + ; + +opt_row_pattern_skip_to: + AFTER MATCH SKIP TO NEXT ROW + { + $$ = ST_NEXT_ROW; + } + | AFTER MATCH SKIP PAST LAST_P ROW + { + $$ = ST_PAST_LAST_ROW; + } + | /*EMPTY*/ + { + $$ = ST_PAST_LAST_ROW; + } + ; + +opt_row_pattern_initial_or_seek: + INITIAL { $$ = true; } + | SEEK + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("SEEK is not supported"), + errhint("Use INITIAL instead."), + parser_errposition(@1))); + } + | /*EMPTY*/ { $$ = true; } + ; + +row_pattern: + row_pattern_term { $$ = list_make1($1); } + | row_pattern row_pattern_term { $$ = lappend($1, $2); } + ; + +row_pattern_term: + ColId { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "", (Node *)makeString($1), NULL, @1); } + | ColId '*' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "*", (Node *)makeString($1), NULL, @1); } + | ColId '+' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", (Node *)makeString($1), NULL, @1); } +/* + * '?' quantifier. We cannot write this directly "ColId '?'" because '?' is + * not a "self" token. So we let '?' matches Op first then check if it's '?' + * or not. + */ + | ColId Op + { + if (strcmp("?", $2)) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@2)); + + $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "?", (Node *)makeString($1), NULL, @1); + } + ; + +row_pattern_definition_list: + row_pattern_definition { $$ = list_make1($1); } + | row_pattern_definition_list ',' row_pattern_definition { $$ = lappend($1, $3); } + ; + +row_pattern_definition: + ColId AS a_expr + { + $$ = makeNode(ResTarget); + $$->name = $1; + $$->indirection = NIL; + $$->val = (Node *) $3; + $$->location = @1; + } + ; /* * Supporting nonterminals for expressions. @@ -17953,6 +18048,7 @@ unreserved_keyword: | DECLARE | DEFAULTS | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -18018,6 +18114,7 @@ unreserved_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INLINE_P | INPUT_P | INSENSITIVE @@ -18093,7 +18190,9 @@ unreserved_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLAN | PLANS @@ -18147,6 +18246,7 @@ unreserved_keyword: | SEARCH | SECOND_P | SECURITY + | SEEK | SEQUENCE | SEQUENCES | SERIALIZABLE @@ -18534,6 +18634,7 @@ bare_label_keyword: | DEFAULTS | DEFERRABLE | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -18612,6 +18713,7 @@ bare_label_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INITIALLY | INLINE_P | INNER_P @@ -18725,7 +18827,9 @@ bare_label_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLACING | PLAN @@ -18784,6 +18888,7 @@ bare_label_keyword: | SCROLL | SEARCH | SECURITY + | SEEK | SELECT | SEQUENCE | SEQUENCES diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index bc7adba4a0f..21ba207bb88 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -578,6 +578,31 @@ typedef struct SortBy ParseLoc location; /* operator location, or -1 if none/unknown */ } SortBy; +/* + * AFTER MATCH row pattern skip to types in row pattern common syntax + */ +typedef enum RPSkipTo +{ + ST_NONE, /* AFTER MATCH omitted */ + ST_NEXT_ROW, /* SKIP TO NEXT ROW */ + ST_PAST_LAST_ROW, /* SKIP TO PAST LAST ROW */ +} RPSkipTo; + +/* + * RowPatternCommonSyntax - raw representation of row pattern common syntax + * + */ +typedef struct RPCommonSyntax +{ + NodeTag type; + RPSkipTo rpSkipTo; /* Row Pattern AFTER MATCH SKIP type */ + bool initial; /* true if is + * initial */ + List *rpPatterns; /* PATTERN variables (list of A_Expr) */ + List *rpDefs; /* row pattern definitions clause (list of + * ResTarget) */ +} RPCommonSyntax; + /* * WindowDef - raw representation of WINDOW and OVER clauses * @@ -593,6 +618,7 @@ typedef struct WindowDef char *refname; /* referenced window name, if any */ List *partitionClause; /* PARTITION BY expression list */ List *orderClause; /* ORDER BY (list of SortBy) */ + RPCommonSyntax *rpCommonSyntax; /* row pattern common syntax */ int frameOptions; /* frame_clause options, see below */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -1587,6 +1613,11 @@ typedef struct GroupingSet * the orderClause might or might not be copied (see copiedOrder); the framing * options are never copied, per spec. * + * "defineClause" is Row Pattern Recognition DEFINE clause (list of + * TargetEntry). TargetEntry.resname represents row pattern definition + * variable name. "patternVariable" and "patternRegexp" represents PATTERN + * clause. + * * The information relevant for the query jumbling is the partition clause * type and its bounds. */ @@ -1616,6 +1647,22 @@ typedef struct WindowClause Index winref; /* ID referenced by window functions */ /* did we copy orderClause from refname? */ bool copiedOrder pg_node_attr(query_jumble_ignore); + /* Row Pattern AFTER MATCH SKIP clause */ + RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ + bool initial; /* true if is + * initial */ + /* Row Pattern DEFINE clause (list of TargetEntry) */ + List *defineClause; + /* Row Pattern DEFINE variable initial names (list of String) */ + List *defineInitial; + /* Row Pattern PATTERN variable name (list of String) */ + List *patternVariable; + + /* + * Row Pattern PATTERN regular expression quantifier ('+' or ''. list of + * String) + */ + List *patternRegexp; } WindowClause; /* diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 9fde58f541c..b3a223a5525 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -129,6 +129,7 @@ PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferrable", DEFERRABLE, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("define", DEFINE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delete", DELETE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD, BARE_LABEL) @@ -217,6 +218,7 @@ PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("initial", INITIAL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) @@ -342,7 +344,9 @@ PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("partitions", PARTITIONS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("past", PAST, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("path", PATH, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("pattern", PATTERN_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("period", PERIOD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("plan", PLAN, UNRESERVED_KEYWORD, BARE_LABEL) @@ -405,6 +409,7 @@ PG_KEYWORD("scroll", SCROLL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("search", SEARCH, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("second", SECOND_P, UNRESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("security", SECURITY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("seek", SEEK, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("select", SELECT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequence", SEQUENCE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequences", SEQUENCES, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f7d07c84542..d286a8b7783 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -51,6 +51,7 @@ typedef enum ParseExprKind EXPR_KIND_WINDOW_FRAME_RANGE, /* window frame clause with RANGE */ EXPR_KIND_WINDOW_FRAME_ROWS, /* window frame clause with ROWS */ EXPR_KIND_WINDOW_FRAME_GROUPS, /* window frame clause with GROUPS */ + EXPR_KIND_RPR_DEFINE, /* DEFINE */ EXPR_KIND_SELECT_TARGET, /* SELECT target list item */ EXPR_KIND_INSERT_TARGET, /* INSERT target list item */ EXPR_KIND_UPDATE_SOURCE, /* UPDATE assignment source item */ -- 2.43.0