v36-0002-Row-pattern-recognition-patch-parse-analysis.patch
application/octet-stream
Filename: v36-0002-Row-pattern-recognition-patch-parse-analysis.patch
Type: application/octet-stream
Part: 1
Message:
Re: Row pattern recognition
From 1ba0f5b223ddff1ca3413fee0a4c3153f110b15f Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Tue, 23 Dec 2025 22:21:47 +0900
Subject: [PATCH v36 2/8] Row pattern recognition patch (parse/analysis).
---
src/backend/parser/parse_agg.c | 7 +
src/backend/parser/parse_clause.c | 262 +++++++++++++++++++++++++++++-
src/backend/parser/parse_expr.c | 6 +
src/backend/parser/parse_func.c | 3 +
4 files changed, 277 insertions(+), 1 deletion(-)
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index b8340557b34..5b219284139 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -584,6 +584,10 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
errkind = true;
break;
+ case EXPR_KIND_RPR_DEFINE:
+ errkind = true;
+ break;
+
/*
* There is intentionally no default: case here, so that the
* compiler will warn if we add a new ParseExprKind without
@@ -1023,6 +1027,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
case EXPR_KIND_CYCLE_MARK:
errkind = true;
break;
+ case EXPR_KIND_RPR_DEFINE:
+ errkind = true;
+ break;
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 57609e2d55c..40c4e93837c 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -96,7 +96,12 @@ static WindowClause *findWindowClause(List *wclist, const char *name);
static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
Node *clause);
-
+static void transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+ List **targetlist);
+static List *transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+ List **targetlist);
+static void transformPatternClause(ParseState *pstate, WindowClause *wc,
+ WindowDef *windef);
/*
* transformFromClause -
@@ -3019,6 +3024,10 @@ transformWindowDefinitions(ParseState *pstate,
rangeopfamily, rangeopcintype,
&wc->endInRangeFunc,
windef->endOffset);
+
+ /* Process Row Pattern Recognition related clauses */
+ transformRPR(pstate, wc, windef, targetlist);
+
wc->winref = winref;
result = lappend(result, wc);
@@ -3894,3 +3903,254 @@ transformFrameOffset(ParseState *pstate, int frameOptions,
return node;
}
+
+/*
+ * transformRPR
+ * Process Row Pattern Recognition related clauses
+ */
+static void
+transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+ List **targetlist)
+{
+ /*
+ * Window definition exists?
+ */
+ if (windef == NULL)
+ return;
+
+ /*
+ * Row Pattern Common Syntax clause exists?
+ */
+ if (windef->rpCommonSyntax == NULL)
+ return;
+
+ /* Check Frame option. Frame must start at current row */
+ if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("FRAME must start at current row when row pattern recognition is used")));
+
+ /* Transform AFTER MACH SKIP TO clause */
+ wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo;
+
+ /* Transform SEEK or INITIAL clause */
+ wc->initial = windef->rpCommonSyntax->initial;
+
+ /* Transform DEFINE clause into list of TargetEntry's */
+ wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist);
+
+ /* Check PATTERN clause and copy to patternClause */
+ transformPatternClause(pstate, wc, windef);
+}
+
+/*
+ * transformDefineClause
+ * Process DEFINE clause and transform ResTarget into list of
+ * TargetEntry.
+ *
+ * XXX we only support column reference in row pattern definition search
+ * condition, e.g. "price". <row pattern definition variable name>.<column
+ * reference> is not supported, e.g. "A.price".
+ */
+static List *
+transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+ List **targetlist)
+{
+ /* DEFINE variable name initials */
+ static const char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz";
+
+ ListCell *lc,
+ *l;
+ ResTarget *restarget,
+ *r;
+ List *restargets;
+ List *defineClause;
+ char *name;
+ int initialLen;
+ int numinitials;
+
+ /*
+ * If Row Definition Common Syntax exists, DEFINE clause must exist. (the
+ * raw parser should have already checked it.)
+ */
+ Assert(windef->rpCommonSyntax->rpDefs != NULL);
+
+ /*
+ * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE
+ * per the SQL standard.
+ */
+ restargets = NIL;
+ foreach(lc, windef->rpCommonSyntax->rpPatterns)
+ {
+ A_Expr *a;
+ bool found = false;
+
+ if (!IsA(lfirst(lc), A_Expr))
+ ereport(ERROR,
+ errmsg("node type is not A_Expr"));
+
+ a = (A_Expr *) lfirst(lc);
+ name = strVal(a->lexpr);
+
+ foreach(l, windef->rpCommonSyntax->rpDefs)
+ {
+ restarget = (ResTarget *) lfirst(l);
+
+ if (!strcmp(restarget->name, name))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ /*
+ * "name" is missing. So create "name AS name IS TRUE" ResTarget
+ * node and add it to the temporary list.
+ */
+ A_Const *n;
+
+ restarget = makeNode(ResTarget);
+ n = makeNode(A_Const);
+ n->val.boolval.type = T_Boolean;
+ n->val.boolval.boolval = true;
+ n->location = -1;
+ restarget->name = pstrdup(name);
+ restarget->indirection = NIL;
+ restarget->val = (Node *) n;
+ restarget->location = -1;
+ restargets = lappend((List *) restargets, restarget);
+ }
+ }
+
+ if (list_length(restargets) >= 1)
+ {
+ /* add missing DEFINEs */
+ windef->rpCommonSyntax->rpDefs =
+ list_concat(windef->rpCommonSyntax->rpDefs, restargets);
+ list_free(restargets);
+ }
+
+ /*
+ * Check for duplicate row pattern definition variables. The standard
+ * requires that no two row pattern definition variable names shall be
+ * equivalent.
+ */
+ restargets = NIL;
+ numinitials = 0;
+ initialLen = strlen(defineVariableInitials);
+ foreach(lc, windef->rpCommonSyntax->rpDefs)
+ {
+ char initial[2];
+
+ restarget = (ResTarget *) lfirst(lc);
+ name = restarget->name;
+
+ /*
+ * Add DEFINE expression (Restarget->val) to the targetlist as a
+ * TargetEntry if it does not exist yet. Planner will add the column
+ * ref var node to the outer plan's target list later on. This makes
+ * DEFINE expression could access the outer tuple while evaluating
+ * PATTERN.
+ *
+ * XXX: adding whole expressions of DEFINE to the plan.targetlist is
+ * not so good, because it's not necessary to evalute the expression
+ * in the target list while running the plan. We should extract the
+ * var nodes only then add them to the plan.targetlist.
+ */
+ findTargetlistEntrySQL99(pstate, (Node *) restarget->val,
+ targetlist, EXPR_KIND_RPR_DEFINE);
+
+ /*
+ * Make sure that the row pattern definition search condition is a
+ * boolean expression.
+ */
+ transformWhereClause(pstate, restarget->val,
+ EXPR_KIND_RPR_DEFINE, "DEFINE");
+
+ foreach(l, restargets)
+ {
+ char *n;
+
+ r = (ResTarget *) lfirst(l);
+ n = r->name;
+
+ if (!strcmp(n, name))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause",
+ name),
+ parser_errposition(pstate, exprLocation((Node *) r))));
+ }
+
+ /*
+ * Create list of row pattern DEFINE variable name's initial. We
+ * assign [a-z] to them (up to 26 variable names are allowed).
+ */
+ if (numinitials >= initialLen)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("number of row pattern definition variable names exceeds %d",
+ initialLen),
+ parser_errposition(pstate,
+ exprLocation((Node *) restarget))));
+ }
+ initial[0] = defineVariableInitials[numinitials++];
+ initial[1] = '\0';
+ wc->defineInitial = lappend(wc->defineInitial,
+ makeString(pstrdup(initial)));
+
+ restargets = lappend(restargets, restarget);
+ }
+ list_free(restargets);
+
+ /* turns a list of ResTarget's into a list of TargetEntry's */
+ defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs,
+ EXPR_KIND_RPR_DEFINE);
+
+ /* mark column origins */
+ markTargetListOrigins(pstate, defineClause);
+
+ /* mark all nodes in the DEFINE clause tree with collation information */
+ assign_expr_collations(pstate, (Node *) defineClause);
+
+ return defineClause;
+}
+
+/*
+ * transformPatternClause
+ * Process PATTERN clause and return PATTERN clause in the raw parse tree
+ *
+ * windef->rpCommonSyntax must exist.
+ */
+static void
+transformPatternClause(ParseState *pstate, WindowClause *wc,
+ WindowDef *windef)
+{
+ ListCell *lc;
+
+ Assert(windef->rpCommonSyntax != NULL);
+
+ wc->patternVariable = NIL;
+ wc->patternRegexp = NIL;
+ foreach(lc, windef->rpCommonSyntax->rpPatterns)
+ {
+ A_Expr *a;
+ char *name;
+ char *regexp;
+
+ if (!IsA(lfirst(lc), A_Expr))
+ ereport(ERROR,
+ errmsg("node type is not A_Expr"));
+
+ a = (A_Expr *) lfirst(lc);
+ name = strVal(a->lexpr);
+
+ wc->patternVariable = lappend(wc->patternVariable, makeString(pstrdup(name)));
+ regexp = strVal(lfirst(list_head(a->name)));
+
+ wc->patternRegexp = lappend(wc->patternRegexp, makeString(pstrdup(regexp)));
+ }
+}
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 6b8fa15fca3..c6dd7d53e90 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -576,6 +576,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
case EXPR_KIND_COPY_WHERE:
case EXPR_KIND_GENERATED_COLUMN:
case EXPR_KIND_CYCLE_MARK:
+ case EXPR_KIND_RPR_DEFINE:
/* okay */
break;
@@ -1861,6 +1862,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
case EXPR_KIND_GENERATED_COLUMN:
err = _("cannot use subquery in column generation expression");
break;
+ case EXPR_KIND_RPR_DEFINE:
+ err = _("cannot use subquery in DEFINE expression");
+ break;
/*
* There is intentionally no default: case here, so that the
@@ -3220,6 +3224,8 @@ ParseExprKindName(ParseExprKind exprKind)
return "GENERATED AS";
case EXPR_KIND_CYCLE_MARK:
return "CYCLE";
+ case EXPR_KIND_RPR_DEFINE:
+ return "DEFINE";
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 778d69c6f3c..13c6500a087 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2783,6 +2783,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
case EXPR_KIND_CYCLE_MARK:
errkind = true;
break;
+ case EXPR_KIND_RPR_DEFINE:
+ errkind = true;
+ break;
/*
* There is intentionally no default: case here, so that the
--
2.43.0