v2-0001-Broken-out-tokeniziation-of-arrows.patch
text/x-patch
Filename: v2-0001-Broken-out-tokeniziation-of-arrows.patch
Type: text/x-patch
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v2-0001
Subject: Broken out tokenization of arrows
| File | + | − |
|---|---|---|
| src/backend/parser/gram.y | 17 | 3 |
| src/backend/parser/scan.l | 35 | 0 |
| src/fe_utils/psqlscan.l | 5 | 0 |
| src/include/parser/scanner.h | 1 | 0 |
| src/interfaces/ecpg/preproc/pgc.l | 34 | 0 |
| src/pl/plpgsql/src/pl_gram.y | 1 | 0 |
From 79e9474fd02fab7210bed6a5a3db3bc57d725193 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andreas@proxel.se>
Date: Tue, 29 Oct 2024 20:23:24 +0100
Subject: [PATCH v2] Broken out tokenization of arrows
---
src/backend/parser/gram.y | 20 +++++++++++++++---
src/backend/parser/scan.l | 35 +++++++++++++++++++++++++++++++
src/fe_utils/psqlscan.l | 5 +++++
src/include/parser/scanner.h | 1 +
src/interfaces/ecpg/preproc/pgc.l | 34 ++++++++++++++++++++++++++++++
src/pl/plpgsql/src/pl_gram.y | 1 +
6 files changed, 93 insertions(+), 3 deletions(-)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 67eb96396af..179069e0299 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -681,6 +681,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
/*
* If you want to make any keyword changes, update the keyword table in
@@ -821,7 +822,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%left AND
%right NOT
%nonassoc IS ISNULL NOTNULL /* IS sets precedence for IS NULL, etc */
-%nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%nonassoc '<' LEFT_ARROW_LESS '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
%nonassoc BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA
%nonassoc ESCAPE /* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
@@ -874,8 +875,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */
%nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP
SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH
-%left Op OPERATOR /* multi-character ops and user-defined operators */
-%left '+' '-'
+%left Op OPERATOR RIGHT_ARROW /* multi-character ops and user-defined operators */
+%left '+' '-' LEFT_ARROW_MINUS
%left '*' '/' '%'
%left '^'
/* Unary Operators */
@@ -14893,6 +14894,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
| '-' a_expr %prec UMINUS
{ $$ = doNegate($2, @1); }
+ | LEFT_ARROW_MINUS a_expr %prec UMINUS
+ { $$ = doNegate($2, @1); }
| a_expr '+' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
| a_expr '-' a_expr
@@ -14907,6 +14910,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
| a_expr '<' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+ | a_expr LEFT_ARROW_LESS a_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
| a_expr '>' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
| a_expr '=' a_expr
@@ -14917,6 +14922,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
| a_expr NOT_EQUALS a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+ | a_expr RIGHT_ARROW a_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
| a_expr qual_Op a_expr %prec Op
{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
@@ -15386,6 +15393,8 @@ b_expr: c_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
| b_expr '<' b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+ | b_expr LEFT_ARROW_LESS b_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
| b_expr '>' b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
| b_expr '=' b_expr
@@ -15396,6 +15405,8 @@ b_expr: c_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
| b_expr NOT_EQUALS b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+ | b_expr RIGHT_ARROW b_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
| b_expr qual_Op b_expr %prec Op
{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
| qual_Op b_expr %prec Op
@@ -16554,16 +16565,19 @@ all_Op: Op { $$ = $1; }
MathOp: '+' { $$ = "+"; }
| '-' { $$ = "-"; }
+ | LEFT_ARROW_MINUS { $$ = "-"; }
| '*' { $$ = "*"; }
| '/' { $$ = "/"; }
| '%' { $$ = "%"; }
| '^' { $$ = "^"; }
| '<' { $$ = "<"; }
+ | LEFT_ARROW_LESS { $$ = "<"; }
| '>' { $$ = ">"; }
| '=' { $$ = "="; }
| LESS_EQUALS { $$ = "<="; }
| GREATER_EQUALS { $$ = ">="; }
| NOT_EQUALS { $$ = "<>"; }
+ | RIGHT_ARROW { $$ = "->"; }
;
qual_Op: Op
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 72404e72fff..a17e42d0ef1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -366,6 +366,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -892,8 +893,18 @@ other .
return NOT_EQUALS;
}
+{right_arrow} {
+ SET_YYLLOC();
+ return RIGHT_ARROW;
+ }
+
{self} {
SET_YYLLOC();
+ if (yytext[0] == '-' && yyextra->inleftarrow)
+ {
+ yyextra->inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
return yytext[0];
}
@@ -919,6 +930,26 @@ other .
if (slashstar)
nchars = slashstar - yytext;
+ if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+ {
+ /* Strip the unwanted chars from the token */
+ yyless(1);
+
+ yyextra->inleftarrow = true;
+
+ return LEFT_ARROW_LESS;
+ }
+
+ if (nchars == 1 && yytext[0] == '-' && yyextra->inleftarrow)
+ {
+ /* Strip the unwanted chars from the token */
+ if (nchars < yyleng)
+ yyless(nchars);
+
+ yyextra->inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
+
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
@@ -989,6 +1020,8 @@ other .
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
+ if (yytext[0] == '-' && yytext[1] == '>')
+ return RIGHT_ARROW;
}
}
@@ -1294,6 +1327,8 @@ scanner_init(const char *str,
yyext->literalbuf = (char *) palloc(yyext->literalalloc);
yyext->literallen = 0;
+ yyext->inleftarrow = false;
+
return scanner;
}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 8e8b049e15f..6f8fd7cd258 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -302,6 +302,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -661,6 +662,10 @@ other .
ECHO;
}
+{right_arrow} {
+ ECHO;
+ }
+
/*
* These rules are specific to psql --- they implement parenthesis
* counting and detection of command-ending semicolon. These must
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
index d6293b1e878..61647be928c 100644
--- a/src/include/parser/scanner.h
+++ b/src/include/parser/scanner.h
@@ -105,6 +105,7 @@ typedef struct core_yy_extra_type
int state_before_str_stop; /* start cond. before end quote */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
+ bool inleftarrow; /* are we parsing a <- operator? */
YYLTYPE save_yylloc; /* one-element stack for PUSH_YYLLOC() */
/* first part of UTF16 surrogate pair for Unicode escapes */
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 82708013ee6..5ea88117aa1 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -335,6 +335,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -463,6 +464,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
%%
%{
+ static bool inleftarrow = false;
+
/* code to execute during start of each call of yylex() */
char *newdefsymbol = NULL;
@@ -854,6 +857,10 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return NOT_EQUALS;
}
+{right_arrow} {
+ return RIGHT_ARROW;
+ }
+
{informix_special} {
/* are we simulating Informix? */
if (INFORMIX_MODE)
@@ -871,6 +878,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
*/
if (yytext[0] == ';' && struct_level == 0)
BEGIN(C);
+ if (yytext[0] == '-' && inleftarrow)
+ {
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
return yytext[0];
}
@@ -896,6 +908,26 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
if (slashstar)
nchars = slashstar - yytext;
+ if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+ {
+ /* Strip the unwanted chars from the token */
+ yyless(1);
+
+ inleftarrow = true;
+
+ return LEFT_ARROW_LESS;
+ }
+
+ if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+ {
+ /* Strip the unwanted chars from the token */
+ if (nchars < yyleng)
+ yyless(nchars);
+
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
+
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
@@ -968,6 +1000,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
+ if (yytext[0] == '-' && yytext[1] == '>')
+ return RIGHT_ARROW;
}
}
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 8182ce28aa1..c5cea379554 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -237,6 +237,7 @@ static void check_raise_parameters(PLpgSQL_stmt_raise *stmt);
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
/*
* Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c).
--
2.45.2