v1-0015-jsonpath-scanner-Use-flex-yyextra.patch

text/plain

Filename: v1-0015-jsonpath-scanner-Use-flex-yyextra.patch
Type: text/plain
Part: 14
Message: Re: pure parsers and reentrant scanners

Patch

Same data as JSON: `GET /api/v1/attachments/:id/patch` returns the parsed metadata as JSON (format, series position, per-file stats) but never the diff bytes. See the API reference.
Format: format-patch
Series: patch v1-0015
Subject: jsonpath scanner: Use flex yyextra
File | + | −
src/backend/utils/adt/jsonpath_scan.l | 92 | 79
From fa4592d5fbc26b536b71b6eae79a65a59cc6e2bc Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 2 Dec 2024 10:35:37 +0100
Subject: [PATCH v1 15/19] jsonpath scanner: Use flex yyextra

---
 src/backend/utils/adt/jsonpath_scan.l | 171 ++++++++++++++------------
 1 file changed, 92 insertions(+), 79 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 700c17712d0..8ed6c7ddf63 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -30,11 +30,15 @@
 }
 
 %{
-static JsonPathString scanstring;	/* FIXME */
+struct jsonpath_yy_extra_type
+{
+	JsonPathString scanstring;
+};
+#define YY_EXTRA_TYPE struct jsonpath_yy_extra_type *
 
-static void addstring(bool init, char *s, int l);
-static void addchar(bool init, char c);
-static enum yytokentype checkKeyword(void);
+static void addstring(bool init, char *s, int l, yyscan_t yyscanner);
+static void addchar(bool init, char c, yyscan_t yyscanner);
+static enum yytokentype checkKeyword(yyscan_t yyscanner);
 static bool parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner);
 static bool parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner);
 
@@ -116,44 +120,44 @@ hex_fail	\\x{hexdigit}{0,1}
 %%
 
 <xnq>{other}+					{
-									addstring(false, yytext, yyleng);
+									addstring(false, yytext, yyleng, yyscanner);
 								}
 
 <xnq>{blank}+					{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									BEGIN INITIAL;
-									return checkKeyword();
+									return checkKeyword(yyscanner);
 								}
 
 <xnq>\/\*						{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									BEGIN xc;
 								}
 
 <xnq>({special}|\")				{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									yyless(0);
 									BEGIN INITIAL;
-									return checkKeyword();
+									return checkKeyword(yyscanner);
 								}
 
 <xnq><<EOF>>					{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									BEGIN INITIAL;
-									return checkKeyword();
+									return checkKeyword(yyscanner);
 								}
 
-<xnq,xq,xvq>\\b				{ addchar(false, '\b'); }
+<xnq,xq,xvq>\\b				{ addchar(false, '\b', yyscanner); }
 
-<xnq,xq,xvq>\\f				{ addchar(false, '\f'); }
+<xnq,xq,xvq>\\f				{ addchar(false, '\f', yyscanner); }
 
-<xnq,xq,xvq>\\n				{ addchar(false, '\n'); }
+<xnq,xq,xvq>\\n				{ addchar(false, '\n', yyscanner); }
 
-<xnq,xq,xvq>\\r				{ addchar(false, '\r'); }
+<xnq,xq,xvq>\\r				{ addchar(false, '\r', yyscanner); }
 
-<xnq,xq,xvq>\\t				{ addchar(false, '\t'); }
+<xnq,xq,xvq>\\t				{ addchar(false, '\t', yyscanner); }
 
-<xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
+<xnq,xq,xvq>\\v				{ addchar(false, '\v', yyscanner); }
 
 <xnq,xq,xvq>{unicode}+		{
 								if (!parseUnicode(yytext, yyleng, escontext, yyscanner))
@@ -184,7 +188,7 @@ hex_fail	\\x{hexdigit}{0,1}
 									yyterminate();
 							}
 
-<xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
+<xnq,xq,xvq>\\.				{ addchar(false, yytext[1], yyscanner); }
 
 <xnq,xq,xvq>\\				{
 								jsonpath_yyerror(NULL, escontext, yyscanner,
@@ -199,18 +203,18 @@ hex_fail	\\x{hexdigit}{0,1}
 							}
 
 <xq>\"							{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									BEGIN INITIAL;
 									return STRING_P;
 								}
 
 <xvq>\"							{
-									yylval->str = scanstring;
+									yylval->str = yyextra->scanstring;
 									BEGIN INITIAL;
 									return VARIABLE_P;
 								}
 
-<xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng); }
+<xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng, yyscanner); }
 
 <xc>\*\/						{ BEGIN INITIAL; }
 
@@ -246,14 +250,14 @@ hex_fail	\\x{hexdigit}{0,1}
 \>								{ return GREATER_P; }
 
 \${other}+						{
-									addstring(true, yytext + 1, yyleng - 1);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext + 1, yyleng - 1, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return VARIABLE_P;
 								}
 
 \$\"							{
-									addchar(true, '\0');
+									addchar(true, '\0', yyscanner);
 									BEGIN xvq;
 								}
 
@@ -262,49 +266,49 @@ hex_fail	\\x{hexdigit}{0,1}
 {blank}+						{ /* ignore */ }
 
 \/\*							{
-									addchar(true, '\0');
+									addchar(true, '\0', yyscanner);
 									BEGIN xc;
 								}
 
 {real}							{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return NUMERIC_P;
 								}
 
 {decimal}						{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return NUMERIC_P;
 								}
 
 {decinteger}					{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return INT_P;
 								}
 
 {hexinteger}					{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return INT_P;
 								}
 
 {octinteger}					{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return INT_P;
 								}
 
 {bininteger}					{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
+									addstring(true, yytext, yyleng, yyscanner);
+									addchar(false, '\0', yyscanner);
+									yylval->str = yyextra->scanstring;
 									return INT_P;
 								}
 
@@ -329,18 +333,18 @@ hex_fail	\\x{hexdigit}{0,1}
 									yyterminate();
 								}
 \"								{
-									addchar(true, '\0');
+									addchar(true, '\0', yyscanner);
 									BEGIN xq;
 								}
 
 \\								{
 									yyless(0);
-									addchar(true, '\0');
+									addchar(true, '\0', yyscanner);
 									BEGIN xnq;
 								}
 
 {other}+						{
-									addstring(true, yytext, yyleng);
+									addstring(true, yytext, yyleng, yyscanner);
 									BEGIN xnq;
 								}
 
@@ -350,6 +354,10 @@ hex_fail	\\x{hexdigit}{0,1}
 
 /* LCOV_EXCL_STOP */
 
+/* Redefine yyextra so functions outside yylex() can reach it via yyscanner; see scan.l */
+#undef yyextra
+#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)
+
 void
 jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
 				 yyscan_t yyscanner,
@@ -426,9 +434,11 @@ static const JsonPathKeyword keywords[] = {
 	{ 12,false, TIMESTAMP_TZ_P, "timestamp_tz"},
 };
 
-/* Check if current scanstring value is a keyword */
+/*
+ * Check if current scanstring value is a keyword
+ */
 static enum yytokentype
-checkKeyword()
+checkKeyword(yyscan_t yyscanner)
 {
 	int			res = IDENT_P;
 	int			diff;
@@ -436,18 +446,18 @@ checkKeyword()
 						   *StopHigh = keywords + lengthof(keywords),
 						   *StopMiddle;
 
-	if (scanstring.len > keywords[lengthof(keywords) - 1].len)
+	if (yyextra->scanstring.len > keywords[lengthof(keywords) - 1].len)
 		return res;
 
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
 
-		if (StopMiddle->len == scanstring.len)
-			diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val,
-								  scanstring.len);
+		if (StopMiddle->len == yyextra->scanstring.len)
+			diff = pg_strncasecmp(StopMiddle->keyword, yyextra->scanstring.val,
+								  yyextra->scanstring.len);
 		else
-			diff = StopMiddle->len - scanstring.len;
+			diff = StopMiddle->len - yyextra->scanstring.len;
 
 		if (diff < 0)
 			StopLow = StopMiddle + 1;
@@ -456,8 +466,8 @@ checkKeyword()
 		else
 		{
 			if (StopMiddle->lowercase)
-				diff = strncmp(StopMiddle->keyword, scanstring.val,
-							   scanstring.len);
+				diff = strncmp(StopMiddle->keyword, yyextra->scanstring.val,
+							   yyextra->scanstring.len);
 
 			if (diff == 0)
 				res = StopMiddle->val;
@@ -474,42 +484,42 @@ checkKeyword()
  * Reinitialize if required.
  */
 static void
-resizeString(bool init, int appendLen)
+resizeString(bool init, int appendLen, yyscan_t yyscanner)
 {
 	if (init)
 	{
-		scanstring.total = Max(32, appendLen);
-		scanstring.val = (char *) palloc(scanstring.total);
-		scanstring.len = 0;
+		yyextra->scanstring.total = Max(32, appendLen);
+		yyextra->scanstring.val = (char *) palloc(yyextra->scanstring.total);
+		yyextra->scanstring.len = 0;
 	}
 	else
 	{
-		if (scanstring.len + appendLen >= scanstring.total)
+		if (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total)
 		{
-			while (scanstring.len + appendLen >= scanstring.total)
-				scanstring.total *= 2;
-			scanstring.val = repalloc(scanstring.val, scanstring.total);
+			while (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total)
+				yyextra->scanstring.total *= 2;
+			yyextra->scanstring.val = repalloc(yyextra->scanstring.val, yyextra->scanstring.total);
 		}
 	}
 }
 
 /* Add set of bytes at "s" of length "l" to scanstring */
 static void
-addstring(bool init, char *s, int l)
+addstring(bool init, char *s, int l, yyscan_t yyscanner)
 {
-	resizeString(init, l + 1);
-	memcpy(scanstring.val + scanstring.len, s, l);
-	scanstring.len += l;
+	resizeString(init, l + 1, yyscanner);
+	memcpy(yyextra->scanstring.val + yyextra->scanstring.len, s, l);
+	yyextra->scanstring.len += l;
 }
 
 /* Add single byte "c" to scanstring */
 static void
-addchar(bool init, char c)
+addchar(bool init, char c, yyscan_t yyscanner)
 {
-	resizeString(init, 1);
-	scanstring.val[scanstring.len] = c;
+	resizeString(init, 1, yyscanner);
+	yyextra->scanstring.val[yyextra->scanstring.len] = c;
 	if (c != '\0')
-		scanstring.len++;
+		yyextra->scanstring.len++;
 }
 
 /* Interface to jsonpath parser */
@@ -518,10 +528,13 @@ parsejsonpath(const char *str, int len, struct Node *escontext)
 {
 	JsonPathParseResult	*parseresult;
 	yyscan_t	scanner;
+	struct jsonpath_yy_extra_type yyext;
 
 	if (jsonpath_yylex_init(&scanner) != 0)
 		elog(ERROR, "yylex_init() failed: %m");
 
+	yyset_extra(&yyext, scanner);
+
 	if (len <= 0)
 		len = strlen(str);
 
@@ -560,7 +573,7 @@ hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner)
 
 /* Add given unicode character to scanstring */
 static bool
-addUnicodeChar(int ch, struct Node *escontext)
+addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
 {
 	if (ch == 0)
 	{
@@ -586,14 +599,14 @@ addUnicodeChar(int ch, struct Node *escontext)
 			ereturn(escontext, false,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("could not convert Unicode to server encoding")));
-		addstring(false, cbuf, strlen(cbuf));
+		addstring(false, cbuf, strlen(cbuf), yyscanner);
 	}
 	return true;
 }
 
 /* Add unicode character, processing any surrogate pairs */
 static bool
-addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
+addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
 {
 	if (is_utf16_surrogate_first(ch))
 	{
@@ -626,7 +639,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
 						   "surrogate.")));
 	}
 
-	return addUnicodeChar(ch, escontext);
+	return addUnicodeChar(ch, escontext, yyscanner);
 }
 
 /*
@@ -664,7 +677,7 @@ parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner)
 			}
 		}
 
-		if (! addUnicode(ch, &hi_surrogate, escontext))
+		if (! addUnicode(ch, &hi_surrogate, escontext, yyscanner))
 			return false;
 	}
 
@@ -692,7 +705,7 @@ parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner)
 
 	ch = (s2 << 4) | s3;
 
-	return addUnicodeChar(ch, escontext);
+	return addUnicodeChar(ch, escontext, yyscanner);
 }
 
 /*
-- 
2.47.1