v1-0014-jsonpath-scanner-reentrant-scanner.patch

text/plain

Filename: v1-0014-jsonpath-scanner-reentrant-scanner.patch
Type: text/plain
Part: 13
Message: Re: pure parsers and reentrant scanners

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch — returns the parsed metadata as JSON (format, series position, per-file stats); never the diff bytes. API reference →
Format: format-patch
Series: patch v1-0014
Subject: jsonpath scanner: reentrant scanner
File + −
src/backend/utils/adt/jsonpath_gram.y 2 0
src/backend/utils/adt/jsonpath_internal.h 7 2
src/backend/utils/adt/jsonpath_scan.l 41 73
From e4b4c3e37e16e18ef0c6d8cafdbbb03a95feacf5 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 2 Dec 2024 10:35:37 +0100
Subject: [PATCH v1 14/19] jsonpath scanner: reentrant scanner

Note: The parser was already pure.
---
 src/backend/utils/adt/jsonpath_gram.y     |   2 +
 src/backend/utils/adt/jsonpath_internal.h |   9 +-
 src/backend/utils/adt/jsonpath_scan.l     | 114 ++++++++--------------
 3 files changed, 50 insertions(+), 75 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index 8733a0eac66..de5a455c96d 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -60,8 +60,10 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
 %name-prefix="jsonpath_yy"
 %parse-param {JsonPathParseResult **result}
 %parse-param {struct Node *escontext}
+%parse-param {yyscan_t yyscanner}
 %lex-param {JsonPathParseResult **result}
 %lex-param {struct Node *escontext}
+%lex-param {yyscan_t yyscanner}
 
 %union
 {
diff --git a/src/backend/utils/adt/jsonpath_internal.h b/src/backend/utils/adt/jsonpath_internal.h
index 6cd6d8b652d..71f885475dd 100644
--- a/src/backend/utils/adt/jsonpath_internal.h
+++ b/src/backend/utils/adt/jsonpath_internal.h
@@ -22,17 +22,22 @@ typedef struct JsonPathString
 	int			total;
 } JsonPathString;
 
+typedef void *yyscan_t;
+
 #include "utils/jsonpath.h"
 #include "jsonpath_gram.h"
 
 #define YY_DECL extern int     jsonpath_yylex(YYSTYPE *yylval_param, \
 							  JsonPathParseResult **result, \
-							  struct Node *escontext)
+							  struct Node *escontext, \
+							  yyscan_t yyscanner)
 YY_DECL;
 extern int	jsonpath_yyparse(JsonPathParseResult **result,
-							 struct Node *escontext);
+							 struct Node *escontext,
+							 yyscan_t yyscanner);
 extern void jsonpath_yyerror(JsonPathParseResult **result,
 							 struct Node *escontext,
+							 yyscan_t yyscanner,
 							 const char *message);
 
 #endif							/* JSONPATH_INTERNAL_H */
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index f5a85de36f5..700c17712d0 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -30,18 +30,13 @@
 }
 
 %{
-static JsonPathString scanstring;
-
-/* Handles to the buffer that the lexer uses internally */
-static YY_BUFFER_STATE scanbufhandle;
-static char *scanbuf;
-static int	scanbuflen;
+static JsonPathString scanstring;	/* FIXME: still file-level state, not yet per-scanner */
 
 static void addstring(bool init, char *s, int l);
 static void addchar(bool init, char c);
 static enum yytokentype checkKeyword(void);
-static bool parseUnicode(char *s, int l, struct Node *escontext);
-static bool parseHexChar(char *s, struct Node *escontext);
+static bool parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner);
+static bool parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner);
 
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
@@ -65,6 +60,7 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 %option noyywrap
 %option warn
 %option prefix="jsonpath_yy"
+%option reentrant
 %option bison-bridge
 %option noyyalloc
 %option noyyrealloc
@@ -160,23 +156,23 @@ hex_fail	\\x{hexdigit}{0,1}
 <xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
 
 <xnq,xq,xvq>{unicode}+		{
-								if (!parseUnicode(yytext, yyleng, escontext))
+								if (!parseUnicode(yytext, yyleng, escontext, yyscanner))
 									yyterminate();
 							}
 
 <xnq,xq,xvq>{hex_char}		{
-								if (!parseHexChar(yytext, escontext))
+								if (!parseHexChar(yytext, escontext, yyscanner))
 									yyterminate();
 							}
 
 <xnq,xq,xvq>{unicode}*{unicodefail} {
-								jsonpath_yyerror(NULL, escontext,
+								jsonpath_yyerror(NULL, escontext, yyscanner,
 												 "invalid Unicode escape sequence");
 								yyterminate();
 							}
 
 <xnq,xq,xvq>{hex_fail}		{
-								jsonpath_yyerror(NULL, escontext,
+								jsonpath_yyerror(NULL, escontext, yyscanner,
 												 "invalid hexadecimal character sequence");
 								yyterminate();
 							}
@@ -184,20 +180,20 @@ hex_fail	\\x{hexdigit}{0,1}
 <xnq,xq,xvq>{unicode}+\\	{
 								/* throw back the \\, and treat as unicode */
 								yyless(yyleng - 1);
-								if (!parseUnicode(yytext, yyleng, escontext))
+								if (!parseUnicode(yytext, yyleng, escontext, yyscanner))
 									yyterminate();
 							}
 
 <xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
 
 <xnq,xq,xvq>\\				{
-								jsonpath_yyerror(NULL, escontext,
+								jsonpath_yyerror(NULL, escontext, yyscanner,
 												 "unexpected end after backslash");
 								yyterminate();
 							}
 
 <xq,xvq><<EOF>>				{
-								jsonpath_yyerror(NULL, escontext,
+								jsonpath_yyerror(NULL, escontext, yyscanner,
 												 "unterminated quoted string");
 								yyterminate();
 							}
@@ -223,7 +219,7 @@ hex_fail	\\x{hexdigit}{0,1}
 <xc>\*							{ }
 
 <xc><<EOF>>						{
-									jsonpath_yyerror(NULL, escontext,
+									jsonpath_yyerror(NULL, escontext, yyscanner,
 													 "unexpected end of comment");
 									yyterminate();
 								}
@@ -313,22 +309,22 @@ hex_fail	\\x{hexdigit}{0,1}
 								}
 
 {realfail}						{
-									jsonpath_yyerror(NULL, escontext,
+									jsonpath_yyerror(NULL, escontext, yyscanner,
 													 "invalid numeric literal");
 									yyterminate();
 								}
 {decinteger_junk}				{
-									jsonpath_yyerror(NULL, escontext,
+									jsonpath_yyerror(NULL, escontext, yyscanner,
 													 "trailing junk after numeric literal");
 									yyterminate();
 								}
 {decimal_junk}					{
-									jsonpath_yyerror(NULL, escontext,
+									jsonpath_yyerror(NULL, escontext, yyscanner,
 													 "trailing junk after numeric literal");
 									yyterminate();
 								}
 {real_junk}						{
-									jsonpath_yyerror(NULL, escontext,
+									jsonpath_yyerror(NULL, escontext, yyscanner,
 													 "trailing junk after numeric literal");
 									yyterminate();
 								}
@@ -356,8 +352,11 @@ hex_fail	\\x{hexdigit}{0,1}
 
 void
 jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
+				 yyscan_t yyscanner,
 				 const char *message)
 {
+	struct yyguts_t * yyg = (struct yyguts_t *) yyscanner;	/* needed for yytext macro */
+
 	/* don't overwrite escontext if it's already been set */
 	if (SOFT_ERROR_OCCURRED(escontext))
 		return;
@@ -470,44 +469,6 @@ checkKeyword()
 	return res;
 }
 
-/*
- * Called before any actual parsing is done
- */
-static void
-jsonpath_scanner_init(const char *str, int slen)
-{
-	if (slen <= 0)
-		slen = strlen(str);
-
-	/*
-	 * Might be left over after ereport()
-	 */
-	yy_init_globals();
-
-	/*
-	 * Make a scan buffer with special termination needed by flex.
-	 */
-
-	scanbuflen = slen;
-	scanbuf = palloc(slen + 2);
-	memcpy(scanbuf, str, slen);
-	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
-
-	BEGIN(INITIAL);
-}
-
-
-/*
- * Called after parsing is done to clean up after jsonpath_scanner_init()
- */
-static void
-jsonpath_scanner_finish(void)
-{
-	yy_delete_buffer(scanbufhandle);
-	pfree(scanbuf);
-}
-
 /*
  * Resize scanstring so that it can append string of given length.
  * Reinitialize if required.
@@ -556,20 +517,27 @@ JsonPathParseResult *
 parsejsonpath(const char *str, int len, struct Node *escontext)
 {
 	JsonPathParseResult	*parseresult;
+	yyscan_t	scanner;
+
+	if (jsonpath_yylex_init(&scanner) != 0)
+		elog(ERROR, "yylex_init() failed: %m");
+
+	if (len <= 0)
+		len = strlen(str);
 
-	jsonpath_scanner_init(str, len);
+	jsonpath_yy_scan_bytes(str, len, scanner);
 
-	if (jsonpath_yyparse(&parseresult, escontext) != 0)
-		jsonpath_yyerror(NULL, escontext, "invalid input"); /* shouldn't happen */
+	if (jsonpath_yyparse(&parseresult, escontext, scanner) != 0)
+		jsonpath_yyerror(NULL, escontext, scanner, "invalid input"); /* shouldn't happen */
 
-	jsonpath_scanner_finish();
+	jsonpath_yylex_destroy(scanner);
 
 	return parseresult;
 }
 
 /* Turn hex character into integer */
 static bool
-hexval(char c, int *result, struct Node *escontext)
+hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner)
 {
 	if (c >= '0' && c <= '9')
 	{
@@ -586,7 +554,7 @@ hexval(char c, int *result, struct Node *escontext)
 		*result = c - 'A' + 0xA;
 		return true;
 	}
-	jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
+	jsonpath_yyerror(NULL, escontext, yyscanner, "invalid hexadecimal digit");
 	return false;
 }
 
@@ -666,7 +634,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
  * src/backend/utils/adt/json.c
  */
 static bool
-parseUnicode(char *s, int l, struct Node *escontext)
+parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner)
 {
 	int			i = 2;
 	int			hi_surrogate = -1;
@@ -680,7 +648,7 @@ parseUnicode(char *s, int l, struct Node *escontext)
 		{
 			while (s[++i] != '}' && i < l)
 			{
-				if (!hexval(s[i], &si, escontext))
+				if (!hexval(s[i], &si, escontext, yyscanner))
 					return false;
 				ch = (ch << 4) | si;
 			}
@@ -690,7 +658,7 @@ parseUnicode(char *s, int l, struct Node *escontext)
 		{
 			for (j = 0; j < 4 && i < l; j++)
 			{
-				if (!hexval(s[i++], &si, escontext))
+				if (!hexval(s[i++], &si, escontext, yyscanner))
 					return false;
 				ch = (ch << 4) | si;
 			}
@@ -714,12 +682,12 @@ parseUnicode(char *s, int l, struct Node *escontext)
 
 /* Parse sequence of hex-encoded characters */
 static bool
-parseHexChar(char *s, struct Node *escontext)
+parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner)
 {
 	int s2, s3, ch;
-	if (!hexval(s[2], &s2, escontext))
+	if (!hexval(s[2], &s2, escontext, yyscanner))
 		return false;
-	if (!hexval(s[3], &s3, escontext))
+	if (!hexval(s[3], &s3, escontext, yyscanner))
 		return false;
 
 	ch = (s2 << 4) | s3;
@@ -733,13 +701,13 @@ parseHexChar(char *s, struct Node *escontext)
  */
 
 void *
-jsonpath_yyalloc(yy_size_t bytes)
+jsonpath_yyalloc(yy_size_t bytes, yyscan_t yyscanner)
 {
 	return palloc(bytes);
 }
 
 void *
-jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
+jsonpath_yyrealloc(void *ptr, yy_size_t bytes, yyscan_t yyscanner)
 {
 	if (ptr)
 		return repalloc(ptr, bytes);
@@ -748,7 +716,7 @@ jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
 }
 
 void
-jsonpath_yyfree(void *ptr)
+jsonpath_yyfree(void *ptr, yyscan_t yyscanner)
 {
 	if (ptr)
 		pfree(ptr);
-- 
2.47.1