v0-0001-cube-pure-parser-and-reentrant-scanner.patch

text/plain

Filename: v0-0001-cube-pure-parser-and-reentrant-scanner.patch
Type: text/plain
Part: 0
Message: pure parsers and reentrant scanners

Patch

Same data as JSON: `GET /api/v1/attachments/:id/patch` returns the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v0-0001
Subject: cube: pure parser and reentrant scanner
File + -
contrib/cube/cube.c 4 3
contrib/cube/cubedata.h 11 4
contrib/cube/cubeparse.y 6 9
contrib/cube/cubescan.l 24 27
From 01823a7c975fda47ce220db5bb91ecd0959d5123 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 2 Dec 2024 10:35:37 +0100
Subject: [PATCH v0 01/15] cube: pure parser and reentrant scanner

Use the flex %option reentrant and the bison option %pure-parser to
make the generated scanner and parser pure, reentrant, and
thread-safe.

(There are still some issues in the surrounding integration; see the
FIXMEs.)
---
 contrib/cube/cube.c      |  7 +++---
 contrib/cube/cubedata.h  | 15 ++++++++----
 contrib/cube/cubeparse.y | 15 +++++-------
 contrib/cube/cubescan.l  | 51 +++++++++++++++++++---------------------
 4 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/contrib/cube/cube.c b/contrib/cube/cube.c
index 1fc447511a1..bf8fc489dca 100644
--- a/contrib/cube/cube.c
+++ b/contrib/cube/cube.c
@@ -120,13 +120,14 @@ cube_in(PG_FUNCTION_ARGS)
 	char	   *str = PG_GETARG_CSTRING(0);
 	NDBOX	   *result;
 	Size		scanbuflen;
+	yyscan_t	scanner;
 
-	cube_scanner_init(str, &scanbuflen);
+	cube_scanner_init(str, &scanbuflen, &scanner);
 
-	cube_yyparse(&result, scanbuflen, fcinfo->context);
+	cube_yyparse(&result, scanbuflen, fcinfo->context, scanner);
 
 	/* We might as well run this even on failure. */
-	cube_scanner_finish();
+	cube_scanner_finish(scanner);
 
 	PG_RETURN_NDBOX_P(result);
 }
diff --git a/contrib/cube/cubedata.h b/contrib/cube/cubedata.h
index 96fa41a04e7..8bfcc6e99a2 100644
--- a/contrib/cube/cubedata.h
+++ b/contrib/cube/cubedata.h
@@ -59,14 +59,21 @@ typedef struct NDBOX
 #define CubeKNNDistanceEuclid			17	/* <-> */
 #define CubeKNNDistanceChebyshev		18	/* <=> */
 
+/* for cubescan.l and cubeparse.y */
+/* All grammar constructs return strings */
+#define YYSTYPE char *
+typedef void *yyscan_t;
+
 /* in cubescan.l */
-extern int	cube_yylex(void);
+extern int	cube_yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
 extern void cube_yyerror(NDBOX **result, Size scanbuflen,
 						 struct Node *escontext,
+						 yyscan_t yyscanner,
 						 const char *message);
-extern void cube_scanner_init(const char *str, Size *scanbuflen);
-extern void cube_scanner_finish(void);
+extern void cube_scanner_init(const char *str, Size *scanbuflen, yyscan_t *yyscannerp);
+extern void cube_scanner_finish(yyscan_t yyscanner);
 
 /* in cubeparse.y */
 extern int	cube_yyparse(NDBOX **result, Size scanbuflen,
-						 struct Node *escontext);
+						 struct Node *escontext,
+						 yyscan_t yyscanner);
diff --git a/contrib/cube/cubeparse.y b/contrib/cube/cubeparse.y
index 52622875cbb..a6b7e70630d 100644
--- a/contrib/cube/cubeparse.y
+++ b/contrib/cube/cubeparse.y
@@ -7,19 +7,11 @@
 #include "postgres.h"
 
 #include "cubedata.h"
+#include "cubeparse.h"	/* must be after cubedata.h for YYSTYPE and NDBOX */
 #include "nodes/miscnodes.h"
 #include "utils/float.h"
 #include "varatt.h"
 
-/* All grammar constructs return strings */
-#define YYSTYPE char *
-
-#include "cubeparse.h"
-
-/* silence -Wmissing-variable-declarations */
-extern int cube_yychar;
-extern int cube_yynerrs;
-
 /*
  * Bison doesn't allocate anything that needs to live across parser calls,
  * so we can easily have it use palloc instead of malloc.  This prevents
@@ -40,6 +32,9 @@ static bool write_point_as_box(int dim, char *str,
 %parse-param {NDBOX **result}
 %parse-param {Size scanbuflen}
 %parse-param {struct Node *escontext}
+%parse-param {yyscan_t yyscanner}
+%lex-param   {yyscan_t yyscanner}
+%pure-parser
 %expect 0
 %name-prefix="cube_yy"
 
@@ -75,6 +70,8 @@ box: O_BRACKET paren_list COMMA paren_list C_BRACKET
 
 		if (!write_box(dim, $2, $4, result, escontext))
 			YYABORT;
+
+		(void) yynerrs;	/* suppress compiler warning */
 	}
 
 	| paren_list COMMA paren_list
diff --git a/contrib/cube/cubescan.l b/contrib/cube/cubescan.l
index a30fbfc3111..09109675711 100644
--- a/contrib/cube/cubescan.l
+++ b/contrib/cube/cubescan.l
@@ -6,13 +6,8 @@
 
 #include "postgres.h"
 
-/*
- * NB: include cubeparse.h only AFTER defining YYSTYPE (to match cubeparse.y)
- * and cubedata.h for NDBOX.
- */
 #include "cubedata.h"
-#define YYSTYPE char *
-#include "cubeparse.h"
+#include "cubeparse.h"	/* must be after cubedata.h for YYSTYPE and NDBOX */
 }
 
 %{
@@ -32,10 +27,11 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 }
 
 /* Handles to the buffer that the lexer uses internally */
-static YY_BUFFER_STATE scanbufhandle;
-static char *scanbuf;
+static char *scanbuf; // FIXME
 %}
 
+%option reentrant
+%option bison-bridge
 %option 8bit
 %option never-interactive
 %option nodefault
@@ -55,14 +51,14 @@ NaN          [nN][aA][nN]
 
 %%
 
-{float}      cube_yylval = yytext; return CUBEFLOAT;
-{infinity}   cube_yylval = yytext; return CUBEFLOAT;
-{NaN}        cube_yylval = yytext; return CUBEFLOAT;
-\[           cube_yylval = "("; return O_BRACKET;
-\]           cube_yylval = ")"; return C_BRACKET;
-\(           cube_yylval = "("; return O_PAREN;
-\)           cube_yylval = ")"; return C_PAREN;
-\,           cube_yylval = ","; return COMMA;
+{float}      *yylval = yytext; return CUBEFLOAT;
+{infinity}   *yylval = yytext; return CUBEFLOAT;
+{NaN}        *yylval = yytext; return CUBEFLOAT;
+\[           *yylval = "("; return O_BRACKET;
+\]           *yylval = ")"; return C_BRACKET;
+\(           *yylval = "("; return O_PAREN;
+\)           *yylval = ")"; return C_PAREN;
+\,           *yylval = ","; return COMMA;
 [ \t\n\r\f\v]+ /* discard spaces */
 .            return yytext[0]; /* alert parser of the garbage */
 
@@ -74,8 +70,11 @@ NaN          [nN][aA][nN]
 void
 cube_yyerror(NDBOX **result, Size scanbuflen,
 			 struct Node *escontext,
+			 yyscan_t yyscanner,
 			 const char *message)
 {
+	struct yyguts_t * yyg = (struct yyguts_t *) yyscanner;	/* needed for yytext macro */
+
 	if (*yytext == YY_END_OF_BUFFER_CHAR)
 	{
 		errsave(escontext,
@@ -99,15 +98,15 @@ cube_yyerror(NDBOX **result, Size scanbuflen,
  * Called before any actual parsing is done
  */
 void
-cube_scanner_init(const char *str, Size *scanbuflen)
+cube_scanner_init(const char *str, Size *scanbuflen, yyscan_t *yyscannerp)
 {
 	Size		slen = strlen(str);
+	yyscan_t	yyscanner;
 
-	/*
-	 * Might be left over after ereport()
-	 */
-	if (YY_CURRENT_BUFFER)
-		yy_delete_buffer(YY_CURRENT_BUFFER);
+	if (yylex_init(yyscannerp) != 0)
+		elog(ERROR, "yylex_init() failed: %m");
+
+	yyscanner = *yyscannerp;
 
 	/*
 	 * Make a scan buffer with special termination needed by flex.
@@ -116,9 +115,7 @@ cube_scanner_init(const char *str, Size *scanbuflen)
 	scanbuf = palloc(slen + 2);
 	memcpy(scanbuf, str, slen);
 	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
-
-	BEGIN(INITIAL);
+	yy_scan_buffer(scanbuf, slen + 2, yyscanner);
 }
 
 
@@ -126,8 +123,8 @@ cube_scanner_init(const char *str, Size *scanbuflen)
  * Called after parsing is done to clean up after cube_scanner_init()
  */
 void
-cube_scanner_finish(void)
+cube_scanner_finish(yyscan_t yyscanner)
 {
-	yy_delete_buffer(scanbufhandle);
+	yylex_destroy(yyscanner);
 	pfree(scanbuf);
 }

base-commit: 2f696453d2b39fea800d5f7d8e5d3e1a2266de24
-- 
2.47.1