From af6d9cd07c140ffed2c1672b66f0b76e28a2d48c Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Fri, 26 Dec 2025 20:17:29 +0900 Subject: [PATCH 3/3] SQL/PGQ: Support multi-pattern path matching in GRAPH_TABLE Add support for comma-separated path patterns in MATCH clause, e.g.: MATCH (a)->(b), (b)->(c) Path patterns with shared variables are merged into a single join. Disconnected patterns (no shared variables) produce a Cartesian product, consistent with SQL/PGQ standard and Neo4j Cypher semantics. The parser already supported this syntax; this commit enables execution by merging multiple path patterns instead of rejecting them. --- src/backend/rewrite/rewriteGraphTable.c | 34 ++++- src/test/regress/expected/graph_table.out | 167 ++++++++++++++++++++++ src/test/regress/sql/graph_table.sql | 85 +++++++++++ 3 files changed, 281 insertions(+), 5 deletions(-) diff --git a/src/backend/rewrite/rewriteGraphTable.c b/src/backend/rewrite/rewriteGraphTable.c index 44bdc8a1f20..1420786fbb4 100644 --- a/src/backend/rewrite/rewriteGraphTable.c +++ b/src/backend/rewrite/rewriteGraphTable.c @@ -121,16 +121,40 @@ rewriteGraphTable(Query *parsetree, int rt_index) { RangeTblEntry *rte; Query *graph_table_query; - List *path_pattern; List *pathqueries = NIL; + int num_patterns; rte = rt_fetch(rt_index, parsetree->rtable); + num_patterns = list_length(rte->graph_pattern->path_pattern_list); - if (list_length(rte->graph_pattern->path_pattern_list) != 1) - elog(ERROR, "unsupported path pattern list length"); + if (num_patterns == 1) + { + /* Single path pattern - original behavior */ + List *path_pattern = linitial(rte->graph_pattern->path_pattern_list); + + pathqueries = generate_queries_for_path_pattern(rte, path_pattern); + } + else + { + /* + * Multiple path patterns - merge all element patterns into a single + * combined path pattern. Variables with the same name will be merged + * by generate_queries_for_path_pattern(). + */ + List *combined_pattern = NIL; + ListCell *lc; + + foreach(lc, rte->graph_pattern->path_pattern_list) + { + List *path_pattern = (List *) lfirst(lc); + + combined_pattern = list_concat(combined_pattern, + list_copy(path_pattern)); + } + + pathqueries = generate_queries_for_path_pattern(rte, combined_pattern); + } - path_pattern = linitial(rte->graph_pattern->path_pattern_list); - pathqueries = generate_queries_for_path_pattern(rte, path_pattern); graph_table_query = generate_union_from_pathqueries(&pathqueries); AcquireRewriteLocks(graph_table_query, true, false); diff --git a/src/test/regress/expected/graph_table.out b/src/test/regress/expected/graph_table.out index a7377a6d768..1c69104a0d8 100644 --- a/src/test/regress/expected/graph_table.out +++ b/src/test/regress/expected/graph_table.out @@ -1382,4 +1382,171 @@ SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (PROPERTY_NAMES ERROR: PROPERTY_NAMES() argument must be an element variable LINE 1: ...APH_TABLE (myshop MATCH (c IS customers) COLUMNS (PROPERTY_N... ^ +-- Multi-pattern path tests (comma-separated path patterns with shared variables) +-- Basic multi-pattern: (a)->(b), (b)->(c) where b is shared +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[e1 IS el1]->(b IS vl2), + (b)-[e2 IS el2]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY a_name, b_name, c_name; + a_name | b_name | c_name +--------+--------+-------- + v11 | v22 | v32 +(1 row) + +-- Multi-pattern with same start vertex reaching different destinations +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (a)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY a_name, b_name, c_name; + a_name | b_name | c_name +--------+--------+-------- + v11 | v22 | v22 + v11 | v22 | v31 + v11 | v22 | v33 + v12 | v21 | v21 + v13 | v23 | v23 +(5 rows) + +-- Multi-pattern with three patterns sharing one variable +SELECT a_name, b2_name, b3_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b2 IS vl2), + (a)-[]->(b3 IS vl3) + COLUMNS (a.vname AS a_name, b2.vname AS b2_name, b3.vname AS b3_name) +) ORDER BY a_name, b2_name, b3_name; + a_name | b2_name | b3_name +--------+---------+--------- + v11 | v22 | v22 + v11 | v22 | v31 + v11 | v22 | v33 + v12 | v21 | v21 + v13 | v23 | v23 +(5 rows) + +-- Same variable with same label (should work) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (b IS vl2)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + a_name | b_name | c_name +--------+--------+-------- + v11 | v22 | v32 +(1 row) + +-- Same variable with different labels (conflict - what happens?) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (b IS vl3)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; +ERROR: element patterns with same variable name "b" but different label expressions are not supported +-- Same variable with label OR expression (b IS vl2|vl3) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + a_name | b_name | c_name +--------+--------+-------- + v11 | v22 | v32 + v11 | v33 | v33 + v11 | v33 | v33 +(3 rows) + +-- Multi-pattern with LABELS() filtering in WHERE clause +-- Filter shared variable b by label using LABELS() function +EXPLAIN (COSTS OFF) SELECT a_name, b_name, b_labels, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl2' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, LABELS(b) AS b_labels, c.vname AS c_name) +); + QUERY PLAN +------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Hash Join + Hash Cond: ((e2_3.id_2_1 = v2.id1) AND (e2_3.id_2_2 = v2.id2)) + -> Seq Scan on e2_3 + -> Hash + -> Merge Join + Merge Cond: ((e1_2.id_2_1 = v2.id1) AND (e1_2.id_2_2 = v2.id2)) + -> Sort + Sort Key: e1_2.id_2_1, e1_2.id_2_2 + -> Seq Scan on e1_2 + -> Sort + Sort Key: v2.id1, v2.id2 + -> Seq Scan on v2 + -> Index Scan using v1_pkey on v1 + Index Cond: (id = e1_2.id_1) + -> Index Scan using v3_pkey on v3 + Index Cond: (id = e2_3.id_3) +(18 rows) + +SELECT a_name, b_name, b_labels, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl2' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, LABELS(b) AS b_labels, c.vname AS c_name) +) ORDER BY 1, 2, 3; + a_name | b_name | b_labels | c_name +--------+--------+--------------+-------- + v11 | v22 | {l1,vl2,vl3} | v32 +(1 row) + +-- Multi-pattern with LABELS() filtering - no common labels (should prune all) +-- b IS vl2|vl3 but WHERE filters by 'vl1' which has no overlap +EXPLAIN (COSTS OFF) SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl1' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +); + QUERY PLAN +--------------------------------- + Result + Replaces: Scan on graph_table + One-Time Filter: false +(3 rows) + +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl1' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + a_name | b_name | c_name +--------+--------+-------- +(0 rows) + +-- Disconnected patterns (no shared variables) - should produce cross product +-- (a)->(b) and (c)->(d) are independent, result is Cartesian product +-- vl1->vl2 has 3 paths, vl1->vl3 has 3 paths, so cross product = 3 x 3 = 9 rows +SELECT a_name, b_name, c_name, d_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (c IS vl1)-[]->(d IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, + c.vname AS c_name, d.vname AS d_name) +) ORDER BY 1, 2, 3, 4; + a_name | b_name | c_name | d_name +--------+--------+--------+-------- + v11 | v22 | v11 | v22 + v11 | v22 | v11 | v31 + v11 | v22 | v11 | v33 + v11 | v22 | v12 | v21 + v11 | v22 | v13 | v23 + v12 | v21 | v11 | v22 + v12 | v21 | v11 | v31 + v12 | v21 | v11 | v33 + v12 | v21 | v12 | v21 + v12 | v21 | v13 | v23 + v13 | v23 | v11 | v22 + v13 | v23 | v11 | v31 + v13 | v23 | v11 | v33 + v13 | v23 | v12 | v21 + v13 | v23 | v13 | v23 +(15 rows) + -- leave the objects behind for pg_upgrade/pg_dump tests diff --git a/src/test/regress/sql/graph_table.sql b/src/test/regress/sql/graph_table.sql index 3e0b10e39d2..521fb42a616 100644 --- a/src/test/regress/sql/graph_table.sql +++ b/src/test/regress/sql/graph_table.sql @@ -767,4 +767,89 @@ SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (PROPERTY_NAMES -- PROPERTY_NAMES() error: non-variable argument SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (PROPERTY_NAMES(123))); +-- Multi-pattern path tests (comma-separated path patterns with shared variables) +-- Basic multi-pattern: (a)->(b), (b)->(c) where b is shared +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[e1 IS el1]->(b IS vl2), + (b)-[e2 IS el2]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY a_name, b_name, c_name; + +-- Multi-pattern with same start vertex reaching different destinations +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (a)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY a_name, b_name, c_name; + +-- Multi-pattern with three patterns sharing one variable +SELECT a_name, b2_name, b3_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b2 IS vl2), + (a)-[]->(b3 IS vl3) + COLUMNS (a.vname AS a_name, b2.vname AS b2_name, b3.vname AS b3_name) +) ORDER BY a_name, b2_name, b3_name; + +-- Same variable with same label (should work) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (b IS vl2)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + +-- Same variable with different labels (conflict - what happens?) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (b IS vl3)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + +-- Same variable with label OR expression (b IS vl2|vl3) +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + +-- Multi-pattern with LABELS() filtering in WHERE clause +-- Filter shared variable b by label using LABELS() function +EXPLAIN (COSTS OFF) SELECT a_name, b_name, b_labels, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl2' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, LABELS(b) AS b_labels, c.vname AS c_name) +); + +SELECT a_name, b_name, b_labels, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl2' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, LABELS(b) AS b_labels, c.vname AS c_name) +) ORDER BY 1, 2, 3; + +-- Multi-pattern with LABELS() filtering - no common labels (should prune all) +-- b IS vl2|vl3 but WHERE filters by 'vl1' which has no overlap +EXPLAIN (COSTS OFF) SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl1' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +); + +SELECT a_name, b_name, c_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2|vl3), + (b)-[]->(c IS vl3) + WHERE 'vl1' = ANY(LABELS(b)) + COLUMNS (a.vname AS a_name, b.vname AS b_name, c.vname AS c_name) +) ORDER BY 1, 2, 3; + +-- Disconnected patterns (no shared variables) - should produce cross product +-- (a)->(b) and (c)->(d) are independent, result is Cartesian product +-- vl1->vl2 has 3 paths, vl1->vl3 has 3 paths, so cross product = 3 x 3 = 9 rows +SELECT a_name, b_name, c_name, d_name FROM GRAPH_TABLE (g1 + MATCH (a IS vl1)-[]->(b IS vl2), + (c IS vl1)-[]->(d IS vl3) + COLUMNS (a.vname AS a_name, b.vname AS b_name, + c.vname AS c_name, d.vname AS d_name) +) ORDER BY 1, 2, 3, 4; + -- leave the objects behind for pg_upgrade/pg_dump tests -- 2.50.1 (Apple Git-155)