From 27f5610bd1714a3fc5babc2e51a33f80acb34afd Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Wed, 24 Dec 2025 23:44:00 +0900 Subject: [PATCH 1/3] SQL/PGQ: Add LABELS() graph element function Implement LABELS() function that returns all labels of a graph element as text[]. This follows the SQL/PGQ standard for graph element functions. Implementation wraps each element table in a subquery that adds a virtual __labels__ column containing the element's label array. This design enables the query planner to: - Prune Append branches when filtering by specific labels (e.g., WHERE 'Person' = ANY(LABELS(v)) scans only Person table) - Constant-fold label arrays for single-label elements - Eliminate scans entirely for non-matching label filters - Optimize queries with host variables ($1) since LABELS() is already a constant array at plan time Permission handling follows regular SQL subquery behavior: - The wrapper subquery RTE does not have relid/relkind/perminfoindex set - Permission checking happens inside the subquery where RTE_RELATION lives - Column-level privileges (selectedCols) are set based on matched labels' property definitions, so users only need SELECT on actually used columns Key changes: - Add GraphLabelsRef node type for LABELS() in parse tree - Transform LABELS(var) to Var referencing __labels__ column - Wrap element tables in subqueries with __labels__ column - Add get_element_used_columns() for column-level privilege checking - Add optimizer pruning tests for shared labels and host variables --- src/backend/nodes/nodeFuncs.c | 9 + src/backend/parser/parse_collate.c | 1 + src/backend/parser/parse_expr.c | 6 + src/backend/parser/parse_graphtable.c | 64 ++++ src/backend/rewrite/rewriteGraphTable.c | 364 ++++++++++++++++++++-- src/backend/utils/adt/ruleutils.c | 8 + src/include/nodes/primnodes.h | 10 + src/include/parser/parse_graphtable.h | 2 + src/test/regress/expected/graph_table.out | 236 ++++++++++++++ src/test/regress/sql/graph_table.sql | 118 +++++++ 10 files changed, 790 insertions(+), 28 deletions(-) diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index f58f74d0550..8c478a31f49 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -287,6 +287,9 @@ exprType(const Node *expr) case T_GraphPropertyRef: type = ((const GraphPropertyRef *) expr)->typeId; break; + case T_GraphLabelsRef: + type = TEXTARRAYOID; + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); type = InvalidOid; /* keep compiler quiet */ @@ -541,6 +544,8 @@ exprTypmod(const Node *expr) return exprTypmod((Node *) ((const PlaceHolderVar *) expr)->phexpr); case T_GraphPropertyRef: return ((const GraphPropertyRef *) expr)->typmod; + case T_GraphLabelsRef: + return -1; default: break; } @@ -1066,6 +1071,9 @@ exprCollation(const Node *expr) case T_GraphPropertyRef: coll = ((const GraphPropertyRef *) expr)->collation; break; + case T_GraphLabelsRef: + coll = DEFAULT_COLLATION_OID; + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); coll = InvalidOid; /* keep compiler quiet */ @@ -2133,6 +2141,7 @@ expression_tree_walker_impl(Node *node, case T_SortGroupClause: case T_CTESearchClause: case T_GraphPropertyRef: + case T_GraphLabelsRef: case T_MergeSupportFunc: /* primitive node types with no expression subnodes */ break; diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index 8f912065a01..9aaf01bf58b 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -547,6 +547,7 @@ assign_collations_walker(Node *node, assign_collations_context *context) case T_SetToDefault: case T_CurrentOfExpr: case T_GraphPropertyRef: + case T_GraphLabelsRef: /* * General case for childless expression nodes. These should diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index d4132587e23..e08ffe44705 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1448,6 +1448,12 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) Node *last_srf = pstate->p_last_srf; List *targs; ListCell *args; + Node *result; + + /* Check for graph functions like LABELS() in GRAPH_TABLE context */ + result = transformGraphTableFuncCall(pstate, fn); + if (result != NULL) + return result; /* Transform the list of arguments ... */ targs = NIL; diff --git a/src/backend/parser/parse_graphtable.c b/src/backend/parser/parse_graphtable.c index a8769a67b6a..5bd71eae536 100644 --- a/src/backend/parser/parse_graphtable.c +++ b/src/backend/parser/parse_graphtable.c @@ -82,6 +82,70 @@ transformGraphTablePropertyRef(ParseState *pstate, ColumnRef *cref) return NULL; } +/* + * Transform a graph function call like LABELS(v) inside GRAPH_TABLE. + * Returns NULL if this is not a recognized graph function. + */ +Node * +transformGraphTableFuncCall(ParseState *pstate, FuncCall *fn) +{ + GraphTableParseState *gpstate = pstate->p_graph_table_pstate; + char *funcname; + + if (!gpstate) + return NULL; + + if (list_length(fn->funcname) != 1) + return NULL; + + funcname = strVal(linitial(fn->funcname)); + + if (pg_strcasecmp(funcname, "labels") == 0) + { + Node *arg; + ColumnRef *cref; + char *elvarname; + GraphLabelsRef *glr; + + if (list_length(fn->args) != 1) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("LABELS() requires exactly one argument"), + parser_errposition(pstate, fn->location)); + + arg = linitial(fn->args); + + if (!IsA(arg, ColumnRef)) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("LABELS() argument must be an element variable"), + parser_errposition(pstate, fn->location)); + + cref = (ColumnRef *) arg; + if (list_length(cref->fields) != 1) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("LABELS() argument must be an element variable"), + parser_errposition(pstate, cref->location)); + + elvarname = strVal(linitial(cref->fields)); + + if (!list_member(gpstate->variables, linitial(cref->fields))) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("element variable \"%s\" does not exist", elvarname), + parser_errposition(pstate, cref->location)); + + glr = makeNode(GraphLabelsRef); + glr->elvarname = pstrdup(elvarname); + glr->location = fn->location; + + return (Node *) glr; + } + + return NULL; +} + /* * Transform a label expression. */ diff --git a/src/backend/rewrite/rewriteGraphTable.c b/src/backend/rewrite/rewriteGraphTable.c index 3b319639b81..605ce35da97 100644 --- a/src/backend/rewrite/rewriteGraphTable.c +++ b/src/backend/rewrite/rewriteGraphTable.c @@ -15,6 +15,7 @@ #include "access/table.h" #include "access/htup_details.h" +#include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_propgraph_element.h" #include "catalog/pg_propgraph_element_label.h" @@ -38,8 +39,10 @@ #include "rewrite/rewriteManip.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/catcache.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#include "utils/rel.h" #include "utils/ruleutils.h" #include "utils/syscache.h" @@ -86,12 +89,16 @@ struct path_element /* Source and destination conditions for an edge element. */ List *src_quals; List *dest_quals; + /* Attribute number of __labels__ column in subquery RTE */ + AttrNumber labels_attnum; }; static Node *replace_property_refs(Oid propgraphid, Node *node, const List *mappings); static List *build_edge_vertex_link_quals(HeapTuple edgetup, int edgerti, int refrti, Oid refid, AttrNumber catalog_key_attnum, AttrNumber catalog_ref_attnum, AttrNumber catalog_eqop_attnum); static List *generate_queries_for_path_pattern(RangeTblEntry *rte, List *element_patterns); static Query *generate_query_for_graph_path(RangeTblEntry *rte, List *path); +static Const *build_labels_const(Oid elemoid); +static RangeTblEntry *create_element_subquery_rte(struct path_element *pe, Oid reloid, AttrNumber *labels_attnum); static Node *generate_setop_from_pathqueries(List *pathqueries, List **rtable, List **targetlist); static List *generate_queries_for_path_pattern_recurse(RangeTblEntry *rte, List *pathqueries, List *cur_path, List *path_pattern_lists, int elempos); static Query *generate_query_for_empty_path_pattern(RangeTblEntry *rte); @@ -407,7 +414,6 @@ generate_query_for_graph_path(RangeTblEntry *rte, List *graph_path) Query *path_query = makeNode(Query); List *fromlist = NIL; List *qual_exprs = NIL; - List *vars; path_query->commandType = CMD_SELECT; @@ -415,8 +421,8 @@ generate_query_for_graph_path(RangeTblEntry *rte, List *graph_path) { struct path_factor *pf = pe->path_factor; RangeTblRef *rtr; - Relation rel; - ParseNamespaceItem *pni; + RangeTblEntry *subrte; + AttrNumber labels_attnum; Assert(pf->kind == VERTEX_PATTERN || IS_EDGE_PATTERN(pf->kind)); @@ -484,22 +490,20 @@ generate_query_for_graph_path(RangeTblEntry *rte, List *graph_path) Assert(!pe->src_quals && !pe->dest_quals); /* - * Create RangeTblEntry for this element table. + * Create RangeTblEntry for this element table wrapped in a subquery. + * The subquery adds a __labels__ column for LABELS() support. * * SQL/PGQ standard (Ref. Section 11.19, Access rule 2 and General * rule 4) does not specify whose access privileges to use when * accessing the element tables: property graph owner's or current * user's. It is safer to use current user's privileges so as not to - * make property graphs as a hole for unpriviledged data access. This + * make property graphs as a hole for unprivileged data access. This * is inline with the views being security_invoker by default. */ - rel = table_open(pe->reloid, AccessShareLock); - pni = addRangeTableEntryForRelation(make_parsestate(NULL), rel, AccessShareLock, - NULL, true, false); - table_close(rel, NoLock); - path_query->rtable = lappend(path_query->rtable, pni->p_rte); - path_query->rteperminfos = lappend(path_query->rteperminfos, pni->p_perminfo); - pni->p_rte->perminfoindex = list_length(path_query->rteperminfos); + subrte = create_element_subquery_rte(pe, pe->reloid, &labels_attnum); + pe->labels_attnum = labels_attnum; + path_query->rtable = lappend(path_query->rtable, subrte); + rtr = makeNode(RangeTblRef); rtr->rtindex = list_length(path_query->rtable); fromlist = lappend(fromlist, rtr); @@ -540,22 +544,6 @@ generate_query_for_graph_path(RangeTblEntry *rte, List *graph_path) (Node *) rte->graph_table_columns, graph_path)); - /* - * Mark the columns being accessed in the path query as requiring SELECT - * privilege. Any lateral columns should have been handled when the - * corresponding ColumnRefs were transformed. Ignore those here. - */ - vars = pull_vars_of_level((Node *) list_make2(qual_exprs, path_query->targetList), 0); - foreach_node(Var, var, vars) - { - RTEPermissionInfo *perminfo = getRTEPermissionInfo(path_query->rteperminfos, - rt_fetch(var->varno, path_query->rtable)); - - /* Must offset the attnum to fit in a bitmapset */ - perminfo->selectedCols = bms_add_member(perminfo->selectedCols, - var->varattno - FirstLowInvalidHeapAttributeNumber); - } - return path_query; } @@ -1135,6 +1123,35 @@ replace_property_refs_mutator(Node *node, struct replace_property_refs_context * return n; } + else if (IsA(node, GraphLabelsRef)) + { + GraphLabelsRef *glr = (GraphLabelsRef *) node; + struct path_element *found_mapping = NULL; + Var *var; + + /* Find the element mapping for this variable */ + foreach_ptr(struct path_element, m, context->mappings) + { + if (m->path_factor->variable && + strcmp(glr->elvarname, m->path_factor->variable) == 0) + { + found_mapping = m; + break; + } + } + if (!found_mapping) + elog(ERROR, "undefined element variable \"%s\"", glr->elvarname); + + /* + * Return a Var referencing the __labels__ column in the subquery RTE. + * This allows the optimizer to push down predicates involving LABELS(). + */ + var = makeVar(found_mapping->path_factor->factorpos + 1, + found_mapping->labels_attnum, + TEXTARRAYOID, -1, InvalidOid, 0); + + return (Node *) var; + } return expression_tree_mutator(node, replace_property_refs_mutator, context); } @@ -1284,6 +1301,148 @@ is_property_associated_with_label(Oid labeloid, Oid propoid) return associated; } +/* + * Helper to collect Var attnum from expression tree. + * Used to determine which columns an element actually accesses. + */ +static bool +collect_var_attnums_walker(Node *node, Bitmapset **attnums) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + /* Only collect columns from varno 1 (the element table) */ + if (var->varno == 1 && var->varattno > 0) + *attnums = bms_add_member(*attnums, + var->varattno - FirstLowInvalidHeapAttributeNumber); + return false; + } + return expression_tree_walker(node, collect_var_attnums_walker, attnums); +} + +/* + * Helper to add column attnums from an int16 array to the bitmapset. + */ +static void +add_columns_from_array(Bitmapset **attnums, Datum arrayDatum) +{ + Datum *elems; + int nelems; + + deconstruct_array_builtin(DatumGetArrayTypeP(arrayDatum), INT2OID, + &elems, NULL, &nelems); + for (int i = 0; i < nelems; i++) + { + AttrNumber attnum = DatumGetInt16(elems[i]); + + *attnums = bms_add_member(*attnums, + attnum - FirstLowInvalidHeapAttributeNumber); + } +} + +/* + * Collect all column attribute numbers used by an element for the given labels. + * This includes columns from: + * - Property definitions (plpexpr expressions) for the specified labels + * - Element key columns + * - Edge source/destination key columns (for edges) + * + * The labeloids parameter filters which label's properties are collected. + * Only properties from labels in this list are included. + * + * Returns a bitmapset suitable for use as RTEPermissionInfo.selectedCols. + */ +static Bitmapset * +get_element_used_columns(Oid elemoid, List *labeloids) +{ + Bitmapset *attnums = NULL; + Relation rel; + SysScanDesc scan; + ScanKeyData key[1]; + HeapTuple labeltup; + HeapTuple elemtup; + Form_pg_propgraph_element pgeform; + Datum datum; + bool isnull; + + /* First, collect key columns from the element definition */ + elemtup = SearchSysCache1(PROPGRAPHELOID, ObjectIdGetDatum(elemoid)); + if (!HeapTupleIsValid(elemtup)) + elog(ERROR, "cache lookup failed for property graph element %u", elemoid); + + pgeform = (Form_pg_propgraph_element) GETSTRUCT(elemtup); + + /* Add element key columns */ + datum = SysCacheGetAttr(PROPGRAPHELOID, elemtup, + Anum_pg_propgraph_element_pgekey, &isnull); + if (!isnull) + add_columns_from_array(&attnums, datum); + + /* For edges, also add source and destination key columns */ + if (pgeform->pgekind == PGEKIND_EDGE) + { + datum = SysCacheGetAttr(PROPGRAPHELOID, elemtup, + Anum_pg_propgraph_element_pgesrckey, &isnull); + if (!isnull) + add_columns_from_array(&attnums, datum); + + datum = SysCacheGetAttr(PROPGRAPHELOID, elemtup, + Anum_pg_propgraph_element_pgedestkey, &isnull); + if (!isnull) + add_columns_from_array(&attnums, datum); + } + + ReleaseSysCache(elemtup); + + /* Now scan labels associated with this element for property columns */ + rel = table_open(PropgraphElementLabelRelationId, RowShareLock); + ScanKeyInit(&key[0], + Anum_pg_propgraph_element_label_pgelelid, + BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(elemoid)); + scan = systable_beginscan(rel, PropgraphElementLabelElementLabelIndexId, + true, NULL, 1, key); + + while (HeapTupleIsValid(labeltup = systable_getnext(scan))) + { + Form_pg_propgraph_element_label ele_label = + (Form_pg_propgraph_element_label) GETSTRUCT(labeltup); + CatCList *proplist; + + /* + * Only include properties from labels that match the query pattern. + * This ensures column-level privileges are checked only for the + * columns actually used by the matched labels. + */ + if (!list_member_oid(labeloids, ele_label->pgellabelid)) + continue; + + /* Get all properties for this label */ + proplist = SearchSysCacheList1(PROPGRAPHLABELPROP, + ObjectIdGetDatum(ele_label->oid)); + + for (int i = 0; i < proplist->n_members; i++) + { + HeapTuple proptup = &proplist->members[i]->tuple; + Node *expr; + + expr = stringToNode(TextDatumGetCString( + SysCacheGetAttrNotNull(PROPGRAPHLABELPROP, proptup, + Anum_pg_propgraph_label_property_plpexpr))); + collect_var_attnums_walker(expr, &attnums); + } + + ReleaseSysCacheList(proplist); + } + + systable_endscan(scan); + table_close(rel, RowShareLock); + + return attnums; +} + /* * If given element has the given property associated with it, through any of * the associated labels, return value expression of the property. Otherwise @@ -1327,3 +1486,152 @@ get_element_property_expr(Oid elemoid, Oid propoid, int rtindex) return n; } + +/* + * Build a Const node containing an array of label names for the given element. + * Returns text[] constant like ARRAY['Person', 'Employee']::text[]. + */ +static Const * +build_labels_const(Oid elemoid) +{ + List *label_names = NIL; + ArrayType *arr; + Datum *elems; + int nelems; + int i; + CatCList *catlist; + + /* Get all labels for this element using syscache */ + catlist = SearchSysCacheList1(PROPGRAPHELEMENTLABELELEMENTLABEL, + ObjectIdGetDatum(elemoid)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple labeltup = &catlist->members[i]->tuple; + Form_pg_propgraph_element_label form = + (Form_pg_propgraph_element_label) GETSTRUCT(labeltup); + char *labelname = get_propgraph_label_name(form->pgellabelid); + + label_names = lappend(label_names, makeString(labelname)); + } + + ReleaseSysCacheList(catlist); + + /* Build ARRAY['label1', 'label2', ...]::text[] */ + nelems = list_length(label_names); + elems = (Datum *) palloc(nelems * sizeof(Datum)); + i = 0; + foreach_ptr(String, s, label_names) + { + elems[i++] = CStringGetTextDatum(strVal(s)); + } + + arr = construct_array(elems, nelems, TEXTOID, -1, false, TYPALIGN_INT); + + return makeConst(TEXTARRAYOID, -1, InvalidOid, + -1, PointerGetDatum(arr), false, false); +} + +/* + * Create a subquery RTE that wraps the element table and adds a __labels__ + * column. The subquery structure is: + * + * SELECT *, ARRAY['Label1', 'Label2']::text[] AS __labels__ + * FROM element_table + * + * This allows LABELS(v) to reference a Var instead of a constant, enabling + * the planner to push down predicates involving LABELS(). + * + * The returned subquery RTE does NOT have relid/relkind/perminfoindex set, + * matching how regular SQL subqueries work. Permission checking (including + * column-level privileges via selectedCols) happens inside the subquery + * where the actual RTE_RELATION lives. + */ +static RangeTblEntry * +create_element_subquery_rte(struct path_element *pe, Oid reloid, AttrNumber *labels_attnum) +{ + Query *subquery; + RangeTblEntry *subrte; + RangeTblEntry *relrte; + RangeTblRef *rtr; + Relation rel; + TupleDesc tupdesc; + int attno; + ParseNamespaceItem *pni; + Const *labels_const; + TargetEntry *labels_te; + + subquery = makeNode(Query); + subquery->commandType = CMD_SELECT; + + /* Create RTE for the actual table */ + rel = table_open(reloid, AccessShareLock); + pni = addRangeTableEntryForRelation(make_parsestate(NULL), rel, + AccessShareLock, NULL, true, false); + relrte = pni->p_rte; + tupdesc = RelationGetDescr(rel); + table_close(rel, NoLock); + + subquery->rtable = list_make1(relrte); + subquery->rteperminfos = list_make1(pni->p_perminfo); + relrte->perminfoindex = 1; + + /* + * Set selectedCols to include only columns used by the element's + * property definitions for the matched labels. This enables proper + * column-level privilege checking - the user only needs SELECT on + * columns that are actually referenced by the matched label's properties, + * not all columns of the table or all labels of the element. + */ + pni->p_perminfo->selectedCols = get_element_used_columns(pe->elemoid, + pe->path_factor->labeloids); + + /* Create FromExpr */ + rtr = makeNode(RangeTblRef); + rtr->rtindex = 1; + subquery->jointree = makeFromExpr(list_make1(rtr), NULL); + + /* Build targetlist: all columns from the table + __labels__ */ + subquery->targetList = NIL; + attno = 1; + for (int i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + Var *var; + TargetEntry *te; + + if (attr->attisdropped) + continue; + + var = makeVar(1, attr->attnum, attr->atttypid, + attr->atttypmod, attr->attcollation, 0); + te = makeTargetEntry((Expr *) var, attno++, + pstrdup(NameStr(attr->attname)), false); + subquery->targetList = lappend(subquery->targetList, te); + } + + /* Add __labels__ column */ + labels_const = build_labels_const(pe->elemoid); + labels_te = makeTargetEntry((Expr *) labels_const, attno, + pstrdup("__labels__"), false); + subquery->targetList = lappend(subquery->targetList, labels_te); + *labels_attnum = attno; + + /* Create the wrapper subquery RTE */ + subrte = makeNode(RangeTblEntry); + subrte->rtekind = RTE_SUBQUERY; + subrte->subquery = subquery; + subrte->lateral = false; + subrte->inh = false; + subrte->inFromCl = true; + + /* Build column name list for the subquery RTE */ + subrte->eref = makeAlias("__element__", NIL); + foreach_node(TargetEntry, te, subquery->targetList) + { + subrte->eref->colnames = lappend(subrte->eref->colnames, + makeString(pstrdup(te->resname))); + } + + return subrte; +} diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 54a8ebd9020..9aff5b32527 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -11143,6 +11143,14 @@ get_rule_expr(Node *node, deparse_context *context, break; } + case T_GraphLabelsRef: + { + GraphLabelsRef *glr = (GraphLabelsRef *) node; + + appendStringInfo(buf, "LABELS(%s)", quote_identifier(glr->elvarname)); + break; + } + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); break; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index e9e2d6c1504..b658bb2be8c 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -2201,6 +2201,16 @@ typedef struct GraphPropertyRef ParseLoc location; } GraphPropertyRef; +/* + * GraphLabelsRef - LABELS(element_variable) inside GRAPH_TABLE clause + */ +typedef struct GraphLabelsRef +{ + Expr xpr; + const char *elvarname; /* element variable name */ + ParseLoc location; +} GraphLabelsRef; + /*-------------------- * TargetEntry - * a target entry (used in query target lists) diff --git a/src/include/parser/parse_graphtable.h b/src/include/parser/parse_graphtable.h index 4cefd5acf9d..51d2e72b4a6 100644 --- a/src/include/parser/parse_graphtable.h +++ b/src/include/parser/parse_graphtable.h @@ -19,6 +19,8 @@ extern Node *transformGraphTablePropertyRef(ParseState *pstate, ColumnRef *cref); +extern Node *transformGraphTableFuncCall(ParseState *pstate, FuncCall *fn); + extern Node *transformGraphPattern(ParseState *pstate, GraphPattern *graph_pattern); #endif /* PARSE_GRAPHTABLE_H */ diff --git a/src/test/regress/expected/graph_table.out b/src/test/regress/expected/graph_table.out index a4df7464d79..c82d7fd7291 100644 --- a/src/test/regress/expected/graph_table.out +++ b/src/test/regress/expected/graph_table.out @@ -928,4 +928,240 @@ SELECT sname, dname FROM GRAPH_TABLE (g1 MATCH (src)->(dest) WHERE src.vprop1 > ERROR: subqueries within GRAPH_TABLE reference are not supported SELECT sname, dname FROM GRAPH_TABLE (g1 MATCH (src)->(dest) WHERE out_degree(src.vname) > (SELECT max(out_degree(nname)) FROM GRAPH_TABLE (g1 MATCH (node) COLUMNS (node.vname AS nname))) COLUMNS(src.vname AS sname, dest.vname AS dname)); ERROR: subqueries within GRAPH_TABLE reference are not supported +-- LABELS() function tests +-- basic LABELS() in COLUMNS clause +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (c.name, LABELS(c) AS lbls)) ORDER BY 1; + name | lbls +-----------+------------- + customer1 | {customers} + customer2 | {customers} + customer3 | {customers} +(3 rows) + +-- LABELS() for vertices and edges +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers)-[e IS customer_orders]->(o IS orders) COLUMNS (c.name, LABELS(c) AS clbls, LABELS(e) AS elbls, LABELS(o) AS olbls)) ORDER BY 1; + name | clbls | elbls | olbls +-----------+-------------+------------------------------+---------------- + customer1 | {customers} | {customer_orders,cust_lists} | {orders,lists} + customer2 | {customers} | {customer_orders,cust_lists} | {orders,lists} +(2 rows) + +-- LABELS() in WHERE clause +SELECT * FROM GRAPH_TABLE (myshop MATCH (c) WHERE 'customers' = ANY(LABELS(c)) COLUMNS (c.name)) ORDER BY 1; + name +----------- + customer1 + customer2 + customer3 +(3 rows) + +-- LABELS() with array functions +SELECT * FROM GRAPH_TABLE (myshop MATCH (c) WHERE array_length(LABELS(c), 1) >= 1 COLUMNS (c.name, LABELS(c) AS lbls)) ORDER BY 1; + name | lbls +-----------+------------------- + customer1 | {customers} + customer2 | {customers} + customer3 | {customers} + product1 | {products} + product2 | {products} + product3 | {products} + | {lists,wishlists} + | {lists,wishlists} + | {lists,wishlists} + | {orders,lists} + | {orders,lists} + | {orders,lists} +(12 rows) + +-- LABELS() error: undefined variable +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(undefined_var))); +ERROR: element variable "undefined_var" does not exist +LINE 1: ...LE (myshop MATCH (c IS customers) COLUMNS (LABELS(undefined_... + ^ +-- LABELS() error: no argument +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS())); +ERROR: LABELS() requires exactly one argument +LINE 1: ...APH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS()))... + ^ +-- LABELS() error: too many arguments +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(c, c))); +ERROR: LABELS() requires exactly one argument +LINE 1: ...APH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(c, ... + ^ +-- LABELS() error: non-variable argument +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(123))); +ERROR: LABELS() argument must be an element variable +LINE 1: ...APH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(123... + ^ +-- LABELS() with shared labels (optimizer pruning tests) +-- The 'lists' label is shared by both 'orders' and 'wishlists' tables +-- LABELS() filtering in WHERE clause +-- Filter by 'orders' label - only orders table should be scanned +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'orders' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + QUERY PLAN +----------------------------------------------------- + Seq Scan on graph_table_tests.orders + Output: '{orders,lists}'::text[], orders.order_id +(2 rows) + +-- Filter by 'wishlists' label - only wishlists table should be scanned +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'wishlists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on graph_table_tests.wishlists + Output: '{lists,wishlists}'::text[], wishlists.wishlist_id +(2 rows) + +-- Filter by 'lists' label - both tables should be scanned (shared label) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'lists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + QUERY PLAN +-------------------------------------------------------------------- + Append + -> Seq Scan on graph_table_tests.orders + Output: '{orders,lists}'::text[], orders.order_id + -> Seq Scan on graph_table_tests.wishlists + Output: '{lists,wishlists}'::text[], wishlists.wishlist_id +(5 rows) + +-- Filter by nonexistent label - should show One-Time Filter: false +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'nonexistent' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + QUERY PLAN +----------------------------------------------------- + Result + Output: "graph_table".lbls, "graph_table".node_id + Replaces: Scan on graph_table + One-Time Filter: false +(4 rows) + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'orders' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) ORDER BY node_id; + lbls | node_id +----------------+--------- + {orders,lists} | 1 + {orders,lists} | 2 + {orders,lists} | 3 +(3 rows) + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'wishlists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) ORDER BY node_id; + lbls | node_id +-------------------+--------- + {lists,wishlists} | 1 + {lists,wishlists} | 2 + {lists,wishlists} | 3 +(3 rows) + +-- LABELS() filtering outside GRAPH_TABLE (in outer WHERE clause) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'orders' = ANY(lbls); + QUERY PLAN +----------------------------------------------------- + Seq Scan on graph_table_tests.orders + Output: '{orders,lists}'::text[], orders.order_id +(2 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'wishlists' = ANY(lbls); + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on graph_table_tests.wishlists + Output: '{lists,wishlists}'::text[], wishlists.wishlist_id +(2 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'lists' = ANY(lbls); + QUERY PLAN +-------------------------------------------------------------------- + Append + -> Seq Scan on graph_table_tests.orders + Output: '{orders,lists}'::text[], orders.order_id + -> Seq Scan on graph_table_tests.wishlists + Output: '{lists,wishlists}'::text[], wishlists.wishlist_id +(5 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'nonexistent' = ANY(lbls); + QUERY PLAN +----------------------------------------------------- + Result + Output: "graph_table".lbls, "graph_table".node_id + Replaces: Scan on graph_table + One-Time Filter: false +(4 rows) + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'orders' = ANY(lbls) ORDER BY node_id; + lbls | node_id +----------------+--------- + {orders,lists} | 1 + {orders,lists} | 2 + {orders,lists} | 3 +(3 rows) + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'wishlists' = ANY(lbls) ORDER BY node_id; + lbls | node_id +-------------------+--------- + {lists,wishlists} | 1 + {lists,wishlists} | 2 + {lists,wishlists} | 3 +(3 rows) + +-- LABELS() with host variable (parameter) - optimizer can still prune based on constant arrays +PREPARE labels_test(text) AS +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE $1 = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); +EXPLAIN (VERBOSE, COSTS OFF) EXECUTE labels_test('orders'); + QUERY PLAN +----------------------------------------------------- + Seq Scan on graph_table_tests.orders + Output: '{orders,lists}'::text[], orders.order_id +(2 rows) + +DEALLOCATE labels_test; -- leave the objects behind for pg_upgrade/pg_dump tests diff --git a/src/test/regress/sql/graph_table.sql b/src/test/regress/sql/graph_table.sql index 7521c3e5c1d..ba841dd8ab1 100644 --- a/src/test/regress/sql/graph_table.sql +++ b/src/test/regress/sql/graph_table.sql @@ -545,4 +545,122 @@ SELECT * FROM customers co WHERE co.customer_id = (SELECT customer_id FROM GRAPH SELECT sname, dname FROM GRAPH_TABLE (g1 MATCH (src)->(dest) WHERE src.vprop1 > (SELECT max(v1.vprop1) FROM v1) COLUMNS(src.vname AS sname, dest.vname AS dname)); SELECT sname, dname FROM GRAPH_TABLE (g1 MATCH (src)->(dest) WHERE out_degree(src.vname) > (SELECT max(out_degree(nname)) FROM GRAPH_TABLE (g1 MATCH (node) COLUMNS (node.vname AS nname))) COLUMNS(src.vname AS sname, dest.vname AS dname)); +-- LABELS() function tests +-- basic LABELS() in COLUMNS clause +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (c.name, LABELS(c) AS lbls)) ORDER BY 1; + +-- LABELS() for vertices and edges +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers)-[e IS customer_orders]->(o IS orders) COLUMNS (c.name, LABELS(c) AS clbls, LABELS(e) AS elbls, LABELS(o) AS olbls)) ORDER BY 1; + +-- LABELS() in WHERE clause +SELECT * FROM GRAPH_TABLE (myshop MATCH (c) WHERE 'customers' = ANY(LABELS(c)) COLUMNS (c.name)) ORDER BY 1; + +-- LABELS() with array functions +SELECT * FROM GRAPH_TABLE (myshop MATCH (c) WHERE array_length(LABELS(c), 1) >= 1 COLUMNS (c.name, LABELS(c) AS lbls)) ORDER BY 1; + +-- LABELS() error: undefined variable +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(undefined_var))); + +-- LABELS() error: no argument +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS())); + +-- LABELS() error: too many arguments +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(c, c))); + +-- LABELS() error: non-variable argument +SELECT * FROM GRAPH_TABLE (myshop MATCH (c IS customers) COLUMNS (LABELS(123))); + +-- LABELS() with shared labels (optimizer pruning tests) +-- The 'lists' label is shared by both 'orders' and 'wishlists' tables + +-- LABELS() filtering in WHERE clause +-- Filter by 'orders' label - only orders table should be scanned +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'orders' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + +-- Filter by 'wishlists' label - only wishlists table should be scanned +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'wishlists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + +-- Filter by 'lists' label - both tables should be scanned (shared label) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'lists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + +-- Filter by nonexistent label - should show One-Time Filter: false +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'nonexistent' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'orders' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) ORDER BY node_id; + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE 'wishlists' = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) ORDER BY node_id; + +-- LABELS() filtering outside GRAPH_TABLE (in outer WHERE clause) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'orders' = ANY(lbls); + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'wishlists' = ANY(lbls); + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'lists' = ANY(lbls); + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'nonexistent' = ANY(lbls); + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'orders' = ANY(lbls) ORDER BY node_id; + +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + COLUMNS (LABELS(n) AS lbls, n.node_id) +) WHERE 'wishlists' = ANY(lbls) ORDER BY node_id; + +-- LABELS() with host variable (parameter) - optimizer can still prune based on constant arrays +PREPARE labels_test(text) AS +SELECT * FROM GRAPH_TABLE (myshop + MATCH (n IS lists) + WHERE $1 = ANY(LABELS(n)) + COLUMNS (LABELS(n) AS lbls, n.node_id) +); +EXPLAIN (VERBOSE, COSTS OFF) EXECUTE labels_test('orders'); +DEALLOCATE labels_test; + -- leave the objects behind for pg_upgrade/pg_dump tests -- 2.50.1 (Apple Git-155)