From f6f4beb4a01f61a1287b1ed3762870187ebc52ce Mon Sep 17 00:00:00 2001 From: Jack Boylan <70636379+jackboyla@users.noreply.github.com> Date: Fri, 24 May 2024 13:54:08 +0100 Subject: [PATCH] Logical `OR` for relationship MATCH (#44) * Adds support for multigraphs * Refactors `_is_edge_attr_match` * Filters relations by __label__ during `_lookup` * Bundles relation attributes together for lookup * Refactors and adds inline docs * Adds tests for multigraph support * Cleans up inline docs * Removes slicing list twice to avoid two copies in memory * Supports WHERE clause for relationships in multigraphs * Adds test for multigraph with WHERE clause on single edge * Accounts for WHERE with string node attributes in MultiDiGraphs * Unifies all unit tests to work with both DiGraphs and MultiDiGraphs * Completes multidigraph test for WHERE on node attribute * Supports logical OR for relationship matching * Adds tests for logical OR in MATCH for relationships --- grandcypher/__init__.py | 44 +++++++++++++----- grandcypher/test_queries.py | 91 +++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 12 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index abd2824..4c3259b 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -16,7 +16,7 @@ import grandiso -from lark import Lark, Transformer, v_args, Token +from lark import Lark, Transformer, v_args, Token, Tree _OPERATORS = { @@ -107,8 +107,8 @@ edge_match : LEFT_ANGLE? "--" RIGHT_ANGLE? | LEFT_ANGLE? "-[]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME "]-" RIGHT_ANGLE? - | LEFT_ANGLE? "-[" CNAME ":" TYPE "]-" RIGHT_ANGLE? - | LEFT_ANGLE? "-[" ":" TYPE "]-" RIGHT_ANGLE? + | LEFT_ANGLE? "-[" CNAME ":" type_list "]-" RIGHT_ANGLE? + | LEFT_ANGLE? "-[" ":" type_list "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" "*" MIN_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" "*" MIN_HOP ".." MAX_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME "*" MIN_HOP "]-" RIGHT_ANGLE? @@ -118,6 +118,7 @@ | LEFT_ANGLE? "-[" CNAME ":" TYPE "*" MIN_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME ":" TYPE "*" MIN_HOP ".." MAX_HOP "]-" RIGHT_ANGLE? +type_list : TYPE ( "|" TYPE )* LEFT_ANGLE : "<" RIGHT_ANGLE : ">" @@ -228,10 +229,14 @@ def _is_edge_attr_match( motif_edges = _aggregate_edge_labels(motif_edges) host_edges = _aggregate_edge_labels(host_edges) + motif_types = motif_edges.get('__labels__', set()) + host_types = host_edges.get('__labels__', set()) + + if motif_types and not motif_types.intersection(host_types): + return False + for attr, val in motif_edges.items(): if attr == "__labels__": - if val and val - host_edges.get("__labels__", set()): - return False continue if host_edges.get(attr) != val: return False @@ -775,10 +780,21 @@ def entity_id(self, entity_id): return ".".join(entity_id) return entity_id.value - def edge_match(self, edge_name): - direction = cname = min_hop = max_hop = edge_type = None + def edge_match(self, edge_tokens): + def flatten_tokens(edge_tokens): + flat_tokens = [] + for token in edge_tokens: + if isinstance(token, Tree): + flat_tokens.extend(flatten_tokens(token.children)) # Recursively flatten the tree + else: + flat_tokens.append(token) + return flat_tokens + + direction = cname = min_hop = max_hop = None + edge_types = [] + edge_tokens = flatten_tokens(edge_tokens) - for token in edge_name: + for token in edge_tokens: if token.type == "MIN_HOP": min_hop = int(token.value) elif token.type == "MAX_HOP": @@ -790,15 +806,19 @@ def edge_match(self, edge_name): elif token.type == "RIGHT_ANGLE": direction = "r" elif token.type == "TYPE": - edge_type = token.value + edge_types.append(token.value) else: cname = token direction = direction if direction is not None else "b" if (min_hop is not None or max_hop is not None) and (direction == "b"): - raise TypeError("not support edge hopping for bidirectional edge") + raise TypeError("Bidirectional edge does not support edge hopping") + + # Handle the case where no edge types are specified, defaulting to a generic type if needed + if edge_types == []: + edge_types = None - return (cname, edge_type, direction, min_hop, max_hop) + return (cname, edge_types, direction, min_hop, max_hop) def node_match(self, node_name): cname = node_type = json_data = None @@ -845,7 +865,7 @@ def match_clause(self, match_clause: Tuple): if maxh > self._max_hop: raise ValueError(f"max hop is caped at 100, found {maxh}!") if t: - t = set([t]) + t = set([t] if type(t) is str else t) self._motif.add_edges_from( edges, __min_hop__=minh, __max_hop__=maxh, __is_hop__=ish, __labels__=t ) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 450b28b..9fc0bfb 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -1661,3 +1661,94 @@ def test_path(self, graph_type): res = GrandCypher(host).run(qry) assert len(res["P"][0]) == 5 + + +class TestMatchWithOrOperatorInRelationships: + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_single_or_operator(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|WORKS_WITH]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice", "Bob"] + assert res["n2.name"] == ["Bob", "Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_multiple_or_operators(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_node("d", name="Derek") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("a", "c", __labels__={"KNOWS"}) + host.add_edge("b", "c", __labels__={"LIVES_NEAR"}) + host.add_edge("b", "d", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|KNOWS|LIVES_NEAR]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice", "Alice", "Bob"] + assert res["n2.name"] == ["Bob", "Carol", "Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_or_operator_and_other_conditions(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice", age=30) + host.add_node("b", name="Bob", age=25) + host.add_node("c", name="Carol", age=40) + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("a", "c", __labels__={"KNOWS"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|KNOWS]->(n2) + WHERE n1.age > 28 AND n2.age > 35 + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice"] + assert res["n2.name"] == ["Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_no_results_when_no_matching_edges(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_edge("a", "b", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:IN_CITY|HAS_ROUTE]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert len(res["n1.name"]) == 0 # No results because no edges match + + def test_multigraph_match_with_single_or_operator(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_node("d", name="Derek") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + host.add_edge("b", "c", __labels__={"DISLIKES"}) + host.add_edge("b", "d", __labels__={"DISLIKES"}) + + qry = """ + MATCH (n1)-[r:IS_SUING|DISLIKES]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Bob", "Bob"] + assert res["n2.name"] == ["Carol", "Derek"]