From ceea7379712995f3ff9d832ac0be0662fdaab620 Mon Sep 17 00:00:00 2001
From: Edmond <edmond.chuc@outlook.com>
Date: Fri, 13 Dec 2024 22:15:34 +1100
Subject: [PATCH 1/4] feat: use the RGDA1 canonicalization algorithm + lexical
 n-triples sort to produce deterministic longturtle serialisation

---
 rdflib/plugins/serializers/longturtle.py      |  38 ++----
 test/data/longturtle/longturtle-target.ttl    |  82 ++++++------
 .../test_serializer_longturtle_sort.py        | 117 ++++++++++++++++++
 3 files changed, 167 insertions(+), 70 deletions(-)
 create mode 100644 test/test_serializers/test_serializer_longturtle_sort.py

diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index f596a7ded..ed5e385d1 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -20,7 +20,9 @@
 
 from typing import IO, Any, Optional
 
+from rdflib.compare import to_canonical_graph
 from rdflib.exceptions import Error
+from rdflib.graph import Graph
 from rdflib.namespace import RDF
 from rdflib.term import BNode, Literal, URIRef
 
@@ -42,7 +44,14 @@ class LongTurtleSerializer(RecursiveSerializer):
 
     def __init__(self, store):
         self._ns_rewrite = {}
-        super(LongTurtleSerializer, self).__init__(store)
+        store = to_canonical_graph(store)
+        content = store.serialize(format="application/n-triples")
+        lines = content.split("\n")
+        lines.sort()
+        graph = Graph()
+        graph.parse(data="\n".join(lines), format="nt", skolemize=True)
+        graph = graph.de_skolemize()
+        super(LongTurtleSerializer, self).__init__(graph)
         self.keywords = {RDF.type: "a"}
         self.reset()
         self.stream = None
@@ -293,34 +302,7 @@ def predicateList(self, subject, newline=False):
     def verb(self, node, newline=False):
         self.path(node, VERB, newline)
 
-    def sortObjects(
-        self, values: list[URIRef | BNode | Literal]
-    ) -> list[URIRef | BNode | Literal]:
-        """
-        Perform a sort on the values where each value is a blank node. Grab the CBD of the
-        blank node and sort it by its longturtle serialization value.
-
-        Identified nodes come first and the sorted blank nodes are tacked on after.
-        """
-        bnode_map: dict[BNode, list[str]] = {}
-        objects = []
-        for value in values:
-            if isinstance(value, BNode):
-                bnode_map[value] = []
-            else:
-                objects.append(value)
-
-        for bnode in bnode_map:
-            cbd = self.store.cbd(bnode).serialize(format="longturtle")
-            bnode_map[bnode].append(cbd)
-
-        sorted_bnodes = sorted(
-            [(k, v) for k, v in bnode_map.items()], key=lambda x: x[1]
-        )
-        return objects + [x[0] for x in sorted_bnodes]
-
     def objectList(self, objects):
-        objects = self.sortObjects(objects)
         count = len(objects)
         if count == 0:
             return
diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl
index 329f2ca0c..54cf23e9f 100644
--- a/test/data/longturtle/longturtle-target.ttl
+++ b/test/data/longturtle/longturtle-target.ttl
@@ -1,74 +1,72 @@
-PREFIX cn: <https://linked.data.gov.au/def/cn/>
-PREFIX ex: <http://example.com/>
 PREFIX geo: <http://www.opengis.net/ont/geosparql#>
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX sdo: <https://schema.org/>
+PREFIX schema: <https://schema.org/>
 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 
-ex:nicholas
-    a sdo:Person ;
-    sdo:age 41 ;
-    sdo:alternateName
-        "N.J. Car" ,
-        "Nick Car" ,
+<http://example.com/nicholas>
+    a schema:Person ;
+    schema:age 41 ;
+    schema:alternateName 
         [
-            sdo:name "Dr N.J. Car" ;
-        ] ;
-    sdo:name
+            schema:name "Dr N.J. Car" ;
+        ] ,
+        "N.J. Car" ,
+        "Nick Car" ;
+    schema:name
         [
-            a cn:CompoundName ;
-            sdo:hasPart 
-                [
-                    a cn:CompoundName ;
-                    rdf:value "John" ;
-                ] ,
+            a <https://linked.data.gov.au/def/cn/CompoundName> ;
+            schema:hasPart 
                 [
-                    a cn:CompoundName ;
-                    rdf:value "Nicholas" ;
-                ] ,
-                [
-                    a cn:CompoundName ;
-                    sdo:hasPart 
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    schema:hasPart 
                         [
-                            a cn:CompoundName ;
+                            a <https://linked.data.gov.au/def/cn/CompoundName> ;
                             rdf:value "Car" ;
                         ] ,
                         [
-                            a cn:CompoundName ;
+                            a <https://linked.data.gov.au/def/cn/CompoundName> ;
                             rdf:value "Maxov" ;
                         ] ;
+                ] ,
+                [
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    rdf:value "Nicholas" ;
+                ] ,
+                [
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    rdf:value "John" ;
                 ] ;
         ] ;
-    sdo:worksFor <https://kurrawong.ai> ;
+    schema:worksFor <https://kurrawong.ai> ;
 .
 
 <https://kurrawong.ai>
-    a sdo:Organization ;
-    sdo:location <https://kurrawong.ai/hq> ;
+    a schema:Organization ;
+    schema:location <https://kurrawong.ai/hq> ;
 .
 
 <https://kurrawong.ai/hq>
-    a sdo:Place ;
-    sdo:address
+    a schema:Place ;
+    schema:address
         [
-            a sdo:PostalAddress ;
-            sdo:addressCountry
+            a schema:PostalAddress ;
+            schema:addressCountry
                 [
-                    sdo:identifier "au" ;
-                    sdo:name "Australia" ;
+                    schema:identifier "au" ;
+                    schema:name "Australia" ;
                 ] ;
-            sdo:addressLocality "Shorncliffe" ;
-            sdo:addressRegion "QLD" ;
-            sdo:postalCode 4017 ;
-            sdo:streetAddress (
+            schema:addressLocality "Shorncliffe" ;
+            schema:addressRegion "QLD" ;
+            schema:postalCode 4017 ;
+            schema:streetAddress (
                 72
                 "Yundah"
                 "Street"
             ) ;
         ] ;
-    sdo:geo
+    schema:geo
         [
-            sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
+            schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
         ] ;
-    sdo:name "KurrawongAI HQ" ;
+    schema:name "KurrawongAI HQ" ;
 .
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
new file mode 100644
index 000000000..9eba6e187
--- /dev/null
+++ b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+# Portions of this file contributed by NIST are governed by the
+# following statement:
+#
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to Title 17 Section 105 of the
+# United States Code, this software is not subject to copyright
+# protection within the United States. NIST assumes no responsibility
+# whatsoever for its use by other parties, and makes no guarantees,
+# expressed or implied, about its quality, reliability, or any other
+# characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+import random
+from collections import defaultdict
+
+from rdflib import RDFS, BNode, Graph, Literal, Namespace, URIRef
+
+EX = Namespace("http://example.org/ex/")
+
+
+def test_sort_semiblank_graph() -> None:
+    """
+    This test checks that the longturtle serialization is consistent
+    across repeatedly generated graphs that contain blank nodes.
+    """
+
+    serialization_counter: defaultdict[str, int] = defaultdict(int)
+
+    first_graph_text: str = ""
+
+    # Seed the shuffler with a fixed value so the shuffled insertion order
+    # is identical on every run, keeping the test results reproducible.
+    nonrandom_shuffler = random.Random(1234)
+    for x in range(1, 10):
+        graph = Graph()
+        graph.bind("ex", EX)
+        graph.bind("rdfs", RDFS)
+
+        graph.add((EX.A, RDFS.comment, Literal("Thing A")))
+        graph.add((EX.B, RDFS.comment, Literal("Thing B")))
+        graph.add((EX.C, RDFS.comment, Literal("Thing C")))
+
+        nodes: list[URIRef] = [EX.A, EX.B, EX.C, EX.B]
+        nonrandom_shuffler.shuffle(nodes)
+        for node in nodes:
+            # Instantiate one bnode per URIRef node.
+            graph.add((BNode(), RDFS.seeAlso, node))
+
+        nesteds: list[URIRef] = [EX.A, EX.B, EX.C]
+        nonrandom_shuffler.shuffle(nesteds)
+        for nested in nesteds:
+            # Instantiate a nested node reference.
+            outer_node = BNode()
+            inner_node = BNode()
+            graph.add((outer_node, EX.has, inner_node))
+            graph.add((inner_node, RDFS.seeAlso, nested))
+
+        graph_text = graph.serialize(format="longturtle", sort=True)
+        if first_graph_text == "":
+            first_graph_text = graph_text
+
+        serialization_counter[graph_text] += 1
+
+    expected_serialization = """\
+PREFIX ns1: <http://example.org/ex/>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+ns1:A
+    rdfs:comment "Thing A" ;
+.
+
+ns1:C
+    rdfs:comment "Thing C" ;
+.
+
+ns1:B
+    rdfs:comment "Thing B" ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:A ;
+        ] ; ;
+.
+
+[]    rdfs:seeAlso ns1:B ; ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:C ;
+        ] ; ;
+.
+
+[]    rdfs:seeAlso ns1:A ; ;
+.
+
+[]    rdfs:seeAlso ns1:C ; ;
+.
+
+[]    rdfs:seeAlso ns1:B ; ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:B ;
+        ] ; ;
+.
+
+"""
+
+    assert expected_serialization.strip() == first_graph_text.strip()
+    assert 1 == len(serialization_counter)

From e4845dae757a07599112c086610c82e077da2ae0 Mon Sep 17 00:00:00 2001
From: Edmond <edmond.chuc@outlook.com>
Date: Fri, 13 Dec 2024 22:22:05 +1100
Subject: [PATCH 2/4] chore: normalise usage of format

---
 rdflib/plugins/serializers/longturtle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index ed5e385d1..6626cb4c7 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -49,7 +49,7 @@ def __init__(self, store):
         lines = content.split("\n")
         lines.sort()
         graph = Graph()
-        graph.parse(data="\n".join(lines), format="nt", skolemize=True)
+        graph.parse(data="\n".join(lines), format="application/n-triples", skolemize=True)
         graph = graph.de_skolemize()
         super(LongTurtleSerializer, self).__init__(graph)
         self.keywords = {RDF.type: "a"}

From 7405e32109cca9c63cddb54a1f689b568c7aef04 Mon Sep 17 00:00:00 2001
From: Edmond <edmond.chuc@outlook.com>
Date: Sat, 14 Dec 2024 00:03:26 +1100
Subject: [PATCH 3/4] chore: apply black

---
 rdflib/plugins/serializers/longturtle.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index 6626cb4c7..cc245441c 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -49,7 +49,9 @@ def __init__(self, store):
         lines = content.split("\n")
         lines.sort()
         graph = Graph()
-        graph.parse(data="\n".join(lines), format="application/n-triples", skolemize=True)
+        graph.parse(
+            data="\n".join(lines), format="application/n-triples", skolemize=True
+        )
         graph = graph.de_skolemize()
         super(LongTurtleSerializer, self).__init__(graph)
         self.keywords = {RDF.type: "a"}

From 412fb5d63d79c5d5816e05751e6be6bd791b1385 Mon Sep 17 00:00:00 2001
From: Edmond <edmond.chuc@outlook.com>
Date: Sat, 14 Dec 2024 03:06:01 +1100
Subject: [PATCH 4/4] fix: double up of semicolons when subject is a blank node

---
 rdflib/plugins/serializers/longturtle.py           |  2 +-
 .../test_serializer_longturtle_sort.py             | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index cc245441c..8de1e52a2 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -197,7 +197,7 @@ def s_squared(self, subject):
             return False
         self.write("\n" + self.indent() + "[]")
         self.predicateList(subject, newline=False)
-        self.write(" ;\n.")
+        self.write("\n.")
         return True
 
     def path(self, node, position, newline=False):
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
index 9eba6e187..df0e38676 100644
--- a/test/test_serializers/test_serializer_longturtle_sort.py
+++ b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -84,31 +84,31 @@ def test_sort_semiblank_graph() -> None:
 []    ns1:has
         [
             rdfs:seeAlso ns1:A ;
-        ] ; ;
+        ] ;
 .
 
-[]    rdfs:seeAlso ns1:B ; ;
+[]    rdfs:seeAlso ns1:B ;
 .
 
 []    ns1:has
         [
             rdfs:seeAlso ns1:C ;
-        ] ; ;
+        ] ;
 .
 
-[]    rdfs:seeAlso ns1:A ; ;
+[]    rdfs:seeAlso ns1:A ;
 .
 
-[]    rdfs:seeAlso ns1:C ; ;
+[]    rdfs:seeAlso ns1:C ;
 .
 
-[]    rdfs:seeAlso ns1:B ; ;
+[]    rdfs:seeAlso ns1:B ;
 .
 
 []    ns1:has
         [
             rdfs:seeAlso ns1:B ;
-        ] ; ;
+        ] ;
 .
 
 """