Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Turtle and N-Triples #31

Merged
merged 3 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/ldtab/import.clj
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,12 @@
(let [db {:connection-uri db-connection}
iri2prefix (load-prefixes db)
^FileInputStream is (new FileInputStream rdf-path)
^Iterator it (if (= (last (str/split rdf-path #"\.")) "ttl") ;guess input format
(RDFDataMgr/createIteratorTriples is Lang/TTL "base")
(RDFDataMgr/createIteratorTriples is Lang/RDFXML "base")) ;use RDFXML by default
^String extension (last (str/split rdf-path #"\."))
^Iterator it (cond ;guess file format
(= extension "ttl") (RDFDataMgr/createIteratorTriples is Lang/TTL "base")
(= extension "nt") (RDFDataMgr/createIteratorTriples is Lang/NT "base")
:else ;use RDFXML by default
(RDFDataMgr/createIteratorTriples is Lang/RDFXML "base"))
windowsize 500]
(loop [backlog {}
thin-backlog [nil nil nil]
Expand Down Expand Up @@ -209,4 +212,3 @@
(println t))))

;(time (import-rdf-stream (first args) (second args) "graph")))

42 changes: 14 additions & 28 deletions src/ldtab/rdf_model.clj
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns ldtab.rdf-model
(:require [clojure.set :as set])
(:require [clojure.set :as set]
[clojure.string :as str])
(:import [org.apache.jena.rdf.model ModelFactory Model StmtIterator Resource Property RDFNode Statement]
[org.apache.jena.riot RDFDataMgr]
[org.apache.jena.atlas.web TypedInputStream]))
Expand Down Expand Up @@ -38,39 +39,24 @@
roots (filter (fn [^Resource x] (not (contains? blanknode-objects x))) subjects)]
roots))

(defn create-model
  "Given a path to an RDF file, parse the file into a fresh Jena Model and
  return that Model.

  The concrete syntax is guessed from the file extension (case-insensitive):
    ttl       -> Turtle
    nt        -> N-Triples
    otherwise -> RDF/XML (Jena's default when no language is given)

  The input stream opened for rdf-path is closed before returning."
  ^Model [^String rdf-path]
  (let [;; some-> guards against a path made only of dots (e.g. \".\"), for
        ;; which str/split yields an empty vector and `last` returns nil.
        extension (some-> (last (str/split rdf-path #"\.")) str/lower-case)
        ^Model model (ModelFactory/createDefaultModel)]
    ;; Model.read consumes the stream eagerly, so closing it afterwards is safe.
    (with-open [^TypedInputStream in (RDFDataMgr/open rdf-path)]
      (case extension
        "ttl" (.read model in "" "TURTLE")
        "nt" (.read model in "" "N-TRIPLE")
        (.read model in ""))))) ; default to RDF/XML

(defn group-blank-node-paths
"Given the path of an RDF file, group its triples w.r.t. blank node paths
(a blank node path is a path (s_1,p_1,o_1),...,(s_n,p_n,o_n) where
o_i = s_{i+1} are blank nodes for 1 <= i < n).
Returns a sequence with one entry per root subject (as computed by
get-root-subjects); each entry is the sequence of triples obtained by calling
.asTriple on the statements returned by get-blanknode-dependencies."
[^String input]
; NOTE(review): the next two bindings look like the pre-change side of this diff
; hunk; the `(let [^Model model (create-model input)` line appears to replace them -- confirm.
(let [^TypedInputStream in (RDFDataMgr/open input)
^Model model (.read (ModelFactory/createDefaultModel) in "")
(let [^Model model (create-model input)
root-subjects (get-root-subjects model)
subject-with-dependencies (map #(get-blanknode-dependencies % model) root-subjects)
dependency-triples (map #(map (fn [^Statement x] (.asTriple x)) %) subject-with-dependencies)]
dependency-triples))

(defn group-blank-node-paths-n-triples
  "Read the RDF document at `input` using the \"NTRIPLES\" language and group
  its triples by blank node path: for every root subject (see
  get-root-subjects) collect the statements returned by
  get-blanknode-dependencies and convert each one to a triple.
  Returns a sequence of triple sequences, one per root subject."
  [input]
  (let [stream (RDFDataMgr/open input)
        graph (.read (ModelFactory/createDefaultModel) stream "" "NTRIPLES")
        ;; turn one group of statements into the corresponding triples
        statements->triples (fn [statements]
                              (map (fn [^Statement s] (.asTriple s)) statements))]
    (->> (get-root-subjects graph)
         (map (fn [root] (get-blanknode-dependencies root graph)))
         (map statements->triples))))

(defn group-blank-node-paths-turtle
  "Read the Turtle document at `input` and group its triples w.r.t. blank node
  paths (a blank node path is a path (s_1,p_1,o_1),...,(s_n,p_n,o_n) where
  o_i = s_{i+1} are blank nodes for 1 <= i < n).
  Returns a sequence with one entry per root subject (see get-root-subjects);
  each entry is the sequence of triples derived from the statements returned
  by get-blanknode-dependencies."
  [input]
  (let [^Model model (ModelFactory/createDefaultModel)]
    ;; The language must be passed explicitly: Model.read without a language
    ;; argument assumes RDF/XML, which does not match this function's purpose.
    ;; The read is eager, so the stream can be closed right after it.
    (with-open [^TypedInputStream in (RDFDataMgr/open input)]
      (.read model in "" "TURTLE"))
    (let [root-subjects (get-root-subjects model)
          subject-with-dependencies (map #(get-blanknode-dependencies % model) root-subjects)
          dependency-triples (map #(map (fn [^Statement x] (.asTriple x)) %) subject-with-dependencies)]
      dependency-triples)))

Loading