Skip to content

Commit

Permalink
Merge pull request #13 from TREEcg/feat/bulk-ingest
Browse files Browse the repository at this point in the history
Feat/bulk ingest
  • Loading branch information
pietercolpaert authored Feb 15, 2024
2 parents 5bfbfc9 + e436c8d commit eb09d22
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 15 deletions.
66 changes: 63 additions & 3 deletions lib/CBDShapeExtractor.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import rdfDereference, { RdfDereferencer } from "rdf-dereference";
import { NodeLink, RDFMap, ShapesGraph, ShapeTemplate } from "./Shape";
import { Path, PathResult } from "./Path";
import { BlankNode, Store } from "n3";
import { BlankNode, DefaultGraph, Store } from "n3";
import { Quad, Term } from "@rdfjs/types";

class DereferenceNeeded {
Expand All @@ -13,6 +13,10 @@ class DereferenceNeeded {
}
}

/**
 * Settings accepted (partially) by the CBDShapeExtractor constructor.
 */
type CBDShapeExtractorOptions = {
  /**
   * When true, the quads of a subject are looked up in the default graph
   * only; when false, they are looked up across every graph of the store.
   * The constructor falls back to false when the option is not given.
   */
  cbdDefaultGraph: boolean;
};

/**
* Usage:
* import {ShapeExtractor} from "extract-cbd-shape";
Expand All @@ -24,7 +28,16 @@ export class CBDShapeExtractor {
dereferencer: RdfDereferencer;
shapesGraph?: ShapesGraph;

constructor(shapesGraphStore?: Store, dereferencer?: RdfDereferencer) {
options: CBDShapeExtractorOptions;

constructor(
shapesGraphStore?: Store,
dereferencer?: RdfDereferencer<Quad>,
options: Partial<CBDShapeExtractorOptions> = {},
) {
this.options = {
cbdDefaultGraph: options.cbdDefaultGraph || false,
};
if (!dereferencer) {
this.dereferencer = rdfDereference;
} else {
Expand All @@ -43,6 +56,52 @@ export class CBDShapeExtractor {
});
}

/**
 * Extracts several members from the same store in one call.
 *
 * Quads whose graph is a named node equal to one of `ids` are set aside and
 * appended unchanged to the result of that member. Every other quad is copied
 * into a shared store, on which `extract` is run once per id.
 *
 * @param store Source quads for all members.
 * @param ids Subjects to extract; the returned array has one entry per
 *   element, in the same order as this array.
 * @param shapeId Forwarded to `extract` for every member.
 * @param graphsToIgnore Forwarded to `extract`; each call receives its own
 *   copy so the calls never share one array.
 * @param itemExtracted Optional callback invoked as soon as a member is
 *   complete, i.e. possibly in a different order than `ids`.
 * @returns The extracted quads per member, ordered like `ids`. The promise
 *   rejects if any single extraction (or the callback) throws.
 */
public async bulkExtract(
  store: Store,
  ids: Array<Term>,
  shapeId?: Term,
  graphsToIgnore?: Array<Term>,
  itemExtracted?: (member: { subject: Term; quads: Quad[] }) => void,
): Promise<Array<{ subject: Term; quads: Quad[] }>> {
  // One bucket per requested id; a Map doubles as the membership test.
  const memberSpecificQuads = new Map<string, Quad[]>();
  for (const id of ids) {
    if (!memberSpecificQuads.has(id.value)) {
      memberSpecificQuads.set(id.value, []);
    }
  }

  // Split the input: member-named graphs go to their bucket, the rest is shared.
  const sharedStore = new Store();
  for (const quad of store.readQuads(null, null, null, null)) {
    const bucket =
      quad.graph.termType === "NamedNode"
        ? memberSpecificQuads.get(quad.graph.value)
        : undefined;
    if (bucket) {
      bucket.push(quad);
    } else {
      sharedStore.add(quad);
    }
  }

  // Promise.all keeps the input order, so the result no longer depends on
  // which extraction happens to finish first.
  return Promise.all(
    ids.map(async (id) => {
      const quads = await this.extract(
        sharedStore,
        id,
        shapeId,
        (graphsToIgnore || []).slice(),
      );

      // Append one by one: spreading a very large array into push() can
      // exceed the engine's argument limit and throw a RangeError.
      const ownQuads = memberSpecificQuads.get(id.value);
      if (ownQuads) {
        for (const quad of ownQuads) {
          quads.push(quad);
        }
      }

      const member = { subject: id, quads };
      if (itemExtracted) {
        itemExtracted(member);
      }
      return member;
    }),
  );
}

/**
* Extracts:
* * first level quads,
Expand Down Expand Up @@ -306,7 +365,8 @@ export class CBDShapeExtractor {
graphsToIgnore: Array<string>,
) {
extractedStar.addCBDTerm(id);
const quads = store.getQuads(id, null, null, null);
const graph = this.options.cbdDefaultGraph ? new DefaultGraph() : null;
const quads = store.getQuads(id, null, null, graph);

//Iterate over the quads, add them to the result and check whether we should further get other quads based on blank nodes or the SHACL shape
for (const q of quads) {
Expand Down
2 changes: 1 addition & 1 deletion tests/06 - shapes and named graphs/data.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ ex:M1v1 {
ex:M1v2 {
ex:M1 rdfs:label "M1v2" .
}


57 changes: 47 additions & 10 deletions tests/06 - shapes and named graphs/extraction-example.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,32 @@ import { assert } from "chai";
import { NamedNode, Parser, Store, StreamParser, Term, Writer } from "n3";
import { CBDShapeExtractor } from "../../lib/CBDShapeExtractor";
import rdfDereference from "rdf-dereference";
import { Quad, Term as RTerm } from "@rdfjs/types";

describe("Check weather all selected quads can be extracted", function () {
let shapeStore = new Store();
let extractor: CBDShapeExtractor;
let dataStore = new Store();
before(async () => {
let readStream = (
await rdfDereference.dereference("./tests/06 - shapes and named graphs/shape-example.ttl", {
localFiles: true,
})
await rdfDereference.dereference(
"./tests/06 - shapes and named graphs/shape-example.ttl",
{
localFiles: true,
},
)
).data;
await new Promise((resolve, reject) => {
shapeStore.import(readStream).on("end", resolve).on("error", reject);
});
extractor = new CBDShapeExtractor(shapeStore);
let readStream2 = (
await rdfDereference.dereference("./tests/06 - shapes and named graphs/data-example.ttl", {
localFiles: true,
})
await rdfDereference.dereference(
"./tests/06 - shapes and named graphs/data-example.ttl",
{
localFiles: true,
},
)
).data;
await new Promise((resolve, reject) => {
dataStore.import(readStream2).on("end", resolve).on("error", reject);
Expand All @@ -29,11 +36,41 @@ describe("Check weather all selected quads can be extracted", function () {
it("All quads from example should be extracted", async () => {
let result = await extractor.extract(
dataStore,
new NamedNode("http://example.org/important_point"),
new NamedNode("http://example.org/Shape")
new NamedNode("http://example.org/line"),
new NamedNode("http://example.org/shape"),
);
// It should only have 6 quads
assert.equal(result.length, 3);
assert.equal(result.length, 6);
});

it("bulk - All quads from example should be extracted", async () => {
let called = 0;
const cb = (member: { subject: RTerm; quads: Quad[] }) => {
called += 1;
if (member.subject.value == "http://example.org/line") {
assert.equal(member.quads.length, 6);
return;
}

if (member.subject.value == "http://example.org/important_point") {
assert.equal(member.quads.length, 2);
return;
}
assert.fail();
};

let result = await extractor.bulkExtract(
dataStore,
[
new NamedNode("http://example.org/line"),
new NamedNode("http://example.org/important_point"),
],
new NamedNode("http://example.org/shape"),
undefined,
cb,
);
// Both members should be returned, and the callback should have fired once per member
assert.equal(result.length, 2);
assert.equal(called, 2);
});

});
3 changes: 2 additions & 1 deletion tests/06 - shapes and named graphs/shape-example.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ex:shape a sh:NodeShape;
sh:path ex:point;
sh:node ex:PointShape;
].

ex:PointShape
a sh:NodeShape ;
sh:property [
Expand All @@ -18,4 +19,4 @@ ex:PointShape
sh:path ex:y ;
sh:datatype xsd:integer ;
sh:minCount 1;
].
].

0 comments on commit eb09d22

Please sign in to comment.