-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.py
33 lines (22 loc) · 793 Bytes
/
1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Purpose: Calculate the similarity between two sets of predicates
from rdflib import Graph, URIRef
# Parse the TTL data
g = Graph()
g.parse("metadata.rdf", format="ttl")
# Get the predicates for ex:dataset-001
predicates = set()
predicates2 = set()
dataset_001 = URIRef('https://dcat.example.org/dataset-001')
for s, p, o in g.triples((dataset_001, None, None)):
predicates.add(o)
dataset_002 = URIRef('https://dcat.example.org/dataset-002')
for s, p, o in g.triples((dataset_002, None, None)):
predicates2.add(o)
# Calculate the similarity between the two sets of predicates item by item
similarity = 0
for p1 in predicates:
for p2 in predicates2:
if p1 == p2:
similarity += 1
similarity /= max(len(predicates), len(predicates2))
print(similarity)