Skip to content

Commit

Permalink
Merge pull request #138 from MaRDI4NFDI/description_fixes
Browse files: browse the repository at this point in the history
description fixes
  • Loading branch information
LizzAlice authored Apr 22, 2024
2 parents 6eeef1b + c554760 commit a5fe09f
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
15 changes: 12 additions & 3 deletions mardi_importer/mardi_importer/openml/OpenMLDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
from .OpenMLPublication import OpenMLPublication
import validators
import math

semantic_tags = [
"Agriculture",
Expand Down Expand Up @@ -114,14 +115,19 @@ def insert_claims(self):
self.item.add_claim(prop_nr, str(self.version))
if self.description is not None:
prop_nr = self.api.get_local_id_by_label("description", "property")
self.item.add_claim(prop_nr, str(self.description))
description = self.description.replace("\n", "\\N")
description = description.replace("\t", "\\T")
self.item.add_claim(prop_nr, description)
if self.creators and self.creators != "None":
#object has role
qualifier = [self.api.get_claim("wdt:P3831", "wd:Q59275219")]
creator_claims = []
if not isinstance(self.creators, list):
self.creators = [self.creators]
for c in self.creators:
if not c or c == "None":
continue
c = c.replace("\n", " ")
claim = self.api.get_claim("wdt:P2093", c, qualifiers=qualifier)
creator_claims.append(claim)
self.item.add_claims(creator_claims)
Expand All @@ -131,6 +137,9 @@ def insert_claims(self):
if not isinstance(self.contributors, list):
self.contributors = [self.contributors]
for c in self.contributors:
if not c or c == "None":
continue
c = c.replace("\n", " ")
claim = self.api.get_claim("wdt:P2093", c, qualifiers=qualifier)
contributor_claims.append(claim)
self.item.add_claims(contributor_claims)
Expand Down Expand Up @@ -191,7 +200,7 @@ def insert_claims(self):
if self.num_binary_features is not None and self.num_binary_features != "None":
prop_nr = self.api.get_local_id_by_label("number of binary features", "property")
self.item.add_claim(prop_nr, int(self.num_binary_features))
if self.num_classes is not None and self.num_classes != "None":
if self.num_classes is not None and not math.isnan(self.num_classes) and self.num_classes != "None":
prop_nr = self.api.get_local_id_by_label("number of classes", "property")
self.item.add_claim(prop_nr, int(self.num_classes))
if self.num_features is not None and self.num_features != "None":
Expand All @@ -216,7 +225,7 @@ def insert_claims(self):
if self.format.lower() == "arff":
self.item.add_claim("wdt:P2701", "wd:Q4489412")
elif self.format.lower() == "sparse_arff":
qid = self.api.get_local_id_by_label("Sparse ARFF", "item")
qid = self.api.get_local_id_by_label("Sparse ARFF", "item")[0]
self.item.add_claim("wdt:P2701", qid)
else:
sys.exit(f"Invalid file format {self.format}")
Expand Down
6 changes: 6 additions & 0 deletions mardi_importer/mardi_importer/openml/OpenMLSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,14 @@ def push(self):
# 'num_numeric_features': [0.0],
# 'num_symbolic_features': [37.0],
# 'format': ['ARFF']}
found = False
for items in zip_longest(*[dataset_dict[key] for key in dataset_dict], fillvalue=None):
lookup_dict = dict(zip(dataset_dict.keys(), items))
# if lookup_dict["dataset_id"] == 3:
# found = True
# continue
# if not found:
# continue
dataset = OpenMLDataset(
integrator = self.integrator,
**lookup_dict
Expand Down

0 comments on commit a5fe09f

Please sign in to comment.