Skip to content

Commit

Permalink
Merge pull request #142 from MaRDI4NFDI/zenodo
Browse files Browse the repository at this point in the history
Zenodo
  • Loading branch information
eloiferrer authored Jul 11, 2024
2 parents 775b59a + f657c43 commit 3c42da3
Show file tree
Hide file tree
Showing 9 changed files with 559 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, languages=["en", "de"]) -> None:
self.excluded_properties = ['P1151', 'P1855', 'P2139', 'P2302', \
'P2559', 'P2875', 'P3254', 'P3709', \
'P3713', 'P3734', 'P6104', 'P6685', \
'P8093', 'P8979']
'P8093', 'P8979', 'P12861']

def config(self):
"""
Expand Down
83 changes: 74 additions & 9 deletions mardi_importer/mardi_importer/publications/ZenodoResource.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from mardi_importer.integrator.MardiIntegrator import MardiIntegrator
from mardi_importer.publications.Author import Author
from wikibaseintegrator.wbi_enums import ActionIfExists
from mardi_importer.zenodo.Community import Community
from mardi_importer.zenodo.Project import Project

import logging
import urllib.request, json
import urllib.request, json, re
from dataclasses import dataclass, field
from typing import Dict, List

Expand All @@ -18,6 +19,8 @@ class ZenodoResource():
_authors: List[Author] = field(default_factory=list)
_resource_type: str = None
_license: str = None
_communities: List[Community] = field(default_factory=list)
_projects: List[Project] = field(default_factory = list)
metadata: Dict[str, object] = field(default_factory=dict)
QID: str = None

Expand Down Expand Up @@ -57,8 +60,9 @@ def __post_init__(self):
@property
def publication_date(self):
    """Return the publication date as a ``YYYY-MM-DDT00:00:00Z`` timestamp.

    The value is built once from ``self.metadata['publication_date']`` and
    cached in ``self._publication_date``. Only values that are exactly a
    ``YYYY-MM-DD`` date are accepted; anything else (missing key, non-string
    value, partial date, date with trailing text) leaves the cache untouched.

    Returns:
        The cached timestamp string, or the unchanged (falsy) content of
        ``self._publication_date`` when no usable date is available.
    """
    if not self._publication_date:
        # .get(): a record without the key must not raise KeyError here.
        raw_date = self.metadata.get('publication_date')
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        # fullmatch: a prefix-only match would let trailing text through and
        # produce a malformed timestamp once the suffix is appended.
        if isinstance(raw_date, str) and re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw_date):
            self._publication_date = f"{raw_date}T00:00:00Z"
    return self._publication_date

@property
Expand Down Expand Up @@ -105,6 +109,36 @@ def resource_type(self):
self._resource_type = "wd:Q37866906"
return self._resource_type

@property
def communities(self):
    """Return the ``Community`` objects this record belongs to.

    The list is built on first access from ``self.metadata["communities"]``
    and cached in ``self._communities``. Only entries whose ``id`` is
    ``"mathplus"`` are turned into ``Community`` objects; all other
    entries are ignored.

    Returns:
        The cached list of ``Community`` objects (empty when the record has
        no ``"mathplus"`` entry or no ``"communities"`` metadata at all).
    """
    if not self._communities:
        # .get() with an empty fallback: metadata without a "communities"
        # key (or with a null value) must yield no communities, not a
        # KeyError/TypeError.
        for entry in self.metadata.get("communities") or []:
            community_id = entry.get("id")
            if community_id == "mathplus":
                self._communities.append(
                    Community(api=self.api, community_id=community_id)
                )
    return self._communities

@property
def projects(self):
    """Return the ``Project`` objects linked to this record.

    Projects are only resolved for records that belong to the
    ``"mathplus"`` community. Each entry of
    ``self.metadata["related_identifiers"]`` whose ``"identifier"`` is one of
    ``Project.get_project_ids()`` becomes a ``Project``; the result is
    cached in ``self._projects``.

    Returns:
        The cached list of ``Project`` objects (possibly empty).
    """
    if self._projects:
        return self._projects

    # Go through the ``communities`` property rather than the raw
    # ``_communities`` cache so the result does not depend on whether the
    # caller happened to access ``communities`` beforehand.
    mathplus = next(
        (c for c in self.communities if c.community_id == "mathplus"),
        None,
    )
    related = self.metadata.get("related_identifiers")
    if mathplus is not None and related:
        # Fetched once instead of once per related identifier.
        # NOTE(review): assumes get_project_ids() returns the same
        # collection on every call -- confirm against Project.
        known_ids = Project.get_project_ids()
        for entry in related:
            # Entries without an "identifier" key are skipped instead of
            # raising KeyError.
            identifier = entry.get("identifier")
            if identifier is not None and identifier in known_ids:
                self._projects.append(
                    Project(api=self.api, community=mathplus, project_id=identifier)
                )
    return self._projects

def exists(self):
    """Return this resource's QID when one is set, otherwise ``None``."""
    # Explicit on both paths instead of falling off the end of the function.
    return self.QID if self.QID else None

def update(self):
# description_prop_nr = "P727"
zenodo_item = self.api.item.new()
Expand All @@ -122,14 +156,31 @@ def update(self):
elif self.license['id'] == "mit-license":
new_item.add_claim("wdt:P275", "wd:Q334661")

return new_item.write()
return new_item.write()

def update2(self):
    """Re-write the claims of the already existing item ``self.QID``.

    Fetches the item through ``self.api.item.get``, stores it on
    ``self.item``, calls ``self.insert_claims()`` and writes the item back.

    Returns:
        ``self.QID`` after the item was written, or ``None`` when no QID is
        set and there is therefore nothing to update.
    """
    # Check the QID before touching the API: without it there is no entity
    # to fetch, and checking only after the write (as before) meant the
    # failure branch could not be reached in practice.
    if not self.QID:
        print("zenodo item could not be updated.")
        return None

    self.item = self.api.item.get(entity_id=self.QID)

    self.insert_claims()
    self.item.write()

    print(f"zenodo item with ID {self.QID} has been updated.")
    return self.QID

def create(self, update = False):

item = self.api.item.new()
if not update:
if self.QID:
return self.QID

item = self.api.item.new()
else:
item = self.api.item.get(entity_id=self.QID)
# Add title
if self.title:
item.labels.set(language="en", value=self.title)
Expand All @@ -146,11 +197,11 @@ def create(self):
language="en",
value="Resource published at Zenodo repository"
)

# Publication date
if self.publication_date:
item.add_claim('wdt:P577', self.publication_date)

# Authors
author_QID = self.__preprocess_authors()
claims = []
Expand All @@ -174,7 +225,21 @@ def create(self):
elif self.license['id'] == "mit-license":
item.add_claim("wdt:P275", "wd:Q334661")

# Communities
if self.communities:
for community in self.communities:
prop_nr = self.api.get_local_id_by_label("community", "property")
item.add_claim(prop_nr, community.QID)

# Projects
if self.projects:
for project in self.projects:
project.create()
prop_nr = self.api.get_local_id_by_label("Internal Project ID", "property")
item.add_claim(prop_nr, project.QID)

self.QID = item.write().id

if self.QID:
log.info(f"Zenodo resource with Zenodo id: {self.zenodo_id} created with ID {self.QID}.")
return self.QID
Expand Down
19 changes: 14 additions & 5 deletions mardi_importer/mardi_importer/scripts/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
import logging
import logging.config
from argparse import ArgumentParser
from mardi_importer.importer import Importer

from mardi_importer.zbmath import ZBMathSource, ZBMathConfigParser
#from mardi_importer.openml import OpenMLSource
from mardi_importer.importer import Importer
from mardi_importer.cran import CRANSource
from mardi_importer.polydb import PolyDBSource
from mardi_importer.zenodo import ZenodoSource


def get_parser():
"""Get arguments parser"""
parser = ArgumentParser()
parser.add_argument(
"--mode", type=str, required=True, choices=["ZBMath", "CRAN", "polydb", "OpenML"]
"--mode", type=str, required=True, choices=["ZBMath", "CRAN", "polydb","OpenML", "zenodo"]
)
parser.add_argument("--conf_path", required=False)
parser.add_argument("--wikidata_id_file_path", required=False)
Expand Down Expand Up @@ -47,9 +50,10 @@ def main(**args):
#conf_parser = OpenMLConfigParser(args["conf_path"])
#conf = conf_parser.parse_config()

data_source = OpenMLSource()
importer = Importer(data_source)
importer.import_all(pull=False, push=True)
#data_source = OpenMLSource()
#importer = Importer(data_source)
#importer.import_all(pull=False, push=True)
print('Deactivate due to error in openml package')

elif args["mode"] == "CRAN":
data_source = CRANSource()
Expand All @@ -61,6 +65,11 @@ def main(**args):
importer = Importer(data_source)
importer.import_all()

elif args["mode"] == "zenodo":
data_source = ZenodoSource()
importer = Importer(data_source)
importer.import_all()


if __name__ == "__main__":
args = get_parser().parse_args()
Expand Down
75 changes: 75 additions & 0 deletions mardi_importer/mardi_importer/zenodo/Community.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from mardi_importer.integrator.MardiIntegrator import MardiIntegrator
from mardi_importer.integrator.MardiEntities import MardiItemEntity


from dataclasses import dataclass, field
from typing import List

@dataclass
class Community:
    """A Zenodo community and its corresponding knowledge-graph item.

    On construction the instance searches, through ``api``, for an existing
    item carrying ``community_id`` as its Zenodo community ID and remembers
    the first hit in ``QID``. ``create()`` then creates that item or
    refreshes the existing one.
    """

    # Integrator used for every lookup and write.
    api: MardiIntegrator
    # Zenodo community identifier, e.g. "mathplus".
    community_id: str
    # English label written to the item.
    community_title: str = None
    # English description written to the item.
    community_str: str = None
    # Value for the "description" claim (only added when set).
    description: str = None
    # Value for the wdt:P973 claim (only added when set).
    url: str = None
    # ID of the item, once found or created.
    QID: str = None
    # Item entity being created or updated by create().
    _item: MardiItemEntity = None

    def __post_init__(self):
        """Look up an existing item and fill in known community metadata."""
        zenodo_community_id = "wdt:P9934"
        QID_results = self.api.search_entity_by_value(
            zenodo_community_id, self.community_id
        )
        if QID_results:
            self.QID = QID_results[0]

        # Title and description are hard-coded for the one community that is
        # currently handled.
        if self.community_id == "mathplus":
            self.community_title = "MATH+"
            self.community_str = "The Berlin Mathematics Research Center MATH+ is a cross-institutional and interdisciplinary Cluster of Excellence."

    def exists(self):
        """Return the QID of the community item if known, else ``None``."""
        return self.QID if self.QID else None

    def create(self):
        """Create the community item, or update it when it already exists.

        Returns:
            The QID of the written item, or ``None`` when the write did not
            yield an ID.
        """
        if self.exists():
            self._item = self.api.item.get(entity_id=self.QID)
        else:
            self._item = self.api.item.new()

        # Title/description are only populated for known communities; do not
        # hand None to the label/description setters.
        # NOTE(review): wikibaseintegrator's LanguageValues.set appears to
        # treat value=None as "remove the existing value" -- confirm against
        # the pinned version.
        if self.community_title:
            self._item.labels.set(language="en", value=self.community_title)
        if self.community_str:
            self._item.descriptions.set(language="en", value=self.community_str)

        # instance of = community
        self._item.add_claim("wdt:P31", "wd:Q177634")

        # Add zenodo community ID
        if self.community_id:
            self._item.add_claim("wdt:P9934", self.community_id)

        # mardi profile type: mardi community profile
        self._item.add_claim("MaRDI profile type", "MaRDI community profile")

        if self.url:
            self._item.add_claim("wdt:P973", self.url)

        if self.description:
            self._item.add_claim("description", self.description)

        self.QID = self._item.write().id

        if self.QID:
            print(f"Zenodo community with community id: {self.community_id} created with ID {self.QID}.")
            return self.QID
        else:
            print(f"Zenodo community with community id: {self.community_id} could not be created.")
            return None

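# TODO: populate the community description string (community_str) automatically.
# It is set manually for now because the relevant Zenodo API is not yet released
# (https://developers.zenodo.org/#rest-api).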





Loading

0 comments on commit 3c42da3

Please sign in to comment.