From ea4419eb58cc1d50940c79775da1943c46f29243 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 25 Apr 2024 14:36:13 +0200 Subject: [PATCH] changed zbmath to include arxiv id --- .../mardi_importer/openml/OpenMLSource.py | 2 +- .../mardi_importer/zbmath/ZBMathAuthor.py | 4 +-- .../zbmath/ZBMathPublication.py | 27 ++++++++++++++----- .../mardi_importer/zbmath/ZBMathSource.py | 13 ++++++--- .../mardi_importer/zbmath/new_entities.json | 9 +++++-- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/mardi_importer/mardi_importer/openml/OpenMLSource.py b/mardi_importer/mardi_importer/openml/OpenMLSource.py index 7bd1abd..3159093 100644 --- a/mardi_importer/mardi_importer/openml/OpenMLSource.py +++ b/mardi_importer/mardi_importer/openml/OpenMLSource.py @@ -16,7 +16,7 @@ def setup(self): # Import entities from Wikidata filename = self.filepath + "/wikidata_entities.txt" self.integrator.import_entities(filename=filename) - self.create_local_entities() + #self.create_local_entities() # self.de_number_prop = self.integrator.get_local_id_by_label( # "zbMATH DE Number", "property" # ) diff --git a/mardi_importer/mardi_importer/zbmath/ZBMathAuthor.py b/mardi_importer/mardi_importer/zbmath/ZBMathAuthor.py index 40574c0..a3bc75f 100644 --- a/mardi_importer/mardi_importer/zbmath/ZBMathAuthor.py +++ b/mardi_importer/mardi_importer/zbmath/ZBMathAuthor.py @@ -31,9 +31,7 @@ def init_item(self): # instance of: human item.add_claim("wdt:P31", "wd:Q5") profile_prop = self.api.get_local_id_by_label("MaRDI profile type", "property") - profile_target = self.api.get_local_id_by_label("MaRDI person profile", "item")[ - 0 - ] + profile_target = self.api.get_local_id_by_label("MaRDI person profile", "item")[0] item.add_claim(profile_prop, profile_target) if self.zbmath_author_id: # if self.name: diff --git a/mardi_importer/mardi_importer/zbmath/ZBMathPublication.py b/mardi_importer/mardi_importer/zbmath/ZBMathPublication.py index 68c0970..0d82c7d 100644 --- a/mardi_importer/mardi_importer/zbmath/ZBMathPublication.py +++ b/mardi_importer/mardi_importer/zbmath/ZBMathPublication.py @@ -21,6 +21,8 @@ class ZBMathPublication: creation date of entry zbl_id: zbl_id + arxiv_id: + arxiv_id review_text: review text reviewer: @@ -49,6 +51,7 @@ def __init__( links, creation_date, zbl_id, + arxiv_id, review_text, reviewer, classifications, @@ -60,9 +63,12 @@ def __init__( self.api = integrator self.title = title self.zbl_id = zbl_id + self.arxiv_id = arxiv_id self.QID = None self.language = language - self.doi = doi.lower() + self.doi = doi + if self.doi: + self.doi = self.doi.lower() self.authors = authors self.journal = journal self.time = time @@ -100,6 +106,8 @@ def insert_claims(self): # zbmath document id if self.zbl_id: self.item.add_claim("wdt:P894", self.zbl_id) + if self.arxiv_id: + self.item.add_claim("wdt:P818", self.arxiv_id) if self.doi: self.item.add_claim("wdt:P356", self.doi) author_claims = [] @@ -160,17 +168,24 @@ def exists(self): # instance of scholarly article if self.title: self.QID = self.item.is_instance_of_with_property( - "wd:Q13442814", "P1451", self.de_number + "wd:Q13442814", self.de_number_prop, self.de_number ) + if not self.QID: + if self.arxiv_id: + self.QID = self.item.is_instance_of_with_property( + "wd:Q13442814", "wdt:P818", self.arxiv_id) else: QID_list = self.api.search_entity_by_value( self.de_number_prop, self.de_number ) if not QID_list: - self.QID = None - else: - # should not be more than one - self.QID = QID_list[0] + if self.arxiv_id: + QID_list = self.api.search_entity_by_value("wdt:P818", self.arxiv_id) + if not QID_list: + self.QID = None + return(self.QID) + # should not be more than one + self.QID = QID_list[0] return self.QID def update(self): diff --git a/mardi_importer/mardi_importer/zbmath/ZBMathSource.py b/mardi_importer/mardi_importer/zbmath/ZBMathSource.py index 6295e0e..a4bca09 100644 --- a/mardi_importer/mardi_importer/zbmath/ZBMathSource.py +++ b/mardi_importer/mardi_importer/zbmath/ZBMathSource.py @@ -79,7 +79,7 @@ def setup(self): # Import entities from Wikidata filename = self.filepath + "/wikidata_entities.txt" self.integrator.import_entities(filename=filename) - self.create_local_entities() + #self.create_local_entities() self.de_number_prop = self.integrator.get_local_id_by_label( "zbMATH DE Number", "property" ) @@ -104,8 +104,9 @@ def create_local_entities(self): item = self.integrator.item.new() item.labels.set(language="en", value=item_element["label"]) item.descriptions.set(language="en", value=item_element["description"]) - for key, value in item_element["claims"].items(): - item.add_claim(key, value=value) + if "claims" in item_element: + for key, value in item_element["claims"].items(): + item.add_claim(key, value=value) if not item.exists(): item.write() @@ -416,6 +417,11 @@ def push(self): links = [ x.strip() for x in links if (pattern.match(x) and "http" in x) ] + arxiv_prefix = "https://arxiv.org/abs/" + arxiv_id = None + for l in links: + if arxiv_prefix in l: + arxiv_id = l.removeprefix(arxiv_prefix) else: links = [] @@ -523,6 +529,7 @@ def push(self): links=links, creation_date=creation_date, zbl_id=zbl_id, + arxiv_id=arxiv_id, review_text=review_text, reviewer=reviewer, classifications=classifications, diff --git a/mardi_importer/mardi_importer/zbmath/new_entities.json b/mardi_importer/mardi_importer/zbmath/new_entities.json index 63a26f3..b7572bb 100644 --- a/mardi_importer/mardi_importer/zbmath/new_entities.json +++ b/mardi_importer/mardi_importer/zbmath/new_entities.json @@ -11,10 +11,15 @@ "datatype": "external-id" }, { - "label": "zbMATH keyword string", + "label": "zbMATH Keywords", "description": "keyword string from zbMATH", "datatype": "string" } ], - "items": [] + "items": [ + { + "label": "MaRDI person profile", + "description": "type of MaRDI profile" + } + ] } \ No newline at end of file