Skip to content

Commit

Permalink
Merge pull request #12 from ResearchObject/preserve-affiliation
Browse files Browse the repository at this point in the history
Preserve affiliations
  • Loading branch information
elichad authored May 21, 2024
2 parents 5f0ade0 + 778a0f4 commit e574bf7
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 6 deletions.
30 changes: 27 additions & 3 deletions src/rocrate_upload/authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import logging

from rocrate.model.person import Person
from rocrate.model.contextentity import ContextEntity
from zenodo_client import Creator

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Matches an ORCID URL and captures the identifier as the named group "id":
# four dash-separated groups of four characters, all digits except that the
# final character may also be "X".
ORCID_REGEX = r"https:\/\/orcid\.org\/(?P<id>([0-9]{4}-){3}[0-9]{3}[0-9X])"
# Matches a ROR URL and captures the identifier as the named group "id":
# a leading "0", six characters from a-h, j-k, m-n, p-t, v-z or 0-9, then two
# digits. There is deliberately no "|" inside the character class: within
# [...] it is a literal pipe rather than alternation, so it would let
# identifiers containing "|" through.
ROR_REGEX = r"https:\/\/ror\.org\/(?P<id>0[a-hj-km-np-tv-z0-9]{6}[0-9]{2})"


def build_zenodo_creator_list(authors: list[Person] | Person) -> list[Creator]:
Expand All @@ -24,15 +26,15 @@ def get_author_details(person: Person) -> dict:
"""Collects details from a Person entity and returns them using Creator fields"""
# check if @id is an ORCID
id = person["@id"]
orcid_id = get_orcid_id_or_none(id)
orcid = get_orcid_id_or_none(id)

name = get_formatted_author_name(person)

affiliation = person.get("affiliation", None)
if affiliation:
affiliation = str(affiliation)
affiliation = get_affiliation_name(affiliation)

return {"name": name, "orcid": orcid_id, "affiliation": affiliation}
return {"name": name, "orcid": orcid, "affiliation": affiliation}


def get_formatted_author_name(person: Person) -> str:
Expand Down Expand Up @@ -70,9 +72,31 @@ def get_formatted_author_name(person: Person) -> str:
return name


def get_affiliation_name(organization: ContextEntity | str) -> str:
    """Returns a display name for an affiliation.

    Free text is returned unchanged. For a ContextEntity, the "name" property
    is returned, falling back on the entity's "@id" with any leading "#"
    characters removed.

    Raises TypeError if organization is neither a str nor a ContextEntity.
    """
    # if it's free text (including str subclasses), return as-is
    if isinstance(organization, str):
        return organization

    # otherwise, we should have a ContextEntity object; check explicitly
    # rather than with assert, which is skipped when Python runs with -O
    if not isinstance(organization, ContextEntity):
        raise TypeError(
            "organization must be a str or ContextEntity, "
            f"not {type(organization).__name__}"
        )

    # get the organisation name, or fall back on @id
    fallback_name = organization["@id"].lstrip("#")
    return organization.get("name", fallback_name)


def get_orcid_id_or_none(str: str) -> str | None:
    """Returns the "id" group of ORCID_REGEX matched at the start of the
    given string, or None when the string does not match."""
    found = re.match(ORCID_REGEX, str)
    return found.group("id") if found else None


def get_ror_id_or_none(str: str) -> str | None:
    """Returns the "id" group of ROR_REGEX matched at the start of the
    given string, or None when the string does not match."""
    found = re.match(ROR_REGEX, str)
    if not found:
        return None
    return found.group("id")
3 changes: 2 additions & 1 deletion src/rocrate_upload/upload.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import annotations
from typing import Any

import json

Expand Down Expand Up @@ -57,7 +58,7 @@ def ensure_crate_zipped(crate: ROCrate) -> str:
return zipped


def upload_crate_to_zenodo(crate_zip_path: str, metadata: Metadata):
def upload_crate_to_zenodo(crate_zip_path: str, metadata: Metadata) -> Any:
"""Upload a zipped crate and its metadata to Zenodo.
It's recommended to keep sandbox=True until ready for production use."""
Expand Down
79 changes: 78 additions & 1 deletion test/test_author_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,48 @@
import pytest

from rocrate_upload.authors import get_orcid_id_or_none, get_author_details
from rocrate.model.contextentity import ContextEntity
from rocrate_upload.authors import (
get_orcid_id_or_none,
get_ror_id_or_none,
get_affiliation_name,
)

# Fixture data for organization entities, keyed by a short scenario label.
# Each value holds the keyword arguments that the affiliation-name test in
# this file unpacks into ContextEntity(None, **value): an "identifier" and,
# optionally, a "properties" dict carrying the organization's "name".
# NOTE(review): "id-blank-node" is not referenced by the parametrized cases
# visible in this file -- confirm whether a test case for it is intended.
organizations = {
# organization with id as ROR
"id-ror": {
"identifier": "https://ror.org/0abcdef12",
"properties": {
"name": "ROR Organization",
},
},
# organization with id as URI
"id-uri": {
"identifier": "https://example.org",
"properties": {
"name": "URI Organization",
},
},
# organization with id as local identifier
"id-local": {
"identifier": "#local_organization",
"properties": {
"name": "Local Organization",
},
},
# organization with id as a name, and no other info
"id-name": {"identifier": "Named Organization"},
# organization with id as blank node identifier
"id-blank-node": {
"identifier": "_:blank_organization",
},
# organization with international characters in their name
"name-intl-chars": {
"identifier": "Ãệïøù Organization",
"properties": {
"name": "Ãệïøù Organization",
},
},
}


@pytest.mark.parametrize(
Expand All @@ -18,3 +60,38 @@ def test_get_orcid_id(input, expected):

# Assert
assert expected == result


@pytest.mark.parametrize(
    "input, expected",
    [
        ("https://ror.org/02mhbdp94", "02mhbdp94"),
        ("02mhbdp94", None),
        ("not an ROR", None),
    ],
)
def test_get_ror_id(input, expected):
    """A full ROR URL yields its identifier; a bare identifier or unrelated
    text yields None."""
    assert expected == get_ror_id_or_none(input)


@pytest.mark.parametrize(
    "org_key, expected",
    [
        ("id-ror", "ROR Organization"),
        ("id-uri", "URI Organization"),
        ("id-local", "Local Organization"),
        ("id-name", "Named Organization"),
        ("name-intl-chars", "Ãệïøù Organization"),
    ],
)
def test_get_formatted_author_name(org_key, expected):
    """Checks get_affiliation_name against organization fixtures.

    NOTE(review): despite its name, this test exercises get_affiliation_name,
    not author-name formatting -- consider renaming it.
    """
    # build the entity from the fixture's identifier/properties kwargs
    entity = ContextEntity(None, **organizations[org_key])

    assert expected == get_affiliation_name(entity)
2 changes: 1 addition & 1 deletion test/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_build_zenodo_metadata(self):
"creators": [
{
"name": "Smith, Jane",
"affiliation": "<https://ror.org/0abcdef00 Organization>",
"affiliation": "Example University",
"orcid": "0000-0000-0000-0000",
"gnd": None,
}
Expand Down

0 comments on commit e574bf7

Please sign in to comment.