Skip to content

Commit

Permalink
Create SCCS workflow (#77)
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* Support deposits requested by Scholarly Communications and Collections Strategy (SCCS).

How this addresses that need:
* Create SCCS workflow and metadata mapping JSON file
* Move DemoWorkflow metadata mapping JSON file
* Make 'delimiter' and 'language' optional configs
* Omit null and empty string configs from metadata mapping JSON files
* Change Workflow.email_recipients to list[str]
* Clean up Workflow and SimpleCSV fixtures
* Rename DemoWorkflow -> Demo

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/IN-1098
  • Loading branch information
jonavellecuerdo authored Jan 16, 2025
1 parent b015028 commit b510322
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 50 deletions.
2 changes: 1 addition & 1 deletion dsc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main(
workflow = workflow_class(
collection_handle=collection_handle,
batch_id=batch_id,
email_recipients=tuple(email_recipients.split(",")),
email_recipients=email_recipients.split(","),
s3_bucket=s3_bucket,
output_queue=output_queue,
)
Expand Down
5 changes: 3 additions & 2 deletions dsc/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dsc.workflows.base import Workflow
from dsc.workflows.base.simple_csv import SimpleCSV
from dsc.workflows.demo import DemoWorkflow
from dsc.workflows.demo import Demo
from dsc.workflows.sccs import SCCS

__all__ = ["DemoWorkflow", "SimpleCSV", "Workflow"]
__all__ = ["SCCS", "Demo", "SimpleCSV", "Workflow"]
21 changes: 14 additions & 7 deletions dsc/workflows/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
self,
collection_handle: str,
batch_id: str,
email_recipients: tuple[str, ...],
email_recipients: list[str],
s3_bucket: str | None = None,
output_queue: str | None = None,
) -> None:
Expand Down Expand Up @@ -259,12 +259,19 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any
A metadata mapping is a dict with the format seen below:
{
"dc.contributor": {
"source_field_name": "contributor",
"language": None,
"delimiter": "|",
"dc.contributor": {
"source_field_name": "contributor",
"language": "<language>",
"delimiter": "<delimiting character>",
"required": true | false
}
}
When setting up the metadata mapping JSON file, "language" and "delimiter"
can be omitted from the file if not applicable. Required fields ("item_identifier"
and "title") must be set as required (true); if "required" is not listed as a
config, the field defaults to not required (false).
MUST NOT be overridden by workflow subclasses.
Args:
Expand All @@ -281,8 +288,8 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any
f"{field_mapping["source_field_name"]}'"
)
if field_value:
delimiter = field_mapping["delimiter"]
language = field_mapping["language"]
delimiter = field_mapping.get("delimiter")
language = field_mapping.get("language")
if delimiter:
metadata_entries.extend(
[
Expand Down
4 changes: 2 additions & 2 deletions dsc/workflows/demo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from dsc.workflows.base.simple_csv import SimpleCSV


class DemoWorkflow(SimpleCSV):
class Demo(SimpleCSV):

workflow_name: str = "demo"
submission_system: str = "DSpace@MIT"
metadata_mapping_path: str = "tests/fixtures/demo_metadata_mapping.json"
metadata_mapping_path: str = "dsc/workflows/metadata_mapping/demo.json"
Original file line number Diff line number Diff line change
@@ -1,62 +1,47 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"language": null,
"delimiter": ""
"required": true
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"required": true
},
"dc.publisher": {
"source_field_name": "dc.publisher",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.eprint.version": {
"source_field_name": "dc.eprint.version",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.type": {
"source_field_name": "dc.type",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.source": {
"source_field_name": "dc.source",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.contributor.author": {
"source_field_name": "dc.contributor.author",
"language": "en_US",
"delimiter": "|"
},
"dc.relation.isversionof": {
"source_field_name": "dc.relation.isversionof",
"language": "",
"delimiter": ""
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"delimiter": ""
"source_field_name": "dc.relation.isversionof"
},
"dc.relation.journal": {
"source_field_name": "dc.relation.journal",
"language": "",
"delimiter": ""
"source_field_name": "dc.relation.journal"
},
"dc.identifier.issn": {
"source_field_name": "dc.identifier.issn",
"language": "",
"delimiter": ""
"source_field_name": "dc.identifier.issn"
},
"dc.date.issued": {
"source_field_name": "dc.date.issued",
"language": "",
"delimiter": ""
"source_field_name": "dc.date.issued"
},
"dc.rights.uri": {
"source_field_name": "dc.rights.uri",
"language": "",
"delimiter": ""
"source_field_name": "dc.rights.uri"
}
}
59 changes: 59 additions & 0 deletions dsc/workflows/metadata_mapping/sccs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"required": true
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"required": true
},
"dc.publisher": {
"source_field_name": "dc.publisher",
"language": "en_US"
},
"dc.identifier.mitlicense": {
"source_field_name": "dc.identifier.mitlicense",
"language": "en_US"
},
"dc.eprint.version": {
"source_field_name": "dc.eprint.version",
"language": "en_US"
},
"dc.type": {
"source_field_name": "dc.type",
"language": "en_US"
},
"dc.source": {
"source_field_name": "dc.source",
"language": "en_US"
},
"dc.contributor.author": {
"source_field_name": "dc.contributor.author",
"language": "en_US",
"delimiter": "|"
},
"dc.relation.isversionof": {
"source_field_name": "dc.relation.isversionof"
},
"dc.relation.journal": {
"source_field_name": "dc.relation.journal"
},
"dc.identifier.issn": {
"source_field_name": "dc.identifier.issn"
},
"dc.date.issued": {
"source_field_name": "dc.date.issued"
},
"dc.rights": {
"source_field_name": "dc.rights",
"language": "en_US"
},
"dc.rights.uri": {
"source_field_name": "dc.rights.uri"
},
"dc.description.sponsorship": {
"source_field_name": "dc.description.sponsorship",
"language": "en_US"
}
}
13 changes: 13 additions & 0 deletions dsc/workflows/sccs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Import from the defining module rather than from the `dsc.workflows` package:
# the package `__init__` imports this module, so importing back from the package
# is circular and only works as long as `SimpleCSV` is bound there first.
from dsc.workflows.base.simple_csv import SimpleCSV


class SCCS(SimpleCSV):
    """Workflow for SCCS-requested deposits.

    The deposits managed by this workflow are requested by the Scholarly
    Communications and Collections Strategy (SCCS) department and are for
    submission to DSpace@MIT.
    """

    # NOTE(review): unlike Demo, this class does not set `submission_system`;
    # presumably it is inherited from a parent class -- confirm it resolves to
    # "DSpace@MIT" as the docstring states.

    # Name identifying this workflow.
    workflow_name: str = "sccs"
    # Location of the metadata mapping JSON file used by this workflow.
    metadata_mapping_path: str = "dsc/workflows/metadata_mapping/sccs.json"
6 changes: 2 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class TestWorkflow(Workflow):

workflow_name: str = "test"
submission_system: str = "Test@MIT"
email_recipients: tuple[str] = ("test@test.test",)
metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json"

def item_metadata_iter(self):
Expand Down Expand Up @@ -56,10 +55,7 @@ class TestSimpleCSV(SimpleCSV):

workflow_name = "simple_csv"
submission_system: str = "Test@MIT"
email_recipients: tuple[str] = ("test@test.test",)
metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json"
s3_bucket: str = "dsc"
output_queue: str = "mock-output_queue"


@pytest.fixture(autouse=True)
Expand All @@ -79,6 +75,7 @@ def base_workflow_instance(item_metadata, metadata_mapping, mocked_s3):
collection_handle="123.4/5678",
batch_id="batch-aaa",
email_recipients=["test@test.test"],
output_queue="mock-output_queue",
)


Expand All @@ -88,6 +85,7 @@ def simple_csv_workflow_instance(metadata_mapping):
collection_handle="123.4/5678",
batch_id="batch-aaa",
email_recipients=["test@test.test"],
output_queue="mock-output_queue",
)


Expand Down
4 changes: 0 additions & 4 deletions tests/fixtures/test_metadata_mapping.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"language": null,
"delimiter": "",
"required": true
},
"dc.title": {
"source_field_name": "title",
"language": "en_US",
"delimiter": "",
"required": true
},
"dc.contributor": {
"source_field_name": "contributor",
"language": null,
"delimiter": "|"
}
}

0 comments on commit b510322

Please sign in to comment.