diff --git a/dsc/cli.py b/dsc/cli.py index 650de71..9f5a123 100644 --- a/dsc/cli.py +++ b/dsc/cli.py @@ -70,7 +70,7 @@ def main( workflow = workflow_class( collection_handle=collection_handle, batch_id=batch_id, - email_recipients=tuple(email_recipients.split(",")), + email_recipients=email_recipients.split(","), s3_bucket=s3_bucket, output_queue=output_queue, ) diff --git a/dsc/workflows/__init__.py b/dsc/workflows/__init__.py index a085a0c..b154351 100644 --- a/dsc/workflows/__init__.py +++ b/dsc/workflows/__init__.py @@ -5,6 +5,7 @@ from dsc.workflows.base import Workflow from dsc.workflows.base.simple_csv import SimpleCSV -from dsc.workflows.demo import DemoWorkflow +from dsc.workflows.demo import Demo +from dsc.workflows.sccs import SCCS -__all__ = ["DemoWorkflow", "SimpleCSV", "Workflow"] +__all__ = ["SCCS", "Demo", "SimpleCSV", "Workflow"] diff --git a/dsc/workflows/base/__init__.py b/dsc/workflows/base/__init__.py index 03d27d3..4902fbb 100644 --- a/dsc/workflows/base/__init__.py +++ b/dsc/workflows/base/__init__.py @@ -35,7 +35,7 @@ def __init__( self, collection_handle: str, batch_id: str, - email_recipients: tuple[str, ...], + email_recipients: list[str], s3_bucket: str | None = None, output_queue: str | None = None, ) -> None: @@ -259,12 +259,19 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any A metadata mapping is a dict with the format seen below: { - "dc.contributor": { - "source_field_name": "contributor", - "language": None, - "delimiter": "|", + "dc.contributor": { + "source_field_name": "contributor", + "language": "", + "delimiter": "", + "required": true | false + } } + When setting up the metadata mapping JSON file, "language" and "delimiter" + can be omitted from the file if not applicable. Required fields ("item_identifier" + and "title") must be set as required (true); if "required" is not listed as + a config, the field defaults to not required (false). + MUST NOT be overridden by workflow subclasses. 
Args: @@ -281,8 +288,8 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any f"{field_mapping["source_field_name"]}'" ) if field_value: - delimiter = field_mapping["delimiter"] - language = field_mapping["language"] + delimiter = field_mapping.get("delimiter") + language = field_mapping.get("language") if delimiter: metadata_entries.extend( [ diff --git a/dsc/workflows/demo.py b/dsc/workflows/demo.py index 0ca82cb..7e0bf00 100644 --- a/dsc/workflows/demo.py +++ b/dsc/workflows/demo.py @@ -1,8 +1,8 @@ from dsc.workflows.base.simple_csv import SimpleCSV -class DemoWorkflow(SimpleCSV): +class Demo(SimpleCSV): workflow_name: str = "demo" submission_system: str = "DSpace@MIT" - metadata_mapping_path: str = "tests/fixtures/demo_metadata_mapping.json" + metadata_mapping_path: str = "dsc/workflows/metadata_mapping/demo.json" diff --git a/tests/fixtures/demo_metadata_mapping.json b/dsc/workflows/metadata_mapping/demo.json similarity index 50% rename from tests/fixtures/demo_metadata_mapping.json rename to dsc/workflows/metadata_mapping/demo.json index 6b2f173..392be94 100644 --- a/tests/fixtures/demo_metadata_mapping.json +++ b/dsc/workflows/metadata_mapping/demo.json @@ -1,28 +1,28 @@ { "item_identifier": { "source_field_name": "item_identifier", - "language": null, - "delimiter": "" + "required": true + }, + "dc.title": { + "source_field_name": "dc.title", + "language": "en_US", + "required": true }, "dc.publisher": { "source_field_name": "dc.publisher", - "language": "en_US", - "delimiter": "" + "language": "en_US" }, "dc.eprint.version": { "source_field_name": "dc.eprint.version", - "language": "en_US", - "delimiter": "" + "language": "en_US" }, "dc.type": { "source_field_name": "dc.type", - "language": "en_US", - "delimiter": "" + "language": "en_US" }, "dc.source": { "source_field_name": "dc.source", - "language": "en_US", - "delimiter": "" + "language": "en_US" }, "dc.contributor.author": { "source_field_name": "dc.contributor.author", 
@@ -30,33 +30,18 @@ "delimiter": "|" }, "dc.relation.isversionof": { - "source_field_name": "dc.relation.isversionof", - "language": "", - "delimiter": "" - }, - "dc.title": { - "source_field_name": "dc.title", - "language": "en_US", - "delimiter": "" + "source_field_name": "dc.relation.isversionof" }, "dc.relation.journal": { - "source_field_name": "dc.relation.journal", - "language": "", - "delimiter": "" + "source_field_name": "dc.relation.journal" }, "dc.identifier.issn": { - "source_field_name": "dc.identifier.issn", - "language": "", - "delimiter": "" + "source_field_name": "dc.identifier.issn" }, "dc.date.issued": { - "source_field_name": "dc.date.issued", - "language": "", - "delimiter": "" + "source_field_name": "dc.date.issued" }, "dc.rights.uri": { - "source_field_name": "dc.rights.uri", - "language": "", - "delimiter": "" + "source_field_name": "dc.rights.uri" } } \ No newline at end of file diff --git a/dsc/workflows/metadata_mapping/sccs.json b/dsc/workflows/metadata_mapping/sccs.json new file mode 100644 index 0000000..5a0fd17 --- /dev/null +++ b/dsc/workflows/metadata_mapping/sccs.json @@ -0,0 +1,59 @@ +{ + "item_identifier": { + "source_field_name": "item_identifier", + "required": true + }, + "dc.title": { + "source_field_name": "dc.title", + "language": "en_US", + "required": true + }, + "dc.publisher": { + "source_field_name": "dc.publisher", + "language": "en_US" + }, + "dc.identifier.mitlicense": { + "source_field_name": "dc.identifier.mitlicense", + "language": "en_US" + }, + "dc.eprint.version": { + "source_field_name": "dc.eprint.version", + "language": "en_US" + }, + "dc.type": { + "source_field_name": "dc.type", + "language": "en_US" + }, + "dc.source": { + "source_field_name": "dc.source", + "language": "en_US" + }, + "dc.contributor.author": { + "source_field_name": "dc.contributor.author", + "language": "en_US", + "delimiter": "|" + }, + "dc.relation.isversionof": { + "source_field_name": "dc.relation.isversionof" + }, + 
"dc.relation.journal": { + "source_field_name": "dc.relation.journal" + }, + "dc.identifier.issn": { + "source_field_name": "dc.identifier.issn" + }, + "dc.date.issued": { + "source_field_name": "dc.date.issued" + }, + "dc.rights": { + "source_field_name": "dc.rights", + "language": "en_US" + }, + "dc.rights.uri": { + "source_field_name": "dc.rights.uri" + }, + "dc.description.sponsorship": { + "source_field_name": "dc.description.sponsorship", + "language": "en_US" + } +} \ No newline at end of file diff --git a/dsc/workflows/sccs.py b/dsc/workflows/sccs.py new file mode 100644 index 0000000..25a81c2 --- /dev/null +++ b/dsc/workflows/sccs.py @@ -0,0 +1,13 @@ +from dsc.workflows import SimpleCSV + + +class SCCS(SimpleCSV): + """Workflow for SCCS-requested deposits. + + The deposits managed by this workflow are requested by the Scholarly + Communication and Collection Strategy (SCCS) department + and are for submission to DSpace@MIT. + """ + + workflow_name: str = "sccs" + metadata_mapping_path: str = "dsc/workflows/metadata_mapping/sccs.json" diff --git a/tests/conftest.py b/tests/conftest.py index a4a58e4..85ce873 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,6 @@ class TestWorkflow(Workflow): workflow_name: str = "test" submission_system: str = "Test@MIT" - email_recipients: tuple[str] = ("test@test.test",) metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json" def item_metadata_iter(self): @@ -56,10 +55,7 @@ class TestSimpleCSV(SimpleCSV): workflow_name = "simple_csv" submission_system: str = "Test@MIT" - email_recipients: tuple[str] = ("test@test.test",) metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json" - s3_bucket: str = "dsc" - output_queue: str = "mock-output_queue" @pytest.fixture(autouse=True) @@ -79,6 +75,7 @@ def base_workflow_instance(item_metadata, metadata_mapping, mocked_s3): collection_handle="123.4/5678", batch_id="batch-aaa", email_recipients=["test@test.test"], + 
output_queue="mock-output_queue", ) @@ -88,6 +85,7 @@ def simple_csv_workflow_instance(metadata_mapping): collection_handle="123.4/5678", batch_id="batch-aaa", email_recipients=["test@test.test"], + output_queue="mock-output_queue", ) diff --git a/tests/fixtures/test_metadata_mapping.json b/tests/fixtures/test_metadata_mapping.json index 0b3e3dd..38809c6 100644 --- a/tests/fixtures/test_metadata_mapping.json +++ b/tests/fixtures/test_metadata_mapping.json @@ -1,19 +1,15 @@ { "item_identifier": { "source_field_name": "item_identifier", - "language": null, - "delimiter": "", "required": true }, "dc.title": { "source_field_name": "title", "language": "en_US", - "delimiter": "", "required": true }, "dc.contributor": { "source_field_name": "contributor", - "language": null, "delimiter": "|" } } \ No newline at end of file