diff --git a/fixtures/dmdsecs_mets.xml b/fixtures/dmdsecs_mets.xml
new file mode 100644
index 0000000..20a5972
--- /dev/null
+++ b/fixtures/dmdsecs_mets.xml
@@ -0,0 +1,85 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/metsrw/__init__.py b/metsrw/__init__.py
index 225c63b..4cf76f6 100644
--- a/metsrw/__init__.py
+++ b/metsrw/__init__.py
@@ -16,6 +16,7 @@
GROUP_ID_PREFIX,
urlencode,
urldecode,
+ generate_mdtype_key,
)
from .validate import (
METS_XSD_PATH,
@@ -67,6 +68,7 @@
"SubSection",
"__version__",
"feature_broker",
+ "generate_mdtype_key",
"get_schematron",
"get_xmlschema",
"has_class_methods",
diff --git a/metsrw/fsentry.py b/metsrw/fsentry.py
index cd60ca4..48b8747 100644
--- a/metsrw/fsentry.py
+++ b/metsrw/fsentry.py
@@ -4,6 +4,7 @@
from itertools import chain
import logging
import os
+from uuid import uuid4
from lxml import etree
import six
@@ -173,6 +174,7 @@ def __init__(
self.checksumtype = checksumtype
self.amdsecs = []
self.dmdsecs = []
+ self.dmdsecs_by_mdtype = {}
@classmethod
def dir(cls, label, children):
@@ -298,7 +300,43 @@ def add_rightsmd(self, md, mdtype, mode="mdwrap", **kwargs):
return self._add_metadata_element(md, "rightsMD", mdtype, mode, **kwargs)
def add_dmdsec(self, md, mdtype, mode="mdwrap", **kwargs):
- return self._add_metadata_element(md, "dmdSec", mdtype, mode, **kwargs)
+        """Add a dmdSec and supersede earlier ones of the same type.
+
+        Creation is delegated to ``_add_metadata_element``. The new dmdSec is
+        then grouped with any dmdSecs already recorded under the same MDTYPE
+        and OTHERMDTYPE values: all of them end up with one common
+        ``group_id`` and every earlier one gets a status ending in
+        "-superseded".
+
+        :param md: metadata handed to ``_add_metadata_element``.
+        :param mdtype: MDTYPE value; also used to build the grouping key.
+        :param mode: handed to ``_add_metadata_element`` (default "mdwrap").
+        :param kwargs: handed to ``_add_metadata_element``; ``status``
+            (falls back to "original") and ``othermdtype`` are also read here.
+        :return: the newly added dmdSec.
+        """
+        new_dmdsec = self._add_metadata_element(md, "dmdSec", mdtype, mode, **kwargs)
+        new_dmdsec.status = kwargs.get("status") or "original"
+        key = utils.generate_mdtype_key(mdtype, kwargs.get("othermdtype", ""))
+        earlier_dmdsecs = self.dmdsecs_by_mdtype.get(key)
+        if earlier_dmdsecs is not None:
+            # Reuse the group id of the first dmdSec recorded under this key,
+            # minting a fresh one when it has none.
+            shared_group_id = earlier_dmdsecs[0].group_id or str(uuid4())
+            new_dmdsec.group_id = shared_group_id
+            for earlier in earlier_dmdsecs:
+                earlier.group_id = shared_group_id
+                superseded_status = earlier.status or "original"
+                if not superseded_status.endswith("-superseded"):
+                    superseded_status += "-superseded"
+                earlier.status = superseded_status
+        self.dmdsecs_by_mdtype.setdefault(key, []).append(new_dmdsec)
+        return new_dmdsec
+
+    def delete_dmdsec(self, mdtype, othermdtype=""):
+        """Flag the last dmdSec recorded for mdtype/othermdtype as deleted.
+
+        Nothing is removed from the METS: only the ``status`` attribute of
+        the last dmdSec stored under that key is set to "deleted". Does
+        nothing when no dmdSec is recorded under the key.
+
+        :param mdtype: MDTYPE value of the dmdSec to flag.
+        :param othermdtype: optional OTHERMDTYPE value (default "").
+        """
+        key = utils.generate_mdtype_key(mdtype, othermdtype)
+        matching = self.dmdsecs_by_mdtype.get(key)
+        if matching is None:
+            return
+        matching[-1].status = "deleted"
+
+    def has_dmdsec(self, mdtype, othermdtype=""):
+        """Tell whether a dmdSec is recorded for mdtype/othermdtype.
+
+        :param mdtype: MDTYPE value to look up.
+        :param othermdtype: optional OTHERMDTYPE value (default "").
+        :return: True when the generated key is in ``dmdsecs_by_mdtype``.
+        """
+        recorded_keys = self.dmdsecs_by_mdtype
+        return utils.generate_mdtype_key(mdtype, othermdtype) in recorded_keys
def serialize_md_inst(self, md_inst, md_class):
"""Serialize object ``md_inst`` by transforming it into an
@@ -344,9 +382,9 @@ def add_premis_rights(self, md, mode="mdwrap"):
mode,
)
- def add_dublin_core(self, md, mode="mdwrap"):
- # TODO add extra args and create DC object here
- return self.add_dmdsec(md, "DC", mode)
+    def add_dublin_core(self, md, mode="mdwrap", **kwargs):
+        """Add ``md`` as a dmdSec whose MDTYPE is "DC".
+
+        :param md: metadata handed to ``add_dmdsec``.
+        :param mode: handed to ``add_dmdsec`` (default "mdwrap").
+        :param kwargs: extra keyword arguments forwarded to ``add_dmdsec``.
+        :return: whatever ``add_dmdsec`` returns.
+        """
+        # TODO create DC object here
+        dublin_core_mdtype = "DC"
+        return self.add_dmdsec(md, dublin_core_mdtype, mode, **kwargs)
def add_child(self, child):
"""Add a child FSEntry to this FSEntry.
diff --git a/metsrw/metadata.py b/metsrw/metadata.py
index 3568576..aeb63dd 100644
--- a/metsrw/metadata.py
+++ b/metsrw/metadata.py
@@ -335,9 +335,14 @@ def get_status(self):
return self.status
if self.subsection == "dmdSec":
if self.older is None:
- return "original"
+ status = "original"
+ if self.newer is not None:
+ status += "-superseded"
else:
- return "updated"
+ status = "update"
+ if self.newer is not None:
+ status += "-superseded"
+ return status
if self.subsection in ("techMD", "rightsMD"):
# TODO how to handle ones where newer has been deleted?
if self.newer is None:
diff --git a/metsrw/mets.py b/metsrw/mets.py
index a804f96..b70badf 100755
--- a/metsrw/mets.py
+++ b/metsrw/mets.py
@@ -518,20 +518,19 @@ def _analyze_fptr(fptr_elem, tree, entry_type):
@staticmethod
def _add_dmdsecs_to_fs_entry(elem, fs_entry, tree):
- dmdids = elem.get("DMDID")
- if dmdids:
- dmdids = dmdids.split()
- for dmdid in dmdids:
- dmdsec_elem = tree.find(
- 'mets:dmdSec[@ID="' + dmdid + '"]', namespaces=utils.NAMESPACES
- )
- dmdsec = metadata.SubSection.parse(dmdsec_elem)
- fs_entry.dmdsecs.append(dmdsec)
- # Create older/newer relationships
- fs_entry.dmdsecs.sort(key=lambda x: x.created)
- for prev_dmdsec, dmdsec in zip(fs_entry.dmdsecs, fs_entry.dmdsecs[1:]):
- if dmdsec.status == "updated":
- prev_dmdsec.replace_with(dmdsec)
+        """Attach the dmdSecs referenced by ``elem`` to ``fs_entry``.
+
+        Each whitespace-separated ID in the element's ``DMDID`` attribute is
+        looked up in ``tree``, parsed with ``metadata.SubSection.parse`` and
+        appended to ``fs_entry.dmdsecs``. The list is then sorted in place by
+        ``created`` and every dmdSec is recorded in
+        ``fs_entry.dmdsecs_by_mdtype`` under its MDTYPE/OTHERMDTYPE key.
+
+        :param elem: element whose ``DMDID`` attribute is read.
+        :param fs_entry: entry whose ``dmdsecs`` and ``dmdsecs_by_mdtype``
+            are updated.
+        :param tree: tree searched for the ``mets:dmdSec`` elements.
+        """
+        referenced_ids = elem.get("DMDID", "").split()
+        for dmdid in referenced_ids:
+            xpath = 'mets:dmdSec[@ID="' + dmdid + '"]'
+            dmdsec_elem = tree.find(xpath, namespaces=utils.NAMESPACES)
+            fs_entry.dmdsecs.append(metadata.SubSection.parse(dmdsec_elem))
+        # Order by creation date and generate mapping by mdtype_othermdtype
+        fs_entry.dmdsecs.sort(key=lambda section: section.created)
+        for dmdsec in fs_entry.dmdsecs:
+            mdtype = dmdsec.contents.mdtype
+            othermdtype = getattr(dmdsec.contents, "othermdtype", "")
+            key = utils.generate_mdtype_key(mdtype, othermdtype)
+            fs_entry.dmdsecs_by_mdtype.setdefault(key, []).append(dmdsec)
@staticmethod
def _add_amdsecs_to_fs_entry(amdids, fs_entry, tree):
diff --git a/metsrw/utils.py b/metsrw/utils.py
index 6803b2a..8f4a501 100644
--- a/metsrw/utils.py
+++ b/metsrw/utils.py
@@ -64,3 +64,11 @@ def urldecode(url):
https://tools.ietf.org/html/rfc3986#section-2.1.
"""
return _urlendecode(url, unquote_plus)
+
+
+def generate_mdtype_key(mdtype, othermdtype=""):
+    """Build the key used in an FSEntry's ``dmdsecs_by_mdtype`` dict.
+
+    :param mdtype: MDTYPE value.
+    :param othermdtype: optional OTHERMDTYPE value (default "").
+    :return: ``mdtype`` alone when ``othermdtype`` is empty, otherwise
+        ``mdtype`` and ``othermdtype`` joined by an underscore.
+    """
+    if not othermdtype:
+        return mdtype
+    suffix = "_" + othermdtype
+    return mdtype + suffix
diff --git a/tests/test_fsentry.py b/tests/test_fsentry.py
index 316eba9..329050a 100644
--- a/tests/test_fsentry.py
+++ b/tests/test_fsentry.py
@@ -143,6 +143,62 @@ def test_add_metadata_to_fsentry(self):
assert len(f1.amdsecs[0].subsections) == 4
+    def test_dmdsec_management(self):
+        """Exercise adding, looking up and deleting dmdSecs on an FSEntry."""
+        entry = metsrw.FSEntry("file[1].txt", file_uuid=str(uuid.uuid4()))
+        # A fresh entry has no dmdSecs of any type.
+        assert entry.dmdsecs == []
+        assert entry.dmdsecs_by_mdtype == {}
+        assert entry.has_dmdsec("DC") is False
+        assert entry.has_dmdsec("OTHER_CUSTOM") is False
+        # The first DC dmdSec is the original.
+        entry.add_dmdsec("", "DC")
+        assert len(entry.dmdsecs) == 1
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 1
+        assert entry.has_dmdsec("DC") is True
+        assert entry.dmdsecs[0].status == "original"
+        # An update supersedes the original and shares its group id.
+        entry.add_dmdsec("", "DC", status="update")
+        assert len(entry.dmdsecs) == 2
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 2
+        assert entry.dmdsecs[0].status == "original-superseded"
+        assert entry.dmdsecs[1].status == "update"
+        assert entry.dmdsecs[0].group_id == entry.dmdsecs[1].group_id
+        # A second update supersedes the first update.
+        entry.add_dmdsec("", "DC", status="update")
+        assert len(entry.dmdsecs) == 3
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert entry.dmdsecs[1].status == "update-superseded"
+        assert entry.dmdsecs[2].status == "update"
+        assert entry.dmdsecs[0].group_id == entry.dmdsecs[1].group_id
+        assert entry.dmdsecs[0].group_id == entry.dmdsecs[2].group_id
+        # Deleting only flags the latest DC dmdSec; nothing is removed.
+        entry.delete_dmdsec("DC")
+        assert len(entry.dmdsecs) == 3
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert entry.dmdsecs[2].status == "deleted"
+        # The same cycle for MDTYPE "OTHER" with OTHERMDTYPE "CUSTOM", which
+        # is tracked separately from the DC dmdSecs.
+        entry.add_dmdsec("", "OTHER", othermdtype="CUSTOM")
+        assert len(entry.dmdsecs) == 4
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert len(entry.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 1
+        assert entry.has_dmdsec("OTHER", othermdtype="CUSTOM") is True
+        assert entry.dmdsecs[3].status == "original"
+        entry.add_dmdsec("", "OTHER", othermdtype="CUSTOM", status="update")
+        assert len(entry.dmdsecs) == 5
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert len(entry.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 2
+        assert entry.dmdsecs[3].status == "original-superseded"
+        assert entry.dmdsecs[4].status == "update"
+        assert entry.dmdsecs[3].group_id == entry.dmdsecs[4].group_id
+        entry.add_dmdsec("", "OTHER", othermdtype="CUSTOM", status="update")
+        assert len(entry.dmdsecs) == 6
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert len(entry.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 3
+        assert entry.dmdsecs[4].status == "update-superseded"
+        assert entry.dmdsecs[5].status == "update"
+        assert entry.dmdsecs[3].group_id == entry.dmdsecs[4].group_id
+        assert entry.dmdsecs[3].group_id == entry.dmdsecs[5].group_id
+        entry.delete_dmdsec("OTHER", othermdtype="CUSTOM")
+        assert len(entry.dmdsecs) == 6
+        assert len(entry.dmdsecs_by_mdtype["DC"]) == 3
+        assert len(entry.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 3
+        assert entry.dmdsecs[5].status == "deleted"
+
def test_add_child(self):
"""
It should add a new entry to the children list.
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 8e32b5d..678a5e8 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -226,13 +226,13 @@ def test_replacement_dmdsec(self):
assert dmdsec_old.get_status() == "original"
dmdsec_new = metsrw.SubSection("dmdSec", self.STUB_MDWRAP)
dmdsec_old.replace_with(dmdsec_new)
- assert dmdsec_old.get_status() == "original"
- assert dmdsec_new.get_status() == "updated"
+ assert dmdsec_old.get_status() == "original-superseded"
+ assert dmdsec_new.get_status() == "update"
dmdsec_newer = metsrw.SubSection("dmdSec", self.STUB_MDWRAP)
dmdsec_new.replace_with(dmdsec_newer)
- assert dmdsec_old.get_status() == "original"
- assert dmdsec_new.get_status() == "updated"
- assert dmdsec_newer.get_status() == "updated"
+ assert dmdsec_old.get_status() == "original-superseded"
+ assert dmdsec_new.get_status() == "update-superseded"
+ assert dmdsec_newer.get_status() == "update"
def test_subsection_serialize(self):
content = metsrw.MDWrap("", None)
diff --git a/tests/test_mets.py b/tests/test_mets.py
index 3d5b2e1..60f5420 100644
--- a/tests/test_mets.py
+++ b/tests/test_mets.py
@@ -107,6 +107,28 @@ def test_parse(self):
is not None
)
+    def test_parse_dmdsecs(self):
+        """Parsing should give each FSEntry ordered dmdsecs and a by-type map."""
+        mets_doc = metsrw.METSDocument()
+        xml_parser = etree.XMLParser(remove_blank_text=True)
+        mets_doc.tree = etree.parse("fixtures/dmdsecs_mets.xml", parser=xml_parser)
+        mets_doc._parse_tree()
+        # The "objects" directory: four dmdSecs, one per metadata type.
+        objects_dir = mets_doc.get_file(type="Directory", label="objects")
+        assert len(objects_dir.dmdsecs) == 4
+        assert objects_dir.dmdsecs[0].id_string == "dmdSec_1"
+        assert objects_dir.dmdsecs[-1].id_string == "dmdSec_4"
+        assert len(objects_dir.dmdsecs_by_mdtype["DC"]) == 1
+        assert len(objects_dir.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 1
+        assert len(objects_dir.dmdsecs_by_mdtype["OTHER_MD_A"]) == 1
+        assert len(objects_dir.dmdsecs_by_mdtype["OTHER_MD_B"]) == 1
+        # The file entry: five dmdSecs spread over two metadata types.
+        item = mets_doc.get_file(type="Item", label="Landing_zone.jpg")
+        assert len(item.dmdsecs) == 5
+        assert item.dmdsecs[0].id_string == "dmdSec_5"
+        assert item.dmdsecs[-1].id_string == "dmdSec_9"
+        assert len(item.dmdsecs_by_mdtype["DC"]) == 2
+        assert len(item.dmdsecs_by_mdtype["OTHER_CUSTOM"]) == 3
+
def test_parse_tree_createdate_too_new(self):
mw = metsrw.METSDocument()
root = etree.parse("fixtures/createdate_too_new.xml")
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 8f1e9db..ee0d1d6 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -24,3 +24,8 @@ def test_url_encoding():
for url in BAD_URLS:
with pytest.raises(ValueError):
metsrw.urlencode(url)
+
+
+def test_generate_mdtype_key():
+    """The key is the MDTYPE, plus "_" and the OTHERMDTYPE when one is given."""
+    mdtype_only_key = metsrw.generate_mdtype_key("MDTYPE")
+    assert mdtype_only_key == "MDTYPE"
+    combined_key = metsrw.generate_mdtype_key("MDTYPE", "OTHERMDTYPE")
+    assert combined_key == "MDTYPE_OTHERMDTYPE"