From b2d30da17113dbd3009935662ad86ba33322cc75 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Wed, 5 Feb 2020 15:58:22 -0600 Subject: [PATCH] Parse mets:transformFile subelements This allows parsing of `mets:transformFile` subelements on FSEntry objects. --- docs/conf.py | 4 ++-- metsrw/__init__.py | 2 +- metsrw/fsentry.py | 1 + metsrw/mets.py | 27 +++++++++++++++++++++++++-- tests/test_fsentry.py | 21 +++++++++++++++++++++ tests/test_mets.py | 23 ++++------------------- 6 files changed, 54 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 37d8587..dcc9591 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,9 +51,9 @@ # built documents. # # The short X.Y version. -version = "0.3.14" +version = "0.3.15" # The full version, including alpha/beta/rc tags. -release = "0.3.14" +release = "0.3.15" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/metsrw/__init__.py b/metsrw/__init__.py index 9405a47..3f61b01 100644 --- a/metsrw/__init__.py +++ b/metsrw/__init__.py @@ -43,7 +43,7 @@ LOGGER = logging.getLogger(__name__) LOGGER.addHandler(logging.NullHandler()) -__version__ = "0.3.14" +__version__ = "0.3.15" __all__ = [ "Agent", diff --git a/metsrw/fsentry.py b/metsrw/fsentry.py index a469ccb..31986dd 100644 --- a/metsrw/fsentry.py +++ b/metsrw/fsentry.py @@ -192,6 +192,7 @@ def from_fptr(cls, label, type_, fptr): derived_from=fptr.derived_from, checksum=fptr.checksum, checksumtype=fptr.checksumtype, + transform_files=fptr.transform_files, ) def __str__(self): diff --git a/metsrw/mets.py b/metsrw/mets.py index 714c351..3627455 100755 --- a/metsrw/mets.py +++ b/metsrw/mets.py @@ -22,8 +22,11 @@ AIP_ENTRY_TYPE = "archival information package" FPtr = namedtuple( - "FPtr", "file_uuid derived_from use path amdids checksum checksumtype fileid" + "FPtr", + "file_uuid derived_from use path amdids checksum checksumtype fileid transform_files", ) +TRANSFORM_PREFIX = "TRANSFORM" +TRANSFORM_PREFIX_LEN = len(TRANSFORM_PREFIX) class METSDocument(object): @@ -460,8 +463,28 @@ def _analyze_fptr(fptr_elem, tree, entry_type): group_uuid = file_elem.get("GROUPID", "").replace(utils.GROUP_ID_PREFIX, "", 1) if group_uuid != file_uuid: derived_from = group_uuid # Use group_uuid as placeholder + transform_files = [] + for transform_file in file_elem.findall( + "mets:transformFile", namespaces=utils.NAMESPACES + ): + transform_file_attributes = {} + for attrib, value in transform_file.attrib.items(): + # FSEntry.__init__ will make this uppercase anyway + key = attrib.upper() + if key.startswith(TRANSFORM_PREFIX): + key = key[TRANSFORM_PREFIX_LEN:] + transform_file_attributes[key] = value + transform_files.append(transform_file_attributes) return FPtr( - file_uuid, derived_from, use, path, amdids, checksum, checksumtype, file_id + file_uuid, + derived_from, + use, + path, + amdids, + checksum, + checksumtype, + file_id, + transform_files, ) @staticmethod diff --git a/tests/test_fsentry.py b/tests/test_fsentry.py index 19774a6..386f4fa 100644 --- a/tests/test_fsentry.py +++ b/tests/test_fsentry.py @@ -245,6 +245,27 @@ def test_serialize_filesec_no_use(self): el = f.serialize_filesec() assert el is None + def test_serialize_filesec_transform_files(self): + """ + It should produce a mets:file element. + It should have a child mets:FLocat element. + It should have a child mets:transformFile element. + """ + transform_files = [ + {"type": "decryption", "order": 1, "algorithm": "GPG", "key": "somekey"} + ] + f = metsrw.FSEntry( + "file[1].txt", file_uuid=str(uuid.uuid4()), transform_files=transform_files + ) + file_element = f.serialize_filesec() + assert file_element.tag == "{http://www.loc.gov/METS/}file" + assert file_element[0].tag == "{http://www.loc.gov/METS/}FLocat" + assert file_element[1].tag == "{http://www.loc.gov/METS/}transformFile" + assert file_element[1].attrib["TRANSFORMTYPE"] == "decryption" + assert file_element[1].attrib["TRANSFORMORDER"] == "1" + assert file_element[1].attrib["TRANSFORMALGORITHM"] == "GPG" + assert file_element[1].attrib["TRANSFORMKEY"] == "somekey" + def test_serialize_filesec_no_path(self): """ It should produce a mets:file element. diff --git a/tests/test_mets.py b/tests/test_mets.py index 2872b8f..a84e567 100644 --- a/tests/test_mets.py +++ b/tests/test_mets.py @@ -331,6 +331,7 @@ def test_analyze_fptr(self): amdids="amdSec_3", checksum=None, checksumtype=None, + transform_files=[], ) def test_analyze_fptr_from_aip(self): @@ -342,6 +343,9 @@ def test_analyze_fptr_from_aip(self): fptr = mw._analyze_fptr(fptr_elem, tree, "Archival Information Package") assert fptr.file_uuid == "7327b00f-d83a-4ae8-bb89-84fce994e827" assert fptr.use == "Archival Information Package" + assert fptr.transform_files == [ + {"ALGORITHM": "bzip2", "ORDER": "1", "TYPE": "decompression"} + ] def test_analyze_fptr_sets_uuid_from_aip_with_file_id_prefix(self): """ @@ -822,25 +826,6 @@ def test_pointer_file(self): ] = premis_schema_location aip_fs_entry.add_premis_agent(agent_el) - # TODO: we need metsrw to be able to set transformFile elements. - # compression - 7z or tar.bz2 - """ - if extension == '.7z': - etree.SubElement(file_, namespaces.metsBNS + "transformFile", - TRANSFORMORDER='1', - TRANSFORMTYPE='decompression', - TRANSFORMALGORITHM=algorithm) - elif extension == '.bz2': - etree.SubElement(file_, namespaces.metsBNS + "transformFile", - TRANSFORMORDER='1', - TRANSFORMTYPE='decompression', - TRANSFORMALGORITHM='bzip2') - etree.SubElement(file_, namespaces.metsBNS + "transformFile", - TRANSFORMORDER='2', - TRANSFORMTYPE='decompression', - TRANSFORMALGORITHM='tar') - """ - mw.append_file(aip_fs_entry) self.assert_pointer_valid(mw.serialize())