Skip to content

Commit

Permalink
Parse mets:transformFile subelements
Browse files: browse the repository at this point in the history
This allows parsing of `mets:transformFile` subelements on FSEntry
objects.
  • Loading branch information
replaceafill authored Feb 5, 2020
1 parent aedc666 commit b2d30da
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 24 deletions.
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@
# built documents.
#
# The short X.Y version.
version = "0.3.14"
version = "0.3.15"
# The full version, including alpha/beta/rc tags.
release = "0.3.14"
release = "0.3.15"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion metsrw/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())
__version__ = "0.3.14"
__version__ = "0.3.15"

__all__ = [
"Agent",
Expand Down
1 change: 1 addition & 0 deletions metsrw/fsentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def from_fptr(cls, label, type_, fptr):
derived_from=fptr.derived_from,
checksum=fptr.checksum,
checksumtype=fptr.checksumtype,
transform_files=fptr.transform_files,
)

def __str__(self):
Expand Down
27 changes: 25 additions & 2 deletions metsrw/mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@

AIP_ENTRY_TYPE = "archival information package"
FPtr = namedtuple(
"FPtr", "file_uuid derived_from use path amdids checksum checksumtype fileid"
"FPtr",
"file_uuid derived_from use path amdids checksum checksumtype fileid transform_files",
)
TRANSFORM_PREFIX = "TRANSFORM"
TRANSFORM_PREFIX_LEN = len(TRANSFORM_PREFIX)


class METSDocument(object):
Expand Down Expand Up @@ -460,8 +463,28 @@ def _analyze_fptr(fptr_elem, tree, entry_type):
group_uuid = file_elem.get("GROUPID", "").replace(utils.GROUP_ID_PREFIX, "", 1)
if group_uuid != file_uuid:
derived_from = group_uuid # Use group_uuid as placeholder
transform_files = []
for transform_file in file_elem.findall(
"mets:transformFile", namespaces=utils.NAMESPACES
):
transform_file_attributes = {}
for attrib, value in transform_file.attrib.items():
# FSEntry.__init__ will make this uppercase anyway
key = attrib.upper()
if key.startswith(TRANSFORM_PREFIX):
key = key[TRANSFORM_PREFIX_LEN:]
transform_file_attributes[key] = value
transform_files.append(transform_file_attributes)
return FPtr(
file_uuid, derived_from, use, path, amdids, checksum, checksumtype, file_id
file_uuid,
derived_from,
use,
path,
amdids,
checksum,
checksumtype,
file_id,
transform_files,
)

@staticmethod
Expand Down
21 changes: 21 additions & 0 deletions tests/test_fsentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,27 @@ def test_serialize_filesec_no_use(self):
el = f.serialize_filesec()
assert el is None

def test_serialize_filesec_transform_files(self):
    """
    Serializing an FSEntry built with transform files should give a
    mets:file element whose first child is a mets:FLocat element and
    whose second child is a mets:transformFile element carrying the
    TRANSFORM-prefixed attributes.
    """
    decryption = {
        "type": "decryption",
        "order": 1,
        "algorithm": "GPG",
        "key": "somekey",
    }
    entry = metsrw.FSEntry(
        "file[1].txt",
        file_uuid=str(uuid.uuid4()),
        transform_files=[decryption],
    )

    serialized = entry.serialize_filesec()
    assert serialized.tag == "{http://www.loc.gov/METS/}file"

    flocat = serialized[0]
    assert flocat.tag == "{http://www.loc.gov/METS/}FLocat"

    transform = serialized[1]
    assert transform.tag == "{http://www.loc.gov/METS/}transformFile"

    # The integer order given above is compared against its string form.
    expected_attributes = (
        ("TRANSFORMTYPE", "decryption"),
        ("TRANSFORMORDER", "1"),
        ("TRANSFORMALGORITHM", "GPG"),
        ("TRANSFORMKEY", "somekey"),
    )
    for attribute_name, expected_value in expected_attributes:
        assert transform.attrib[attribute_name] == expected_value

def test_serialize_filesec_no_path(self):
"""
It should produce a mets:file element.
Expand Down
23 changes: 4 additions & 19 deletions tests/test_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ def test_analyze_fptr(self):
amdids="amdSec_3",
checksum=None,
checksumtype=None,
transform_files=[],
)

def test_analyze_fptr_from_aip(self):
Expand All @@ -342,6 +343,9 @@ def test_analyze_fptr_from_aip(self):
fptr = mw._analyze_fptr(fptr_elem, tree, "Archival Information Package")
assert fptr.file_uuid == "7327b00f-d83a-4ae8-bb89-84fce994e827"
assert fptr.use == "Archival Information Package"
assert fptr.transform_files == [
{"ALGORITHM": "bzip2", "ORDER": "1", "TYPE": "decompression"}
]

def test_analyze_fptr_sets_uuid_from_aip_with_file_id_prefix(self):
"""
Expand Down Expand Up @@ -822,25 +826,6 @@ def test_pointer_file(self):
] = premis_schema_location
aip_fs_entry.add_premis_agent(agent_el)

# TODO: we need metsrw to be able to set transformFile elements.
# compression - 7z or tar.bz2
"""
if extension == '.7z':
etree.SubElement(file_, namespaces.metsBNS + "transformFile",
TRANSFORMORDER='1',
TRANSFORMTYPE='decompression',
TRANSFORMALGORITHM=algorithm)
elif extension == '.bz2':
etree.SubElement(file_, namespaces.metsBNS + "transformFile",
TRANSFORMORDER='1',
TRANSFORMTYPE='decompression',
TRANSFORMALGORITHM='bzip2')
etree.SubElement(file_, namespaces.metsBNS + "transformFile",
TRANSFORMORDER='2',
TRANSFORMTYPE='decompression',
TRANSFORMALGORITHM='tar')
"""

mw.append_file(aip_fs_entry)
self.assert_pointer_valid(mw.serialize())

Expand Down

0 comments on commit b2d30da

Please sign in to comment.