diff --git a/docs/conf.py b/docs/conf.py index 56c3d60..23ee41e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -127,7 +127,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +# html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -326,7 +326,10 @@ # epub_use_index = True doctest_global_setup = """ +import os +import tempfile import uuid +import lxml import metsrw """ diff --git a/docs/reading-mets.rst b/docs/reading-mets.rst index 5a32ea6..725a5ca 100644 --- a/docs/reading-mets.rst +++ b/docs/reading-mets.rst @@ -4,13 +4,13 @@ Reading METS files metsrw supports reading METS files from disk, from strings, or from lxml_ `_Element` or `_ElementTree` objects. -.. code-block:: python +.. testcode:: # From a file on disk - mets = metsrw.METSDocument.fromfile('path/to/file') + mets = metsrw.METSDocument.fromfile("../fixtures/complete_mets.xml") - # From a string - mets_str = """ + # From bytes + mets_str = b""" @@ -18,7 +18,7 @@ metsrw supports reading METS files from disk, from strings, or from lxml_ mets = metsrw.METSDocument.fromstring(mets_str) # From an lxml object - tree = lxml.etree.fromfile('path/to/file') + tree = lxml.etree.parse("../fixtures/complete_mets.xml") mets = metsrw.METSDocument.fromtree(tree) @@ -26,19 +26,74 @@ Accessing METS Data ------------------- To retrieve an :class:`metsrw.FSEntry`, use the -:func:`~metsrw.METSDocument.get_file` method. +:meth:`~metsrw.METSDocument.get_file` or +:meth:`~metsrw.METSDocument.all_files` methods. -.. code-block:: python +.. doctest:: - mets = metsrw.METSDocument() - file_uuid = str(uuid.uuid4()) - file_1 = metsrw.FSEntry( - label="hello.pdf", path="test/hello.pdf", type="Item", - file_uuid=file_uuid) - mets.append_file(file_1) + >>> mets = metsrw.METSDocument() + >>> file_uuid = str(uuid.uuid4()) + >>> file_1 = metsrw.FSEntry( + ... label="hello.pdf", path="test/hello.pdf", type="Item", + ... file_uuid=file_uuid) + >>> mets.append_file(file_1) - # Returns file_1 - mets.get_file(file_uuid=file_uuid) + >>> mets.get_file(file_uuid=file_uuid) + FSEntry(type='Item', path='test/hello.pdf', use='original', ...) + + >>> mets.all_files() + {FSEntry(type='Item', path='test/hello.pdf', use='original', ...)} + + # Currently, filtering files can only be done via iteration + >>> [entry for entry in mets.all_files() if entry.use == "original"] + [FSEntry(type='Item', path='test/hello.pdf', use='original', ...)] + + +`amdSec` and `dmdSec` data is accessible via the +:attr:`~metsrw.FSEntry.amdsecs` and :attr:`~metsrw.FSEntry.dmdsecs` +properties. + +.. doctest:: + + >>> mets = metsrw.METSDocument.fromfile('../fixtures/complete_mets.xml') + >>> fsentry = mets.get_file(file_uuid="ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2") + >>> amdsec1 = fsentry.amdsecs[0] + >>> [section for section in amdsec1.subsections if section.subsection == 'techMD'] + [] + >>> fsentry.dmdsecs[0] + + + +.. note:: + In most cases, you'll want to access PREMIS data via the `get_premis` + series of methods, rather than accessing the `amdSec` or `dmdSec` data + directly. See `Accessing PREMIS Data`_ for more info. + + +Accessing PREMIS Data +--------------------- + +To access PREMIS_ metadata associated with a file, use the following +methods: + +* :meth:`~metsrw.FSEntry.get_premis_objects` +* :meth:`~metsrw.FSEntry.get_premis_events` +* :meth:`~metsrw.FSEntry.get_premis_agents` +* :meth:`~metsrw.FSEntry.get_premis_rights` + + +.. doctest:: + + # Currently, filtering PREMIS objects can only be done via iteration + >>> ingestion_events = [] + >>> mets = metsrw.METSDocument.fromfile('../fixtures/complete_mets.xml') + >>> for fsentry in mets.all_files(): + ... for event in fsentry.get_premis_events(): + ... if event.type == "ingestion": + ... ingestion_events.append(event) + >>> ingestion_events[0] + ('event', ...) .. _lxml: https://lxml.de/index.html +.. _PREMIS: https://www.loc.gov/standards/premis/v3/index.html diff --git a/docs/writing-mets.rst b/docs/writing-mets.rst index 3b82765..5f6ee57 100644 --- a/docs/writing-mets.rst +++ b/docs/writing-mets.rst @@ -8,40 +8,39 @@ To add data to a :class:`metsrw.METSDocument`, create and append :class:`metsrw.FSEntry` objects. -.. code-block:: python +.. doctest:: - mets = metsrw.METSDocument() - directory_1 = metsrw.FSEntry(label="test", path="test", type="Directory") - - file_1 = metsrw.FSEntry( - label="hello.pdf", path="test/hello.pdf", type="Item", - file_uuid=str(uuid.uuid4())) - directory_1.children.append(file_1) - - file_2 = metsrw.FSEntry( - label="demo.jpg", path="test/demo.jpg", type="Item", - file_uuid=str(uuid.uuid4())) - directory_1.children.append(file_2) - - mets.append_file(file1) + >>> mets = metsrw.METSDocument() + >>> directory_1 = metsrw.FSEntry(label="test", path="test", type="Directory") + >>> file_1 = metsrw.FSEntry( + ... label="hello.pdf", path="test/hello.pdf", type="Item", + ... file_uuid=str(uuid.uuid4())) + >>> directory_1.children.append(file_1) + >>> file_2 = metsrw.FSEntry( + ... label="demo.jpg", path="test/demo.jpg", type="Item", + ... file_uuid=str(uuid.uuid4())) + >>> directory_1.children.append(file_2) + >>> mets.append_file(directory_1) + >>> mets.all_files() + {FSEntry(...), FSEntry(...)} Adding metadata is done via the :class:`metsrw.FSEntry`. -.. code-block:: python +.. testcode:: file_1 = metsrw.FSEntry( label="hello.pdf", path="test/hello.pdf", type="Item", file_uuid=str(uuid.uuid4())) - file1.add_premis_object("object") - file1.add_premis_event("event") - file1.add_premis_agent("agent") - rights = file1.add_premis_rights("rights") - dc = file1.add_dublin_core("metadata") + file_1.add_premis_object("object") + file_1.add_premis_event("event") + file_1.add_premis_agent("agent") + rights = file_1.add_premis_rights("rights") + dc = file_1.add_dublin_core("metadata") - # Replaces added metatdata - rights.replace_with(file1.add_premis_rights("newer rights")) + # Replaces added metadata + rights.replace_with(file_1.add_premis_rights("newer rights")) Serialization @@ -50,22 +49,35 @@ Serialization metsrw supports serialization to file, bytes or lxml_ Element object. -.. code-block:: python +.. testsetup:: serialization + + temp_dir = tempfile.mkdtemp() + output_path = os.path.join(temp_dir, "demo.xml") + mets = metsrw.METSDocument() + file1 = metsrw.FSEntry("hello.pdf", file_uuid=str(uuid.uuid4())) + mets.append_file(file1) + +.. testcleanup:: serialization + + os.remove(output_path) + os.removedirs(temp_dir) + +.. doctest:: serialization >>> mets = metsrw.METSDocument() >>> file1 = metsrw.FSEntry("hello.pdf", file_uuid=str(uuid.uuid4())) >>> mets.append_file(file1) - >>> # To file on disk - >>> mets.write("/path/to/file") + # To file on disk + >>> mets.write(output_path) - >>> # To _Element object + # To _Element object >>> mets.serialize() - + - >>> # To bytes + # To bytes >>> mets.tostring() - b'\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n' + b'\n