From 2d3013e2d39c54602a03112cc952bcf2ffb9216a Mon Sep 17 00:00:00 2001 From: Justin Littman Date: Fri, 22 Sep 2023 09:53:11 -0400 Subject: [PATCH] Refactor and annotate PreservationIngestService. refs #4594 --- app/services/preservation_ingest_service.rb | 93 +++++++++++++------ .../preservation_ingest_service_spec.rb | 32 ++++--- 2 files changed, 84 insertions(+), 41 deletions(-) diff --git a/app/services/preservation_ingest_service.rb b/app/services/preservation_ingest_service.rb index 2fb41181e..185be4205 100644 --- a/app/services/preservation_ingest_service.rb +++ b/app/services/preservation_ingest_service.rb @@ -11,50 +11,62 @@ class PreservationIngestService # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow # @raise [Preservation::Client::Error] if bad response from preservation catalog. def self.transfer(cocina_object) - druid = cocina_object.externalIdentifier - workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root) - signature_catalog = signature_catalog_from_preservation(druid) - new_version_id = signature_catalog.version_id + 1 + new(cocina_object).transfer + end + + def initialize(cocina_object) + @cocina_object = cocina_object + end + + # @param [Cocina::Models::DRO, Cocina::Models::Collection] cocina_object The representation of the digital object + # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow + # @raise [Preservation::Client::Error] if bad response from preservation catalog. + def transfer + # Writes versionMetadata.xml, contentMetadata.xml, and cocina.json metadata_dir = PreservationMetadataExtractor.extract(workspace:, cocina_object:) + + # Makes sure that the versionMetadata.xml version matches the expected version from preservation. 
verify_version_metadata(metadata_dir, new_version_id) - version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:, - druid:, - version_id: new_version_id) + # Creates a Moab::FileInventory from the contentMetadata.xml + version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:, druid:, version_id: new_version_id) + + # Creates a Moab::FileInventory containing only the files that are changed in this version. version_additions = signature_catalog.version_additions(version_inventory) + + # Find the changed files in the workspace content_additions = version_additions.group('content') - if content_additions.nil? || content_additions.files.empty? - content_dir = nil - else + content_dir = nil + if content_additions.present? && content_additions.files.any? new_file_list = content_additions.path_list content_dir = workspace.find_filelist_parent('content', new_file_list) end + content_group = version_inventory.group('content') + + # Regenerate the fixities for content (md5, sha1, sha256) if they are missing. signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty? - # export the bag (in tar format) - bag_dir = Pathname(Settings.sdr.local_export_home).join(druid.sub('druid:', '')) - bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir) - bagger.reset_bag - bagger.create_bag_inventory(:depositor) - bagger.deposit_group('content', content_dir) - bagger.deposit_group('metadata', metadata_dir) - bagger.create_tagfiles - Preserve::BagVerifier.verify(directory: bag_dir) + + export(version_inventory, content_dir, metadata_dir) end - # NOTE: the following methods should probably all be private - # @param [String] druid The object identifier + private + + attr_reader :cocina_object + # @return [Moab::SignatureCatalog] the manifest of all files previously ingested, # or if there is none, a SignatureCatalog object for version 0.
# @raise [Preservation::Client::Error] if bad response from preservation catalog. - def self.signature_catalog_from_preservation(druid) - Preservation::Client.objects.signature_catalog(druid) - rescue Preservation::Client::NotFoundError - Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0) + def signature_catalog + @signature_catalog ||= begin + Preservation::Client.objects.signature_catalog(druid) + rescue Preservation::Client::NotFoundError + Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0) + end end # @param [Pathname] metadata_dir the location of the metadata directory in the workspace # @param [Integer] expected the version identifer expected to be used in the versionMetadata - def self.verify_version_metadata(metadata_dir, expected) + def verify_version_metadata(metadata_dir, expected) vmfile = metadata_dir.join('versionMetadata.xml') verify_version_id(vmfile, expected, vmfile_version_id(vmfile)) true @@ -63,7 +75,7 @@ def self.verify_version_metadata(metadata_dir, expected) # @param [Pathname] pathname The location of the file containing a version number # @param [Integer] expected The version number that should be in the file # @param [Integer] found The version number that is actually in the file - def self.verify_version_id(pathname, expected, found) + def verify_version_id(pathname, expected, found) raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found true @@ -71,7 +83,7 @@ def self.verify_version_id(pathname, expected, found) # @param [Pathname] pathname the location of the versionMetadata file # @return [Integer] the versionId found in the last version element, or nil if missing - def self.vmfile_version_id(pathname) + def vmfile_version_id(pathname) raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist? 
doc = Nokogiri::XML(File.read(pathname.to_s)) @@ -79,4 +91,29 @@ def self.vmfile_version_id(pathname) version_id = nodeset.last['versionId'] version_id&.to_i end + + # Export the bag (using symlinks for content files) + def export(version_inventory, content_dir, metadata_dir) + bag_dir = Pathname(Settings.sdr.local_export_home).join(druid.sub('druid:', '')) + bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir) + bagger.reset_bag + bagger.create_bag_inventory(:depositor) + bagger.deposit_group('content', content_dir) + bagger.deposit_group('metadata', metadata_dir) + bagger.create_tagfiles + # Checks for required files. (Does not check fixities.) + Preserve::BagVerifier.verify(directory: bag_dir) + end + + def druid + cocina_object.externalIdentifier + end + + def workspace + @workspace ||= DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root) + end + + def new_version_id + @new_version_id ||= signature_catalog.version_id + 1 + end end diff --git a/spec/services/preservation_ingest_service_spec.rb b/spec/services/preservation_ingest_service_spec.rb index 4b86abc04..89a6f832c 100644 --- a/spec/services/preservation_ingest_service_spec.rb +++ b/spec/services/preservation_ingest_service_spec.rb @@ -11,6 +11,8 @@ File.read(fixtures.join('sdr_repo/dd116zh0343/v0001/manifests/signatureCatalog.xml')) ) end + let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) } + let(:druid) { 'druid:dd116zh0343' } before do allow(Settings.sdr).to receive_messages(local_workspace_root: fixtures.join('workspace').to_s, @@ -36,8 +38,6 @@ end describe '.transfer' do - let(:druid) { 'druid:dd116zh0343' } - let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) } let(:metadata_dir) { fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata') } before do @@ -109,8 +109,8 @@ end end - describe '.signature_catalog_from_preservation' do - let(:druid) { 'druid:dd116zh0343' } + describe 
'.signature_catalog' do + let(:service) { described_class.new(cocina_object) } context 'when signature_catalog exists in preservation' do before do @@ -118,7 +118,7 @@ end it 'retrieves it as a Moab::SignatureCatalog object' do - sig_cat = described_class.signature_catalog_from_preservation(druid) + sig_cat = service.send(:signature_catalog) expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog) expect(sig_cat.digital_object_id).to eq druid expect(sig_cat.version_id).to eq 1 @@ -132,7 +132,7 @@ end it 'returns a Moab::SignatureCatalog object for version 0' do - sig_cat = described_class.signature_catalog_from_preservation(druid) + sig_cat = service.send(:signature_catalog) expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog) expect(sig_cat.digital_object_id).to eq druid expect(sig_cat.version_id).to eq 0 @@ -141,14 +141,20 @@ end end - specify '.verify_version_id' do - expect(described_class.verify_version_id('/mypath/myfile', 2, 2)).to be_truthy - expect { described_class.verify_version_id('/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2') + describe '.verify_version_id' do + let(:service) { described_class.new(cocina_object) } + + it 'verifies the version' do + expect(service.send(:verify_version_id, '/mypath/myfile', 2, 2)).to be_truthy + expect { service.send(:verify_version_id, '/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2') + end end - specify '.vmfile_version_id' do - metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata') - vmfile = metadata_dir.join('versionMetadata.xml') - expect(described_class.vmfile_version_id(vmfile)).to eq 2 + describe '.vmfile_version_id' do + it 'returns the version' do + metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata') + vmfile = metadata_dir.join('versionMetadata.xml') + expect(described_class.new(cocina_object).send(:vmfile_version_id, vmfile)).to eq 2 + end 
end end