Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor and annotate PreservationIngestService. #4596

Merged
merged 1 commit into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 65 additions & 28 deletions app/services/preservation_ingest_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,50 +11,62 @@ class PreservationIngestService
# @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def self.transfer(cocina_object)
druid = cocina_object.externalIdentifier
workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root)
signature_catalog = signature_catalog_from_preservation(druid)
new_version_id = signature_catalog.version_id + 1
new(cocina_object).transfer
end

# @param [Cocina::Models::DRO, Cocina::Models::Collection] cocina_object The representation of the digital object
def initialize(cocina_object)
@cocina_object = cocina_object
end

# @param [Cocina::Models::DRO, Cocina::Models::Collection] cocina_object The representation of the digital object
# @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def transfer
# Writes versionMetadata.xml, contentMetadata.xml, and cocina.json
metadata_dir = PreservationMetadataExtractor.extract(workspace:, cocina_object:)

# Makes sure that the versionMetadata.xml version matches the expected version from preservation.
verify_version_metadata(metadata_dir, new_version_id)
version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:,
druid:,
version_id: new_version_id)
# Creates a Moab::FileInventory from the contentMetadata.xml
version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:, druid:, version_id: new_version_id)

# Creates a Moab::FileInventory containing only the files that are changed in this version.
version_additions = signature_catalog.version_additions(version_inventory)

# Find the changed files in the workspace
content_additions = version_additions.group('content')
if content_additions.nil? || content_additions.files.empty?
content_dir = nil
else
content_dir = nil
if content_additions.present? && content_additions.files.any?
new_file_list = content_additions.path_list
content_dir = workspace.find_filelist_parent('content', new_file_list)
end

content_group = version_inventory.group('content')

# Regenerate the fixities for content (md5, sha1, sha256) if they are missing.
signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
# export the bag (in tar format)
bag_dir = Pathname(Settings.sdr.local_export_home).join(druid.sub('druid:', ''))
bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
bagger.reset_bag
bagger.create_bag_inventory(:depositor)
bagger.deposit_group('content', content_dir)
bagger.deposit_group('metadata', metadata_dir)
bagger.create_tagfiles
Preserve::BagVerifier.verify(directory: bag_dir)

export(version_inventory, content_dir, metadata_dir)
end
# NOTE: the following methods should probably all be private

# @param [String] druid The object identifier
private

attr_reader :cocina_object

# @return [Moab::SignatureCatalog] the manifest of all files previously ingested,
# or if there is none, a SignatureCatalog object for version 0.
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def self.signature_catalog_from_preservation(druid)
Preservation::Client.objects.signature_catalog(druid)
rescue Preservation::Client::NotFoundError
Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0)
# Memoized lookup of the signature catalog for this druid. When the
# preservation client reports NotFoundError, a version 0 catalog is built
# and cached instead.
def signature_catalog
  return @signature_catalog if @signature_catalog

  @signature_catalog = Preservation::Client.objects.signature_catalog(druid)
rescue Preservation::Client::NotFoundError
  @signature_catalog = Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0)
end

# @param [Pathname] metadata_dir the location of the metadata directory in the workspace
# @param [Integer] expected the version identifier expected to be used in the versionMetadata
def self.verify_version_metadata(metadata_dir, expected)
def verify_version_metadata(metadata_dir, expected)
vmfile = metadata_dir.join('versionMetadata.xml')
verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
true
Expand All @@ -63,20 +75,45 @@ def self.verify_version_metadata(metadata_dir, expected)
# @param [Pathname] pathname The location of the file containing a version number
# @param [Integer] expected The version number that should be in the file
# @param [Integer] found The version number that is actually in the file
def self.verify_version_id(pathname, expected, found)
# Returns true when the found version equals the expected one; otherwise
# raises a RuntimeError naming the file and both version numbers.
def verify_version_id(pathname, expected, found)
  return true if expected == found

  raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}"
end

# @param [Pathname] pathname the location of the versionMetadata file
# @return [Integer] the versionId found in the last version element, or nil if missing
def self.vmfile_version_id(pathname)
# Reads the versionId attribute of the last /versionMetadata/version element.
# Returns nil when the file has no version element or the element has no
# versionId attribute, instead of failing with NoMethodError on a nil node.
# @raise [RuntimeError] if the file does not exist
def vmfile_version_id(pathname)
  raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?

  doc = Nokogiri::XML(File.read(pathname.to_s))
  last_version = doc.xpath('/versionMetadata/version').last
  # `last` is nil when the xpath matched nothing, so guard before reading the attribute.
  version_id = last_version && last_version['versionId']
  version_id&.to_i
end

# Export the bag (using symlinks for content files).
# The bag directory is the configured local export home joined with the
# druid minus its 'druid:' prefix. Returns the result of the bag verification.
def export(version_inventory, content_dir, metadata_dir)
  export_home = Pathname(Settings.sdr.local_export_home)
  bag_dir = export_home.join(druid.sub('druid:', ''))

  Moab::Bagger.new(version_inventory, signature_catalog, bag_dir).tap do |bagger|
    bagger.reset_bag
    bagger.create_bag_inventory(:depositor)
    # Content is deposited before metadata (hash iteration follows insertion order).
    { 'content' => content_dir, 'metadata' => metadata_dir }.each do |group, source_dir|
      bagger.deposit_group(group, source_dir)
    end
    bagger.create_tagfiles
  end

  # Checks for required files. (Does not check fixities.)
  Preserve::BagVerifier.verify(directory: bag_dir)
end

# @return the externalIdentifier of the cocina object (e.g. 'druid:dd116zh0343' in the spec)
def druid
cocina_object.externalIdentifier
end

# Memoized DruidTools::Druid for this object's druid, built with the
# configured local workspace root.
def workspace
  return @workspace if @workspace

  @workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root)
end

# Memoized version number for this transfer: the signature catalog's
# version_id plus one.
def new_version_id
  return @new_version_id if @new_version_id

  @new_version_id = signature_catalog.version_id + 1
end
end
32 changes: 19 additions & 13 deletions spec/services/preservation_ingest_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
File.read(fixtures.join('sdr_repo/dd116zh0343/v0001/manifests/signatureCatalog.xml'))
)
end
let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) }
let(:druid) { 'druid:dd116zh0343' }

before do
allow(Settings.sdr).to receive_messages(local_workspace_root: fixtures.join('workspace').to_s,
Expand All @@ -36,8 +38,6 @@
end

describe '.transfer' do
let(:druid) { 'druid:dd116zh0343' }
let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) }
let(:metadata_dir) { fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata') }

before do
Expand Down Expand Up @@ -109,16 +109,16 @@
end
end

describe '.signature_catalog_from_preservation' do
let(:druid) { 'druid:dd116zh0343' }
describe '.signature_catalog' do
let(:service) { described_class.new(cocina_object) }

context 'when signature_catalog exists in preservation' do
before do
allow(Preservation::Client.objects).to receive(:signature_catalog).and_return(fixture_sig_cat_obj)
end

it 'retrieves it as a Moab::SignatureCatalog object' do
sig_cat = described_class.signature_catalog_from_preservation(druid)
sig_cat = service.send(:signature_catalog)
expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog)
expect(sig_cat.digital_object_id).to eq druid
expect(sig_cat.version_id).to eq 1
Expand All @@ -132,7 +132,7 @@
end

it 'returns a Moab::SignatureCatalog object for version 0' do
sig_cat = described_class.signature_catalog_from_preservation(druid)
sig_cat = service.send(:signature_catalog)
expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog)
expect(sig_cat.digital_object_id).to eq druid
expect(sig_cat.version_id).to eq 0
Expand All @@ -141,14 +141,20 @@
end
end

specify '.verify_version_id' do
expect(described_class.verify_version_id('/mypath/myfile', 2, 2)).to be_truthy
expect { described_class.verify_version_id('/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2')
describe '.verify_version_id' do
let(:service) { described_class.new(cocina_object) }

it 'verifies the version' do
expect(service.send(:verify_version_id, '/mypath/myfile', 2, 2)).to be_truthy
expect { service.send(:verify_version_id, '/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2')
end
end

specify '.vmfile_version_id' do
metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata')
vmfile = metadata_dir.join('versionMetadata.xml')
expect(described_class.vmfile_version_id(vmfile)).to eq 2
describe '.vmfile_version_id' do
it 'returns the version' do
metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata')
vmfile = metadata_dir.join('versionMetadata.xml')
expect(described_class.new(cocina_object).send(:vmfile_version_id, vmfile)).to eq 2
end
end
end