Skip to content

Commit

Permalink
cleanup/stop accessioning rake tasks (#4598)
Browse files Browse the repository at this point in the history
* cleanup/stop accessioning rake tasks

* change to logic of preservationIngestWF check; logging changes

* move all rake task logic to CleanupService and add tests
  • Loading branch information
peetucket authored Oct 12, 2023
1 parent 69e32c6 commit 537ae14
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 0 deletions.
80 changes: 80 additions & 0 deletions app/services/cleanup_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,72 @@

# Remove all traces of the object's data files from the workspace and export areas
class CleanupService
# @param [String] druid The identifier for the object for which we will stop accessioning
# @param [String] backup_path The base directory to backup to
# @param [boolean] dryrun if true, will just display output but not perform actions
def self.stop_accessioning(druid, backup_path, dryrun: false)
  # Building the Druid raises when the identifier is missing or malformed; its #druid is the
  # prefixed form (e.g. druid:ab123bc4567) whether or not a prefix was passed in.
  prefixed_druid = DruidTools::Druid.new(druid).druid

  # The lookup raises when no object exists for this druid.
  cocina_object = CocinaObjectStore.find(prefixed_druid)

  $stdout.puts '*** DRY RUN - NO ACTIONS WILL BE PERFORMED' if dryrun
  version = cocina_object.version
  $stdout.puts "...object found is an item: version #{version}"

  # An openable version is treated as proof that this version already reached preservation,
  # in which case accessioning can no longer be stopped.
  if VersionService.can_open?(druid: prefixed_druid, version:)
    raise "v#{version} of the object has already been sent to preservation: cannot proceed"
  end

  # A preservationIngestWF#complete-ingest step that exists but is not completed suggests that
  # preservation got part way and failed; extra remediation may be needed, so stop here.
  ingest_status = WorkflowClientFactory.build.workflow_status(
    druid: prefixed_druid, workflow: 'preservationIngestWF', process: 'complete-ingest'
  )
  ingest_unfinished = ingest_status.present? && ingest_status != 'completed'
  raise "v#{version} of the object has preservationIngestWF#complete-ingest not completed: cannot proceed" if ingest_unfinished

  $stdout.puts "...v#{version} of the object has not been sent to preservation"

  # Each step is always announced, but only executed when this is not a dry run.
  # Order matters: content is backed up before it is deleted, and workflows go last.
  destructive_steps = [
    ['...backing up content folders', -> { backup_content_by_druid(prefixed_druid, backup_path) }],
    ['...deleting content folders', -> { cleanup_by_druid(prefixed_druid) }],
    ['...deleting workflows', -> { delete_accessioning_workflows(prefixed_druid, version) }]
  ]
  destructive_steps.each do |announcement, action|
    $stdout.puts announcement
    action.call unless dryrun
  end

  # Let the user know we are done
  $stdout.puts "...accessioning stopped complete for #{prefixed_druid}"
end

# @param [String] druid The identifier for the object whose data is to be removed
def self.cleanup_by_druid(druid)
  # Clear the object's folder from each workspace-style area, in this order ...
  %i[local_workspace_root local_assembly_root].each do |area|
    cleanup_workspace_content(druid, Settings.cleanup.public_send(area))
  end

  # ... then hand off to cleanup_export, whose result is what this method returns.
  cleanup_export(druid)
end

# @param [String] druid The identifier for the object whose data is to be backed up
# @param [String] backup_path The base directory to backup to
def self.backup_content_by_druid(druid, backup_path)
  area_roots = Settings.cleanup

  # Back up the workspace, assembly and export areas in that order; the value returned is
  # whatever the final (export) backup_content call returned.
  %i[local_workspace_root local_assembly_root local_export_home]
    .map { |area| backup_content(druid, area_roots.public_send(area), backup_path) }
    .last
end

# @param [String] druid The identifier for the object whose accessioning workflows should be deleted
# @param [Integer] version The object version to delete workflows for
def self.delete_accessioning_workflows(druid, version)
  # The client does not depend on the workflow being deleted, so build it once
  # and reuse it rather than constructing a new one on every iteration.
  client = WorkflowClientFactory.build

  # Only the workflows for the given object version are deleted.
  %w[accessionWF assemblyWF versioningWF].each do |workflow|
    client.delete_workflow(druid:, workflow:, version:)
  end
end

# @param [String] druid The identifier for the object whose data is to be removed
# @param [String] base The base directory to delete from
# @return [void] remove the object's data files from the workspace area
Expand All @@ -19,6 +79,26 @@ def self.cleanup_workspace_content(druid, base)
end
private_class_method :cleanup_workspace_content

# Back up the specified workspace content folder (e.g. /dor/workspace) if it exists.
# Content is backed up to a base druid folder and then into subfolders by workspace (allowing for multiple to exist),
# e.g. /dor/workspace/ab/123/bc/4567/ab123bc4567 --> /dor/staging/stopped/ab123bc4567/workspace (holding content, metadata, ...)
#      /dor/assembly/ab/123/bc/4567/ab123bc4567 --> /dor/staging/stopped/ab123bc4567/assembly
# @param [String] druid The identifier for the object whose data is to be backed up
# @param [String] base The base directory path to backup from
# @param [String] backup_path The base directory to backup to
def self.backup_content(druid, base, backup_path)
  content_path = DruidTools::Druid.new(druid, base) # e.g. /dor/workspace/ab/123/bc/4567/ab123bc4567

  # Nothing to back up when this area holds no folder for the druid
  return unless File.directory?(content_path.path)

  base_backup_path = File.join(backup_path, content_path.id) # e.g. /dor/staging/stopped/ab123bc4567
  specific_backup_path = File.join(base_backup_path, File.basename(base)) # e.g. /dor/staging/stopped/ab123bc4567/workspace

  # Create the destination up front and copy the *contents* of the source folder ("src/.").
  # Copying the folder itself only gives the intended layout when the destination does not
  # exist yet; if it already exists (e.g. the same druid was stopped before), cp_r would
  # nest the copy one level deeper as <area>/<druid>/... instead of <area>/content.
  # NOTE(review): files already present in an earlier backup are overwritten by same-named files.
  FileUtils.mkdir_p(specific_backup_path)
  FileUtils.cp_r(File.join(content_path.path, '.'), specific_backup_path)
end
private_class_method :backup_content

# @param [String] druid The identifier for the object whose data is to be removed
# @return [void] remove copy of the data that was exported to preservation core
def self.cleanup_export(druid)
Expand Down
46 changes: 46 additions & 0 deletions lib/tasks/cleanup.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# frozen_string_literal: true

# rubocop: disable Metrics/BlockLength
namespace :cleanup do
  # Base directory that content is backed up to before it is deleted (shared by both tasks)
  backup_path = '/dor/stopped'

  # Prints the prompt and aborts unless the operator answers exactly "y".
  # $stdin.gets returns nil at end of input (e.g. when run non-interactively); the safe
  # navigation makes that an 'Aborting' error as well instead of a NoMethodError.
  confirm = lambda do |prompt|
    $stdout.puts prompt
    raise 'Aborting' unless $stdin.gets&.chomp == 'y'
  end

  # Stop accessioning in progress for the supplied druid (if possible).
  # bundle exec rake cleanup:stop_accessioning['druid:ab123bc4567']
  # bundle exec rake cleanup:stop_accessioning['druid:ab123bc4567',:dryrun] # shows output but does not actually delete
  desc 'Stop Accessioning for single druid'
  task :stop_accessioning, [:druid, :dryrun] => :environment do |_task, args|
    # Any value supplied as the second argument enables the dry run
    dryrun = args[:dryrun] || false
    druid = args[:druid]

    confirm.call("This will completely stop accessioning for #{druid}. Are you sure? [y/n]:")

    CleanupService.stop_accessioning(druid, backup_path, dryrun:)
  end

  # Stop accessioning in progress for multiple druids supplied in a CSV (one per line, no header)
  # bundle exec rake cleanup:bulk_stop_accessioning['tmp/druids.csv']
  # bundle exec rake cleanup:bulk_stop_accessioning['tmp/druids.csv',:dryrun] # shows output but does not actually delete
  desc 'Stop Accessioning for multiple druids provided in a CSV'
  task :bulk_stop_accessioning, [:input_file, :dryrun] => :environment do |_task, args|
    # Loaded here so the task does not rely on another library having required CSV already
    require 'csv'

    # A missing argument is reported the same way as a missing file (File.exist?(nil) would raise TypeError)
    input_file = args[:input_file]
    raise 'CSV file not found' unless input_file && File.exist?(input_file)

    dryrun = args[:dryrun] || false
    $stdout.puts '*** DRY RUN - NO ACTIONS WILL BE PERFORMED' if dryrun

    # Only the first column is used; blank rows are skipped so they are neither counted nor processed
    druids = CSV.read(input_file).filter_map { |row| row.first&.strip }.reject(&:empty?)
    confirm.call("This will completely stop accessioning for #{druids.size} objects. Are you sure? [y/n]:")

    druids.each do |druid|
      $stdout.puts druid

      begin
        CleanupService.stop_accessioning(druid, backup_path, dryrun:)
      rescue StandardError => e
        # Report the failure and carry on with the remaining druids
        $stdout.puts "Error stopping accessioning for #{druid}: #{e.message} #{e.backtrace.join("\n")}"
      end
    end
  end
end
# rubocop: enable Metrics/BlockLength
143 changes: 143 additions & 0 deletions spec/services/cleanup_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
let(:export_pathname) { Pathname(Settings.cleanup.local_export_home) }
let(:bag_pathname) { export_pathname.join(druid.split(':').last) }
let(:tarfile_pathname) { export_pathname.join("#{bag_pathname}.tar") }
let(:backup_path) { Pathname('tmp/stopped') }
let(:workspace_backup_path) { backup_path.join(File.basename(workitem_pathname), File.basename(workspace_root_pathname)) } # e.g. tmp/stopped/aa123bb4567/workspace

before do
allow(Settings.cleanup).to receive_messages(
Expand All @@ -38,6 +40,7 @@
item_root_branch.rmtree if item_root_branch.exist?
bag_pathname.rmtree if bag_pathname.exist?
tarfile_pathname.rmtree if tarfile_pathname.exist?
workspace_backup_path.rmtree if workspace_backup_path.exist?
end

it 'can find the fixtures workspace and export folders' do
Expand All @@ -56,6 +59,136 @@
end
end

describe '.stop_accessioning' do
  let(:dro) { build(:ar_dro) }
  let(:druid) { dro.external_identifier }
  let(:version) { dro.version }
  let(:client) { instance_double(Dor::Workflow::Client) }
  # Defaults describe an object that can be stopped; individual contexts override them.
  let(:can_open) { false }
  let(:ingest_status) { nil }

  before do
    allow(CocinaObjectStore).to receive(:find).and_return(dro)
    allow(VersionService).to receive(:can_open?).and_return(can_open)
    allow(WorkflowClientFactory).to receive(:build).and_return(client)
    allow(client).to receive(:workflow_status)
      .with(druid:, workflow: 'preservationIngestWF', process: 'complete-ingest')
      .and_return(ingest_status)
    # The destructive steps are stubbed: these examples only verify whether they are invoked.
    allow(described_class).to receive(:backup_content_by_druid)
    allow(described_class).to receive(:cleanup_by_druid)
    allow(described_class).to receive(:delete_accessioning_workflows)
  end

  # Asserts that each destructive step ran exactly once with the expected arguments
  def expect_accessioning_stopped
    expect(described_class).to have_received(:backup_content_by_druid).once.with(druid, backup_path)
    expect(described_class).to have_received(:cleanup_by_druid).once.with(druid)
    expect(described_class).to have_received(:delete_accessioning_workflows).once.with(druid, version)
  end

  # Asserts that none of the destructive steps ran
  def expect_nothing_done
    expect(described_class).not_to have_received(:backup_content_by_druid)
    expect(described_class).not_to have_received(:cleanup_by_druid)
    expect(described_class).not_to have_received(:delete_accessioning_workflows)
  end

  context 'when object cannot be opened and preservationIngestWF exists and is completed' do
    let(:ingest_status) { 'completed' }

    it 'backs up content, cleans up content, and deletes workflows' do
      described_class.stop_accessioning(druid, backup_path)
      expect_accessioning_stopped
    end

    context 'when dryrun' do
      it 'does nothing' do
        described_class.stop_accessioning(druid, backup_path, dryrun: true)
        expect_nothing_done
      end
    end
  end

  context 'when object cannot be opened and preservationIngestWF does not exist' do
    let(:ingest_status) { nil }

    it 'backs up content, cleans up content, and deletes workflows' do
      described_class.stop_accessioning(druid, backup_path)
      expect_accessioning_stopped
    end
  end

  context 'when object can be opened (i.e. already in preservation)' do
    let(:can_open) { true }

    it 'raises an exception and stops' do
      # Match the specific failure so an unrelated error cannot satisfy this example
      expect { described_class.stop_accessioning(druid, backup_path) }
        .to raise_error(RuntimeError, /already been sent to preservation/)
      expect_nothing_done
    end
  end

  context 'when object cannot be opened but preservationIngestWF exists and is not complete (i.e. problem preserving)' do
    let(:ingest_status) { 'waiting' }

    it 'raises an exception and stops' do
      expect { described_class.stop_accessioning(druid, backup_path) }
        .to raise_error(RuntimeError, /preservationIngestWF#complete-ingest not completed/)
      expect_nothing_done
    end
  end

  context 'with bogus druid' do
    it 'raises an exception and stops' do
      # NOTE(review): the exact error class comes from DruidTools and is not pinned here — confirm before tightening
      expect { described_class.stop_accessioning('bogus', backup_path) }.to raise_error StandardError
      expect_nothing_done
    end
  end

  context 'with object not found' do
    before do
      allow(CocinaObjectStore).to receive(:find).and_raise(CocinaObjectStore::CocinaObjectNotFoundError)
    end

    it 'raises an exception and stops' do
      expect { described_class.stop_accessioning('druid:oo001oo0001', backup_path) }
        .to raise_error(CocinaObjectStore::CocinaObjectNotFoundError)
      expect_nothing_done
    end
  end
end

describe '.backup_content_by_druid' do
  # backup_content is stubbed so that only the delegation is verified, not the copying itself
  before { allow(described_class).to receive(:backup_content) }

  it 'calls backup_content for each workspace area' do
    described_class.backup_content_by_druid(druid, backup_path)

    %i[local_workspace_root local_assembly_root local_export_home].each do |area|
      expect(described_class).to have_received(:backup_content).once.with(druid, Settings.cleanup.public_send(area), backup_path)
    end
  end
end

describe '.delete_accessioning_workflows' do
  let(:client) { instance_double(Dor::Workflow::Client, delete_workflow: nil) }
  let(:version) { 1 }

  before { allow(WorkflowClientFactory).to receive(:build).and_return(client) }

  it 'calls workflow client to delete each accessioning workflow' do
    described_class.delete_accessioning_workflows(druid, version)

    # One deletion per accessioning workflow, each for the requested version
    %w[accessionWF assemblyWF versioningWF].each do |workflow|
      expect(client).to have_received(:delete_workflow).once.with(druid:, workflow:, version:)
    end
  end
end

describe '.cleanup_export' do
before do
allow(FileUtils).to receive(:rm_rf)
Expand All @@ -69,6 +202,16 @@
end
end

describe '.backup_content' do
  # backup_content is a private class method, hence the use of `send`
  it 'backs up content from the workspace area and leaves the original in place' do
    expect(workspace_backup_path.join('content')).not_to exist # backup content is not there yet
    expect(workitem_pathname.join('content')).to exist
    described_class.send(:backup_content, druid, workspace_root_pathname, backup_path)
    expect(workitem_pathname.join('content')).to exist # main content is still there!
    expect(workspace_backup_path.join('content')).to exist # backup content is now there
  end
end

describe '.cleanup_by_druid' do
it 'can do a complete cleanup' do
expect(workitem_pathname.join('content')).to exist
Expand Down

0 comments on commit 537ae14

Please sign in to comment.