From 2a150d819ef46356ececc382bbeebe75dca84c34 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Mon, 1 Jul 2024 17:41:27 -0500 Subject: [PATCH] Attempt to read paths from content addressed storage if available --- app/models/cocina.rb | 7 + app/models/stacks_file.rb | 4 +- app/models/storage_root.rb | 35 ++-- config/settings.yml | 1 + spec/controllers/file_controller_spec.rb | 21 +-- .../legacy_image_service_controller_spec.rb | 1 + spec/factories/cocina.rb | 34 ++++ spec/models/stacks_file_spec.rb | 23 ++- spec/rails_helper.rb | 2 + spec/requests/file_auth_request_spec.rb | 44 +----- spec/requests/file_spec.rb | 42 +---- .../iiif/auth/v2/probe_service_spec.rb | 145 ++--------------- spec/requests/iiif_auth_request_spec.rb | 67 +------- spec/requests/iiif_spec.rb | 149 +++--------------- spec/requests/metrics_spec.rb | 25 +-- .../remote_iiif_image_delivery_spec.rb | 21 +-- 16 files changed, 124 insertions(+), 497 deletions(-) create mode 100644 spec/factories/cocina.rb diff --git a/app/models/cocina.rb b/app/models/cocina.rb index 6492748c..9fc16407 100644 --- a/app/models/cocina.rb +++ b/app/models/cocina.rb @@ -44,6 +44,13 @@ def find_file(file_name) .find { |file| file['filename'] == file_name } || raise(ActionController::MissingFile, "File not found '#{file_name}'") end + def find_file_md5(file_name) + file_node = find_file(file_name) + file_node.fetch('hasMessageDigests') + .find { |digest_node| digest_node.fetch('type') == 'md5' } + .fetch('digest') + end + def thumbnail_file data.dig('structural', 'contains') .lazy.flat_map { |file_set| file_set.dig('structural', 'contains') } diff --git a/app/models/stacks_file.rb b/app/models/stacks_file.rb index 76b2897f..4be6c7a1 100644 --- a/app/models/stacks_file.rb +++ b/app/models/stacks_file.rb @@ -9,6 +9,7 @@ class StacksFile def initialize(file_name:, cocina:) @file_name = file_name @cocina = cocina + validate! 
end attr_reader :file_name, :cocina @@ -17,7 +18,7 @@ def id cocina.druid end - validates :id, format: { with: StorageRoot::DRUID_PARTS_PATTERN } + validates :file_name, presence: true # Some files exist but have unreadable permissions, treat these as non-existent def readable? @@ -40,6 +41,7 @@ def path @path ||= storage_root.absolute_path end + # Used as the IIIF identifier for retrieving this file from the image server def treeified_path storage_root.relative_path end diff --git a/app/models/storage_root.rb b/app/models/storage_root.rb index 6e4d6052..3cd2e8ec 100644 --- a/app/models/storage_root.rb +++ b/app/models/storage_root.rb @@ -13,21 +13,9 @@ def initialize(file_name:, cocina:) delegate :druid, to: :cocina - def druid_parts - @druid_parts ||= druid.match(DRUID_PARTS_PATTERN) - end - - def absolute_path - return unless relative_path + delegate :absolute_path, to: :path_finder - path_finder.absolute_path.to_s - end - - def relative_path - return unless druid_parts && file_name - - path_finder.relative_path.to_s - end + delegate :relative_path, to: :path_finder def treeified_id File.join(druid_parts[1..4]) @@ -38,26 +26,41 @@ def treeified_id attr_reader :cocina, :file_name def path_finder - @path_finder ||= path_finder_class.new(treeified_id:, druid:, file_name:) + @path_finder ||= path_finder_class.new(treeified_id:, file_name:, cocina:) end def path_finder_class LegacyPathFinder end + def druid_parts + @druid_parts ||= druid.match(DRUID_PARTS_PATTERN) + end + # Calculate file paths in the legacy Stacks structure class LegacyPathFinder - def initialize(treeified_id:, file_name:, druid:) # rubocop:disable Lint/UnusedMethodArgument + def initialize(treeified_id:, file_name:, cocina:) @treeified_id = treeified_id @file_name = file_name + @cocina = cocina end + # As this is used for external service URLs (Cantaloupe image server), we don't want to use the content-addressable path here. 
def relative_path File.join(@treeified_id, @file_name) end def absolute_path + return content_addressable_path if File.exist?(content_addressable_path) + File.join(Settings.stacks.storage_root, relative_path) end + + def content_addressable_path + @content_addressable_path ||= begin + md5 = @cocina.find_file_md5(@file_name) + File.join(Settings.stacks.content_addressable_storage_root, @treeified_id, @cocina.druid, 'content', md5) + end + end end end diff --git a/config/settings.yml b/config/settings.yml index 8f6b591f..a669172a 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -4,6 +4,7 @@ features: stacks: storage_root: /stacks + content_addressable_storage_root: /stacks/content_addressable imageserver: base_uri: "http://imageserver-prod.stanford.edu/iiif/2/" diff --git a/spec/controllers/file_controller_spec.rb b/spec/controllers/file_controller_spec.rb index 8dd1fa51..8e5aec34 100644 --- a/spec/controllers/file_controller_spec.rb +++ b/spec/controllers/file_controller_spec.rb @@ -8,26 +8,7 @@ end let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file end describe '#show' do diff --git a/spec/controllers/legacy_image_service_controller_spec.rb b/spec/controllers/legacy_image_service_controller_spec.rb index 9ceafe47..22f46cac 100644 --- a/spec/controllers/legacy_image_service_controller_spec.rb +++ b/spec/controllers/legacy_image_service_controller_spec.rb @@ -9,6 +9,7 @@ let(:public_json) do { + 'externalIdentifier' => 'druid:nr349ct7889', 'structural' => { 'contains' => [ { diff --git a/spec/factories/cocina.rb b/spec/factories/cocina.rb new file mode 100644 index 00000000..6ac6af64 --- /dev/null +++ b/spec/factories/cocina.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Factories + def self.cocina(id: 
"druid:nr349ct7889") + { "externalIdentifier" => id } + end + + def self.cocina_with_file(id: "druid:nr349ct7889", file_name: 'image.jp2', access: {}, + file_access: { 'view' => 'world', 'download' => 'world' }, + mime_type: 'image/jp2') + cocina(id:).merge( + 'access' => access, + 'structural' => { + 'contains' => [ + { + 'structural' => { + 'contains' => [ + { + 'filename' => file_name, + 'hasMessageDigests' => [ + { 'type' => 'sha1', 'digest' => 'b1a2922356709cc53b85f1b8027982d23b573f80' }, + { 'type' => 'md5', 'digest' => '02f77c96c40ad3c7c843baa9c7b2ff2c' } + ], + 'hasMimeType' => mime_type, + 'access' => file_access + } + ] + } + } + ] + } + ) + end +end diff --git a/spec/models/stacks_file_spec.rb b/spec/models/stacks_file_spec.rb index cca03399..14a6af95 100644 --- a/spec/models/stacks_file_spec.rb +++ b/spec/models/stacks_file_spec.rb @@ -5,10 +5,19 @@ RSpec.describe StacksFile do let(:druid) { 'nr349ct7889' } let(:file_name) { 'image.jp2' } - let(:cocina) { Cocina.new({ 'externalIdentifier' => druid }) } + let(:cocina) { Cocina.new(public_json) } let(:instance) { described_class.new(file_name:, cocina:) } let(:path) { storage_root.absolute_path } let(:storage_root) { StorageRoot.new(cocina:, file_name:) } + let(:public_json) { Factories.cocina_with_file } + + context 'with a missing file name' do + let(:file_name) { nil } + + it 'raises an error' do + expect { instance }.to raise_error ActiveModel::ValidationError + end + end describe '#path' do subject { instance.path } @@ -16,18 +25,6 @@ it 'is the druid tree path to the file' do expect(subject).to eq(path) end - - context 'with a malformed druid' do - let(:druid) { 'abcdef' } - - it { is_expected.to be_nil } - end - - context 'with a missing file name' do - let(:file_name) { nil } - - it { is_expected.to be_nil } - end end describe '#readable?' 
do diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index d870e9ee..72998b92 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -11,6 +11,8 @@ require 'spec_helper' require 'rspec/rails' require 'capybara/rails' + +require_relative 'factories/cocina' # Add additional requires below this line. Rails is not loaded until this point! # Requires supporting ruby files with custom matchers and macros, etc, in diff --git a/spec/requests/file_auth_request_spec.rb b/spec/requests/file_auth_request_spec.rb index c3ee7412..2b330383 100644 --- a/spec/requests/file_auth_request_spec.rb +++ b/spec/requests/file_auth_request_spec.rb @@ -25,26 +25,7 @@ # NOTE: stanford only + location rights tested under location context context 'stanford only (no location qualifications)' do let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'stanford', - 'download' => 'stanford' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'stanford', 'download' => 'stanford' }) end context 'webauthed user' do @@ -69,27 +50,8 @@ context 'location' do context 'not stanford qualified in any way' do let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'location-based', - 'download' => 'location-based', - 'location' => 'location1' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', + 'location' => 'location1' }) end it 'allows when user in location' do diff --git a/spec/requests/file_spec.rb b/spec/requests/file_spec.rb index 4e1a3a3c..b41de18f 100644 --- a/spec/requests/file_spec.rb +++ b/spec/requests/file_spec.rb @@ -10,26 +10,7 @@ let(:druid) { 'nr349ct7889' } let(:file_name) { 
'image.jp2' } let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file end describe 'OPTIONS options' do @@ -44,26 +25,7 @@ describe 'GET file with slashes in filename' do let(:file_name) { 'path/to/image.jp2' } let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_name:) end before do diff --git a/spec/requests/iiif/auth/v2/probe_service_spec.rb b/spec/requests/iiif/auth/v2/probe_service_spec.rb index 87d6682c..79590bd3 100644 --- a/spec/requests/iiif/auth/v2/probe_service_spec.rb +++ b/spec/requests/iiif/auth/v2/probe_service_spec.rb @@ -7,7 +7,7 @@ let(:file_name) { 'image.jp2' } let(:stacks_uri) { "https://stacks-uat.stanford.edu/file/druid:#{id}/#{URI.encode_uri_component(file_name)}" } let(:stacks_uri_param) { URI.encode_uri_component(stacks_uri) } - let(:public_json) { { "externalIdentifier" => "druid:nr349ct7889" } } + let(:public_json) { Factories.cocina } # NOTE: For any unauthorized responses, the status from the service is OK...the access status of the resource is in the response body @@ -73,26 +73,7 @@ context 'when the user has access to the resource because it is world accessible' do let(:public_json) do - { - "externalIdentifier" => "druid:nr349ct7889", - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_name:) end before do @@ -140,26 +121,7 @@ context 'when the user has access to the resource and 
it is streamable' do let(:file_name) { 'SC0193_1982-013_b06_f01_1981-09-29.mp4' } let(:public_json) do - { - "externalIdentifier" => "druid:nr349ct7889", - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_name:) end let(:stacks_uri) { "https://stacks-uat.stanford.edu/file/#{id}/#{URI.encode_uri_component(file_name)}" } @@ -185,6 +147,10 @@ get "/iiif/auth/v2/probe?id=#{stacks_uri_param}" end + let(:public_json) do + Factories.cocina_with_file(file_name:) + end + it 'returns a 404 response' do expect(response).to have_http_status :ok expect(response.parsed_body).to include({ @@ -197,26 +163,7 @@ context 'when a Stanford only resource' do let(:public_json) do - { - "externalIdentifier" => "druid:nr349ct7889", - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'stanford', - 'download' => 'stanford' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'stanford', 'download' => 'stanford' }, file_name:) end context 'when the user has a bearer token with the ldap group' do @@ -278,27 +225,7 @@ context 'when the user does not have access to a location restricted resource' do let(:public_json) do - { - "externalIdentifier" => "druid:nr349ct7889", - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'location-based', - 'download' => 'location_based', - 'location' => location - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', 'location' => location }) end before do @@ -342,31 +269,8 @@ context 'when the user does not have access to a stanford restricted embargoed resource' do let(:public_json) do - { - 
"externalIdentifier" => "druid:nr349ct7889", - 'access' => { - 'embargo' => { - "releaseDate" => Time.parse('2099-05-15').getlocal.as_json - } - }, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'stanford', - 'download' => 'stanford' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(access: { 'embargo' => { "releaseDate" => Time.parse('2099-05-15').getlocal.as_json } }, + file_access: { 'view' => 'stanford', 'download' => 'stanford' }) end before do @@ -390,31 +294,8 @@ context 'when the user does not have access to an embargoed resource' do let(:public_json) do - { - "externalIdentifier" => "druid:nr349ct7889", - 'access' => { - 'embargo' => { - "releaseDate" => Time.parse('2099-05-15').getlocal.as_json - } - }, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'none', - 'download' => 'none' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(access: { 'embargo' => { "releaseDate" => Time.parse('2099-05-15').getlocal.as_json } }, + file_access: { 'view' => 'none', 'download' => 'none' }) end before do diff --git a/spec/requests/iiif_auth_request_spec.rb b/spec/requests/iiif_auth_request_spec.rb index cd01694d..221fc6ae 100644 --- a/spec/requests/iiif_auth_request_spec.rb +++ b/spec/requests/iiif_auth_request_spec.rb @@ -35,26 +35,7 @@ context 'with a public item' do let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file end context 'with an unauthenticated user' do @@ -70,26 +51,7 @@ context 'with a stanford only item' do let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' 
=> { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'stanford', - 'download' => 'stanford' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'stanford', 'download' => 'stanford' }) end context 'with a authorized webauthed user' do @@ -121,29 +83,10 @@ end end - context 'with a location-restricted item' do + context 'with a location-restricted item that is not a thumbnail' do let(:public_json) do - { - 'externalIdentifier' => druid, - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'location-based', - 'download' => 'location-based', - 'location' => 'location1' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', 'location' => 'location1' }, + mime_type: 'image/jpeg') end context 'with a user in the location' do diff --git a/spec/requests/iiif_spec.rb b/spec/requests/iiif_spec.rb index 971803e0..307170e1 100644 --- a/spec/requests/iiif_spec.rb +++ b/spec/requests/iiif_spec.rb @@ -15,26 +15,7 @@ end let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file end before do @@ -70,60 +51,22 @@ end context 'for location-restricted documents' do - let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'location-based', - 'download' => 'location_based', - 'location' => 'location1' - } - } - ] - } - } - ] - } - } - end - context 'outside of the location' do - it 'uses the unauthorized status code for the response' do - get 
'/image/iiif/nr349ct7889%2Fimage/info.json' - expect(response).to have_http_status :unauthorized + context 'when the file is not a thumbnail' do + let(:public_json) do + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', 'location' => 'location1' }, + mime_type: 'image/jpeg') + end + + it 'uses the unauthorized status code for the response' do + get '/image/iiif/nr349ct7889%2Fimage/info.json' + expect(response).to have_http_status :unauthorized + end end - context 'for a thumbnail' do + context 'when the files is a thumbnail' do let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'location-based', - 'download' => 'location_based', - 'location' => 'location1' - }, - 'hasMimeType' => 'image/jp2' - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', 'location' => 'location1' }) end it 'redirects requests to the degraded info.json' do @@ -137,6 +80,9 @@ context 'at the location' do let(:user) { User.new(ip_address: 'ip.address1') } + let(:public_json) do + Factories.cocina_with_file(file_access: { 'view' => 'location-based', 'download' => 'location-based', 'location' => 'location1' }) + end before do allow_any_instance_of(IiifController).to receive(:current_user).and_return(user) end @@ -149,26 +95,7 @@ context 'for stanford-restricted documents' do let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'stanford', - 'download' => 'stanford' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'stanford', 'download' => 'stanford' }) end it 'redirects requests to the degraded info.json' do @@ 
-190,26 +117,7 @@ context 'where no one can download' do let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'world', - 'download' => 'none' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'world', 'download' => 'none' }) end it 'serves up regular info.json (no degraded)' do @@ -227,26 +135,7 @@ context 'where stanford only no download rights' do let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'stanford', - 'download' => 'none' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file(file_access: { 'view' => 'stanford', 'download' => 'none' }) end it 'redirects to degraded version' do diff --git a/spec/requests/metrics_spec.rb b/spec/requests/metrics_spec.rb index e96539fa..008ba0df 100644 --- a/spec/requests/metrics_spec.rb +++ b/spec/requests/metrics_spec.rb @@ -6,27 +6,8 @@ include ActiveJob::TestHelper let(:druid) { 'nr349ct7889' } let(:file_name) { 'image.jp2' } - let(:json) do - { - 'externalIdentifier' => "druid:#{druid}", - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => file_name, - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - }.to_json + let(:public_json) do + Factories.cocina_with_file end let(:ability) { instance_double(CocinaAbility, can?: true, authorize!: true) } @@ -35,7 +16,7 @@ allow(Settings).to receive(:metrics_api_url).and_return('https://example.com') allow(CocinaAbility).to receive(:new).and_return(ability) stub_request(:post, 'https://example.com/ahoy/events') - stub_request(:get, "https://purl.stanford.edu/#{druid}.json").to_return(status: 200, body: json) + stub_request(:get, 
"https://purl.stanford.edu/#{druid}.json").to_return(status: 200, body: public_json.to_json) end context 'with an object' do diff --git a/spec/requests/remote_iiif_image_delivery_spec.rb b/spec/requests/remote_iiif_image_delivery_spec.rb index fff015ee..cf54687e 100644 --- a/spec/requests/remote_iiif_image_delivery_spec.rb +++ b/spec/requests/remote_iiif_image_delivery_spec.rb @@ -13,26 +13,7 @@ "http://imageserver-prod.stanford.edu/iiif/2/#{image_server_path('nr349ct7889', 'image.jp2')}/full/max/0/default.jpg" end let(:public_json) do - { - 'externalIdentifier' => 'druid:nr349ct7889', - 'structural' => { - 'contains' => [ - { - 'structural' => { - 'contains' => [ - { - 'filename' => 'image.jp2', - 'access' => { - 'view' => 'world', - 'download' => 'world' - } - } - ] - } - } - ] - } - } + Factories.cocina_with_file end before do