diff --git a/Gemfile b/Gemfile index 761e67e5..5b1e2399 100644 --- a/Gemfile +++ b/Gemfile @@ -84,4 +84,5 @@ gem 'connection_pool' group :production do gem 'newrelic_rpm' -end \ No newline at end of file +end +gem "device_detector", "~> 1.1" diff --git a/Gemfile.lock b/Gemfile.lock index 6084c3b3..4a5b303b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -123,6 +123,7 @@ GEM irb (>= 1.5.0) reline (>= 0.3.1) deep_merge (1.2.2) + device_detector (1.1.1) diff-lcs (1.5.0) dlss-capistrano (4.4.0) capistrano (~> 3.0) @@ -421,6 +422,7 @@ DEPENDENCIES connection_pool dalli debug + device_detector (~> 1.1) dlss-capistrano dor-rights-auth (~> 1.8) faraday diff --git a/app/controllers/concerns/metrics_concern.rb b/app/controllers/concerns/metrics_concern.rb new file mode 100644 index 00000000..20eaf4d0 --- /dev/null +++ b/app/controllers/concerns/metrics_concern.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +# Methods for logging usage metrics based on requests for files +module MetricsConcern + def track_download(druid, file: nil) + return unless enabled? + + ensure_visit + properties = { druid:, file: }.compact + metrics_service.track_event(event_data('download', properties)) + end + + private + + # We're responsible for ensuring that every event is tied to a visit + def ensure_visit + return if existing_visit? + + set_visit_token unless visit_token + set_visitor_token unless visitor_token + + metrics_service.track_visit(visit_data) + end + + # Schema: https://github.com/ankane/ahoy#visits-1 + def visit_data + { + visit_token:, + visitor_token:, + js: false + }.merge(visit_properties) + end + + # Schema: https://github.com/ankane/ahoy#events-1 + def event_data(name, properties = {}) + { + visit_token:, + visitor_token:, + events: [ + { + id: generate_id, + time: Time.current, + name:, + properties: + } + ] + } + end + + def existing_visit? 
+ visit_token && visitor_token + end + + def visit_token + cookies[:ahoy_visit] + end + + def visitor_token + cookies[:ahoy_visitor] + end + + # Sessions last for 1 hour (default used by Zenodo) + def set_visit_token + cookies[:ahoy_visit] = { + value: generate_id, + expires: 1.hour.from_now, + domain: 'stanford.edu' + } + end + + # Visitors are remembered for 2 years (Ahoy's default) + def set_visitor_token + cookies[:ahoy_visitor] = { + value: generate_id, + expires: 2.years.from_now, + domain: 'stanford.edu' + } + end + + # Ahoy uses UUIDs for visit/visitor/event IDs + def generate_id + SecureRandom.uuid + end + + def visit_properties + @visit_properties ||= VisitProperties.new(request).generate + end + + def metrics_service + @metrics_service ||= MetricsService.new + end + + def enabled? + Settings.features.metrics == true + end +end diff --git a/app/controllers/file_controller.rb b/app/controllers/file_controller.rb index f4496faa..16f1d132 100644 --- a/app/controllers/file_controller.rb +++ b/app/controllers/file_controller.rb @@ -3,10 +3,13 @@ ## # API for delivering files from stacks class FileController < ApplicationController + include MetricsConcern + rescue_from ActionController::MissingFile do render plain: 'File not found', status: :not_found end + # rubocop:disable Metrics/AbcSize def show return unless stale?(**cache_headers) @@ -16,8 +19,10 @@ def show response.headers['Content-Length'] = current_file.content_length response.headers.delete('X-Frame-Options') + track_download current_file.id, file: current_file.file_name send_file current_file.path, disposition: end + # rubocop:enable Metrics/AbcSize def options response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS' diff --git a/app/controllers/object_controller.rb b/app/controllers/object_controller.rb index fd999b57..e639e411 100644 --- a/app/controllers/object_controller.rb +++ b/app/controllers/object_controller.rb @@ -4,6 +4,7 @@ # API for delivering whole objects from stacks class 
ObjectController < ApplicationController include Zipline + include MetricsConcern # Return a zip of all the files if they have access to all the files. # This will force a login if any of the files is not access=world @@ -22,6 +23,7 @@ def show ] end + track_download druid zipline(zip_contents, "#{druid}.zip") end diff --git a/app/models/visit_properties.rb b/app/models/visit_properties.rb new file mode 100644 index 00000000..78797e70 --- /dev/null +++ b/app/models/visit_properties.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +# Properties of a user's session that are useful for tracking SDR metrics +# +# Adapted from: lib/ahoy/visit_properties.rb +# https://github.com/ankane/ahoy/blob/master/lib/ahoy/visit_properties.rb +class VisitProperties + attr_reader :request, :params, :referrer, :landing_page + + def initialize(request) + @request = request + @params = request.params + @referrer = request.referer || '' + @landing_page = request.original_url + end + + def generate + @generate ||= request_properties.merge(tech_properties) + end + + private + + def request_properties + { + ip:, + user_agent:, + referrer:, + referring_domain:, + landing_page: + } + end + + def tech_properties + client = DeviceDetector.new(user_agent) + + # Convert device type to Ahoy's style + device_type = + case client.device_type + when 'smartphone' then 'Mobile' + when 'tv' then 'TV' + else client.device_type&.titleize + end + + { + browser: client.name, + os: client.os_name, + device_type: + } + end + + def referring_domain + return if referrer.blank? + + URI.parse(referrer).host&.first(255) + rescue URI::InvalidURIError + nil + end + + # Mask IPs by zeroing last octet (IPv4) or 80 bits (IPv6) + # Based on Google Analytics' IP masking + # https://support.google.com/analytics/answer/2763052 + def ip + addr = IPAddr.new(@request.remote_ip) + addr.ipv4? ? 
addr.mask(24).to_s : addr.mask(48).to_s + end + + # User agents may be absent or invalid UTF-8; always return a valid UTF-8 string + def user_agent + @request.user_agent.to_s.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') + end +end diff --git a/app/services/metrics_service.rb b/app/services/metrics_service.rb new file mode 100644 index 00000000..4ab188c9 --- /dev/null +++ b/app/services/metrics_service.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +# Tracks metrics via the SDR Metrics API +# https://github.com/sul-dlss/sdr-metrics-api +# +# See also Ahoy's API spec: +# https://github.com/ankane/ahoy#api-spec +class MetricsService + attr_reader :base_url + + def initialize(base_url: Settings.metrics_api_url) + @base_url = base_url + end + + def track_visit(data) + post_json('/ahoy/visits', data) + end + + def track_event(data) + post_json('/ahoy/events', data) + end + + private + + def post_json(url, data) + connection.post(url) do |req| + req.headers['Content-Type'] = 'application/json' + req.headers['Ahoy-Visit'] = data[:visit_token] + req.headers['Ahoy-Visitor'] = data[:visitor_token] + req.body = data.to_json + end + rescue Faraday::Error => e + Rails.logger.error("Error sending metrics: #{e}") + nil + end + + def connection + @connection ||= Faraday.new(base_url) + end +end diff --git a/config/settings.yml b/config/settings.yml index aeddf981..398f31e1 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -4,6 +4,8 @@ stacks: imageserver: base_uri: 'http://imageserver-prod.stanford.edu/iiif/2/' +metrics_api_url: 'https://sdr-metrics-api-prod.stanford.edu' + purl: url: 'https://purl.stanford.edu/' @@ -46,6 +48,7 @@ cors: features: streaming_media: true cocina: false + metrics: false token: default_expiry_time: <%= 1.hour %> diff --git a/spec/controllers/file_controller_spec.rb b/spec/controllers/file_controller_spec.rb index 465c3685..289f5c85 100644 --- a/spec/controllers/file_controller_spec.rb +++ 
b/spec/controllers/file_controller_spec.rb @@ -73,5 +73,22 @@ expect(controller).to receive(:send_file).and_raise ActionController::MissingFile expect(subject.status).to eq 404 end + + context 'when metrics tracking is enabled' do + before do + allow(Settings.features).to receive(:metrics).and_return(true) + stub_request :post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/events' + stub_request :post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/visits' + end + + it 'tracks a download event with the druid and file name' do + get :show, params: { id: 'xf680rd3068', file_name: 'xf680rd3068_1.jp2' } + expect(a_request(:post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/events').with do |req| + expect(req.body).to include '"name":"download"' + expect(req.body).to include '"druid":"xf680rd3068"' + expect(req.body).to include '"file":"xf680rd3068_1.jp2"' + end).to have_been_made + end + end end end diff --git a/spec/controllers/metrics_concern_spec.rb b/spec/controllers/metrics_concern_spec.rb new file mode 100644 index 00000000..9c81c7a1 --- /dev/null +++ b/spec/controllers/metrics_concern_spec.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe MetricsConcern do + controller do + # rubocop:disable RSpec/DescribedClass + include MetricsConcern + # rubocop:enable RSpec/DescribedClass + + def download + track_download params[:druid], file: params[:file] + head :ok + end + end + + let(:metrics_service) { instance_double(MetricsService) } + let(:visit_cookie) { 'abc123' } + let(:visitor_cookie) { 'xyz789' } + + before do + allow(Settings.features).to receive(:metrics).and_return(true) + allow(controller).to receive(:metrics_service).and_return(metrics_service) + routes.draw { get 'download' => 'anonymous#download' } + cookies[:ahoy_visit] = visit_cookie + cookies[:ahoy_visitor] = visitor_cookie + end + + describe '#track_download' do + before do + allow(metrics_service).to receive(:track_visit) + allow(metrics_service).to 
receive(:track_event) + end + + it 'tracks a download event with the druid' do + get 'download', params: { druid: 'fd063dh3727' } + expect(metrics_service).to have_received(:track_event).with( + visit_token: visit_cookie, + visitor_token: visitor_cookie, + events: [ + { + id: be_kind_of(String), + time: be_kind_of(Time), + name: 'download', + properties: { + druid: 'fd063dh3727' + } + } + ] + ) + end + + context 'when an individual file is passed' do + it 'tracks the event with the druid and filename' do + get 'download', params: { druid: 'fd063dh3727', file: 'file.txt' } + expect(metrics_service).to have_received(:track_event).with( + visit_token: visit_cookie, + visitor_token: visitor_cookie, + events: [ + { + id: be_kind_of(String), + time: be_kind_of(Time), + name: 'download', + properties: { + druid: 'fd063dh3727', + file: 'file.txt' + } + } + ] + ) + end + end + + context 'when a visit is not in progress' do + let(:visit_cookie) { nil } + + it 'creates a new visit' do + get 'download', params: { druid: 'fd063dh3727' } + expect(metrics_service).to have_received(:track_visit) + end + end + end +end diff --git a/spec/controllers/object_controller_spec.rb b/spec/controllers/object_controller_spec.rb index f1d13757..7678840a 100644 --- a/spec/controllers/object_controller_spec.rb +++ b/spec/controllers/object_controller_spec.rb @@ -138,6 +138,22 @@ entries = ZipTricks::FileReader.new.read_zip_structure(io: StringIO.new(response.body)) expect(entries.length).to eq 6 end + + context 'when metrics tracking is enabled' do + before do + allow(Settings.features).to receive(:metrics).and_return(true) + stub_request :post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/events' + stub_request :post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/visits' + end + + it 'tracks a download event with the druid' do + get :show, params: { id: 'fd063dh3727' } + expect(a_request(:post, 'https://sdr-metrics-api-prod.stanford.edu/ahoy/events').with do |req| + expect(req.body).to 
include '"name":"download"' + expect(req.body).to include '"druid":"fd063dh3727"' + end).to have_been_made + end + end end context "with a stanford access file" do diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index c181c1a7..7444e664 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -60,4 +60,9 @@ # The different available types are documented in the features, such as in # https://relishapp.com/rspec/rspec-rails/docs config.infer_spec_type_from_file_location! + + # Make anonymous controllers declared in tests inherit from ApplicationController + # instead of the described class. Useful for concern tests. + # https://rspec.info/features/6-0/rspec-rails/controller-specs/anonymous-controller/ + config.infer_base_class_for_anonymous_controllers = false end