Skip to content

Commit

Permalink
Merge pull request #1880 from mysociety/excel-analyzer-pii-badger-job
Browse files Browse the repository at this point in the history
[ExcelAnalyzer] Add PII badger job
  • Loading branch information
gbp authored May 30, 2024
2 parents c3c850d + abe7b77 commit 9ee9f97
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 33 deletions.
41 changes: 12 additions & 29 deletions lib/excel_analyzer.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
ExcelAnalyzer.on_hidden_metadata = ->(attachment_blob, metadata) do
# Register the ExcelAnalyzer sub-directories with the main autoloader before
# the application initializes, so that files such as
# excel_analyzer/jobs/pii_badger_job.rb are loaded under the ExcelAnalyzer
# namespace.
Rails.application.config.before_initialize do
  autoloader = Rails.autoloaders.main
  directories = File.join(File.dirname(__FILE__), 'excel_analyzer', '**/')

  # The trailing slash in the glob restricts matches to directories.
  Dir.glob(directories).each do |directory|
    autoloader.push_dir(directory, namespace: ExcelAnalyzer)
  end

  # Keep the "PII" acronym upper-cased when mapping the file name to its
  # constant.
  autoloader.inflector.inflect('pii_badger_job' => 'PIIBadgerJob')
end

ExcelAnalyzer.on_hidden_metadata = ->(attachment_blob, _) do
foi_attachment = FoiAttachment.joins(:file_blob).
find_by(active_storage_blobs: { id: attachment_blob })

Expand All @@ -16,32 +26,5 @@
}
)

ExcelAnalyzerNotifier.report(foi_attachment, metadata).deliver_now
end

Rails.configuration.to_prepare do
class ExcelAnalyzerNotifier < ApplicationMailer
include Rails.application.routes.url_helpers
default_url_options[:host] = AlaveteliConfiguration.domain

def report(foi_attachment, metadata)
@foi_attachment = foi_attachment
@incoming_message = foi_attachment.incoming_message
@metadata = metadata

from = email_address_with_name(
blackhole_email, 'WhatDoTheyKnow.com Excel Analyzer report'
)

headers['X-WDTK-Contact'] = 'wdtk-excel-analyzer-report'
headers['X-WDTK-CaseRef'] = @foi_attachment.id

mail(
from: from,
to: ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'],
subject: _('ExcelAnalyzer: hidden data detected [{{reference}}]',
reference: @foi_attachment.id)
)
end
end
ExcelAnalyzer::PIIBadgerJob.perform_later(attachment_blob)
end
28 changes: 28 additions & 0 deletions lib/excel_analyzer/jobs/pii_badger_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
require 'shellwords'

##
# Job to run additional Personally Identifiable Information (PII) checks on
# files stored as ActiveStorage::Blob
#
# The blob is opened as a local temporary file, passed to the external command
# configured in ENV['EXCEL_ANALYZER_PII_BADGER_COMMAND'], and the JSON printed
# by that command is merged into the blob metadata under the `pii_badger` key.
# A report email is then delivered through ExcelAnalyzer::NotifierMailer.
#
# Examples:
#   ExcelAnalyzer::PIIBadgerJob.perform_now(ActiveStorage::Blob.first)
#   ExcelAnalyzer::PIIBadgerJob.perform_later(ActiveStorage::Blob.first)
#
module ExcelAnalyzer
  class PIIBadgerJob < ApplicationJob
    queue_as :excel_analyzer

    # Runs the PII Badger command against the blob and reports the result.
    #
    # @param attachment_blob [ActiveStorage::Blob] blob to be checked
    # @raise [JSON::ParserError] if the command output is not valid JSON
    def perform(attachment_blob)
      attachment_blob.open(tmpdir: ENV['EXCEL_ANALYZER_TMP_DIR']) do |file|
        # The command is run through a shell, so the file path is escaped to
        # stop whitespace or shell metacharacters in it being interpreted.
        # NOTE(review): the temp file name may include characters taken from
        # the uploaded file name (its extension) - confirm against
        # ActiveStorage::Blob#open.
        cmd = [
          ENV['EXCEL_ANALYZER_PII_BADGER_COMMAND'],
          '--file',
          Shellwords.escape(file.path)
        ].join(' ')

        pii_badger_metadata = IO.popen(cmd) { JSON.parse(_1.read) }

        # Merge rather than replace so existing metadata keys are preserved.
        attachment_blob.update(metadata: attachment_blob.metadata.merge(
          pii_badger: pii_badger_metadata
        ))
      end

      ExcelAnalyzer::NotifierMailer.report(attachment_blob).deliver_now
    end
  end
end
27 changes: 27 additions & 0 deletions lib/excel_analyzer/mailers/notifier_mailer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module ExcelAnalyzer
  ##
  # Mailer reporting the analyzer metadata stored on a blob to the address
  # configured in ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'].
  #
  class NotifierMailer < ApplicationMailer
    include Rails.application.routes.url_helpers
    default_url_options[:host] = AlaveteliConfiguration.domain

    # Builds the report email for the FoiAttachment backed by the given blob.
    #
    # @param attachment_blob [ActiveStorage::Blob] blob whose metadata is
    #   reported
    def report(attachment_blob)
      @metadata = attachment_blob.metadata
      @foi_attachment = attachment_for(attachment_blob)
      @incoming_message = @foi_attachment.incoming_message

      sender = email_address_with_name(
        blackhole_email, 'WhatDoTheyKnow.com Excel Analyzer report'
      )

      headers['X-WDTK-Contact'] = 'wdtk-excel-analyzer-report'
      headers['X-WDTK-CaseRef'] = @foi_attachment.id

      mail(
        from: sender,
        to: ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'],
        subject: _('ExcelAnalyzer: hidden data detected [{{reference}}]',
                   reference: @foi_attachment.id)
      )
    end

    private

    # Finds the FoiAttachment whose file blob is the given blob.
    def attachment_for(blob)
      FoiAttachment.
        joins(:file_blob).
        find_by(active_storage_blobs: { id: blob })
    end
  end
end
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ spreadsheet due to the detection of potentially suspect hidden data.
Admin request URL: <%= admin_request_url(@incoming_message.info_request_id) %>
Admin attachment URL: <%= edit_admin_foi_attachment_url(@foi_attachment) %>

The following was detected:
<% @metadata.each do |key, value| %>
<%= key %>: <%= value %>
<% end %>
Excel Analyzer metadata: <%= JSON.pretty_generate(@metadata[:excel]) %>

PII Badger metadata: <%= JSON.pretty_generate(@metadata[:pii_badger]) %>

Please review the file carefully.
51 changes: 51 additions & 0 deletions spec/excel_analyzer/hook_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
require_relative '../spec_helper'

RSpec.describe 'ExcelAnalyzer on_hidden_metadata hook' do
  let(:message) { FactoryBot.create(:incoming_message, sent_at: Time.now) }
  let(:attachment) { message.foi_attachments.first }
  let(:blob) { attachment.file_blob }

  # Point notifications at a known address for the duration of each example,
  # then put back whatever was configured before.
  around do |example|
    previous_recipient = ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL']
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = 'excel@localhost'
    example.run
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = previous_recipient
  end

  # Invokes the hook the same way for every example.
  def call_hook
    ExcelAnalyzer.on_hidden_metadata.call(blob, blob.metadata)
  end

  it 'hides the attachment with prominence reason' do
    expect(attachment.prominence).to eq('normal')
    expect(attachment.prominence_reason).to be_nil

    call_hook
    attachment.reload

    expected_reason = <<~TXT.squish
      We've found a problem with this file, so it's been hidden while we review
      it. We might not be able to give more details until then.
    TXT

    expect(attachment.prominence).to eq('hidden')
    expect(attachment.prominence_reason).to eq(expected_reason)
  end

  it 'queues PII Badger job' do
    expect(ExcelAnalyzer::PIIBadgerJob).to receive(:perform_later).with(blob)
    call_hook
  end

  context 'when message was sent more than 1 day ago' do
    let(:message) do
      FactoryBot.create(:incoming_message, sent_at: 24.hours.ago - 1.minute)
    end

    it 'does not hide the attachment' do
      expect { call_hook }.not_to change(attachment, :prominence)
    end

    it 'sents no email' do
      sent_mail = ActionMailer::Base.deliveries
      expect(sent_mail.size).to eq(0)
      call_hook
      expect(sent_mail.size).to eq(0)
    end
  end
end
51 changes: 51 additions & 0 deletions spec/excel_analyzer/jobs/pii_badger_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
require_relative '../../spec_helper'

RSpec.describe ExcelAnalyzer::PIIBadgerJob, type: :job do
  let(:message) { FactoryBot.create(:incoming_message) }
  let(:attachment) { message.foi_attachments.first }
  let(:blob) { attachment.file_blob }

  let(:excel_metadata) { { foo: 'baz' } }
  let(:pii_metadata) { { bar: 'baz' } }

  # Use known notification and command settings for each example and restore
  # the previous values afterwards.
  around do |example|
    to = ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL']
    cmd = ENV['EXCEL_ANALYZER_PII_BADGER_COMMAND']
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = 'excel@localhost'
    ENV['EXCEL_ANALYZER_PII_BADGER_COMMAND'] = '/usr/bin/pii_badger.sh'
    example.call
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = to
    ENV['EXCEL_ANALYZER_PII_BADGER_COMMAND'] = cmd
  end

  before do
    blob.update(metadata: blob.metadata.merge(excel: excel_metadata))
    # Stub the external command so no real process is spawned.
    allow(IO).to receive(:popen).and_return(pii_metadata)
  end

  # Every example performs the job, and the job delivers a report email, so
  # clear deliveries after each one (even on failure) to keep the examples
  # independent of run order.
  after { ActionMailer::Base.deliveries.clear }

  def perform
    described_class.new.perform(blob)
  end

  it 'calls external command' do
    expect(IO).to receive(:popen).
      with(%r{\A/usr/bin/pii_badger\.sh --file /.*\z})
    perform
  end

  it 'updates the blob metadata' do
    expect { perform }.to change(blob, :metadata)
    expect(blob.metadata).to include(pii_badger: pii_metadata)
  end

  it 'sents report email' do
    deliveries = ActionMailer::Base.deliveries
    expect(deliveries.size).to eq(0)

    expect(ExcelAnalyzer::NotifierMailer).to receive(:report).with(blob).
      and_call_original
    perform
    expect(deliveries.size).to eq(1)
  end
end
55 changes: 55 additions & 0 deletions spec/excel_analyzer/mailers/notifier_mailer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
require_relative '../../spec_helper'

RSpec.describe ExcelAnalyzer::NotifierMailer do
  let(:message) { FactoryBot.create(:incoming_message) }
  let(:attachment) { message.foi_attachments.first }
  let(:blob) { attachment.file_blob }

  # Delivers the report and returns the most recently delivered email.
  let(:mail) do
    described_class.report(blob).deliver_now
    ActionMailer::Base.deliveries.last
  end

  # Use a known recipient for each example, restoring the previous value
  # afterwards.
  around do |example|
    previous_recipient = ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL']
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = 'excel@localhost'
    example.run
    ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'] = previous_recipient
  end

  before do
    allow(blob).to receive(:metadata).and_return(excel: {})
  end

  after { ActionMailer::Base.deliveries.clear }

  it 'has custom mail headers' do
    contact_header = mail['X-WDTK-Contact'].value
    case_ref_header = mail['X-WDTK-CaseRef'].value

    expect(contact_header).to eq('wdtk-excel-analyzer-report')
    expect(case_ref_header).to eq(attachment.to_param)
  end

  it 'sents mail from and to the correct addresses' do
    expect(mail.from).to include(blackhole_email)
    expect(mail.to).to include(ENV['EXCEL_ANALYZER_NOTIFICATION_EMAIL'])
  end

  it 'has attachment ID in the subject' do
    expected_subject =
      "ExcelAnalyzer: hidden data detected [#{attachment.id}]"

    expect(mail.subject).to eq(expected_subject)
  end

  it 'includes the admin URL in the body' do
    request_url = "http://test.host/admin/requests/#{message.info_request_id}"
    attachment_url = "http://test.host/admin/attachments/#{attachment.id}/edit"

    expect(mail.body).to include(request_url)
    expect(mail.body).to include(attachment_url)
  end

  it 'includes the metadata in the body' do
    allow(blob).to receive(:metadata).and_return(
      excel: { foo: 'bar' }, pii_badger: { baz: 'qux' }
    )

    expect(mail.body).to include('"foo": "bar"')
    expect(mail.body).to include('"baz": "qux"')
  end
end

0 comments on commit 9ee9f97

Please sign in to comment.