Skip to content

Commit

Permalink
Merge pull request #107 from MITLibraries/tco-33
Browse files Browse the repository at this point in the history
Implement Detections model
  • Loading branch information
matt-bernhardt authored Sep 20, 2024
2 parents d5256c1 + ba32dbe commit 9fca519
Show file tree
Hide file tree
Showing 18 changed files with 386 additions and 11 deletions.
1 change: 1 addition & 0 deletions .env.test
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
DETECTOR_VERSION=1
LINKRESOLVER_BASEURL=https://mit.primo.exlibrisgroup.com/discovery/openurl?institution=01MIT_INST&rfr_id=info:sid/mit.tacos.api&vid=01MIT_INST:MIT
UNPAYWALL_EMAIL=timdex@mit.edu
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ If you need to regenerate these cassettes, the following procedure should be suf

### Required

`DETECTOR_VERSION`: a string that gets incremented as the application's detectors develop. When any detector's behavior
changes, this is the signal which indicates that terms need to be re-evaluated.

`LINKRESOLVER_BASEURL`: base url for our link resolver. `https://mit.primo.exlibrisgroup.com/discovery/openurl?institution=01MIT_INST&rfr_id=info:sid/mit.tacos.api&vid=01MIT_INST:MIT` is probably the best value unless you are doing something interesting.

`ORIGINS`: comma-separated list of domains allowed to connect to (and thus query or contribute to) the application. Be sure to specify the port number if a connecting application is not using the standard ports (this applies mostly to local development). If not defined, no external connections will be permitted.
Expand Down
3 changes: 3 additions & 0 deletions app.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
"postdeploy": "bundle exec rails db:seed"
},
"env": {
"DETECTOR_VERSION": {
"required": false
},
"LINKRESOLVER_BASEURL": {
"required": false
},
Expand Down
42 changes: 42 additions & 0 deletions app/models/detection.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# frozen_string_literal: true

# A Detection is a joining record between a Term and a Detector, created when a Detector activates based on some aspect
# of the Term. This is the signal that TACOS found something about this Term.
#
# There is a uniqueness constraint on the combination of term_id, detector_id, and detector_version.
#
# New records can be created by passing a Term and a Detector object. The model will look up the current detector
# version, and include that in the record.
#
# == Schema Information
#
# Table name: detections
#
# id :integer not null, primary key
# term_id :integer not null
# detector_id :integer not null
# detector_version :string
# created_at :datetime not null
# updated_at :datetime not null
#
class Detection < ApplicationRecord
  belongs_to :term
  belongs_to :detector

  # The detector version is stamped by a callback just before the row is inserted, so that the initialize method does
  # not need to be overridden.
  before_create :set_defaults

  # Single-parameter filters for Detection records.
  #
  # current: records whose detector_version equals the DETECTOR_VERSION environment value ('unset' when the variable
  # is not defined). The environment is read each time the scope is called.
  scope :current, lambda {
    version = ENV.fetch('DETECTOR_VERSION', 'unset')
    where(detector_version: version)
  }

  # for_detector: records which belong to the given Detector.
  scope :for_detector, lambda { |detector|
    where(detector_id: detector.id)
  }

  # for_term: records which belong to the given Term.
  scope :for_term, lambda { |term|
    where(term_id: term.id)
  }

  private

  # Reads the current detector version from the environment and writes it onto the record which is about to be
  # created, falling back to 'unset' when DETECTOR_VERSION is not defined. Callers therefore never supply this value
  # themselves, and it takes part in the uniqueness constraint described for this table.
  def set_defaults
    version = ENV.fetch('DETECTOR_VERSION', 'unset')
    self.detector_version = version
  end
end
11 changes: 11 additions & 0 deletions app/models/detector.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# frozen_string_literal: true

# Detectors are a generic representation of specific checks performed by the
# application. Examples include the Detector::Journal or
# Detector::StandardIdentifiers checks.
#
# @note A Detector record must be created for each check in the application, and
# joined to the relevant Category record as part of the application's
# knowledge graph.
#
# @note Detectors are joined to Term records via the Detection class.
#
# == Schema Information
#
# Table name: detectors
Expand All @@ -12,4 +22,5 @@
class Detector < ApplicationRecord
# Join records linking this Detector to Category records; removed when the Detector is destroyed.
has_many :detector_categories, dependent: :destroy
# Categories reachable through the detector_categories join records.
has_many :categories, through: :detector_categories
# Detection records which reference this Detector; removed when the Detector is destroyed.
has_many :detections, dependent: :destroy
end
19 changes: 19 additions & 0 deletions app/models/detector/journal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,25 @@ def self.partial_term_match(phrase)
Journal.all.select { |journal| phrase.downcase.include?(journal.name) }
end

# Look up any matching Detector::Journal records, building on the full_term_match method. If a match is found, a
# Detection record is created to indicate this success.
#
# @note This does not care whether multiple matching journals are detected. If _any_ match is found, a Detection
# record is created. The uniqueness constraint on Detection records would make multiple detections irrelevant.
#
# @return nil
def self.record(term)
  result = full_term_match(term.phrase)
  return unless result.any?

  # The lookup goes through the Detection.current scope so that it only considers records for the current
  # DETECTOR_VERSION. An unscoped find_or_create_by would be satisfied by a Detection left over from an earlier
  # version, and nothing would be recorded for the current version after the version changes. A record created
  # through the scope receives the same detector_version that Detection assigns in its before_create hook.
  Detection.current.find_or_create_by(
    term:,
    detector: Detector.where(name: 'Journal').first
  )

  nil
end

private

# Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
Expand Down
21 changes: 21 additions & 0 deletions app/models/detector/standard_identifiers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@ def initialize(term)
strip_invalid_issns
end

# The record method will consult the set of regex-based detectors that are defined in
# Detector::StandardIdentifiers. Any matches will be registered as Detection records.
#
# @note There are multiple checks within the Detector::StandardIdentifiers class. Each check is capable of generating
# a separate Detection record (although a single check finding multiple matches would still only result in one
# Detection for that check).
#
# @return nil
def self.record(term)
  si = Detector::StandardIdentifiers.new(term.phrase)

  # Each key of the identifiers hash names a check which matched; the upcased key is used as the Detector name.
  si.identifiers.each_key do |k|
    # The lookup goes through the Detection.current scope so that it only considers records for the current
    # DETECTOR_VERSION. An unscoped find_or_create_by would be satisfied by a Detection left over from an earlier
    # version, and nothing would be recorded for the current version after the version changes.
    Detection.current.find_or_create_by(
      term:,
      detector: Detector.where(name: k.to_s.upcase).first
    )
  end

  nil
end

private

def term_pattern_checker(term)
Expand Down
22 changes: 22 additions & 0 deletions app/models/detector/suggested_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,27 @@ def self.bulk_replace(input)
def self.full_term_match(phrase)
SuggestedResource.where(fingerprint: calculate_fingerprint(phrase))
end

# Look up any matching Detector::SuggestedResource records, building on the full_term_match method. If a match is
# found, a Detection record is created to indicate this success.
#
# @note Multiple matches with Detector::SuggestedResource are not possible due to internal constraints in that
# detector, which requires a unique fingerprint for every record.
#
# @note Multiple detections are irrelevant for this method. If _any_ match is found, a Detection record is created.
# The uniqueness constraint on Detection records would make multiple detections irrelevant.
#
# @return nil
def self.record(term)
  result = full_term_match(term.phrase)
  return unless result.any?

  # The lookup goes through the Detection.current scope so that it only considers records for the current
  # DETECTOR_VERSION. An unscoped find_or_create_by would be satisfied by a Detection left over from an earlier
  # version, and nothing would be recorded for the current version after the version changes. A record created
  # through the scope receives the same detector_version that Detection assigns in its before_create hook.
  Detection.current.find_or_create_by(
    term:,
    detector: Detector.where(name: 'SuggestedResource').first
  )

  nil
end
end
end
16 changes: 16 additions & 0 deletions app/models/term.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# frozen_string_literal: true

# Terms are received from contributing systems. For the moment, they include a single string, which was provided by a user
# as part of a search. This model intentionally includes no other information.
#
# == Schema Information
#
# Table name: terms
Expand All @@ -11,4 +14,17 @@
#
class Term < ApplicationRecord
  has_many :search_events, dependent: :destroy
  has_many :detections, dependent: :destroy

  # Single entry point for running the application's detectors against this term. Each detector's record method is
  # called with this Term, in a fixed order: standard identifiers, then journals, then suggested resources.
  #
  # @return [nil]
  def record_detections
    [
      Detector::StandardIdentifiers,
      Detector::Journal,
      Detector::SuggestedResource
    ].each { |detector| detector.record(self) }

    nil
  end
end
13 changes: 13 additions & 0 deletions db/migrate/20240917160025_create_detections.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Creates the detections table, which joins terms to detectors and stores the detector version for each row.
class CreateDetections < ActiveRecord::Migration[7.1]
  def change
    create_table :detections do |t|
      # Both parent references are mandatory and backed by foreign keys.
      t.references :term, null: false, foreign_key: true
      t.references :detector, null: false, foreign_key: true
      t.string :detector_version

      t.timestamps
    end

    # The same three columns are indexed uniquely in both term-first and detector-first order.
    add_index :detections, %i[term_id detector_id detector_version], unique: true
    add_index :detections, %i[detector_id term_id detector_version], unique: true
  end
end
16 changes: 15 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions docs/reference/classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ classDiagram
Term: id
Term: +String phrase
Term: calculateCategory()
Term: recordDetections()
Term: recordPatterns()
Term: recordJournals()
Term: recordSuggestedResources()
class SearchEvent
SearchEvent: +Integer id
Expand All @@ -45,14 +49,10 @@ classDiagram
Detection: +Integer id
Detection: +Integer term_id
Detection: +Integer detector_id
Detection: +Integer detector_version
Detection: +Float confidence
Detection: initialize()
Detection: setDetectionVersion()
Detection: recordDetections()
Detection: recordPatterns()
Detection: recordJournals()
Detection: recordSuggestedResource()
Detection: +String detector_version
Detection: current()
Detection: for_detector()
Detection: for_term()
class Detector
Detector: +Integer id
Expand Down
25 changes: 25 additions & 0 deletions test/fixtures/detections.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# == Schema Information
#
# Table name: detections
#
# id :integer not null, primary key
# term_id :integer not null
# detector_id :integer not null
# detector_version :string
# created_at :datetime not null
# updated_at :datetime not null
#
one:
term: doi
detector: doi
detector_version: 1

two:
term: multiple_detections
detector: doi
detector_version: 1

three:
term: multiple_detections
detector: pmid
detector_version: 1
93 changes: 93 additions & 0 deletions test/models/detection_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: detections
#
# id :integer not null, primary key
# term_id :integer not null
# detector_id :integer not null
# detector_version :string
# created_at :datetime not null
# updated_at :datetime not null
#
require 'test_helper'

# Tests for the Detection model: the uniqueness constraint, the detector_version assigned at creation, and the
# current scope.
class DetectionTest < ActiveSupport::TestCase
  test 'duplicate detections are not allowed' do
    sample = {
      term: terms('hi'),
      detector: detectors('doi')
    }

    # The first save succeeds and adds exactly one record...
    assert_difference('Detection.count', 1) do
      Detection.create!(sample)
    end

    # ...while an identical second save is rejected and leaves the count untouched.
    assert_no_difference('Detection.count') do
      assert_raises(ActiveRecord::RecordNotUnique) do
        Detection.create!(sample)
      end
    end
  end

  test 'new detections are allowed when detector_version is updated' do
    sample = Detection.first

    new_sample = {
      term: sample.term,
      detector: sample.detector
    }

    # A purely duplicate record fails to save...
    assert_raises(ActiveRecord::RecordNotUnique) do
      Detection.create!(new_sample)
    end

    # ...but when we update the DETECTOR_VERSION env, now the same record does save.
    new_version = 'updated'

    assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

    ClimateControl.modify DETECTOR_VERSION: new_version do
      assert_difference('Detection.count', 1) do
        Detection.create!(new_sample)
      end
    end
  end

  test 'detections are assigned the current DETECTOR_VERSION value from env' do
    # Inspect the record returned by create! (reloaded from the database) rather than Detection.last, so the
    # assertion does not depend on how the new id sorts against existing records.
    detection = Detection.create!(
      term: terms('hi'),
      detector: detectors('pmid')
    )

    # Expected value first, actual value second, so that a failure message reads correctly.
    assert_equal(ENV.fetch('DETECTOR_VERSION'), detection.reload.detector_version)
  end

  test 'detector current scope filters on current env value' do
    count = Detection.current.count

    new_version = 'updated'

    assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

    ClimateControl.modify DETECTOR_VERSION: new_version do
      updated_count = Detection.current.count

      assert_not_equal(count, updated_count)
    end
  end
end
Loading

0 comments on commit 9fca519

Please sign in to comment.