# NOTE(review): every test in test/models/aggregate_match_test.rb (added by this patch) calls MonthlyMatch.new.generate(DateTime.now); AggregateMatch#generate is never invoked by that file. This looks copied from MonthlyMatchTest - confirm whether AggregateMatch.new.generate was intended (expected counts may differ if fixtures span several months).
diff --git a/app/models/aggregate_match.rb b/app/models/aggregate_match.rb new file mode 100644 index 0000000..741c010 --- /dev/null +++ b/app/models/aggregate_match.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# AggregateMatch aggregates statistics for matches for all SearchEvents +# +# @see MonthlyMatch +class AggregateMatch < ApplicationRecord + include MatchCounter + + # generate data for all SearchEvents + # + # @note This is expected to only be run once per month, ideally at the beginning of the following month to ensure as + # accurate as possible statistics. Running further from the month in question will work, but matches will use the + # current versions of all algorithms which may not allow for tracking algorithm performance + # over time as accurately as intended. + # @todo Prevent running more than once by checking if we have data and then erroring? + # @return [AggregateMatch] The created AggregateMatch object. + def generate + matches = count_matches(SearchEvent.all) + AggregateMatch.create(doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], + pmid: matches[:pmid], unmatched: matches[:unmatched]) + end +end diff --git a/app/models/match_counter.rb b/app/models/match_counter.rb new file mode 100644 index 0000000..8725e47 --- /dev/null +++ b/app/models/match_counter.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +# Counts matches for supplied events +module MatchCounter + # Counts matches for supplied events + # + # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to + # expand to handle additional match types. + # @param events [Enumerable<SearchEvent>] The SearchEvents to check for matches. 
+ # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. + def count_matches(events) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + events.each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if ids.identifiers[id].present? + end + end + + matches + end +end diff --git a/app/models/monthly_match.rb b/app/models/monthly_match.rb index 3ac08b9..ff0e490 100644 --- a/app/models/monthly_match.rb +++ b/app/models/monthly_match.rb @@ -19,6 +19,8 @@ # # @see AggregateMatch class MonthlyMatch < ApplicationRecord + include MatchCounter + # generate data for a provided month # # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as @@ -28,42 +30,9 @@ class MonthlyMatch < ApplicationRecord # @todo Prevent running more than once by checking if we have data and then erroring. # @param month [DateTime] A DateTime object within the `month` to be generated. # @return [MonthlyMatch] The created MonthlyMatch object. - def generate_monthly(month) - matches = count_matches(month) + def generate(month) + matches = count_matches(SearchEvent.single_month(month)) MonthlyMatch.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], pmid: matches[:pmid], unmatched: matches[:unmatched]) end - - # Counts matches for the given month - # - # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to - # expand to handle additional match types. - # @param month [DateTime] A DateTime object within the `month` to be generated. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. 
- def count_matches(month) - matches = Hash.new(0) - known_ids = %i[unmatched pmid isbn issn doi] - - SearchEvent.single_month(month).each do |event| - ids = StandardIdentifiers.new(event.term.phrase) - - matches[:unmatched] += 1 if ids.identifiers.blank? - - known_ids.each do |id| - matches[id] += 1 if standard_identifier_match?(id, ids) - end - end - - matches - end - - # Returns true if the provided identifier type was matched in this SearchEvent - # - # @param identifier [symbol,string] A specific StandardIdentifier type to look for in the SearchEvent, such as `pmid` - # or `doi`. We use symbols, but it supports strings as well. - # @param ids [StandardIdentifiers, Hash] A Hash with matches for know standard identifiers. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. - def standard_identifier_match?(identifier, ids) - true if ids.identifiers[identifier].present? - end end diff --git a/db/migrate/20240621132150_create_aggregate_matches.rb b/db/migrate/20240621132150_create_aggregate_matches.rb new file mode 100644 index 0000000..d90b77d --- /dev/null +++ b/db/migrate/20240621132150_create_aggregate_matches.rb @@ -0,0 +1,12 @@ +class CreateAggregateMatches < ActiveRecord::Migration[7.1] + def change + create_table :aggregate_matches do |t| + t.integer :doi + t.integer :issn + t.integer :isbn + t.integer :pmid + t.integer :unmatched + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 4aa2ef7..b748f13 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,17 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.1].define(version: 2024_06_21_132136) do +ActiveRecord::Schema[7.1].define(version: 2024_06_21_132150) do + create_table "aggregate_matches", force: :cascade do |t| + t.integer "doi" + t.integer "issn" + t.integer "isbn" + t.integer "pmid" + t.integer "unmatched" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "monthly_matches", force: :cascade do |t| t.date "month" t.integer "doi" diff --git a/test/fixtures/aggregate_matches.yml b/test/fixtures/aggregate_matches.yml new file mode 100644 index 0000000..2958a1d --- /dev/null +++ b/test/fixtures/aggregate_matches.yml @@ -0,0 +1,23 @@ +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# This model initially had no columns defined. If you add columns to the +# model remove the "{}" from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/models/aggregate_match_test.rb b/test/models/aggregate_match_test.rb new file mode 100644 index 0000000..c4ace4b --- /dev/null +++ b/test/models/aggregate_match_test.rb @@ -0,0 +1,79 @@ +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class AggregateMatchTest < ActiveSupport::TestCase + test 'dois counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate(DateTime.now) + assert aggregate.doi == 1 + end + + test 'issns counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate(DateTime.now) + assert 
aggregate.issn == 1 + end + + test 'isbns counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate(DateTime.now) + assert aggregate.isbn == 1 + end + + test 'pmids counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate(DateTime.now) + assert aggregate.pmid == 1 + end + + test 'unmatched counts are included are included in aggregation' do + aggregate = MonthlyMatch.new.generate(DateTime.now) + assert aggregate.unmatched == 2 + end + + test 'creating lots of searchevents leads to correct data' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = MonthlyMatch.new.generate(DateTime.now) + + assert doi_expected_count == aggregate.doi + assert issn_expected_count == aggregate.issn + assert isbn_expected_count == aggregate.isbn + assert pmid_expected_count == aggregate.pmid + assert unmatched_expected_count == aggregate.unmatched + end +end diff --git a/test/models/monthly_match_test.rb b/test/models/monthly_match_test.rb index 91ecfd1..74995d9 100644 --- a/test/models/monthly_match_test.rb +++ b/test/models/monthly_match_test.rb @@ -16,27 +16,27 @@ class MonthlyMatchTest < ActiveSupport::TestCase test 'dois counts are included in 
aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.doi == 1 end test 'issns counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.issn == 1 end test 'isbns counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.isbn == 1 end test 'pmids counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.pmid == 1 end test 'unmatched counts are included are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.unmatched == 2 end @@ -69,7 +69,7 @@ class MonthlyMatchTest < ActiveSupport::TestCase SearchEvent.create(term: terms(:hi), source: 'test') end - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert doi_expected_count == aggregate.doi assert issn_expected_count == aggregate.issn