From ab302d9752b4cc13e20fd8e77fcd0269c747cfb9 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Tue, 25 Jun 2024 16:24:51 -0400 Subject: [PATCH 1/7] Update dev debugging and fixup issn detection Why are these changes being introduced: * The ISSN detection was returning a key set to `nil` when an ISSN-like pattern (probably a year span) was detected but it failed the ISSN validation checksum. This differed from other standard_identifiers and caused issues when using the standard_identifiers detection for our monthly statistics. This just deletes the key instead of setting it to `nil` in conditions when the checksum fails. * Our development logging wasn't showing a useful loglevel for development. I set it to debug here, but we may want to allow it to be controlled by ENV and also consider using lograge like we do on other apps --- app/models/standard_identifiers.rb | 2 +- config/environments/development.rb | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/app/models/standard_identifiers.rb b/app/models/standard_identifiers.rb index b2b383f..5588625 100644 --- a/app/models/standard_identifiers.rb +++ b/app/models/standard_identifiers.rb @@ -41,7 +41,7 @@ def term_patterns def strip_invalid_issns return unless @identifiers[:issn] - @identifiers[:issn] = nil unless validate_issn(@identifiers[:issn]) + @identifiers.delete(:issn) unless validate_issn(@identifiers[:issn]) end # validate_issn is only called when the regex for an ISSN has indicated an ISSN diff --git a/config/environments/development.rb b/config/environments/development.rb index 2e7fb48..4eec57e 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -1,4 +1,4 @@ -require "active_support/core_ext/integer/time" +require 'active_support/core_ext/integer/time' Rails.application.configure do # Settings specified here will take precedence over those in config/application.rb. @@ -19,13 +19,13 @@ # Enable/disable caching. 
By default caching is disabled. # Run rails dev:cache to toggle caching. - if Rails.root.join("tmp/caching-dev.txt").exist? + if Rails.root.join('tmp/caching-dev.txt').exist? config.action_controller.perform_caching = true config.action_controller.enable_fragment_cache_logging = true config.cache_store = :memory_store config.public_file_server.headers = { - "Cache-Control" => "public, max-age=#{2.days.to_i}" + 'Cache-Control' => "public, max-age=#{2.days.to_i}" } else config.action_controller.perform_caching = false @@ -73,4 +73,8 @@ # Raise error when a before_action's only/except options reference missing actions config.action_controller.raise_on_missing_callback_actions = true + + # Local logging overrides + config.logger = Logger.new(STDOUT) + config.log_level = :debug end From 146d92963311bc6bb2ee9095b8693ad0d1fbb19a Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Tue, 25 Jun 2024 16:26:05 -0400 Subject: [PATCH 2/7] Adds support for filtering SearchEvents to a month Why are these changes being introduced: * The ability to filter to a specific month is a feature needed for the monthly aggregation of algorithm matches Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TCO-17 How does this address that need: * Creates a scope on the model --- app/models/search_event.rb | 8 ++++++++ test/models/search_event_test.rb | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/app/models/search_event.rb b/app/models/search_event.rb index 46cae56..8fe5a5f 100644 --- a/app/models/search_event.rb +++ b/app/models/search_event.rb @@ -10,8 +10,16 @@ # created_at :datetime not null # updated_at :datetime not null # + +# SearchEvent represents an instance of a logged search Term class SearchEvent < ApplicationRecord belongs_to :term validates :source, presence: true + + # :single_month filters to requested month + # + # @param month [DateTime] A DateTime object within the `month` to be filtered. 
+ # @return [Array] All SearchEvents for the supplied `month`. + scope :single_month, ->(month) { where(created_at: month.beginning_of_month..month.end_of_month) } end diff --git a/test/models/search_event_test.rb b/test/models/search_event_test.rb index dd87810..ec921d2 100644 --- a/test/models/search_event_test.rb +++ b/test/models/search_event_test.rb @@ -28,4 +28,14 @@ class SearchEventTest < ActiveSupport::TestCase s.source = nil refute(s.valid?) end + + test 'monthly scope returns requested month of SearchEvents' do + assert SearchEvent.all.include?(search_events(:current_month_pmid)) + assert SearchEvent.single_month(Time.now).include?(search_events(:current_month_pmid)) + end + + test 'monthly scope does not return SearchEvents outside the requested month' do + assert SearchEvent.all.include?(search_events(:old_month_pmid)) + refute SearchEvent.single_month(Time.now).include?(search_events(:old_month_pmid)) + end end From 2a49276b44f409014dea91467f7d5050fea34fa9 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Tue, 25 Jun 2024 16:29:02 -0400 Subject: [PATCH 3/7] Monthly matches Why are these changes being introduced: * Implement data models for counting algorithm matches for the current month Terms Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TCO-17 See also: * https://github.com/MITLibraries/tacos/blob/main/docs/architecture-decisions/0005-use-multiple-minimal-historical-analytics-models.md How does this address that need: * Creates a new model `MonthlyMatch` * Adds methods to run each (current) StandardIdentifier algorithm on each Term (via the SearchEvents) Document any side effects to this change: * A schedulable job to run this automatically is out of scope and will be added under a separate ticket --- app/models/monthly_match.rb | 69 ++++++++++++++++ .../20240621132136_create_monthly_matches.rb | 13 +++ db/schema.rb | 13 ++- test/fixtures/monthly_matches.yml | 24 ++++++ test/fixtures/search_events.yml | 16 ++++ 
test/fixtures/terms.yml | 12 +++ test/models/monthly_match_test.rb | 80 +++++++++++++++++++ 7 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 app/models/monthly_match.rb create mode 100644 db/migrate/20240621132136_create_monthly_matches.rb create mode 100644 test/fixtures/monthly_matches.yml create mode 100644 test/models/monthly_match_test.rb diff --git a/app/models/monthly_match.rb b/app/models/monthly_match.rb new file mode 100644 index 0000000..3ac08b9 --- /dev/null +++ b/app/models/monthly_match.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: monthly_matches +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# MonthlyMatch aggregates statistics for matches in a given month +# +# @see AggregateMatch +class MonthlyMatch < ApplicationRecord + # generate data for a provided month + # + # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as + # accurate as possible statistics. Running further from the month in question will work, but matches will use the + # current versions of all algorithms which may not match the algorithm in place during the month the SearchEvent + # occurred. + # @todo Prevent running more than once by checking if we have data and then erroring. + # @param month [DateTime] A DateTime object within the `month` to be generated. + # @return [MonthlyMatch] The created MonthlyMatch object. + def generate_monthly(month) + matches = count_matches(month) + MonthlyMatch.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], + pmid: matches[:pmid], unmatched: matches[:unmatched]) + end + + # Counts matches for the given month + # + # @note We currently only have StandardIdentifiers to match. 
As we add new algorithms, this method will need to + # expand to handle additional match types. + # @param month [DateTime] A DateTime object within the `month` to be generated. + # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. + def count_matches(month) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + SearchEvent.single_month(month).each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if standard_identifier_match?(id, ids) + end + end + + matches + end + + # Returns true if the provided identifier type was matched in this SearchEvent + # + # @param identifier [symbol,string] A specific StandardIdentifier type to look for in the SearchEvent, such as `pmid` + # or `doi`. We use symbols, but it supports strings as well. + # @param ids [StandardIdentifiers, Hash] A Hash with matches for know standard identifiers. + # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. + def standard_identifier_match?(identifier, ids) + true if ids.identifiers[identifier].present? + end +end diff --git a/db/migrate/20240621132136_create_monthly_matches.rb b/db/migrate/20240621132136_create_monthly_matches.rb new file mode 100644 index 0000000..f00d0d8 --- /dev/null +++ b/db/migrate/20240621132136_create_monthly_matches.rb @@ -0,0 +1,13 @@ +class CreateMonthlyMatches < ActiveRecord::Migration[7.1] + def change + create_table :monthly_matches do |t| + t.date :month + t.integer :doi + t.integer :issn + t.integer :isbn + t.integer :pmid + t.integer :unmatched + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 2ac49e5..4aa2ef7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,18 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.1].define(version: 2023_10_19_191933) do +ActiveRecord::Schema[7.1].define(version: 2024_06_21_132136) do + create_table "monthly_matches", force: :cascade do |t| + t.date "month" + t.integer "doi" + t.integer "issn" + t.integer "isbn" + t.integer "pmid" + t.integer "unmatched" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "search_events", force: :cascade do |t| t.integer "term_id" t.string "source" diff --git a/test/fixtures/monthly_matches.yml b/test/fixtures/monthly_matches.yml new file mode 100644 index 0000000..02e3ff2 --- /dev/null +++ b/test/fixtures/monthly_matches.yml @@ -0,0 +1,24 @@ +# == Schema Information +# +# Table name: monthly_matches +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# This model initially had no columns defined. If you add columns to the +# model remove the "{}" from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/fixtures/search_events.yml b/test/fixtures/search_events.yml index eb48a8b..3bae084 100644 --- a/test/fixtures/search_events.yml +++ b/test/fixtures/search_events.yml @@ -15,3 +15,19 @@ timdex_cool: bento_hi: term: hi source: bento +current_month_pmid: + term: pmid_38908367 + source: test +old_month_pmid: + term: pmid_38908367 + source: test + created_at: <%= 1.year.ago %> +current_month_issn: + term: issn_1075_8623 + source: test +current_month_doi: + term: doi + source: test +current_month_isbn: + term: isbn_9781319145446 + source: test diff --git a/test/fixtures/terms.yml b/test/fixtures/terms.yml index 128b327..6e49cf6 100644 --- a/test/fixtures/terms.yml +++ b/test/fixtures/terms.yml @@ -13,3 +13,15 @@ cool: hi: phrase: hello world + 
+pmid_38908367: + phrase: 'TERT activation targets DNA methylation and multiple aging hallmarks. Shim HS, et al. Cell. 2024. PMID: 38908367' + +issn_1075_8623: + phrase: 1075-8623 + +doi: + phrase: '10.1016/j.physio.2010.12.004' + +isbn_9781319145446: + phrase: 'Sadava, D. E., D. M. Hillis, et al. Life: The Science of Biology. 11th ed. W. H. Freeman, 2016. ISBN: 9781319145446' diff --git a/test/models/monthly_match_test.rb b/test/models/monthly_match_test.rb new file mode 100644 index 0000000..91ecfd1 --- /dev/null +++ b/test/models/monthly_match_test.rb @@ -0,0 +1,80 @@ +# == Schema Information +# +# Table name: monthly_matches +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class MonthlyMatchTest < ActiveSupport::TestCase + test 'dois counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + assert aggregate.doi == 1 + end + + test 'issns counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + assert aggregate.issn == 1 + end + + test 'isbns counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + assert aggregate.isbn == 1 + end + + test 'pmids counts are included in aggregation' do + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + assert aggregate.pmid == 1 + end + + test 'unmatched counts are included are included in aggregation' do + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + assert aggregate.unmatched == 2 + end + + test 'creating lots of searchevents leads to correct data' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: 
terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + + assert doi_expected_count == aggregate.doi + assert issn_expected_count == aggregate.issn + assert isbn_expected_count == aggregate.isbn + assert pmid_expected_count == aggregate.pmid + assert unmatched_expected_count == aggregate.unmatched + end +end From abd0a5b93e5fa6c4e0f59ffd083dbb5998f39ab1 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Wed, 26 Jun 2024 08:13:17 -0400 Subject: [PATCH 4/7] Add parallel test support to coverage Why are these changes being introduced: * We'll hit 50 tests in the next commit which is the Rails default before using parallel tests. How does this address that need: * This updates the coverage config to work with parallel tests. --- test/test_helper.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_helper.rb b/test/test_helper.rb index f55fa99..9e51c06 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -24,6 +24,14 @@ class TestCase # Run tests in parallel with specified workers parallelize(workers: :number_of_processors) + parallelize_setup do |worker| + SimpleCov.command_name "#{SimpleCov.command_name}-#{worker}" + end + + parallelize_teardown do |worker| + SimpleCov.result + end + # Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order. 
fixtures :all From 0bce48d0ed26b3aa4600de51399e8062af644524 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Wed, 26 Jun 2024 08:14:25 -0400 Subject: [PATCH 5/7] Total aggregate matches Why are these changes being introduced: * Implement data models for counting algorithm matches for all Terms Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TCO-17 See also: * https://github.com/MITLibraries/tacos/blob/main/docs/architecture-decisions/0005-use-multiple-minimal-historical-analytics-models.md How does this address that need: * Creates a new model `AggregateMatch` * Adds methods to run each (current) StandardIdentifier algorithm on each Term (via the SearchEvents) * Adjusts `MonthlyMatch` counting algorithm to be useful for both cases and extracts it to a module which is imported into both Classes Document any side effects to this change: * A schedulable job to run this automatically is out of scope and will be added under a separate ticket * The tests are identical between this and `MonthlyMatch`. There may be a way to avoid the duplication and thus ensure both get relevant updates but it was not clear to me how to do that in an obvious way at the time of this work. 
--- app/models/aggregate_match.rb | 36 +++++++++ app/models/match_counter.rb | 27 +++++++ app/models/monthly_match.rb | 39 +-------- ...20240621132150_create_aggregate_matches.rb | 12 +++ db/schema.rb | 12 ++- test/fixtures/aggregate_matches.yml | 23 ++++++ test/models/aggregate_match_test.rb | 79 +++++++++++++++++++ test/models/monthly_match_test.rb | 12 +-- 8 files changed, 198 insertions(+), 42 deletions(-) create mode 100644 app/models/aggregate_match.rb create mode 100644 app/models/match_counter.rb create mode 100644 db/migrate/20240621132150_create_aggregate_matches.rb create mode 100644 test/fixtures/aggregate_matches.yml create mode 100644 test/models/aggregate_match_test.rb diff --git a/app/models/aggregate_match.rb b/app/models/aggregate_match.rb new file mode 100644 index 0000000..741c010 --- /dev/null +++ b/app/models/aggregate_match.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# AggregateMatch aggregates statistics for matches for all SearchEvents +# +# @see MonthlyMatch +class AggregateMatch < ApplicationRecord + include MatchCounter + + # generate data for all SearchEvents + # + # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as + # accurate as possible statistics. Running further from the month in question will work, but matches will use the + # current versions of all algorithms which may not allow for tracking algorithm performance + # over time as accurately as intended. + # @todo Prevent running more than once by checking if we have data and then erroring? + # @return [AggregateMatch] The created AggregateMatch object. 
+ def generate + matches = count_matches(SearchEvent.all) + AggregateMatch.create(doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], + pmid: matches[:pmid], unmatched: matches[:unmatched]) + end +end diff --git a/app/models/match_counter.rb b/app/models/match_counter.rb new file mode 100644 index 0000000..8725e47 --- /dev/null +++ b/app/models/match_counter.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +# Counts matches supplied events +module MatchCounter + # Counts matches supplied events + # + # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to + # expand to handle additional match types. + # @param events [Array of SearchEvents] An array of SearchEvents to check for matches. + # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. + def count_matches(events) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + events.each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if ids.identifiers[id].present? + end + end + + matches + end +end diff --git a/app/models/monthly_match.rb b/app/models/monthly_match.rb index 3ac08b9..ff0e490 100644 --- a/app/models/monthly_match.rb +++ b/app/models/monthly_match.rb @@ -19,6 +19,8 @@ # # @see AggregateMatch class MonthlyMatch < ApplicationRecord + include MatchCounter + # generate data for a provided month # # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as @@ -28,42 +30,9 @@ class MonthlyMatch < ApplicationRecord # @todo Prevent running more than once by checking if we have data and then erroring. # @param month [DateTime] A DateTime object within the `month` to be generated. # @return [MonthlyMatch] The created MonthlyMatch object. 
- def generate_monthly(month) - matches = count_matches(month) + def generate(month) + matches = count_matches(SearchEvent.single_month(month)) MonthlyMatch.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], pmid: matches[:pmid], unmatched: matches[:unmatched]) end - - # Counts matches for the given month - # - # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to - # expand to handle additional match types. - # @param month [DateTime] A DateTime object within the `month` to be generated. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. - def count_matches(month) - matches = Hash.new(0) - known_ids = %i[unmatched pmid isbn issn doi] - - SearchEvent.single_month(month).each do |event| - ids = StandardIdentifiers.new(event.term.phrase) - - matches[:unmatched] += 1 if ids.identifiers.blank? - - known_ids.each do |id| - matches[id] += 1 if standard_identifier_match?(id, ids) - end - end - - matches - end - - # Returns true if the provided identifier type was matched in this SearchEvent - # - # @param identifier [symbol,string] A specific StandardIdentifier type to look for in the SearchEvent, such as `pmid` - # or `doi`. We use symbols, but it supports strings as well. - # @param ids [StandardIdentifiers, Hash] A Hash with matches for know standard identifiers. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. - def standard_identifier_match?(identifier, ids) - true if ids.identifiers[identifier].present? 
- end end diff --git a/db/migrate/20240621132150_create_aggregate_matches.rb b/db/migrate/20240621132150_create_aggregate_matches.rb new file mode 100644 index 0000000..d90b77d --- /dev/null +++ b/db/migrate/20240621132150_create_aggregate_matches.rb @@ -0,0 +1,12 @@ +class CreateAggregateMatches < ActiveRecord::Migration[7.1] + def change + create_table :aggregate_matches do |t| + t.integer :doi + t.integer :issn + t.integer :isbn + t.integer :pmid + t.integer :unmatched + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 4aa2ef7..b748f13 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,17 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_06_21_132136) do +ActiveRecord::Schema[7.1].define(version: 2024_06_21_132150) do + create_table "aggregate_matches", force: :cascade do |t| + t.integer "doi" + t.integer "issn" + t.integer "isbn" + t.integer "pmid" + t.integer "unmatched" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "monthly_matches", force: :cascade do |t| t.date "month" t.integer "doi" diff --git a/test/fixtures/aggregate_matches.yml b/test/fixtures/aggregate_matches.yml new file mode 100644 index 0000000..2958a1d --- /dev/null +++ b/test/fixtures/aggregate_matches.yml @@ -0,0 +1,23 @@ +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# + +# This model initially had no columns defined. 
If you add columns to the +# model remove the "{}" from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/models/aggregate_match_test.rb b/test/models/aggregate_match_test.rb new file mode 100644 index 0000000..3254b96 --- /dev/null +++ b/test/models/aggregate_match_test.rb @@ -0,0 +1,79 @@ +# == Schema Information +# +# Table name: aggregate_matches +# +# id :integer not null, primary key +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class AggregateMatchTest < ActiveSupport::TestCase + test 'dois counts are included in aggregation' do + aggregate = AggregateMatch.new.generate + assert aggregate.doi == 1 + end + + test 'issns counts are included in aggregation' do + aggregate = AggregateMatch.new.generate + assert aggregate.issn == 1 + end + + test 'isbns counts are included in aggregation' do + aggregate = AggregateMatch.new.generate + assert aggregate.isbn == 1 + end + + test 'pmids counts are included in aggregation' do + aggregate = AggregateMatch.new.generate + assert aggregate.pmid == 2 + end + + test 'unmatched counts are included are included in aggregation' do + aggregate = AggregateMatch.new.generate + assert aggregate.unmatched == 2 + end + + test 'creating lots of searchevents leads to correct data' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + 
SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = AggregateMatch.new.generate + + assert doi_expected_count == aggregate.doi + assert issn_expected_count == aggregate.issn + assert isbn_expected_count == aggregate.isbn + assert pmid_expected_count == aggregate.pmid + assert unmatched_expected_count == aggregate.unmatched + end +end diff --git a/test/models/monthly_match_test.rb b/test/models/monthly_match_test.rb index 91ecfd1..74995d9 100644 --- a/test/models/monthly_match_test.rb +++ b/test/models/monthly_match_test.rb @@ -16,27 +16,27 @@ class MonthlyMatchTest < ActiveSupport::TestCase test 'dois counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.doi == 1 end test 'issns counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.issn == 1 end test 'isbns counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.isbn == 1 end test 'pmids counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.pmid == 1 end test 'unmatched counts are included are included in aggregation' do - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert aggregate.unmatched == 2 end @@ -69,7 +69,7 @@ class MonthlyMatchTest < ActiveSupport::TestCase 
SearchEvent.create(term: terms(:hi), source: 'test') end - aggregate = MonthlyMatch.new.generate_monthly(DateTime.now) + aggregate = MonthlyMatch.new.generate(DateTime.now) assert doi_expected_count == aggregate.doi assert issn_expected_count == aggregate.issn From 1f33926566d34c16b6b399b2b8f1eaaf71b59c22 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Fri, 28 Jun 2024 14:14:03 -0400 Subject: [PATCH 6/7] Refactor to concern --- app/models/aggregate_match.rb | 2 +- app/models/concerns/match_counter.rb | 31 ++++++++++++++++++++++++++++ app/models/match_counter.rb | 27 ------------------------ app/models/monthly_match.rb | 2 +- 4 files changed, 33 insertions(+), 29 deletions(-) create mode 100644 app/models/concerns/match_counter.rb delete mode 100644 app/models/match_counter.rb diff --git a/app/models/aggregate_match.rb b/app/models/aggregate_match.rb index 741c010..9f66b1d 100644 --- a/app/models/aggregate_match.rb +++ b/app/models/aggregate_match.rb @@ -29,7 +29,7 @@ class AggregateMatch < ApplicationRecord # @todo Prevent running more than once by checking if we have data and then erroring? # @return [AggregateMatch] The created AggregateMatch object. def generate - matches = count_matches(SearchEvent.all) + matches = AggregateMatch.count_matches(SearchEvent.all) AggregateMatch.create(doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], pmid: matches[:pmid], unmatched: matches[:unmatched]) end diff --git a/app/models/concerns/match_counter.rb b/app/models/concerns/match_counter.rb new file mode 100644 index 0000000..b821954 --- /dev/null +++ b/app/models/concerns/match_counter.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +# Counts matches supplied events +module MatchCounter + extend ActiveSupport::Concern + + class_methods do + # Counts matches supplied events + # + # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to + # expand to handle additional match types. 
+ # @param events [Array of SearchEvents] An array of SearchEvents to check for matches. + # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. + def count_matches(events) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + events.each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if ids.identifiers[id].present? + end + end + + matches + end + end +end diff --git a/app/models/match_counter.rb b/app/models/match_counter.rb deleted file mode 100644 index 8725e47..0000000 --- a/app/models/match_counter.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -# Counts matches supplied events -module MatchCounter - # Counts matches supplied events - # - # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to - # expand to handle additional match types. - # @param events [Array of SearchEvents] An array of SearchEvents to check for matches. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. - def count_matches(events) - matches = Hash.new(0) - known_ids = %i[unmatched pmid isbn issn doi] - - events.each do |event| - ids = StandardIdentifiers.new(event.term.phrase) - - matches[:unmatched] += 1 if ids.identifiers.blank? - - known_ids.each do |id| - matches[id] += 1 if ids.identifiers[id].present? - end - end - - matches - end -end diff --git a/app/models/monthly_match.rb b/app/models/monthly_match.rb index ff0e490..0967980 100644 --- a/app/models/monthly_match.rb +++ b/app/models/monthly_match.rb @@ -31,7 +31,7 @@ class MonthlyMatch < ApplicationRecord # @param month [DateTime] A DateTime object within the `month` to be generated. # @return [MonthlyMatch] The created MonthlyMatch object. 
def generate(month) - matches = count_matches(SearchEvent.single_month(month)) + matches = MonthlyMatch.count_matches(SearchEvent.single_month(month)) MonthlyMatch.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], pmid: matches[:pmid], unmatched: matches[:unmatched]) end From 044b1a0ae6dafb67a9706b031193be3df7213283 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Mon, 1 Jul 2024 12:35:54 -0400 Subject: [PATCH 7/7] Refactors to Metrics::Algorithms Moves both types of metrics on algorithms to use a single class and associated database table. Creating the namespace `Metrics` should allow us to better organize code associated with various metrics into a common space that is both intuitive and useful. Relevant ticket(s): https://mitlibraries.atlassian.net/browse/TCO-17 --- app/models/aggregate_match.rb | 36 ----- app/models/concerns/match_counter.rb | 31 ---- app/models/metrics/algorithms.rb | 72 +++++++++ app/models/monthly_match.rb | 38 ----- ...240621132136_create_metrics_algorithms.rb} | 4 +- ...20240621132150_create_aggregate_matches.rb | 12 -- db/schema.rb | 14 +- test/fixtures/aggregate_matches.yml | 23 --- test/fixtures/monthly_matches.yml | 24 --- test/models/aggregate_match_test.rb | 79 ---------- test/models/metrics/algorithms_test.rb | 147 ++++++++++++++++++ test/models/monthly_match_test.rb | 80 ---------- 12 files changed, 223 insertions(+), 337 deletions(-) delete mode 100644 app/models/aggregate_match.rb delete mode 100644 app/models/concerns/match_counter.rb create mode 100644 app/models/metrics/algorithms.rb delete mode 100644 app/models/monthly_match.rb rename db/migrate/{20240621132136_create_monthly_matches.rb => 20240621132136_create_metrics_algorithms.rb} (63%) delete mode 100644 db/migrate/20240621132150_create_aggregate_matches.rb delete mode 100644 test/fixtures/aggregate_matches.yml delete mode 100644 test/fixtures/monthly_matches.yml delete mode 100644 test/models/aggregate_match_test.rb create mode 100644 
test/models/metrics/algorithms_test.rb delete mode 100644 test/models/monthly_match_test.rb diff --git a/app/models/aggregate_match.rb b/app/models/aggregate_match.rb deleted file mode 100644 index 9f66b1d..0000000 --- a/app/models/aggregate_match.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -# == Schema Information -# -# Table name: aggregate_matches -# -# id :integer not null, primary key -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# - -# AggregateMatch aggregates statistics for matches for all SearchEvents -# -# @see MonthlyMatch -class AggregateMatch < ApplicationRecord - include MatchCounter - - # generate data for all SearchEvents - # - # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as - # accurate as possible statistics. Running further from the month in question will work, but matches will use the - # current versions of all algorithms which may not allow for tracking algorithm performance - # over time as accurately as intended. - # @todo Prevent running more than once by checking if we have data and then erroring? - # @return [AggregateMatch] The created AggregateMatch object. - def generate - matches = AggregateMatch.count_matches(SearchEvent.all) - AggregateMatch.create(doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], - pmid: matches[:pmid], unmatched: matches[:unmatched]) - end -end diff --git a/app/models/concerns/match_counter.rb b/app/models/concerns/match_counter.rb deleted file mode 100644 index b821954..0000000 --- a/app/models/concerns/match_counter.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -# Counts matches supplied events -module MatchCounter - extend ActiveSupport::Concern - - class_methods do - # Counts matches supplied events - # - # @note We currently only have StandardIdentifiers to match. 
As we add new algorithms, this method will need to - # expand to handle additional match types. - # @param events [Array of SearchEvents] An array of SearchEvents to check for matches. - # @return [Hash] A Hash with keys for each known standard identifier and the count of matched search events. - def count_matches(events) - matches = Hash.new(0) - known_ids = %i[unmatched pmid isbn issn doi] - - events.each do |event| - ids = StandardIdentifiers.new(event.term.phrase) - - matches[:unmatched] += 1 if ids.identifiers.blank? - - known_ids.each do |id| - matches[id] += 1 if ids.identifiers[id].present? - end - end - - matches - end - end -end diff --git a/app/models/metrics/algorithms.rb b/app/models/metrics/algorithms.rb new file mode 100644 index 0000000..6343081 --- /dev/null +++ b/app/models/metrics/algorithms.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: metrics_algorithms +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +module Metrics + # Algorithms aggregates statistics for matches for all SearchEvents + class Algorithms < ApplicationRecord + self.table_name = 'metrics_algorithms' + + # generate metrics data about SearchEvents matches + # + # @note This is expected to only be run once per month per type of aggregation (once with no month supplied, once + # with a month supplied), ideally at the beginning of the following month to ensure as + # accurate as possible statistics. Running further from the month in question will work, but matches will use the + # current versions of all algorithms which may not match the algorithm in place during the month the SearchEvent + # occurred. + # @note We don't currently prevent this running more than once per month per type of aggregation. 
+ # @param month [DateTime] A DateTime object within the `month` to be generated. Defaults to nil, which is how + # total algorithm statistics are created. + # @example + # # Generate metrics for all SearchEvents + # Metrics::Algorithms.new.generate + # + # # Generate metrics for all SearchEvents last month + # Metrics::Algorithms.new.generate(1.month.ago) + # @return [Metrics::Algorithms] The created Metrics::Algorithms object. + def generate(month = nil) + matches = if month.present? + count_matches(SearchEvent.single_month(month).includes(:term)) + else + count_matches(SearchEvent.all.includes(:term)) + end + Metrics::Algorithms.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], + pmid: matches[:pmid], unmatched: matches[:unmatched]) + end + + # Counts matches supplied events + # + # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to + # expand to handle additional match types. + # @param events [Array of SearchEvents] An array of SearchEvents to check for matches. + # @return [Hash] A Hash with keys for each known algorithm and the count of matched SearchEvents. + def count_matches(events) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + events.each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if ids.identifiers[id].present?
+ end + end + + matches + end + end +end diff --git a/app/models/monthly_match.rb b/app/models/monthly_match.rb deleted file mode 100644 index 0967980..0000000 --- a/app/models/monthly_match.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -# == Schema Information -# -# Table name: monthly_matches -# -# id :integer not null, primary key -# month :date -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# - -# MonthlyMatch aggregates statistics for matches in a given month -# -# @see AggregateMatch -class MonthlyMatch < ApplicationRecord - include MatchCounter - - # generate data for a provided month - # - # @note This is expected to only be run once per month, ideally at the beginning of the following monthto ensure as - # accurate as possible statistics. Running further from the month in question will work, but matches will use the - # current versions of all algorithms which may not match the algorithm in place during the month the SearchEvent - # occurred. - # @todo Prevent running more than once by checking if we have data and then erroring. - # @param month [DateTime] A DateTime object within the `month` to be generated. - # @return [MonthlyMatch] The created MonthlyMatch object. 
- def generate(month) - matches = MonthlyMatch.count_matches(SearchEvent.single_month(month)) - MonthlyMatch.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], - pmid: matches[:pmid], unmatched: matches[:unmatched]) - end -end diff --git a/db/migrate/20240621132136_create_monthly_matches.rb b/db/migrate/20240621132136_create_metrics_algorithms.rb similarity index 63% rename from db/migrate/20240621132136_create_monthly_matches.rb rename to db/migrate/20240621132136_create_metrics_algorithms.rb index f00d0d8..be6ef1a 100644 --- a/db/migrate/20240621132136_create_monthly_matches.rb +++ b/db/migrate/20240621132136_create_metrics_algorithms.rb @@ -1,6 +1,6 @@ -class CreateMonthlyMatches < ActiveRecord::Migration[7.1] +class CreateMetricsAlgorithms < ActiveRecord::Migration[7.1] def change - create_table :monthly_matches do |t| + create_table :metrics_algorithms do |t| t.date :month t.integer :doi t.integer :issn diff --git a/db/migrate/20240621132150_create_aggregate_matches.rb b/db/migrate/20240621132150_create_aggregate_matches.rb deleted file mode 100644 index d90b77d..0000000 --- a/db/migrate/20240621132150_create_aggregate_matches.rb +++ /dev/null @@ -1,12 +0,0 @@ -class CreateAggregateMatches < ActiveRecord::Migration[7.1] - def change - create_table :aggregate_matches do |t| - t.integer :doi - t.integer :issn - t.integer :isbn - t.integer :pmid - t.integer :unmatched - t.timestamps - end - end -end diff --git a/db/schema.rb b/db/schema.rb index b748f13..384ff92 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,18 +10,8 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.1].define(version: 2024_06_21_132150) do - create_table "aggregate_matches", force: :cascade do |t| - t.integer "doi" - t.integer "issn" - t.integer "isbn" - t.integer "pmid" - t.integer "unmatched" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - end - - create_table "monthly_matches", force: :cascade do |t| +ActiveRecord::Schema[7.1].define(version: 2024_06_21_132136) do + create_table "metrics_algorithms", force: :cascade do |t| t.date "month" t.integer "doi" t.integer "issn" diff --git a/test/fixtures/aggregate_matches.yml b/test/fixtures/aggregate_matches.yml deleted file mode 100644 index 2958a1d..0000000 --- a/test/fixtures/aggregate_matches.yml +++ /dev/null @@ -1,23 +0,0 @@ -# == Schema Information -# -# Table name: aggregate_matches -# -# id :integer not null, primary key -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# - -# This model initially had no columns defined. If you add columns to the -# model remove the "{}" from the fixture names and add the columns immediately -# below each fixture, per the syntax in the comments below -# -one: {} -# column: value -# -two: {} -# column: value diff --git a/test/fixtures/monthly_matches.yml b/test/fixtures/monthly_matches.yml deleted file mode 100644 index 02e3ff2..0000000 --- a/test/fixtures/monthly_matches.yml +++ /dev/null @@ -1,24 +0,0 @@ -# == Schema Information -# -# Table name: monthly_matches -# -# id :integer not null, primary key -# month :date -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# - -# This model initially had no columns defined. 
If you add columns to the -# model remove the "{}" from the fixture names and add the columns immediately -# below each fixture, per the syntax in the comments below -# -one: {} -# column: value -# -two: {} -# column: value diff --git a/test/models/aggregate_match_test.rb b/test/models/aggregate_match_test.rb deleted file mode 100644 index 3254b96..0000000 --- a/test/models/aggregate_match_test.rb +++ /dev/null @@ -1,79 +0,0 @@ -# == Schema Information -# -# Table name: aggregate_matches -# -# id :integer not null, primary key -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# -require 'test_helper' - -class AggregateMatchTest < ActiveSupport::TestCase - test 'dois counts are included in aggregation' do - aggregate = AggregateMatch.new.generate - assert aggregate.doi == 1 - end - - test 'issns counts are included in aggregation' do - aggregate = AggregateMatch.new.generate - assert aggregate.issn == 1 - end - - test 'isbns counts are included in aggregation' do - aggregate = AggregateMatch.new.generate - assert aggregate.isbn == 1 - end - - test 'pmids counts are included in aggregation' do - aggregate = AggregateMatch.new.generate - assert aggregate.pmid == 2 - end - - test 'unmatched counts are included are included in aggregation' do - aggregate = AggregateMatch.new.generate - assert aggregate.unmatched == 2 - end - - test 'creating lots of searchevents leads to correct data' do - # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created - SearchEvent.delete_all - - doi_expected_count = rand(1...100) - doi_expected_count.times do - SearchEvent.create(term: terms(:doi), source: 'test') - end - - issn_expected_count = rand(1...100) - issn_expected_count.times do - SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') - end - - isbn_expected_count = rand(1...100) - isbn_expected_count.times do - 
SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') - end - - pmid_expected_count = rand(1...100) - pmid_expected_count.times do - SearchEvent.create(term: terms(:pmid_38908367), source: 'test') - end - - unmatched_expected_count = rand(1...100) - unmatched_expected_count.times do - SearchEvent.create(term: terms(:hi), source: 'test') - end - - aggregate = AggregateMatch.new.generate - - assert doi_expected_count == aggregate.doi - assert issn_expected_count == aggregate.issn - assert isbn_expected_count == aggregate.isbn - assert pmid_expected_count == aggregate.pmid - assert unmatched_expected_count == aggregate.unmatched - end -end diff --git a/test/models/metrics/algorithms_test.rb b/test/models/metrics/algorithms_test.rb new file mode 100644 index 0000000..fa3252d --- /dev/null +++ b/test/models/metrics/algorithms_test.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: metrics_algorithms +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class Algorithms < ActiveSupport::TestCase + # Monthlies + test 'dois counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert aggregate.doi == 1 + end + + test 'issns counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert aggregate.issn == 1 + end + + test 'isbns counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert aggregate.isbn == 1 + end + + test 'pmids counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert aggregate.pmid == 1 + end + + test 'unmatched counts are included are included in monthly aggregation' do + aggregate = 
Metrics::Algorithms.new.generate(DateTime.now) + assert aggregate.unmatched == 2 + end + + test 'creating lots of searchevents leads to correct data for monthly' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + + assert doi_expected_count == aggregate.doi + assert issn_expected_count == aggregate.issn + assert isbn_expected_count == aggregate.isbn + assert pmid_expected_count == aggregate.pmid + assert unmatched_expected_count == aggregate.unmatched + end + + # Total + test 'dois counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert aggregate.doi == 1 + end + + test 'issns counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert aggregate.issn == 1 + end + + test 'isbns counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert aggregate.isbn == 1 + end + + test 'pmids counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert aggregate.pmid == 2 + end + + test 'unmatched counts are included are included in total aggregation' do + aggregate = 
Metrics::Algorithms.new.generate + assert aggregate.unmatched == 2 + end + + test 'creating lots of searchevents leads to correct data for total' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = Metrics::Algorithms.new.generate + + assert doi_expected_count == aggregate.doi + assert issn_expected_count == aggregate.issn + assert isbn_expected_count == aggregate.isbn + assert pmid_expected_count == aggregate.pmid + assert unmatched_expected_count == aggregate.unmatched + end +end diff --git a/test/models/monthly_match_test.rb b/test/models/monthly_match_test.rb deleted file mode 100644 index 74995d9..0000000 --- a/test/models/monthly_match_test.rb +++ /dev/null @@ -1,80 +0,0 @@ -# == Schema Information -# -# Table name: monthly_matches -# -# id :integer not null, primary key -# month :date -# doi :integer -# issn :integer -# isbn :integer -# pmid :integer -# unmatched :integer -# created_at :datetime not null -# updated_at :datetime not null -# -require 'test_helper' - -class MonthlyMatchTest < ActiveSupport::TestCase - test 'dois counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate(DateTime.now) - assert aggregate.doi == 1 - end - - test 'issns 
counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate(DateTime.now) - assert aggregate.issn == 1 - end - - test 'isbns counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate(DateTime.now) - assert aggregate.isbn == 1 - end - - test 'pmids counts are included in aggregation' do - aggregate = MonthlyMatch.new.generate(DateTime.now) - assert aggregate.pmid == 1 - end - - test 'unmatched counts are included are included in aggregation' do - aggregate = MonthlyMatch.new.generate(DateTime.now) - assert aggregate.unmatched == 2 - end - - test 'creating lots of searchevents leads to correct data' do - # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created - SearchEvent.delete_all - - doi_expected_count = rand(1...100) - doi_expected_count.times do - SearchEvent.create(term: terms(:doi), source: 'test') - end - - issn_expected_count = rand(1...100) - issn_expected_count.times do - SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') - end - - isbn_expected_count = rand(1...100) - isbn_expected_count.times do - SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') - end - - pmid_expected_count = rand(1...100) - pmid_expected_count.times do - SearchEvent.create(term: terms(:pmid_38908367), source: 'test') - end - - unmatched_expected_count = rand(1...100) - unmatched_expected_count.times do - SearchEvent.create(term: terms(:hi), source: 'test') - end - - aggregate = MonthlyMatch.new.generate(DateTime.now) - - assert doi_expected_count == aggregate.doi - assert issn_expected_count == aggregate.issn - assert isbn_expected_count == aggregate.isbn - assert pmid_expected_count == aggregate.pmid - assert unmatched_expected_count == aggregate.unmatched - end -end