diff --git a/app/models/metrics/algorithms.rb b/app/models/metrics/algorithms.rb new file mode 100644 index 0000000..6343081 --- /dev/null +++ b/app/models/metrics/algorithms.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: metrics_algorithms +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +module Metrics + # Algorithms aggregates statistics for matches for all SearchEvents + class Algorithms < ApplicationRecord + self.table_name = 'metrics_algorithms' + + # generate metrics data about SearchEvents matches + # + # @note This is expected to only be run once per month per type of aggregation (once with no month supplied, once + # with a month supplied), ideally at the beginning of the following month to ensure as + # accurate as possible statistics. Running further from the month in question will work, but matches will use the + # current versions of all algorithms which may not match the algorithm in place during the month the SearchEvent + # occurred. + # @note We don't currently prevent this running more than once per month per type of aggregation. + # @param month [DateTime] A DateTime object within the `month` to be generated. Defaults to nil, which is how + # total algorithm statistics are created. + # @example + # # Generate metrics for all SearchEvents + # Metrics::Algorithms.new.generate + # + # # Generate metrics for all SearchEvents last month + # Metrics::Algorithms.new.generate(1.month.ago) + # @return [Metrics::Algorithms] The created Metrics::Algorithms object. + def generate(month = nil) + matches = if month.present? 
+ count_matches(SearchEvent.single_month(month).includes(:term)) + else + count_matches(SearchEvent.all.includes(:term)) + end + Metrics::Algorithms.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn], + pmid: matches[:pmid], unmatched: matches[:unmatched]) + end + + # Counts matches for the supplied events + # + # @note We currently only have StandardIdentifiers to match. As we add new algorithms, this method will need to + # expand to handle additional match types. + # @param events [Enumerable<SearchEvent>] The SearchEvents to check for matches. + # @return [Hash] A Hash with keys for each known algorithm and the count of matched SearchEvents. + def count_matches(events) + matches = Hash.new(0) + known_ids = %i[unmatched pmid isbn issn doi] + + events.each do |event| + ids = StandardIdentifiers.new(event.term.phrase) + + matches[:unmatched] += 1 if ids.identifiers.blank? + + known_ids.each do |id| + matches[id] += 1 if ids.identifiers[id].present? + end + end + + matches + end + end +end diff --git a/app/models/search_event.rb b/app/models/search_event.rb index 46cae56..8fe5a5f 100644 --- a/app/models/search_event.rb +++ b/app/models/search_event.rb @@ -10,8 +10,16 @@ # created_at :datetime not null # updated_at :datetime not null # + +# SearchEvent represents an instance of a logged search Term class SearchEvent < ApplicationRecord belongs_to :term validates :source, presence: true + + # :single_month filters to requested month + # + # @param month [DateTime] A DateTime object within the `month` to be filtered. + # @return [ActiveRecord::Relation] All SearchEvents for the supplied `month`. 
+ scope :single_month, ->(month) { where(created_at: month.all_month) } end diff --git a/app/models/standard_identifiers.rb b/app/models/standard_identifiers.rb index b2b383f..5588625 100644 --- a/app/models/standard_identifiers.rb +++ b/app/models/standard_identifiers.rb @@ -41,7 +41,7 @@ def term_patterns def strip_invalid_issns return unless @identifiers[:issn] - @identifiers[:issn] = nil unless validate_issn(@identifiers[:issn]) + @identifiers.delete(:issn) unless validate_issn(@identifiers[:issn]) end # validate_issn is only called when the regex for an ISSN has indicated an ISSN diff --git a/config/environments/development.rb b/config/environments/development.rb index 2e7fb48..4eec57e 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -1,4 +1,4 @@ -require "active_support/core_ext/integer/time" +require 'active_support/core_ext/integer/time' Rails.application.configure do # Settings specified here will take precedence over those in config/application.rb. @@ -19,13 +19,13 @@ # Enable/disable caching. By default caching is disabled. # Run rails dev:cache to toggle caching. - if Rails.root.join("tmp/caching-dev.txt").exist? + if Rails.root.join('tmp/caching-dev.txt').exist? 
+ config.action_controller.perform_caching = true config.action_controller.enable_fragment_cache_logging = true config.cache_store = :memory_store config.public_file_server.headers = { - "Cache-Control" => "public, max-age=#{2.days.to_i}" + 'Cache-Control' => "public, max-age=#{2.days.to_i}" } else config.action_controller.perform_caching = false @@ -73,4 +73,8 @@ # Raise error when a before_action's only/except options reference missing actions config.action_controller.raise_on_missing_callback_actions = true + + # Local logging overrides: send debug-level logs to standard output + config.logger = Logger.new($stdout) + config.log_level = :debug end diff --git a/db/migrate/20240621132136_create_metrics_algorithms.rb b/db/migrate/20240621132136_create_metrics_algorithms.rb new file mode 100644 index 0000000..be6ef1a --- /dev/null +++ b/db/migrate/20240621132136_create_metrics_algorithms.rb @@ -0,0 +1,13 @@ +class CreateMetricsAlgorithms < ActiveRecord::Migration[7.1] + def change + create_table :metrics_algorithms do |t| + t.date :month + t.integer :doi + t.integer :issn + t.integer :isbn + t.integer :pmid + t.integer :unmatched + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 2ac49e5..384ff92 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,18 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.1].define(version: 2023_10_19_191933) do +ActiveRecord::Schema[7.1].define(version: 2024_06_21_132136) do + create_table "metrics_algorithms", force: :cascade do |t| + t.date "month" + t.integer "doi" + t.integer "issn" + t.integer "isbn" + t.integer "pmid" + t.integer "unmatched" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "search_events", force: :cascade do |t| t.integer "term_id" t.string "source" diff --git a/test/fixtures/search_events.yml b/test/fixtures/search_events.yml index eb48a8b..3bae084 100644 --- a/test/fixtures/search_events.yml +++ b/test/fixtures/search_events.yml @@ -15,3 +15,19 @@ timdex_cool: bento_hi: term: hi source: bento +current_month_pmid: + term: pmid_38908367 + source: test +old_month_pmid: + term: pmid_38908367 + source: test + created_at: <%= 1.year.ago %> +current_month_issn: + term: issn_1075_8623 + source: test +current_month_doi: + term: doi + source: test +current_month_isbn: + term: isbn_9781319145446 + source: test diff --git a/test/fixtures/terms.yml b/test/fixtures/terms.yml index 128b327..6e49cf6 100644 --- a/test/fixtures/terms.yml +++ b/test/fixtures/terms.yml @@ -13,3 +13,15 @@ cool: hi: phrase: hello world + +pmid_38908367: + phrase: 'TERT activation targets DNA methylation and multiple aging hallmarks. Shim HS, et al. Cell. 2024. PMID: 38908367' + +issn_1075_8623: + phrase: 1075-8623 + +doi: + phrase: '10.1016/j.physio.2010.12.004' + +isbn_9781319145446: + phrase: 'Sadava, D. E., D. M. Hillis, et al. Life: The Science of Biology. 11th ed. W. H. Freeman, 2016. 
ISBN: 9781319145446' diff --git a/test/models/metrics/algorithms_test.rb b/test/models/metrics/algorithms_test.rb new file mode 100644 index 0000000..fa3252d --- /dev/null +++ b/test/models/metrics/algorithms_test.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: metrics_algorithms +# +# id :integer not null, primary key +# month :date +# doi :integer +# issn :integer +# isbn :integer +# pmid :integer +# unmatched :integer +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class Algorithms < ActiveSupport::TestCase + # Monthly aggregation (generate called with a month) + test 'dois counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert_equal 1, aggregate.doi + end + + test 'issns counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert_equal 1, aggregate.issn + end + + test 'isbns counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert_equal 1, aggregate.isbn + end + + test 'pmids counts are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert_equal 1, aggregate.pmid + end + + test 'unmatched counts are included are included in monthly aggregation' do + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + assert_equal 2, aggregate.unmatched + end + + test 'creating lots of searchevents leads to correct data for monthly' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + 
+ SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = Metrics::Algorithms.new.generate(DateTime.now) + + assert_equal doi_expected_count, aggregate.doi + assert_equal issn_expected_count, aggregate.issn + assert_equal isbn_expected_count, aggregate.isbn + assert_equal pmid_expected_count, aggregate.pmid + assert_equal unmatched_expected_count, aggregate.unmatched + end + + # Total aggregation (generate called without a month) + test 'dois counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert_equal 1, aggregate.doi + end + + test 'issns counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert_equal 1, aggregate.issn + end + + test 'isbns counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert_equal 1, aggregate.isbn + end + + test 'pmids counts are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert_equal 2, aggregate.pmid + end + + test 'unmatched counts are included are included in total aggregation' do + aggregate = Metrics::Algorithms.new.generate + assert_equal 2, aggregate.unmatched + end + + test 'creating lots of searchevents leads to correct data for total' do + # drop all searchevents to make math easier and minimize fragility over time as more fixtures are created + SearchEvent.delete_all + + doi_expected_count = rand(1...100) + doi_expected_count.times do + SearchEvent.create(term: terms(:doi), source: 'test') + end + + issn_expected_count = rand(1...100) + issn_expected_count.times do + SearchEvent.create(term: terms(:issn_1075_8623), source: 'test') + end + + isbn_expected_count = rand(1...100) + isbn_expected_count.times do + 
+ SearchEvent.create(term: terms(:isbn_9781319145446), source: 'test') + end + + pmid_expected_count = rand(1...100) + pmid_expected_count.times do + SearchEvent.create(term: terms(:pmid_38908367), source: 'test') + end + + unmatched_expected_count = rand(1...100) + unmatched_expected_count.times do + SearchEvent.create(term: terms(:hi), source: 'test') + end + + aggregate = Metrics::Algorithms.new.generate + + assert_equal doi_expected_count, aggregate.doi + assert_equal issn_expected_count, aggregate.issn + assert_equal isbn_expected_count, aggregate.isbn + assert_equal pmid_expected_count, aggregate.pmid + assert_equal unmatched_expected_count, aggregate.unmatched + end +end diff --git a/test/models/search_event_test.rb b/test/models/search_event_test.rb index dd87810..ec921d2 100644 --- a/test/models/search_event_test.rb +++ b/test/models/search_event_test.rb @@ -28,4 +28,14 @@ class SearchEventTest < ActiveSupport::TestCase s.source = nil refute(s.valid?) end + + test 'monthly scope returns requested month of SearchEvents' do + assert_includes SearchEvent.all, search_events(:current_month_pmid) + assert_includes SearchEvent.single_month(Time.now), search_events(:current_month_pmid) + end + + test 'monthly scope does not return SearchEvents outside the requested month' do + assert_includes SearchEvent.all, search_events(:old_month_pmid) + refute_includes SearchEvent.single_month(Time.now), search_events(:old_month_pmid) + end end diff --git a/test/test_helper.rb b/test/test_helper.rb index f55fa99..9e51c06 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -24,6 +24,14 @@ class TestCase # Run tests in parallel with specified workers parallelize(workers: :number_of_processors) + parallelize_setup do |worker| + SimpleCov.command_name "#{SimpleCov.command_name}-#{worker}" + end + + parallelize_teardown do |_worker| + SimpleCov.result + end + # Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order. fixtures :all