Skip to content

Commit

Permalink
Work in progress to include SuggestedResource matches in metrics
Browse files Browse the repository at this point in the history
Now includes database migration

Also includes tests
  • Loading branch information
matt-bernhardt committed Aug 13, 2024
1 parent b8a272d commit 136efc8
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 4 deletions.
8 changes: 6 additions & 2 deletions app/models/detector/suggested_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ class SuggestedResource < ApplicationRecord
before_save :update_fingerprint

# Recomputes the stored fingerprint from the record's current phrase.
# Registered as a before_save callback, so it runs on every save.
#
# The fingerprint is computed through the class-level
# Detector::SuggestedResource.calculate_fingerprint; the superseded call to an
# instance-level calculate_fingerprint has been removed so only one
# assignment remains.
def update_fingerprint
  self.fingerprint = Detector::SuggestedResource.calculate_fingerprint(phrase)
end

# This implements the OpenRefine fingerprinting algorithm. See
# https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint
def calculate_fingerprint(old_phrase)
def self.calculate_fingerprint(old_phrase)
modified_phrase = old_phrase
modified_phrase = modified_phrase.strip
modified_phrase = modified_phrase.downcase
Expand Down Expand Up @@ -76,5 +76,9 @@ def self.bulk_replace(input)
record.save
end
end

# Looks up suggested resources whose stored fingerprint equals the
# fingerprint calculated for the supplied phrase.
#
# @param phrase [String] the search phrase to fingerprint and look up
# @return the result of SuggestedResource.where for that fingerprint
def self.full_term_match(phrase)
  target = calculate_fingerprint(phrase)
  SuggestedResource.where(fingerprint: target)
end
end
end
10 changes: 9 additions & 1 deletion app/models/metrics/algorithms.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def generate(month = nil)
end
Metrics::Algorithms.create(month:, doi: matches[:doi], issn: matches[:issn], isbn: matches[:isbn],
pmid: matches[:pmid], journal_exact: matches[:journal_exact],
suggested_resource_exact: matches[:suggested_resource_exact],
unmatched: matches[:unmatched])
end

Expand Down Expand Up @@ -73,8 +74,9 @@ def count_matches(events)
# Runs every detector against a single search event and records the outcome
# in the shared matches tally.
#
# All three detectors are always invoked, because each one updates its own
# counters in matches. The :unmatched counter is then incremented exactly
# once, and only when none of the detectors found anything. (The superseded
# guard that ignored suggested resources has been removed; keeping both
# guards would double count fully unmatched events.)
#
# @param event the search event being evaluated
# @param matches [Hash] running tally of match counts, mutated in place
def event_matches(event, matches)
  ids = match_standard_identifiers(event, matches)
  journal_exact = process_journals(event, matches)
  suggested_resource_exact = process_suggested_resources(event, matches)

  return unless ids.identifiers.blank?
  return unless journal_exact.count.zero? && suggested_resource_exact.count.zero?

  matches[:unmatched] += 1
end

# Checks for StandardIdentifier matches
Expand Down Expand Up @@ -107,5 +109,11 @@ def process_journals(event, matches)
matches[:journal_exact] += 1 if journal_exact.count.positive?
journal_exact
end

# Checks the event's term phrase against suggested resources and bumps the
# :suggested_resource_exact tally when at least one record comes back.
#
# @param event the search event; its term's phrase is the lookup key
# @param matches [Hash] running tally of match counts, mutated in place
# @return whatever Detector::SuggestedResource.full_term_match returned
def process_suggested_resources(event, matches)
  phrase = event.term.phrase

  Detector::SuggestedResource.full_term_match(phrase).tap do |found|
    matches[:suggested_resource_exact] += 1 if found.count.positive?
  end
end
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds an integer suggested_resource_exact column to the metrics_algorithms
# table. No default or null constraint is specified.
class AddSuggestedResourceExactToMetricsAlgorithm < ActiveRecord::Migration[7.1]
  def change
    change_table :metrics_algorithms do |t|
      t.integer :suggested_resource_exact
    end
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions test/fixtures/search_events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,10 @@ old_month_nature_medicine:
term: journal_nature_medicine
source: test
created_at: <%= 1.year.ago %>
suggested_resource_jstor:
term: suggested_resource_jstor
source: test
old_suggested_resource_jstor:
term: suggested_resource_jstor
source: test
created_at: <%= 1.year.ago %>
3 changes: 3 additions & 0 deletions test/fixtures/terms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ isbn_9781319145446:

journal_nature_medicine:
phrase: 'nature medicine'

suggested_resource_jstor:
phrase: 'jstor'
18 changes: 18 additions & 0 deletions test/models/metrics/algorithms_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class Algorithms < ActiveSupport::TestCase
assert_equal 1, aggregate.journal_exact
end

test 'suggested_resource exact counts are included in monthly aggregation' do
  # The search_events fixtures define two jstor events, one of them created a
  # year ago, so only one is expected in the monthly figure (presumably the
  # fixture without an explicit created_at is timestamped at load time).
  monthly = Metrics::Algorithms.new.generate(DateTime.now)

  assert_equal(1, monthly.suggested_resource_exact)
end

test 'unmatched counts are included are included in monthly aggregation' do
aggregate = Metrics::Algorithms.new.generate(DateTime.now)

Expand Down Expand Up @@ -124,6 +130,12 @@ class Algorithms < ActiveSupport::TestCase
assert_equal 2, aggregate.journal_exact
end

test 'suggested_resource exact counts are included in total aggregation' do
  # Calling generate without a month is expected to count both jstor
  # search_events fixtures, including the one created a year ago.
  overall = Metrics::Algorithms.new.generate

  assert_equal(2, overall.suggested_resource_exact)
end

test 'unmatched counts are included are included in total aggregation' do
aggregate = Metrics::Algorithms.new.generate

Expand Down Expand Up @@ -159,6 +171,11 @@ class Algorithms < ActiveSupport::TestCase
SearchEvent.create(term: terms(:journal_nature_medicine), source: 'test')
end

suggested_resource_exact_count = rand(1...100)
suggested_resource_exact_count.times do
SearchEvent.create(term: terms(:suggested_resource_jstor), source: 'test')
end

unmatched_expected_count = rand(1...100)
unmatched_expected_count.times do
SearchEvent.create(term: terms(:hi), source: 'test')
Expand All @@ -171,6 +188,7 @@ class Algorithms < ActiveSupport::TestCase
assert_equal isbn_expected_count, aggregate.isbn
assert_equal pmid_expected_count, aggregate.pmid
assert_equal journal_exact_count, aggregate.journal_exact
assert_equal suggested_resource_exact_count, aggregate.suggested_resource_exact
assert_equal unmatched_expected_count, aggregate.unmatched
end
end

0 comments on commit 136efc8

Please sign in to comment.