Skip to content

Commit

Permalink
Merge pull request #162 from MITLibraries/tco-110-journals
Browse files Browse the repository at this point in the history
Split Detector::Journals model into two, extending BulkChecker into the detection model
  • Loading branch information
matt-bernhardt authored Dec 20, 2024
2 parents 351e5e3 + ac9a31f commit 143d467
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 68 deletions.
43 changes: 14 additions & 29 deletions app/models/detector/journal.rb
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: detector_journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#
class Detector
# Detector::Journal stores information about academic journals loaded from external sources to allow us to check our
# incoming Terms against this information.
class Journal < ApplicationRecord
before_save :downcase_fields!
# Detector::Journal handles the comparison between incoming Term records and our known list of academic journals
# (which are managed by the separate Journal model).
class Journal
attr_reader :detections

def self.table_name_prefix
'detector_'
# shared singleton methods
extend Detector::BulkChecker

def initialize(phrase)
@detections = Detector::Journal.full_term_match(phrase)
end

# Identify journals in which the incoming phrase matches a Journal.name exactly
Expand All @@ -30,9 +23,9 @@ def self.table_name_prefix
#
# @param phrase [String]. A string representation of a search term (not an actual Term object!)
#
# @return [Set of Detector::Journal] A set of ActiveRecord Detector::Journal relations.
# @return [Set of Journal] A set of ActiveRecord Journal records.
def self.full_term_match(phrase)
Journal.where(name: phrase.downcase)
::Journal.where(name: phrase.downcase)
end

# Identify journals in which the incoming phrase contains one or more Journal names
Expand All @@ -41,12 +34,12 @@ def self.full_term_match(phrase)
#
# @param phrase [String]. A string representation of a search term (not an actual Term object!)
#
# @return [Set of Detector::Journal] A set of ActiveRecord Detector::Journal relations.
# @return [Set of Journal] A set of ActiveRecord Journal records.
def self.partial_term_match(phrase)
Journal.all.select { |journal| phrase.downcase.include?(journal.name) }
::Journal.all.select { |journal| phrase.downcase.include?(journal.name) }
end

# Look up any matching Detector::Journal records, building on the full_term_match method. If a match is found, a
# Look up any matching Journal records, building on the full_term_match method. If a match is found, a
# Detection record is created to indicate this success.
#
# @note This does not care whether multiple matching journals are detected. If _any_ match is found, a Detection
Expand All @@ -65,13 +58,5 @@ def self.record(term)

nil
end

private

# Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
# If we find we need the non-lowercase Journal name in the future, we could store that as `additional_info` json
def downcase_fields!
name.downcase!
end
end
end
28 changes: 28 additions & 0 deletions app/models/journal.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#

# Journal is the list of academic journals which are known to TACOS. This list of records is referred to by the
# Detector::Journal model in order to determine whether a given term matches a known journal. The names of these
# journals are stored in lowercase, which matches how the Detector::Journal processes incoming terms, in order to
# prevent capitalization differences resulting in a false negative.
class Journal < ApplicationRecord
  # Normalize the name on every save so stored values are always lowercase.
  before_save :downcase_fields!

  private

  # Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
  # If we find we need the non-lowercase Journal name in the future, we could store that as `additional_info` json.
  #
  # The name column is created without a NOT NULL constraint, so a record may be saved without a name. Safe
  # navigation leaves a nil name as nil instead of raising NoMethodError from inside the callback.
  #
  # @return [String, nil] the lowercased name, or nil when no name is set
  def downcase_fields!
    self.name = name&.downcase
  end
end
11 changes: 11 additions & 0 deletions db/migrate/20241218192753_create_journals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Creates the journals table: a string name (indexed), a JSON additional_info column, and timestamps.
class CreateJournals < ActiveRecord::Migration[7.1]
  def change
    create_table(:journals) do |table|
      # `index: true` declares the default single-column index on name together with the column.
      table.string :name, index: true
      table.json :additional_info
      table.timestamps
    end
  end
end
15 changes: 15 additions & 0 deletions db/migrate/20241219144452_drop_detector_journals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Drops the detector_journals table; rolling back recreates its structure.
class DropDetectorJournals < ActiveRecord::Migration[7.1]
  # Removes the detector_journals table.
  def up
    drop_table(:detector_journals)
  end

  # Recreates the detector_journals columns and the index on name. Only the structure is rebuilt here; no rows
  # are inserted.
  def down
    create_table(:detector_journals) do |table|
      table.string :name, index: true
      table.json :additional_info
      table.timestamps
    end
  end
end
18 changes: 9 additions & 9 deletions db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions docs/reference/classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ classDiagram
Detector "1" --> "0..*" DetectorCategory
DetectorJournal -- Journal : references
Confirmation --> Term
Confirmation --> Category
User --> Confirmation : provides many
Expand Down Expand Up @@ -110,6 +112,11 @@ classDiagram
DetectorSuggestedResource: record()
DetectorSuggestedResource: update_fingerprint()
class Journal
Journal: +Integer id
Journal: +String name
Journal: +JSON additional_info
class Confirmation
Confirmation: +Integer id
Confirmation: +Integer user_id
Expand All @@ -134,6 +141,7 @@ classDiagram
class DetectorLcsh["Detector::Lcsh"]
class DetectorStandardIdentifier["Detector::StandardIdentifiers"]
class DetectorSuggestedResource["Detector::SuggestedResource"]
class Journal
}
namespace UserActivity {
Expand All @@ -153,6 +161,7 @@ classDiagram
style DetectorLcsh fill:#000,stroke:#fc8d62,color:#fc8d62
style DetectorStandardIdentifier fill:#000,stroke:#fc8d62,color:#fc8d62
style DetectorSuggestedResource fill:#000,stroke:#fc8d62,color:#fc8d62
style Journal fill:#000,stroke:#fc8d62,color:#fc8d62
style Categorization fill:#000,stroke:#8da0cb,color:#8da0cb,stroke-dasharray: 3 5;
style Detection fill:#000,stroke:#8da0cb,color:#8da0cb,stroke-dasharray: 3 5;
Expand Down
14 changes: 7 additions & 7 deletions lib/tasks/journals.rake
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,18 @@ namespace :journals do
end

# Delete all journals. We do this to simplify the loader process to avoid consideration of updates/deletes.
Detector::Journal.delete_all
Journal.delete_all

# not ideal, we should consider streaming the file rather than loading it fully into memory
json = JSON.parse(data)

json['core'].each do |journal|
Detector::Journal.create(name: journal['title'],
additional_info: { issns: journal['issns'],
publisher: journal['publisher'],
alternate_titles: journal['alternate_titles'],
type: journal['type'],
abbreviated_title: journal['abbreviated_title'] })
Journal.create(name: journal['title'],
additional_info: { issns: journal['issns'],
publisher: journal['publisher'],
alternate_titles: journal['alternate_titles'],
type: journal['type'],
abbreviated_title: journal['abbreviated_title'] })
end
end
end
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# == Schema Information
#
# Table name: detector_journals
# Table name: journals
#
# id :integer not null, primary key
# name :string
Expand Down
4 changes: 3 additions & 1 deletion test/models/detector/bulk_checker_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ class CitationTest < ActiveSupport::TestCase
end

test 'journal_bulk_checker' do
skip 'Detector::Journal does not yet support bulk_checker'
bulk = Detector::Journal.check_all_matches(output: true)

assert_equal(1, bulk.count)
end

test 'lcsh_bulk_checker' do
Expand Down
23 changes: 2 additions & 21 deletions test/models/detector/journal_test.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,19 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: detector_journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#
require 'test_helper'

class Detector
class JournalTest < ActiveSupport::TestCase
test 'exact term match on journal name' do
expected = detector_journals('the_new_england_journal_of_medicine')
expected = journals('the_new_england_journal_of_medicine')
actual = Detector::Journal.full_term_match('the new england journal of medicine')

assert_equal 1, actual.count
assert_equal(expected, actual.first)
end

test 'mixed case exact term match on journal name' do
expected = detector_journals('the_new_england_journal_of_medicine')
expected = journals('the_new_england_journal_of_medicine')
actual = Detector::Journal.full_term_match('The New England Journal of Medicine')

assert_equal 1, actual.count
Expand All @@ -48,15 +38,6 @@ class JournalTest < ActiveSupport::TestCase
assert_equal 2, actual.count
end

test 'mixed titles are downcased when saved' do
mixed_case = 'ThIs Is A tItLe'
actual = Detector::Journal.create(name: mixed_case)
actual.reload

assert_not_equal(mixed_case, actual.name)
assert_equal(mixed_case.downcase, actual.name)
end

test 'record does relevant work' do
detection_count = Detection.count
t = terms('journal_nature_medicine')
Expand Down
24 changes: 24 additions & 0 deletions test/models/journal_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#
require 'test_helper'

class JournalTest < ActiveSupport::TestCase
  # Saving a Journal should persist the lowercase form of whatever name was supplied.
  test 'mixed titles are downcased when saved' do
    title = 'ThIs Is A tItLe'
    journal = Journal.create(name: title)
    journal.reload
    stored_name = journal.name

    assert_not_equal(title, stored_name)
    assert_equal(title.downcase, stored_name)
  end
end

0 comments on commit 143d467

Please sign in to comment.