From a5953c9156ef8592dc420d7c7806624880ddd32b Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Mon, 9 Sep 2024 14:56:40 -0400 Subject: [PATCH] Implement Detector, Category, DetectorCategory ** Why are these changes being introduced: Our chosen architecture calls for a set of models that will comprise a sort of "knowledge graph", which TACOS will consult during the categorization process. This includes classes for Category, Detector, and a linking DetectorCategory class. The Detector and DetectorCategory classes will each define a confidence value. ** Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/tco-82 ** How does this address that need: This defines those classes. The migration includes the creation of the needed records for each class: Three category records, six detectors, and five records which link between them. There are currently no detectors which map to two of the categories, although we have talked about those being needed. Additionally, one detector, SuggestedResource, is unique in that specific records will count toward each category - so it isn't appropriate to have a link record which uniformly connects to only one category. ** Document any side effects to this change: We previously had a Detector model file, but it was defined as a module in order to provide a namespace for the classes nested under it (they do not inherit from Detector). This has been updated to be just a class, which impacts the dashboard and test files. Also, there was a method to define a table name prefix, which has been moved from the Detector file into each of those nested class files.
--- .../detector/suggested_resource_dashboard.rb | 2 +- app/models/category.rb | 16 +++++++ app/models/detector.rb | 17 ++++++-- app/models/detector/journal.rb | 6 ++- app/models/detector/standard_identifiers.rb | 6 ++- app/models/detector/suggested_resource.rb | 6 ++- app/models/detector_category.rb | 17 ++++++++ .../20240909183413_create_categories.rb | 42 +++++++++++++++++++ db/schema.rb | 28 ++++++++++++- test/models/detector/journal_test.rb | 4 +- .../detector/standard_identifiers_test.rb | 4 +- .../detector/suggested_resource_test.rb | 2 +- 12 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 app/models/category.rb create mode 100644 app/models/detector_category.rb create mode 100644 db/migrate/20240909183413_create_categories.rb diff --git a/app/dashboards/detector/suggested_resource_dashboard.rb b/app/dashboards/detector/suggested_resource_dashboard.rb index 83c6f6a..d866fdb 100644 --- a/app/dashboards/detector/suggested_resource_dashboard.rb +++ b/app/dashboards/detector/suggested_resource_dashboard.rb @@ -2,7 +2,7 @@ require 'administrate/base_dashboard' -module Detector +class Detector class SuggestedResourceDashboard < Administrate::BaseDashboard # ATTRIBUTE_TYPES # a hash that describes the type of each of the model's fields. 
diff --git a/app/models/category.rb b/app/models/category.rb new file mode 100644 index 0000000..fe23b04 --- /dev/null +++ b/app/models/category.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: categories +# +# id :integer not null, primary key +# name :string +# description :text +# created_at :datetime not null +# updated_at :datetime not null +# +class Category < ApplicationRecord + has_many :detector_categories, dependent: :destroy + has_many :detectors, through: :detector_categories +end diff --git a/app/models/detector.rb b/app/models/detector.rb index 5694035..8db4df0 100644 --- a/app/models/detector.rb +++ b/app/models/detector.rb @@ -1,9 +1,18 @@ # frozen_string_literal: true +# == Schema Information +# +# Table name: detectors +# +# id :integer not null, primary key +# name :string +# confidence :float +# created_at :datetime not null +# updated_at :datetime not null +# # Detectors are classes that implement various algorithms that allow us to identify patterns # within search terms. -module Detector - def self.table_name_prefix - 'detector_' - end +class Detector < ApplicationRecord + has_many :detector_categories, dependent: :destroy + has_many :categories, through: :detector_categories end diff --git a/app/models/detector/journal.rb b/app/models/detector/journal.rb index ebcf50b..95f720a 100644 --- a/app/models/detector/journal.rb +++ b/app/models/detector/journal.rb @@ -10,12 +10,16 @@ # created_at :datetime not null # updated_at :datetime not null # -module Detector +class Detector # Detector::Journal stores information about academic journals loaded from external sources to allow us to check our # incoming Terms against these information class Journal < ApplicationRecord before_save :downcase_fields! 
+ def self.table_name_prefix + 'detector_' + end + # Identify journals in which the incoming phrase matches a Journal.name exactly # # @note We always store the Journal.name downcased, so we should also always downcase the phrase diff --git a/app/models/detector/standard_identifiers.rb b/app/models/detector/standard_identifiers.rb index 2a7f644..fe25b80 100644 --- a/app/models/detector/standard_identifiers.rb +++ b/app/models/detector/standard_identifiers.rb @@ -1,11 +1,15 @@ # frozen_string_literal: true -module Detector +class Detector # Detector::StandardIdentifiers detects the identifiers DOI, ISBN, ISSN, PMID. # See /docs/reference/pattern_detection_and_enhancement.md for details. class StandardIdentifiers attr_reader :identifiers + def self.table_name_prefix + 'detector_' + end + def initialize(term) @identifiers = {} term_pattern_checker(term) diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index 11bd296..c257989 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -15,13 +15,17 @@ require 'stringex/core_ext' -module Detector +class Detector # Detector::SuggestedResource stores custom hints that we want to send to the # user in response to specific strings. For example, a search for "web of # science" should be met with our custom login link to Web of Science via MIT. class SuggestedResource < ApplicationRecord before_save :update_fingerprint + def self.table_name_prefix + 'detector_' + end + # This exists for the before_save lifecycle hook to call the calculate_fingerprint method, to ensure that these # records always have a correctly-calculated fingerprint. It has no arguments and returns nothing. 
def update_fingerprint diff --git a/app/models/detector_category.rb b/app/models/detector_category.rb new file mode 100644 index 0000000..d322332 --- /dev/null +++ b/app/models/detector_category.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: detector_categories +# +# id :integer not null, primary key +# detector_id :integer not null +# category_id :integer not null +# confidence :float +# created_at :datetime not null +# updated_at :datetime not null +# +class DetectorCategory < ApplicationRecord + belongs_to :category + belongs_to :detector +end diff --git a/db/migrate/20240909183413_create_categories.rb b/db/migrate/20240909183413_create_categories.rb new file mode 100644 index 0000000..8b36f7b --- /dev/null +++ b/db/migrate/20240909183413_create_categories.rb @@ -0,0 +1,42 @@ +class CreateCategories < ActiveRecord::Migration[7.1] + def change + create_table :detectors do |t| + t.string :name + t.float :confidence + + t.timestamps + end + + create_table :categories do |t| + t.string :name + t.text :description + + t.timestamps + end + + create_table :detector_categories do |t| + t.belongs_to :detector, null: false, foreign_key: true + t.belongs_to :category, null: false, foreign_key: true + t.float :confidence + + t.timestamps + end + + Detector.create(name: 'DOI', confidence: 0.95) + Detector.create(name: 'ISBN', confidence: 0.8) + Detector.create(name: 'ISSN', confidence: 0.6) + Detector.create(name: 'PMID', confidence: 0.95) + Detector.create(name: 'Journal', confidence: 0.2) + Detector.create(name: 'SuggestedResource', confidence: 0.95) + + Category.create(name: 'Informational', description: 'A type of search where the user is looking for broad information, rather than an individual item. Also known as "open-ended" or "topical".') + Category.create(name: 'Navigational', description: 'A type of search where the user has a location in mind, and wants to go there. 
In library discovery, this should mean a URL that will not be in the searched index.') + Category.create(name: 'Transactional', description: 'A type of search where the user has an item in mind, and wants to get that item. Also known as "known-item".') + + DetectorCategory.create(detector: Detector.find_by(name: 'DOI'), category: Category.find_by(name: 'Transactional'), confidence: 0.95) + DetectorCategory.create(detector: Detector.find_by(name: 'ISBN'), category: Category.find_by(name: 'Transactional'), confidence: 0.95) + DetectorCategory.create(detector: Detector.find_by(name: 'ISSN'), category: Category.find_by(name: 'Transactional'), confidence: 0.95) + DetectorCategory.create(detector: Detector.find_by(name: 'PMID'), category: Category.find_by(name: 'Transactional'), confidence: 0.95) + DetectorCategory.create(detector: Detector.find_by(name: 'Journal'), category: Category.find_by(name: 'Transactional'), confidence: 0.5) + end +end diff --git a/db/schema.rb b/db/schema.rb index 5be5861..f2c610f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,24 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.1].define(version: 2024_08_13_181057) do +ActiveRecord::Schema[7.1].define(version: 2024_09_09_183413) do + create_table "categories", force: :cascade do |t| + t.string "name" + t.text "description" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + + create_table "detector_categories", force: :cascade do |t| + t.integer "detector_id", null: false + t.integer "category_id", null: false + t.float "confidence" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["category_id"], name: "index_detector_categories_on_category_id" + t.index ["detector_id"], name: "index_detector_categories_on_detector_id" + end + create_table "detector_journals", force: :cascade do |t| t.string "name" t.json "additional_info" @@ -30,6 +47,13 @@ t.index ["phrase"], name: "index_detector_suggested_resources_on_phrase", unique: true end + create_table "detectors", force: :cascade do |t| + t.string "name" + t.float "confidence" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "metrics_algorithms", force: :cascade do |t| t.date "month" t.integer "doi" @@ -69,4 +93,6 @@ t.index ["uid"], name: "index_users_on_uid", unique: true end + add_foreign_key "detector_categories", "categories" + add_foreign_key "detector_categories", "detectors" end diff --git a/test/models/detector/journal_test.rb b/test/models/detector/journal_test.rb index ccfba12..9b5ba40 100644 --- a/test/models/detector/journal_test.rb +++ b/test/models/detector/journal_test.rb @@ -12,7 +12,7 @@ # require 'test_helper' -module Detector +class Detector class JournalTest < ActiveSupport::TestCase test 'exact term match on journal name' do expected = detector_journals('the_new_england_journal_of_medicine') @@ -57,4 +57,4 @@ class JournalTest < ActiveSupport::TestCase assert_equal(mixed_case.downcase, actual.name) end end -end +end \ No newline at end of file diff --git 
a/test/models/detector/standard_identifiers_test.rb b/test/models/detector/standard_identifiers_test.rb index bee47d8..36c50cd 100644 --- a/test/models/detector/standard_identifiers_test.rb +++ b/test/models/detector/standard_identifiers_test.rb @@ -2,7 +2,7 @@ require 'test_helper' -module Detector +class Detector class StandardIdentifiersTest < ActiveSupport::TestCase test 'ISBN detected in a string' do actual = Detector::StandardIdentifiers.new('test 978-3-16-148410-0 test').identifiers @@ -191,4 +191,4 @@ class StandardIdentifiersTest < ActiveSupport::TestCase end end end -end +end \ No newline at end of file diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index 815f497..efbb00a 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -14,7 +14,7 @@ # require 'test_helper' -module Detector +class Detector class SuggestedResourceTest < ActiveSupport::TestCase test 'fingerprints are generated automatically' do resource = {