diff --git a/Gemfile b/Gemfile index 8eeffd0..d077ee0 100644 --- a/Gemfile +++ b/Gemfile @@ -86,6 +86,8 @@ group :production do end group :development, :test do + gem 'awesome_print' + # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem gem 'debug', platforms: %i[mri windows] diff --git a/Gemfile.lock b/Gemfile.lock index 0cbd8e0..e7e8e53 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -100,6 +100,7 @@ GEM rake (>= 10.4, < 14.0) ast (2.4.2) attr_required (1.0.2) + awesome_print (1.9.2) barnes (0.0.9) multi_json (~> 1) statsd-ruby (~> 1.1) @@ -475,6 +476,7 @@ PLATFORMS DEPENDENCIES administrate (~> 0.20.1) annotate + awesome_print barnes bootsnap cancancan diff --git a/app/models/detector/bulk_checker.rb b/app/models/detector/bulk_checker.rb new file mode 100644 index 0000000..5e7841a --- /dev/null +++ b/app/models/detector/bulk_checker.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +class Detector + # BulkChecker is expected to be added to Detectors via `extend Detector::BulkChecker` to allow the + # singleton class to access it + # See also: `PatternChecker` for shared instance methods + module BulkChecker + # This method is intended to be used for inspecting detections during development. + # Assumptions include + # - the Class extending this module implements a `detections` method (either via `attr_reader` or as a method) + # that is only populated for Terms in which it has made a detection + # - the initialize method accepts a `phrase` as a string + # @param output [Boolean] optional. When true, the collected matches are returned. Defaults to false because, in + # development, the logger output is often what is desired. + def check_all_matches(output: false) + count = 0 + matches = [] + Term.find_each do |t| + d = new(t.phrase) + next if d.detections.blank? + + count += 1 + + matches.push [t.phrase, d.detections] + end + + if Rails.env.development? 
+ Rails.logger.ap matches + + Rails.logger.ap "Total Terms: #{Term.count}" + Rails.logger.ap "Total Matches: #{count}" + end + + matches if output + end + end +end diff --git a/app/models/detector/citation.rb b/app/models/detector/citation.rb index 7ef5b2c..4f4132b 100644 --- a/app/models/detector/citation.rb +++ b/app/models/detector/citation.rb @@ -12,6 +12,9 @@ class Detector class Citation attr_reader :score, :subpatterns, :summary + # shared singleton methods + extend Detector::BulkChecker + # Citation patterns are regular expressions which attempt to identify structures that are part of many citations. # This object is used as part of the pattern_checker method. Some of these patterns may get promoted to the Detector # model if they prove useful beyond a Citation context. @@ -29,6 +32,8 @@ class Citation # The required score value is the threshold needed for a phrase to be officially recorded with a Detection via it's # associated Term. + # Hint: set this to 0 in development environments if you want to temporarily see all output + # of `.check_all_matches` rather than just the matches that met this threshold. REQUIRED_SCORE = 6 # Summary thresholds are used by the calculate_score method. This class counts the number of occurrences of specific @@ -69,6 +74,12 @@ def initialize(phrase) @score = calculate_score end + def detections + return unless detection? + + [@summary, @subpatterns, @score] + end + # The record method first runs all of the parsers by running the initialize method. If the resulting score is higher # than the REQUIRED_SCORE value, then a Detection is registered. 
# @param term [Term] diff --git a/app/models/detector/lcsh.rb b/app/models/detector/lcsh.rb index b1e0f22..134437f 100644 --- a/app/models/detector/lcsh.rb +++ b/app/models/detector/lcsh.rb @@ -9,6 +9,9 @@ class Lcsh # shared instance methods include Detector::PatternChecker + # shared singleton methods + extend Detector::BulkChecker + # For now the initialize method just needs to run the pattern checker. A space for future development would be to # write additional methods to look up the detected LCSH for more information, and to confirm that the phrase is # actually an LCSH. diff --git a/app/models/detector/pattern_checker.rb b/app/models/detector/pattern_checker.rb index dc13c23..5768993 100644 --- a/app/models/detector/pattern_checker.rb +++ b/app/models/detector/pattern_checker.rb @@ -3,6 +3,7 @@ class Detector # PatternChecker is intended to be added to Detectors via `include Detector::PatternChecker` to make # these methods available to instances of the class + # See also: `BulkChecker` for shared singleton methods module PatternChecker # pattern_checker iterates over all patterns defined in the calling object's `pattern` method. # diff --git a/app/models/detector/standard_identifiers.rb b/app/models/detector/standard_identifiers.rb index 7b03979..c9b0891 100644 --- a/app/models/detector/standard_identifiers.rb +++ b/app/models/detector/standard_identifiers.rb @@ -13,6 +13,9 @@ def self.table_name_prefix # shared instance methods include Detector::PatternChecker + # shared singleton methods + extend Detector::BulkChecker + # Initialization process will run pattern checkers and strip invalid ISSN detections. # @param phrase String. Often a `Term.phrase`. # @return Nothing intentional. Data is written to Hash `@detections` during processing. 
diff --git a/test/models/detector/bulk_checker_test.rb b/test/models/detector/bulk_checker_test.rb new file mode 100644 index 0000000..92ad967 --- /dev/null +++ b/test/models/detector/bulk_checker_test.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'test_helper' + +class Detector + class BulkCheckerTest < ActiveSupport::TestCase + test 'citation_bulk_checker' do + bulk = Detector::Citation.check_all_matches(output: true) + + assert_equal(1, bulk.count) + end + + test 'journal_bulk_checker' do + skip 'Detector::Journal does not yet support bulk_checker' + end + + test 'lcsh_bulk_checker' do + bulk = Detector::Lcsh.check_all_matches(output: true) + + assert_equal(1, bulk.count) + end + + test 'standard_identifier_bulk_checker' do + bulk = Detector::StandardIdentifiers.check_all_matches(output: true) + + assert_equal(5, bulk.count) + end + + test 'suggested_resources_bulk_checker' do + skip 'Detector::SuggestedResources does not yet support bulk_checker' + end + end +end diff --git a/test/models/detector/citation_test.rb b/test/models/detector/citation_test.rb index 900a84c..008dbf3 100644 --- a/test/models/detector/citation_test.rb +++ b/test/models/detector/citation_test.rb @@ -245,5 +245,20 @@ class CitationTest < ActiveSupport::TestCase assert_equal detection_count + 1, Detection.count end end + + test 'detections returns nil when score is lower than configured' do + result = Detector::Citation.new('nothing here') + + assert_equal 0, result.score + assert_nil result.detections + end + + test 'detections returns expected array when score is higher than configured' do + result = Detector::Citation.new(terms('citation').phrase) + + assert_equal result.summary, result.detections[0] + assert_equal result.subpatterns, result.detections[1] + assert_equal result.score, result.detections[2] + end end end