From 1b3ba42f6cb2838f19efddb1244f3eeacaa438d9 Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Tue, 16 Jul 2024 10:50:05 -0400 Subject: [PATCH 1/5] Basic rails model generation bin/rails generate model Detector::SuggestedResource title:string url:string phrase:string:uniq fingerprint:string:uniq --- app/models/detector/suggested_resource.rb | 2 ++ ...850_create_detector_suggested_resources.rb | 14 +++++++++++++ db/schema.rb | 13 +++++++++++- .../fixtures/detector/suggested_resources.yml | 20 +++++++++++++++++++ .../detector/suggested_resource_test.rb | 7 +++++++ 5 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 app/models/detector/suggested_resource.rb create mode 100644 db/migrate/20240716143850_create_detector_suggested_resources.rb create mode 100644 test/fixtures/detector/suggested_resources.yml create mode 100644 test/models/detector/suggested_resource_test.rb diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb new file mode 100644 index 0000000..7675cbc --- /dev/null +++ b/app/models/detector/suggested_resource.rb @@ -0,0 +1,2 @@ +class Detector::SuggestedResource < ApplicationRecord +end diff --git a/db/migrate/20240716143850_create_detector_suggested_resources.rb b/db/migrate/20240716143850_create_detector_suggested_resources.rb new file mode 100644 index 0000000..b13e90b --- /dev/null +++ b/db/migrate/20240716143850_create_detector_suggested_resources.rb @@ -0,0 +1,14 @@ +class CreateDetectorSuggestedResources < ActiveRecord::Migration[7.1] + def change + create_table :detector_suggested_resources do |t| + t.string :title + t.string :url + t.string :phrase + t.string :fingerprint + + t.timestamps + end + add_index :detector_suggested_resources, :phrase, unique: true + add_index :detector_suggested_resources, :fingerprint, unique: true + end +end diff --git a/db/schema.rb b/db/schema.rb index 3acaf32..52c9d2d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's 
strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_07_01_205444) do +ActiveRecord::Schema[7.1].define(version: 2024_07_16_143850) do create_table "detector_journals", force: :cascade do |t| t.string "name" t.json "additional_info" @@ -19,6 +19,17 @@ t.index ["name"], name: "index_detector_journals_on_name" end + create_table "detector_suggested_resources", force: :cascade do |t| + t.string "title" + t.string "url" + t.string "phrase" + t.string "fingerprint" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["fingerprint"], name: "index_detector_suggested_resources_on_fingerprint", unique: true + t.index ["phrase"], name: "index_detector_suggested_resources_on_phrase", unique: true + end + create_table "metrics_algorithms", force: :cascade do |t| t.date "month" t.integer "doi" diff --git a/test/fixtures/detector/suggested_resources.yml b/test/fixtures/detector/suggested_resources.yml new file mode 100644 index 0000000..c5ace34 --- /dev/null +++ b/test/fixtures/detector/suggested_resources.yml @@ -0,0 +1,20 @@ +# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + + +one: + title: JSTOR + url: https://libguides.mit.edu/jstor + phrase: jstor + fingerprint: jstor + +two: + title: Web of Science + url: https://libguides.mit.edu/webofsci + phrase: web of science + fingerprint: of science web + +three: + title: Web of Knowledge + url: https://libguides.mit.edu/webofsci + phrase: web of knowledge + fingerprint: knowledge of web diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb new file mode 100644 index 0000000..7a4c844 --- /dev/null +++ b/test/models/detector/suggested_resource_test.rb @@ -0,0 +1,7 @@ +require "test_helper" + +class Detector::SuggestedResourceTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end From 
03597ba02b757a6e29c980d53884d473d9a29275 Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Tue, 16 Jul 2024 14:30:19 -0400 Subject: [PATCH 2/5] Model annotation Output of `make annotate` --- app/models/detector/suggested_resource.rb | 12 ++++++++++++ test/fixtures/detector/suggested_resources.yml | 12 ++++++++++++ test/models/detector/suggested_resource_test.rb | 12 ++++++++++++ 3 files changed, 36 insertions(+) diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index 7675cbc..12ed72b 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -1,2 +1,14 @@ +# == Schema Information +# +# Table name: detector_suggested_resources +# +# id :integer not null, primary key +# title :string +# url :string +# phrase :string +# fingerprint :string +# created_at :datetime not null +# updated_at :datetime not null +# class Detector::SuggestedResource < ApplicationRecord end diff --git a/test/fixtures/detector/suggested_resources.yml b/test/fixtures/detector/suggested_resources.yml index c5ace34..a145aab 100644 --- a/test/fixtures/detector/suggested_resources.yml +++ b/test/fixtures/detector/suggested_resources.yml @@ -1,3 +1,15 @@ +# == Schema Information +# +# Table name: detector_suggested_resources +# +# id :integer not null, primary key +# title :string +# url :string +# phrase :string +# fingerprint :string +# created_at :datetime not null +# updated_at :datetime not null +# # Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index 7a4c844..8f43900 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -1,3 +1,15 @@ +# == Schema Information +# +# Table name: detector_suggested_resources +# +# id :integer not null, primary key +# title :string +# url :string +# 
phrase :string +# fingerprint :string +# created_at :datetime not null +# updated_at :datetime not null +# require "test_helper" class Detector::SuggestedResourceTest < ActiveSupport::TestCase From d8d45e5d1ca91a77f9ab36dbef8b1d3bee703cb4 Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Wed, 24 Jul 2024 16:50:24 -0400 Subject: [PATCH 3/5] Build out fingerprinting logic and tests ** Why are these changes being introduced: * With the SuggestedResource model being auto-generated and annotated in previous commits, we now need to implement the custom fingerprinting logic from the Bento application, and write some tests about that logic. ** Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/tco-22 ** How does this address that need: * The bulk of this commit is the calculate_fingerprint method in the Detector::SuggestedResource model. This is nearly the same as the model from Bento, with two changes: 1. We no longer preserve c++ or c#, and run the gsub command in this method. 2. The position of the .to_ascii method moves up in the order, to match the current documentation from OpenRefine. This also resolves some flaws in ordering and term handling that occurred when the .to_ascii call happened last. * Fixture names are made more meaningful * We add the stringex gem to get the .to_ascii method in the first place, which may end up not being needed as we continue to develop the application. For now, though, I suspect that we'll benefit from having it. * Add some tests around how the fingerprint logic operates, focusing on character manipulation and the before_save hook's impact. ** Document any side effects to this change: * The method applies the punctuation and symbol cleaning twice, because the .to_ascii method can introduce new punctuation when converting some letters. However, running the regex just once still caused problems with these tests, so running it twice appears to be necessary.
* This also removes a comment in the fixtures that was introduced when I re-ran the annotation command. --- Gemfile | 2 + Gemfile.lock | 2 + app/models/detector/suggested_resource.rb | 36 ++++++++++++++ .../fixtures/detector/suggested_resources.yml | 7 ++- .../detector/suggested_resource_test.rb | 49 +++++++++++++++++-- 5 files changed, 89 insertions(+), 7 deletions(-) diff --git a/Gemfile b/Gemfile index c07aed4..ffe03d9 100644 --- a/Gemfile +++ b/Gemfile @@ -46,6 +46,8 @@ gem 'rack-cors' # Use Redis adapter to run Action Cable in production # gem "redis", ">= 4.0.1" +gem 'stringex' + # Use Kredis to get higher-level data types in Redis [https://github.com/rails/kredis] # gem "kredis" diff --git a/Gemfile.lock b/Gemfile.lock index 3ffa3b7..0748244 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -306,6 +306,7 @@ GEM sqlite3 (1.7.3-x86_64-linux) stimulus-rails (1.3.3) railties (>= 6.0.0) + stringex (2.8.6) stringio (3.1.1) strscan (3.1.0) thor (1.3.1) @@ -368,6 +369,7 @@ DEPENDENCIES sprockets-rails sqlite3 stimulus-rails + stringex turbo-rails tzinfo-data vcr diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index 12ed72b..f527622 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -10,5 +10,41 @@ # created_at :datetime not null # updated_at :datetime not null # + +require 'stringex/core_ext' + class Detector::SuggestedResource < ApplicationRecord + before_save :update_fingerprint + + def update_fingerprint + self.fingerprint = calculate_fingerprint + end + + # This implements the OpenRefine fingerprinting algorithm. See + # https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint + def calculate_fingerprint + temp = self.phrase + temp.strip! + temp.downcase! + + # This removes all punctuation and symbol characters from the string. + temp.gsub!(/\p{P}|\p{S}/, '') + + # Normalize to ASCII (e.g. 
gödel and godel are liable to be intended to + # find the same thing) + temp = temp.to_ascii + + # Coercion to ASCII can introduce new symbols, so we remove those now. + temp.gsub!(/\p{P}|\p{S}/, '') + + # Tokenize + array = temp.split + + # Remove duplicates and sort + array.uniq! + array.sort! + + # Rejoin tokens + new_fingerprint = array.join(' ') + end end diff --git a/test/fixtures/detector/suggested_resources.yml b/test/fixtures/detector/suggested_resources.yml index a145aab..82c43a7 100644 --- a/test/fixtures/detector/suggested_resources.yml +++ b/test/fixtures/detector/suggested_resources.yml @@ -10,22 +10,21 @@ # created_at :datetime not null # updated_at :datetime not null # -# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html -one: +jstor: title: JSTOR url: https://libguides.mit.edu/jstor phrase: jstor fingerprint: jstor -two: +web_of_science: title: Web of Science url: https://libguides.mit.edu/webofsci phrase: web of science fingerprint: of science web -three: +web_of_knowledge: title: Web of Knowledge url: https://libguides.mit.edu/webofsci phrase: web of knowledge diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index 8f43900..6754473 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -13,7 +13,50 @@ require "test_helper" class Detector::SuggestedResourceTest < ActiveSupport::TestCase - # test "the truth" do - # assert true - # end + test 'fingerprints are generated automatically' do + resource = { + title: 'Our latest resource', + url: 'https://example.org', + phrase: 'Our latest resource' + } + + new_resource = Detector::SuggestedResource.create(resource) + + assert new_resource.fingerprint == 'latest our resource' + end + + test 'fingerprints are recalculated on save' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'A brand new phrase' + + 
resource.phrase = 'This is a brand new phrase' + resource.save + resource.reload + + assert resource.fingerprint == 'a brand is new phrase this' + end + + test 'fingerprints standardize characters used' do + resource = { + title: 'A wide range of characters', + url: 'https://example.org', + phrase: 'This phrase uses: WeIrD caPital letters, * (punctuation), and symbols™ like ¥€$' + } + + new_resource = Detector::SuggestedResource.create(resource) + + assert new_resource.fingerprint == 'and capital letters like phrase punctuation symbols this uses weird' + end + + test 'fingerprints coerce characters to ASCII' do + resource = { + title: 'A wide range of characters', + url: 'https://example.org', + phrase: 'а а̀ а̂ а̄ ӓ б в г ґ д ђ ѓ е ѐ е̄ е̂ ё є ж з з́ ѕ и і ї ꙇ ѝ и̂ ӣ й ј к л љ м н њ о о̀ о̂ ō ӧ п р с с́ т ћ ќ у у̀ у̂ ӯ ў ӱ ф х ц ч џ ш щ ꙏ ъ ъ̀ ы ь ѣ э ю ю̀ я' + } + + new_resource = Detector::SuggestedResource.create(resource) + + assert new_resource.fingerprint == 'a b ch d dj dz dzh e f g gh gj i ia ie io iu j k kh kj l lj m n nj o p r s sh shch t ts tsh u v y yi z zh' + end end From 76a62d69ff1c5336f77d92fff13604ede4dac9ad Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Thu, 25 Jul 2024 16:21:37 -0400 Subject: [PATCH 4/5] Rubocop-inspired fixes after informal code review --- app/models/detector/suggested_resource.rb | 57 ++++++++------ .../detector/suggested_resource_test.rb | 78 ++++++++++--------- 2 files changed, 74 insertions(+), 61 deletions(-) diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index f527622..f7c1180 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # == Schema Information # # Table name: detector_suggested_resources @@ -13,38 +15,43 @@ require 'stringex/core_ext' -class Detector::SuggestedResource < ApplicationRecord - before_save :update_fingerprint +module Detector + # 
Detector::SuggestedResource stores custom hints that we want to send to the + # user in response to specific strings. For example, a search for "web of + # science" should be met with our custom login link to Web of Science via MIT. + class SuggestedResource < ApplicationRecord + before_save :update_fingerprint - def update_fingerprint - self.fingerprint = calculate_fingerprint - end + def update_fingerprint + self.fingerprint = calculate_fingerprint + end - # This implements the OpenRefine fingerprinting algorithm. See - # https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint - def calculate_fingerprint - temp = self.phrase - temp.strip! - temp.downcase! + # This implements the OpenRefine fingerprinting algorithm. See + # https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint + def calculate_fingerprint + temp = phrase + temp.strip! + temp.downcase! - # This removes all punctuation and symbol characters from the string. - temp.gsub!(/\p{P}|\p{S}/, '') + # This removes all punctuation and symbol characters from the string. + temp.gsub!(/\p{P}|\p{S}/, '') - # Normalize to ASCII (e.g. gödel and godel are liable to be intended to - # find the same thing) - temp = temp.to_ascii + # Normalize to ASCII (e.g. gödel and godel are liable to be intended to + # find the same thing) + temp = temp.to_ascii - # Coercion to ASCII can introduce new symbols, so we remove those now. - temp.gsub!(/\p{P}|\p{S}/, '') + # Coercion to ASCII can introduce new symbols, so we remove those now. + temp.gsub!(/\p{P}|\p{S}/, '') - # Tokenize - array = temp.split + # Tokenize + array = temp.split - # Remove duplicates and sort - array.uniq! - array.sort! + # Remove duplicates and sort + array.uniq! + array.sort! 
- # Rejoin tokens - new_fingerprint = array.join(' ') + # Rejoin tokens + array.join(' ') + end end end diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index 6754473..b981c40 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # == Schema Information # # Table name: detector_suggested_resources @@ -10,53 +12,57 @@ # created_at :datetime not null # updated_at :datetime not null # -require "test_helper" +require 'test_helper' -class Detector::SuggestedResourceTest < ActiveSupport::TestCase - test 'fingerprints are generated automatically' do - resource = { - title: 'Our latest resource', - url: 'https://example.org', - phrase: 'Our latest resource' - } +module Detector + class SuggestedResourceTest < ActiveSupport::TestCase + test 'fingerprints are generated automatically' do + resource = { + title: 'Our latest resource', + url: 'https://example.org', + phrase: 'Our latest resource' + } - new_resource = Detector::SuggestedResource.create(resource) + new_resource = Detector::SuggestedResource.create(resource) - assert new_resource.fingerprint == 'latest our resource' - end + assert new_resource.fingerprint == 'latest our resource' + end - test 'fingerprints are recalculated on save' do - resource = detector_suggested_resources('jstor') - refute resource.fingerprint == 'A brand new phrase' + test 'fingerprints are recalculated on save' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'A brand new phrase' - resource.phrase = 'This is a brand new phrase' - resource.save - resource.reload + resource.phrase = 'This is a brand new phrase' + resource.save + resource.reload - assert resource.fingerprint == 'a brand is new phrase this' - end + assert resource.fingerprint == 'a brand is new phrase this' + end - test 'fingerprints standardize characters used' do - 
resource = { - title: 'A wide range of characters', - url: 'https://example.org', - phrase: 'This phrase uses: WeIrD caPital letters, * (punctuation), and symbols™ like ¥€$' - } + test 'fingerprints standardize characters used' do + resource = { + title: 'A wide range of characters', + url: 'https://example.org', + phrase: 'This phrase uses: WeIrD caPital letters, * (punctuation), and symbols™ like ¥€$' + } - new_resource = Detector::SuggestedResource.create(resource) + new_resource = Detector::SuggestedResource.create(resource) - assert new_resource.fingerprint == 'and capital letters like phrase punctuation symbols this uses weird' - end + assert new_resource.fingerprint == 'and capital letters like phrase punctuation symbols this uses weird' + end - test 'fingerprints coerce characters to ASCII' do - resource = { - title: 'A wide range of characters', - url: 'https://example.org', - phrase: 'а а̀ а̂ а̄ ӓ б в г ґ д ђ ѓ е ѐ е̄ е̂ ё є ж з з́ ѕ и і ї ꙇ ѝ и̂ ӣ й ј к л љ м н њ о о̀ о̂ ō ӧ п р с с́ т ћ ќ у у̀ у̂ ӯ ў ӱ ф х ц ч џ ш щ ꙏ ъ ъ̀ ы ь ѣ э ю ю̀ я' - } + test 'fingerprints coerce characters to ASCII' do + resource = { + title: 'A wide range of characters', + url: 'https://example.org', + phrase: 'а а̀ а̂ а̄ ӓ б в г ґ д ђ ѓ е ѐ е̄ е̂ ё є ж з з́ ѕ и і ї ꙇ ѝ и̂ ӣ й ј к л љ м н њ о о̀ о̂ ō ӧ п р с с́'\ + ' т ћ ќ у у̀ у̂ ӯ ў ӱ ф х ц ч џ ш щ ꙏ ъ ъ̀ ы ь ѣ э ю ю̀ я' + } - new_resource = Detector::SuggestedResource.create(resource) + new_resource = Detector::SuggestedResource.create(resource) - assert new_resource.fingerprint == 'a b ch d dj dz dzh e f g gh gj i ia ie io iu j k kh kj l lj m n nj o p r s sh shch t ts tsh u v y yi z zh' + assert new_resource.fingerprint == 'a b ch d dj dz dzh e f g gh gj i ia ie io iu j k kh kj l lj m n nj o p r s '\ + 'sh shch t ts tsh u v y yi z zh' + end end end From b5786115a7ed0124fb5c7f0dc13806319571564c Mon Sep 17 00:00:00 2001 From: Matthew Bernhardt Date: Fri, 26 Jul 2024 12:08:05 -0400 Subject: [PATCH 5/5] Updates after informal 
code review * There are more tests, each explicitly testing one aspect of the fingerprinting. Most notable is one confirming that the original phrase is not affected while calculating the fingerprint. * The calculate_fingerprint method now has an explicit argument to receive the phrase * Variable names internal to this method have been clarified * Bang methods are no longer being used --- app/models/detector/suggested_resource.rb | 24 +++---- .../detector/suggested_resource_test.rb | 71 ++++++++++++++++--- 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/app/models/detector/suggested_resource.rb b/app/models/detector/suggested_resource.rb index f7c1180..526074e 100644 --- a/app/models/detector/suggested_resource.rb +++ b/app/models/detector/suggested_resource.rb @@ -23,35 +23,35 @@ class SuggestedResource < ApplicationRecord before_save :update_fingerprint def update_fingerprint - self.fingerprint = calculate_fingerprint + self.fingerprint = calculate_fingerprint(phrase) end # This implements the OpenRefine fingerprinting algorithm. See # https://openrefine.org/docs/technical-reference/clustering-in-depth#fingerprint - def calculate_fingerprint - temp = phrase - temp.strip! - temp.downcase! + def calculate_fingerprint(old_phrase) + modified_phrase = old_phrase + modified_phrase = modified_phrase.strip + modified_phrase = modified_phrase.downcase # This removes all punctuation and symbol characters from the string. - temp.gsub!(/\p{P}|\p{S}/, '') + modified_phrase = modified_phrase.gsub(/\p{P}|\p{S}/, '') # Normalize to ASCII (e.g. gödel and godel are liable to be intended to # find the same thing) - temp = temp.to_ascii + modified_phrase = modified_phrase.to_ascii # Coercion to ASCII can introduce new symbols, so we remove those now. - temp.gsub!(/\p{P}|\p{S}/, '') + modified_phrase = modified_phrase.gsub(/\p{P}|\p{S}/, '') # Tokenize - array = temp.split + tokens = modified_phrase.split # Remove duplicates and sort - array.uniq! - array.sort!
+ tokens = tokens.uniq + tokens = tokens.sort # Rejoin tokens - array.join(' ') + tokens.join(' ') end end end diff --git a/test/models/detector/suggested_resource_test.rb b/test/models/detector/suggested_resource_test.rb index b981c40..620b35b 100644 --- a/test/models/detector/suggested_resource_test.rb +++ b/test/models/detector/suggested_resource_test.rb @@ -39,16 +39,49 @@ class SuggestedResourceTest < ActiveSupport::TestCase assert resource.fingerprint == 'a brand is new phrase this' end - test 'fingerprints standardize characters used' do - resource = { - title: 'A wide range of characters', - url: 'https://example.org', - phrase: 'This phrase uses: WeIrD caPital letters, * (punctuation), and symbols™ like ¥€$' - } + test 'generating fingerprints does not alter the phrase' do + resource = detector_suggested_resources('jstor') + benchmark = 'This is an updated phrase! ' - new_resource = Detector::SuggestedResource.create(resource) + refute resource.phrase == benchmark + resource.phrase = benchmark + resource.save + resource.reload + + assert resource.phrase == benchmark + end + + test 'fingerprints strip extra spaces' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'i need space' + + resource.phrase = ' i need space ' + resource.save + resource.reload + + assert resource.fingerprint == 'i need space' + end + + test 'fingerprints are coerced to lowercase' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'ftw intercapping' + + resource.phrase = 'InterCapping FTW' + resource.save + resource.reload - assert new_resource.fingerprint == 'and capital letters like phrase punctuation symbols this uses weird' + assert resource.fingerprint == 'ftw intercapping' + end + + test 'fingerprints remove punctuation and symbols' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'bullets perfect phrase punctuation quoted symbols' + + resource.phrase = 'symbols™ + 
punctuation: * bullets! - "quoted phrase" (perfect) ¥€$' + resource.save + resource.reload + + assert resource.fingerprint == 'bullets perfect phrase punctuation quoted symbols' end test 'fingerprints coerce characters to ASCII' do @@ -64,5 +97,27 @@ class SuggestedResourceTest < ActiveSupport::TestCase assert new_resource.fingerprint == 'a b ch d dj dz dzh e f g gh gj i ia ie io iu j k kh kj l lj m n nj o p r s '\ 'sh shch t ts tsh u v y yi z zh' end + + test 'fingerprints remove repeated words' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'double' + + resource.phrase = 'double double' + resource.save + resource.reload + + assert resource.fingerprint == 'double' + end + + test 'fingerprints sort words alphabetically' do + resource = detector_suggested_resources('jstor') + refute resource.fingerprint == 'delta gamma' + + resource.phrase = 'gamma delta' + resource.save + resource.reload + + assert resource.fingerprint == 'delta gamma' + end end end