From d07a583a375fc2b12a1e40649b4a1c900cb093f1 Mon Sep 17 00:00:00 2001 From: Jeremy Prevost Date: Thu, 11 Jul 2024 16:58:03 -0400 Subject: [PATCH] Updates PMID detection regex Why are these changes being introduced: * The regex was missing PMIDs with no space between the `PMID:` and the identifier, such as `PMID:35648703` Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TCO-42 How does this address that need: * Updates regex to allow, but not require, a single whitespace character between `pmid:` and the identifier --- app/models/standard_identifiers.rb | 2 +- test/models/standard_identifiers_test.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/standard_identifiers.rb b/app/models/standard_identifiers.rb index 5588625..0de7f4c 100644 --- a/app/models/standard_identifiers.rb +++ b/app/models/standard_identifiers.rb @@ -33,7 +33,7 @@ def term_patterns { isbn: /\b(ISBN-*(1[03])* *(: ){0,1})*(([0-9Xx][- ]*){13}|([0-9Xx][- ]*){10})\b/, issn: /\b[0-9]{4}-[0-9]{3}[0-9xX]\b/, - pmid: /\b((pmid|PMID): (\d{7,8}))\b/, + pmid: /\b((pmid|PMID):\s?(\d{7,8}))\b/, doi: %r{\b10\.(\d+\.*)+/(([^\s.])+\.*)+\b} } end diff --git a/test/models/standard_identifiers_test.rb b/test/models/standard_identifiers_test.rb index 088c054..cdf8154 100644 --- a/test/models/standard_identifiers_test.rb +++ b/test/models/standard_identifiers_test.rb @@ -157,7 +157,7 @@ class StandardIdentifiersTest < ActiveSupport::TestCase end test 'pmid examples' do - samples = ['PMID: 35648703', 'pmid: 1234567'] + samples = ['PMID: 35648703', 'pmid: 1234567', 'PMID:35648703'] samples.each do |pmid| actual = StandardIdentifiers.new(pmid).identifiers