From 23a8ec6465c1ae6b3d5105027dc77d0efaa41b18 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Wed, 6 Mar 2024 19:04:25 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20date=20indexing=20to=20ret?= =?UTF-8?q?urn=20more=20than=20YYYY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit when the date_created property was indexed, only the year was indexed. If the input was something like "2024-01-01" then `date_ssi` would only be "2024". This commit will index YYYY-MM-DD, YYYY-MM, and YYYY if given. Anything other than those three formats will be indexed as is and won't work correctly with sorting or range queries. Ref: - https://github.com/scientist-softserv/palni-palci/issues/973 --- app/indexers/app_indexer.rb | 23 ++++++++---- spec/indexers/app_indexer_spec.rb | 58 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/app/indexers/app_indexer.rb b/app/indexers/app_indexer.rb index cb70b6f2d..f523852f6 100644 --- a/app/indexers/app_indexer.rb +++ b/app/indexers/app_indexer.rb @@ -30,12 +30,23 @@ def full_text(file_set_id) end def add_date(solr_doc) - # The allowed date formats are either YYYY, YYYY-MM, or YYYY-MM-DD - # the date must be formatted as a 4 digit year in order to be sorted. 
- valid_date_formats = /\A(\d{4})(?:-\d{2}(?:-\d{2})?)?\z/ date_string = solr_doc['date_created_tesim']&.first - year = date_string&.match(valid_date_formats)&.captures&.first - solr_doc['date_tesi'] = year if year - solr_doc['date_ssi'] = year if year + return unless date_string + + date_string = pad_date_with_zero(date_string) if date_string.include?('-') + + # The allowed date formats are either YYYY, YYYY-MM, or YYYY-MM-DD + valid_date_formats = /\A(\d{4}(?:-\d{2}(?:-\d{2})?)?)\z/ + date = date_string&.match(valid_date_formats)&.captures&.first + + # If the date is not in the correct format, index the original date string + date ||= date_string + + solr_doc['date_tesi'] = date if date + solr_doc['date_ssi'] = date if date + end + + def pad_date_with_zero(date_string) + date_string.split('-').map { |d| d.rjust(2, '0') }.join('-') end end diff --git a/spec/indexers/app_indexer_spec.rb b/spec/indexers/app_indexer_spec.rb index 9d96ece20..a771964c3 100644 --- a/spec/indexers/app_indexer_spec.rb +++ b/spec/indexers/app_indexer_spec.rb @@ -24,4 +24,62 @@ expect(solr_document.fetch("account_cname_tesim")).to eq(account.cname) end end + + describe "#generate_solr_document" do + context "when given a date with a YYYY-MM-DD format" do + it "indexes date_ssi in YYYY-MM-DD format" do + work.date_created = ["2024-01-01"] + expect(solr_document.fetch("date_ssi")).to eq("2024-01-01") + end + end + + context "when given a date with a YYYY-MM format" do + it "indexes date_ssi in YYYY-MM format" do + work.date_created = ["2024-01"] + expect(solr_document.fetch("date_ssi")).to eq("2024-01") + end + end + + context "when given a date with a YYYY format" do + it "indexes date_ssi in YYYY format" do + work.date_created = ["2024"] + expect(solr_document.fetch("date_ssi")).to eq("2024") + end + end + + context "when given a date with a YYYY-M-D format" do + it "converts the date to YYYY-MM-DD format and indexes date_ssi" do + work.date_created = ["2024-1-1"] + 
expect(solr_document.fetch("date_ssi")).to eq("2024-01-01") + end + end + + context "when given a date with a YYYY-M format" do + it "converts the date to YYYY-MM format and indexes date_ssi" do + work.date_created = ["2024-1"] + expect(solr_document.fetch("date_ssi")).to eq("2024-01") + end + end + + context "when given a date with a YYYY-MM-D format" do + it "converts the date to YYYY-MM-DD format and indexes date_ssi" do + work.date_created = ["2024-01-1"] + expect(solr_document.fetch("date_ssi")).to eq("2024-01-01") + end + end + + context "when given a date with a YYYY-M-DD format" do + it "converts the date to YYYY-MM-DD format and indexes date_ssi" do + work.date_created = ["2024-1-01"] + expect(solr_document.fetch("date_ssi")).to eq("2024-01-01") + end + end + + context "when given a date with an invalid format" do + it "indexes the given date" do + work.date_created = ["Jan 1, 2024"] + expect(solr_document.fetch("date_ssi")).to eq("Jan 1, 2024") + end + end + end end