From b0e4c8d553e13a759610205c9399bf7020d5d1de Mon Sep 17 00:00:00 2001
From: Keith Lawrence
Date: Wed, 27 Mar 2024 15:45:38 +0000
Subject: [PATCH] Add OnsUpdateCheckWorker

---
 app/workers/ons_update_check_worker.rb       | 47 ++++++++++++++++
 spec/support/rss_helper.rb                   | 31 +++++++++++
 spec/workers/ons_update_check_worker_spec.rb | 56 ++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 app/workers/ons_update_check_worker.rb
 create mode 100644 spec/support/rss_helper.rb
 create mode 100644 spec/workers/ons_update_check_worker_spec.rb

diff --git a/app/workers/ons_update_check_worker.rb b/app/workers/ons_update_check_worker.rb
new file mode 100644
index 00000000..3932c398
--- /dev/null
+++ b/app/workers/ons_update_check_worker.rb
@@ -0,0 +1,47 @@
+require "rss"
+require "open-uri"
+
+# Reads the ONSPD RSS feed and, when its first item is newer than the stored
+# onspd postcodes (or none are stored), queues an OnsDownloadWorker with the
+# ArcGIS data URL built from that item's guid.
+class OnsUpdateCheckWorker < OnsBaseWorker
+  ONSPD_RSS_URL = "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc".freeze
+
+  # Fetches and parses the feed. The feed URL requests items sorted by
+  # creation date, descending, so the first item is treated as the newest.
+  def perform
+    URI.parse(ONSPD_RSS_URL).open do |rss|
+      feed = RSS::Parser.parse(rss)
+
+      newest = feed.items.first
+
+      if more_recent?(newest)
+        new_data_url = "https://www.arcgis.com/sharing/rest/content/items/#{arcgis_item_id(newest)}/data"
+        Rails.logger.info("Updated ONSPD file: #{newest.title}")
+        Rails.logger.info("Starting download from #{new_data_url}")
+        OnsDownloadWorker.perform_async(new_data_url)
+      end
+    end
+  end
+
+private
+
+  # True when no onspd postcodes exist, or when the item's pubDate is later
+  # than the updated_at of the most recently updated onspd postcode.
+  def more_recent?(item)
+    return true unless Postcode.onspd.any?
+
+    item.pubDate > Postcode.onspd.order(updated_at: :desc).first.updated_at
+  end
+
+  # Returns the hexadecimal id that follows "id=" in the item's guid.
+  # Raises StandardError when the guid contains no such id.
+  def arcgis_item_id(item)
+    matches = item.guid.content.match(/id=(\h+)/)
+    # Use the `raise Class, message` form: `StandardError(...)` would be a call
+    # to an undefined method and surface as a NoMethodError without this text.
+    raise StandardError, "OnsUpdateCheckWorker couldn't extract download item from [#{item.guid.content}]" unless matches
+
+    matches[1]
+  end
+end
diff --git a/spec/support/rss_helper.rb b/spec/support/rss_helper.rb
new file mode 100644
index 00000000..20fb7b6d
--- /dev/null
+++ b/spec/support/rss_helper.rb
@@ -0,0 +1,31 @@
+require "rss"
+
+# RSS 2.0 feed body whose single item has a guid containing "id=abc123".
+def ons_rss(last_updated: Time.zone.now)
+  ons_base_rss(last_updated:, guid: "https://www.arcgis.com/home/item.html?id=abc123")
+end
+
+# RSS 2.0 feed body whose single item has a guid with no "id=" parameter.
+def broken_ons_rss(last_updated: Time.zone.now)
+  ons_base_rss(last_updated:, guid: "https://www.arcgis.com/home/item.html")
+end
+
+# Builds a single-item RSS 2.0 document dated last_updated with the given guid; returns it as a String.
+def ons_base_rss(last_updated:, guid:)
+  rss = RSS::Maker.make("2.0") do |maker|
+    maker.channel.description = "ONSPD Search"
+    maker.channel.updated = last_updated.to_s
+    maker.channel.title = "Open Geography Portal"
+    maker.channel.link = "https://geoportal.statistics.gov.uk/"
+
+    maker.items.new_item do |item|
+      item.link = "https://geoportal.statistics.gov.uk/datasets/ons::ons-postcode-directory-february-2024"
+      item.title = "ONS Postcode Directory (#{last_updated.strftime('%B %Y')})"
+      item.updated = last_updated.to_s
+      guid_obj = RSS::Rss::Channel::Item::Guid.new(true, guid)
+      guid_obj.setup_maker(item)
+    end
+  end
+
+  rss.to_s
+end
diff --git a/spec/workers/ons_update_check_worker_spec.rb b/spec/workers/ons_update_check_worker_spec.rb
new file mode 100644
index 00000000..e8601301
--- /dev/null
+++ b/spec/workers/ons_update_check_worker_spec.rb
@@ -0,0 +1,56 @@
+require "spec_helper"
+
+UPDATE_RSS = "".freeze
+NO_UPDATE_RSS = "".freeze
+
+RSpec.describe OnsUpdateCheckWorker do
+  describe "#perform" do
+    context "with no applicable update in the RSS feed" do
+      before do
+        stub_request(:get, "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc").to_return(status: 200, body: ons_rss(last_updated: Time.zone.now - 1.month), headers: {})
+      end
+
+      context "with no existing onspd records" do
+        it "starts an OnsDownloadWorker" do
+          expect(OnsDownloadWorker).to receive(:perform_async).with("https://www.arcgis.com/sharing/rest/content/items/abc123/data")
+          OnsUpdateCheckWorker.new.perform
+        end
+      end
+
+      context "with existing onspd records" do
+        before do
+          Postcode.create!(postcode: "AB10AA", source: "onspd", created_at: Time.zone.now - 1.day, updated_at: Time.zone.now - 1.day)
+        end
+
+        it "does nothing" do
+          expect(OnsDownloadWorker).not_to receive(:perform_async)
+          OnsUpdateCheckWorker.new.perform
+        end
+      end
+    end
+
+    context "with a new update in the RSS feed" do
+      before do
+        Postcode.create!(postcode: "AB10AA", source: "onspd", created_at: Time.zone.now - 1.day, updated_at: Time.zone.now - 1.day)
+        stub_request(:get, "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc").to_return(status: 200, body: ons_rss, headers: {})
+      end
+
+      it "starts an OnsDownloadWorker" do
+        expect(OnsDownloadWorker).to receive(:perform_async).with("https://www.arcgis.com/sharing/rest/content/items/abc123/data")
+        OnsUpdateCheckWorker.new.perform
+      end
+    end
+
+    context "with a new update in the RSS feed but the item is broken" do
+      before do
+        Postcode.create!(postcode: "AB10AA", source: "onspd", created_at: Time.zone.now - 1.day, updated_at: Time.zone.now - 1.day)
+        stub_request(:get, "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc").to_return(status: 200, body: broken_ons_rss, headers: {})
+      end
+
+      it "raises an error and does not start an OnsDownloadWorker" do
+        expect(OnsDownloadWorker).not_to receive(:perform_async)
+        expect { OnsUpdateCheckWorker.new.perform }.to raise_error(StandardError, /couldn't extract download item/)
+      end
+    end
+  end
+end