Skip to content

Commit

Permalink
Add OnsUpdateCheckWorker
Browse files Browse the repository at this point in the history
  • Loading branch information
KludgeKML committed Mar 27, 2024
1 parent ec0269c commit b0e4c8d
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
36 changes: 36 additions & 0 deletions app/workers/ons_update_check_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
require "rss"
require "open-uri"

# Checks the ONS geoportal RSS feed for an ONSPD release and, when the first
# feed item is newer than the stored ONSPD postcodes, queues an
# OnsDownloadWorker with the ArcGIS data URL for that item.
class OnsUpdateCheckWorker < OnsBaseWorker
  ONSPD_RSS_URL = "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc".freeze

  # Fetches the feed and enqueues a download if its first item is more recent
  # than every stored ONSPD postcode (or if no ONSPD postcodes are stored).
  # Does nothing except log a warning when the feed yields no items.
  #
  # @return [void]
  # @raise [StandardError] if the item's GUID contains no `id=<hex>` token
  def perform
    newest = newest_feed_item

    if newest.nil?
      # Nothing to compare against; previously this fell through to a
      # NoMethodError on nil.
      Rails.logger.warn("OnsUpdateCheckWorker found no items in the ONSPD RSS feed")
      return
    end

    return unless more_recent?(newest)

    new_data_url = "https://www.arcgis.com/sharing/rest/content/items/#{arcgis_item_id(newest)}/data"
    Rails.logger.info("Updated ONSPD file: #{newest.title}")
    Rails.logger.info("Starting download from #{new_data_url}")
    OnsDownloadWorker.perform_async(new_data_url)
  end

private

  # Downloads and parses the feed, returning its first item. The feed URL
  # asks for results sorted by creation date, descending, so the first item
  # is taken to be the newest.
  #
  # @return [Object, nil] the first feed item, or nil when the document could
  #   not be parsed into a feed or the feed has no items
  def newest_feed_item
    feed = URI.parse(ONSPD_RSS_URL).open { |rss| RSS::Parser.parse(rss) }
    feed&.items&.first
  end

  # @param item [#pubDate] feed item to compare against stored data
  # @return [Boolean] true when no ONSPD postcodes are stored, or when the
  #   item's publication date is later than the latest stored `updated_at`
  def more_recent?(item)
    # One aggregate query instead of an existence check followed by a
    # separate ordered fetch; nil means there are no ONSPD rows.
    latest_stored = Postcode.onspd.maximum(:updated_at)
    return true if latest_stored.nil?

    item.pubDate > latest_stored
  end

  # Extracts the hexadecimal ArcGIS item id from the `id=` part of the
  # item's GUID.
  #
  # @param item [#guid] feed item whose GUID should contain `id=<hex>`
  # @return [String] the captured item id
  # @raise [StandardError] if the GUID is missing or has no `id=<hex>` token
  def arcgis_item_id(item)
    guid = item.guid&.content.to_s
    matches = guid.match(/id=(\h+)/)
    # `raise Class, message` is required here: `StandardError(...)` would be
    # parsed as a call to an undefined method and raise NoMethodError instead.
    raise StandardError, "OnsUpdateCheckWorker couldn't extract download item from [#{guid}]" unless matches

    matches[1]
  end
end
28 changes: 28 additions & 0 deletions spec/support/rss_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
require "rss"

# Builds the feed fixture for a well-formed ONSPD item: the GUID passed to
# ons_base_rss carries an `id=abc123` query parameter.
#
# @param last_updated [Time] timestamp forwarded to ons_base_rss
# @return [Object] whatever ons_base_rss returns for these arguments
def ons_rss(last_updated: Time.zone.now)
  guid_with_item_id = "https://www.arcgis.com/home/item.html?id=abc123"

  ons_base_rss(last_updated: last_updated, guid: guid_with_item_id)
end

# Builds the feed fixture for a malformed ONSPD item: the GUID passed to
# ons_base_rss has no `id=` query parameter.
#
# @param last_updated [Time] timestamp forwarded to ons_base_rss
# @return [Object] whatever ons_base_rss returns for these arguments
def broken_ons_rss(last_updated: Time.zone.now)
  guid_without_item_id = "https://www.arcgis.com/home/item.html"

  ons_base_rss(last_updated: last_updated, guid: guid_without_item_id)
end

# Renders an RSS 2.0 document containing a single ONS Postcode Directory item.
#
# @param last_updated [#to_s, #strftime] used for the channel and item
#   timestamps and for the "Month Year" part of the item title
# @param guid [String] content of the item's permalink GUID
# @return [String] the serialised feed
def ons_base_rss(last_updated:, guid:)
  timestamp = last_updated.to_s

  document = RSS::Maker.make("2.0") do |builder|
    channel = builder.channel
    channel.description = "ONSPD Search"
    channel.updated = timestamp
    channel.title = "Open Geography Portal"
    channel.link = "https://geoportal.statistics.gov.uk/"

    builder.items.new_item do |entry|
      entry.link = "https://geoportal.statistics.gov.uk/datasets/ons::ons-postcode-directory-february-2024"
      entry.title = "ONS Postcode Directory (#{last_updated.strftime('%B %Y')})"
      entry.updated = timestamp
      # Build the GUID element directly (first argument is isPermaLink) and
      # copy it onto the item under construction.
      RSS::Rss::Channel::Item::Guid.new(true, guid).setup_maker(entry)
    end
  end

  document.to_s
end
56 changes: 56 additions & 0 deletions spec/workers/ons_update_check_worker_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
require "spec_helper"

# NOTE(review): neither constant is referenced anywhere in this spec file;
# confirm nothing else relies on them before removing.
UPDATE_RSS = "".freeze
NO_UPDATE_RSS = "".freeze

RSpec.describe OnsUpdateCheckWorker do
  describe "#perform" do
    let(:feed_url) { "https://geoportal.statistics.gov.uk/api/feed/rss/2.0?q=PRD_ONSPD&sort=Date%20Created%7Ccreated%7Cdesc" }
    let(:download_url) { "https://www.arcgis.com/sharing/rest/content/items/abc123/data" }

    # Serves +body+ as the response to the worker's RSS feed request.
    def stub_ons_feed(body)
      stub_request(:get, feed_url).to_return(status: 200, body: body, headers: {})
    end

    # Stores one ONSPD postcode whose timestamps are a day old, i.e. newer
    # than a month-old feed item but older than a feed item dated now.
    def create_day_old_onspd_postcode
      yesterday = Time.zone.now - 1.day
      Postcode.create!(postcode: "AB10AA", source: "onspd", created_at: yesterday, updated_at: yesterday)
    end

    context "with no applicable update in the RSS feed" do
      before do
        stub_ons_feed(ons_rss(last_updated: Time.zone.now - 1.month))
      end

      context "with no existing onspd records" do
        it "starts an OnsDownloadWorker" do
          expect(OnsDownloadWorker).to receive(:perform_async).with(download_url)
          described_class.new.perform
        end
      end

      context "with existing onspd records" do
        before do
          create_day_old_onspd_postcode
        end

        it "does nothing" do
          expect(OnsDownloadWorker).not_to receive(:perform_async)
          described_class.new.perform
        end
      end
    end

    context "with a new update in the RSS feed" do
      before do
        create_day_old_onspd_postcode
        stub_ons_feed(ons_rss)
      end

      it "starts an OnsDownloadWorker" do
        expect(OnsDownloadWorker).to receive(:perform_async).with(download_url)
        described_class.new.perform
      end
    end

    context "with a new update in the RSS feed but the item is broken" do
      before do
        create_day_old_onspd_postcode
        stub_ons_feed(broken_ons_rss)
      end

      it "raises an error and does not start an OnsDownloadWorker" do
        # No `.with(...)` here: a negative expectation constrained to one
        # argument list would still allow a download to be queued with any
        # other URL.
        expect(OnsDownloadWorker).not_to receive(:perform_async)
        expect { described_class.new.perform }.to raise_error(StandardError)
      end
    end
  end
end

0 comments on commit b0e4c8d

Please sign in to comment.