Skip to content

Commit

Permalink
Continued work toward rake task with tests
Browse files Browse the repository at this point in the history
This adds a bulk_replace method to the SR model, which the task calls after it has laid the foundation. The task's concerns are to receive the argument and to parse the file initially into a CSV::Table object. Once the parsing is done, the work passes to the SR model for processing.

The remaining work is two-fold:
1. The two pathways for receiving a local file and a remote url are not quite working the same - field headers are specified in different ways between the URI and CSV libraries, and we either need them to yield comparable outcomes or to build two different bulk_replace methods (ick).
2. Tests for the happy path are nearly done, but there are a lot of boundary conditions that still need tests off the happy path. Part of that is knowing where we're going to get the URL-based file from (right now I'm using a local Lando, but that's not great)
  • Loading branch information
matt-bernhardt committed Aug 6, 2024
1 parent d452c78 commit 8902e95
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 13 deletions.
18 changes: 18 additions & 0 deletions app/models/detector/suggested_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,23 @@ def calculate_fingerprint(old_phrase)
# Rejoin tokens
tokens.join(' ')
end

# Replaces every stored suggested resource with the rows of a parsed CSV. It is
# called by the suggested_resources:reload rake task.
#
# Header names are matched after removing any byte-order mark, trimming
# whitespace and downcasing, so a table with symbol headers (as built by
# CSV.table) and one with raw string headers (as built by
# CSV.parse(..., headers: true)) are both accepted. Extra columns are ignored.
#
# @param input [CSV::Table] parsed CSV with title, url and phrase columns
# @raise [ArgumentError] if input is not a CSV::Table, or a required column is missing
# @return [CSV::Table] the input table
def self.bulk_replace(input)
  raise ArgumentError, 'Tabular CSV is required' unless input.instance_of?(CSV::Table)

  # Map each normalized header name to the header exactly as it appears in the
  # table, so rows can still be indexed with the table's own keys.
  columns = input.headers.to_h do |header|
    [header.to_s.delete("\uFEFF").strip.downcase.to_sym, header]
  end

  required_headers = %i[title url phrase]
  missing_headers = required_headers - columns.keys
  raise ArgumentError, "Some CSV columns missing: #{missing_headers}" unless missing_headers.empty?

  # Delete and reload inside one transaction so an exception part-way through
  # does not leave the table empty or half-populated.
  transaction do
    delete_all

    input.each do |line|
      attributes = required_headers.to_h { |name| [name, line[columns[name]]] }
      record = new(attributes)
      next if record.save

      # Loading stays best-effort, but a rejected row is no longer silent.
      Rails.logger.warn(
        "Suggested resource not saved #{attributes.inspect}: #{record.errors.full_messages.join(', ')}"
      )
    end
  end
end
end
end
34 changes: 21 additions & 13 deletions lib/tasks/suggested_resources.rake
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,34 @@ namespace :suggested_resources do
Rails.logger.info("Record count before we reload: #{Detector::SuggestedResource.count}")

if URI(args.addr).scheme
Rails.logger.info("Loading from remote address: #{args.addr}")
url = URI.parse(args.addr)
raise ArgumentError.new, 'HTTP/HTTPS scheme is required' unless url.scheme.in?(%w[http https])
Rails.logger.info(url)

file = url.read
Rails.logger.info(file)
# Need to connect to a CSV content type
# Invalid parsing should... do something?
data = csv_table_from_url_direct(url)
else
Rails.logger.info("Loading from local file: #{args.addr}")
file = File.read(args.addr)
Rails.logger.info(file)
file = File.open(args.addr)
# Invalid / not found file should ... do something?
data = CSV.table(file)
end

Rails.logger.info('Now ready to parse a CSV')
data = CSV.parse(file)
Rails.logger.info(data)
Detector::SuggestedResource.bulk_replace(data)

# Rails.logger.info("Record count after we reload: #{Detector::SuggestedResource.count}")
Rails.logger.info("Record count after we reload: #{Detector::SuggestedResource.count}")
end

# Downloads a remote CSV and parses it into a CSV::Table.
#
# The parse options mirror what CSV.table applies to a local file (header row,
# symbolized headers, numeric field conversion) so both loading pathways hand
# the same shape of table to Detector::SuggestedResource.bulk_replace.
#
# @param url [URI::Generic] an object whose #open yields a readable IO
# @return [CSV::Table] the parsed table, with symbol headers
def csv_table_from_url_direct(url)
  # The block form closes the underlying handle once the body has been read.
  body = url.open(&:read)

  # Treat the payload as UTF-8 and drop a leading byte-order mark, which would
  # otherwise become part of the first header.
  text = body.dup.force_encoding(Encoding::UTF_8).delete_prefix("\uFEFF")

  CSV.parse(text, headers: true, converters: :numeric, header_converters: :symbol)
end

# Downloads a remote CSV and rebuilds it row by row into a CSV::Table whose
# headers are stripped, downcased symbols.
#
# @param url [URI::Generic] an object whose #read returns the CSV body
# @return [CSV::Table] the rebuilt table; empty when the body has no rows
def csv_table_from_url_rebuild(url)
  # Treat the payload as UTF-8 and drop a leading byte-order mark before
  # parsing, so it can neither corrupt the first header nor break quoting.
  text = url.read.dup.force_encoding(Encoding::UTF_8).delete_prefix("\uFEFF")

  header_row, *value_rows = CSV.parse(text)
  return CSV::Table.new([]) if header_row.nil?

  # A blank header cell parses as nil, hence to_s before normalizing.
  header = header_row.map { |field| field.to_s.strip.downcase.to_sym }
  rows = value_rows.map { |row| CSV::Row.new(header, row) }

  # Passing headers explicitly keeps them available when there are no data rows.
  CSV::Table.new(rows, headers: header)
end
end
3 changes: 3 additions & 0 deletions test/fixtures/files/suggested_resources.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Title,URL,Phrase
New Example,https://example.org,new example search
Web of Science,https://libraries.mit.edu/webofsci,web of Science
Binary file added test/fixtures/files/suggested_resources.xlsx
Binary file not shown.
3 changes: 3 additions & 0 deletions test/fixtures/files/suggested_resources_extra.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Title,URL,Phrase,Extra
Example,https://example.org,example search,extra 1
Web of Science,https://libraries.mit.edu/webofsci,web of Science,extra 2
3 changes: 3 additions & 0 deletions test/fixtures/files/suggested_resources_missing_field.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Title,URL
Example,https://example.org
Web of Science,https://libraries.mit.edu/webofsci
2 changes: 2 additions & 0 deletions test/fixtures/files/suggested_resources_wrong_columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Title,URL
Example,https://example.org
53 changes: 53 additions & 0 deletions test/tasks/suggested_resource_rake_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

require 'test_helper'
require 'rake'

# Exercises the suggested_resources:reload rake task against local fixture
# files and a recorded remote CSV.
class SuggestedResourceRakeTest < ActiveSupport::TestCase
  RELOAD_TASK = 'suggested_resources:reload'

  # Load the application's rake tasks once, and re-enable the reload task so
  # each test can invoke it again.
  def setup
    Tacos::Application.load_tasks if Rake::Task.tasks.empty?
    reload_task.reenable
  end

  test 'invoked reload can accept a local file' do
    count_before = Detector::SuggestedResource.count # We have three fixtures at the moment
    first_before = Detector::SuggestedResource.first

    reload_task.invoke(fixture_path('suggested_resources.csv'))

    refute_equal count_before, Detector::SuggestedResource.count
    refute_equal first_before, Detector::SuggestedResource.first
  end

  test 'reload task errors without a file argument' do
    assert_raises(ArgumentError) do
      reload_task.invoke
    end
  end

  test 'reload can accept a url' do
    VCR.use_cassette('remote csv') do
      reload_task.invoke('http://static.lndo.site/suggested_resources.csv')
    end
  end

  test 'reload fails with a non-CSV file' do
    spreadsheet = fixture_path('suggested_resources.xlsx')

    assert_raises(CSV::MalformedCSVError) do
      reload_task.invoke(spreadsheet)
    end
  end

  test 'reload fails unless all three columns are present: title, url, phrase' do
    incomplete = fixture_path('suggested_resources_missing_field.csv')

    error = assert_raises(ArgumentError) do
      reload_task.invoke(incomplete)
    end

    assert_equal 'Some CSV columns missing: [:phrase]', error.message
  end

  test 'reload succeeds if extra columns are present' do
    reload_task.invoke(fixture_path('suggested_resources_extra.csv'))
  end

  private

  # The rake task under test.
  def reload_task
    Rake::Task[RELOAD_TASK]
  end

  # Absolute path, as a String, of a file under test/fixtures/files.
  def fixture_path(filename)
    Rails.root.join('test', 'fixtures', 'files', filename).to_s
  end
end
40 changes: 40 additions & 0 deletions test/vcr_cassettes/remote_csv.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8902e95

Please sign in to comment.