Skip to content

Commit

Permalink
Merge pull request #98 from MITLibraries/url-loader-for-terms
Browse files Browse the repository at this point in the history
Add temporary url_loader for search events
  • Loading branch information
matt-bernhardt authored Aug 29, 2024
2 parents cfd7a95 + 42da8dd commit ab821df
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 2 deletions.
16 changes: 14 additions & 2 deletions lib/tasks/search_event_loader.rake
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,28 @@ namespace :search_events do
# @example
# bin/rails search_events:csv_loader['local_path_to_file.csv', 'some-source-to-use-for-all-loaded-records']
#
# @example
# bin/rails search_events:csv_loader['https://SERVER/remote_path_to_file.csv', 'some-source-to-use-for-all-loaded-records']
#
# @param path [String] local file path or remote URL of a CSV file to load
# @param source [String] source name to load the data under
desc 'Load search_events from csv'
task :csv_loader, %i[path source] => :environment do |_task, args|
raise ArgumentError.new, 'Path is required' if args.path.blank?
raise ArgumentError.new, 'Source is required' if args.source.blank?

Rails.logger.info("Loading data from #{args.path}")
# does the file look like a path or a URI
if URI(args.path).scheme
Rails.logger.info("Loading data from remote file #{args.path}")
data = URI.parse(args.path).open('rb', &:read)
else
Rails.logger.info("Loading data from local file #{args.path}")
data = File.read(args.path)
end

CSV.foreach(args.path) do |row|
# not ideal, we should consider streaming the file rather than loading it fully into memory
# if you run into issues with this, consider loading subsets (such as a single month) at a time
CSV.parse(data) do |row|
term = Term.create_or_find_by!(phrase: row.first)
term.search_events.create!(source: args.source, created_at: row.last)
end
Expand Down
36 changes: 36 additions & 0 deletions test/tasks/search_event_loader_rake_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# frozen_string_literal: true

require 'test_helper'
require 'rake'

# Tests for the `search_events:csv_loader` rake task: loading from a remote
# URL and the argument validation errors raised when parameters are missing.
class SearchEventLoaderRakeTest < ActiveSupport::TestCase
  # Fully qualified name of the rake task under test.
  TASK_NAME = 'search_events:csv_loader'

  # Remote CSV location used by the tests that pass a path argument.
  REMOTE_CSV = 'http://static.lndo.site/search_events.csv'

  # Loads the application's rake tasks once (only when none are registered yet)
  # and re-enables the loader task before each test.
  # NOTE(review): presumably re-enabling is needed because Rake will not run an
  # already-invoked task a second time -- confirm against the Rake docs.
  def setup
    if Rake::Task.tasks.empty?
      Tacos::Application.load_tasks
    end
    csv_loader.reenable
  end

  test 'csv_loader can accept a url and source parameter' do
    initial_count = SearchEvent.count

    VCR.use_cassette('search_events:url_loader from remote csv') do
      csv_loader.invoke(REMOTE_CSV, 'test')
    end

    # Only checks that the number of SearchEvent records changed, not by how much.
    assert_not_equal initial_count, SearchEvent.count
  end

  test 'csv_loader errors without any parameters' do
    raised = assert_raises(ArgumentError) { csv_loader.invoke }

    assert_equal 'Path is required', raised.message
  end

  test 'csv_loader errors without a source parameter' do
    raised = assert_raises(ArgumentError) { csv_loader.invoke(REMOTE_CSV) }

    assert_equal 'Source is required', raised.message
  end

  private

  # Looks up the rake task under test by name.
  def csv_loader
    Rake::Task[TASK_NAME]
  end
end
41 changes: 41 additions & 0 deletions test/vcr_cassettes/search_events_url_loader_from_remote_csv.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ab821df

Please sign in to comment.