Skip to content

Commit

Permalink
Refactor to a single task that can load local or remote files
Browse files Browse the repository at this point in the history
  • Loading branch information
JPrevost committed Aug 29, 2024
1 parent 33530ff commit 42da8dd
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 45 deletions.
49 changes: 13 additions & 36 deletions lib/tasks/search_event_loader.rake
Original file line number Diff line number Diff line change
Expand Up @@ -14,53 +14,30 @@ namespace :search_events do
# @example
# bin/rails search_events:csv_loader['local_path_to_file.csv', 'some-source-to-use-for-all-loaded-records']
#
# @example
# bin/rails search_events:csv_loader['https://SERVER/remote_path_to_file.csv', 'some-source-to-use-for-all-loaded-records']
#
# @param path [String] local file path or remote URL of a CSV file to load
# @param source [String] source name to load the data under
desc 'Load search_events from csv'
task :csv_loader, %i[path source] => :environment do |_task, args|
raise ArgumentError.new, 'Path is required' if args.path.blank?
raise ArgumentError.new, 'Source is required' if args.source.blank?

Rails.logger.info("Loading data from #{args.path}")

CSV.foreach(args.path) do |row|
term = Term.create_or_find_by!(phrase: row.first)
term.search_events.create!(source: args.source, created_at: row.last)
# Decide whether the path is a remote URL or a local file. Only http(s) and ftp URLs are treated as remote,
# because those are the URI classes open-uri adds `open` to. Matching with a regex (rather than calling
# `URI(args.path).scheme`) keeps local paths that contain spaces from raising URI::InvalidURIError and keeps
# Windows drive letters such as `C:/data.csv` from being mistaken for a URI scheme.
if args.path.match?(%r{\A(?:https?|ftp)://}i)
  Rails.logger.info("Loading data from remote file #{args.path}")
  data = URI.parse(args.path).open('rb', &:read)
else
  Rails.logger.info("Loading data from local file #{args.path}")
  data = File.read(args.path)
end
end

# url loader can bulk load SearchEvents and Terms.
#
# @note This is not for use in production. It is intended for use in review apps on Heroku, to load records in
# preparation for demonstrations.
#
# @note the csv should be formatted as `term phrase`, `timestamp`. A dataclip is available that can export in this
# format.
#
# @example
# bin/rake search_events:url_loader['https://example.org/file.csv', 'some-source-to-use-for-all-loaded-records']
#
# @param addr [String] a URL for a CSV file to load
# @param source [String] source name to load the data under
desc 'Load search_events from url'
task :url_loader, %i[addr source] => :environment do |_task, args|
raise ArgumentError.new, 'URL is required' if args.addr.blank?
raise ArgumentError.new, 'Source is required' if args.source.blank?

Rails.logger.info("Term count before import: #{Term.count}")
Rails.logger.info("SearchEvent count before import: #{SearchEvent.count}")

url = URI.parse(args.addr)
Rails.logger.info("Loading data from #{url}")

file = url.open.read
data = CSV.parse(file)
data.each do |row|
# not ideal, we should consider streaming the file rather than loading it fully into memory
# if you run into issues with this, consider loading subsets (such as a single month) at a time
CSV.parse(data) do |row|
term = Term.create_or_find_by!(phrase: row.first)
term.search_events.create!(source: args.source, created_at: row.last)
end

Rails.logger.info("Term count after import: #{Term.count}")
Rails.logger.info("SearchEvent count after import: #{SearchEvent.count}")
end
end
17 changes: 8 additions & 9 deletions test/tasks/search_event_loader_rake_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,31 @@
class SearchEventLoaderRakeTest < ActiveSupport::TestCase
def setup
Tacos::Application.load_tasks if Rake::Task.tasks.empty?
Rake::Task['search_events:url_loader'].reenable
Rake::Task['search_events:csv_loader'].reenable
end

test 'url_reload can accept a url and source parameter' do
test 'csv_loader can accept a url and source parameter' do
records_before = SearchEvent.count
VCR.use_cassette('search_events:url_loader from remote csv') do
remote_file = 'http://static.lndo.site/search_events.csv'
Rake::Task['search_events:url_loader'].invoke(remote_file, 'test')
Rake::Task['search_events:csv_loader'].invoke(remote_file, 'test')
end

assert_not_equal records_before, SearchEvent.count
end

test 'url_reload errors without any parameters' do
test 'csv_loader errors without any parameters' do
error = assert_raises(ArgumentError) do
Rake::Task['search_events:url_loader'].invoke()
Rake::Task['search_events:csv_loader'].invoke
end
assert_equal 'URL is required', error.message
assert_equal 'Path is required', error.message
end

test 'url_reload errors without a source parameter' do
test 'csv_loader errors without a source parameter' do
error = assert_raises(ArgumentError) do
remote_file = 'http://static.lndo.site/search_events.csv'
Rake::Task['search_events:url_loader'].invoke(remote_file)
Rake::Task['search_events:csv_loader'].invoke(remote_file)
end
assert_equal 'Source is required', error.message
end

end

0 comments on commit 42da8dd

Please sign in to comment.