Skip to content

Commit

Permalink
Export lists (#1992)
Browse files Browse the repository at this point in the history
Allow lists (core, custom, search results, articles, filtered or not) to be exported to a CSV file that is sent by e-mail.

- [x] Add an `exportList` GraphQL mutation
- [x] Implement a generic export class that supports media, articles and feeds
- [x] Validate maximum number of results (which is a global configuration key)
- [x] Validate permission
- [x] Create Sidekiq job to export results
- [x] Create a CSV for the export
- [x] Save CSV in S3 using a pre-signed URL that expires after X days ("X" is a global configuration key)
- [x] Add support for MailCatcher
- [x] Send CSV by e-mail
- [x] Automated tests
- [x] Make sure it works for articles as well
- [x] Make sure it works for shared feeds as well

References: CV2-5067 and CV2-4979.
  • Loading branch information
caiosba authored Aug 24, 2024
1 parent 8b36ea4 commit f1f3b6c
Show file tree
Hide file tree
Showing 22 changed files with 643 additions and 8 deletions.
24 changes: 24 additions & 0 deletions app/graph/mutations/export_mutations.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module ExportMutations
  # GraphQL mutation that asks for a list to be exported and e-mailed to the current user.
  # Resolves to `{ success: false }` when the ability forbids `:export_list` on the current
  # team or when the export has more rows than the configured maximum; otherwise the export
  # is handed off to the background and `{ success: true }` is returned.
  class ExportList < Mutations::BaseMutation
    argument :query, GraphQL::Types::String, required: true # JSON
    argument :type, GraphQL::Types::String, required: true # 'media', 'feed', 'fact-check' or 'explainer'

    field :success, GraphQL::Types::Boolean, null: true

    def resolve(query:, type:)
      ability = context[:ability]
      team = Team.find_if_can(Team.current.id, ability)
      return { success: false } if ability.cannot?(:export_list, team)

      export = ListExport.new(type.to_sym, query, team.id)
      row_count = export.number_of_rows
      row_limit = CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer)
      return { success: false } if row_count > row_limit

      export.generate_csv_and_send_email_in_background(User.current)
      { success: true }
    end
  end
end
2 changes: 2 additions & 0 deletions app/graph/types/mutation_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,6 @@ class MutationType < BaseObject

field :createExplainerItem, mutation: ExplainerItemMutations::Create
field :destroyExplainerItem, mutation: ExplainerItemMutations::Destroy

field :exportList, mutation: ExportMutations::ExportList
end
1 change: 1 addition & 0 deletions app/lib/check_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

class CheckConfig
def self.get(key, default = nil, type = nil)
key = key.to_s
value = ENV[key]
value ||= CONFIG[key] if CONFIG.has_key?(key)
return default if value.nil?
Expand Down
13 changes: 13 additions & 0 deletions app/mailers/export_list_mailer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Mailer that delivers the download link of an exported list.
class ExportListMailer < ApplicationMailer
  layout nil

  # Prepares the e-mail with the CSV download link.
  #
  # csv_file_url - URL of the exported CSV, exposed to the templates as @csv_file_url
  # user         - recipient; the templates read @user and the message goes to user.email
  #
  # @expire_in is the localized date obtained by adding the `export_csv_expire`
  # setting (default: 7 days, in seconds) to the current time.
  def send_csv(csv_file_url, user)
    @csv_file_url = csv_file_url
    @user = user

    now_in_seconds = Time.now.to_i
    link_lifetime = CheckConfig.get('export_csv_expire', 7.days.to_i, :integer)
    @expire_in = I18n.l(Time.at(now_in_seconds + link_lifetime), format: :email)

    mail_subject = I18n.t('mails_notifications.export_list.subject')
    Rails.logger.info "Sending export e-mail to #{@user.email}"
    mail(to: @user.email, email_type: 'export_list', subject: mail_subject)
  end
end
2 changes: 1 addition & 1 deletion app/models/ability.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def admin_perms
can :destroy, Team, :id => @context_team.id
can :create, TeamUser, :team_id => @context_team.id, role: ['admin']
can [:update, :destroy], TeamUser, team_id: @context_team.id
can :duplicate, Team, :id => @context_team.id
can [:duplicate, :export_list], Team, :id => @context_team.id
can :set_privacy, Project, :team_id => @context_team.id
can :read_feed_invitations, Feed, :team_id => @context_team.id
can :destroy, Feed, :team_id => @context_team.id
Expand Down
8 changes: 8 additions & 0 deletions app/models/explainer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ def update_paragraphs_in_alegre
self.class.delay_for(5.seconds).update_paragraphs_in_alegre(self.id, previous_paragraphs_count, Time.now.to_f)
end

# Builds the export rows for explainers: a header row followed by one row
# (id, title, description, url, language) per explainer returned by
# `team.filtered_explainers(query)`.
def self.get_exported_data(query, team)
  header = ['ID', 'Title', 'Description', 'URL', 'Language']
  team.filtered_explainers(query).find_each.each_with_object([header]) do |explainer, rows|
    rows << [explainer.id, explainer.title, explainer.description, explainer.url, explainer.language]
  end
end

def self.update_paragraphs_in_alegre(id, previous_paragraphs_count, timestamp)
explainer = Explainer.find(id)

Expand Down
8 changes: 8 additions & 0 deletions app/models/fact_check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ def update_item_status
end
end

# Builds the export rows for fact-checks: a header row followed by one row per
# fact-check returned by `team.filtered_fact_checks(query)`. The `imported`
# flag is written as a string.
def self.get_exported_data(query, team)
  header = ['ID', 'Title', 'Summary', 'URL', 'Language', 'Report Status', 'Imported?']
  rows = team.filtered_fact_checks(query).find_each.map do |fact_check|
    [
      fact_check.id,
      fact_check.title,
      fact_check.summary,
      fact_check.url,
      fact_check.language,
      fact_check.report_status,
      fact_check.imported.to_s
    ]
  end
  [header] + rows
end

private

def set_language
Expand Down
8 changes: 8 additions & 0 deletions app/models/feed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,14 @@ def saved_search_was
SavedSearch.find_by_id(self.saved_search_id_before_last_save)
end

# Builds the export rows for this feed: a header row followed by one row
# (title, media count, requests count, fact-checks count) per cluster
# returned by `filtered_clusters(filters)`.
def get_exported_data(filters)
  header = ['Title', 'Number of media', 'Number of requests', 'Number of fact-checks']
  cluster_rows = self.filtered_clusters(filters).find_each.map do |c|
    [c.title, c.media_count, c.requests_count, c.fact_checks_count]
  end
  [header].concat(cluster_rows)
end

# This takes some time to run because it involves external HTTP requests and writes to the database:
# 1) If the query contains a media URL, it will be downloaded... if it contains some other URL, it will be sent to Pender
# 2) Requests will be made to Alegre in order to index the request media and to look for similar requests
Expand Down
130 changes: 130 additions & 0 deletions app/views/export_list_mailer/send_csv.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
<%= render "shared/header" %>

<style>
@media only screen and (max-width: 481px) {
.notify-title a {
display: block !important;
width: auto !important;
}
}


@media only screen and (max-width: 601px) {
.notify-title__content {
padding: 0 30px;
}
}


@media only screen and (max-width: 481px) {
.notify-title__content {
padding: 0 !important;
}
}
</style>

<!--~~// Notify-title module start \\~~-->
<!--————————————————————————————-->
<div class="notify-title" style="text-align: <%= @direction[:align] %> !important; direction: <%= @direction[:dir] %>">
<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 44px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>
<div class="container " width="600" style="margin: 0 auto; text-align: <%= @direction[:align] %>; width: 600px;">
<table class="container__table" cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; margin: 0 auto; mso-table-lspace: 0pt; mso-table-rspace: 0pt; table-layout: fixed;">
<tr>
<th class="col-1" style="font-weight: normal; mso-line-height-rule: exactly; padding: 0;" width="600px" valign="top" align="<%= @direction[:align] %>">
<div class="notify-title__content" style="text-align:<%= @direction[:align] %>;">
<div class="h3 notify-title__proj" style="font-size: 21px; letter-spacing: -0.2px; line-height: 29px;">
<%= I18n.t(:"mails_notifications.export_list.hello", name: @user.name) %>
</div>
<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 13px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>
<div class="h1 notify-title__header" style="font-size: 40px; font-weight: bold; letter-spacing: -0.8px; line-height: 40px;">
<%= I18n.t("mails_notifications.export_list.subject") %>
</div>
<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 12px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>
<div class="text-gray" style="color: #757575 !important;">
<div class="h3" style="font-size: 21px; letter-spacing: -0.2px; line-height: 30px;">
<%= I18n.t(:"mails_notifications.export_list.body") %>
</div>
</div>
</div>

<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 28px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>

<!--————————————————————————————-->
<!--~~\\ Notify-title module end //~~-->

<div class="container wide" width="600" style="margin: 0 auto; text-align: <%= @direction[:align] %>; width: 600px;">

<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td class="notify-title__button text-white" style="background: #2E77FC; border-collapse: collapse; border-radius: 4px; color: #f1f1f1 !important; display: inline-block; padding-bottom: 15px; padding-left: 25px; padding-right: 23px; padding-top: 15px;"
width="auto">
<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; padding-right: 14px;">
<span class="span" style="font-size: 17px; font-weight: bold; line-height: 20px;">
<%=
link_to(I18n.t('mails_notifications.export_list.button_label'),
@csv_file_url,
:style => "text-decoration: none !important;color: #fff !important;"
)
%>
</span>
</td>
<td style="border-collapse: collapse;" align="right">
<%= image_tag("https://images.ctfassets.net/g118h5yoccvd/#{@direction[:arrow]}", width: "7", alt: "arrow-icon", style: "-ms-interpolation-mode: bicubic; border: 0 none; height: auto; line-height: 100%; outline: none; text-decoration: none;") %>
</td>
</tr>
</table>
</td>
</tr>
</table>

<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 23px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>

<div class="text-gray" style="color: #757575 !important;">
<div class="h3" style="font-size: 21px; letter-spacing: -0.2px; line-height: 30px;">
<%= I18n.t(:"mails_notifications.export_list.footer", date: @expire_in) %>
</div>
</div>
</div>
<table cellpadding="0" cellspacing="0" border="0" width="100%" style="border-collapse: collapse; mso-table-lspace: 0pt; mso-table-rspace: 0pt;">
<tr>
<td style="border-collapse: collapse; font-size: 23px; line-height: 1; text-decoration: none !important;">&nbsp;</td>
</tr>
</table>
<style>
@media only screen and (max-width: 601px) {
th.footer {
padding: 0 30px !important;
}
}


@media only screen and (max-width: 481px) {
th.footer {
padding: 0 !important;
}
}
</style>

<%= render "shared/footer" %>
14 changes: 14 additions & 0 deletions app/views/export_list_mailer/send_csv.text.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<%= I18n.t('mails_notifications.export_list.hello', name: @user.name) %>
<%= I18n.t('mails_notifications.export_list.subject') %>
<%= I18n.t('mails_notifications.export_list.body') %>
<%= I18n.t('mails_notifications.export_list.button_label') %>: <%= @csv_file_url %>
<%= I18n.t('mails_notifications.export_list.footer', date: @expire_in ) %>

...

<%= strip_tags I18n.t("mails_notifications.copyright_html", app_name: CheckConfig.get('app_name')) %>
https://meedan.com
11 changes: 9 additions & 2 deletions config/config.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ development: &default
smtp_user: # '<GMAIL USERNAME>'
smtp_pass: # '<GMAIL PASSWORD>'
smtp_default_url_host: 'http://localhost:3333' # Used to construct URLs for links in email
smtp_mailcatcher_host: # 'host.docker.internal'

# Pusher notification service https://pusher.com/channels
#
Expand Down Expand Up @@ -262,20 +263,26 @@ development: &default
otel_traces_sampler:
otel_custom_sampling_rate:

# Rate limits for tiplines
# Limits
#
# OPTIONAL
# When not set, default values are used.
#
tipline_user_max_messages_per_day: 1500
nlu_global_rate_limit: 100
nlu_user_rate_limit: 30

devise_maximum_attempts: 5
devise_unlock_accounts_after: 1
login_rate_limit: 10
api_rate_limit: 100
export_csv_maximum_number_of_results: 10000
export_csv_expire: 604800 # Seconds: Default is 7 days

# Session
#
# OPTIONAL
# When not set, default values are used.
#
session_store_key: '_checkdesk_session_dev'
session_store_domain: 'localhost'
test:
Expand Down
8 changes: 8 additions & 0 deletions config/environments/development.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,12 @@
else
puts '[WARNING] config.hosts not provided. Only requests from localhost are allowed. To change, update `whitelisted_hosts` in config.yml'
end

# Route outgoing mail to MailCatcher when a host is configured, either through
# the environment variable or through the `smtp_mailcatcher_host` config key.
mailcatcher_host = ENV['smtp_mailcatcher_host'] || cfg['smtp_mailcatcher_host']
if mailcatcher_host.present?
  config.action_mailer.smtp_settings = { address: mailcatcher_host, port: 1025 }
end
end
2 changes: 1 addition & 1 deletion config/initializers/plugins.rb
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Load classes on boot, in production, that otherwise wouldn't be auto-loaded by default
CcDeville && Bot::Keep && Workflow::Workflow.workflows && CheckS3 && Bot::Tagger && Bot::Fetch && Bot::Smooch && Bot::Slack && Bot::Alegre && CheckChannels && RssFeed && UrlRewriter && ClusterTeam
CcDeville && Bot::Keep && Workflow::Workflow.workflows && CheckS3 && Bot::Tagger && Bot::Fetch && Bot::Smooch && Bot::Slack && Bot::Alegre && CheckChannels && RssFeed && UrlRewriter && ClusterTeam && ListExport
6 changes: 6 additions & 0 deletions config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,12 @@ en:
constitutes acceptance of our updated Terms of Service.
term_button: Terms of Service
more_info: This is a one-time required legal notice sent to all Check users, even those who have unsubscribed from optional announcements.
export_list:
hello: Hello %{name}
subject: Check Data Export
body: Your requested Check data export is available to download.
button_label: Download Export
footer: This download link will expire on %{date}.
mail_security:
device_subject: 'Security alert: New login to %{app_name} from %{browser} on %{platform}'
ip_subject: 'Security alert: New or unusual %{app_name} login'
Expand Down
9 changes: 9 additions & 0 deletions lib/check_s3.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,13 @@ def self.delete(*paths)
client = Aws::S3::Client.new
client.delete_objects(bucket: CheckConfig.get('storage_bucket'), delete: { objects: objects })
end

# Stores `content` at `path` (through `write`) and returns a pre-signed GET URL
# for the stored object.
#
# path         - object key inside the bucket named by the `storage_bucket` setting
# content_type - forwarded untouched to `write`
# content      - forwarded untouched to `write`
# expires_in   - forwarded untouched to `presigned_url`
#
# NOTE(review): the AWS SDK is understood to reject pre-signed URL lifetimes
# longer than one week - confirm callers never pass a larger `expires_in`.
def self.write_presigned(path, content_type, content, expires_in)
  write(path, content_type, content)
  bucket_name = CheckConfig.get('storage_bucket')
  resource = Aws::S3::Resource.new(client: Aws::S3::Client.new)
  resource.bucket(bucket_name).object(path).presigned_url(:get, expires_in: expires_in)
end
end
45 changes: 44 additions & 1 deletion lib/check_search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def initialize(options, file = nil, team_id = Team.current&.id)
'fact_check_published_on' => 'fact_check_published_on'
}

# Sets (or overrides) a single entry of this search's options.
# Returns the assigned value.
def set_option(key, value)
@options[key] = value
end

def team_condition(team_id = nil)
if feed_query?
feed_teams = @options['feed_team_ids'].is_a?(Array) ? (@feed.team_ids & @options['feed_team_ids']) : @feed.team_ids
Expand Down Expand Up @@ -329,12 +333,51 @@ def medias_get_search_result(query)
@options['es_id'] ? $repository.find([@options['es_id']]).compact : $repository.search(query: query, collapse: collapse, sort: sort, size: @options['eslimit'], from: @options['esoffset']).results
end

# Builds the export rows for a media search: a header row followed by one row
# per item yielded by `search.medias`.
#
# query   - search options, handed to CheckSearch.new
# team_id - id of the team; its team tasks provide one extra column each
#
# Returns an array of arrays (header first).
def self.get_exported_data(query, team_id)
  team = Team.find(team_id)
  search = CheckSearch.new(query, nil, team_id)

  # Fixed columns first, then one column per team task (in sorted order)
  team_tasks = team.team_tasks.sort
  fixed_columns = ['Claim', 'Item page URL', 'Status', 'Created by', 'Submitted at', 'Published at', 'Number of media', 'Tags']
  rows = [fixed_columns + team_tasks.map(&:label)]

  # No pagination for the export: start at zero, up to the configured maximum
  search.set_option('esoffset', 0)
  search.set_option('eslimit', CheckConfig.get(:export_csv_maximum_number_of_results, 10000, :integer))

  timestamp_format = "%Y-%m-%d %H:%M:%S"

  # Converts a task annotation (or nil when the item has none for that task) into cell text.
  answer_for = lambda do |annotation|
    text =
      if annotation
        begin
          # Prefer the file URLs of the response, when there are any...
          annotation.first_response_obj.file_data[:file_urls].join("\n")
        rescue StandardError
          # ... otherwise fall back to the response itself
          annotation.first_response.to_s
        end
      else
        ''
      end

    begin
      # When the text parses as a JSON list of objects, keep only their URLs
      JSON.parse(text).collect { |entry| entry['url'] }.join(', ')
    rescue StandardError
      text
    end
  end

  # One output row per search result
  search.medias.find_each do |pm|
    base_cells = [
      pm.claim_description&.description,
      pm.full_url,
      pm.status_i18n,
      pm.author_name.to_s.gsub(/ \[.*\]$/, ''),
      pm.created_at.strftime(timestamp_format),
      pm.published_at&.strftime(timestamp_format),
      pm.linked_items_count(true),
      pm.tags_as_sentence(true)
    ]
    task_annotations = pm.get_annotations('task').map(&:load)
    task_cells = team_tasks.map do |team_task|
      answer_for.call(task_annotations.find { |a| a.team_task_id == team_task.id })
    end
    rows << (base_cells + task_cells)
  end

  rows
end

private

# Shrinks the 'eslimit' option so that 'esoffset' + 'eslimit' never goes past
# the 10000-result window. Options inside the window are left untouched.
def adjust_es_window_size
  max_window = 10000
  offset = @options['esoffset'].to_i
  return if offset + @options['eslimit'].to_i <= max_window

  @options['eslimit'] = max_window - offset
end

def adjust_project_filter
Expand Down
Loading

0 comments on commit f1f3b6c

Please sign in to comment.