Skip to content

Commit

Permalink
5120 – Dont create duplicate tags and clean up # (#2054)
Browse files Browse the repository at this point in the history
Context
While looking into a tags issue I noticed a few things:
    - when we made a request with duplicate tags:
        - we got an error, so the job was retried
        - the tag was added twice to the FactCheck
        - the tag is added once to the ProjectMedia
    - when we made a request with a tag with a #
        - we got an error, so the job was retried
            - there seem to have been two errors related to this:
                - ActiveRecord::RecordInvalid: Tag already exists
                - ActiveRecord::RecordInvalid: Text has already been taken
        - the tag with the # is added to the FactCheck
        - the tag is not added to the ProjectMedia

What was happening
    There are a few things happening at the same time:
        - Creation of a ProjectMedia with tags
        - Creation of a FactCheck with tags
        - Tags are an Object from the Tag Class for ProjectMedia, but are a simple array for FactCheck
    For the ProjectMedia:
        - It would create the tag, then it would try to create the same tag again, and then it would fail and retry again, and so on
        - This happen because we have validations in place for the Tag class
    For FactCheck:
        - It would just create the tags twice
        - Because we have no validations for the tags array

TLDR: There were some issues in the tags clean-up before we create them for ProjectMedia and FactCheck, and there was a mismatch between them.

How it was fixed
- I created a helper to clean up the tags before creating them
- We need to make sure tags are:
    - stripped
    - unique
    - don't have a prepending `#`
- We use this helper both in FactCheck.rb and Tag.rb

References: 5120
PR: 2054
  • Loading branch information
vasconsaurus authored and DGaffney committed Oct 15, 2024
1 parent a704dda commit 7ac655d
Show file tree
Hide file tree
Showing 11 changed files with 166 additions and 51 deletions.
13 changes: 13 additions & 0 deletions app/models/annotations/tag.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class Tag < ApplicationRecord
include AnnotationBase
extend TagHelpers

# "tag" is a reference to a TagText object
field :tag, GraphQL::Types::Int, presence: true
Expand Down Expand Up @@ -91,6 +92,18 @@ def hit_nested_objects_limit?
ret
end

def self.create_project_media_tags(project_media_id, tags_json)
project_media = ProjectMedia.find_by_id(project_media_id)

if !project_media.nil?
tags = JSON.parse(tags_json)
clean_tags(tags).each { |tag| Tag.create! annotated: project_media, tag: tag.strip, skip_check_ability: true }
else
error = StandardError.new("[ProjectMedia] Exception creating project media's tags in background. Project media is nil.")
CheckSentry.notify(error, project_media_id: project_media_id)
end
end

private

def get_tag_text_reference
Expand Down
2 changes: 1 addition & 1 deletion app/models/concerns/project_media_creators.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def create_tags_in_background
if self.set_tags.is_a?(Array)
project_media_id = self.id
tags_json = self.set_tags.to_json
ProjectMedia.run_later_in(1.second, 'create_tags', project_media_id, tags_json, user_id: self.user_id)
Tag.run_later_in(1.second, 'create_project_media_tags', project_media_id, tags_json, user_id: self.user_id)
end
end
end
9 changes: 9 additions & 0 deletions app/models/concerns/tag_helpers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
require 'active_support/concern'

module TagHelpers
extend ActiveSupport::Concern

def clean_tags(tags)
tags.map { |tag| tag.strip.gsub(/^#/, '') }.uniq
end
end
7 changes: 7 additions & 0 deletions app/models/fact_check.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class FactCheck < ApplicationRecord
include Article
include TagHelpers

has_paper_trail on: [:create, :update], ignore: [:updated_at, :created_at, :rating, :report_status], if: proc { |_x| User.current.present? }, versions: { class_name: 'Version' }

Expand All @@ -18,6 +19,7 @@ class FactCheck < ApplicationRecord
validates_format_of :url, with: URI.regexp, allow_blank: true, allow_nil: true
validate :language_in_allowed_values, :title_or_summary_exists, :rating_in_allowed_values

before_save :clean_fact_check_tags
after_save :update_report, unless: proc { |fc| fc.skip_report_update || !DynamicAnnotation::AnnotationType.where(annotation_type: 'report_design').exists? || fc.project_media.blank? }
after_save :update_item_status, if: proc { |fc| fc.saved_change_to_rating? }
after_update :detach_claim_if_trashed
Expand Down Expand Up @@ -56,6 +58,11 @@ def self.get_exported_data(query, team)
data
end

def clean_fact_check_tags
return if self.tags.blank?
self.tags = clean_tags(self.tags)
end

private

def set_language
Expand Down
12 changes: 0 additions & 12 deletions app/models/project_media.rb
Original file line number Diff line number Diff line change
Expand Up @@ -528,18 +528,6 @@ def add_nested_objects(ms)
ms.attributes[:requests] = requests
end

def self.create_tags(project_media_id, tags_json)
project_media = ProjectMedia.find_by_id(project_media_id)

if !project_media.nil?
tags = JSON.parse(tags_json)
tags.each { |tag| Tag.create! annotated: project_media, tag: tag.strip, skip_check_ability: true }
else
error = StandardError.new("[ProjectMedia] Exception creating project media's tags in background. Project media is nil.")
CheckSentry.notify(error, project_media_id: project_media_id)
end
end

# private
#
# Please add private methods to app/models/concerns/project_media_private.rb
Expand Down
51 changes: 51 additions & 0 deletions test/controllers/graphql_controller_12_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -680,4 +680,55 @@ def teardown
assert_response :success
assert_equal 'science', JSON.parse(@response.body)['data']['createProjectMedia']['project_media']['tags']['edges'][0]['node']['tag_text']
end

test "should not create duplicate tags for the ProjectMedia and FactCheck" do
Sidekiq::Testing.inline!
t = create_team
a = ApiKey.create!
b = create_bot_user api_key_id: a.id
create_team_user team: t, user: b
p = create_project team: t
authenticate_with_token(a)

query1 = ' mutation create {
createProjectMedia(input: {
project_id: ' + p.id.to_s + ',
media_type: "Blank",
channel: { main: 1 },
set_tags: ["science", "science", "#science"],
set_status: "verified",
set_claim_description: "Claim #1.",
set_fact_check: {
title: "Title #1",
language: "en",
}
}) {
project_media {
full_url
claim_description {
fact_check {
tags
}
}
tags {
edges {
node {
tag_text
}
}
}
}
}
} '

post :create, params: { query: query1, team: t.slug }
assert_response :success

pm = JSON.parse(@response.body)['data']['createProjectMedia']['project_media']
pm_tags = pm['tags']['edges']
fc_tags = pm['claim_description']['fact_check']['tags']

assert_equal 1, pm_tags.count
assert_equal 1, fc_tags.count
end
end
6 changes: 3 additions & 3 deletions test/lib/generic_worker_helpers_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ def setup
def teardown
end

test "should run a job, without raising an error, for a method that takes a hash as a parameter" do
test "should run a job async, without raising an error, for a method that takes a hash as a parameter" do
Sidekiq::Testing.inline!

assert_difference "Team.where(name: 'BackgroundTeam', slug: 'background-team').count" do
Team.run_later('create!', name: 'BackgroundTeam', slug: 'background-team')
end
end

test "should run a job, without raising an error, for a method that takes standalone parameters" do
test "should run a job in a specified time, without raising an error, for a method that takes standalone parameters" do
Sidekiq::Testing.inline!

team = create_team
Expand All @@ -27,7 +27,7 @@ def teardown
project_media_id = pm.id
tags_json = ['one', 'two', 'three'].to_json

ProjectMedia.run_later_in(0.second, 'create_tags', project_media_id, tags_json, user_id: pm.user_id)
Tag.run_later_in(0.second, 'create_project_media_tags', project_media_id, tags_json, user_id: pm.user_id)

assert_equal 3, pm.annotations('tag').count
end
Expand Down
22 changes: 22 additions & 0 deletions test/models/fact_check_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,26 @@ def setup
end
end
end

test "should not have duplicate tags" do
pm = create_project_media
cd = create_claim_description(project_media: pm)
fc = create_fact_check claim_description: cd, tags: ['one', 'one', '#one']

assert_equal 1, fc.tags.count
assert_equal 'one', fc.tags.first
end

test "should add existing tag to a new fact check" do
pm = create_project_media
cd = create_claim_description(project_media: pm)
create_fact_check claim_description: cd, tags: ['one']

pm2 = create_project_media
cd2 = create_claim_description(project_media: pm2)
fc2 = create_fact_check claim_description: cd2, tags: ['#one']

assert_equal 1, fc2.tags.count
assert_equal 'one', fc2.tags.first
end
end
37 changes: 4 additions & 33 deletions test/models/project_media_8_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,6 @@ def setup
def teardown
end

test ":create_tags should create tags when project media id and tags are present" do
team = create_team
project = create_project team: team
pm = create_project_media project: project

project_media_id = pm.id
tags_json = ['one', 'two'].to_json

assert_nothing_raised do
ProjectMedia.create_tags(project_media_id, tags_json)
end
assert_equal 2, pm.annotations('tag').count
end

test ":create_tags should not raise an error when no project media is sent" do
project_media_id = nil
tags_json = ['one', 'two'].to_json

assert_nothing_raised do
CheckSentry.expects(:notify).once
ProjectMedia.create_tags(project_media_id, tags_json)
end
end

test "when creating an item with tag/tags, after_create :create_tags_in_background callback should create tags in the background" do
Sidekiq::Testing.inline!

Expand All @@ -55,18 +31,13 @@ def teardown
assert_equal 1, GenericWorker.jobs.size
end

test "when using :run_later_in to schedule multiple tags creation, it should only schedule one job" do
Sidekiq::Testing.fake!
test "when creating an item with multiple tags, after_create :create_tags_in_background callback should not create duplicate tags" do
Sidekiq::Testing.inline!

team = create_team
project = create_project team: team
pm = create_project_media project: project
pm = create_project_media project: project, tags: ['one', 'one', '#one']

project_media_id = pm.id
tags_json = ['one', 'two', 'three'].to_json

ProjectMedia.run_later_in(0.second, 'create_tags', project_media_id, tags_json, user_id: pm.user_id)

assert_equal 1, GenericWorker.jobs.size
assert_equal 1, pm.annotations('tag').count
end
end
54 changes: 54 additions & 0 deletions test/models/tag_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,58 @@ def setup
create_tag tag: ' foo', annotated: pm2
end
end

test ":create_project_media_tags should create tags when project media id and tags are present" do
team = create_team
project = create_project team: team
pm = create_project_media project: project

project_media_id = pm.id
tags_json = ['one', 'two'].to_json

assert_nothing_raised do
Tag.create_project_media_tags(project_media_id, tags_json)
end
assert_equal 2, pm.annotations('tag').count
end

test ":create_project_media_tags should not raise an error when no project media is sent" do
project_media_id = nil
tags_json = ['one', 'two'].to_json

assert_nothing_raised do
CheckSentry.expects(:notify).once
Tag.create_project_media_tags(project_media_id, tags_json)
end
end

test ":create_project_media_tags should not try to create duplicate tags" do
Sidekiq::Testing.fake!

team = create_team
project = create_project team: team
pm = create_project_media project: project
Tag.create_project_media_tags(pm.id, ['one', 'one', '#one'].to_json)

tags = pm.reload.annotations('tag')
tag_text_id = tags.last.data['tag']
tag_text = TagText.find(tag_text_id).text

assert_equal 1, tags.count
assert_equal 'one', tag_text
end

test ":create_project_media_tags should be able to add an existing tag to a new project media" do
Sidekiq::Testing.fake!

team = create_team
project = create_project team: team
pm = create_project_media project: project
Tag.create_project_media_tags(pm.id, ['one'].to_json)

pm2 = create_project_media project: project
Tag.create_project_media_tags(pm2.id, ['#one'].to_json)

assert_equal 1, pm2.reload.annotations('tag').count
end
end
4 changes: 2 additions & 2 deletions test/workers/generic_worker_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def teardown
tags_json = ['one', 'two'].to_json

assert_difference "Tag.where(annotation_type: 'tag').count", difference = 2 do
GenericWorker.perform_async('ProjectMedia', 'create_tags', project_media_id, tags_json, user_id: pm.user_id)
GenericWorker.perform_async('Tag', 'create_project_media_tags', project_media_id, tags_json, user_id: pm.user_id)
end
end

Expand Down Expand Up @@ -71,7 +71,7 @@ def teardown
tags_json = ['one', 'two'].to_json

assert_nothing_raised do
GenericWorker.perform_async('ProjectMedia', 'create_tags', project_media_id, tags_json, user_id: pm.user_id)
GenericWorker.perform_async('Tag', 'create_project_media_tags', project_media_id, tags_json, user_id: pm.user_id)
end

assert_equal 1, GenericWorker.jobs.size
Expand Down

0 comments on commit 7ac655d

Please sign in to comment.