From df5afec39045ec4643530de1cdab0b420c6e79e8 Mon Sep 17 00:00:00 2001 From: Sawy Date: Sun, 29 Sep 2024 11:01:35 +0300 Subject: [PATCH 1/6] CV2-5005: limit ES date to updated fields only --- app/lib/check_cached_fields.rb | 6 ++- app/models/concerns/project_association.rb | 37 ++++++++----------- .../concerns/project_media_cached_fields.rb | 2 +- app/workers/elastic_search_worker.rb | 4 +- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/app/lib/check_cached_fields.rb b/app/lib/check_cached_fields.rb index 16ffc8e10c..9545d6c80e 100644 --- a/app/lib/check_cached_fields.rb +++ b/app/lib/check_cached_fields.rb @@ -66,8 +66,10 @@ def index_and_pg_cached_field(options, value, name, target) if should_update_cached_field?(options, target) update_index = options[:update_es] || false value = target.send(update_index, value) if update_index.is_a?(Symbol) && target.respond_to?(update_index) - field_name = options[:es_field_name] || name - es_options = { keys: [field_name], data: { field_name => value } } + field_name = options[:es_field_name]&.flatten || [name] + data = {} + field_name.each{ |f| data[f] = value } + es_options = { keys: field_name, data: data } es_options[:pm_id] = target.id if target.class.name == 'ProjectMedia' model = { klass: target.class.name, id: target.id } ElasticSearchWorker.new.perform(YAML::dump(model), YAML::dump(es_options), 'update_doc') diff --git a/app/models/concerns/project_association.rb b/app/models/concerns/project_association.rb index 0200e8ebca..c1d482507b 100644 --- a/app/models/concerns/project_association.rb +++ b/app/models/concerns/project_association.rb @@ -60,7 +60,7 @@ def belonged_to_project(objid, pid, tid) validate :is_unique, on: :create, unless: proc { |p| p.is_being_copied } after_commit :add_elasticsearch_data, on: :create - after_commit :update_elasticsearch_data, on: :update + after_update :update_elasticsearch_data after_commit :destroy_elasticsearch_media , on: :destroy def get_versions_log(event_types = nil, field_names = nil, annotation_types = nil, whodunnit = nil, include_related = false) @@ -86,26 +86,21 @@ def add_elasticsearch_data def update_elasticsearch_data return if self.disable_es_callbacks || RequestStore.store[:disable_es_callbacks] - obj = self.class.find_by_id(self.id) - return if obj.nil? - data = { - 'team_id' => obj.team_id, - 'archived' => { method: 'archived', klass: obj.class.name, id: obj.id, type: 'int' }, - 'sources_count' => { method: 'sources_count', klass: 'ProjectMedia', id: obj.id, type: 'int' }, - 'user_id' => obj.user_id, - 'read' => obj.read.to_i, - 'media_published_at' => obj.media_published_at, - 'source_id' => obj.source_id, - 'source_name' => obj.source&.name, - 'project_id' => obj.project_id, - 'unmatched' => obj.unmatched, - 'channel' => obj.channel.values.flatten.map(&:to_i), - 'updated_at' => obj.updated_at.utc, - 'title' => obj.title - } - options = { keys: data.keys, data: data, pm_id: obj.id } - model = { klass: obj.class.name, id: obj.id } - ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'update_doc') + data = {} + ['team_id', 'user_id', 'read', 'source_id', 'project_id', 'unmatched'].each do |fname| + data[fname] = self.send(fname).to_i if self.send("saved_change_to_#{fname}?") + end + ['archived', 'sources_count'].each do |fname| + data[fname] = { method: fname, klass: 'ProjectMedia', id: self.id, type: 'int' } if self.send("saved_change_to_#{fname}?") + end + data['channel'] = self.channel.values.flatten.map(&:to_i) if self.send(:saved_change_to_channel?) + data['source_name'] = self.source&.name if self.send(:saved_change_to_source_id?) + unless data.blank? + data['updated_at'] = self.updated_at.utc + options = { keys: data.keys, data: data, pm_id: self.id } + model = { klass: self.class.name, id: self.id } + ElasticSearchWorker.perform_in(1.second, YAML::dump(model), YAML::dump(options), 'update_doc') + end end def destroy_elasticsearch_media diff --git a/app/models/concerns/project_media_cached_fields.rb b/app/models/concerns/project_media_cached_fields.rb index 87ccf7dd17..d5f1161aef 100644 --- a/app/models/concerns/project_media_cached_fields.rb +++ b/app/models/concerns/project_media_cached_fields.rb @@ -210,7 +210,7 @@ def title_or_description_update cached_field :title, update_es: true, - es_field_name: :title_index, + es_field_name: [:title, :title_index], recalculate: :recalculate_title, update_on: title_or_description_update diff --git a/app/workers/elastic_search_worker.rb b/app/workers/elastic_search_worker.rb index 787d272d0b..87ca4da87c 100644 --- a/app/workers/elastic_search_worker.rb +++ b/app/workers/elastic_search_worker.rb @@ -2,9 +2,9 @@ class ElasticSearchWorker include Sidekiq::Worker - sidekiq_options :queue => :esqueue, :retry => 5 + sidekiq_options :queue => :esqueue, :retry => 3 - sidekiq_retry_in { |_count, _e| 5 } + sidekiq_retry_in { |_count, _e| 3 } def perform(model_data, options, type) model_data = begin YAML::load(model_data) rescue nil end From 1be2579eea7ca51ad1cc7a48b43d15565d6f9ea9 Mon Sep 17 00:00:00 2001 From: Sawy Date: Sun, 29 Sep 2024 20:33:00 +0300 Subject: [PATCH 2/6] fix tests --- app/lib/check_cached_fields.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/lib/check_cached_fields.rb b/app/lib/check_cached_fields.rb index 9545d6c80e..601b8b87cd 100644 --- a/app/lib/check_cached_fields.rb +++ b/app/lib/check_cached_fields.rb @@ -66,7 +66,7 @@ def index_and_pg_cached_field(options, value, name, target) if should_update_cached_field?(options, target) update_index = options[:update_es] || false value = target.send(update_index, value) if update_index.is_a?(Symbol) && target.respond_to?(update_index) - field_name = options[:es_field_name]&.flatten || [name] + field_name = options[:es_field_name].nil? ? [name] : [options[:es_field_name]]&.flatten data = {} field_name.each{ |f| data[f] = value } es_options = { keys: field_name, data: data } From cb7d5e83d968873930bd91dfd5a77952cd2ce09f Mon Sep 17 00:00:00 2001 From: Sawy Date: Mon, 30 Sep 2024 08:56:15 +0300 Subject: [PATCH 3/6] CV2-5005: fix tests --- test/controllers/elastic_search_test.rb | 24 ++++++++++--------- test/controllers/graphql_controller_3_test.rb | 14 +++++------ test/models/bot/fetch_2_test.rb | 2 +- test/models/project_media_3_test.rb | 2 ++ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/test/controllers/elastic_search_test.rb b/test/controllers/elastic_search_test.rb index 4c669225c6..30a088788e 100644 --- a/test/controllers/elastic_search_test.rb +++ b/test/controllers/elastic_search_test.rb @@ -9,19 +9,20 @@ def setup test "should search media" do u = create_user @team = create_team - p = create_project team: @team m1 = create_valid_media - pm1 = create_project_media project: p, media: m1, disable_es_callbacks: false + pm1 = create_project_media team: @team, media: m1, disable_es_callbacks: false authenticate_with_user(u) pender_url = CheckConfig.get('pender_url_private') + '/api/medias' url = 'http://test.com' response = '{"type":"media","data":{"url":"' + url + '/normalized","type":"item", "title": "title_a", "description":"search_desc"}}' WebMock.stub_request(:get, pender_url).with({ query: { url: url } }).to_return(body: response) m2 = create_media(account: create_valid_account, url: url) - pm2 = create_project_media project: p, media: m2, disable_es_callbacks: false - sleep 10 - query = 'query Search { search(query: "{\"keyword\":\"title_a\",\"projects\":[' + p.id.to_s + ']}") { number_of_results, medias(first: 10) { edges { node { dbid } } } } }' + pm2 = create_project_media team: @team, media: m2, disable_es_callbacks: false + sleep 2 + Team.stubs(:current).returns(@team) + query = 'query Search { search(query: "{\"keyword\":\"title_a\"}") { number_of_results, medias(first: 10) { edges { node { dbid } } } } }' post :create, params: { query: query } + Team.unstub(:current) assert_response :success ids = [] JSON.parse(@response.body)['data']['search']['medias']['edges'].each do |id| @@ -29,9 +30,11 @@ def setup end assert_equal [pm2.id], ids create_comment text: 'title_a', annotated: pm1, disable_es_callbacks: false - sleep 20 - query = 'query Search { search(query: "{\"keyword\":\"title_a\",\"sort\":\"recent_activity\",\"projects\":[' + p.id.to_s + ']}") { medias(first: 10) { edges { node { dbid } } } } }' + sleep 2 + Team.stubs(:current).returns(@team) + query = 'query Search { search(query: "{\"keyword\":\"title_a\",\"sort\":\"recent_activity\"}") { medias(first: 10) { edges { node { dbid } } } } }' post :create, params: { query: query } + Team.unstub(:current) assert_response :success ids = [] JSON.parse(@response.body)['data']['search']['medias']['edges'].each do |id| @@ -57,7 +60,7 @@ def setup m2 = create_media(account: create_valid_account, url: url2) pm = create_project_media project: p, media: m, disable_es_callbacks: false pm2 = create_project_media project: p2, media: m2, disable_es_callbacks: false - sleep 10 + sleep 2 query = 'query Search { search(query: "{\"keyword\":\"title_a\",\"projects\":[' + p.id.to_s + ',' + p2.id.to_s + ']}") { medias(first: 10) { edges { node { dbid } } } } }' post :create, params: { query: query } assert_response :success @@ -67,7 +70,7 @@ def setup end assert_equal [pm.id, pm2.id], m_ids.sort pm2.analysis = { content: 'new_description' } - sleep 10 + sleep 2 query = 'query Search { search(query: "{\"keyword\":\"title_a\",\"projects\":[' + p.id.to_s + ',' + p2.id.to_s + ']}") { medias(first: 10) { edges { node { dbid, description } } } } }' post :create, params: { query: query } assert_response :success @@ -104,7 +107,6 @@ def setup test "should search with keyword" do t = create_team - p = create_project team: t pender_url = CheckConfig.get('pender_url_private') + '/api/medias' url = 'http://test.com' author_url = 'http://facebook.com/123456' @@ -119,7 +121,7 @@ def setup WebMock.stub_request(:get, pender_url).with({ query: { url: author_url } }).to_return(body: response) m = create_media url: url, account_id: nil, user_id: nil, account: nil, user: nil - pm = create_project_media project: p, media: m, disable_es_callbacks: false + pm = create_project_media team: t, media: m, disable_es_callbacks: false sleep 1 Team.current = t result = CheckSearch.new({keyword: "non_exist_title"}.to_json) diff --git a/test/controllers/graphql_controller_3_test.rb b/test/controllers/graphql_controller_3_test.rb index 5b22bc464d..267a64dc83 100644 --- a/test/controllers/graphql_controller_3_test.rb +++ b/test/controllers/graphql_controller_3_test.rb @@ -320,17 +320,15 @@ def setup test "should return updated offset from ES" do RequestStore.store[:skip_cached_field_update] = false - u = create_user is_admin: true authenticate_with_user(u) t = create_team - p = create_project team: t - pm1 = create_project_media project: p - create_relationship source_id: pm1.id, target_id: create_project_media(project: p).id, relationship_type: Relationship.confirmed_type - pm2 = create_project_media project: p - create_relationship source_id: pm2.id, target_id: create_project_media(project: p).id, relationship_type: Relationship.confirmed_type - create_relationship source_id: pm2.id, target_id: create_project_media(project: p).id, relationship_type: Relationship.confirmed_type - sleep 10 + pm1 = create_project_media team: t, disable_es_callbacks: false + create_relationship source_id: pm1.id, target_id: create_project_media(team: t).id, relationship_type: Relationship.confirmed_type + pm2 = create_project_media team: t, disable_es_callbacks: false + create_relationship source_id: pm2.id, target_id: create_project_media(team: t).id, relationship_type: Relationship.confirmed_type + create_relationship source_id: pm2.id, target_id: create_project_media(team: t).id, relationship_type: Relationship.confirmed_type + sleep 2 query = 'query CheckSearch { search(query: "{\"sort\":\"related\",\"id\":' + pm1.id.to_s + ',\"esoffset\":0,\"eslimit\":1}") {item_navigation_offset,medias(first:20){edges{node{dbid}}}}}' post :create, params: { query: query, team: t.slug } assert_response :success diff --git a/test/models/bot/fetch_2_test.rb b/test/models/bot/fetch_2_test.rb index 6e7024d8c4..a457b66bf4 100644 --- a/test/models/bot/fetch_2_test.rb +++ b/test/models/bot/fetch_2_test.rb @@ -107,7 +107,7 @@ def setup pm = ProjectMedia.last result = $repository.find(get_es_id(pm)) assert_equal pm.media.type, result['associated_type'] - assert_equal pm.original_title, result['title'] + assert_equal pm.title, result['title'] assert_equal "Earth isn't flat", pm.title end diff --git a/test/models/project_media_3_test.rb b/test/models/project_media_3_test.rb index de82676dee..3174b69f2a 100644 --- a/test/models/project_media_3_test.rb +++ b/test/models/project_media_3_test.rb @@ -72,6 +72,8 @@ def setup pm.refresh_media = true pm.save! pm = ProjectMedia.find(pm.id) + # TODO: discuss the failure with Caio + assert_equal 601720260, pm.media_published_at assert_queries(0, '=') { assert_equal 601720260, pm.media_published_at } end From 80955620cf95b4dbdb75a69fdccf1c7b7025f268 Mon Sep 17 00:00:00 2001 From: Sawy Date: Mon, 30 Sep 2024 14:22:00 +0300 Subject: [PATCH 4/6] CV2-5005: fix tests --- test/controllers/elastic_search_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/controllers/elastic_search_test.rb b/test/controllers/elastic_search_test.rb index 30a088788e..71bddc19f2 100644 --- a/test/controllers/elastic_search_test.rb +++ b/test/controllers/elastic_search_test.rb @@ -145,13 +145,13 @@ def setup response = '{"type":"media","data":' + data.to_json + '}' WebMock.stub_request(:get, pender_url).with({ query: { url: media_url } }).to_return(body: response) m2 = create_media url: media_url, account_id: nil, user_id: nil, account: nil, user: nil - pm2 = create_project_media project: p, media: m2, disable_es_callbacks: false + pm2 = create_project_media team: t, media: m2, disable_es_callbacks: false sleep 1 result = CheckSearch.new({keyword: "search_desc"}.to_json) assert_equal [pm.id, pm2.id].sort, result.medias.map(&:id).sort # search in quote (with and operator) m = create_claim_media quote: 'keyworda and keywordb' - pm = create_project_media project: p, media: m, disable_es_callbacks: false + pm = create_project_media team: t, media: m, disable_es_callbacks: false sleep 1 result = CheckSearch.new({keyword: "keyworda and keywordb"}.to_json) assert_equal [pm.id], result.medias.map(&:id) From eacb74aa0fd97a2dba31ddc5c2cd15ee90b43055 Mon Sep 17 00:00:00 2001 From: Sawy Date: Mon, 30 Sep 2024 22:11:37 +0300 Subject: [PATCH 5/6] CV2-5005: fix tests --- test/models/project_media_3_test.rb | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/models/project_media_3_test.rb b/test/models/project_media_3_test.rb index 3174b69f2a..e9b6503915 100644 --- a/test/models/project_media_3_test.rb +++ b/test/models/project_media_3_test.rb @@ -60,21 +60,21 @@ def setup test "should cache media published at" do RequestStore.store[:skip_cached_field_update] = false - url = 'http://twitter.com/test/123456' - pender_url = CheckConfig.get('pender_url_private') + '/api/medias' - response = '{"type":"media","data":{"url":"' + url + '","type":"item","published_at":"1989-01-25 08:30:00"}}' - WebMock.stub_request(:get, pender_url).with({ query: { url: url } }).to_return(body: response) - pm = create_project_media media: nil, url: url - assert_queries(0, '=') { assert_equal 601720200, pm.media_published_at } - response = '{"type":"media","data":{"url":"' + url + '","type":"item","published_at":"1989-01-25 08:31:00"}}' - WebMock.stub_request(:get, pender_url).with({ query: { url: url, refresh: '1' } }).to_return(body: response) - pm = ProjectMedia.find(pm.id) - pm.refresh_media = true - pm.save! - pm = ProjectMedia.find(pm.id) - # TODO: discuss the failure with Caio - assert_equal 601720260, pm.media_published_at - assert_queries(0, '=') { assert_equal 601720260, pm.media_published_at } + Sidekiq::Testing.inline! do + url = 'http://twitter.com/test/123456' + pender_url = CheckConfig.get('pender_url_private') + '/api/medias' + response = '{"type":"media","data":{"url":"' + url + '","type":"item","published_at":"1989-01-25 08:30:00"}}' + WebMock.stub_request(:get, pender_url).with({ query: { url: url } }).to_return(body: response) + pm = create_project_media media: nil, url: url + assert_queries(0, '=') { assert_equal 601720200, pm.media_published_at } + response = '{"type":"media","data":{"url":"' + url + '","type":"item","published_at":"1989-01-25 08:31:00"}}' + WebMock.stub_request(:get, pender_url).with({ query: { url: url, refresh: '1' } }).to_return(body: response) + pm = ProjectMedia.find(pm.id) + pm.refresh_media = true + pm.save! + pm = ProjectMedia.find(pm.id) + assert_queries(0, '=') { assert_equal 601720260, pm.media_published_at } + end end test "should cache number of related items" do From 0a391b57b7cb1e394aac8601ac47dd8fdbf62ae7 Mon Sep 17 00:00:00 2001 From: Sawy Date: Tue, 1 Oct 2024 00:56:45 +0300 Subject: [PATCH 6/6] CV2-5005: test coverage --- test/models/project_media_6_test.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/models/project_media_6_test.rb b/test/models/project_media_6_test.rb index 921e03dcfa..b5f46bd20d 100644 --- a/test/models/project_media_6_test.rb +++ b/test/models/project_media_6_test.rb @@ -475,6 +475,12 @@ def setup pm.save! assert_match /^text-/, pm.get_title # Uncached assert_match /^text-/, pm.title # Cached + # Verify save the title as a custom title + ProjectMedia.stubs(:get_title).returns(nil) + pm = create_project_media custom_title: 'Custom Title' + assert_equal 'Custom Title', pm.title + assert_equal 'custom_title', pm.reload.title_field + ProjectMedia.unstub(:get_title) end test "should avoid N + 1 queries problem when loading the team avatar of many items at once" do