Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ZEP-13] corrige le comportement d'Elasticsearch #4262

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions templates/searchv2/includes/publishedcontent.part.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ <h3 class="content-title" itemprop="itemListElement">
{% trans "Article publié" %}
{% elif search_result.content_type == 'TUTORIAL' %}
{% trans "Tutoriel publié" %}
{% elif search_result.content_type == 'OPINION' %}
{% trans "Opinion" %}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Le mot de base, c'est "Billiet".

{% if search_result.picked %}
{% trans "mise en avant" %}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Du coup "mis en avant".

{% endif %}
{% else %}
{% trans "Contenu publié" %}
{% endif %}
Expand Down
8 changes: 7 additions & 1 deletion update.md
Original file line number Diff line number Diff line change
Expand Up @@ -1010,10 +1010,16 @@ Une fois que tout est indexé,
Tribunes
--------

Ajouter à la fin de `/etc/munin/plugin-conf.d/zds.conf`
+ Ajouter à la fin de `/etc/munin/plugin-conf.d/zds.conf`

```
[zds_total_tribunes]
env.url http://www.zestedesavoir.com/munin/total_tribunes/
env.graph_category zds
```

+ Réindexer les données (un champ a été rajouté):

```
python manage.py es_manager index_all
```
45 changes: 40 additions & 5 deletions zds/searchv2/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,15 +234,21 @@ def delete_document_in_elasticsearch(instance):
"""

index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
index_manager.delete_document(instance)
index_manager.refresh_index()

if index_manager.index_exists:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vue la répétition de ces trois lignes, créer une méthode delete_and_refresh peut être utile non?

index_manager.delete_document(instance)
index_manager.refresh_index()


def get_django_indexable_objects():
"""Return all indexable objects registered in Django"""
return [model for model in apps.get_models() if issubclass(model, AbstractESDjangoIndexable)]


class NeedIndex(Exception):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Juste ajouter une petite docstring pour faire beau dans la doc :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done ;)

pass


class ESIndexManager(object):
"""Manage a given index with different taylor-made functions"""

Expand All @@ -260,6 +266,7 @@ def __init__(self, name, shards=5, replicas=0, connection_alias='default'):
"""

self.index = name
self.index_exists = False

self.number_of_shards = shards
self.number_of_replicas = replicas
Expand All @@ -282,6 +289,9 @@ def __init__(self, name, shards=5, replicas=0, connection_alias='default'):
else:
self.logger.info('connected to ES cluster')

if self.connected_to_es:
self.index_exists = self.es.indices.exists(self.index)

def clear_es_index(self):
"""Clear index
"""
Expand All @@ -293,6 +303,8 @@ def clear_es_index(self):
self.es.indices.delete(self.index)
self.logger.info('index cleared')

self.index_exists = False

def reset_es_index(self, models):
"""Delete old index and create an new one (with the same name). Setup the number of shards and replicas.
Then, set mappings for the different models.
Expand Down Expand Up @@ -327,6 +339,8 @@ def reset_es_index(self, models):
}
)

self.index_exists = True

self.logger.info('index created')

def setup_custom_analyzer(self):
Expand All @@ -351,6 +365,9 @@ def setup_custom_analyzer(self):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

self.es.indices.close(self.index)

document = {
Expand Down Expand Up @@ -422,9 +439,6 @@ def clear_indexing_of_model(self, model):
:type model: class
"""

if not self.connected_to_es:
return

if issubclass(model, AbstractESDjangoIndexable): # use a global update with Django
objs = model.get_es_django_indexable(force_reindexing=True)
objs.update(es_flagged=True, es_already_indexed=False)
Expand Down Expand Up @@ -456,6 +470,9 @@ def es_bulk_indexing_of_model(self, model, force_reindexing=False):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

# better safe than sorry
if model.__name__ == 'FakeChapter':
self.logger.warn('Cannot index FakeChapter model. Please index its parent model.')
Expand Down Expand Up @@ -568,6 +585,9 @@ def refresh_index(self):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

self.es.indices.refresh(self.index)

def update_single_document(self, document, doc):
Expand All @@ -584,6 +604,9 @@ def update_single_document(self, document, doc):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

arguments = {'index': self.index, 'doc_type': document.get_es_document_type(), 'id': document.es_id}
if self.es.exists(**arguments):
self.es.update(body={'doc': doc}, **arguments)
Expand All @@ -599,6 +622,9 @@ def delete_document(self, document):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

arguments = {'index': self.index, 'doc_type': document.get_es_document_type(), 'id': document.es_id}
if self.es.exists(**arguments):
self.es.delete(**arguments)
Expand All @@ -621,6 +647,9 @@ def delete_by_query(self, doc_type='', query=MatchAll()):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

response = self.es.delete_by_query(index=self.index, doc_type=doc_type, body={'query': query})

self.logger.info('delete_by_query {}s ({})'.format(doc_type, response['deleted']))
Expand All @@ -641,6 +670,9 @@ def analyze_sentence(self, request):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

document = {'text': request}
tokens = []
for token in self.es.indices.analyze(index=self.index, body=document)['tokens']:
Expand All @@ -660,4 +692,7 @@ def setup_search(self, request):
if not self.connected_to_es:
return

if not self.index_exists:
raise NeedIndex()

return request.index(self.index).using(self.es)
8 changes: 8 additions & 0 deletions zds/searchv2/tests/tests_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def test_es_manager(self):

# 1. test "index-all"
call_command('es_manager', 'index_all')
self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
self.index_manager.index_exists = True

topic = Topic.objects.get(pk=topic.pk)
post = Post.objects.get(pk=post.pk)
Expand Down Expand Up @@ -126,7 +128,10 @@ def test_es_manager(self):

# 2. test "clear"
self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices()) # index in

call_command('es_manager', 'clear')
self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
self.index_manager.index_exists = False

# must reset every object
topic = Topic.objects.get(pk=topic.pk)
Expand All @@ -145,6 +150,9 @@ def test_es_manager(self):

# 3. test "setup"
call_command('es_manager', 'setup')
self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
self.index_manager.index_exists = True

self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices()) # index back in ...

s = Search()
Expand Down
57 changes: 51 additions & 6 deletions zds/searchv2/tests/tests_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import shutil
import datetime

from elasticsearch_dsl import Search
from elasticsearch_dsl.query import MatchAll
Expand Down Expand Up @@ -284,13 +285,23 @@ def test_boosts(self):
article = PublishedContentFactory(type='ARTICLE', title=text)
published_article = PublishedContent.objects.get(content_pk=article.pk)

opinion_not_picked = PublishedContentFactory(type='OPINION', title=text)
published_opinion_not_picked = PublishedContent.objects.get(content_pk=opinion_not_picked.pk)

opinion_picked = PublishedContentFactory(type='OPINION', title=text)
opinion_picked.sha_picked = opinion_picked.sha_draft
opinion_picked.date_picked = datetime.datetime.now()
opinion_picked.save()

published_opinion_picked = PublishedContent.objects.get(content_pk=opinion_picked.pk)

for model in self.indexable:
if model is FakeChapter:
continue
self.manager.es_bulk_indexing_of_model(model)
self.manager.refresh_index()

self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 8)
self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 10)

# 2. Reset all boosts to 1
for doc_type in settings.ZDS_APP['search']['boosts']:
Expand Down Expand Up @@ -418,10 +429,14 @@ def test_boosts(self):

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEquals(response.hits.total, 3)
self.assertEquals(response.hits.total, 5)

# score are equals without boost:
self.assertTrue(response[0].meta.score == response[1].meta.score)
self.assertTrue(response[0].meta.score ==
response[1].meta.score ==
response[2].meta.score ==
response[3].meta.score ==
response[4].meta.score)

settings.ZDS_APP['search']['boosts']['publishedcontent']['if_article'] = 2.0

Expand All @@ -430,7 +445,7 @@ def test_boosts(self):

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEqual(response.hits.total, 3)
self.assertEqual(response.hits.total, 5)

self.assertTrue(response[0].meta.score > response[1].meta.score)
self.assertEquals(response[0].meta.id, str(published_article.pk)) # obvious
Expand All @@ -443,12 +458,42 @@ def test_boosts(self):

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEqual(response.hits.total, 3)
self.assertEqual(response.hits.total, 5)

self.assertTrue(response[0].meta.score > response[1].meta.score)
self.assertEquals(response[0].meta.id, str(published_tuto.pk)) # obvious

settings.ZDS_APP['search']['boosts']['publishedcontent']['if_tutorial'] = 1.0
settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion'] = 2.0
settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion_not_picked'] = 4.0
# Note: in "real life", unpicked opinion would get a boost < 1.

result = self.client.get(
reverse('search:query') + '?q=' + text + '&models=content', follow=False)

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEqual(response.hits.total, 5)

self.assertTrue(response[0].meta.score > response[1].meta.score > response[2].meta.score)
self.assertEquals(response[0].meta.id, str(published_opinion_not_picked.pk)) # unpicked opinion got first
self.assertEquals(response[1].meta.id, str(published_opinion_picked.pk))

settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion'] = 1.0
settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion_not_picked'] = 1.0
settings.ZDS_APP['search']['boosts']['publishedcontent']['if_medium_or_big_tutorial'] = 2.0

result = self.client.get(
reverse('search:query') + '?q=' + text + '&models=content', follow=False)

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEqual(response.hits.total, 5)

self.assertTrue(response[0].meta.score > response[1].meta.score)
self.assertEquals(response[0].meta.id, str(published_tuto.pk)) # obvious

settings.ZDS_APP['search']['boosts']['publishedcontent']['if_medium_or_big_tutorial'] = 1.0

# 6. Test global boosts
# NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists
Expand All @@ -463,7 +508,7 @@ def test_boosts(self):

self.assertEqual(result.status_code, 200)
response = result.context['object_list'].execute()
self.assertEqual(response.hits.total, 8)
self.assertEqual(response.hits.total, 10)

self.assertEqual(response[0].meta.doc_type, model.get_es_document_type()) # obvious

Expand Down
12 changes: 12 additions & 0 deletions zds/searchv2/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,22 @@ def get_queryset_publishedcontents(self):
'filter': Match(content_type='TUTORIAL'),
'weight': settings.ZDS_APP['search']['boosts']['publishedcontent']['if_tutorial']
},
{
'filter': Match(content_type='TUTORIAL') & Match(has_chapters=True),
'weight': settings.ZDS_APP['search']['boosts']['publishedcontent']['if_medium_or_big_tutorial']
},
{
'filter': Match(content_type='ARTICLE'),
'weight': settings.ZDS_APP['search']['boosts']['publishedcontent']['if_article']
},
{
'filter': Match(content_type='OPINION'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

j'aime ce boost

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3

'weight': settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion']
},
{
'filter': Match(content_type='OPINION') & Match(picked=False),
'weight': settings.ZDS_APP['search']['boosts']['publishedcontent']['if_opinion_not_picked']
},
]

scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)
Expand Down
3 changes: 3 additions & 0 deletions zds/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,9 @@
'global': 3.0,
'if_article': 1.0,
'if_tutorial': 1.0,
'if_medium_or_big_tutorial': 1.5,
'if_opinion': 0.66,
'if_opinion_not_picked': 0.5
},
'topic': {
'global': 2.0,
Expand Down
Loading