From fa7ce42bc671f2cd684949f7641c5b6dcba037ff Mon Sep 17 00:00:00 2001 From: Nitish Gupta Date: Sat, 4 Apr 2020 02:35:02 +0530 Subject: [PATCH] This commit adds support for the Gitter backend. Raw and Enriched indexes have been added along with their tests and schemas. Signed-off-by: Nitish Gupta --- README.md | 1 + grimoire_elk/enriched/gitter.py | 184 +++++++++++ grimoire_elk/raw/gitter.py | 66 ++++ grimoire_elk/utils.py | 4 + .../unreleased/add-support-for-gitter.yml | 8 + schema/gitter.csv | 40 +++ tests/data/gitter.json | 307 ++++++++++++++++++ tests/data/projects-release.json | 3 + tests/test_gitter.py | 216 ++++++++++++ 9 files changed, 829 insertions(+) create mode 100644 grimoire_elk/enriched/gitter.py create mode 100644 grimoire_elk/raw/gitter.py create mode 100644 releases/unreleased/add-support-for-gitter.yml create mode 100644 schema/gitter.csv create mode 100644 tests/data/gitter.json create mode 100644 tests/test_gitter.py diff --git a/README.md b/README.md index 765a61dc0..3a549f852 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ Each enriched index includes one or more types of documents, which are summarize - **GitHub repo statistics**: each document includes repo statistics (e.g., forks, watchers). - **GitLab issues**: each document corresponds to an issue. - **GitLab merge requests**: each document corresponds to a merge request. +- **Gitter**: each document corresponds to a message. - **Googlehits**: each document contains hits information derived from Google. - **Groupsio**: each document corresponds to a message. - **Hyperkitty**: each document corresponds to a message. 
diff --git a/grimoire_elk/enriched/gitter.py b/grimoire_elk/enriched/gitter.py new file mode 100644 index 000000000..49232210a --- /dev/null +++ b/grimoire_elk/enriched/gitter.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2015-2020 Bitergia +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Authors: +# Nitish Gupta +# + +import logging +import re +from urllib.parse import urlparse + +from grimoirelab_toolkit.datetime import str_to_datetime + +from .enrich import Enrich, metadata +from ..elastic_mapping import Mapping as BaseMapping + + +logger = logging.getLogger(__name__) + + +class Mapping(BaseMapping): + + @staticmethod + def get_elastic_mappings(es_major): + """Get Elasticsearch mapping. 
+ + :param es_major: major version of Elasticsearch, as string + :returns: dictionary with a key, 'items', with the mapping + """ + + mapping = """ + { + "properties": { + "text_analyzed": { + "type": "text", + "fielddata": true, + "index": true + } + } + } """ + + return {"items": mapping} + + +class GitterEnrich(Enrich): + + mapping = Mapping + + # REGEX to extract links from HTML text + HTML_LINK_REGEX = re.compile("href=[\"\'](.*?)[\"\']") + + def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=None, + db_user='', db_password='', db_host=''): + super().__init__(db_sortinghat, db_projects_map, json_projects_map, + db_user, db_password, db_host) + + def get_field_author(self): + return "fromUser" + + def get_sh_identity(self, item, identity_field=None): + # email not available for gitter + identity = { + 'username': None, + 'name': None, + 'email': None + } + + if self.get_field_author() not in item['data']: + return identity + from_ = item['data'][self.get_field_author()] + + identity['username'] = from_.get('username', None) + identity['name'] = from_.get('displayName', None) + + return identity + + def get_identities(self, item): + """ Return the identities from an item """ + + identity = self.get_sh_identity(item) + yield identity + + def get_project_repository(self, eitem): + tokens = eitem['origin'].rsplit("/", 1) + return tokens[0] + " " + tokens[1] + + @metadata + def get_rich_item(self, item): + + eitem = {} + + for f in self.RAW_FIELDS_COPY: + if f in item: + eitem[f] = item[f] + else: + eitem[f] = None + + message = item['data'] + + eitem['unread'] = 1 if message['unread'] else 0 + eitem['text_analyzed'] = message['text'] + + copy_fields = ["readBy", "issues", "id"] + + for f in copy_fields: + if f in message: + eitem[f] = message[f] + else: + eitem[f] = None + + eitem.update(self.get_rich_links(item['data'])) + + message_timestamp = str_to_datetime(eitem['metadata__updated_on']) + eitem['tz'] = 
int(message_timestamp.strftime("%H")) + + if self.sortinghat: + eitem.update(self.get_item_sh(item)) + + if self.prjs_map: + eitem.update(self.get_item_project(eitem)) + + eitem.update(self.get_grimoire_fields(item["metadata__updated_on"], "message")) + + self.add_repository_labels(eitem) + self.add_metadata_filter_raw(eitem) + return eitem + + def get_rich_links(self, item): + + rich_item = {} + + if item['issues']: + self.extract_issues(item['issues'], item['html']) + + if item['mentions']: + rich_item['mentioned'] = self.extract_mentions(item['mentions']) + + rich_item['url_hostname'] = [] + + if item['urls']: + for url in item['urls']: + url_parsed = urlparse(url['url']) + rich_item['url_hostname'].append('{uri.scheme}://{uri.netloc}/'.format(uri=url_parsed)) + + return rich_item + + def extract_issues(self, issue_pr, html_text): + """Enrich issues or PRs mentioned in the message""" + + links_found = self.HTML_LINK_REGEX.findall(html_text) + for i, entity in enumerate(issue_pr): + if 'repo' in entity.keys() and links_found: + if links_found[i].split('/')[-2] == 'issues': + entity['is_issue'] = entity['repo'] + ' #' + entity['number'] + elif links_found[i].split('/')[-2] == 'pull': + entity['is_pull'] = entity['repo'] + ' #' + entity['number'] + else: + continue + entity['url'] = links_found[i] + + def extract_mentions(self, mentioned): + """Enrich users mentioned in the message""" + + rich_mentions = [] + + for usr in mentioned: + if 'userId' in usr.keys(): + rich_mentions.append({'mentioned_username': usr['screenName'], 'mentioned_userId': usr['userId']}) + + return rich_mentions diff --git a/grimoire_elk/raw/gitter.py b/grimoire_elk/raw/gitter.py new file mode 100644 index 000000000..ccfcba39e --- /dev/null +++ b/grimoire_elk/raw/gitter.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2015-2020 Bitergia +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as 
published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Authors: +# Nitish Gupta +# + +from .elastic import ElasticOcean +from ..elastic_mapping import Mapping as BaseMapping + + +class Mapping(BaseMapping): + + @staticmethod + def get_elastic_mappings(es_major): + """Get Elasticsearch mapping. + + :param es_major: major version of Elasticsearch, as string + :returns: dictionary with a key, 'items', with the mapping + """ + + mapping = ''' + { + "dynamic":true, + "properties": { + "data": { + "dynamic":false, + "properties": {} + } + } + } + ''' + + return {"items": mapping} + + +class GitterOcean(ElasticOcean): + """Gitter Ocean feeder""" + + mapping = Mapping + + @classmethod + def get_perceval_params_from_url(cls, url): + """ Get the perceval params given a URL for the data source """ + + params = [] + + org = url.split('/')[-2] + room = url.split('/')[-1] + params.append(org) + params.append(room) + return params diff --git a/grimoire_elk/utils.py b/grimoire_elk/utils.py index b2003ba40..1a830fa79 100755 --- a/grimoire_elk/utils.py +++ b/grimoire_elk/utils.py @@ -46,6 +46,7 @@ from perceval.backends.core.git import Git, GitCommand from perceval.backends.core.github import GitHub, GitHubCommand from perceval.backends.core.gitlab import GitLab, GitLabCommand +from perceval.backends.core.gitter import Gitter, GitterCommand from perceval.backends.core.googlehits import GoogleHits, GoogleHitsCommand from perceval.backends.core.groupsio import Groupsio, GroupsioCommand from perceval.backends.core.hyperkitty import HyperKitty, 
HyperKittyCommand @@ -91,6 +92,7 @@ from .enriched.github import GitHubEnrich from .enriched.github2 import GitHubEnrich2 from .enriched.gitlab import GitLabEnrich +from .enriched.gitter import GitterEnrich from .enriched.google_hits import GoogleHitsEnrich from .enriched.groupsio import GroupsioEnrich from .enriched.hyperkitty import HyperKittyEnrich @@ -130,6 +132,7 @@ from .raw.git import GitOcean from .raw.github import GitHubOcean from .raw.gitlab import GitLabOcean +from .raw.gitter import GitterOcean from .raw.google_hits import GoogleHitsOcean from .raw.graal import GraalOcean from .raw.groupsio import GroupsioOcean @@ -228,6 +231,7 @@ def get_connectors(): "github": [GitHub, GitHubOcean, GitHubEnrich, GitHubCommand], "github2": [GitHub, GitHubOcean, GitHubEnrich2, GitHubCommand], "gitlab": [GitLab, GitLabOcean, GitLabEnrich, GitLabCommand], + "gitter": [Gitter, GitterOcean, GitterEnrich, GitterCommand], "google_hits": [GoogleHits, GoogleHitsOcean, GoogleHitsEnrich, GoogleHitsCommand], "groupsio": [Groupsio, GroupsioOcean, GroupsioEnrich, GroupsioCommand], "hyperkitty": [HyperKitty, HyperKittyOcean, HyperKittyEnrich, HyperKittyCommand], diff --git a/releases/unreleased/add-support-for-gitter.yml b/releases/unreleased/add-support-for-gitter.yml new file mode 100644 index 000000000..45343f25a --- /dev/null +++ b/releases/unreleased/add-support-for-gitter.yml @@ -0,0 +1,8 @@ +--- +title: Add support for Gitter +category: added +author: Nitish Gupta +issue: 820 +notes: Added support for creating raw and enriched + indexes of messages from Gitter. The visualizations + and tests for extracted data have also been added. diff --git a/schema/gitter.csv b/schema/gitter.csv new file mode 100644 index 000000000..d2506c6b6 --- /dev/null +++ b/schema/gitter.csv @@ -0,0 +1,40 @@ +name,type,aggregatable,description +author_bot,boolean,true,"True if the given author is identified as a bot." +author_domain,keyword,true,"Author domain from Email. 
" +author_gender,keyword,true,"Author gender. " +author_id,keyword,true,"Author Id from SortingHat." +author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile." +author_name,keyword,true,"Author name." +author_org_name,keyword,true,"Author organization name from SortingHat profile." +author_user_name,keyword,true,"Author username for the platform" +author_uuid,keyword,true,"Author UUID from SortingHat." +fromUser_bot,boolean,true,"True if the given sender is identified as a bot." +fromUser_domain,keyword,true,"Sender domain from Email. " +fromUser_gender,keyword,true,"Sender gender. " +fromUser_id,keyword,true,"Sender Id from SortingHat." +fromUser_multi_org_names,keyword,true,"List of the sender organizations from SortingHat profile." +fromUser_name,keyword,true,"Sender name." +fromUser_org_name,keyword,true,"Sender organization name from SortingHat profile." +fromUser_user_name,keyword,true,"Sender username for the platform" +fromUser_uuid,keyword,true,"Sender UUID from SortingHat." +grimoire_creation_date,date,true,"Message date (when the original author sent the message)." +id,keyword,true,"Message ID assigned by Gitter. " +is_gitter_message,boolean,true,"True if the item is a Gitter message. " +issues,list,true,"List of issues mentioned in the message; empty if no issues were mentioned. " +mentioned,list,true,"List of users mentioned in the message; empty if no user is mentioned. " +metadata__enriched_on,date,true,"Date when the item was enriched." +metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information." +metadata__gelk_version,keyword,true,"Version of the backend used to enrich information." +metadata__timestamp,date,true,"Date when the item was stored in RAW index." +metadata__updated_on,date,true,"Date when the item was updated in its original data source." +origin,keyword,true,"Original URL where the room was retrieved from." 
+project_1,keyword,true,"Project (if more than one level is allowed in project hierarchy)" +project,keyword,true,"Project." +readBy,int,true,"Count of users who have read the message" +repository_labels,keyword,true,"Custom repository labels defined by the user." +tag,keyword,true,"Perceval tag." +text_analyzed,keyword,true,"Message body in plain text. " +tz,int,true,"Time of the day the message was sent. " +unread,boolean,true,"True if the message is unread by user, False if read. " +url_hostname,list,true,"List of URL hostnames of the URL(s) mentioned in the message." +uuid,keyword,true,"Perceval UUID." \ No newline at end of file diff --git a/tests/data/gitter.json b/tests/data/gitter.json new file mode 100644 index 000000000..1c8e218d2 --- /dev/null +++ b/tests/data/gitter.json @@ -0,0 +1,307 @@ +[ + { + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "fromUser": { + "avatarUrl": "https://avatars-01.gitter.im/g/u/sample_avatar", + "avatarUrlMedium": "https://pbs.twimg.com/profile_images/sample_avatar_med.jpg", + "avatarUrlSmall": "https://pbs.twimg.com/profile_images/sample_avatar_smol.jpg", + "displayName": "Nitish", + "id": "93n2139n294b2193n3dd93bn", + "url": "/imnitishng", + "username": "imnitishng", + "v": 1 + }, + "html": "Does anyone know if I need to give the resource root URL inside Jenkins", + "id": "1jf932nsdaf122h3292n402y", + "issues": [], + "mentions": [], + "meta": [], + "readBy": 19, + "sent": "2020-04-09T21:18:43.858Z", + "text": "Does anyone know if I need to give the resource root URL inside Jenkins", + "unread": false, + "urls": [], + "v": 1 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "1jf932nsdaf122h3292n402y", + "room": "test_room", + "room_id": "64329hjkfed8723523942345" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 
1586784623.073985, + "updated_on": 1586467123.858, + "uuid": "1a8b34f51ac1d831d095c6e39fae0a7e600cfcc7" + }, + { + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "fromUser": { + "avatarUrl": "https://avatars-01.gitter.im/gh/uv/4/imnitishng", + "avatarUrlMedium": "https://avatars1.githubusercontent.com/u/35267629?v=4&s=128", + "avatarUrlSmall": "https://avatars1.githubusercontent.com/u/35267629?v=4&s=60", + "displayName": "Nitish Gupta", + "gv": "4", + "id": "23ndfskijsdf223joisfkm33", + "url": "/imnitishng", + "username": "imnitishng", + "v": 25 + }, + "html": "will try to work on this jenkinsci/docker#939", + "id": "5e90c74af450c25cc8d05649", + "issues": [ + { + "number": "939", + "repo": "jenkinsci/docker" + } + ], + "mentions": [], + "meta": [], + "readBy": 17, + "sent": "2020-04-10T19:21:46.907Z", + "text": "will try to work on this https://github.com/jenkinsci/docker/issues/939", + "unread": false, + "urls": [], + "v": 1 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "5e90c74af450c25cc8d05649", + "room": "test_room", + "room_id": "64329hjkfed8723523942345" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 1586784623.086927, + "updated_on": 1586546506.907, + "uuid": "c42df902ac8277c70afb8296af6bd09b5ffa31d6" + }, + { + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "fromUser": { + "avatarUrl": "https://avatars-01.gitter.im/gh/uv/4/imnitishng", + "avatarUrlMedium": "https://avatars1.githubusercontent.com/u/35267629?v=4&s=128", + "avatarUrlSmall": "https://avatars1.githubusercontent.com/u/35267629?v=4&s=60", + "displayName": "Nitish Gupta", + "gv": "4", + "id": "5d18a7dcd73408ce4fc4df7e", + "url": "/imnitishng", + "username": "imnitishng", + "v": 25 + }, + "html": "Wanted to 
know more about jenkinsci/jenkins#3861 and jenkinsci/jenkins#3863", + "id": "5e879554f1207e2adc0cfe53", + "issues": [ + { + "number": "3861", + "repo": "jenkinsci/jenkins" + }, + { + "number": "3863", + "repo": "jenkinsci/jenkins" + } + ], + "mentions": [], + "meta": [], + "readBy": 34, + "sent": "2020-04-03T19:58:12.722Z", + "text": "Wanted to know more about https://github.com/jenkinsci/jenkins/pull/3861 and https://github.com/jenkinsci/jenkins/pull/3863", + "unread": false, + "urls": [], + "v": 1 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "5e879554f1207e2adc0cfe53", + "room": "test_room", + "room_id": "64329hjkfed8723523942345" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 1586784624.304634, + "updated_on": 1585943892.722, + "uuid": "e8266108628134ae9ba2b1cf08ea3dffefebf8e3" +}, +{ + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "fromUser": { + "avatarUrl": "https://avatars-02.gitter.im/gh/uv/4/OrangeDog", + "avatarUrlMedium": "https://avatars1.githubusercontent.com/u/675056?v=4&s=128", + "avatarUrlSmall": "https://avatars1.githubusercontent.com/u/675056?v=4&s=60", + "displayName": "Nitish Kumar", + "gv": "4", + "id": "876767fd16b6c7089cb7c112", + "url": "/imnitish", + "username": "imnitish", + "v": 16 + }, + "html": "@IbrahimPatel89 don't. There is no reason to.", + "id": "5e87070981a582042e974777", + "issues": [], + "mentions": [ + { + "screenName": "IbrahimPatel89", + "userId": "5e870448d73408ce4fdf18b6", + "userIds": [] + } + ], + "meta": [], + "readBy": 28, + "sent": "2020-04-03T09:51:05.823Z", + "text": "@IbrahimPatel89 don't. 
There is no reason to.", + "unread": false, + "urls": [], + "v": 1 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "5e87070981a582042e9747e6", + "room": "test_room", + "room_id": "576d08b1c2f0db084a1fb0c9" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 1586784624.297063, + "updated_on": 1585907465.823, + "uuid": "42ca765de9425d85ac2aebb6750701b3c5f85dcc" + }, + { + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "editedAt": "2020-04-02T14:00:05.484Z", + "fromUser": { + "avatarUrl": "https://avatars-01.gitter.im/gh/uv/4/RicoToothless", + "avatarUrlMedium": "https://avatars2.githubusercontent.com/u/39393540?v=4&s=128", + "avatarUrlSmall": "https://avatars2.githubusercontent.com/u/39393540?v=4&s=60", + "displayName": "RicoToothless", + "gv": "4", + "id": "5bd5a1c3d73408ce4fad070d", + "url": "/RicoToothless", + "username": "RicoToothless", + "v": 11 + }, + "html": "Did anyone know kubernetes plugin yamlTemplate only can used by Jenkinsfile. But can\u2019t add in jenkins config? (I add by helm chart values)
https://github.com/helm/charts/blob/master/stable/jenkins/values.yaml#L461", + "id": "5e85efad1742080c5dfffab0", + "issues": [], + "mentions": [], + "meta": [], + "readBy": 24, + "sent": "2020-04-02T13:59:09.312Z", + "text": "Did anyone know kubernetes plugin `yamlTemplate` only can used by Jenkinsfile. But can\u2019t add in jenkins config? (I add by helm chart values)\nhttps://github.com/helm/charts/blob/master/stable/jenkins/values.yaml#L461", + "unread": true, + "urls": [ + { + "url": "https://github.com/helm/charts/blob/master/stable/jenkins/values.yaml#L461" + } + ], + "v": 2 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "5e85efad1742080c5dfffab0", + "room": "test_room", + "room_id": "576d08b1c2f0db084a1fb0c9" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 1586784624.286332, + "updated_on": 1585835949.312, + "uuid": "2bdae2762b5a132669869048a0f07c0bef7f15cb" + }, + { + "backend_name": "Gitter", + "backend_version": "0.1.0", + "category": "message", + "classified_fields_filtered": null, + "data": { + "editedAt": "2020-04-05T11:00:05.484Z", + "fromUser": { + "avatarUrl": "https://avatars-01.gitter.im/gh/uv/4/RicoToothless", + "avatarUrlMedium": "https://avatars2.githubusercontent.com/u/39393540?v=4&s=128", + "avatarUrlSmall": "https://avatars2.githubusercontent.com/u/39393540?v=4&s=60", + "displayName": "RicoToothless", + "gv": "4", + "id": "5bd5a1c3d73408ce4fad070d", + "url": "/RicoToothless", + "username": "RicoToothless", + "v": 11 + }, + "html": "Wanted to know more about jenkinsci/jenkins#3861 and jenkinsci/jenkins#3863", + "id": "5e85efad1742650c5dfffab0", + "issues": [ + { + "number": "3861", + "repo": "jenkinsci/jenkins" + }, + { + "number": "3863", + "repo": "jenkinsci/jenkins" + } + ], + "mentions": [ + { + "screenName": "IbrahimPatel89", + "userId": "5e870448d73408ce4fdf18b6", + "userIds": [] + }, + { + "screenName": "devanshu123", 
+ "userId": "5e870448d73408ce743924hg", + "userIds": [] + } + + ], + "meta": [], + "readBy": 1, + "sent": "2020-04-02T13:59:09.312Z", + "text": "Did anyone know kubernetes plugin `yamlTemplate` only can used by Jenkinsfile. But can\u2019t add in jenkins config? (I add by helm chart values)\nhttps://github.com/helm/charts/blob/master/stable/jenkins/values.yaml#L461", + "unread": true, + "urls": [ + { + "url": "https://github.com/helm/charts/blob/master/stable/jenkins/values.yaml#L461" + }, + { + "url": "https://facebook.com/abcd" + } + ], + "v": 2 + }, + "origin": "https://gitter.im/test_org/test_room", + "perceval_version": "0.13.0", + "search_fields": { + "group": "test_org", + "item_id": "5e85efad1742080c5dfffab0", + "room": "test_room", + "room_id": "576d08b1c2f0db084a1fb0c9" + }, + "tag": "https://gitter.im/test_org/test_room", + "timestamp": 1586784624.286332, + "updated_on": 1585835949.312, + "uuid": "2bdae2762b5a132669869048a0f07c0bef777667" + } +] \ No newline at end of file diff --git a/tests/data/projects-release.json b/tests/data/projects-release.json index 38b991855..d9a805c95 100644 --- a/tests/data/projects-release.json +++ b/tests/data/projects-release.json @@ -41,6 +41,9 @@ "gitlab": [ "https://gitlab.com/inkscape/inkscape-web" ], + "gitter": [ + "https://gitter.im/test_org/test_room" + ], "groupsio": [ "updates" ], diff --git a/tests/test_gitter.py b/tests/test_gitter.py new file mode 100644 index 000000000..6836c41c4 --- /dev/null +++ b/tests/test_gitter.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2015-2019 Bitergia +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Authors: +# Nitish Gupta +# +import logging +import unittest + +import requests +from base import TestBaseBackend +from grimoire_elk.enriched.utils import REPO_LABELS + + +class TestGitter(TestBaseBackend): + """Test Gitter backend""" + + connector = "gitter" + ocean_index = "test_" + connector + enrich_index = "test_" + connector + "_enrich" + + def test_has_identities(self): + """Test value of has_identities method""" + + enrich_backend = self.connectors[self.connector][2]() + self.assertTrue(enrich_backend.has_identities()) + + def test_items_to_raw(self): + """Test whether JSON items are properly inserted into ES""" + + result = self._test_items_to_raw() + + self.assertEqual(result['items'], 6) + self.assertEqual(result['raw'], 6) + + def test_raw_to_enrich(self): + """Test whether the raw index is properly enriched""" + + result = self._test_raw_to_enrich() + self.assertEqual(result['raw'], 6) + self.assertEqual(result['enrich'], 6) + + enrich_backend = self.connectors[self.connector][2]() + + item = self.items[0] + eitem = enrich_backend.get_rich_item(item) + self.assertEqual(eitem['id'], '1jf932nsdaf122h3292n402y') + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 19) + self.assertEqual(eitem['repository_labels'], None) + self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertEqual(eitem['unread'], 0) + self.assertEqual(eitem['uuid'], '1a8b34f51ac1d831d095c6e39fae0a7e600cfcc7') + + item = self.items[1] + eitem = enrich_backend.get_rich_item(item) + 
self.assertEqual(eitem['id'], '5e90c74af450c25cc8d05649') + self.assertEqual(eitem['is_gitter_message'], 1) + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 17) + self.assertEqual(eitem['repository_labels'], None) + self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertIn('issues', eitem) + self.assertEqual(eitem['issues'][0]['is_issue'], 'jenkinsci/docker #939') + self.assertEqual(eitem['issues'][0]['url'], 'https://github.com/jenkinsci/docker/issues/939') + self.assertEqual(eitem['issues'][0]['repo'], 'jenkinsci/docker') + self.assertEqual(eitem['unread'], 0) + self.assertEqual(eitem['uuid'], 'c42df902ac8277c70afb8296af6bd09b5ffa31d6') + + item = self.items[2] + eitem = enrich_backend.get_rich_item(item) + self.assertEqual(eitem['id'], '5e879554f1207e2adc0cfe53') + self.assertEqual(eitem['is_gitter_message'], 1) + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 34) + self.assertEqual(eitem['repository_labels'], None) + self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertIn('issues', eitem) + self.assertEqual(eitem['issues'][0]['is_pull'], 'jenkinsci/jenkins #3861') + self.assertEqual(eitem['issues'][0]['url'], 'https://github.com/jenkinsci/jenkins/pull/3861') + self.assertEqual(eitem['issues'][0]['repo'], 'jenkinsci/jenkins') + self.assertEqual(eitem['unread'], 0) + self.assertEqual(eitem['uuid'], 'e8266108628134ae9ba2b1cf08ea3dffefebf8e3') + + item = self.items[3] + eitem = enrich_backend.get_rich_item(item) + self.assertEqual(eitem['id'], '5e87070981a582042e974777') + self.assertEqual(eitem['is_gitter_message'], 1) + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 28) + self.assertEqual(eitem['repository_labels'], None) + 
self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertIn('mentioned', eitem) + self.assertEqual(eitem['mentioned'][0]['mentioned_username'], 'IbrahimPatel89') + self.assertEqual(eitem['mentioned'][0]['mentioned_userId'], '5e870448d73408ce4fdf18b6') + self.assertEqual(eitem['unread'], 0) + self.assertEqual(eitem['uuid'], '42ca765de9425d85ac2aebb6750701b3c5f85dcc') + + item = self.items[4] + eitem = enrich_backend.get_rich_item(item) + self.assertEqual(eitem['id'], '5e85efad1742080c5dfffab0') + self.assertEqual(eitem['is_gitter_message'], 1) + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 24) + self.assertEqual(eitem['repository_labels'], None) + self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertIn('url_hostname', eitem) + self.assertEqual(eitem['url_hostname'][0], 'https://github.com/') + self.assertEqual(eitem['unread'], 1) + self.assertEqual(eitem['uuid'], '2bdae2762b5a132669869048a0f07c0bef7f15cb') + + item = self.items[5] + eitem = enrich_backend.get_rich_item(item) + self.assertEqual(eitem['id'], '5e85efad1742650c5dfffab0') + self.assertEqual(eitem['is_gitter_message'], 1) + self.assertEqual(eitem['origin'], 'https://gitter.im/test_org/test_room') + self.assertEqual(eitem['readBy'], 1) + self.assertEqual(eitem['repository_labels'], None) + self.assertEqual(eitem['tag'], 'https://gitter.im/test_org/test_room') + self.assertIn('text_analyzed', eitem) + self.assertIn('url_hostname', eitem) + self.assertIn('mentioned', eitem) + self.assertIn('issues', eitem) + self.assertEqual(eitem['unread'], 1) + self.assertEqual(eitem['uuid'], '2bdae2762b5a132669869048a0f07c0bef777667') + self.assertEqual(eitem['url_hostname'][1], 'https://facebook.com/') + self.assertEqual(eitem['issues'][1]['is_pull'], 'jenkinsci/jenkins #3863') + 
self.assertEqual(eitem['mentioned'][1]['mentioned_username'], 'devanshu123') + + def test_enrich_repo_labels(self): + """Test whether the field REPO_LABELS is present in the enriched items""" + + self._test_raw_to_enrich() + enrich_backend = self.connectors[self.connector][2]() + + for item in self.items: + eitem = enrich_backend.get_rich_item(item) + self.assertIn(REPO_LABELS, eitem) + + def test_raw_to_enrich_sorting_hat(self): + """Test enrich with SortingHat""" + + result = self._test_raw_to_enrich(sortinghat=True) + self.assertEqual(result['raw'], 6) + self.assertEqual(result['enrich'], 6) + + enrich_backend = self.connectors[self.connector][2]() + enrich_backend.sortinghat = True + + url = self.es_con + "/" + self.enrich_index + "/_search" + response = enrich_backend.requests.get(url, verify=False).json() + for hit in response['hits']['hits']: + source = hit['_source'] + if 'author_uuid' in source: + self.assertIn('author_domain', source) + self.assertIn('author_gender', source) + self.assertIn('author_gender_acc', source) + self.assertIn('author_org_name', source) + self.assertIn('author_bot', source) + self.assertIn('author_name', source) + self.assertIn('author_multi_org_names', source) + self.assertIn('fromUser_gender', source) + self.assertIn('fromUser_gender_acc', source) + self.assertIn('fromUser_multi_org_names', source) + self.assertIn('fromUser_name', source) + self.assertIn('fromUser_user_name', source) + self.assertIn('fromUser_uuid', source) + + def test_raw_to_enrich_projects(self): + """Test enrich with Projects""" + + result = self._test_raw_to_enrich(projects=True) + self.assertEqual(result['raw'], 6) + self.assertEqual(result['enrich'], 6) + + res = requests.get(self.es_con + "/" + self.enrich_index + "/_search", verify=False) + for eitem in res.json()['hits']['hits']: + self.assertEqual(eitem['_source']['project'], "grimoire") + + def test_refresh_identities(self): + """Test refresh identities""" + + result = self._test_refresh_identities() 
+ # ... ? + + def test_refresh_project(self): + """Test refresh project field for all sources""" + + result = self._test_refresh_project() + # ... ? + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logging.getLogger("urllib3").setLevel(logging.WARNING) + logging.getLogger("requests").setLevel(logging.WARNING) + unittest.main(warnings='ignore')