From 0095797c92c0d2abee3ace7b831ec3514e315158 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Mon, 21 Oct 2024 09:58:42 -0400 Subject: [PATCH 1/4] Add OOPSpam and Akismet metrics to spam report --- osf/external/askismet/client.py | 24 +++++++++++ osf/external/oopspam/client.py | 24 +++++++++++ osf/metrics/reporters/spam_count.py | 15 +++++++ osf/metrics/reports.py | 4 ++ osf_tests/external/akismet/test_akismet.py | 43 +++++++++++++++++++ osf_tests/external/oopspam/test_oopspam.py | 42 ++++++++++++++++++ osf_tests/metrics/test_spam_count_reporter.py | 43 +++++++++++++++++++ 7 files changed, 195 insertions(+) create mode 100644 osf_tests/metrics/test_spam_count_reporter.py diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py index 877f7ec4c23..4db28a9d419 100644 --- a/osf/external/askismet/client.py +++ b/osf/external/askismet/client.py @@ -133,3 +133,27 @@ def submit_ham(self, user_ip, user_agent, **kwargs): ) if res.status_code != requests.codes.ok: raise AkismetClientError(reason=res.text) + + def get_flagged_count(self, start_date, end_date): + from osf.models import NodeLog + + flagged_count = NodeLog.objects.filter( + action=NodeLog.FLAG_SPAM, + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='akismet' + ).count() + + return flagged_count + + def get_hammed_count(self, start_date, end_date): + from osf.models import NodeLog + + hammed_count = NodeLog.objects.filter( + action=NodeLog.CONFIRM_HAM, + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='akismet' + ).count() + + return hammed_count diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py index ef22864a43d..b73120600bb 100644 --- a/osf/external/oopspam/client.py +++ b/osf/external/oopspam/client.py @@ -45,3 +45,27 @@ def check_content(self, user_ip, content, **kwargs): # OOPSpam returns a spam score out of 6. 3 or higher indicates spam return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json + + def get_flagged_count(self, start_date, end_date): + from osf.models import NodeLog + + flagged_count = NodeLog.objects.filter( + action=NodeLog.FLAG_SPAM, + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='oopspam' + ).count() + + return flagged_count + + def get_hammed_count(self, start_date, end_date): + from osf.models import NodeLog + + hammed_count = NodeLog.objects.filter( + action=NodeLog.CONFIRM_HAM, + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='oopspam' + ).count() + + return hammed_count diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 54feae8bee5..d2e4c581ae4 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -4,6 +4,8 @@ from ._base import MonthlyReporter from osf.models import PreprintLog, NodeLog from osf.models.spam import SpamStatus +from osf.external.oopspam.client import OOPSpamClient +from osf.external.askismet.client import AkismetClient class SpamCountReporter(MonthlyReporter): @@ -12,6 +14,15 @@ def report(self, report_yearmonth): target_month = report_yearmonth.target_month() next_month = report_yearmonth.next_month() + oopspam_client = OOPSpamClient() + akismet_client = AkismetClient() + + oopspam_flagged = oopspam_client.get_flagged_count(target_month, next_month) + oopspam_hammed = oopspam_client.get_hammed_count(target_month, next_month) + + akismet_flagged = akismet_client.get_flagged_count(target_month, next_month) + akismet_hammed = akismet_client.get_hammed_count(target_month, next_month) + report = SpamSummaryReport( report_yearmonth=str(report_yearmonth), # Node Log entries @@ -33,6 +44,10 @@ def report(self, report_yearmonth): created__lt=next_month, node__type='osf.node', ).count(), + oopspam_flagged=oopspam_flagged, + oopspam_hammed=oopspam_hammed, + akismet_flagged=akismet_flagged, + akismet_hammed=akismet_hammed, # Registration Log entries registration_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 609e79fc324..b48b6b215da 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -214,3 +214,7 @@ class SpamSummaryReport(MonthlyReport): preprint_flagged = metrics.Integer() user_marked_as_spam = metrics.Integer() user_marked_as_ham = metrics.Integer() + oopspam_flagged = metrics.Integer() + oopspam_hammed = metrics.Integer() + akismet_flagged = metrics.Integer() + akismet_hammed = metrics.Integer() diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py index db3c5d0d584..b700ea15d27 100644 --- a/osf_tests/external/akismet/test_akismet.py +++ b/osf_tests/external/akismet/test_akismet.py @@ -237,3 +237,46 @@ def test_meetings_skip_spam_check(self, mock_akismet, user, node_in_conference, node.check_spam(user, {'title'}, request_headers) node.refresh_from_db() assert node.spam_status == SpamStatus.FLAGGED + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_flagged_count(self, mock_filter, user): + from osf.external.askismet.client import AkismetClient + from datetime import datetime + + mock_filter.return_value.count.return_value = 7 + + client = AkismetClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + flagged_count = client.get_flagged_count(start_date, end_date) + + mock_filter.assert_called_with( + action='flag_spam', + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='akismet' + ) + assert flagged_count == 7 + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_hammed_count(self, mock_filter, user): + from osf.external.askismet.client import AkismetClient + from datetime import datetime + + mock_filter.return_value.count.return_value = 4 + + client = AkismetClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + hammed_count = client.get_hammed_count(start_date, end_date) + + mock_filter.assert_called_with( + action='confirm_ham', + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='akismet' + ) + assert hammed_count == 4 + diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py index 36740148116..c02c23a39aa 100644 --- a/osf_tests/external/oopspam/test_oopspam.py +++ b/osf_tests/external/oopspam/test_oopspam.py @@ -125,3 +125,45 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers): ) assert user.spam_status == SpamStatus.UNKNOWN + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_flagged_count(self, mock_filter, user): + from osf.external.oopspam.client import OOPSpamClient + from datetime import datetime + + mock_filter.return_value.count.return_value = 5 + + client = OOPSpamClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + flagged_count = client.get_flagged_count(start_date, end_date) + + mock_filter.assert_called_with( + action='flag_spam', + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='oopspam' + ) + assert flagged_count == 5 + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_hammed_count(self, mock_filter, user): + from osf.external.oopspam.client import OOPSpamClient + from datetime import datetime + + mock_filter.return_value.count.return_value = 3 + + client = OOPSpamClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + hammed_count = client.get_hammed_count(start_date, end_date) + + mock_filter.assert_called_with( + action='confirm_ham', + created__gt=start_date, + created__lt=end_date, + spam_data__who_flagged='oopspam' + ) + assert hammed_count == 3 diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py new file mode 100644 index 00000000000..e6b54bd5842 --- /dev/null +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -0,0 +1,43 @@ +import datetime +import pytest +from osf.metrics.reporters.spam_count import SpamCountReporter +from osf.external.oopspam.client import OOPSpamClient +from osf.external.askismet.client import AkismetClient + +@pytest.fixture +def mock_oopspam_client(mocker): + mock = mocker.patch('osf.external.oopspam.client.OOPSpamClient') + mock.get_flagged_count.return_value = 10 + mock.get_hammed_count.return_value = 5 + return mock + +@pytest.fixture +def mock_akismet_client(mocker): + mock = mocker.patch('osf.external.askismet.client.AkismetClient') + mock.get_flagged_count.return_value = 20 + mock.get_hammed_count.return_value = 10 + return mock + +@pytest.fixture +def mock_nodelog_model(mocker): + mock = mocker.patch('osf.models.NodeLog') + mock.filter.return_value.count.return_value = 100 + return mock + +@pytest.fixture +def mock_preprintlog_model(mocker): + mock = mocker.patch('osf.models.PreprintLog') + mock.filter.return_value.count.return_value = 50 + return mock + +def test_spam_count_reporter(mock_oopspam_client, mock_akismet_client, mock_nodelog_model, mock_preprintlog_model): + report_month = datetime.datetime(2024, 10, 1) + reporter = SpamCountReporter() + report = reporter.report(report_month) + + assert report[0].oopspam_flagged == 10 + assert report[0].oopspam_hammed == 5 + assert report[0].akismet_flagged == 20 + assert report[0].akismet_hammed == 10 + assert report[0].node_confirmed_spam == 100 + assert report[0].preprint_confirmed_spam == 50 From 1c352761ba695052cc67ed58b4f5ef3310cbca7c Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Mon, 28 Oct 2024 13:39:06 -0400 Subject: [PATCH 2/4] Add spam/ham metrics for OOPSpam and Akismet with FK join fix, test cleanup --- osf/external/askismet/client.py | 4 +- osf/external/oopspam/client.py | 4 +- osf_tests/external/akismet/test_akismet.py | 15 ++--- osf_tests/external/oopspam/test_oopspam.py | 14 ++--- osf_tests/metrics/test_spam_count_reporter.py | 56 +++++++++---------- 5 files changed, 40 insertions(+), 53 deletions(-) diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py index 4db28a9d419..61a79abf07b 100644 --- a/osf/external/askismet/client.py +++ b/osf/external/askismet/client.py @@ -141,7 +141,7 @@ def get_flagged_count(self, start_date, end_date): action=NodeLog.FLAG_SPAM, created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='akismet' + node__spam_data__who_flagged__in=['akismet', 'both'] ).count() return flagged_count @@ -153,7 +153,7 @@ def get_hammed_count(self, start_date, end_date): action=NodeLog.CONFIRM_HAM, created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='akismet' + node__spam_data__who_flagged__in=['akismet', 'both'] ).count() return hammed_count diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py index b73120600bb..14ee4267498 100644 --- a/osf/external/oopspam/client.py +++ b/osf/external/oopspam/client.py @@ -53,7 +53,7 @@ def get_flagged_count(self, start_date, end_date): action=NodeLog.FLAG_SPAM, created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='oopspam' + node__spam_data__who_flagged__in=['oopspam', 'both'] ).count() return flagged_count @@ -65,7 +65,7 @@ def get_hammed_count(self, start_date, end_date): action=NodeLog.CONFIRM_HAM, created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='oopspam' + node__spam_data__who_flagged__in=['oopspam', 'both'] ).count() return hammed_count diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py index b700ea15d27..46729e485e8 100644 --- a/osf_tests/external/akismet/test_akismet.py +++ b/osf_tests/external/akismet/test_akismet.py @@ -243,40 +243,33 @@ def test_get_flagged_count(self, mock_filter, user): from osf.external.askismet.client import AkismetClient from datetime import datetime - mock_filter.return_value.count.return_value = 7 - client = AkismetClient() start_date = datetime(2024, 10, 1) end_date = datetime(2024, 10, 31) - flagged_count = client.get_flagged_count(start_date, end_date) + client.get_flagged_count(start_date, end_date) mock_filter.assert_called_with( action='flag_spam', created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='akismet' + node__spam_data__who_flagged__in=['akismet', 'both'] ) - assert flagged_count == 7 @mock.patch('osf.models.NodeLog.objects.filter') def test_get_hammed_count(self, mock_filter, user): from osf.external.askismet.client import AkismetClient from datetime import datetime - mock_filter.return_value.count.return_value = 4 - client = AkismetClient() start_date = datetime(2024, 10, 1) end_date = datetime(2024, 10, 31) - hammed_count = client.get_hammed_count(start_date, end_date) + client.get_hammed_count(start_date, end_date) mock_filter.assert_called_with( action='confirm_ham', created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='akismet' + node__spam_data__who_flagged__in=['akismet', 'both'] ) - assert hammed_count == 4 - diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py index c02c23a39aa..96656ecc6da 100644 --- a/osf_tests/external/oopspam/test_oopspam.py +++ b/osf_tests/external/oopspam/test_oopspam.py @@ -131,39 +131,33 @@ def test_get_flagged_count(self, mock_filter, user): from osf.external.oopspam.client import OOPSpamClient from datetime import datetime - mock_filter.return_value.count.return_value = 5 - client = OOPSpamClient() start_date = datetime(2024, 10, 1) end_date = datetime(2024, 10, 31) - flagged_count = client.get_flagged_count(start_date, end_date) + client.get_flagged_count(start_date, end_date) mock_filter.assert_called_with( action='flag_spam', created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='oopspam' + node__spam_data__who_flagged__in=['oopspam', 'both'] ) - assert flagged_count == 5 @mock.patch('osf.models.NodeLog.objects.filter') def test_get_hammed_count(self, mock_filter, user): from osf.external.oopspam.client import OOPSpamClient from datetime import datetime - mock_filter.return_value.count.return_value = 3 - client = OOPSpamClient() start_date = datetime(2024, 10, 1) end_date = datetime(2024, 10, 31) - hammed_count = client.get_hammed_count(start_date, end_date) + client.get_hammed_count(start_date, end_date) mock_filter.assert_called_with( action='confirm_ham', created__gt=start_date, created__lt=end_date, - spam_data__who_flagged='oopspam' + node__spam_data__who_flagged__in=['oopspam', 'both'] ) - assert hammed_count == 3 diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py index e6b54bd5842..93fe1854358 100644 --- a/osf_tests/metrics/test_spam_count_reporter.py +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -1,43 +1,43 @@ -import datetime import pytest +from datetime import datetime from osf.metrics.reporters.spam_count import SpamCountReporter -from osf.external.oopspam.client import OOPSpamClient -from osf.external.askismet.client import AkismetClient +from unittest import mock +from osf.metrics.utils import YearMonth +from osf_tests.factories import NodeLogFactory, NodeFactory @pytest.fixture -def mock_oopspam_client(mocker): - mock = mocker.patch('osf.external.oopspam.client.OOPSpamClient') - mock.get_flagged_count.return_value = 10 - mock.get_hammed_count.return_value = 5 - return mock +def mock_oopspam_client(): + with mock.patch('osf.external.oopspam.client.OOPSpamClient') as mock_client: + instance = mock_client.return_value + instance.get_flagged_count.return_value = 10 + instance.get_hammed_count.return_value = 5 + yield instance @pytest.fixture -def mock_akismet_client(mocker): - mock = mocker.patch('osf.external.askismet.client.AkismetClient') - mock.get_flagged_count.return_value = 20 - mock.get_hammed_count.return_value = 10 - return mock +def mock_akismet_client(): + with mock.patch('osf.external.askismet.client.AkismetClient') as mock_client: + instance = mock_client.return_value + instance.get_flagged_count.return_value = 20 + instance.get_hammed_count.return_value = 10 + yield instance -@pytest.fixture -def mock_nodelog_model(mocker): - mock = mocker.patch('osf.models.NodeLog') - mock.filter.return_value.count.return_value = 100 - return mock +@pytest.mark.django_db +def test_spam_count_reporter(): + start_date = datetime(2024, 10, 1) -@pytest.fixture -def mock_preprintlog_model(mocker): - mock = mocker.patch('osf.models.PreprintLog') - mock.filter.return_value.count.return_value = 50 - return mock + oopspam_node = NodeFactory(spam_data={'who_flagged': 'oopspam'}) + akismet_node = NodeFactory(spam_data={'who_flagged': 'akismet'}) + + NodeLogFactory.create_batch(10, action='flag_spam', created=start_date, node=oopspam_node) + NodeLogFactory.create_batch(5, action='confirm_ham', created=start_date, node=oopspam_node) + NodeLogFactory.create_batch(20, action='flag_spam', created=start_date, node=akismet_node) + NodeLogFactory.create_batch(10, action='confirm_ham', created=start_date, node=akismet_node) -def test_spam_count_reporter(mock_oopspam_client, mock_akismet_client, mock_nodelog_model, mock_preprintlog_model): - report_month = datetime.datetime(2024, 10, 1) + report_yearmonth = YearMonth(2024, 10) reporter = SpamCountReporter() - report = reporter.report(report_month) + report = reporter.report(report_yearmonth) assert report[0].oopspam_flagged == 10 assert report[0].oopspam_hammed == 5 assert report[0].akismet_flagged == 20 assert report[0].akismet_hammed == 10 - assert report[0].node_confirmed_spam == 100 - assert report[0].preprint_confirmed_spam == 50 From 771830bbb285f99d9a7ecfdf7365baad5762b275 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Mon, 4 Nov 2024 13:45:00 -0500 Subject: [PATCH 3/4] Add private spam metrics report with preprint inclusion --- osf/external/askismet/client.py | 24 +++++----- osf/external/oopspam/client.py | 24 +++++----- osf/metrics/reporters/private_spam_metrics.py | 28 ++++++++++++ osf/metrics/reporters/spam_count.py | 16 ------- osf/metrics/reports.py | 4 -- osf_tests/metrics/test_spam_count_reporter.py | 45 +++++++++---------- 6 files changed, 76 insertions(+), 65 deletions(-) create mode 100644 osf/metrics/reporters/private_spam_metrics.py diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py index 61a79abf07b..e8f495a71f4 100644 --- a/osf/external/askismet/client.py +++ b/osf/external/askismet/client.py @@ -134,26 +134,30 @@ def submit_ham(self, user_ip, user_agent, **kwargs): if res.status_code != requests.codes.ok: raise AkismetClientError(reason=res.text) - def get_flagged_count(self, start_date, end_date): - from osf.models import NodeLog + def get_flagged_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog - flagged_count = NodeLog.objects.filter( - action=NodeLog.FLAG_SPAM, + log_model = NodeLog if category == 'node' else PreprintLog + + flagged_count = log_model.objects.filter( + action=log_model.FLAG_SPAM, created__gt=start_date, created__lt=end_date, - node__spam_data__who_flagged__in=['akismet', 'both'] + **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']} ).count() return flagged_count - def get_hammed_count(self, start_date, end_date): - from osf.models import NodeLog + def get_hammed_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + log_model = NodeLog if category == 'node' else PreprintLog - hammed_count = NodeLog.objects.filter( - action=NodeLog.CONFIRM_HAM, + hammed_count = log_model.objects.filter( + action=log_model.CONFIRM_HAM, created__gt=start_date, created__lt=end_date, - node__spam_data__who_flagged__in=['akismet', 'both'] + **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']} ).count() return hammed_count diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py index 14ee4267498..d7562d94990 100644 --- a/osf/external/oopspam/client.py +++ b/osf/external/oopspam/client.py @@ -46,26 +46,30 @@ def check_content(self, user_ip, content, **kwargs): # OOPSpam returns a spam score out of 6. 3 or higher indicates spam return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json - def get_flagged_count(self, start_date, end_date): - from osf.models import NodeLog + def get_flagged_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog - flagged_count = NodeLog.objects.filter( - action=NodeLog.FLAG_SPAM, + log_model = NodeLog if category == 'node' else PreprintLog + + flagged_count = log_model.objects.filter( + action=log_model.FLAG_SPAM, created__gt=start_date, created__lt=end_date, - node__spam_data__who_flagged__in=['oopspam', 'both'] + **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']} ).count() return flagged_count - def get_hammed_count(self, start_date, end_date): - from osf.models import NodeLog + def get_hammed_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + log_model = NodeLog if category == 'node' else PreprintLog - hammed_count = NodeLog.objects.filter( - action=NodeLog.CONFIRM_HAM, + hammed_count = log_model.objects.filter( + action=log_model.CONFIRM_HAM, created__gt=start_date, created__lt=end_date, - node__spam_data__who_flagged__in=['oopspam', 'both'] + **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']} ).count() return hammed_count diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py new file mode 100644 index 00000000000..d6beba3a804 --- /dev/null +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -0,0 +1,28 @@ +from osf.metrics.reports import SpamSummaryReport +from osf.external.oopspam.client import OOPSpamClient +from osf.external.askismet.client import AkismetClient +from ._base import MonthlyReporter + +class PrivateSpamMetricsReporter(MonthlyReporter): + report_name = 'Private Spam Metrics' + + def report(self, report_yearmonth): + target_month = report_yearmonth.target_month() + next_month = report_yearmonth.next_month() + + oopspam_client = OOPSpamClient() + akismet_client = AkismetClient() + + report = SpamSummaryReport( + report_yearmonth=str(report_yearmonth), + node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'), + node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'), + node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'), + node_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='node'), + preprint_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='preprint'), + preprint_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='preprint'), + preprint_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='preprint'), + preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint') + ) + + return [report] diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index d2e4c581ae4..f893db2bec2 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -4,9 +4,6 @@ from ._base import MonthlyReporter from osf.models import PreprintLog, NodeLog from osf.models.spam import SpamStatus -from osf.external.oopspam.client import OOPSpamClient -from osf.external.askismet.client import AkismetClient - class SpamCountReporter(MonthlyReporter): @@ -14,15 +11,6 @@ def report(self, report_yearmonth): target_month = report_yearmonth.target_month() next_month = report_yearmonth.next_month() - oopspam_client = OOPSpamClient() - akismet_client = AkismetClient() - - oopspam_flagged = oopspam_client.get_flagged_count(target_month, next_month) - oopspam_hammed = oopspam_client.get_hammed_count(target_month, next_month) - - akismet_flagged = akismet_client.get_flagged_count(target_month, next_month) - akismet_hammed = akismet_client.get_hammed_count(target_month, next_month) - report = SpamSummaryReport( report_yearmonth=str(report_yearmonth), # Node Log entries @@ -44,10 +32,6 @@ def report(self, report_yearmonth): created__lt=next_month, node__type='osf.node', ).count(), - oopspam_flagged=oopspam_flagged, - oopspam_hammed=oopspam_hammed, - akismet_flagged=akismet_flagged, - akismet_hammed=akismet_hammed, # Registration Log entries registration_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index b48b6b215da..609e79fc324 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -214,7 +214,3 @@ class SpamSummaryReport(MonthlyReport): preprint_flagged = metrics.Integer() user_marked_as_spam = metrics.Integer() user_marked_as_ham = metrics.Integer() - oopspam_flagged = metrics.Integer() - oopspam_hammed = metrics.Integer() - akismet_flagged = metrics.Integer() - akismet_hammed = metrics.Integer() diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py index 93fe1854358..db44dc848ff 100644 --- a/osf_tests/metrics/test_spam_count_reporter.py +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -1,28 +1,12 @@ import pytest from datetime import datetime -from osf.metrics.reporters.spam_count import SpamCountReporter -from unittest import mock +from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter from osf.metrics.utils import YearMonth from osf_tests.factories import NodeLogFactory, NodeFactory - -@pytest.fixture -def mock_oopspam_client(): - with mock.patch('osf.external.oopspam.client.OOPSpamClient') as mock_client: - instance = mock_client.return_value - instance.get_flagged_count.return_value = 10 - instance.get_hammed_count.return_value = 5 - yield instance - -@pytest.fixture -def mock_akismet_client(): - with mock.patch('osf.external.askismet.client.AkismetClient') as mock_client: - instance = mock_client.return_value - instance.get_flagged_count.return_value = 20 - instance.get_hammed_count.return_value = 10 - yield instance +from unittest.mock import patch @pytest.mark.django_db -def test_spam_count_reporter(): +def test_private_spam_metrics_reporter(): start_date = datetime(2024, 10, 1) oopspam_node = NodeFactory(spam_data={'who_flagged': 'oopspam'}) @@ -34,10 +18,21 @@ def test_spam_count_reporter(): NodeLogFactory.create_batch(10, action='confirm_ham', created=start_date, node=akismet_node) report_yearmonth = YearMonth(2024, 10) - reporter = SpamCountReporter() - report = reporter.report(report_yearmonth) - assert report[0].oopspam_flagged == 10 - assert report[0].oopspam_hammed == 5 - assert report[0].akismet_flagged == 20 - assert report[0].akismet_hammed == 10 + with patch('osf.external.oopspam.client.OOPSpamClient.get_flagged_count') as mock_oopspam_get_flagged_count, \ + patch('osf.external.oopspam.client.OOPSpamClient.get_hammed_count') as mock_oopspam_get_hammed_count, \ + patch('osf.external.askismet.client.AkismetClient.get_flagged_count') as mock_akismet_get_flagged_count, \ + patch('osf.external.askismet.client.AkismetClient.get_hammed_count') as mock_akismet_get_hammed_count: + + mock_oopspam_get_flagged_count.return_value = 10 + mock_oopspam_get_hammed_count.return_value = 5 + mock_akismet_get_flagged_count.return_value = 20 + mock_akismet_get_hammed_count.return_value = 10 + + reporter = PrivateSpamMetricsReporter() + report = reporter.report(report_yearmonth)[0] + + assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}" + assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}" + assert report.node_akismet_flagged == 20, f"Expected 20, got {report.node_akismet_flagged}" + assert report.node_akismet_hammed == 10, f"Expected 10, got {report.node_akismet_hammed}" From f0d55582d3a5ad83261b0d543f925be2a12e9887 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Tue, 5 Nov 2024 14:05:58 -0500 Subject: [PATCH 4/4] Validate category; add PrivateSpamMetricsReporter to monthly reports --- osf/external/askismet/client.py | 6 ++++++ osf/external/oopspam/client.py | 6 ++++++ osf/metrics/reporters/__init__.py | 2 ++ 3 files changed, 14 insertions(+) diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py index e8f495a71f4..db57b1d3cfa 100644 --- a/osf/external/askismet/client.py +++ b/osf/external/askismet/client.py @@ -137,6 +137,9 @@ def submit_ham(self, user_ip, user_agent, **kwargs): def get_flagged_count(self, start_date, end_date, category='node'): from osf.models import NodeLog, PreprintLog + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + log_model = NodeLog if category == 'node' else PreprintLog flagged_count = log_model.objects.filter( @@ -151,6 +154,9 @@ def get_flagged_count(self, start_date, end_date, category='node'): def get_hammed_count(self, start_date, end_date, category='node'): from osf.models import NodeLog, PreprintLog + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + log_model = NodeLog if category == 'node' else PreprintLog hammed_count = log_model.objects.filter( diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py index d7562d94990..0abdfdd021f 100644 --- a/osf/external/oopspam/client.py +++ b/osf/external/oopspam/client.py @@ -49,6 +49,9 @@ def check_content(self, user_ip, content, **kwargs): def get_flagged_count(self, start_date, end_date, category='node'): from osf.models import NodeLog, PreprintLog + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + log_model = NodeLog if category == 'node' else PreprintLog flagged_count = log_model.objects.filter( @@ -63,6 +66,9 @@ def get_flagged_count(self, start_date, end_date, category='node'): def get_hammed_count(self, start_date, end_date, category='node'): from osf.models import NodeLog, PreprintLog + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + log_model = NodeLog if category == 'node' else PreprintLog hammed_count = log_model.objects.filter( diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py index 1f8e0fba862..749bc3f2ca0 100644 --- a/osf/metrics/reporters/__init__.py +++ b/osf/metrics/reporters/__init__.py @@ -10,6 +10,7 @@ from .preprint_count import PreprintCountReporter from .user_count import UserCountReporter from .spam_count import SpamCountReporter +from .private_spam_metrics import PrivateSpamMetricsReporter class AllDailyReporters(enum.Enum): @@ -26,3 +27,4 @@ class AllDailyReporters(enum.Enum): class AllMonthlyReporters(enum.Enum): SPAM_COUNT = SpamCountReporter + PRIVATE_SPAM_METRICS = PrivateSpamMetricsReporter