diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py
index 877f7ec4c23..db57b1d3cfa 100644
--- a/osf/external/askismet/client.py
+++ b/osf/external/askismet/client.py
@@ -133,3 +133,55 @@ def submit_ham(self, user_ip, user_agent, **kwargs):
         )
         if res.status_code != requests.codes.ok:
             raise AkismetClientError(reason=res.text)
+
+    def get_flagged_count(self, start_date, end_date, category='node'):
+        """Count FLAG_SPAM log entries attributed to Akismet in a date range.
+
+        :param start_date: inclusive lower bound on the log's ``created`` time
+        :param end_date: exclusive upper bound on the log's ``created`` time
+        :param str category: 'node' to query NodeLog, 'preprint' for PreprintLog
+        :return: number of matching logs whose node/preprint has a
+            ``spam_data['who_flagged']`` of 'akismet' or 'both'
+        :raises ValueError: if ``category`` is neither 'node' nor 'preprint'
+        """
+        from osf.models import NodeLog, PreprintLog
+
+        if category not in ['node', 'preprint']:
+            raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")
+
+        log_model = NodeLog if category == 'node' else PreprintLog
+
+        # Half-open range [start_date, end_date): with an exclusive lower
+        # bound, a log stamped exactly at start_date is counted in no period.
+        return log_model.objects.filter(
+            action=log_model.FLAG_SPAM,
+            created__gte=start_date,
+            created__lt=end_date,
+            **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']}
+        ).count()
+
+    def get_hammed_count(self, start_date, end_date, category='node'):
+        """Count CONFIRM_HAM log entries attributed to Akismet in a date range.
+
+        :param start_date: inclusive lower bound on the log's ``created`` time
+        :param end_date: exclusive upper bound on the log's ``created`` time
+        :param str category: 'node' to query NodeLog, 'preprint' for PreprintLog
+        :return: number of matching logs whose node/preprint has a
+            ``spam_data['who_flagged']`` of 'akismet' or 'both'
+        :raises ValueError: if ``category`` is neither 'node' nor 'preprint'
+        """
+        from osf.models import NodeLog, PreprintLog
+
+        if category not in ['node', 'preprint']:
+            raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")
+
+        log_model = NodeLog if category == 'node' else PreprintLog
+
+        # Half-open range [start_date, end_date): with an exclusive lower
+        # bound, a log stamped exactly at start_date is counted in no period.
+        return log_model.objects.filter(
+            action=log_model.CONFIRM_HAM,
+            created__gte=start_date,
+            created__lt=end_date,
+            **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']}
+        ).count()
diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py
index ef22864a43d..0abdfdd021f 100644
--- a/osf/external/oopspam/client.py
+++ b/osf/external/oopspam/client.py
@@ -45,3 +45,55 @@ def check_content(self, user_ip, content, **kwargs):
 
         # OOPSpam returns a spam score out of 6. 3 or higher indicates spam
         return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json
+
+    def get_flagged_count(self, start_date, end_date, category='node'):
+        """Count FLAG_SPAM log entries attributed to OOPSpam in a date range.
+
+        :param start_date: inclusive lower bound on the log's ``created`` time
+        :param end_date: exclusive upper bound on the log's ``created`` time
+        :param str category: 'node' to query NodeLog, 'preprint' for PreprintLog
+        :return: number of matching logs whose node/preprint has a
+            ``spam_data['who_flagged']`` of 'oopspam' or 'both'
+        :raises ValueError: if ``category`` is neither 'node' nor 'preprint'
+        """
+        from osf.models import NodeLog, PreprintLog
+
+        if category not in ['node', 'preprint']:
+            raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")
+
+        log_model = NodeLog if category == 'node' else PreprintLog
+
+        # Half-open range [start_date, end_date): with an exclusive lower
+        # bound, a log stamped exactly at start_date is counted in no period.
+        return log_model.objects.filter(
+            action=log_model.FLAG_SPAM,
+            created__gte=start_date,
+            created__lt=end_date,
+            **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']}
+        ).count()
+
+    def get_hammed_count(self, start_date, end_date, category='node'):
+        """Count CONFIRM_HAM log entries attributed to OOPSpam in a date range.
+
+        :param start_date: inclusive lower bound on the log's ``created`` time
+        :param end_date: exclusive upper bound on the log's ``created`` time
+        :param str category: 'node' to query NodeLog, 'preprint' for PreprintLog
+        :return: number of matching logs whose node/preprint has a
+            ``spam_data['who_flagged']`` of 'oopspam' or 'both'
+        :raises ValueError: if ``category`` is neither 'node' nor 'preprint'
+        """
+        from osf.models import NodeLog, PreprintLog
+
+        if category not in ['node', 'preprint']:
+            raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")
+
+        log_model = NodeLog if category == 'node' else PreprintLog
+
+        # Half-open range [start_date, end_date): with an exclusive lower
+        # bound, a log stamped exactly at start_date is counted in no period.
+        return log_model.objects.filter(
+            action=log_model.CONFIRM_HAM,
+            created__gte=start_date,
+            created__lt=end_date,
+            **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']}
+        ).count()
diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py
index 1f8e0fba862..749bc3f2ca0 100644
--- a/osf/metrics/reporters/__init__.py
+++ b/osf/metrics/reporters/__init__.py
@@ -10,6 +10,7 @@
 from .preprint_count import PreprintCountReporter
 from .user_count import UserCountReporter
 from .spam_count import SpamCountReporter
+from .private_spam_metrics import PrivateSpamMetricsReporter
 
 
 class AllDailyReporters(enum.Enum):
@@ -26,3 +27,4 @@ class AllDailyReporters(enum.Enum):
 
 class AllMonthlyReporters(enum.Enum):
     SPAM_COUNT = SpamCountReporter
+    PRIVATE_SPAM_METRICS = PrivateSpamMetricsReporter
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
new file mode 100644
index 00000000000..d6beba3a804
--- /dev/null
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -0,0 +1,37 @@
+from osf.metrics.reports import SpamSummaryReport
+from osf.external.oopspam.client import OOPSpamClient
+from osf.external.askismet.client import AkismetClient
+from ._base import MonthlyReporter
+
+
+class PrivateSpamMetricsReporter(MonthlyReporter):
+    """Monthly report of flag/ham log counts split by spam-checking service."""
+
+    report_name = 'Private Spam Metrics'
+
+    def report(self, report_yearmonth):
+        """Build the SpamSummaryReport for the month given by ``report_yearmonth``.
+
+        :param report_yearmonth: month to report on; its ``target_month()`` and
+            ``next_month()`` values bound the counted logs
+        :return: single-element list containing the populated report
+        """
+        target_month = report_yearmonth.target_month()
+        next_month = report_yearmonth.next_month()
+
+        oopspam_client = OOPSpamClient()
+        akismet_client = AkismetClient()
+
+        report = SpamSummaryReport(
+            report_yearmonth=str(report_yearmonth),
+            node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'),
+            node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'),
+            node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'),
+            node_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='node'),
+            preprint_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='preprint'),
+            preprint_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='preprint'),
+            preprint_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='preprint'),
+            preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint'),
+        )
+
+        return [report]
diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py
index 54feae8bee5..f893db2bec2 100644
--- a/osf/metrics/reporters/spam_count.py
+++ b/osf/metrics/reporters/spam_count.py
@@ -5,7 +5,6 @@
 from osf.models import PreprintLog, NodeLog
 from osf.models.spam import SpamStatus
 
-
 class SpamCountReporter(MonthlyReporter):
 
     def report(self, report_yearmonth):
diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py
index db3c5d0d584..46729e485e8 100644
--- a/osf_tests/external/akismet/test_akismet.py
+++ b/osf_tests/external/akismet/test_akismet.py
@@ -237,3 +237,39 @@ def test_meetings_skip_spam_check(self, mock_akismet, user, node_in_conference,
         node.check_spam(user, {'title'}, request_headers)
         node.refresh_from_db()
         assert node.spam_status == SpamStatus.FLAGGED
+
+    @mock.patch('osf.models.NodeLog.objects.filter')
+    def test_get_flagged_count(self, mock_filter, user):
+        from osf.external.askismet.client import AkismetClient
+        from datetime import datetime
+
+        client = AkismetClient()
+        start_date = datetime(2024, 10, 1)
+        end_date = datetime(2024, 10, 31)
+
+        client.get_flagged_count(start_date, end_date)
+
+        mock_filter.assert_called_with(
+            action='flag_spam',
+            created__gte=start_date,
+            created__lt=end_date,
+            node__spam_data__who_flagged__in=['akismet', 'both']
+        )
+
+    @mock.patch('osf.models.NodeLog.objects.filter')
+    def test_get_hammed_count(self, mock_filter, user):
+        from osf.external.askismet.client import AkismetClient
+        from datetime import datetime
+
+        client = AkismetClient()
+        start_date = datetime(2024, 10, 1)
+        end_date = datetime(2024, 10, 31)
+
+        client.get_hammed_count(start_date, end_date)
+
+        mock_filter.assert_called_with(
+            action='confirm_ham',
+            created__gte=start_date,
+            created__lt=end_date,
+            node__spam_data__who_flagged__in=['akismet', 'both']
+        )
diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py
index 36740148116..96656ecc6da 100644
--- a/osf_tests/external/oopspam/test_oopspam.py
+++ b/osf_tests/external/oopspam/test_oopspam.py
@@ -125,3 +125,39 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers):
         )
 
         assert user.spam_status == SpamStatus.UNKNOWN
+
+    @mock.patch('osf.models.NodeLog.objects.filter')
+    def test_get_flagged_count(self, mock_filter, user):
+        from osf.external.oopspam.client import OOPSpamClient
+        from datetime import datetime
+
+        client = OOPSpamClient()
+        start_date = datetime(2024, 10, 1)
+        end_date = datetime(2024, 10, 31)
+
+        client.get_flagged_count(start_date, end_date)
+
+        mock_filter.assert_called_with(
+            action='flag_spam',
+            created__gte=start_date,
+            created__lt=end_date,
+            node__spam_data__who_flagged__in=['oopspam', 'both']
+        )
+
+    @mock.patch('osf.models.NodeLog.objects.filter')
+    def test_get_hammed_count(self, mock_filter, user):
+        from osf.external.oopspam.client import OOPSpamClient
+        from datetime import datetime
+
+        client = OOPSpamClient()
+        start_date = datetime(2024, 10, 1)
+        end_date = datetime(2024, 10, 31)
+
+        client.get_hammed_count(start_date, end_date)
+
+        mock_filter.assert_called_with(
+            action='confirm_ham',
+            created__gte=start_date,
+            created__lt=end_date,
+            node__spam_data__who_flagged__in=['oopspam', 'both']
+        )
diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py
new file mode 100644
index 00000000000..db44dc848ff
--- /dev/null
+++ b/osf_tests/metrics/test_spam_count_reporter.py
@@ -0,0 +1,52 @@
+import pytest
+from unittest import mock
+
+from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter
+from osf.metrics.utils import YearMonth
+
+OOPSPAM_CLIENT = 'osf.external.oopspam.client.OOPSpamClient'
+AKISMET_CLIENT = 'osf.external.askismet.client.AkismetClient'
+
+
+def _per_category(node_count, preprint_count):
+    """Build a mock side effect returning a distinct count per category.
+
+    Distinct values make the test fail if the reporter stores a count under
+    the wrong service, action, or category field.
+    """
+    counts = {'node': node_count, 'preprint': preprint_count}
+    return lambda start_date, end_date, category='node': counts[category]
+
+
+@pytest.mark.django_db
+def test_private_spam_metrics_reporter():
+    report_yearmonth = YearMonth(2024, 10)
+    target_month = report_yearmonth.target_month()
+    next_month = report_yearmonth.next_month()
+
+    # The client queries are stubbed out, so no log fixtures are created:
+    # this test pins only how the reporter maps client results onto fields.
+    with mock.patch(f'{OOPSPAM_CLIENT}.get_flagged_count', side_effect=_per_category(10, 11)) as oopspam_flagged, \
+            mock.patch(f'{OOPSPAM_CLIENT}.get_hammed_count', side_effect=_per_category(5, 6)) as oopspam_hammed, \
+            mock.patch(f'{AKISMET_CLIENT}.get_flagged_count', side_effect=_per_category(20, 21)) as akismet_flagged, \
+            mock.patch(f'{AKISMET_CLIENT}.get_hammed_count', side_effect=_per_category(12, 13)) as akismet_hammed:
+        reports = PrivateSpamMetricsReporter().report(report_yearmonth)
+
+    assert len(reports) == 1
+    report = reports[0]
+
+    assert report.node_oopspam_flagged == 10
+    assert report.node_oopspam_hammed == 5
+    assert report.node_akismet_flagged == 20
+    assert report.node_akismet_hammed == 12
+    assert report.preprint_oopspam_flagged == 11
+    assert report.preprint_oopspam_hammed == 6
+    assert report.preprint_akismet_flagged == 21
+    assert report.preprint_akismet_hammed == 13
+
+    # Every client method must be queried once per category, for exactly
+    # the reported month's bounds.
+    for stub in (oopspam_flagged, oopspam_hammed, akismet_flagged, akismet_hammed):
+        assert stub.call_count == 2
+        stub.assert_any_call(target_month, next_month, category='node')
+        stub.assert_any_call(target_month, next_month, category='preprint')