Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-4438] Add OOPSpam and Akismet metrics to spam report #10783

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions osf/external/askismet/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,37 @@ def submit_ham(self, user_ip, user_agent, **kwargs):
)
if res.status_code != requests.codes.ok:
raise AkismetClientError(reason=res.text)

def get_flagged_count(self, start_date, end_date, category='node'):
    """Count spam-flag log entries attributed to Akismet within a date window.

    Counts ``FLAG_SPAM`` log rows whose related node/preprint has
    ``spam_data['who_flagged']`` equal to ``'akismet'`` or ``'both'``.

    :param start_date: lower bound on ``created``; exclusive (``created__gt``).
    :param end_date: upper bound on ``created``; exclusive (``created__lt``).
    :param str category: ``'node'`` (uses NodeLog) or ``'preprint'`` (uses PreprintLog).
    :return: number of matching log rows (result of ``QuerySet.count()``).
    :raises ValueError: if ``category`` is neither ``'node'`` nor ``'preprint'``.
    """
    # Validate first so bad input fails fast, before the deferred model import runs.
    if category not in ('node', 'preprint'):
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Imported inside the method (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog, PreprintLog

    log_model = NodeLog if category == 'node' else PreprintLog

    # NOTE(review): both bounds are exclusive, so a log created exactly at
    # start_date is not counted -- confirm this is intended.
    return log_model.objects.filter(
        action=log_model.FLAG_SPAM,
        created__gt=start_date,
        created__lt=end_date,
        **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']}
    ).count()

def get_hammed_count(self, start_date, end_date, category='node'):
    """Count confirm-ham log entries for items Akismet flagged, within a date window.

    Counts ``CONFIRM_HAM`` log rows whose related node/preprint has
    ``spam_data['who_flagged']`` equal to ``'akismet'`` or ``'both'``.

    :param start_date: lower bound on ``created``; exclusive (``created__gt``).
    :param end_date: upper bound on ``created``; exclusive (``created__lt``).
    :param str category: ``'node'`` (uses NodeLog) or ``'preprint'`` (uses PreprintLog).
    :return: number of matching log rows (result of ``QuerySet.count()``).
    :raises ValueError: if ``category`` is neither ``'node'`` nor ``'preprint'``.
    """
    # Validate first so bad input fails fast, before the deferred model import runs.
    if category not in ('node', 'preprint'):
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Imported inside the method (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog, PreprintLog

    log_model = NodeLog if category == 'node' else PreprintLog

    # NOTE(review): both bounds are exclusive, so a log created exactly at
    # start_date is not counted -- confirm this is intended.
    return log_model.objects.filter(
        action=log_model.CONFIRM_HAM,
        created__gt=start_date,
        created__lt=end_date,
        **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']}
    ).count()
34 changes: 34 additions & 0 deletions osf/external/oopspam/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,37 @@ def check_content(self, user_ip, content, **kwargs):

# OOPSpam returns a spam score out of 6. 3 or higher indicates spam
return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json

def get_flagged_count(self, start_date, end_date, category='node'):
    """Count spam-flag log entries attributed to OOPSpam within a date window.

    Counts ``FLAG_SPAM`` log rows whose related node/preprint has
    ``spam_data['who_flagged']`` equal to ``'oopspam'`` or ``'both'``.

    :param start_date: lower bound on ``created``; exclusive (``created__gt``).
    :param end_date: upper bound on ``created``; exclusive (``created__lt``).
    :param str category: ``'node'`` (uses NodeLog) or ``'preprint'`` (uses PreprintLog).
    :return: number of matching log rows (result of ``QuerySet.count()``).
    :raises ValueError: if ``category`` is neither ``'node'`` nor ``'preprint'``.
    """
    # Validate first so bad input fails fast, before the deferred model import runs.
    if category not in ('node', 'preprint'):
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Imported inside the method (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog, PreprintLog

    log_model = NodeLog if category == 'node' else PreprintLog

    # NOTE(review): both bounds are exclusive, so a log created exactly at
    # start_date is not counted -- confirm this is intended.
    return log_model.objects.filter(
        action=log_model.FLAG_SPAM,
        created__gt=start_date,
        created__lt=end_date,
        **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']}
    ).count()

def get_hammed_count(self, start_date, end_date, category='node'):
    """Count confirm-ham log entries for items OOPSpam flagged, within a date window.

    Counts ``CONFIRM_HAM`` log rows whose related node/preprint has
    ``spam_data['who_flagged']`` equal to ``'oopspam'`` or ``'both'``.

    :param start_date: lower bound on ``created``; exclusive (``created__gt``).
    :param end_date: upper bound on ``created``; exclusive (``created__lt``).
    :param str category: ``'node'`` (uses NodeLog) or ``'preprint'`` (uses PreprintLog).
    :return: number of matching log rows (result of ``QuerySet.count()``).
    :raises ValueError: if ``category`` is neither ``'node'`` nor ``'preprint'``.
    """
    # Validate first so bad input fails fast, before the deferred model import runs.
    if category not in ('node', 'preprint'):
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Imported inside the method (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog, PreprintLog

    log_model = NodeLog if category == 'node' else PreprintLog

    # NOTE(review): both bounds are exclusive, so a log created exactly at
    # start_date is not counted -- confirm this is intended.
    return log_model.objects.filter(
        action=log_model.CONFIRM_HAM,
        created__gt=start_date,
        created__lt=end_date,
        **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']}
    ).count()
2 changes: 2 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .preprint_count import PreprintCountReporter
from .user_count import UserCountReporter
from .spam_count import SpamCountReporter
from .private_spam_metrics import PrivateSpamMetricsReporter


class AllDailyReporters(enum.Enum):
Expand All @@ -26,3 +27,4 @@ class AllDailyReporters(enum.Enum):

class AllMonthlyReporters(enum.Enum):
    """Enumeration whose members' values are the monthly reporter classes."""

    # Each member's value is a reporter class imported at the top of this module.
    SPAM_COUNT = SpamCountReporter
    PRIVATE_SPAM_METRICS = PrivateSpamMetricsReporter
28 changes: 28 additions & 0 deletions osf/metrics/reporters/private_spam_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from osf.metrics.reports import SpamSummaryReport
from osf.external.oopspam.client import OOPSpamClient
from osf.external.askismet.client import AkismetClient
from ._base import MonthlyReporter

class PrivateSpamMetricsReporter(MonthlyReporter):
    """Monthly reporter that collects OOPSpam and Akismet flag/ham counts.

    Produces one ``SpamSummaryReport`` holding, for both nodes and preprints,
    the flagged and hammed counts returned by each spam-service client.
    """
    report_name = 'Private Spam Metrics'

    def report(self, report_yearmonth):
        """Build the spam summary for the month described by ``report_yearmonth``.

        :param report_yearmonth: object providing ``target_month()`` and
            ``next_month()``; their results are handed to the clients as the
            start and end of the counting window.
        :return: a one-element list containing the ``SpamSummaryReport``.
        """
        window_start = report_yearmonth.target_month()
        window_end = report_yearmonth.next_month()

        # (field-name fragment, client) pairs, in the order the report fields are filled.
        services = (
            ('oopspam', OOPSpamClient()),
            ('akismet', AkismetClient()),
        )

        yearmonth_label = str(report_yearmonth)

        # Field names follow the pattern <category>_<service>_<flagged|hammed>.
        counts = {}
        for category in ('node', 'preprint'):
            for service_name, client in services:
                counts[f'{category}_{service_name}_flagged'] = client.get_flagged_count(
                    window_start, window_end, category=category
                )
                counts[f'{category}_{service_name}_hammed'] = client.get_hammed_count(
                    window_start, window_end, category=category
                )

        summary = SpamSummaryReport(report_yearmonth=yearmonth_label, **counts)
        return [summary]
1 change: 0 additions & 1 deletion osf/metrics/reporters/spam_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from osf.models import PreprintLog, NodeLog
from osf.models.spam import SpamStatus


class SpamCountReporter(MonthlyReporter):

def report(self, report_yearmonth):
Expand Down
36 changes: 36 additions & 0 deletions osf_tests/external/akismet/test_akismet.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,39 @@ def test_meetings_skip_spam_check(self, mock_akismet, user, node_in_conference,
node.check_spam(user, {'title'}, request_headers)
node.refresh_from_db()
assert node.spam_status == SpamStatus.FLAGGED

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_flagged_count(self, mock_filter, user):
    """get_flagged_count filters FLAG_SPAM node logs and returns the queryset count."""
    from osf.external.askismet.client import AkismetClient
    from datetime import datetime

    # Pin the mocked queryset's count so the method's return value can be asserted.
    mock_filter.return_value.count.return_value = 7

    client = AkismetClient()
    start_date = datetime(2024, 10, 1)
    end_date = datetime(2024, 10, 31)

    flagged_count = client.get_flagged_count(start_date, end_date)

    mock_filter.assert_called_once_with(
        action='flag_spam',
        created__gt=start_date,
        created__lt=end_date,
        node__spam_data__who_flagged__in=['akismet', 'both']
    )
    # The value handed back must be the result of .count() on the filtered queryset.
    mock_filter.return_value.count.assert_called_once_with()
    assert flagged_count == 7

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_hammed_count(self, mock_filter, user):
    """get_hammed_count filters CONFIRM_HAM node logs and returns the queryset count."""
    from osf.external.askismet.client import AkismetClient
    from datetime import datetime

    # Pin the mocked queryset's count so the method's return value can be asserted.
    mock_filter.return_value.count.return_value = 4

    client = AkismetClient()
    start_date = datetime(2024, 10, 1)
    end_date = datetime(2024, 10, 31)

    hammed_count = client.get_hammed_count(start_date, end_date)

    mock_filter.assert_called_once_with(
        action='confirm_ham',
        created__gt=start_date,
        created__lt=end_date,
        node__spam_data__who_flagged__in=['akismet', 'both']
    )
    # The value handed back must be the result of .count() on the filtered queryset.
    mock_filter.return_value.count.assert_called_once_with()
    assert hammed_count == 4
36 changes: 36 additions & 0 deletions osf_tests/external/oopspam/test_oopspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,39 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers):
)

assert user.spam_status == SpamStatus.UNKNOWN

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_flagged_count(self, mock_filter, user):
    """get_flagged_count filters FLAG_SPAM node logs and returns the queryset count."""
    from osf.external.oopspam.client import OOPSpamClient
    from datetime import datetime

    # Pin the mocked queryset's count so the method's return value can be asserted.
    mock_filter.return_value.count.return_value = 7

    client = OOPSpamClient()
    start_date = datetime(2024, 10, 1)
    end_date = datetime(2024, 10, 31)

    flagged_count = client.get_flagged_count(start_date, end_date)

    mock_filter.assert_called_once_with(
        action='flag_spam',
        created__gt=start_date,
        created__lt=end_date,
        node__spam_data__who_flagged__in=['oopspam', 'both']
    )
    # The value handed back must be the result of .count() on the filtered queryset.
    mock_filter.return_value.count.assert_called_once_with()
    assert flagged_count == 7

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_hammed_count(self, mock_filter, user):
    """get_hammed_count filters CONFIRM_HAM node logs and returns the queryset count."""
    from osf.external.oopspam.client import OOPSpamClient
    from datetime import datetime

    # Pin the mocked queryset's count so the method's return value can be asserted.
    mock_filter.return_value.count.return_value = 4

    client = OOPSpamClient()
    start_date = datetime(2024, 10, 1)
    end_date = datetime(2024, 10, 31)

    hammed_count = client.get_hammed_count(start_date, end_date)

    mock_filter.assert_called_once_with(
        action='confirm_ham',
        created__gt=start_date,
        created__lt=end_date,
        node__spam_data__who_flagged__in=['oopspam', 'both']
    )
    # The value handed back must be the result of .count() on the filtered queryset.
    mock_filter.return_value.count.assert_called_once_with()
    assert hammed_count == 4
38 changes: 38 additions & 0 deletions osf_tests/metrics/test_spam_count_reporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
from datetime import datetime
from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter
from osf.metrics.utils import YearMonth
from osf_tests.factories import NodeLogFactory, NodeFactory
from unittest.mock import patch

@pytest.mark.django_db
def test_private_spam_metrics_reporter():
    """PrivateSpamMetricsReporter copies every client count into its report.

    The four client count methods are patched, so the values asserted below
    come from the mocks' return values; the test also checks that each client
    method is invoked once for nodes and once for preprints.
    """
    start_date = datetime(2024, 10, 1)

    oopspam_node = NodeFactory(spam_data={'who_flagged': 'oopspam'})
    akismet_node = NodeFactory(spam_data={'who_flagged': 'akismet'})

    # NOTE(review): these logs are not read while the client count methods are
    # patched below; kept as in the original -- confirm whether they are still needed.
    NodeLogFactory.create_batch(10, action='flag_spam', created=start_date, node=oopspam_node)
    NodeLogFactory.create_batch(5, action='confirm_ham', created=start_date, node=oopspam_node)
    NodeLogFactory.create_batch(20, action='flag_spam', created=start_date, node=akismet_node)
    NodeLogFactory.create_batch(10, action='confirm_ham', created=start_date, node=akismet_node)

    report_yearmonth = YearMonth(2024, 10)

    with patch('osf.external.oopspam.client.OOPSpamClient.get_flagged_count') as mock_oopspam_get_flagged_count, \
            patch('osf.external.oopspam.client.OOPSpamClient.get_hammed_count') as mock_oopspam_get_hammed_count, \
            patch('osf.external.askismet.client.AkismetClient.get_flagged_count') as mock_akismet_get_flagged_count, \
            patch('osf.external.askismet.client.AkismetClient.get_hammed_count') as mock_akismet_get_hammed_count:

        mock_oopspam_get_flagged_count.return_value = 10
        mock_oopspam_get_hammed_count.return_value = 5
        mock_akismet_get_flagged_count.return_value = 20
        mock_akismet_get_hammed_count.return_value = 10

        reporter = PrivateSpamMetricsReporter()
        report = reporter.report(report_yearmonth)[0]

        # Every client method must be asked for both categories, exactly once each.
        for patched_method in (
            mock_oopspam_get_flagged_count,
            mock_oopspam_get_hammed_count,
            mock_akismet_get_flagged_count,
            mock_akismet_get_hammed_count,
        ):
            requested = sorted(call.kwargs['category'] for call in patched_method.call_args_list)
            assert requested == ['node', 'preprint'], f"Unexpected categories: {requested}"

    assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}"
    assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}"
    assert report.node_akismet_flagged == 20, f"Expected 20, got {report.node_akismet_flagged}"
    assert report.node_akismet_hammed == 10, f"Expected 10, got {report.node_akismet_hammed}"

    # The preprint fields are populated from the same patched methods.
    assert report.preprint_oopspam_flagged == 10, f"Expected 10, got {report.preprint_oopspam_flagged}"
    assert report.preprint_oopspam_hammed == 5, f"Expected 5, got {report.preprint_oopspam_hammed}"
    assert report.preprint_akismet_flagged == 20, f"Expected 20, got {report.preprint_akismet_flagged}"
    assert report.preprint_akismet_hammed == 10, f"Expected 10, got {report.preprint_akismet_hammed}"
Loading