Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-4438] Add OOPSpam and Akismet metrics to spam report #10783

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions osf/external/askismet/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,27 @@ def submit_ham(self, user_ip, user_agent, **kwargs):
)
if res.status_code != requests.codes.ok:
raise AkismetClientError(reason=res.text)

def get_flagged_count(self, start_date, end_date):
    """Count ``FLAG_SPAM`` node logs attributed to Akismet in a date window.

    Both bounds are exclusive: a log is counted only when
    ``start_date < created < end_date``. A log is attributed to Akismet
    when the ``who_flagged`` entry of its node's ``spam_data`` is
    ``'akismet'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :return: the number of matching ``NodeLog`` rows
    """
    # Kept at function scope as in the original
    # (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog

    criteria = {
        'action': NodeLog.FLAG_SPAM,
        'created__gt': start_date,
        'created__lt': end_date,
        'node__spam_data__who_flagged__in': ['akismet', 'both'],
    }
    return NodeLog.objects.filter(**criteria).count()

def get_hammed_count(self, start_date, end_date):
    """Count ``CONFIRM_HAM`` node logs for nodes Akismet had flagged.

    Both bounds are exclusive: a log is counted only when
    ``start_date < created < end_date``. Only logs whose node's
    ``spam_data`` has a ``who_flagged`` entry of ``'akismet'`` or
    ``'both'`` are included.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :return: the number of matching ``NodeLog`` rows
    """
    # Kept at function scope as in the original
    # (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog

    criteria = {
        'action': NodeLog.CONFIRM_HAM,
        'created__gt': start_date,
        'created__lt': end_date,
        'node__spam_data__who_flagged__in': ['akismet', 'both'],
    }
    return NodeLog.objects.filter(**criteria).count()
24 changes: 24 additions & 0 deletions osf/external/oopspam/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,27 @@ def check_content(self, user_ip, content, **kwargs):

# OOPSpam returns a spam score out of 6. 3 or higher indicates spam
return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json

def get_flagged_count(self, start_date, end_date):
    """Count ``FLAG_SPAM`` node logs attributed to OOPSpam in a date window.

    Both bounds are exclusive: a log is counted only when
    ``start_date < created < end_date``. A log is attributed to OOPSpam
    when the ``who_flagged`` entry of its node's ``spam_data`` is
    ``'oopspam'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :return: the number of matching ``NodeLog`` rows
    """
    # Kept at function scope as in the original
    # (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog

    criteria = {
        'action': NodeLog.FLAG_SPAM,
        'created__gt': start_date,
        'created__lt': end_date,
        'node__spam_data__who_flagged__in': ['oopspam', 'both'],
    }
    return NodeLog.objects.filter(**criteria).count()

def get_hammed_count(self, start_date, end_date):
    """Count ``CONFIRM_HAM`` node logs for nodes OOPSpam had flagged.

    Both bounds are exclusive: a log is counted only when
    ``start_date < created < end_date``. Only logs whose node's
    ``spam_data`` has a ``who_flagged`` entry of ``'oopspam'`` or
    ``'both'`` are included.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :return: the number of matching ``NodeLog`` rows
    """
    # Kept at function scope as in the original
    # (presumably to avoid a circular import -- confirm).
    from osf.models import NodeLog

    criteria = {
        'action': NodeLog.CONFIRM_HAM,
        'created__gt': start_date,
        'created__lt': end_date,
        'node__spam_data__who_flagged__in': ['oopspam', 'both'],
    }
    return NodeLog.objects.filter(**criteria).count()
15 changes: 15 additions & 0 deletions osf/metrics/reporters/spam_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from ._base import MonthlyReporter
from osf.models import PreprintLog, NodeLog
from osf.models.spam import SpamStatus
from osf.external.oopspam.client import OOPSpamClient
from osf.external.askismet.client import AkismetClient


class SpamCountReporter(MonthlyReporter):
Expand All @@ -12,6 +14,15 @@ def report(self, report_yearmonth):
target_month = report_yearmonth.target_month()
next_month = report_yearmonth.next_month()

oopspam_client = OOPSpamClient()
akismet_client = AkismetClient()

oopspam_flagged = oopspam_client.get_flagged_count(target_month, next_month)
oopspam_hammed = oopspam_client.get_hammed_count(target_month, next_month)

akismet_flagged = akismet_client.get_flagged_count(target_month, next_month)
akismet_hammed = akismet_client.get_hammed_count(target_month, next_month)

report = SpamSummaryReport(
report_yearmonth=str(report_yearmonth),
# Node Log entries
Expand All @@ -33,6 +44,10 @@ def report(self, report_yearmonth):
created__lt=next_month,
node__type='osf.node',
).count(),
oopspam_flagged=oopspam_flagged,
oopspam_hammed=oopspam_hammed,
akismet_flagged=akismet_flagged,
akismet_hammed=akismet_hammed,
# Registration Log entries
registration_confirmed_spam=NodeLog.objects.filter(
action=NodeLog.CONFIRM_SPAM,
Expand Down
4 changes: 4 additions & 0 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,7 @@ class SpamSummaryReport(MonthlyReport):
preprint_flagged = metrics.Integer()
user_marked_as_spam = metrics.Integer()
user_marked_as_ham = metrics.Integer()
oopspam_flagged = metrics.Integer()
oopspam_hammed = metrics.Integer()
akismet_flagged = metrics.Integer()
akismet_hammed = metrics.Integer()
36 changes: 36 additions & 0 deletions osf_tests/external/akismet/test_akismet.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,39 @@ def test_meetings_skip_spam_check(self, mock_akismet, user, node_in_conference,
node.check_spam(user, {'title'}, request_headers)
node.refresh_from_db()
assert node.spam_status == SpamStatus.FLAGGED

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_flagged_count(self, mock_filter, user):
    """AkismetClient.get_flagged_count issues the expected NodeLog filter.

    ``NodeLog.objects.filter`` is patched, so no query is run; the test only
    checks the keyword arguments of the most recent ``filter`` call.
    """
    from datetime import datetime
    from osf.external.askismet.client import AkismetClient

    window_start = datetime(2024, 10, 1)
    window_end = datetime(2024, 10, 31)

    AkismetClient().get_flagged_count(window_start, window_end)

    expected_lookup = {
        'action': 'flag_spam',
        'created__gt': window_start,
        'created__lt': window_end,
        'node__spam_data__who_flagged__in': ['akismet', 'both'],
    }
    mock_filter.assert_called_with(**expected_lookup)

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_hammed_count(self, mock_filter, user):
    """AkismetClient.get_hammed_count issues the expected NodeLog filter.

    ``NodeLog.objects.filter`` is patched, so no query is run; the test only
    checks the keyword arguments of the most recent ``filter`` call.
    """
    from datetime import datetime
    from osf.external.askismet.client import AkismetClient

    window_start = datetime(2024, 10, 1)
    window_end = datetime(2024, 10, 31)

    AkismetClient().get_hammed_count(window_start, window_end)

    expected_lookup = {
        'action': 'confirm_ham',
        'created__gt': window_start,
        'created__lt': window_end,
        'node__spam_data__who_flagged__in': ['akismet', 'both'],
    }
    mock_filter.assert_called_with(**expected_lookup)
36 changes: 36 additions & 0 deletions osf_tests/external/oopspam/test_oopspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,39 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers):
)

assert user.spam_status == SpamStatus.UNKNOWN

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_flagged_count(self, mock_filter, user):
    """OOPSpamClient.get_flagged_count issues the expected NodeLog filter.

    ``NodeLog.objects.filter`` is patched, so no query is run; the test only
    checks the keyword arguments of the most recent ``filter`` call.
    """
    from datetime import datetime
    from osf.external.oopspam.client import OOPSpamClient

    window_start = datetime(2024, 10, 1)
    window_end = datetime(2024, 10, 31)

    OOPSpamClient().get_flagged_count(window_start, window_end)

    expected_lookup = {
        'action': 'flag_spam',
        'created__gt': window_start,
        'created__lt': window_end,
        'node__spam_data__who_flagged__in': ['oopspam', 'both'],
    }
    mock_filter.assert_called_with(**expected_lookup)

@mock.patch('osf.models.NodeLog.objects.filter')
def test_get_hammed_count(self, mock_filter, user):
    """OOPSpamClient.get_hammed_count issues the expected NodeLog filter.

    ``NodeLog.objects.filter`` is patched, so no query is run; the test only
    checks the keyword arguments of the most recent ``filter`` call.
    """
    from datetime import datetime
    from osf.external.oopspam.client import OOPSpamClient

    window_start = datetime(2024, 10, 1)
    window_end = datetime(2024, 10, 31)

    OOPSpamClient().get_hammed_count(window_start, window_end)

    expected_lookup = {
        'action': 'confirm_ham',
        'created__gt': window_start,
        'created__lt': window_end,
        'node__spam_data__who_flagged__in': ['oopspam', 'both'],
    }
    mock_filter.assert_called_with(**expected_lookup)
43 changes: 43 additions & 0 deletions osf_tests/metrics/test_spam_count_reporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest
from datetime import datetime
from osf.metrics.reporters.spam_count import SpamCountReporter
from unittest import mock
from osf.metrics.utils import YearMonth
from osf_tests.factories import NodeLogFactory, NodeFactory

@pytest.fixture
def mock_oopspam_client():
    """Yield a mocked ``OOPSpamClient`` instance with canned counts.

    The mocked instance reports 10 flagged and 5 hammed logs. The patch is
    active only while the requesting test runs.
    """
    # Patch the name where it is looked up: the reporter module binds the
    # class via ``from osf.external.oopspam.client import OOPSpamClient``, so
    # patching the defining module would leave the reporter's reference
    # pointing at the real client.
    with mock.patch('osf.metrics.reporters.spam_count.OOPSpamClient') as mock_client:
        instance = mock_client.return_value
        instance.get_flagged_count.return_value = 10
        instance.get_hammed_count.return_value = 5
        yield instance

@pytest.fixture
def mock_akismet_client():
    """Yield a mocked ``AkismetClient`` instance with canned counts.

    The mocked instance reports 20 flagged and 10 hammed logs. The patch is
    active only while the requesting test runs.
    """
    # Patch the name where it is looked up: the reporter module binds the
    # class via ``from osf.external.askismet.client import AkismetClient``, so
    # patching the defining module would leave the reporter's reference
    # pointing at the real client.
    with mock.patch('osf.metrics.reporters.spam_count.AkismetClient') as mock_client:
        instance = mock_client.return_value
        instance.get_flagged_count.return_value = 20
        instance.get_hammed_count.return_value = 10
        yield instance

@pytest.mark.django_db
def test_spam_count_reporter():
    """The October 2024 report carries per-service flagged/hammed counts.

    Creates one node flagged by OOPSpam and one flagged by Akismet, attaches
    batches of ``flag_spam`` / ``confirm_ham`` logs to each, then checks the
    four per-service fields on the first report the reporter returns.
    """
    # NOTE(review): the client queries use an exclusive ``created__gt`` lower
    # bound; if ``YearMonth.target_month()`` is midnight on the 1st and the
    # factory honours ``created``, these logs sit exactly on that bound --
    # confirm they are still counted.
    log_created = datetime(2024, 10, 1)

    flagged_by_oopspam = NodeFactory(spam_data={'who_flagged': 'oopspam'})
    flagged_by_akismet = NodeFactory(spam_data={'who_flagged': 'akismet'})

    # (batch size, log action, node) -- created in the same order as before.
    log_batches = (
        (10, 'flag_spam', flagged_by_oopspam),
        (5, 'confirm_ham', flagged_by_oopspam),
        (20, 'flag_spam', flagged_by_akismet),
        (10, 'confirm_ham', flagged_by_akismet),
    )
    for batch_size, log_action, target_node in log_batches:
        NodeLogFactory.create_batch(
            batch_size,
            action=log_action,
            created=log_created,
            node=target_node,
        )

    october = YearMonth(2024, 10)
    reports = SpamCountReporter().report(october)
    summary = reports[0]

    assert summary.oopspam_flagged == 10
    assert summary.oopspam_hammed == 5
    assert summary.akismet_flagged == 20
    assert summary.akismet_hammed == 10
Loading