Skip to content

Commit

Permalink
Merge branch 'hotfix/24.05.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
mfraezz committed Aug 23, 2024
2 parents 9cbdfaf + 979eded commit 0bae41c
Show file tree
Hide file tree
Showing 16 changed files with 68 additions and 195 deletions.
12 changes: 12 additions & 0 deletions addons/base/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,10 @@ def osfstoragefile_mark_viewed(self, auth, fileversion, file_node):
@file_signals.file_viewed.connect
def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record views by contributors
return
enqueue_update_analytics(
resource,
file_node,
Expand All @@ -707,6 +711,10 @@ def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node):
@file_signals.file_viewed.connect
def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record views by contributors
return
if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint):
try:
PreprintView.record_for_preprint(
Expand All @@ -730,6 +738,10 @@ def osfstoragefile_downloaded_update_analytics(self, auth, fileversion, file_nod
@file_signals.file_downloaded.connect
def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record downloads by contributors
return
if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint):
try:
PreprintDownload.record_for_preprint(
Expand Down
2 changes: 0 additions & 2 deletions admin/management/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def post(self, request, *args, **kwargs):
class DailyReportersGo(ManagementCommandPermissionView):

def post(self, request, *args, **kwargs):
also_keen = bool(request.POST.get('also_send_to_keen', False))
report_date = request.POST.get('report_date', None)
if report_date:
report_date = isoparse(report_date).date()
Expand All @@ -109,7 +108,6 @@ def post(self, request, *args, **kwargs):

daily_reporters_go.apply_async(kwargs={
'report_date': report_date,
'also_send_to_keen': also_keen
})
messages.success(request, 'Daily reporters going!')
return redirect(reverse('management:commands'))
Expand Down
5 changes: 0 additions & 5 deletions admin/templates/management/commands.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,6 @@ <h4><u>Daily Reporters, Go!</u></h4>
<input type="date" name="report_date" id="report_date"/>
(default: yesterday)
<br>
<label for="also_send_to_keen">
Also send to keen?
</label>
<input type="checkbox" name="also_send_to_keen" id="also_send_to_keen"/>
(may result in duplicates)
<nav>
<input class="btn btn-success" type="submit" value="Run" />
</nav>
Expand Down
48 changes: 21 additions & 27 deletions osf/management/commands/daily_reporters_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,51 @@
import logging

from django.core.management.base import BaseCommand
from django.db.utils import OperationalError
from django.utils import timezone

from framework import sentry
from framework.celery_tasks import app as celery_app
from osf.metrics.reporters import DAILY_REPORTERS
from osf.metrics.reporters import AllDailyReporters
from website.app import init_app


logger = logging.getLogger(__name__)


@celery_app.task(name='management.commands.daily_reporters_go')
def daily_reporters_go(also_send_to_keen=False, report_date=None, reporter_filter=None):
def daily_reporters_go(report_date=None, reporter_filter=None, **kwargs):
init_app() # OSF-specific setup

if report_date is None: # default to yesterday
report_date = (timezone.now() - datetime.timedelta(days=1)).date()

errors = {}
for reporter_class in DAILY_REPORTERS:
if reporter_filter and (reporter_filter.lower() not in reporter_class.__name__.lower()):
for _reporter_key, _reporter_class in AllDailyReporters.__members__.items():
if reporter_filter and (reporter_filter.lower() not in _reporter_class.__name__.lower()):
continue
try:
reporter_class().run_and_record_for_date(
report_date=report_date,
also_send_to_keen=also_send_to_keen,
)
except Exception as e:
errors[reporter_class.__name__] = repr(e)
logger.exception(e)
sentry.log_exception(e)
# continue with the next reporter
return errors
daily_reporter_go.apply_async(kwargs={
'reporter_key': _reporter_key,
'report_date': report_date.isoformat(),
})


def date_fromisoformat(date_str):
return datetime.datetime.strptime(date_str, '%Y-%m-%d').date()
@celery_app.task(
name='management.commands.daily_reporter_go',
autoretry_for=(OperationalError,),
max_retries=5,
retry_backoff=True,
bind=True,
)
def daily_reporter_go(task, reporter_key: str, report_date: str):
_reporter_class = AllDailyReporters[reporter_key].value
_parsed_date = datetime.date.fromisoformat(report_date)
_reporter_class().run_and_record_for_date(report_date=_parsed_date)


class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
'--keen',
type=bool,
default=False,
help='also send reports to keen',
)
parser.add_argument(
'--date',
type=date_fromisoformat, # in python 3.7+, could pass datetime.date.fromisoformat
type=datetime.date.fromisoformat,
help='run for a specific date (default: yesterday)',
)
parser.add_argument(
Expand All @@ -62,7 +57,6 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
errors = daily_reporters_go(
report_date=options.get('date'),
also_send_to_keen=options['keen'],
reporter_filter=options.get('filter'),
)
for error_key, error_val in errors.items():
Expand Down
31 changes: 19 additions & 12 deletions osf/management/commands/monthly_reporters_go.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging

from django.core.management.base import BaseCommand
from django.db.utils import OperationalError
from django.utils import timezone

from framework import sentry
from framework.celery_tasks import app as celery_app
from osf.metrics.reporters import MONTHLY_REPORTERS
from osf.metrics.reporters import AllMonthlyReporters
from osf.metrics.utils import YearMonth
from website.app import init_app

Expand All @@ -28,17 +28,24 @@ def monthly_reporters_go(report_year=None, report_month=None):
year=today.year if today.month > 1 else today.year - 1,
month=today.month - 1 or MAXMONTH,
)
for _reporter_key in AllMonthlyReporters.__members__.keys():
monthly_reporter_go.apply_async(kwargs={
'reporter_key': _reporter_key,
'yearmonth': str(report_yearmonth),
})

errors = {}
for reporter_class in MONTHLY_REPORTERS:
try:
reporter_class().run_and_record_for_month(report_yearmonth)
except Exception as e:
errors[reporter_class.__name__] = str(e)
logger.exception(e)
sentry.log_exception(e)
# continue with the next reporter
return errors

@celery_app.task(
name='management.commands.monthly_reporter_go',
autoretry_for=(OperationalError,),
max_retries=5,
retry_backoff=True,
bind=True,
)
def monthly_reporter_go(task, reporter_key: str, yearmonth: str):
_reporter_class = AllMonthlyReporters[reporter_key].value
_parsed_yearmonth = YearMonth.from_str(yearmonth)
_reporter_class().run_and_record_for_month(_parsed_yearmonth)


class Command(BaseCommand):
Expand Down
29 changes: 15 additions & 14 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import enum

# from .active_users import ActiveUserReporter
from .storage_addon_usage import StorageAddonUsageReporter
from .download_count import DownloadCountReporter
Expand All @@ -10,18 +12,17 @@
from .spam_count import SpamCountReporter


DAILY_REPORTERS = (
# ActiveUserReporter,
DownloadCountReporter,
InstitutionSummaryReporter,
NewUserDomainReporter,
NodeCountReporter,
OsfstorageFileCountReporter,
PreprintCountReporter,
StorageAddonUsageReporter,
UserCountReporter,
)
class AllDailyReporters(enum.Enum):
# ACTIVE_USER = ActiveUserReporter
DOWNLOAD_COUNT = DownloadCountReporter
INSTITUTION_SUMMARY = InstitutionSummaryReporter
NEW_USER_DOMAIN = NewUserDomainReporter
NODE_COUNT = NodeCountReporter
OSFSTORAGE_FILE_COUNT = OsfstorageFileCountReporter
PREPRINT_COUNT = PreprintCountReporter
STORAGE_ADDON_USAGE = StorageAddonUsageReporter
USER_COUNT = UserCountReporter


MONTHLY_REPORTERS = (
SpamCountReporter,
)
class AllMonthlyReporters(enum.Enum):
SPAM_COUNT = SpamCountReporter
47 changes: 1 addition & 46 deletions osf/metrics/reporters/_base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
from collections import defaultdict
from datetime import datetime
import logging
import pytz

from keen.client import KeenClient

from osf.metrics.utils import YearMonth
from website.settings import KEEN as keen_settings


logger = logging.getLogger(__name__)
Expand All @@ -33,49 +27,10 @@ def report(self, report_date):
"""
raise NotImplementedError(f'{self.__name__} must implement `report`')

def keen_events_from_report(self, report):
"""given one of this reporter's own reports, build equivalent keen events
(for back-compat; to be deleted once we don't need keen anymore)
return a mapping from keen collection name to iterable of events
e.g. {'my_keen_collection': [event1, event2, ...]}
"""
raise NotImplementedError(f'{self.__name__} should probably implement keen_events_from_report')

def run_and_record_for_date(self, report_date, *, also_send_to_keen=False):
def run_and_record_for_date(self, report_date):
reports = self.report(report_date)

# expecting each reporter to spit out only a handful of reports per day;
# not bothering with bulk-create
for report in reports:
report.save()

if also_send_to_keen:
self.send_to_keen(reports)

def send_to_keen(self, reports):
keen_project = keen_settings['private']['project_id']
write_key = keen_settings['private']['write_key']
if not (keen_project and write_key):
logger.warning(f'keen not configured; not sending events for {self.__class__.__name__}')
return

keen_events_by_collection = defaultdict(list)
for report in reports:
keen_event_timestamp = datetime(
report.report_date.year,
report.report_date.month,
report.report_date.day,
tzinfo=pytz.utc,
)

for collection_name, keen_events in self.keen_events_from_report(report).items():
for event in keen_events:
event['keen'] = {'timestamp': keen_event_timestamp.isoformat()}
keen_events_by_collection[collection_name].extend(keen_events)

client = KeenClient(
project_id=keen_project,
write_key=write_key,
)
client.add_events(keen_events_by_collection)
8 changes: 0 additions & 8 deletions osf/metrics/reporters/download_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,3 @@ def report(self, date):
report_date=date,
),
]

def keen_events_from_report(self, report):
event = {
'files': {
'total': report.daily_file_downloads,
},
}
return {'download_count_summary': [event]}
14 changes: 0 additions & 14 deletions osf/metrics/reporters/institution_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,3 @@ def report(self, date):

reports.append(report)
return reports

def keen_events_from_report(self, report):
event = {
'institution': {
'id': report.institution_id,
'name': report.institution_name,
},
'users': report.users.to_dict(),
'nodes': report.nodes.to_dict(),
'projects': report.projects.to_dict(),
'registered_nodes': report.registered_nodes.to_dict(),
'registered_projects': report.registered_projects.to_dict(),
}
return {'institution_summary': [event]}
9 changes: 0 additions & 9 deletions osf/metrics/reporters/new_user_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,3 @@ def report(self, date):
)
for domain_name, count in domain_names.items()
]

def keen_events_from_report(self, report):
events = [
{'domain': report.domain_name, 'date': str(report.report_date)}
for _ in range(report.new_user_count)
]
return {
'user_domain_events': events,
}
9 changes: 0 additions & 9 deletions osf/metrics/reporters/node_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,3 @@ def report(self, date):
)

return [report]

def keen_events_from_report(self, report):
event = {
'nodes': report.nodes.to_dict(),
'projects': report.projects.to_dict(),
'registered_nodes': report.registered_nodes.to_dict(),
'registered_projects': report.registered_projects.to_dict(),
}
return {'node_summary': [event]}
6 changes: 0 additions & 6 deletions osf/metrics/reporters/osfstorage_file_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,3 @@ def report(self, date):
)

return [report]

def keen_events_from_report(self, report):
event = {
'osfstorage_files_including_quickfiles': report.files.to_dict(),
}
return {'file_summary': [event]}
9 changes: 0 additions & 9 deletions osf/metrics/reporters/preprint_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,3 @@ def report(self, date):
logger.info('{} Preprints counted for the provider {}'.format(resp['hits']['total'], preprint_provider.name))

return reports

def keen_events_from_report(self, report):
event = {
'provider': {
'name': report.provider_key,
'total': report.preprint_count,
},
}
return {'preprint_summary': [event]}
Loading

0 comments on commit 0bae41c

Please sign in to comment.