Skip to content

Commit

Permalink
create monthly reporter for institution summary
Browse files Browse the repository at this point in the history
  • Loading branch information
John Tordoff committed Sep 19, 2024
1 parent 338653d commit a5a206f
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 0 deletions.
2 changes: 2 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .download_count import DownloadCountReporter
from .institution_summary import InstitutionSummaryReporter
from .institutional_users import InstitutionalUsersReporter
from .institution_summary_monthly import InstitutionalSummaryMonthlyReporter
from .new_user_domain import NewUserDomainReporter
from .node_count import NodeCountReporter
from .osfstorage_file_count import OsfstorageFileCountReporter
Expand All @@ -28,3 +29,4 @@ class AllDailyReporters(enum.Enum):
class AllMonthlyReporters(enum.Enum):
    # Enumeration of the reporters that run on a monthly cadence.
    # Each member's value is the reporter class itself (not an instance).
    SPAM_COUNT = SpamCountReporter
    INSTITUTIONAL_USERS = InstitutionalUsersReporter
    # Per-institution summary report, one per institution per month.
    INSTITUTIONAL_SUMMARY = InstitutionalSummaryMonthlyReporter
95 changes: 95 additions & 0 deletions osf/metrics/reporters/institution_summary_monthly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from django.contrib.contenttypes.models import ContentType
from django.db.models import Q, F, Sum

from osf.models import Institution, Preprint, AbstractNode, FileVersion
from osf.models.spam import SpamStatus
from addons.osfstorage.models import OsfStorageFile
from osf.metrics.reports import InstitutionMonthlySummaryReport
from osf.metrics.utils import YearMonth
from ._base import MonthlyReporter
from datetime import datetime


class InstitutionalSummaryMonthlyReporter(MonthlyReporter):
    """Generate an InstitutionMonthlySummaryReport for each institution.

    Most counts are a snapshot of the database at the time the reporter runs;
    only the ``monthly_*`` user counts are restricted to the requested month.
    """

    def report(self, yearmonth: YearMonth):
        """Yield one summary report per institution for ``yearmonth``."""
        for institution in Institution.objects.all():
            yield self.generate_report(institution, yearmonth)

    def generate_report(self, institution, yearmonth):
        """Build the summary report for a single institution.

        :param institution: the ``Institution`` to summarize.
        :param yearmonth: the ``YearMonth`` used for the monthly user counts.
        :return: an unsaved ``InstitutionMonthlySummaryReport``.
        """
        # Non-deleted nodes affiliated with the institution; shared by the
        # project, registration, file and storage counts below.
        node_queryset = institution.nodes.filter(deleted__isnull=True)

        return InstitutionMonthlySummaryReport(
            institution_id=institution._id,
            private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False),
            public_project_count=self._get_count(node_queryset, 'osf.node', is_public=True),
            user_count=institution.get_institution_users().count(),
            public_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=True),
            embargoed_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=False),
            # The report schema names this field ``published_preprint_count``.
            published_preprint_count=self.get_published_preprints(institution).count(),
            storage_byte_count=self.get_storage_size(node_queryset, institution),
            public_file_count=self.get_files(node_queryset, institution, is_public=True).count(),
            monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, yearmonth),
            monthly_active_user_count=self.get_monthly_active_user_count(institution, yearmonth),
        )

    def _get_count(self, node_queryset, node_type, is_public):
        """Count root nodes of ``node_type`` with the given visibility.

        ``root_id=F('pk')`` keeps only nodes that are their own root, so
        components are not counted separately.
        """
        return node_queryset.filter(type=node_type, is_public=is_public, root_id=F('pk')).count()

    def get_published_preprints(self, institution):
        """Return viewable, non-spam preprints affiliated with ``institution``.

        Returns an empty queryset when ``Preprint`` has no
        ``affiliated_institutions`` attribute.
        """
        if not hasattr(Preprint, "affiliated_institutions"):
            return Preprint.objects.none()
        return Preprint.objects.can_view().filter(
            affiliated_institutions=institution
        ).exclude(spam_status=SpamStatus.SPAM)

    def get_files(self, node_queryset, institution, is_public=None):
        """Return non-deleted, non-purged osfstorage files for the institution.

        Files are included when their target is a node in ``node_queryset``
        or one of the institution's published preprints.

        :param is_public: ``None`` (default) applies no visibility filter to
            the nodes; ``True``/``False`` restricts the nodes to that
            ``is_public`` value. Preprint files are included regardless.
        """
        # Compare against None so that an explicit ``is_public=False`` is
        # honoured instead of being treated as "no filter".
        public_kwargs = {} if is_public is None else {'is_public': is_public}

        target_node_q = Q(
            target_object_id__in=node_queryset.filter(**public_kwargs).values("pk"),
            target_content_type=ContentType.objects.get_for_model(AbstractNode),
        )
        target_preprint_q = Q(
            target_object_id__in=self.get_published_preprints(institution).values("pk"),
            target_content_type=ContentType.objects.get_for_model(Preprint),
        )
        return OsfStorageFile.objects.filter(
            deleted__isnull=True, purged__isnull=True
        ).filter(target_node_q | target_preprint_q)

    def get_storage_size(self, node_queryset, institution):
        """Return the summed size of non-purged, non-empty file versions.

        Covers every file returned by ``get_files`` with no visibility
        filter; yields 0 when there are no matching versions.
        """
        files = self.get_files(node_queryset, institution)
        return FileVersion.objects.filter(
            size__gt=0,
            purged__isnull=True,
            basefilenode__in=files
        ).aggregate(storage_bytes=Sum("size", default=0))["storage_bytes"]

    def get_month_start_end(self, yearmonth):
        """Return ``(start, end)`` datetimes bounding ``yearmonth``.

        ``start`` is the first instant of the month and ``end`` is the first
        instant of the *following* month, so ``end`` must be treated as an
        exclusive upper bound.
        """
        # NOTE(review): these datetimes are naive; if the project runs Django
        # with time zone support enabled, consider making them aware - confirm.
        start_date = datetime(yearmonth.year, yearmonth.month, 1)
        if yearmonth.month == 12:
            # December rolls over into January of the next year.
            end_date = datetime(yearmonth.year + 1, 1, 1)
        else:
            end_date = datetime(yearmonth.year, yearmonth.month + 1, 1)
        return start_date, end_date

    def _count_users_logged_in_during(self, institution, yearmonth, **extra_filters):
        """Count institution users whose last login falls within ``yearmonth``.

        ``extra_filters`` are applied in the same ``filter()`` call.
        """
        start_date, end_date = self.get_month_start_end(yearmonth)
        return institution.get_institution_users().filter(
            date_last_login__gte=start_date,
            # Exclusive bound: ``end_date`` already belongs to the next month.
            date_last_login__lt=end_date,
            **extra_filters,
        ).count()

    def get_monthly_logged_in_user_count(self, institution, yearmonth):
        """Count institution users whose last login was during ``yearmonth``."""
        return self._count_users_logged_in_during(institution, yearmonth)

    def get_monthly_active_user_count(self, institution, yearmonth):
        """Count non-disabled users whose last login was during ``yearmonth``."""
        return self._count_users_logged_in_during(
            institution, yearmonth, date_disabled__isnull=True,
        )
14 changes: 14 additions & 0 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,17 @@ class InstitutionalUserReport(MonthlyReport):
published_preprint_count = metrics.Integer()
public_file_count = metrics.Long()
storage_byte_count = metrics.Long()


class InstitutionMonthlySummaryReport(MonthlyReport):
    # Monthly summary of one institution's users, content and storage.
    # At most one report per (report_yearmonth, institution_id) pair.
    UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', )
    institution_id = metrics.Keyword()
    # Fields populated by InstitutionalSummaryMonthlyReporter.generate_report;
    # user_count, storage_byte_count and the monthly_* counts were previously
    # passed by the reporter without being declared here.
    user_count = metrics.Integer()
    public_project_count = metrics.Integer()
    private_project_count = metrics.Integer()
    public_registration_count = metrics.Integer()
    embargoed_registration_count = metrics.Integer()
    published_preprint_count = metrics.Integer()
    storage_byte_count = metrics.Long()
    public_file_count = metrics.Long()
    monthly_logged_in_user_count = metrics.Integer()
    monthly_active_user_count = metrics.Integer()
    # NOTE(review): the fields below are declared but not set by the reporter
    # shown in this commit; kept so existing readers of them keep working.
    private_file_count = metrics.Long()
    public_storage_count = metrics.Long()
    private_storage_count = metrics.Long()
77 changes: 77 additions & 0 deletions osf_tests/metrics/reporters/test_institutional_summary_reporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import datetime
from django.test import TestCase
from osf.models import Preprint
from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter
from osf.metrics.utils import YearMonth
from osf_tests.factories import (
InstitutionFactory,
ProjectFactory,
RegistrationFactory,
PreprintFactory,
UserFactory,
AuthUserFactory,
)


class TestInstiSummaryMonthlyReporter(TestCase):
    """Exercise InstitutionalSummaryMonthlyReporter against one institution."""

    @classmethod
    def setUpTestData(cls):
        """Create one institution with one of each kind of affiliated object."""
        cls._yearmonth = YearMonth(2018, 2)
        cls._institution = InstitutionFactory()
        # A moment inside the reported month (February 2018).
        cls._now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC)

        cls._public_project = cls._create_affiliated_project(is_public=True)
        cls._private_project = cls._create_affiliated_project(is_public=False)
        cls._public_registration = cls._create_affiliated_registration(is_public=True)
        cls._embargoed_registration = cls._create_affiliated_registration(is_public=False)

        # The preprint fixture only exists when Preprint supports institution
        # affiliation, so the expected count has to follow the same condition.
        cls._expected_preprint_count = 0
        if hasattr(Preprint, 'affiliated_institutions'):
            cls._published_preprint = PreprintFactory(creator=UserFactory(), is_public=True)
            cls._published_preprint.affiliated_institutions.add(cls._institution)
            cls._expected_preprint_count = 1

        cls._logged_in_user = cls._create_logged_in_user()
        cls._active_user = cls._create_active_user()

    @classmethod
    def _create_affiliated_project(cls, is_public):
        """Create a project affiliated with the test institution."""
        project = ProjectFactory(creator=UserFactory(), is_public=is_public)
        project.affiliated_institutions.add(cls._institution)
        return project

    @classmethod
    def _create_affiliated_registration(cls, is_public):
        """Create a registration affiliated with the test institution."""
        registration = RegistrationFactory(creator=UserFactory(), is_public=is_public)
        registration.affiliated_institutions.add(cls._institution)
        return registration

    @classmethod
    def _create_logged_in_user(cls):
        """Create an affiliated user whose last login is inside the month."""
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(cls._institution)
        user.date_last_login = cls._now
        user.save()
        return user

    @classmethod
    def _create_active_user(cls):
        """Create an affiliated user confirmed the day before ``_now``."""
        # NOTE(review): only date_confirmed is set here, not date_last_login;
        # presumably this user does not match a last-login filter - confirm.
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(cls._institution)
        user.date_confirmed = cls._now - datetime.timedelta(days=1)
        user.save()
        return user

    def test_report_generation(self):
        """The reporter yields one report with the expected counts."""
        reporter = InstitutionalSummaryMonthlyReporter()
        reports = list(reporter.report(self._yearmonth))
        self.assertEqual(len(reports), 1)

        report = reports[0]
        self.assertEqual(report.institution_id, self._institution._id)
        self.assertEqual(report.public_project_count, 1)
        self.assertEqual(report.private_project_count, 1)
        self.assertEqual(report.public_registration_count, 1)
        self.assertEqual(report.embargoed_registration_count, 1)
        self.assertEqual(report.published_preprint_count, self._expected_preprint_count)
        self.assertEqual(report.monthly_logged_in_user_count, 1)
        self.assertEqual(report.monthly_active_user_count, 1)

0 comments on commit a5a206f

Please sign in to comment.