-
Notifications
You must be signed in to change notification settings - Fork 330
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ENG-6124] Create Monthly Reporter for Institution Summary #10756
Changes from 4 commits
74a43c1
ac092ae
79b2d23
e339efa
dd9c7ea
edeb0ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
from django.contrib.contenttypes.models import ContentType | ||
from django.db.models import Q, F, Sum | ||
|
||
from osf.models import Institution, Preprint, AbstractNode, FileVersion | ||
from osf.models.spam import SpamStatus | ||
from addons.osfstorage.models import OsfStorageFile | ||
from osf.metrics.reports import InstitutionMonthlySummaryReport | ||
from osf.metrics.utils import YearMonth | ||
from ._base import MonthlyReporter | ||
|
||
|
||
class InstitutionalSummaryMonthlyReporter(MonthlyReporter):
    """Generate an InstitutionMonthlySummaryReport for each institution."""

    def report(self, yearmonth: YearMonth):
        """Yield one summary report per institution for ``yearmonth``."""
        for institution in Institution.objects.all():
            yield self.generate_report(institution, yearmonth)

    def generate_report(self, institution, yearmonth: YearMonth):
        """Build the summary report for a single institution.

        Nodes are limited to those affiliated with ``institution`` that are
        not deleted, not flagged as spam, and were created before the start
        of the month following ``yearmonth``.
        """
        node_queryset = institution.nodes.filter(
            deleted__isnull=True,
            created__lt=yearmonth.next_month(),
        ).exclude(
            spam_status=SpamStatus.SPAM,
        )

        preprint_queryset = self.get_published_preprints(institution, yearmonth)

        return InstitutionMonthlySummaryReport(
            institution_id=institution._id,
            user_count=institution.get_institution_users().count(),
            private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False),
            public_project_count=self._get_count(node_queryset, 'osf.node', is_public=True),
            public_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=True),
            embargoed_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=False),
            published_preprint_count=preprint_queryset.count(),
            storage_byte_count=self.get_storage_size(node_queryset, preprint_queryset),
            public_file_count=self.get_files(node_queryset, preprint_queryset, is_public=True).count(),
            monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, yearmonth),
            monthly_active_user_count=self.get_monthly_active_user_count(institution, yearmonth),
        )

    def _get_count(self, node_queryset, node_type, is_public):
        """Count nodes of ``node_type`` with the given visibility.

        ``root_id=F('pk')`` keeps only nodes that are their own root, so
        child components are not counted separately.
        """
        return node_queryset.filter(type=node_type, is_public=is_public, root_id=F('pk')).count()

    def get_published_preprints(self, institution, yearmonth: YearMonth):
        """Return viewable, non-spam preprints affiliated with ``institution``.

        Only preprints created before the start of the following month are
        included. The bound is exclusive (``__lt``) so it matches the node
        query in ``generate_report``; an inclusive bound would count a
        preprint created at the first instant of the next month.
        """
        # NOTE(review): `can_view()` is called without a user; presumably this
        # limits results to publicly viewable preprints -- confirm.
        return Preprint.objects.can_view().filter(
            affiliated_institutions=institution,
            created__lt=yearmonth.next_month(),
        ).exclude(
            spam_status=SpamStatus.SPAM,
        )

    def get_files(self, node_queryset, preprint_queryset, is_public=None):
        """Return non-deleted, non-purged osfstorage files on the given targets.

        ``is_public`` narrows the *node* targets only: ``None`` (default)
        applies no visibility filter, while ``True``/``False`` keep only
        public/private nodes. Files on ``preprint_queryset`` are always
        included.
        """
        # Compare against None so an explicit `is_public=False` is honoured
        # instead of being treated the same as "no filter".
        public_kwargs = {} if is_public is None else {'is_public': is_public}

        target_node_q = Q(
            target_object_id__in=node_queryset.filter(**public_kwargs).values('pk'),
            target_content_type=ContentType.objects.get_for_model(AbstractNode),
        )
        target_preprint_q = Q(
            target_object_id__in=preprint_queryset.values('pk'),
            target_content_type=ContentType.objects.get_for_model(Preprint),
        )
        return OsfStorageFile.objects.filter(
            deleted__isnull=True, purged__isnull=True
        ).filter(target_node_q | target_preprint_q)

    def get_storage_size(self, node_queryset, preprint_queryset):
        """Sum the sizes of all non-purged, non-empty versions of the files.

        Returns 0 when there are no matching file versions.
        """
        files = self.get_files(node_queryset, preprint_queryset)
        return FileVersion.objects.filter(
            size__gt=0,
            purged__isnull=True,
            basefilenode__in=files
        ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes']

    def get_monthly_logged_in_user_count(self, institution, yearmonth: YearMonth):
        """Count institution users whose last login falls within ``yearmonth``."""
        return institution.get_institution_users().filter(
            date_last_login__gte=yearmonth.target_month(),
            date_last_login__lt=yearmonth.next_month()
        ).count()

    def get_monthly_active_user_count(self, institution, yearmonth: YearMonth):
        """Count enabled institution users with a log created in ``yearmonth``.

        A user counts as active when they have at least one node log or
        preprint log created within the month.
        """
        month_start = yearmonth.target_month()
        month_end = yearmonth.next_month()

        institution_users = institution.get_institution_users().filter(
            date_disabled__isnull=True
        )

        # Both bounds of each range sit in one Q inside a single filter() call,
        # so they constrain the same related log row. distinct() collapses the
        # duplicate user rows produced by the joins.
        active_users = institution_users.filter(
            Q(
                logs__created__gte=month_start,
                logs__created__lt=month_end
            ) |
            Q(
                preprint_logs__created__gte=month_start,
                preprint_logs__created__lt=month_end
            )
        ).distinct()

        return active_users.count()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import datetime | ||
from django.test import TestCase | ||
from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter | ||
from osf.metrics.utils import YearMonth | ||
from osf_tests.factories import ( | ||
InstitutionFactory, | ||
ProjectFactory, | ||
RegistrationFactory, | ||
PreprintFactory, | ||
AuthUserFactory, | ||
) | ||
|
||
|
||
class TestInstiSummaryMonthlyReporter(TestCase):
    """Tests for InstitutionalSummaryMonthlyReporter against February 2018."""

    @classmethod
    def setUpTestData(cls):
        cls._yearmonth = YearMonth(2018, 2)  # February 2018
        cls._institution = InstitutionFactory()
        cls._now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC)

        # Existing data for the primary institution
        cls._public_project = cls._create_affiliated_project(cls._institution, is_public=True, created=cls._now)
        cls._private_project = cls._create_affiliated_project(cls._institution, is_public=False, created=cls._now)
        cls._public_registration = cls._create_affiliated_registration(cls._institution, is_public=True, created=cls._now)
        cls._embargoed_registration = cls._create_affiliated_registration(cls._institution, is_public=False, created=cls._now)

        cls._published_preprint = cls._create_affiliated_preprint(cls._institution, is_public=True, created=cls._now)

        cls._logged_in_user = cls._create_logged_in_user(cls._institution, date_last_login=cls._now)
        cls._active_user = cls._create_active_user(cls._institution, date_confirmed=cls._now - datetime.timedelta(days=1))

    @classmethod
    def _create_affiliated_preprint(cls, institution, is_public, created):
        """Create a preprint affiliated with ``institution`` and backdate it."""
        published_preprint = PreprintFactory(is_public=is_public)
        published_preprint.affiliated_institutions.add(institution)
        published_preprint.created = created
        published_preprint.save()
        return published_preprint

    @classmethod
    def _create_affiliated_project(cls, institution, is_public, created):
        """Create a project affiliated with ``institution`` and backdate it."""
        project = ProjectFactory(is_public=is_public)
        project.affiliated_institutions.add(institution)
        project.created = created
        project.save()
        return project

    @classmethod
    def _create_affiliated_registration(cls, institution, is_public, created):
        """Create a registration affiliated with ``institution`` and backdate it."""
        registration = RegistrationFactory(is_public=is_public)
        registration.affiliated_institutions.add(institution)
        registration.created = created
        registration.save()
        return registration

    @classmethod
    def _create_logged_in_user(cls, institution, date_last_login):
        """Create an affiliated user whose last login is ``date_last_login``."""
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(institution)
        user.date_last_login = date_last_login
        user.save()
        return user

    @classmethod
    def _create_active_user(cls, institution, date_confirmed):
        """Create an affiliated user with one log dated ``date_confirmed``."""
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(institution)
        user.date_confirmed = date_confirmed
        ProjectFactory(creator=user)  # adds log to make active
        log = user.logs.get()
        log.created = date_confirmed
        log.save()
        user.save()
        return user

    def test_report_generation(self):
        reporter = InstitutionalSummaryMonthlyReporter()
        reports = list(reporter.report(self._yearmonth))
        self.assertEqual(len(reports), 1)

        report = reports[0]
        self.assertEqual(report.institution_id, self._institution._id)
        self.assertEqual(report.user_count, 2)  # _logged_in_user and _active_user
        self.assertEqual(report.public_project_count, 1)
        self.assertEqual(report.private_project_count, 1)
        self.assertEqual(report.public_registration_count, 1)
        self.assertEqual(report.embargoed_registration_count, 1)
        self.assertEqual(report.published_preprint_count, 1)
        self.assertEqual(report.storage_byte_count, 1337)  # test value for one file
        self.assertEqual(report.public_file_count, 1)
        self.assertEqual(report.monthly_logged_in_user_count, 1)
        self.assertEqual(report.monthly_active_user_count, 1)

    def test_report_generation_multiple_institutions(self):
        institution2 = InstitutionFactory()
        # institution3 has no activity in the target month (review question:
        # why create it -- to confirm the reporter copes with that case).
        institution3 = InstitutionFactory()

        # Dates outside the target month; the in-month date is self._now
        # (review nit: it is already defined in setUpTestData).
        last_month = datetime.datetime(2018, 1, 15, tzinfo=datetime.UTC)
        next_month = datetime.datetime(2018, 3, 10, tzinfo=datetime.UTC)

        self._create_affiliated_project(institution2, is_public=True, created=self._now)
        self._create_affiliated_project(institution3, is_public=True, created=last_month)

        # Create future projects for self._institution (should not be counted)
        self._create_affiliated_project(self._institution, is_public=True, created=next_month)

        # Create users affiliated with different institutions
        self._create_active_user(institution2, date_confirmed=self._now)
        self._create_active_user(institution3, date_confirmed=last_month)

        # Run the reporter for the current month (February 2018)
        reporter = InstitutionalSummaryMonthlyReporter()
        reports = list(reporter.report(self._yearmonth))
        self.assertEqual(len(reports), 3)  # Reports for self._institution, institution2, institution3

        # Extract reports by institution
        report_institution = next(r for r in reports if r.institution_id == self._institution._id)
        report_institution2 = next(r for r in reports if r.institution_id == institution2._id)
        report_institution3 = next(r for r in reports if r.institution_id == institution3._id)

        # Validate report for self._institution
        self.assertEqual(report_institution.public_project_count, 1)
        self.assertEqual(report_institution.private_project_count, 1)
        self.assertEqual(report_institution.user_count, 2)
        self.assertEqual(report_institution.monthly_active_user_count, 1)
        self.assertEqual(report_institution.monthly_logged_in_user_count, 1)

        # Validate report for institution2
        self.assertEqual(report_institution2.public_project_count, 1)
        self.assertEqual(report_institution2.private_project_count, 0)
        self.assertEqual(report_institution2.user_count, 1)
        self.assertEqual(report_institution2.monthly_active_user_count, 1)
        self.assertEqual(report_institution2.monthly_logged_in_user_count, 0)  # No logged-in users

        # Validate report for institution3: the project created in a prior
        # month still counts, but the user's only log is outside the month.
        self.assertEqual(report_institution3.public_project_count, 1)
        self.assertEqual(report_institution3.private_project_count, 0)
        self.assertEqual(report_institution3.user_count, 1)
        self.assertEqual(report_institution3.monthly_active_user_count, 0)
Comment on lines
+132
to
+136
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Having no counts greater than 1 doesn't test the counting logic very well -- when I suggested "more than one test case" I meant to imply a small variety of situations that yield different results, not roughly the same situation a second time (though don't get me wrong, "multiple institutions" is a good step in that direction). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would ideally have more than one test case