Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-6165] monthly institutional-users metrics report #10722

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .storage_addon_usage import StorageAddonUsageReporter
from .download_count import DownloadCountReporter
from .institution_summary import InstitutionSummaryReporter
from .institutional_users import InstitutionalUsersReporter
from .new_user_domain import NewUserDomainReporter
from .node_count import NodeCountReporter
from .osfstorage_file_count import OsfstorageFileCountReporter
Expand All @@ -26,3 +27,4 @@ class AllDailyReporters(enum.Enum):

class AllMonthlyReporters(enum.Enum):
    """enumeration of the monthly reporter classes, one member per reporter

    each member's value is the reporter class itself (not an instance)
    """

    SPAM_COUNT = SpamCountReporter
    INSTITUTIONAL_USERS = InstitutionalUsersReporter
9 changes: 7 additions & 2 deletions osf/metrics/reporters/_base.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
from collections import abc
import logging

from osf.metrics.reports import MonthlyReport
from osf.metrics.utils import YearMonth


logger = logging.getLogger(__name__)


class MonthlyReporter:
def report(self, report_yearmonth: YearMonth):
def report(
    self,
    report_yearmonth: YearMonth,
) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]:
    """build the reports for the given month

    subclasses must override this method; the base implementation only raises

    Args:
        report_yearmonth: the month to build reports for

    Returns:
        an iterable (or iterator) of `MonthlyReport` instances

    Raises:
        NotImplementedError: always, when a subclass has not overridden `report`
    """
    # `__name__` lives on the class, not on its instances: `self.__name__`
    # would raise AttributeError here and mask the intended NotImplementedError
    raise NotImplementedError(f'{type(self).__name__} must implement `report`')

def run_and_record_for_month(self, report_yearmonth: YearMonth):
def run_and_record_for_month(self, report_yearmonth: YearMonth) -> None:
reports = self.report(report_yearmonth)
for report in reports:
assert report.report_yearmonth == str(report_yearmonth)
Expand Down
141 changes: 141 additions & 0 deletions osf/metrics/reporters/institutional_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import dataclasses
import datetime

from django.contrib.contenttypes.models import ContentType
from django.db.models import Q, F, Sum

from osf import models as osfdb
from osf.models.spam import SpamStatus
from addons.osfstorage.models import OsfStorageFile
from osf.metrics.reports import InstitutionalUserReport
from osf.metrics.utils import YearMonth
from ._base import MonthlyReporter


_CHUNK_SIZE = 500


class InstitutionalUsersReporter(MonthlyReporter):
    '''yield one InstitutionalUserReport per institution-user affiliation

    feeds the institution dashboard (/institutions/<id>/dashboard/), where
    institutional admins can see how people at their institution are using
    osf, based on the osf objects explicitly affiliated with the institution

    NOTE(review): a reviewer asked that this docstring explain how the report
    is used and warn that changes here could affect the dashboard page
    '''

    def report(self, yearmonth: YearMonth):
        # everything is filtered to "created before this cutoff"
        # (presumably the start of the following month -- confirm in YearMonth)
        cutoff = yearmonth.next_month()
        institutions = osfdb.Institution.objects.filter(created__lt=cutoff)
        for institution in institutions:
            affiliated_users = (
                institution.get_institution_users()
                .filter(created__lt=cutoff)
                .iterator(chunk_size=_CHUNK_SIZE)  # stream users in chunks
            )
            for user in affiliated_users:
                # the helper builds its report on construction
                yield _InstiUserReportHelper(institution, user, yearmonth, cutoff).report


# helper
@dataclasses.dataclass
class _InstiUserReportHelper:
    '''build the InstitutionalUserReport for one institution-user affiliation

    the report is assembled in `__post_init__` and left on `self.report`;
    each count on the report comes from one of the helpers below:

    - public_project_count: `_public_project_queryset`
    - private_project_count: `_private_project_queryset`
    - public_registration_count: `_public_registration_queryset`
    - embargoed_registration_count: `_embargoed_registration_queryset`
    - published_preprint_count: `_published_preprint_queryset`
    - public_file_count: `_public_osfstorage_file_queryset`
    - storage_byte_count: `_storage_byte_count`

    every helper only considers objects dated before `before_datetime`
    '''
    institution: osfdb.Institution
    user: osfdb.OSFUser
    yearmonth: YearMonth
    before_datetime: datetime.datetime
    # built in `__post_init__`, so not accepted as a constructor argument
    report: InstitutionalUserReport = dataclasses.field(init=False)

    def __post_init__(self):
        '''run the queries and populate `self.report`'''
        _affiliation = self.user.get_institution_affiliation(self.institution._id)
        # guard against a missing last-login date (e.g. an account that has
        # never logged in) so one such user cannot break the whole report run
        _last_login = self.user.date_last_login
        self.report = InstitutionalUserReport(
            report_yearmonth=self.yearmonth,
            institution_id=self.institution._id,
            user_id=self.user._id,
            user_name=self.user.fullname,
            department_name=(_affiliation.sso_department or None),
            month_last_login=(
                YearMonth.from_date(_last_login)
                if _last_login is not None
                else None
            ),
            account_creation_date=YearMonth.from_date(self.user.created),
            # NOTE(review): only verified ORCID iDs are reported; the reviewer
            # agreed with that choice but suggested confirming it with Product,
            # in case users notice theirs is missing and complain
            orcid_id=self.user.get_verified_external_id('ORCID', verified_only=True),
            public_project_count=self._public_project_queryset().count(),
            private_project_count=self._private_project_queryset().count(),
            public_registration_count=self._public_registration_queryset().count(),
            embargoed_registration_count=self._embargoed_registration_queryset().count(),
            public_file_count=self._public_osfstorage_file_queryset().count(),
            published_preprint_count=self._published_preprint_queryset().count(),
            storage_byte_count=self._storage_byte_count(),
        )

    def _node_queryset(self):
        '''institution-affiliated nodes, narrowed to this user

        starts from the institution's non-deleted, non-spam nodes created
        before the cutoff, then hands that to `get_nodes_for_user` as the
        base queryset
        '''
        _institution_node_qs = self.institution.nodes.filter(
            created__lt=self.before_datetime,
            is_deleted=False,
        ).exclude(spam_status=SpamStatus.SPAM)
        return osfdb.Node.objects.get_nodes_for_user(
            user=self.user,
            base_queryset=_institution_node_qs,
        )

    def _public_project_queryset(self):
        '''public root-level projects from `_node_queryset`'''
        return self._node_queryset().filter(
            type='osf.node',  # `type` field from TypedModel
            is_public=True,
            root_id=F('pk'),  # only root nodes
        )

    def _private_project_queryset(self):
        '''non-public root-level projects from `_node_queryset`'''
        return self._node_queryset().filter(
            type='osf.node',  # `type` field from TypedModel
            is_public=False,
            root_id=F('pk'),  # only root nodes
        )

    def _public_registration_queryset(self):
        '''public root-level registrations from `_node_queryset`'''
        return self._node_queryset().filter(
            type='osf.registration',  # `type` field from TypedModel
            is_public=True,
            root_id=F('pk'),  # only root nodes
        )

    def _embargoed_registration_queryset(self):
        '''non-public root-level registrations whose embargo ends at or after the cutoff'''
        return self._node_queryset().filter(
            type='osf.registration',  # `type` field from TypedModel
            is_public=False,
            root_id=F('pk'),  # only root nodes
            embargo__end_date__gte=self.before_datetime,
        )

    def _published_preprint_queryset(self):
        '''viewable, non-spam preprints affiliated with the institution

        limited to preprints this user contributes to and that were
        published before the cutoff
        '''
        if not hasattr(osfdb.Preprint, 'affiliated_institutions'):
            return osfdb.Preprint.objects.none()  # HACK: preprints affiliation project still in-progress
        # NOTE(review): a stickler would want this to match the creator
        # affiliation numbers currently in SHARE; the reviewer judged the
        # difference minor and unlikely to confuse anyone
        return (
            osfdb.Preprint.objects.can_view()  # published/publicly-viewable
            .filter(
                affiliated_institutions=self.institution,
                _contributors=self.user,
                date_published__lt=self.before_datetime,
            )
            .exclude(spam_status=SpamStatus.SPAM)
        )

    def _public_osfstorage_file_queryset(self):
        '''osfstorage files on the user's public nodes or published preprints

        only files created before the cutoff that are neither deleted nor purged
        '''
        _target_node_q = Q(
            # any public project, registration, project component, or registration component
            target_object_id__in=self._node_queryset().filter(is_public=True).values('pk'),
            target_content_type=ContentType.objects.get_for_model(osfdb.AbstractNode),
        )
        _target_preprint_q = Q(
            target_object_id__in=self._published_preprint_queryset().values('pk'),
            target_content_type=ContentType.objects.get_for_model(osfdb.Preprint),
        )
        return (
            OsfStorageFile.objects
            .filter(
                created__lt=self.before_datetime,
                deleted__isnull=True,
                purged__isnull=True,
            )
            .filter(_target_node_q | _target_preprint_q)
        )

    def _storage_byte_count(self):
        '''total size of the non-purged file versions behind the public files

        sums `size` over versions created before the cutoff; `default=0`
        makes the aggregate 0 (rather than None) when nothing matches
        '''
        return osfdb.FileVersion.objects.filter(
            size__gt=0,
            created__lt=self.before_datetime,
            purged__isnull=True,
            basefilenode__in=self._public_osfstorage_file_queryset(),
        ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes']
20 changes: 20 additions & 0 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,23 @@ class SpamSummaryReport(MonthlyReport):
preprint_flagged = metrics.Integer()
user_marked_as_spam = metrics.Integer()
user_marked_as_ham = metrics.Integer()


class InstitutionalUserReport(MonthlyReport):
    """monthly report describing one user affiliated with one institution

    holds identifying info about the user plus counts of their osf objects;
    `UNIQUE_TOGETHER_FIELDS` names the fields that presumably identify a
    single report (month + institution + user) -- confirm in MonthlyReport
    """
    UNIQUE_TOGETHER_FIELDS = (
        'report_yearmonth',
        'institution_id',
        'user_id',
    )

    institution_id = metrics.Keyword()

    # user info:
    user_id = metrics.Keyword()
    user_name = metrics.Text()
    department_name = metrics.Text()
    month_last_login = YearmonthField()
    account_creation_date = YearmonthField()
    orcid_id = metrics.Keyword()

    # counts:
    public_project_count = metrics.Integer()
    private_project_count = metrics.Integer()
    public_registration_count = metrics.Integer()
    embargoed_registration_count = metrics.Integer()
    published_preprint_count = metrics.Integer()
    public_file_count = metrics.Integer()
    storage_byte_count = metrics.Integer()
Loading
Loading