Skip to content

Commit

Permalink
add "new" institution-user metrics view
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Sep 9, 2024
1 parent a89f5bc commit 33b1e80
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 7 deletions.
32 changes: 32 additions & 0 deletions api/base/elasticsearch_metrics_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations
import abc
import typing

from rest_framework import generics

from api.base.views import JSONAPIBaseView

if typing.TYPE_CHECKING:
from elasticsearch_dsl import Search


class ElasticsearchMetricsListView(JSONAPIBaseView, generics.ListAPIView, abc.ABC):
    '''list view backed by an `elasticsearch_dsl.Search` instead of a django queryset

    subclasses implement `get_search`; whatever it returns is handed to
    rest_framework in the place where a queryset would normally go
    '''

    @abc.abstractmethod
    def get_search(self) -> Search:
        '''build the search whose results this view should list'''
        ...

    ###
    # rest_framework plumbing below -- tread carefully

    # note: pagination is left at the rest_framework default, on the expectation
    # that an `elasticsearch_dsl.Search` can be sliced and iterated for results
    # much like a queryset

    # override rest_framework.generics.GenericAPIView
    def get_queryset(self):
        _search = self.get_search()
        return _search

    # override rest_framework.generics.GenericAPIView
    def filter_queryset(self, queryset):
        # TODO: FilterMixin, i guess -- until then, nothing is filtered
        return queryset
17 changes: 15 additions & 2 deletions api/institutions/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
from osf.metrics import InstitutionProjectCounts
from osf.models import OSFUser, Node, Institution, Registration
from osf.metrics import UserInstitutionProjectCounts
from osf.metrics.reports import InstitutionalUserReport
from osf.utils import permissions as osf_permissions

from api.base import permissions as base_permissions
from api.base.filters import ListFilterMixin, FilterMixin
from api.base.elasticsearch_metrics_views import ElasticsearchMetricsListView
from api.base.filters import ListFilterMixin
from api.base.views import JSONAPIBaseView
from api.base.serializers import JSONAPISerializer
from api.base.utils import get_object_or_error, get_user_auth
Expand Down Expand Up @@ -528,7 +530,7 @@ def get_default_queryset(self):
return self._make_elasticsearch_results_filterable(search, id=institution._id, department=DEFAULT_ES_NULL_VALUE)


class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseView):
class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchMetricsListView):
permission_classes = (
drf_permissions.IsAuthenticatedOrReadOnly,
base_permissions.TokenHasScope,
Expand All @@ -543,6 +545,17 @@ class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseV

serializer_class = NewInstitutionUserMetricsSerializer

def get_search(self):
    '''search for this institution's user reports in the most recent report month

    the month comes from `InstitutionalUserReport.most_recent_yearmonth()`;
    when that gives `None`, an empty list is returned instead of a search
    '''
    _latest_yearmonth = InstitutionalUserReport.most_recent_yearmonth()
    if _latest_yearmonth is None:
        return []
    _for_month = InstitutionalUserReport.search().filter(
        'term',
        report_yearmonth=str(_latest_yearmonth),
    )
    return _for_month.filter('term', institution_id=self.get_institution()._id)


institution_user_metrics_list_view = view_toggled_by_feature_flag(
flag_name=osf.features.INSTITUTIONAL_DASHBOARD_2024,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
)

from osf.metrics import UserInstitutionProjectCounts
from osf.metrics.reports import InstitutionalUserReport

@pytest.mark.es
@pytest.mark.django_db
class TestInstitutionUserMetricList:
class TestOldInstitutionUserMetricList:

@pytest.fixture(autouse=True)
def _waffled(self):
Expand Down Expand Up @@ -262,3 +263,98 @@ def test_filter_and_sort(self, app, url, user, user2, user3, admin, user4, popul
assert data[0]['attributes']['department'] == 'Biology dept'
assert data[1]['attributes']['department'] == 'N/A'
assert data[2]['attributes']['department'] == 'Psychology dept'


@pytest.mark.django_db
class TestNewInstitutionUserMetricList:
    '''institution-user metrics list, with the INSTITUTIONAL_DASHBOARD_2024 flag active'''

    @pytest.fixture(autouse=True)
    def _waffled(self):
        # these tests apply only after institution dashboard improvements
        with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=True):
            yield

    @pytest.fixture()
    def institution(self):
        return InstitutionFactory()

    @pytest.fixture()
    def institutional_users(self, institution):
        # four users, each affiliated with the institution under test
        _affiliated = [
            AuthUserFactory(fullname=_fullname)
            for _fullname in ('alarg', 'blarg', 'clarg', 'dlarg')
        ]
        for _each in _affiliated:
            _each.add_or_update_affiliated_institution(institution)
        return _affiliated

    @pytest.fixture()
    def rando(self):
        # a user who is not added to the institution's admin group
        return AuthUserFactory()

    @pytest.fixture()
    def institutional_admin(self, institution):
        _admin = AuthUserFactory()
        _admin_group = institution.get_group('institutional_admins')
        _admin_group.user_set.add(_admin)
        return _admin

    @pytest.fixture()
    def reports(self, institution, institutional_users):
        # one saved report per affiliated user, all for the same month
        return [
            _report_factory('2024-08', institution, _each)
            for _each in institutional_users
        ]

    @pytest.fixture()
    def url(self, institution):
        return f'/{API_BASE}institutions/{institution._id}/metrics/users/'

    def test_anon(self, app, url):
        _response = app.get(url, expect_errors=True)
        assert _response.status_code == 401

    def test_rando(self, app, url, rando):
        _response = app.get(url, auth=rando.auth, expect_errors=True)
        assert _response.status_code == 403

    @pytest.mark.es
    def test_get_empty(self, app, url, institutional_admin):
        # no reports saved: expect a successful response with no data
        _response = app.get(url, auth=institutional_admin.auth)
        assert _response.status_code == 200
        assert _response.json['data'] == []

    @pytest.mark.es
    def test_get_reports(self, app, url, institutional_admin, institutional_users, reports):
        _response = app.get(url, auth=institutional_admin.auth)
        assert _response.status_code == 200
        _data = _response.json['data']
        assert len(_data) == len(reports)
        # each saved report should show up once, identified by its related user
        _expected = {_each.user_id for _each in reports}
        _actual = {
            _datum['relationships']['user']['data']['id']
            for _datum in _data
        }
        assert _actual == _expected


def _report_factory(yearmonth, institution, user, **kwargs):
    '''save (with `refresh=True`) and return an `InstitutionalUserReport` for the given user

    `kwargs` override the default values used for the report's other fields
    '''
    _field_values = {
        'department_name': 'mydep',
        'month_last_login': '2024-08',
        'account_creation_date': '2024-08',
        'orcid_id': None,
        'public_project_count': 1,
        'private_project_count': 1,
        'public_registration_count': 1,
        'embargoed_registration_count': 1,
        'published_preprint_count': 1,
        'public_file_count': 1,
        'storage_byte_count': 1,
    }
    _field_values.update(kwargs)  # caller-given values win over the defaults
    _saved_report = InstitutionalUserReport(
        report_yearmonth=yearmonth,
        institution_id=institution._id,
        user_id=user._id,
        user_name=user.fullname,
        **_field_values,
    )
    _saved_report.save(refresh=True)
    return _saved_report
25 changes: 23 additions & 2 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DailyReport(metrics.Metric):
There's something we'd like to know about every so often,
so let's regularly run a report and stash the results here.
"""
UNIQUE_TOGETHER_FIELDS = ('report_date',) # override in subclasses for multiple reports per day
UNIQUE_TOGETHER_FIELDS: tuple[str, ...] = ('report_date',) # override in subclasses for multiple reports per day

report_date = metrics.Date(format='strict_date', required=True)

Expand All @@ -46,6 +46,10 @@ def deserialize(self, data):
return YearMonth.from_str(data)
elif isinstance(data, (datetime.datetime, datetime.date)):
return YearMonth.from_date(data)
elif isinstance(data, int):
# elasticsearch stores dates in milliseconds since the unix epoch
_as_datetime = datetime.datetime.fromtimestamp(data // 1000)
return YearMonth.from_date(_as_datetime)
elif data is None:
return None
else:
Expand All @@ -67,7 +71,7 @@ def serialize(self, data):
class MonthlyReport(metrics.Metric):
"""MonthlyReport (abstract base for report-based metrics that run monthly)
"""
UNIQUE_TOGETHER_FIELDS = ('report_yearmonth',) # override in subclasses for multiple reports per month
UNIQUE_TOGETHER_FIELDS: tuple[str, ...] = ('report_yearmonth',) # override in subclasses for multiple reports per month

report_yearmonth = YearmonthField(required=True)

Expand All @@ -76,6 +80,23 @@ class Meta:
dynamic = metrics.MetaField('strict')
source = metrics.MetaField(enabled=True)

@classmethod
def most_recent_yearmonth(cls, base_search=None) -> YearMonth | None:
    """find the most recent `report_yearmonth` among reports of this type

    `base_search` (optional) narrows which reports are considered; when it is
    `None`, `cls.search()` is used. a given `base_search` is not modified.

    returns the key of the top bucket when `report_yearmonth` terms are sorted
    in descending order, or `None` when the response has no aggregations or
    the aggregation has no buckets (no matching reports)
    """
    _base = cls.search() if base_search is None else base_search
    # `extra` returns a modified copy, so the size and the aggregation added
    # below do not leak into a caller-provided `base_search`
    _search = _base.extra(size=0)  # omit hits
    _search.aggs.bucket(
        'agg_most_recent_yearmonth',
        'terms',
        field='report_yearmonth',
        order={'_key': 'desc'},
        size=1,
    )
    _response = _search.execute()
    if not _response.aggregations:
        return None
    _buckets = _response.aggregations.agg_most_recent_yearmonth.buckets
    if not _buckets:
        # the aggregation ran but found no reports to bucket
        return None
    # `size=1` above asks for only the top bucket
    return _buckets[0].key

def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
assert 'report_yearmonth' in cls.UNIQUE_TOGETHER_FIELDS, f'MonthlyReport subclasses must have "report_yearmonth" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})'
Expand Down
5 changes: 3 additions & 2 deletions osf/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import annotations
import dataclasses
import re
import datetime
Expand Down Expand Up @@ -27,12 +28,12 @@ class YearMonth:
YEARMONTH_RE: ClassVar[re.Pattern] = re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})')

@classmethod
def from_date(cls, date):
def from_date(cls, date: datetime.date) -> YearMonth:
    """build an instance from the year and month of the given date or datetime"""
    # `datetime.datetime` is a subclass of `datetime.date`, so one check covers both
    assert isinstance(date, datetime.date)
    _year, _month = date.year, date.month
    return cls(_year, _month)

@classmethod
def from_str(cls, input_str):
def from_str(cls, input_str: str) -> YearMonth:
match = cls.YEARMONTH_RE.fullmatch(input_str)
if match:
return cls(
Expand Down

0 comments on commit 33b1e80

Please sign in to comment.