class ElasticsearchMetricsListView(JSONAPIBaseView, generics.ListAPIView, abc.ABC):
    '''List view that uses an `elasticsearch_dsl.Search` as a queryset-analogue.

    Subclasses implement `get_search()`; `get_queryset()` hands that search to the
    inherited list-view machinery in place of a queryset.
    '''
    @abc.abstractmethod
    def get_search(self) -> Search:
        '''Return the search whose results this view lists (subclasses must implement).'''
        ...

    ###
    # beware! rest_framework shenanigans below

    # note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on,
    # it should work fine with default pagination!

    # override rest_framework.generics.GenericAPIView
    def get_queryset(self):
        # the "queryset" is whatever the subclass's `get_search()` returns
        return self.get_search()

    # override rest_framework.generics.GenericAPIView
    def filter_queryset(self, queryset):
        # no filtering is applied here; the search is passed through unchanged
        return queryset  # TODO: FilterMixin, i guess
def get_search(self):
    """Search the most recent month's `InstitutionalUserReport`s for this institution.

    Returns an empty list when `InstitutionalUserReport.most_recent_yearmonth()`
    gives `None`; otherwise a search with two `term` filters, one on
    `report_yearmonth` and one on `institution_id`.
    """
    latest = InstitutionalUserReport.most_recent_yearmonth()
    if latest is None:
        # no yearmonth to filter on -- an empty list stands in for "no results"
        return []
    for_latest_month = InstitutionalUserReport.search().filter(
        'term',
        report_yearmonth=str(latest),
    )
    return for_latest_month.filter(
        'term',
        institution_id=self.get_institution()._id,
    )
@pytest.fixture(autouse=True) + def _waffled(self): + with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=True): + yield # these tests apply only after institution dashboard improvements + + @pytest.fixture() + def institution(self): + return InstitutionFactory() + + @pytest.fixture() + def institutional_users(self, institution): + _users = [ + AuthUserFactory(fullname='alarg'), + AuthUserFactory(fullname='blarg'), + AuthUserFactory(fullname='clarg'), + AuthUserFactory(fullname='dlarg'), + ] + for _user in _users: + _user.add_or_update_affiliated_institution(institution) + return _users + + @pytest.fixture() + def rando(self): + return AuthUserFactory() + + @pytest.fixture() + def institutional_admin(self, institution): + _admin_user = AuthUserFactory() + institution.get_group('institutional_admins').user_set.add(_admin_user) + return _admin_user + + @pytest.fixture() + def reports(self, institution, institutional_users): + return [ + _report_factory('2024-08', institution, _user) + for _user in institutional_users + ] + + @pytest.fixture() + def url(self, institution): + return f'/{API_BASE}institutions/{institution._id}/metrics/users/' + + def test_anon(self, app, url): + _resp = app.get(url, expect_errors=True) + assert _resp.status_code == 401 + + def test_rando(self, app, url, rando): + _resp = app.get(url, auth=rando.auth, expect_errors=True) + assert _resp.status_code == 403 + + @pytest.mark.es + def test_get_empty(self, app, url, institutional_admin): + _resp = app.get(url, auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert _resp.json['data'] == [] + + @pytest.mark.es + def test_get_reports(self, app, url, institutional_admin, institutional_users, reports): + _resp = app.get(url, auth=institutional_admin.auth) + assert _resp.status_code == 200 + _data = _resp.json['data'] + assert len(_data) == len(reports) + _expected_user_ids = {_report.user_id for _report in reports} + _actual_user_ids = 
def _report_factory(yearmonth, institution, user, **kwargs):
    """Create, save, and return an `InstitutionalUserReport` for use in tests.

    The report is identified by `yearmonth`, `institution._id`, and `user._id`
    (with `user.fullname` as the user name); every other field gets a fixed
    default that `kwargs` may override.
    """
    _field_values = dict(
        department_name='mydep',
        month_last_login='2024-08',
        account_creation_date='2024-08',
        orcid_id=None,
        public_project_count=1,
        private_project_count=1,
        public_registration_count=1,
        embargoed_registration_count=1,
        published_preprint_count=1,
        public_file_count=1,
        storage_byte_count=1,
    )
    _field_values.update(kwargs)  # caller-given values win over the defaults
    _saved_report = InstitutionalUserReport(
        report_yearmonth=yearmonth,
        institution_id=institution._id,
        user_id=user._id,
        user_name=user.fullname,
        **_field_values,
    )
    # save with `refresh=True`, as the tests query for the report right away
    _saved_report.save(refresh=True)
    return _saved_report
@classmethod
def most_recent_yearmonth(cls, base_search=None) -> YearMonth | None:
    """Find the latest `report_yearmonth` among this report's documents.

    Args:
        base_search: optional search narrowing which reports are considered.
            It is not modified; a clone is used to run the aggregation.
            When `None`, `cls.search()` is used.

    Returns:
        The key of the first bucket of a `terms` aggregation on
        `report_yearmonth` ordered by key descending, or `None` when the
        response has no aggregations or the aggregation has no buckets.
    """
    _base = cls.search() if base_search is None else base_search
    # `extra` returns a clone, so a caller-provided `base_search` is left as-is
    # (`update_from_dict` would have changed it in place)
    _search = _base.extra(size=0)  # omit hits; only the aggregation matters
    _search.aggs.bucket(
        'agg_most_recent_yearmonth',
        'terms',
        field='report_yearmonth',
        order={'_key': 'desc'},
        size=1,
    )
    _response = _search.execute()
    if not _response.aggregations:
        return None
    _buckets = _response.aggregations.agg_most_recent_yearmonth.buckets
    if not _buckets:
        # the aggregation ran but nothing matched -- there is no "most recent"
        return None
    return _buckets[0].key
@classmethod
def from_date(cls, date: datetime.date) -> YearMonth:
    """Build an instance from the year and month of `date`.

    `date` must be a `datetime.date` or `datetime.datetime`; anything else
    fails the assertion.
    """
    # `datetime.datetime` is a subclass of `datetime.date`, so one check covers both
    assert isinstance(date, datetime.date)
    _year, _month = date.year, date.month
    return cls(_year, _month)