From 72bc0d6396a200770b989b283d7bc814f8a56a26 Mon Sep 17 00:00:00 2001
From: John Tordoff <>
Date: Tue, 5 Nov 2024 15:13:11 -0500
Subject: [PATCH] support both raw json and view-based json directly

---
 api/base/elasticsearch_dsl_views.py           |  18 ++-
 api/institutions/views.py                     |   9 +-
 api/metrics/renderers.py                      |  32 ++++
 .../test_institution_user_metric_list.py      | 150 ++++++++++++++++++
 4 files changed, 205 insertions(+), 4 deletions(-)

diff --git a/api/base/elasticsearch_dsl_views.py b/api/base/elasticsearch_dsl_views.py
index 57f156abd392..bb094a5e5f1e 100644
--- a/api/base/elasticsearch_dsl_views.py
+++ b/api/base/elasticsearch_dsl_views.py
@@ -14,6 +14,8 @@
 from api.metrics.renderers import (
     MetricsReportsCsvRenderer,
     MetricsReportsTsvRenderer,
+    MetricsReportsJsonRenderer,
+    MetricsReportsJsonDirectDownloadRenderer
 )
 from api.base.pagination import ElasticsearchListViewPagination, JSONAPIPagination
 
@@ -47,6 +49,8 @@ def get_default_search(self) -> edsl.Search | None:
     FILE_RENDERER_CLASSES = {
         MetricsReportsCsvRenderer,
         MetricsReportsTsvRenderer,
+        MetricsReportsJsonRenderer,
+        MetricsReportsJsonDirectDownloadRenderer,
     }
     DEFAULT_OPERATOR_OVERRIDES = {}
     # (if you want to add fulltext-search or range-filter support, remove the override
@@ -63,10 +67,18 @@ def get_default_search(self) -> edsl.Search | None:
     # override rest_framework.generics.GenericAPIView
     @property
     def pagination_class(self):
-        if any(self.request.accepted_renderer.format == renderer.format for renderer in self.FILE_RENDERER_CLASSES):
+        """
+        When downloading a file, assume no pagination is necessary unless the user specifies a page size.
+        """
+        is_file_download = any(
+            self.request.accepted_renderer.format == renderer.format
+            for renderer in self.FILE_RENDERER_CLASSES
+        )
+        page_size_param = getattr(super().pagination_class, 'page_size_query_param', 'page[size]')
+
+        if is_file_download and not self.request.query_params.get(page_size_param):
             return ElasticsearchListViewPagination
-        else:
-            return JSONAPIPagination
+        return JSONAPIPagination
 
     def get_queryset(self):
         _search = self.get_default_search()
diff --git a/api/institutions/views.py b/api/institutions/views.py
index 2cfd6d2fa87d..849055c51646 100644
--- a/api/institutions/views.py
+++ b/api/institutions/views.py
@@ -35,7 +35,12 @@
 )
 from api.base.settings import DEFAULT_ES_NULL_VALUE
 from api.metrics.permissions import IsInstitutionalMetricsUser
-from api.metrics.renderers import MetricsReportsCsvRenderer, MetricsReportsTsvRenderer
+from api.metrics.renderers import (
+    MetricsReportsCsvRenderer,
+    MetricsReportsTsvRenderer,
+    MetricsReportsJsonRenderer,
+    MetricsReportsJsonDirectDownloadRenderer
+)
 from api.nodes.serializers import NodeSerializer
 from api.nodes.filters import NodesFilterMixin
 from api.users.serializers import UserSerializer
@@ -559,6 +564,8 @@ class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchListView):
         *api_settings.DEFAULT_RENDERER_CLASSES,
         MetricsReportsCsvRenderer,
         MetricsReportsTsvRenderer,
+        MetricsReportsJsonRenderer,
+        MetricsReportsJsonDirectDownloadRenderer
     )
 
     serializer_class = NewInstitutionUserMetricsSerializer
diff --git a/api/metrics/renderers.py b/api/metrics/renderers.py
index ae55360b2c8f..b5843ebb2459 100644
--- a/api/metrics/renderers.py
+++ b/api/metrics/renderers.py
@@ -1,5 +1,6 @@
 import io
 import csv
+import json
 import datetime
 from api.base.settings.defaults import REPORT_FILENAME_FORMAT
 
@@ -95,3 +96,34 @@ class MetricsReportsCsvRenderer(MetricsReportsRenderer):
     extension = 'csv'
     media_type = 'text/csv'
     CSV_DIALECT = csv.excel
+
+
+class MetricsReportsJsonRenderer(MetricsReportsRenderer):
+    """
+    Just the basic report without nested user details.
+    """
+    format = 'json_report'
+    extension = 'json'
+    media_type = 'application/json'
+
+    def render(self, json_response, accepted_media_type=None, renderer_context=None):
+        response = renderer_context['response']
+        filename = self.get_filename(renderer_context, self.extension)
+        response['Content-Disposition'] = f'attachment; filename="{filename}"'
+        return json.dumps([item['attributes'] for item in json_response['data']])
+
+
+class MetricsReportsJsonDirectDownloadRenderer(MetricsReportsRenderer):
+    """
+    The whole raw report, with pagination and filtering/sorting.
+    """
+    format = 'direct_download'
+    extension = 'json'
+    media_type = 'application/json'
+
+    def render(self, data, accepted_media_type=None, renderer_context=None):
+        response = renderer_context['response']
+        filename = self.get_filename(renderer_context, self.extension)
+        response['Content-Disposition'] = f'attachment; filename="{filename}"'
+        return json.dumps(data)
+
diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py
index 0960acc2f98b..1c114fd02753 100644
--- a/api_tests/institutions/views/test_institution_user_metric_list.py
+++ b/api_tests/institutions/views/test_institution_user_metric_list.py
@@ -1,4 +1,5 @@
 import csv
+import json
 import datetime
 from io import StringIO
 from random import random
@@ -569,6 +570,155 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu
                 # Sort both expected and actual rows (ignoring the header) before comparison
                 assert sorted(response_rows[1:]) == sorted(expected_data)
 
+    def test_get_report_format_table_json(self, app, url, institutional_admin, institution):
+        _report_factory(
+            '2024-08',
+            institution,
+            user_id='u_orcomma',
+            account_creation_date='2018-02',
+            user_name='Brian Dawkins',
+            orcid_id='4444-3333-2222-1111',
+            department_name='Safety "The Wolverine" Weapon X',
+            storage_byte_count=736662999298,
+            embargoed_registration_count=1,
+            published_preprint_count=1,
+            public_registration_count=2,
+            public_project_count=3,
+            public_file_count=4,
+            private_project_count=5,
+            month_last_active='2018-02',
+            month_last_login='2018-02',
+        )
+
+        resp = app.get(f'{url}?format=json_report', auth=institutional_admin.auth)
+        assert resp.status_code == 200
+        assert resp.headers['Content-Type'] == 'application/json; charset=utf-8'
+
+        current_date = datetime.datetime.now().strftime('%Y-%m')
+        expected_filename = REPORT_FILENAME_FORMAT.format(
+            view_name='institution-user-metrics',
+            date_created=current_date,
+            format_type='json'
+        )
+        assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'
+
+        # Validate JSON structure and content
+        response_data = json.loads(resp.body)
+        expected_data = [
+            {
+                'account_creation_date': '2018-02',
+                'department': 'Safety "The Wolverine" Weapon X',
+                'embargoed_registration_count': 1,
+                'month_last_active': '2018-02',
+                'month_last_login': '2018-02',
+                'orcid_id': '4444-3333-2222-1111',
+                'private_projects': 5,
+                'public_file_count': 4,
+                'public_projects': 3,
+                'public_registration_count': 2,
+                'published_preprint_count': 1,
+                'storage_byte_count': 736662999298,
+                'user_name': 'Brian Dawkins'
+            }
+        ]
+        assert response_data == expected_data
+
+    def test_get_report_format_direct_json(self, app, url, institutional_admin, institution):
+        # Create multiple reports
+        _report_factory(
+            '2024-08',
+            institution,
+            user_id='u_orcomma',
+            account_creation_date='2018-02',
+            user_name='Brian Dawkins',
+            orcid_id='4444-3333-2222-1111',
+            department_name='Safety "The Wolverine" Weapon X',
+            storage_byte_count=736662999298,
+            embargoed_registration_count=1,
+            published_preprint_count=1,
+            public_registration_count=2,
+            public_project_count=3,
+            public_file_count=4,
+            private_project_count=5,
+            month_last_active='2018-02',
+            month_last_login='2018-02',
+        )
+        _report_factory(
+            '2024-08',
+            institution,
+            user_id='u_second',
+            account_creation_date='2018-03',
+            user_name='Randall Cunningham',
+            orcid_id='3333-2222-1111-0000',
+            department_name='Department of Athletics',
+            storage_byte_count=500000000,
+            embargoed_registration_count=0,
+            published_preprint_count=0,
+            public_registration_count=1,
+            public_project_count=2,
+            public_file_count=1,
+            private_project_count=1,
+            month_last_active='2018-03',
+            month_last_login='2018-03',
+        )
+        _report_factory(
+            '2024-08',
+            institution,
+            user_id='u_third',
+            account_creation_date='2018-04',
+            user_name='Reggie White',
+            orcid_id='2222-1111-0000-5555',
+            department_name='Minister of Defense',
+            storage_byte_count=1000000000,
+            embargoed_registration_count=2,
+            published_preprint_count=2,
+            public_registration_count=3,
+            public_project_count=4,
+            public_file_count=5,
+            private_project_count=6,
+            month_last_active='2018-04',
+            month_last_login='2018-04',
+        )
+
+        # Request with `page[size]=2` to verify pagination
+        resp = app.get(f'{url}?format=direct_download&page[size]=2', auth=institutional_admin.auth)
+        assert resp.status_code == 200
+        assert resp.headers['Content-Type'] == 'application/json; charset=utf-8'
+
+        current_date = datetime.datetime.now().strftime('%Y-%m')
+        expected_filename = REPORT_FILENAME_FORMAT.format(
+            view_name='institution-user-metrics',
+            date_created=current_date,
+            format_type='json'
+        )
+        assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'
+        assert len(resp.json['data']) == 2  # Expect 2 items only
+
+        # Check attributes and relationships for one of the entries
+        entry = resp.json['data'][0]
+        assert 'attributes' in entry
+        assert entry['attributes']['user_name'] in {'Brian Dawkins', 'Randall Cunningham', 'Reggie White'}
+        assert 'relationships' in entry
+        assert 'user' in entry['relationships']
+        assert 'institution' in entry['relationships']
+        assert 'related' in entry['relationships']['user']['links']
+        assert entry['relationships']['user']['links']['related']['href'].startswith('http://localhost:8000/v2/users/')
+        assert entry['relationships']['institution']['data']['id'] == institution._id
+
+        # Request without `page[size]` to verify all reports are returned
+        resp = app.get(f'{url}?format=direct_download', auth=institutional_admin.auth)
+        assert resp.status_code == 200
+        assert resp.headers['Content-Type'] == 'application/json; charset=utf-8'
+        assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"'
+
+        # Validate that the response includes all 3 entries since `page[size]` is not set
+        assert len(resp.json['data']) == 3  # Expect all 3 items
+
+        # Further validation on user names
+        assert {entry['attributes']['user_name'] for entry in resp.json['data']} == {
+            'Brian Dawkins', 'Randall Cunningham', 'Reggie White'
+        }
+
 
 def _user_ids(api_response):
     for _datum in api_response.json['data']: