diff --git a/api/metrics/renderers.py b/api/metrics/renderers.py index fc3196db7ad..e99e5705d10 100644 --- a/api/metrics/renderers.py +++ b/api/metrics/renderers.py @@ -67,7 +67,7 @@ def get_filename(self, renderer_context: dict, format_type: str) -> str: return USER_INSTITUTION_REPORT_FILENAME.format( date_created=current_date, institution_id=renderer_context['view'].kwargs['institution_id'], - format_type=format_type + format_type=format_type, ) else: raise NotImplementedError('Missing format filename') diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py index 84b73ef74d1..1773ce2a7cc 100644 --- a/api_tests/institutions/views/test_institution_user_metric_list.py +++ b/api_tests/institutions/views/test_institution_user_metric_list.py @@ -407,18 +407,18 @@ def test_paginate_reports(self, app, url, institutional_admin, institution, repo @pytest.mark.parametrize('format_type, delimiter, content_type', [ ('csv', ',', 'text/csv; charset=utf-8'), - ('tsv', '\t', 'text/tab-separated-values; charset=utf-8'), - ('json_file', None, 'application/json; charset=utf-8') + ('tsv', '\t', 'text/tab-separated-values; charset=utf-8') ]) - def test_get_report_formats(self, app, url, institutional_admin, institution, format_type, delimiter, content_type): + def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institution, format_type, delimiter, + content_type): _report_factory( '2024-08', institution, - user_id=f'u_orcomma', + user_id='u_orcomma', account_creation_date='2018-02', - user_name=f'Jason Kelce', + user_name='Jason Kelce', orcid_id='4444-3333-2222-1111', - department_name='Center \t Greatest Ever', + department_name='Center, \t Greatest Ever', storage_byte_count=736662999298, embargoed_registration_count=1, published_preprint_count=1, @@ -438,47 +438,204 @@ def test_get_report_formats(self, app, url, institutional_admin, institution, fo expected_filename = 
USER_INSTITUTION_REPORT_FILENAME.format( date_created=current_date, institution_id=institution._id, - format_type='json' if format_type == 'json_file' else format_type + format_type=format_type ) assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' - if format_type == 'json_file': - # Validate JSON structure and content - response_data = json.loads(resp.body.decode('utf-8')) - expected_data = [ - { - 'account_creation_date': '2018-02', - 'department_name': 'Center \t Greatest Ever', - 'embargoed_registration_count': 1, - 'month_last_active': '2018-02', - 'month_last_login': '2018-02', - 'orcid_id': '4444-3333-2222-1111', - 'private_project_count': 5, - 'public_file_count': 4, - 'public_project_count': 3, - 'public_registration_count': 2, - 'published_preprint_count': 1, - 'storage_byte_count': 736662999298, - 'user_name': 'Jason Kelce' - } - ] - assert response_data == expected_data - else: - response_body = resp.text - expected_response = [ - ['account_creation_date', 'department_name', 'embargoed_registration_count', 'month_last_active', - 'month_last_login', 'orcid_id', 'private_projects', 'public_file_count', 'public_projects', - 'public_registration_count', 'published_preprint_count', 'storage_byte_count', 'user_name'], - ['2018-02', 'Center \t Greatest Ever', '1', '2018-02', '2018-02', '4444-3333-2222-1111', '5', '4', '3', - '2', '1', '736662999298', 'Jason Kelce'], + response_body = resp.text + expected_response = [ + [ + 'report_yearmonth', + 'institution_id', + 'user_id', + 'account_creation_date', + 'user_name', + 'orcid_id', + 'department_name', + 'storage_byte_count', + 'embargoed_registration_count', + 'published_preprint_count', + 'public_registration_count', + 'public_project_count', + 'public_file_count', + 'private_project_count', + 'month_last_active', + 'month_last_login', + 'timestamp' + ], + [ + '2024-08', + institution._id, + 'u_orcomma', + '2018-02', + 'Jason Kelce', + '4444-3333-2222-1111', + 'Center, 
\t Greatest Ever', + '736662999298', + '1', + '1', + '2', + '3', + '4', + '5', + '2018-02', + '2018-02', + response_body.splitlines()[1].split(delimiter)[-1] ] + ] + + if delimiter: + with StringIO(response_body) as file: + reader = csv.reader(file, delimiter=delimiter) + response_rows = list(reader) + assert response_rows[0] == expected_response[0] + assert sorted(response_rows[1:]) == sorted(expected_response[1:]) + + def test_get_report_format_json(self, app, url, institutional_admin, institution): + _report_factory( + '2024-08', + institution, + user_id='u_orcomma', + account_creation_date='2018-02', + user_name='Brian Dawkins', + orcid_id='4444-3333-2222-1111', + department_name='Safety "The Wolverine" Weapon X', + storage_byte_count=736662999298, + embargoed_registration_count=1, + published_preprint_count=1, + public_registration_count=2, + public_project_count=3, + public_file_count=4, + private_project_count=5, + month_last_active='2018-02', + month_last_login='2018-02', + ) + + resp = app.get(f'{url}?format=json_file', auth=institutional_admin.auth) + assert resp.status_code == 200 + assert resp.headers['Content-Type'] == 'application/json; charset=utf-8' + + current_date = datetime.datetime.now().strftime('%Y-%m') + expected_filename = USER_INSTITUTION_REPORT_FILENAME.format( + date_created=current_date, + institution_id=institution._id, + format_type='json' + ) + assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' - if delimiter: - with StringIO(response_body) as file: - reader = csv.reader(file, delimiter=delimiter) - response_rows = list(reader) - assert response_rows[0] == expected_response[0] - assert sorted(response_rows[1:]) == sorted(expected_response[1:]) + # Validate JSON structure and content + response_data = json.loads(resp.body.decode('utf-8')) + expected_data = [ + { + 'account_creation_date': '2018-02', + 'department_name': 'Safety "The Wolverine" Weapon X', + 'embargoed_registration_count': 1, + 
'month_last_active': '2018-02', + 'month_last_login': '2018-02', + 'orcid_id': '4444-3333-2222-1111', + 'private_project_count': 5, + 'public_file_count': 4, + 'public_project_count': 3, + 'public_registration_count': 2, + 'published_preprint_count': 1, + 'storage_byte_count': 736662999298, + 'user_name': 'Brian Dawkins', + 'institution_id': institution._id, + 'report_yearmonth': '2024-08', + 'user_id': 'u_orcomma', + 'timestamp': response_data[0]['timestamp'] # dynamically compare timestamp + } + ] + assert response_data == expected_data + + @pytest.mark.parametrize('format_type, delimiter, content_type', [ + ('csv', ',', 'text/csv; charset=utf-8'), + ('tsv', '\t', 'text/tab-separated-values; charset=utf-8') + ]) + def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institution, format_type, delimiter, + content_type): + # Create 15 records, exceeding the default page size of 10 + num_records = 15 + expected_data = [] + for i in range(num_records): + _report_factory( + '2024-08', + institution, + user_id=f'u_orcomma_{i}', + account_creation_date=f'2018-0{i % 9 + 1}', # Vary the date + user_name=f'Jalen Hurts #{i}', + orcid_id=f'4444-3333-2222-111{i}', + department_name=f'QBatman', + storage_byte_count=736662999298 + i, + embargoed_registration_count=1, + published_preprint_count=1, + public_registration_count=2, + public_project_count=3, + public_file_count=4, + private_project_count=5, + month_last_active=f'2018-0{i % 9 + 1}', + month_last_login=f'2018-0{i % 9 + 1}', + ) + expected_data.append([ + '2024-08', + institution._id, + f'u_orcomma_{i}', + f'2018-0{i % 9 + 1}', + f'Jalen Hurts #{i}', + f'4444-3333-2222-111{i}', + f'QBatman', + str(736662999298 + i), + '1', + '1', + '2', + '3', + '4', + '5', + f'2018-0{i % 9 + 1}', + f'2018-0{i % 9 + 1}', + None # Placeholder for the dynamic timestamp + ]) + + # Request the parametrized format (csv or tsv) without page[size], leaving the default page size of 10 in effect + resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth) + assert
resp.status_code == 200 + assert resp.headers['Content-Type'] == content_type + + current_date = datetime.datetime.now().strftime('%Y-%m') + expected_filename = USER_INSTITUTION_REPORT_FILENAME.format( + date_created=current_date, + institution_id=institution._id, + format_type=format_type + ) + assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' + + # Validate the delimited (CSV or TSV) content contains all 15 records, ignoring the default pagination of 10 + response_body = resp.text + rows = response_body.splitlines() + + assert len(rows) == num_records + 1 == 16 # 1 header + 15 records + + if delimiter: + with StringIO(response_body) as file: + reader = csv.reader(file, delimiter=delimiter) + response_rows = list(reader) + + # Validate header row + expected_header = [ + 'report_yearmonth', 'institution_id', 'user_id', 'account_creation_date', 'user_name', 'orcid_id', + 'department_name', 'storage_byte_count', 'embargoed_registration_count', 'published_preprint_count', + 'public_registration_count', 'public_project_count', 'public_file_count', 'private_project_count', + 'month_last_active', 'month_last_login', 'timestamp' + ] + assert response_rows[0] == expected_header + + # Sort both expected and actual rows (ignoring the header) before comparison + sorted_response_rows = sorted(response_rows[1:], key=lambda x: x[2]) # Sort by 'user_id' + sorted_expected_data = sorted(expected_data, key=lambda x: x[2]) # Sort by 'user_id' + + for i in range(num_records): + sorted_expected_data[i][-1] = sorted_response_rows[i][-1] # Copy the actual timestamp into the expected row so it is excluded from the comparison + assert sorted_response_rows[i] == sorted_expected_data[i] def _user_ids(api_response):