From d91bae777bb86a0b29a0125e241a21645bf10b57 Mon Sep 17 00:00:00 2001 From: Eli Jones Date: Fri, 15 Dec 2023 12:51:29 -0500 Subject: [PATCH] [forest-fixes] download forest task log no longer uses django rest framework and has tests --- frontend/templates/navbar.html | 4 +- pages/forest_pages.py | 106 +++++++++++++++++++++-------- serializers/forest_serializers.py | 61 ----------------- serializers/tableau_serializers.py | 23 +------ tests/test_forest_pages.py | 71 +++++++++++++++++-- urls.py | 2 +- 6 files changed, 144 insertions(+), 123 deletions(-) delete mode 100644 serializers/forest_serializers.py diff --git a/frontend/templates/navbar.html b/frontend/templates/navbar.html index 4584f4ef3..676327806 100755 --- a/frontend/templates/navbar.html +++ b/frontend/templates/navbar.html @@ -73,8 +73,8 @@
  • View Forest Task Log
  • View Forest Analysis Progress
  • {% if site_admin %} -
  • - Download Forest Task Log (all studies) +
  • + Download Forest Task Log As CSV
  • {% endif %} diff --git a/pages/forest_pages.py b/pages/forest_pages.py index 7bb54e467..b56238bd1 100644 --- a/pages/forest_pages.py +++ b/pages/forest_pages.py @@ -2,10 +2,12 @@ import pickle from collections import defaultdict from datetime import date, datetime +from typing import Dict import orjson from django.contrib import messages from django.core.exceptions import ValidationError +from django.db.models import QuerySet from django.http.response import FileResponse, HttpResponse from django.shortcuts import redirect, render from django.utils import timezone @@ -31,7 +33,6 @@ from libs.s3 import NoSuchKeyException from libs.streaming_zip import ZipGenerator from libs.utils.date_utils import daterange -from serializers.forest_serializers import display_true, ForestTaskCsvSerializer TASK_SERIALIZER_FIELDS = [ @@ -223,19 +224,14 @@ def task_log(request: ResearcherRequest, study_id=None): task_dict["has_output_data"] = task_dict["forest_output_exists"] task_dict["forest_tree_display"] = task_dict.pop("forest_tree").title() task_dict["created_on_display"] = task_dict.pop("created_on").strftime(DEV_TIME_FORMAT) - task_dict["forest_output_exists_display"] = display_true(task_dict["forest_output_exists"]) + task_dict["forest_output_exists_display"] = yes_no_unknown(task_dict["forest_output_exists"]) # dates/times that require safety (yes it could be less obnoxious) - task_dict["process_end_time"] = task_dict["process_end_time"].strftime(DEV_TIME_FORMAT) \ - if task_dict["process_end_time"] else None - task_dict["process_start_time"] = task_dict["process_start_time"].strftime(DEV_TIME_FORMAT) \ - if task_dict["process_start_time"] else None - task_dict["process_download_end_time"] = task_dict["process_download_end_time"].strftime(DEV_TIME_FORMAT) \ - if task_dict["process_download_end_time"] else None - task_dict["data_date_end"] = task_dict["data_date_end"].isoformat() \ - if task_dict["data_date_end"] else None - task_dict["data_date_start"] = 
task_dict["data_date_start"].isoformat() \ - if task_dict["data_date_start"] else None + dict_datetime_to_display(task_dict, "process_end_time", None) + dict_datetime_to_display(task_dict, "process_start_time", None) + dict_datetime_to_display(task_dict, "process_download_end_time", None) + task_dict["data_date_end"] = task_dict["data_date_end"].isoformat() if task_dict["data_date_end"] else None + task_dict["data_date_start"] = task_dict["data_date_start"].isoformat() if task_dict["data_date_start"] else None # urls task_dict["cancel_url"] = easy_url( @@ -277,18 +273,6 @@ def task_log(request: ResearcherRequest, study_id=None): ) -@require_GET -@authenticate_admin -def download_task_log(request: ResearcherRequest): - forest_tasks = ForestTask.objects.order_by("created_on") - return FileResponse( - stream_forest_task_log_csv(forest_tasks), - content_type="text/csv", - filename=f"forest_task_log_{timezone.now().isoformat()}.csv", - as_attachment=True, - ) - - @require_POST @authenticate_admin @forest_enabled @@ -428,17 +412,79 @@ def render_create_tasks(request: ResearcherRequest, study: Study): ) -def stream_forest_task_log_csv(forest_tasks): +@require_GET +@authenticate_admin +def download_task_log(request: ResearcherRequest, study_id: str): + if not request.session_researcher.site_admin: + return HttpResponse(content="", status=403) + + # study id is already validated by the url pattern? 
+ forest_tasks = ForestTask.objects.filter(participant__study_id=study_id) + + return FileResponse( + stream_forest_task_log_csv(forest_tasks), + content_type="text/csv", + filename=f"forest_task_log_{timezone.now().isoformat()}.csv", + as_attachment=True, + ) + + +def stream_forest_task_log_csv(forest_tasks: QuerySet[ForestTask]): + # titles of rows as values, query filter values as keys + field_map = { + "created_on": "Created On", + "data_date_end": "Data Date End", + "data_date_start": "Data Date Start", + "external_id": "Id", + "forest_tree": "Forest Tree", + "forest_output_exists": "Forest Output Exists", + "participant__patient_id": "Patient Id", + "process_start_time": "Process Start Time", + "process_download_end_time": "Process Download End Time", + "process_end_time": "Process End Time", + "status": "Status", + "total_file_size": "Total File Size", + } + + # setup buffer = CSVBuffer() - writer = csv.DictWriter(buffer, fieldnames=ForestTaskCsvSerializer.Meta.fields) - writer.writeheader() - yield buffer.read() + # the csv writer isn't well handled in the vscode ide. 
it has writerow and writerows methods, + writes to a file-like object, CSVBuffer might be overkill, StringIO or BytesIO might work + writer = csv.writer(buffer, dialect="excel") + writer.writerow(field_map.values()) + yield buffer.read() # write the header - for forest_task in forest_tasks: - writer.writerow(ForestTaskCsvSerializer(forest_task).data) + # yield rows + for forest_data in forest_tasks.values(*field_map.keys()): + dict_datetime_to_display(forest_data, "created_on", "") + dict_datetime_to_display(forest_data, "process_start_time", "") + dict_datetime_to_display(forest_data, "process_download_end_time", "") + dict_datetime_to_display(forest_data, "process_end_time", "") + forest_data["forest_tree"] = forest_data["forest_tree"].title() + forest_data["forest_output_exists"] = yes_no_unknown(forest_data["forest_output_exists"]) + writer.writerow(forest_data.values()) yield buffer.read() +def dict_datetime_to_display(some_dict: Dict[str, datetime], key: str, default: str = None): + # this pattern is repeated numerous times. + dt = some_dict[key] + if dt is None: + some_dict[key] = default + else: + some_dict[key] = dt.strftime(DEV_TIME_FORMAT) + + +def yes_no_unknown(a_bool: bool): + if a_bool is True: + return "Yes" + elif a_bool is False: + return "No" + else: + return "Unknown" + + +# we need a class that has a read and write method for the csv writer machinery to use. class CSVBuffer: line = "" diff --git a/serializers/forest_serializers.py b/serializers/forest_serializers.py deleted file mode 100644 index dd12c2b93..000000000 --- a/serializers/forest_serializers.py +++ /dev/null @@ -1,61 +0,0 @@ -import json - -from rest_framework import serializers - -from constants.common_constants import DEV_TIME_FORMAT -from database.forest_models import ForestTask - - -def display_true(a_bool: bool): - if a_bool is True: - return "Yes" - elif a_bool is False: - return "No" - else: - return "Unknown" - - -#! 
FIXME: Get rid of DRF, see forest_pages.task_log for a partial rewrite -class ForestTaskBaseSerializer(serializers.ModelSerializer): - created_on_display = serializers.SerializerMethodField() - forest_tree_display = serializers.SerializerMethodField() - forest_output_exists_display = serializers.SerializerMethodField() - params_dict = serializers.SerializerMethodField() - patient_id = serializers.SerializerMethodField() - - class Meta: - model = ForestTask - fields = [ - "created_on_display", - "data_date_end", - "data_date_start", - "id", - "forest_tree_display", - "forest_output_exists", - "forest_output_exists_display", - "patient_id", - "process_download_end_time", - "process_start_time", - "process_end_time", - "status", - "total_file_size", - ] - - def get_created_on_display(self, instance: ForestTask): - return instance.created_on.strftime(DEV_TIME_FORMAT) - - def get_forest_tree_display(self, instance: ForestTask): - return instance.forest_tree.title() - - def get_forest_output_exists_display(self, instance: ForestTask): - return display_true(instance.forest_output_exists) - - def get_params_dict(self, instance: ForestTask): - return repr(instance.get_params_dict()) - - def get_patient_id(self, instance: ForestTask): - return instance.participant.patient_id - - -class ForestTaskCsvSerializer(ForestTaskBaseSerializer): - pass diff --git a/serializers/tableau_serializers.py b/serializers/tableau_serializers.py index a798bbdde..953d113fc 100644 --- a/serializers/tableau_serializers.py +++ b/serializers/tableau_serializers.py @@ -2,7 +2,7 @@ from database.security_models import ApiKey - +# FIXME: this is the only remaining DRF serializer; once we remove it we can finally get rid of them all class ApiKeySerializer(serializers.ModelSerializer): class Meta: model = ApiKey @@ -14,24 +14,3 @@ class Meta: "readable_name", ] - -# class SummaryStatisticDailySerializer(serializers.ModelSerializer): -# class Meta: -# model = SummaryStatisticDaily -# fields = 
SERIALIZABLE_FIELD_NAMES - -# participant_id = serializers.SlugRelatedField( -# slug_field="patient_id", source="participant", read_only=True -# ) -# study_id = serializers.SerializerMethodField() # Study object id - -# def __init__(self, *args, fields=None, **kwargs): -# """ dynamically modify the subset of fields on instantiation """ -# super().__init__(*args, **kwargs) -# if fields is not None: -# for field_name in set(self.fields) - set(fields): -# # is this pop valid? the value is a cached property... this needs to be tested. -# self.fields.pop(field_name) - -# def get_study_id(self, obj): -# return obj.participant.study.object_id diff --git a/tests/test_forest_pages.py b/tests/test_forest_pages.py index 95f1d5888..263d396a9 100644 --- a/tests/test_forest_pages.py +++ b/tests/test_forest_pages.py @@ -1,5 +1,5 @@ import uuid -from datetime import datetime +from datetime import date, datetime from unittest.mock import MagicMock, patch import dateutil @@ -48,12 +48,69 @@ def test(self): # TODO: make a test for whether a tsudy admin can hit this endpoint? I think there's a bug in the authenticate_admin decorator that allows that..... 
-# class TestForestDownloadTaskLog(ResearcherSessionTest): -# ENDPOINT_NAME = "forest_pages.download_task_log" -# -# def test(self): -# # this streams a csv of tasks on a tsudy -# self.smart_get() +class TestForestDownloadTaskLog(ResearcherSessionTest): + ENDPOINT_NAME = "forest_pages.download_task_log" + REDIRECT_ENDPOINT_NAME = ResearcherSessionTest.IGNORE_THIS_ENDPOINT + + header_row = "Created On,Data Date End,Data Date Start,Id,Forest Tree,"\ + "Forest Output Exists,Patient Id,Process Start Time,Process Download End Time,"\ + "Process End Time,Status,Total File Size\r\n" + + def test_no_relation_no_site_admin_no_worky(self): + # this streams a csv of tasks on a study + resp = self.smart_get_status_code(403, self.session_study.id) + self.assertEqual(resp.content, b"") + + def test_researcher_no_worky(self): + self.set_session_study_relation(ResearcherRole.researcher) + resp = self.smart_get_status_code(403, self.session_study.id) + self.assertEqual(resp.content, b"") + + def test_study_admin_no_worky(self): + self.set_session_study_relation(ResearcherRole.study_admin) + resp = self.smart_get_status_code(403, self.session_study.id) + self.assertEqual(resp.content, b"") + + def test_site_admin_can(self): + self.set_session_study_relation(ResearcherRole.site_admin) + resp = self.smart_get_status_code(200, self.session_study.id) + self.assertEqual(b"".join(resp.streaming_content).decode(), self.header_row) + + def test_single_forest_task(self): + self.set_session_study_relation(ResearcherRole.site_admin) + self.default_forest_task.update( + created_on=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC), + data_date_start=date(2020, 1, 1), + data_date_end=date(2020, 1, 5), + forest_tree=ForestTree.jasmine, + forest_output_exists=True, + process_start_time=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC), # midnight + process_download_end_time=datetime(2020, 1, 1, 1, tzinfo=dateutil.tz.UTC), # 1am + process_end_time=datetime(2020, 1, 1, 2, tzinfo=dateutil.tz.UTC), # 2am + 
status=ForestTaskStatus.success, + total_file_size=123456789, + ) + + resp = self.smart_get_status_code(200, self.session_study.id) + content = b"".join(resp.streaming_content).decode() + self.assertEqual(content.count("\r\n"), 2) + line = content.splitlines()[1] + + # 12 columns, 11 commas + self.assertEqual(line.count(","), 11) + items = line.split(",") + self.assertEqual(items[0], "2020-01-01 00:00 (UTC)") # Created On + self.assertEqual(items[1], "2020-01-05") # Data Date End + self.assertEqual(items[2], "2020-01-01") # Data Date Start + self.assertEqual(items[3], str(self.default_forest_task.external_id)) # duh + self.assertEqual(items[4], "Jasmine") # Forest Tree + self.assertEqual(items[5], "Yes") # Forest Output Exists + self.assertEqual(items[6], "patient1") # Patient Id + self.assertEqual(items[7], "2020-01-01 00:00 (UTC)") # Process Start Time + self.assertEqual(items[8], "2020-01-01 01:00 (UTC)") # Process Download End Time + self.assertEqual(items[9], "2020-01-01 02:00 (UTC)") # Process End Time + self.assertEqual(items[10], "success") # Status + self.assertEqual(items[11], "123456789") # Total File Size class TestForestCancelTask(ResearcherSessionTest): diff --git a/urls.py b/urls.py index ae83bfe85..5e5d479df 100644 --- a/urls.py +++ b/urls.py @@ -224,7 +224,7 @@ def path( path('studies//forest/progress', forest_pages.forest_tasks_progress, login_redirect=SAFE) path("studies//forest/tasks//cancel", forest_pages.cancel_task) path('studies//forest/tasks', forest_pages.task_log, login_redirect=SAFE) -path('forest/tasks/download', forest_pages.download_task_log) +path('studies//forest/tasks/download', forest_pages.download_task_log) path( "studies//forest/tasks//download_output", forest_pages.download_output_data