Skip to content

Commit

Permalink
[staged-updates] The forest task progress works again. It still serve…
Browse files Browse the repository at this point in the history
…s no purpose.
  • Loading branch information
biblicabeebli committed Dec 12, 2023
1 parent 123527a commit 699676f
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 17 deletions.
2 changes: 1 addition & 1 deletion database/forest_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class ForestTask(TimestampedModel):
process_end_time = models.DateTimeField(null=True, blank=True)
status = models.TextField(choices=ForestTaskStatus.choices())
stacktrace = models.TextField(null=True, blank=True, default=None)
# Whether or not there was any data output by Forest (None indicates unknown)
# Whether or not there was any data output by Forest (None means construct_summary_statistics errored)
forest_output_exists = models.BooleanField(null=True, blank=True)

# S3 file paths
Expand Down
1 change: 1 addition & 0 deletions database/study_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def _get_data_time_bin(
only_after_epoch: if True, will filter results only for datetimes after the Unix epoch
(1970-01-01T00:00:00Z)
only_before_now: if True, will filter results only for datetimes before now """

time_bins: QuerySet[datetime] = self.chunk_registries.values_list("time_bin", flat=True)
comparator = operator.lt if earliest else operator.gt
now = timezone.now()
Expand Down
51 changes: 36 additions & 15 deletions pages/forest_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,42 +57,62 @@
@authenticate_researcher_study_access
@forest_enabled
def forest_tasks_progress(request: ResearcherRequest, study_id=None):
study: Study = Study.objects.get(pk=study_id)
# generates a chart of study analysis progress logs
study = Study.objects.get(pk=study_id)
participants: ParticipantQuerySet = Participant.objects.filter(study=study_id)

# generate chart of study analysis progress logs
# later tasks will overwrite earlier tasks - this is intentional.
# number of forest tasks shouldn't be the bottleneck here.
tasks = ForestTask.objects.filter(participant__in=participants).order_by("created_on")

# these are quite optimized buuuuut it is still slow.
start_date = (study.get_earliest_data_time_bin() or study.created_on).date()
end_date = (study.get_latest_data_time_bin() or timezone.now()).date()

params = {}
results = defaultdict(lambda: "--")
# this code simultaneously builds up the chart of most recent forest results for date ranges
# by participant and tree, and tracks the metadata
results = defaultdict(lambda: "-")
chart_elements_lookup = {False: "N", None: "?"}
# this loop builds the chart of whether there are forest results for date ranges
# per-participant-and-tree. The tasks query is ordered by creation date, so later tasks will
# overwrite earlier tasks. a "-" means no task has been run, "N" means a task ran but there was
# no output, "?" means the code ran successfully but there was an error reading in the
# data so we MIGHT have data, and "Y" means there was definitely data.
for task in tasks:
for a_date in daterange(task.data_date_start, task.data_date_end, inclusive=True):
results[(task.participant_id, task.forest_tree, a_date)] = task
params[(task.participant_id, task.forest_tree, a_date)] = task.safe_unpickle_parameters_as_string()
key = (task.participant_id, task.forest_tree, a_date)
in_table = results[key] # will populate with a "-" on first access
output_exists = task.forest_output_exists
if in_table == "Y" or output_exists: # always force "Y"
results[key] = "Y"
elif in_table != "?":
# We have some nice constraints here:
# 1. output_exists is False or None, so chart_elements_lookup[false or None]
# can only return "N" or "?".
# 2. The chart's current field is "-", "N", or "?"
# 3. If in_table is a ? we can just skip it because we cannot upgrade from ? to Y here.
# So, we just skip if we are already at ? in the chart element, and otherwise we do the lookup.
results[key] = chart_elements_lookup[output_exists]
params[key] = task.safe_unpickle_parameters_as_string()

# generate the date range for charting
# generate the date range for the chart, we need it many times.
dates = list(daterange(start_date, end_date, inclusive=True))

chart = []
for participant in participants:
for tree in ForestTree.values():
row = [participant.patient_id, tree] + \
[results[(participant.id, tree, date)] for date in dates]
for tree_name in ForestTree.values():
# we need to make a list of lists with the participant and tree name
row = [participant.patient_id, tree_name] + \
[results[(participant.id, tree_name, date)] for date in dates]
chart.append(row)

# ensure that within each tree, only a single set of param values are used (only the most recent runs
# are considered, and unsuccessful runs are assumed to invalidate old runs, clearing params)
params_conflict = False
for tree in {k[1] for k in params.keys()}:
if len({m for k, m in params.items() if m is not None and k[1] == tree}) > 1:
for tree_name in {k[1] for k in params.keys()}:
if len({m for k, m in params.items() if m is not None and k[1] == tree_name}) > 1:
params_conflict = True
break

chart_json = orjson.dumps(chart).decode() # may be huge, but orjson is very fast.
return render(
request,
'forest/forest_tasks_progress.html', # has been renamed internally because this is imprecise.
Expand All @@ -103,7 +123,7 @@ def forest_tasks_progress(request: ResearcherRequest, study_id=None):
params_conflict=params_conflict,
start_date=start_date,
end_date=end_date,
chart=chart # this uses the jinja safe filter and should never involve user input
chart=chart_json # this uses the jinja safe filter and should never involve user input
)
)

Expand Down Expand Up @@ -157,6 +177,7 @@ def task_log(request: ResearcherRequest, study_id=None):
task_dict["forest_tree_display"] = task_dict.pop("forest_tree").title()
task_dict["created_on_display"] = task_dict.pop("created_on").strftime(DEV_TIME_FORMAT)
task_dict["forest_output_exists_display"] = display_true(task_dict["forest_output_exists"])

# dates/times that require safety
task_dict["process_end_time"] = task_dict["process_end_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_end_time"] else None
Expand Down
2 changes: 1 addition & 1 deletion urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def path(
path('graph', mobile_pages.fetch_graph)

# forest pages
path('studies/<str:study_id>/forest/progress', forest_pages.forest_tasks_progress, login_redirect=SAFE)
path('studies/<str:study_id>/forest/tasks/create', forest_pages.create_tasks)
path('studies/<str:study_id>/forest/progress', forest_pages.forest_tasks_progress, login_redirect=SAFE)
path("studies/<str:study_id>/forest/tasks/<str:forest_task_external_id>/cancel", forest_pages.cancel_task)
path('studies/<str:study_id>/forest/tasks', forest_pages.task_log, login_redirect=SAFE)
path('forest/tasks/download', forest_pages.download_task_log)
Expand Down

0 comments on commit 699676f

Please sign in to comment.