Skip to content

Commit

Permalink
Replace magic strings with enums
Browse files (browse the repository at this point in the history)
  • Loading branch information
dapomeroy committed Oct 9, 2024
1 parent 8d4abeb commit fc90f51
Showing 1 changed file with 78 additions and 62 deletions.
140 changes: 78 additions & 62 deletions lib/ramble/ramble/reports.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,13 +8,13 @@

import copy
import datetime
from enum import Enum
import os
import re

import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

import llnl.util.filesystem as fs

Expand All @@ -29,12 +29,27 @@
from ramble.util.logger import logger


class ReportVars(Enum):
    """Canonical string names for the report fields used in this module.

    Each member's ``.value`` is the literal string that the reporting code
    uses as a dictionary key or DataFrame column label. Referencing the
    member instead of repeating the literal keeps every use in sync and
    turns a typo into an ``AttributeError`` rather than a silently missing
    column.

    Members are plain ``Enum`` members, not ``str`` subclasses, so callers
    must pass ``ReportVars.<MEMBER>.value`` wherever a string is expected.
    """

    # Metadata attached to each figure-of-merit (FOM) row.
    BETTER_DIRECTION = "better_direction"
    CONTEXT = "context"
    FOM_NAME = "fom_name"
    FOM_ORIGIN = "fom_origin"
    FOM_ORIGIN_TYPE = "fom_origin_type"
    FOM_UNITS = "fom_units"

    # The FOM value itself and its min/max companions.
    FOM_VALUE = "fom_value"
    FOM_VALUE_MIN = "fom_value_min"
    FOM_VALUE_MAX = "fom_value_max"

    # Derived columns added while preparing plot data.
    IDEAL_PERF_VALUE = "ideal_perf_value"
    NORMALIZED_FOM_VALUE = "normalized_fom_value"
    SERIES = "series"


_FOM_DICT_MAPPING = {
"name": "fom_name",
"value": "fom_value",
"units": "fom_units",
"origin": "fom_origin",
"origin_type": "fom_origin_type",
"name": ReportVars.FOM_NAME.value,
"value": ReportVars.FOM_VALUE.value,
"units": ReportVars.FOM_UNITS.value,
"origin": ReportVars.FOM_ORIGIN.value,
"origin_type": ReportVars.FOM_ORIGIN_TYPE.value,
}


Expand Down Expand Up @@ -200,20 +215,20 @@ def prepare_data(results: dict, where_query) -> pd.DataFrame:

# Remove context dict and add the current FOM values
exp_copy.pop("CONTEXTS")
exp_copy["context"] = context["name"]
exp_copy[ReportVars.CONTEXT.value] = context["name"]
for name, val in fom.items():
if name in _FOM_DICT_MAPPING.keys():
exp_copy[_FOM_DICT_MAPPING[name]] = val
elif name == "fom_type":
exp_copy["fom_type"] = FomType.from_str(fom["fom_type"]["name"])
exp_copy["better_direction"] = BetterDirection.from_str(
fom["fom_type"]["better_direction"]
exp_copy[ReportVars.BETTER_DIRECTION.value] = BetterDirection.from_str(
fom["fom_type"][ReportVars.BETTER_DIRECTION.value]
)

# older data exports may not have fom_type stored
if "fom_type" not in exp_copy:
exp_copy["fom_type"] = FomType.UNDEFINED
exp_copy["better_direction"] = BetterDirection.INDETERMINATE
exp_copy[ReportVars.BETTER_DIRECTION.value] = BetterDirection.INDETERMINATE

# Exclude vars that aren't needed for analysis, mainly paths and commands
dir_regex = r"_dir$"
Expand Down Expand Up @@ -322,8 +337,8 @@ def normalize_data(
self,
data,
scale_to_index=False,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=False,
):
# FIXME: do we need to support more than normalizing by the first
Expand Down Expand Up @@ -359,21 +374,21 @@ def add_minmax_data(self, selected_data, min_data, max_data, scale_var):
max_data.loc[:, scale_var] = to_numeric_if_possible(max_data[scale_var])
max_data = max_data.set_index(scale_var)

selected_data.loc[:, "fom_value_min"] = to_numeric_if_possible(min_data["fom_value"])
selected_data.loc[:, "fom_value_max"] = to_numeric_if_possible(max_data["fom_value"])
selected_data.loc[:, ReportVars.FOM_VALUE_MIN.value] = to_numeric_if_possible(min_data[ReportVars.FOM_VALUE.value])
selected_data.loc[:, ReportVars.FOM_VALUE_MAX.value] = to_numeric_if_possible(max_data[ReportVars.FOM_VALUE.value])

if self.normalize:
self.normalize_data(
selected_data,
scale_to_index=True,
to_col="fom_value_min",
from_col="fom_value_min",
to_col=ReportVars.FOM_VALUE_MIN.value,
from_col=ReportVars.FOM_VALUE_MIN.value,
)
self.normalize_data(
selected_data,
scale_to_index=True,
to_col="fom_value_max",
from_col="fom_value_max",
to_col=ReportVars.FOM_VALUE_MAX.value,
from_col=ReportVars.FOM_VALUE_MAX.value,
)

# TODO: these args come from the spec, so don't need to be passed and could be stored at init
Expand All @@ -392,15 +407,15 @@ def draw(self, perf_measure, scale_var, series, y_label=None):
if self.normalize:
ax.plot(
series_data.index,
"normalized_fom_value",
ReportVars.NORMALIZED_FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{perf_measure} (Normalized)",
)
else:
ax.plot(
series_data.index,
"fom_value",
ReportVars.FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{perf_measure}",
Expand All @@ -414,11 +429,11 @@ def draw(self, perf_measure, scale_var, series, y_label=None):
if self.have_statistics:
logger.debug("Adding fill lines for min and max")
ax.fill_between(
series_data.index, "fom_value_min", "fom_value_max", data=series_data, alpha=0.2
series_data.index, ReportVars.FOM_VALUE_MIN.value, ReportVars.FOM_VALUE_MAX.value, data=series_data, alpha=0.2
)

try:
ax.plot(series_data.index, "ideal_perf_value", data=series_data, label="Ideal Value")
ax.plot(series_data.index, ReportVars.IDEAL_PERF_VALUE.value, data=series_data, label="Ideal Value")
except ValueError:
logger.debug("Failed to plot ideal_perf_value. Series not found.")

Expand Down Expand Up @@ -464,7 +479,7 @@ def validate_spec(self, chart_spec):
for var in chart_spec:
if (
var not in self.results_df.columns
and var not in self.results_df.loc[:, "fom_name"].values
and var not in self.results_df.loc[:, ReportVars.FOM_NAME.value].values
):
logger.debug(f"Available options: {self.results_df.loc[:, 'fom_name'].unique()}")
logger.die(f"{var} was not found in the results data.")
Expand All @@ -488,29 +503,29 @@ def generate_plot_data(self):
results = self.results_df.query(f'fom_name == "{perf_measure}"').copy()

# Determine which direction is 'better', or 'INDETERMINATE' if missing or ambiguous data
if len(results.loc[:, "better_direction"].unique()) == 1:
self.better_direction = results.loc[:, "better_direction"].unique()[0]
if len(results.loc[:, ReportVars.BETTER_DIRECTION.value].unique()) == 1:
self.better_direction = results.loc[:, ReportVars.BETTER_DIRECTION.value].unique()[0]

# TODO: this needs to support a list for split_by
# TODO: this currently gets overwritten by series, below
results.loc[:, "series"] = results.loc[:, self.split_by]
results.loc[:, ReportVars.SERIES.value] = results.loc[:, self.split_by]

if additional_vars:
# TODO: this would be nicer as a group by
results.loc[:, "series"] = (
results.loc[:, "series"] + "_x_" + results[additional_vars].agg("_x_".join, axis=1)
results.loc[:, ReportVars.SERIES.value] = (
results.loc[:, ReportVars.SERIES.value] + "_x_" + results[additional_vars].agg("_x_".join, axis=1)
)

for series in results.loc[:, "series"].unique():
for series in results.loc[:, ReportVars.SERIES.value].unique():

# TODO: this needs to account for repeats in a more elegant way
series_results = results.query(
f'series == "{series}" and (fom_origin_type == "application" '
'or fom_origin_type == "modifier" or fom_origin_type == "summary::mean")'
).copy()

series_results.loc[:, "fom_value"] = to_numeric_if_possible(
series_results["fom_value"]
series_results.loc[:, ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
series_results[ReportVars.FOM_VALUE.value]
)
series_results.loc[:, scale_var] = to_numeric_if_possible(series_results[scale_var])
series_results = series_results.set_index(scale_var)
Expand All @@ -527,7 +542,7 @@ def generate_plot_data(self):
self.draw_filler(perf_measure, scale_var, series, e)
continue

if series_results.loc[:, "fom_origin_type"].iloc[0] == "summary::mean":
if series_results.loc[:, ReportVars.FOM_ORIGIN_TYPE.value].iloc[0] == "summary::mean":
self.have_statistics = True

if self.have_statistics:
Expand All @@ -553,9 +568,9 @@ def add_idealized_data(self, raw_results, selected_data):
return selected_data

if self.normalize:
first_perf_value = selected_data["normalized_fom_value"].iloc[0]
first_perf_value = selected_data[ReportVars.NORMALIZED_FOM_VALUE.value].iloc[0]
else:
first_perf_value = selected_data["fom_value"].iloc[0]
first_perf_value = selected_data[ReportVars.FOM_VALUE.value].iloc[0]

if first_perf_value == 0:
logger.warn(
Expand All @@ -565,14 +580,14 @@ def add_idealized_data(self, raw_results, selected_data):

logger.debug(f"Normalizing data (by {first_perf_value})")

selected_data.loc[:, "ideal_perf_value"] = first_perf_value
selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] = first_perf_value

if self.better_direction == BetterDirection.LOWER:
selected_data["ideal_perf_value"] = selected_data.loc[:, "ideal_perf_value"] / (
selected_data[ReportVars.IDEAL_PERF_VALUE.value] = selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] / (
selected_data.index / selected_data.index[0] # set baseline scaling var to 1
)
elif self.better_direction == BetterDirection.HIGHER:
selected_data["ideal_perf_value"] = selected_data.loc[:, "ideal_perf_value"] * (
selected_data[ReportVars.IDEAL_PERF_VALUE.value] = selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] * (
selected_data.index / selected_data.index[0]
)

Expand Down Expand Up @@ -627,7 +642,8 @@ def draw(self, perf_measure, scale_var, series):
def add_idealized_data(self, raw_results, selected_data):
selected_data = super().add_idealized_data(raw_results, selected_data)

selected_data.loc[:, "ideal_perf_value"] = selected_data["ideal_perf_value"].iloc[0]
if ReportVars.IDEAL_PERF_VALUE.value in selected_data.columns:
selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] = selected_data[ReportVars.IDEAL_PERF_VALUE.value].iloc[0]
return selected_data


Expand All @@ -648,8 +664,8 @@ def normalize_data(
self,
data,
scale_to_index=True,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=True,
):
super().normalize_data(
Expand All @@ -668,17 +684,17 @@ def draw(self, perf_measure, scale_var, series):
class FomPlot(PlotGenerator):
def generate_plot_data(self):
results = self.results_df
all_foms = results.loc[:, "fom_name"].unique()
all_foms = results.loc[:, ReportVars.FOM_NAME.value].unique()
for fom in all_foms:
series_results = results.query(
f'fom_name == "{fom}" and (fom_origin_type == "application" or'
f'fom_name == "{fom}" and (fom_origin_type == "application" or '
'fom_origin_type == "modifier" or fom_origin_type == "summary::mean")'
).copy()

scale_var = "simplified_experiment_namespace"

series_results.loc[:, "fom_value"] = to_numeric_if_possible(
series_results["fom_value"]
series_results.loc[:, ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
series_results[ReportVars.FOM_VALUE.value]
)
series_results.loc[:, scale_var] = to_numeric_if_possible(series_results[scale_var])

Expand All @@ -687,7 +703,7 @@ def generate_plot_data(self):
if self.normalize:
self.normalize_data(series_results, scale_to_index=True)

if series_results.loc[:, "fom_origin_type"].iloc[0] == "summary::mean":
if series_results.loc[:, ReportVars.FOM_ORIGIN_TYPE.value].iloc[0] == "summary::mean":
self.have_statistics = True

if self.have_statistics:
Expand All @@ -701,7 +717,7 @@ def generate_plot_data(self):

self.output_df = series_results

unit = series_results.loc[:, "fom_units"].iloc[0]
unit = series_results.loc[:, ReportVars.FOM_UNITS.value].iloc[0]

perf_measure = fom
series = "experiment_name"
Expand All @@ -710,13 +726,13 @@ def generate_plot_data(self):
# TODO: dry bar plot drawing
def draw(self, perf_measure, scale_var, series, unit):
try:
self.output_df["fom_value"] = to_numeric_if_possible(self.output_df["fom_value"])
self.output_df[ReportVars.FOM_VALUE.value] = to_numeric_if_possible(self.output_df[ReportVars.FOM_VALUE.value])
except ValueError:
logger.warn(f"Skipping drawing of non numeric FOM: {perf_measure}")
return

# TODO: this should leverage the available min/max to add candle sticks
ax = self.output_df.plot(y="fom_value", kind="bar", figsize=self.figsize)
ax = self.output_df.plot(y=ReportVars.FOM_VALUE.value, kind="bar", figsize=self.figsize)
fig = ax.get_figure()

# ax.set_label('Label via method')
Expand Down Expand Up @@ -762,26 +778,26 @@ def generate_plot_data(self):
dimensions = []

for input_spec in self.spec:
if input_spec in self.results_df.loc[:, "fom_name"].values:
if input_spec in self.results_df.loc[:, ReportVars.FOM_NAME.value].values:
foms.append(input_spec)
else:
dimensions.append(input_spec)

if not dimensions:
dimensions.append("experiment_name")

raw_results = self.results_df[self.results_df.loc[:, "fom_name"].isin(foms)].copy()
raw_results = self.results_df[self.results_df.loc[:, ReportVars.FOM_NAME.value].isin(foms)].copy()

raw_results.loc[:, "Figure of Merit"] = (
raw_results.loc[:, "fom_name"] + " (" + raw_results.loc[:, "fom_units"] + ")"
raw_results.loc[:, ReportVars.FOM_NAME.value] + " (" + raw_results.loc[:, ReportVars.FOM_UNITS.value] + ")"
)

raw_results["fom_value"] = to_numeric_if_possible(raw_results["fom_value"])
raw_results[ReportVars.FOM_VALUE.value] = to_numeric_if_possible(raw_results[ReportVars.FOM_VALUE.value])

plot_col = "fom_value"
plot_col = ReportVars.FOM_VALUE.value
if self.normalize:
self.normalize_data(raw_results)
plot_col = "normalized_fom_value"
plot_col = ReportVars.NORMALIZED_FOM_VALUE.value

# TODO: remove pivot?
compare_pivot = raw_results.pivot_table(
Expand Down Expand Up @@ -814,8 +830,8 @@ def normalize_data(
self,
data,
scale_to_index=True,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=True,
):
super().normalize_data(
Expand All @@ -838,20 +854,20 @@ def draw_multiline(self, perf_measure, scale_var, y_label):
# TODO: prep_draw method in subclass ScalingPlotGenerator, not this class
fig, ax = self.prep_draw(perf_measure, scale_var)

for series in self.output_df.loc[:, "series"].unique():
for series in self.output_df.loc[:, ReportVars.SERIES.value].unique():
series_data = self.output_df.query(f'series == "{series}"').copy()
if self.normalize:
ax.plot(
series_data.index,
"normalized_fom_value",
ReportVars.NORMALIZED_FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{series} (Normalized)",
)
else:
ax.plot(
series_data.index,
"fom_value",
ReportVars.FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{series}",
Expand All @@ -861,8 +877,8 @@ def draw_multiline(self, perf_measure, scale_var, y_label):
logger.debug("Adding fill lines for min and max")
ax.fill_between(
series_data.index,
"fom_value_min",
"fom_value_max",
ReportVars.FOM_VALUE_MIN.value,
ReportVars.FOM_VALUE_MAX.value,
data=series_data,
alpha=0.2,
)
Expand Down

0 comments on commit fc90f51

Please sign in to comment.