Skip to content

Commit

Permalink
Replace magic strings with enums
Browse files (browse the repository at this point in the history)
  • Loading branch information
dapomeroy committed Oct 10, 2024
1 parent 8c48681 commit 264259c
Showing 1 changed file with 94 additions and 62 deletions.
156 changes: 94 additions & 62 deletions lib/ramble/ramble/reports.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@

import copy
import datetime
from enum import Enum
import os
import re

Expand Down Expand Up @@ -47,11 +48,11 @@ class ReportVars(Enum):


_FOM_DICT_MAPPING = {
"name": "fom_name",
"value": "fom_value",
"units": "fom_units",
"origin": "fom_origin",
"origin_type": "fom_origin_type",
"name": ReportVars.FOM_NAME.value,
"value": ReportVars.FOM_VALUE.value,
"units": ReportVars.FOM_UNITS.value,
"origin": ReportVars.FOM_ORIGIN.value,
"origin_type": ReportVars.FOM_ORIGIN_TYPE.value,
}


Expand Down Expand Up @@ -217,20 +218,22 @@ def prepare_data(results: dict, where_query) -> pd.DataFrame:

# Remove context dict and add the current FOM values
exp_copy.pop("CONTEXTS")
exp_copy["context"] = context["name"]
exp_copy[ReportVars.CONTEXT.value] = context["name"]
for name, val in fom.items():
if name in _FOM_DICT_MAPPING.keys():
exp_copy[_FOM_DICT_MAPPING[name]] = val
elif name == "fom_type":
exp_copy["fom_type"] = FomType.from_str(fom["fom_type"]["name"])
exp_copy["better_direction"] = BetterDirection.from_str(
fom["fom_type"]["better_direction"]
exp_copy[ReportVars.BETTER_DIRECTION.value] = BetterDirection.from_str(
fom["fom_type"][ReportVars.BETTER_DIRECTION.value]
)

# older data exports may not have fom_type stored
if "fom_type" not in exp_copy:
exp_copy["fom_type"] = FomType.UNDEFINED
exp_copy["better_direction"] = BetterDirection.INDETERMINATE
exp_copy[ReportVars.BETTER_DIRECTION.value] = (
BetterDirection.INDETERMINATE
)

# Exclude vars that aren't needed for analysis, mainly paths and commands
dir_regex = r"_dir$"
Expand Down Expand Up @@ -339,8 +342,8 @@ def normalize_data(
self,
data,
scale_to_index=False,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=False,
):
# FIXME: do we need to support more than normalizing by the first
Expand Down Expand Up @@ -376,21 +379,25 @@ def add_minmax_data(self, selected_data, min_data, max_data, scale_var):
max_data.loc[:, scale_var] = to_numeric_if_possible(max_data[scale_var])
max_data = max_data.set_index(scale_var)

selected_data.loc[:, "fom_value_min"] = to_numeric_if_possible(min_data["fom_value"])
selected_data.loc[:, "fom_value_max"] = to_numeric_if_possible(max_data["fom_value"])
selected_data.loc[:, ReportVars.FOM_VALUE_MIN.value] = to_numeric_if_possible(
min_data[ReportVars.FOM_VALUE.value]
)
selected_data.loc[:, ReportVars.FOM_VALUE_MAX.value] = to_numeric_if_possible(
max_data[ReportVars.FOM_VALUE.value]
)

if self.normalize:
self.normalize_data(
selected_data,
scale_to_index=True,
to_col="fom_value_min",
from_col="fom_value_min",
to_col=ReportVars.FOM_VALUE_MIN.value,
from_col=ReportVars.FOM_VALUE_MIN.value,
)
self.normalize_data(
selected_data,
scale_to_index=True,
to_col="fom_value_max",
from_col="fom_value_max",
to_col=ReportVars.FOM_VALUE_MAX.value,
from_col=ReportVars.FOM_VALUE_MAX.value,
)

# TODO: these args come from the spec, so don't need to be passed and could be stored at init
Expand All @@ -409,15 +416,15 @@ def draw(self, perf_measure, scale_var, series, y_label=None):
if self.normalize:
ax.plot(
series_data.index,
"normalized_fom_value",
ReportVars.NORMALIZED_FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{perf_measure} (Normalized)",
)
else:
ax.plot(
series_data.index,
"fom_value",
ReportVars.FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{perf_measure}",
Expand All @@ -431,11 +438,20 @@ def draw(self, perf_measure, scale_var, series, y_label=None):
if self.have_statistics:
logger.debug("Adding fill lines for min and max")
ax.fill_between(
series_data.index, "fom_value_min", "fom_value_max", data=series_data, alpha=0.2
series_data.index,
ReportVars.FOM_VALUE_MIN.value,
ReportVars.FOM_VALUE_MAX.value,
data=series_data,
alpha=0.2,
)

try:
ax.plot(series_data.index, "ideal_perf_value", data=series_data, label="Ideal Value")
ax.plot(
series_data.index,
ReportVars.IDEAL_PERF_VALUE.value,
data=series_data,
label="Ideal Value",
)
except ValueError:
logger.debug("Failed to plot ideal_perf_value. Series not found.")

Expand Down Expand Up @@ -481,7 +497,7 @@ def validate_spec(self, chart_spec):
for var in chart_spec:
if (
var not in self.results_df.columns
and var not in self.results_df.loc[:, "fom_name"].values
and var not in self.results_df.loc[:, ReportVars.FOM_NAME.value].values
):
logger.debug(f"Available options: {self.results_df.loc[:, 'fom_name'].unique()}")
logger.die(f"{var} was not found in the results data.")
Expand All @@ -505,29 +521,31 @@ def generate_plot_data(self):
results = self.results_df.query(f'fom_name == "{perf_measure}"').copy()

# Determine which direction is 'better', or 'INDETERMINATE' if missing or ambiguous data
if len(results.loc[:, "better_direction"].unique()) == 1:
self.better_direction = results.loc[:, "better_direction"].unique()[0]
if len(results.loc[:, ReportVars.BETTER_DIRECTION.value].unique()) == 1:
self.better_direction = results.loc[:, ReportVars.BETTER_DIRECTION.value].unique()[0]

# TODO: this needs to support a list for split_by
# TODO: this currently gets overwritten by series, below
results.loc[:, "series"] = results.loc[:, self.split_by]
results.loc[:, ReportVars.SERIES.value] = results.loc[:, self.split_by]

if additional_vars:
# TODO: this would be nicer as a group by
results.loc[:, "series"] = (
results.loc[:, "series"] + "_x_" + results[additional_vars].agg("_x_".join, axis=1)
results.loc[:, ReportVars.SERIES.value] = (
results.loc[:, ReportVars.SERIES.value]
+ "_x_"
+ results[additional_vars].agg("_x_".join, axis=1)
)

for series in results.loc[:, "series"].unique():
for series in results.loc[:, ReportVars.SERIES.value].unique():

# TODO: this needs to account for repeats in a more elegant way
series_results = results.query(
f'series == "{series}" and (fom_origin_type == "application" '
'or fom_origin_type == "modifier" or fom_origin_type == "summary::mean")'
).copy()

series_results.loc[:, "fom_value"] = to_numeric_if_possible(
series_results["fom_value"]
series_results.loc[:, ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
series_results[ReportVars.FOM_VALUE.value]
)
series_results.loc[:, scale_var] = to_numeric_if_possible(series_results[scale_var])
series_results = series_results.set_index(scale_var)
Expand All @@ -544,7 +562,7 @@ def generate_plot_data(self):
self.draw_filler(perf_measure, scale_var, series, e)
continue

if series_results.loc[:, "fom_origin_type"].iloc[0] == "summary::mean":
if series_results.loc[:, ReportVars.FOM_ORIGIN_TYPE.value].iloc[0] == "summary::mean":
self.have_statistics = True

if self.have_statistics:
Expand All @@ -570,9 +588,9 @@ def add_idealized_data(self, raw_results, selected_data):
return selected_data

if self.normalize:
first_perf_value = selected_data["normalized_fom_value"].iloc[0]
first_perf_value = selected_data[ReportVars.NORMALIZED_FOM_VALUE.value].iloc[0]
else:
first_perf_value = selected_data["fom_value"].iloc[0]
first_perf_value = selected_data[ReportVars.FOM_VALUE.value].iloc[0]

if first_perf_value == 0:
logger.warn(
Expand All @@ -582,16 +600,18 @@ def add_idealized_data(self, raw_results, selected_data):

logger.debug(f"Normalizing data (by {first_perf_value})")

selected_data.loc[:, "ideal_perf_value"] = first_perf_value
selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] = first_perf_value

if self.better_direction == BetterDirection.LOWER:
selected_data["ideal_perf_value"] = selected_data.loc[:, "ideal_perf_value"] / (
selected_data[ReportVars.IDEAL_PERF_VALUE.value] = selected_data.loc[
:, ReportVars.IDEAL_PERF_VALUE.value
] / (
selected_data.index / selected_data.index[0] # set baseline scaling var to 1
)
elif self.better_direction == BetterDirection.HIGHER:
selected_data["ideal_perf_value"] = selected_data.loc[:, "ideal_perf_value"] * (
selected_data.index / selected_data.index[0]
)
selected_data[ReportVars.IDEAL_PERF_VALUE.value] = selected_data.loc[
:, ReportVars.IDEAL_PERF_VALUE.value
] * (selected_data.index / selected_data.index[0])

return selected_data

Expand Down Expand Up @@ -644,7 +664,10 @@ def draw(self, perf_measure, scale_var, series):
def add_idealized_data(self, raw_results, selected_data):
selected_data = super().add_idealized_data(raw_results, selected_data)

selected_data.loc[:, "ideal_perf_value"] = selected_data["ideal_perf_value"].iloc[0]
if ReportVars.IDEAL_PERF_VALUE.value in selected_data.columns:
selected_data.loc[:, ReportVars.IDEAL_PERF_VALUE.value] = selected_data[
ReportVars.IDEAL_PERF_VALUE.value
].iloc[0]
return selected_data


Expand All @@ -665,8 +688,8 @@ def normalize_data(
self,
data,
scale_to_index=True,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=True,
):
super().normalize_data(
Expand All @@ -685,17 +708,17 @@ def draw(self, perf_measure, scale_var, series):
class FomPlot(PlotGenerator):
def generate_plot_data(self):
results = self.results_df
all_foms = results.loc[:, "fom_name"].unique()
all_foms = results.loc[:, ReportVars.FOM_NAME.value].unique()
for fom in all_foms:
series_results = results.query(
f'fom_name == "{fom}" and (fom_origin_type == "application" or'
f'fom_name == "{fom}" and (fom_origin_type == "application" or '
'fom_origin_type == "modifier" or fom_origin_type == "summary::mean")'
).copy()

scale_var = "simplified_experiment_namespace"

series_results.loc[:, "fom_value"] = to_numeric_if_possible(
series_results["fom_value"]
series_results.loc[:, ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
series_results[ReportVars.FOM_VALUE.value]
)
series_results.loc[:, scale_var] = to_numeric_if_possible(series_results[scale_var])

Expand All @@ -704,7 +727,7 @@ def generate_plot_data(self):
if self.normalize:
self.normalize_data(series_results, scale_to_index=True)

if series_results.loc[:, "fom_origin_type"].iloc[0] == "summary::mean":
if series_results.loc[:, ReportVars.FOM_ORIGIN_TYPE.value].iloc[0] == "summary::mean":
self.have_statistics = True

if self.have_statistics:
Expand All @@ -718,7 +741,7 @@ def generate_plot_data(self):

self.output_df = series_results

unit = series_results.loc[:, "fom_units"].iloc[0]
unit = series_results.loc[:, ReportVars.FOM_UNITS.value].iloc[0]

perf_measure = fom
series = "experiment_name"
Expand All @@ -727,13 +750,15 @@ def generate_plot_data(self):
# TODO: dry bar plot drawing
def draw(self, perf_measure, scale_var, series, unit):
try:
self.output_df["fom_value"] = to_numeric_if_possible(self.output_df["fom_value"])
self.output_df[ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
self.output_df[ReportVars.FOM_VALUE.value]
)
except ValueError:
logger.warn(f"Skipping drawing of non numeric FOM: {perf_measure}")
return

# TODO: this should leverage the available min/max to add candle sticks
ax = self.output_df.plot(y="fom_value", kind="bar", figsize=self.figsize)
ax = self.output_df.plot(y=ReportVars.FOM_VALUE.value, kind="bar", figsize=self.figsize)
fig = ax.get_figure()

# ax.set_label('Label via method')
Expand Down Expand Up @@ -779,26 +804,33 @@ def generate_plot_data(self):
dimensions = []

for input_spec in self.spec:
if input_spec in self.results_df.loc[:, "fom_name"].values:
if input_spec in self.results_df.loc[:, ReportVars.FOM_NAME.value].values:
foms.append(input_spec)
else:
dimensions.append(input_spec)

if not dimensions:
dimensions.append("experiment_name")

raw_results = self.results_df[self.results_df.loc[:, "fom_name"].isin(foms)].copy()
raw_results = self.results_df[
self.results_df.loc[:, ReportVars.FOM_NAME.value].isin(foms)
].copy()

raw_results.loc[:, "Figure of Merit"] = (
raw_results.loc[:, "fom_name"] + " (" + raw_results.loc[:, "fom_units"] + ")"
raw_results.loc[:, ReportVars.FOM_NAME.value]
+ " ("
+ raw_results.loc[:, ReportVars.FOM_UNITS.value]
+ ")"
)

raw_results["fom_value"] = to_numeric_if_possible(raw_results["fom_value"])
raw_results[ReportVars.FOM_VALUE.value] = to_numeric_if_possible(
raw_results[ReportVars.FOM_VALUE.value]
)

plot_col = "fom_value"
plot_col = ReportVars.FOM_VALUE.value
if self.normalize:
self.normalize_data(raw_results)
plot_col = "normalized_fom_value"
plot_col = ReportVars.NORMALIZED_FOM_VALUE.value

# TODO: remove pivot?
compare_pivot = raw_results.pivot_table(
Expand Down Expand Up @@ -831,8 +863,8 @@ def normalize_data(
self,
data,
scale_to_index=True,
to_col="normalized_fom_value",
from_col="fom_value",
to_col=ReportVars.NORMALIZED_FOM_VALUE.value,
from_col=ReportVars.FOM_VALUE.value,
speedup=True,
):
super().normalize_data(
Expand All @@ -855,20 +887,20 @@ def draw_multiline(self, perf_measure, scale_var, y_label):
# TODO: prep_draw method in subclass ScalingPlotGenerator, not this class
fig, ax = self.prep_draw(perf_measure, scale_var)

for series in self.output_df.loc[:, "series"].unique():
for series in self.output_df.loc[:, ReportVars.SERIES.value].unique():
series_data = self.output_df.query(f'series == "{series}"').copy()
if self.normalize:
ax.plot(
series_data.index,
"normalized_fom_value",
ReportVars.NORMALIZED_FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{series} (Normalized)",
)
else:
ax.plot(
series_data.index,
"fom_value",
ReportVars.FOM_VALUE.value,
data=series_data,
marker="o",
label=f"{series}",
Expand All @@ -878,8 +910,8 @@ def draw_multiline(self, perf_measure, scale_var, y_label):
logger.debug("Adding fill lines for min and max")
ax.fill_between(
series_data.index,
"fom_value_min",
"fom_value_max",
ReportVars.FOM_VALUE_MIN.value,
ReportVars.FOM_VALUE_MAX.value,
data=series_data,
alpha=0.2,
)
Expand Down

0 comments on commit 264259c

Please sign in to comment.