From 1aca64dd258436f1b94eaf0d037a5bdd9c532b54 Mon Sep 17 00:00:00 2001 From: Miles Olson Date: Wed, 19 Feb 2025 13:53:53 -0800 Subject: [PATCH] Add by_wallclock_time option to ProgressionPlot (#3383) Summary: Allows a user to specify that they would like to plot with wallclock time on the x axis. Starts at t=0 as the earliest start time Reviewed By: saitcakmak Differential Revision: D69800761 --- ax/analysis/plotly/progression.py | 123 +++++++++++++++++-- ax/analysis/plotly/tests/test_progression.py | 25 +++- 2 files changed, 133 insertions(+), 15 deletions(-) diff --git a/ax/analysis/plotly/progression.py b/ax/analysis/plotly/progression.py index eea108023a8..54200bf9bd2 100644 --- a/ax/analysis/plotly/progression.py +++ b/ax/analysis/plotly/progression.py @@ -4,6 +4,9 @@ # LICENSE file in the root directory of this source tree. # pyre-strict +from logging import Logger + +import numpy as np import plotly.express as px from ax.analysis.analysis import AnalysisCardLevel @@ -14,7 +17,11 @@ from ax.core.trial_status import TrialStatus from ax.exceptions.core import UserInputError from ax.generation_strategy.generation_strategy import GenerationStrategy +from ax.utils.common.logger import get_logger from plotly import graph_objects as go +from pyre_extensions import assert_is_instance + +logger: Logger = get_logger(__name__) class ProgressionPlot(PlotlyAnalysis): @@ -28,17 +35,24 @@ class ProgressionPlot(PlotlyAnalysis): - arm_name: The name of the arm - METRIC_NAME: The observed mean of the metric specified - progression: The progression at which the metric was observed + - wallclock_time: The wallclock time at which the metric was observed, in + seconds and starting at 0 from the first trial's start time. """ - def __init__(self, metric_name: str | None = None) -> None: + def __init__( + self, metric_name: str | None = None, by_wallclock_time: bool = False + ) -> None: """ Args: metric_name: The name of the metric to plot. If not specified the objective will be used. 
Note that the metric cannot be inferred for multi-objective or scalarized-objective experiments. + by_wallclock_time: If True, plot the relative wallclock time instead of the + progression on the x-axis. """ self._metric_name = metric_name + self._by_wallclock_time = by_wallclock_time def compute( self, @@ -74,28 +88,111 @@ def compute( for trial in experiment.trials_by_status[TrialStatus.EARLY_STOPPED] ] ), - ["mean", map_key], + ["trial_index", "mean", map_key], ].rename(columns={map_key: "progression", "mean": metric_name}) + # Add the wallclock time column + wallclock_series = _calculate_wallclock_timeseries( + experiment=experiment, metric_name=metric_name + ) + + df["wallclock_time"] = df.apply( + lambda row: wallclock_series[row["trial_index"]][row["progression"]], + axis=1, + ) + if len(terminal_points) > 0: + terminal_points["wallclock_time"] = terminal_points.apply( + lambda row: wallclock_series[row["trial_index"]][row["progression"]], + axis=1, + ) + # Plot the progression lines with one curve for each arm. - fig = px.line(df, x="progression", y=metric_name, color="arm_name") + if self._by_wallclock_time: + x_axis_name = "wallclock_time" + else: + x_axis_name = "progression" + + fig = px.line(df, x=x_axis_name, y=metric_name, color="arm_name") # Add a marker for each terminal point on early stopped trials. 
- fig.add_trace( - go.Scatter( - x=terminal_points["progression"], - y=terminal_points[metric_name], - mode="markers", - showlegend=False, - line_color="red", - hoverinfo="none", + if len(terminal_points) > 0: + fig.add_trace( + go.Scatter( + x=terminal_points[x_axis_name], + y=terminal_points[metric_name], + mode="markers", + showlegend=False, + line_color="red", + hoverinfo="none", + ) ) - ) return self._create_plotly_analysis_card( - title=f"{metric_name} by progression", + title=f"{metric_name} by {x_axis_name.replace('_', ' ')}", subtitle="Observe how the metric changes as each trial progresses", level=AnalysisCardLevel.MID, df=df, fig=fig, ) + + +def _calculate_wallclock_timeseries( + experiment: Experiment, + metric_name: str, +) -> dict[int, dict[float, float]]: + """ + Calculate a mapping from each trial index and progression to the time since the + first trial started, in seconds. Assume that the first trial started at t=0, and + that progressions are linearly spaced between the start and completion times of + each trial. + + If a trial does not have either a start or completion time the wallclock time + cannot be calculated and the value will be nan (which will not be plotted). + + Returns: + trial_index => (progression => timestamp) + """ + # Find the earliest start time. + start_time = min( + trial.time_run_started.timestamp() + for trial in experiment.trials.values() + if trial.time_run_started is not None + ) + # Calculate all start and completion times relative to the earliest start time. + # Give nan for trials that don't have a start or completion time. 
+ relative_timestamps = { + idx: ( + trial.time_run_started.timestamp() - start_time + if trial.time_run_started is not None + else np.nan, + trial.time_completed.timestamp() - start_time + if trial.time_completed is not None + else np.nan, + ) + for idx, trial in experiment.trials.items() + } + + data = assert_is_instance(experiment.lookup_data(), MapData) + df = data.map_df[data.map_df["metric_name"] == metric_name] + map_key = data.map_key_infos[0].key + + return { + trial_index: dict( + zip( + df[df["trial_index"] == trial_index][map_key].to_numpy(), + # Map the progressions to linspace if the start and completion times + # are both available, otherwise map to nans + np.linspace( + relative_timestamps[trial_index][0], + relative_timestamps[trial_index][1], + len(df[df["trial_index"] == trial_index]), + ) + if ( + relative_timestamps[trial_index][0] is not None + and relative_timestamps[trial_index][1] is not None + ) + else np.full(len(df[df["trial_index"] == trial_index]), np.nan), + ) + ) + for trial_index in experiment.trials.keys() + } diff --git a/ax/analysis/plotly/tests/test_progression.py b/ax/analysis/plotly/tests/test_progression.py index e735242ad2b..9d110326019 100644 --- a/ax/analysis/plotly/tests/test_progression.py +++ b/ax/analysis/plotly/tests/test_progression.py @@ -5,8 +5,12 @@ # pyre-strict +import pandas as pd from ax.analysis.analysis import AnalysisCardLevel -from ax.analysis.plotly.progression import ProgressionPlot +from ax.analysis.plotly.progression import ( + _calculate_wallclock_timeseries, + ProgressionPlot, +) from ax.exceptions.core import UserInputError from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import get_test_map_data_experiment @@ -33,8 +37,25 @@ def test_compute(self) -> None: ) self.assertEqual(card.level, AnalysisCardLevel.MID) self.assertEqual( - {*card.df.columns}, {"trial_index", "arm_name", "branin_map", "progression"} + {*card.df.columns}, + {"trial_index", "arm_name", 
"branin_map", "progression", "wallclock_time"}, ) self.assertIsNotNone(card.blob) self.assertEqual(card.blob_annotation, "plotly") + + def test_calculate_wallclock_timeseries(self) -> None: + experiment = get_test_map_data_experiment( + num_trials=2, num_fetches=5, num_complete=2 + ) + wallclock_timeseries = _calculate_wallclock_timeseries( + experiment=experiment, metric_name="branin_map" + ) + + self.assertEqual(len(wallclock_timeseries), 2) + self.assertTrue( + all(len(timeseries) == 5 for timeseries in wallclock_timeseries.values()) + ) + + for timeseries in wallclock_timeseries.values(): + self.assertTrue(pd.Series(timeseries).is_monotonic_increasing)