From 480c8286da99f33e5311079a460e754c71e152f5 Mon Sep 17 00:00:00 2001 From: Louis Tiao Date: Thu, 13 Feb 2025 12:50:01 -0800 Subject: [PATCH] Include progression information as metadata when transforming (Map)Data to Observations (#3001) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/3001 This updates `observations_from_data` to include progression info as observation feature metadata by default. More specifically: - Updates `observations_from_data` to subsume behavior of `observations_from_map_data` as special case. - Updates calls to `observations_from_map_data` to instead call `observations_from_data` - Removes `observations_from_map_data` which is used exclusively by `MapTorchModelBridge` Reviewed By: saitcakmak Differential Revision: D65255312 --- ax/core/observation.py | 88 ++++++++++--------------------- ax/core/tests/test_observation.py | 3 +- ax/modelbridge/base.py | 1 + ax/modelbridge/map_torch.py | 8 +-- 4 files changed, 35 insertions(+), 65 deletions(-) diff --git a/ax/core/observation.py b/ax/core/observation.py index c73d0075add..3b6f85e90ab 100644 --- a/ax/core/observation.py +++ b/ax/core/observation.py @@ -439,7 +439,7 @@ def get_feature_cols(data: Data, is_map_data: bool = False) -> list[str]: feature_cols = OBS_COLS.intersection(data.df.columns) # note we use this check, rather than isinstance, since # only some Adapters (e.g. MapTorchAdapter) - # use observations_from_map_data, which is required + # use observations_from_data, which is required # to properly handle MapData features (e.g. fidelity). if is_map_data: data = assert_is_instance(data, MapData) @@ -461,77 +461,39 @@ def get_feature_cols(data: Data, is_map_data: bool = False) -> list[str]: def observations_from_data( experiment: experiment.Experiment, - data: Data, - statuses_to_include: set[TrialStatus] | None = None, - statuses_to_include_map_metric: set[TrialStatus] | None = None, -) -> list[Observation]: - """Convert Data to observations. - - Converts a Data object to a list of Observation objects. Pulls arm parameters from - from experiment. Overrides fidelity parameters in the arm with those found in the - Data object. - - Uses a diagonal covariance matrix across metric_names. - - Args: - experiment: Experiment with arm parameters. - data: Data of observations. - statuses_to_include: data from non-MapMetrics will only be included for trials - with statuses in this set. Defaults to all statuses except abandoned. - statuses_to_include_map_metric: data from MapMetrics will only be included for - trials with statuses in this set. Defaults to completed status only. - - Returns: - List of Observation objects. - """ - if statuses_to_include is None: - statuses_to_include = NON_ABANDONED_STATUSES - if statuses_to_include_map_metric is None: - statuses_to_include_map_metric = {TrialStatus.COMPLETED} - feature_cols = get_feature_cols(data) - return _observations_from_dataframe( - experiment=experiment, - df=data.df, - cols=feature_cols, - statuses_to_include=statuses_to_include, - statuses_to_include_map_metric=statuses_to_include_map_metric, - map_keys=[], - ) - - -def observations_from_map_data( - experiment: experiment.Experiment, - map_data: MapData, + data: Data | MapData, statuses_to_include: set[TrialStatus] | None = None, statuses_to_include_map_metric: set[TrialStatus] | None = None, map_keys_as_parameters: bool = False, limit_rows_per_metric: int | None = None, limit_rows_per_group: int | None = None, ) -> list[Observation]: - """Convert MapData to observations. + """Convert Data (or MapData) to observations. - Converts a MapData object to a list of Observation objects. Pulls arm parameters - from experiment. Overrides fidelity parameters in the arm with those found in the - Data object. + Converts a Data (or MapData) object to a list of Observation objects. + Pulls arm parameters from from experiment. Overrides fidelity parameters + in the arm with those found in the Data object. Uses a diagonal covariance matrix across metric_names. Args: experiment: Experiment with arm parameters. - map_data: MapData of observations. + data: Data (or MapData) of observations. statuses_to_include: data from non-MapMetrics will only be included for trials with statuses in this set. Defaults to all statuses except abandoned. statuses_to_include_map_metric: data from MapMetrics will only be included for trials with statuses in this set. Defaults to all statuses except abandoned. map_keys_as_parameters: Whether map_keys should be returned as part of the parameters of the Observation objects. - limit_rows_per_metric: If specified, uses MapData.subsample() with + limit_rows_per_metric: If specified, and if data is an instance of MapData, + uses MapData.subsample() with `limit_rows_per_metric` equal to the specified value on the first map_key (map_data.map_keys[0]) to subsample the MapData. This is useful in, e.g., cases where learning curves are frequently updated, leading to an intractable number of Observation objects created. - limit_rows_per_group: If specified, uses MapData.subsample() with + limit_rows_per_group: If specified, and if data is an instance of MapData, + uses MapData.subsample() with `limit_rows_per_group` equal to the specified value on the first map_key (map_data.map_keys[0]) to subsample the MapData. @@ -542,19 +504,27 @@ def observations_from_map_data( statuses_to_include = NON_ABANDONED_STATUSES if statuses_to_include_map_metric is None: statuses_to_include_map_metric = NON_ABANDONED_STATUSES - if limit_rows_per_metric is not None or limit_rows_per_group is not None: - map_data = map_data.subsample( - map_key=map_data.map_keys[0], - limit_rows_per_metric=limit_rows_per_metric, - limit_rows_per_group=limit_rows_per_group, - include_first_last=True, - ) - feature_cols = get_feature_cols(map_data, is_map_data=True) + is_map_data = isinstance(data, MapData) + map_keys = [] + if is_map_data: + data = assert_is_instance(data, MapData) + map_keys.extend(data.map_keys) + if limit_rows_per_metric is not None or limit_rows_per_group is not None: + data = data.subsample( + map_key=map_keys[0], + limit_rows_per_metric=limit_rows_per_metric, + limit_rows_per_group=limit_rows_per_group, + include_first_last=True, + ) + df = data.map_df + else: + df = data.df + feature_cols = get_feature_cols(data, is_map_data=is_map_data) return _observations_from_dataframe( experiment=experiment, - df=map_data.map_df, + df=df, cols=feature_cols, - map_keys=map_data.map_keys, + map_keys=map_keys, statuses_to_include=statuses_to_include, statuses_to_include_map_metric=statuses_to_include_map_metric, map_keys_as_parameters=map_keys_as_parameters, diff --git a/ax/core/tests/test_observation.py b/ax/core/tests/test_observation.py index c0b7b8f7a2e..10ec6564a16 100644 --- a/ax/core/tests/test_observation.py +++ b/ax/core/tests/test_observation.py @@ -23,7 +23,6 @@ ObservationData, ObservationFeatures, observations_from_data, - observations_from_map_data, recombine_observations, separate_observations, ) @@ -475,7 +474,7 @@ def test_ObservationsFromMapData(self) -> None: MapKeyInfo(key="timestamp", default_value=0.0), ], ) - observations = observations_from_map_data(experiment, data) + observations = observations_from_data(experiment, data) self.assertEqual(len(observations), 3) diff --git a/ax/modelbridge/base.py b/ax/modelbridge/base.py index dc1e918896f..9b4db83904c 100644 --- a/ax/modelbridge/base.py +++ b/ax/modelbridge/base.py @@ -312,6 +312,7 @@ def _prepare_observations( data=data, statuses_to_include=self.statuses_to_fit, statuses_to_include_map_metric=self.statuses_to_fit_map_metric, + map_keys_as_parameters=False, ) def _transform_data( diff --git a/ax/modelbridge/map_torch.py b/ax/modelbridge/map_torch.py index 5fdaa6a4d9d..656f10ffa8a 100644 --- a/ax/modelbridge/map_torch.py +++ b/ax/modelbridge/map_torch.py @@ -19,7 +19,7 @@ Observation, ObservationData, ObservationFeatures, - observations_from_map_data, + observations_from_data, separate_observations, ) from ax.core.optimization_config import OptimizationConfig @@ -256,14 +256,14 @@ def _prepare_observations( """ if experiment is None or data is None: return [] - return observations_from_map_data( + return observations_from_data( experiment=experiment, - map_data=data, # pyre-ignore[6]: Checked in __init__. - map_keys_as_parameters=True, + data=data, limit_rows_per_metric=self._map_data_limit_rows_per_metric, limit_rows_per_group=self._map_data_limit_rows_per_group, statuses_to_include=self.statuses_to_fit, statuses_to_include_map_metric=self.statuses_to_fit_map_metric, + map_keys_as_parameters=True, ) def _compute_in_design(