Skip to content

Commit

Permalink
Include progression information as metadata when transforming (Map)Da…
Browse files Browse the repository at this point in the history
…ta to Observations (facebook#3001)

Summary:
Pull Request resolved: facebook#3001

This updates `observations_from_data` to include progression info as observation feature metadata by default. More specifically:

- Updates `observations_from_data` to subsume behavior of `observations_from_map_data` as special case.
- Updates calls to `observations_from_map_data` to instead call `observations_from_data`.
- Removes `observations_from_map_data`, which is used exclusively by `MapTorchModelBridge`.

Reviewed By: saitcakmak

Differential Revision: D65255312
  • Loading branch information
ltiao authored and facebook-github-bot committed Feb 14, 2025
1 parent 1024993 commit db972a8
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 74 deletions.
86 changes: 28 additions & 58 deletions ax/core/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def get_feature_cols(data: Data, is_map_data: bool = False) -> list[str]:
feature_cols = OBS_COLS.intersection(data.df.columns)
# note we use this check, rather than isinstance, since
# only some Adapters (e.g. MapTorchAdapter)
# use observations_from_map_data, which is required
# use observations_from_data, which is required
# to properly handle MapData features (e.g. fidelity).
if is_map_data:
data = assert_is_instance(data, MapData)
Expand All @@ -464,74 +464,36 @@ def observations_from_data(
data: Data,
statuses_to_include: set[TrialStatus] | None = None,
statuses_to_include_map_metric: set[TrialStatus] | None = None,
) -> list[Observation]:
"""Convert Data to observations.
Converts a Data object to a list of Observation objects. Pulls arm parameters from
experiment. Overrides fidelity parameters in the arm with those found in the
Data object.
Uses a diagonal covariance matrix across metric_names.
Args:
experiment: Experiment with arm parameters.
data: Data of observations.
statuses_to_include: data from non-MapMetrics will only be included for trials
with statuses in this set. Defaults to all statuses except abandoned.
statuses_to_include_map_metric: data from MapMetrics will only be included for
trials with statuses in this set. Defaults to completed status only.
Returns:
List of Observation objects.
"""
if statuses_to_include is None:
statuses_to_include = NON_ABANDONED_STATUSES
if statuses_to_include_map_metric is None:
statuses_to_include_map_metric = {TrialStatus.COMPLETED}
feature_cols = get_feature_cols(data)
return _observations_from_dataframe(
experiment=experiment,
df=data.df,
cols=feature_cols,
statuses_to_include=statuses_to_include,
statuses_to_include_map_metric=statuses_to_include_map_metric,
map_keys=[],
)


def observations_from_map_data(
experiment: experiment.Experiment,
map_data: MapData,
statuses_to_include: set[TrialStatus] | None = None,
statuses_to_include_map_metric: set[TrialStatus] | None = None,
map_keys_as_parameters: bool = False,
limit_rows_per_metric: int | None = None,
limit_rows_per_group: int | None = None,
) -> list[Observation]:
"""Convert MapData to observations.
"""Convert Data (or MapData) to observations.
Converts a MapData object to a list of Observation objects. Pulls arm parameters
from experiment. Overrides fidelity parameters in the arm with those found in the
Data object.
Converts a Data (or MapData) object to a list of Observation objects.
Pulls arm parameters from experiment. Overrides fidelity parameters
in the arm with those found in the Data object.
Uses a diagonal covariance matrix across metric_names.
Args:
experiment: Experiment with arm parameters.
map_data: MapData of observations.
data: Data (or MapData) of observations.
statuses_to_include: data from non-MapMetrics will only be included for trials
with statuses in this set. Defaults to all statuses except abandoned.
statuses_to_include_map_metric: data from MapMetrics will only be included for
trials with statuses in this set. Defaults to all statuses except abandoned.
map_keys_as_parameters: Whether map_keys should be returned as part of
the parameters of the Observation objects.
limit_rows_per_metric: If specified, uses MapData.subsample() with
limit_rows_per_metric: If specified, and if data is an instance of MapData,
uses MapData.subsample() with
`limit_rows_per_metric` equal to the specified value on the first
map_key (data.map_keys[0]) to subsample the MapData. This is
useful in, e.g., cases where learning curves are frequently
updated, leading to an intractable number of Observation objects
created.
limit_rows_per_group: If specified, uses MapData.subsample() with
limit_rows_per_group: If specified, and if data is an instance of MapData,
uses MapData.subsample() with
`limit_rows_per_group` equal to the specified value on the first
map_key (data.map_keys[0]) to subsample the MapData.
Expand All @@ -542,19 +504,27 @@ def observations_from_map_data(
statuses_to_include = NON_ABANDONED_STATUSES
if statuses_to_include_map_metric is None:
statuses_to_include_map_metric = NON_ABANDONED_STATUSES
if limit_rows_per_metric is not None or limit_rows_per_group is not None:
map_data = map_data.subsample(
map_key=map_data.map_keys[0],
limit_rows_per_metric=limit_rows_per_metric,
limit_rows_per_group=limit_rows_per_group,
include_first_last=True,
)
feature_cols = get_feature_cols(map_data, is_map_data=True)
is_map_data = isinstance(data, MapData)
map_keys = []
if is_map_data:
data = assert_is_instance(data, MapData)
map_keys.extend(data.map_keys)
if limit_rows_per_metric is not None or limit_rows_per_group is not None:
data = data.subsample(
map_key=map_keys[0],
limit_rows_per_metric=limit_rows_per_metric,
limit_rows_per_group=limit_rows_per_group,
include_first_last=True,
)
df = data.map_df
else:
df = data.df
feature_cols = get_feature_cols(data, is_map_data=is_map_data)
return _observations_from_dataframe(
experiment=experiment,
df=map_data.map_df,
df=df,
cols=feature_cols,
map_keys=map_data.map_keys,
map_keys=map_keys,
statuses_to_include=statuses_to_include,
statuses_to_include_map_metric=statuses_to_include_map_metric,
map_keys_as_parameters=map_keys_as_parameters,
Expand Down
3 changes: 1 addition & 2 deletions ax/core/tests/test_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
ObservationData,
ObservationFeatures,
observations_from_data,
observations_from_map_data,
recombine_observations,
separate_observations,
)
Expand Down Expand Up @@ -475,7 +474,7 @@ def test_ObservationsFromMapData(self) -> None:
MapKeyInfo(key="timestamp", default_value=0.0),
],
)
observations = observations_from_map_data(experiment, data)
observations = observations_from_data(experiment, data)

self.assertEqual(len(observations), 3)

Expand Down
12 changes: 2 additions & 10 deletions ax/modelbridge/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@
from ax.core.data import Data
from ax.core.experiment import Experiment
from ax.core.generator_run import extract_arm_predictions, GeneratorRun
from ax.core.map_data import MapData
from ax.core.observation import (
Observation,
ObservationData,
ObservationFeatures,
observations_from_data,
observations_from_map_data,
recombine_observations,
separate_observations,
)
Expand Down Expand Up @@ -299,19 +297,13 @@ def _prepare_observations(
) -> list[Observation]:
if experiment is None or data is None:
return []
if not self._fit_only_completed_map_metrics and isinstance(data, MapData):
return observations_from_map_data(
experiment=experiment,
map_data=data,
map_keys_as_parameters=True,
statuses_to_include=self.statuses_to_fit,
statuses_to_include_map_metric=self.statuses_to_fit_map_metric,
)
map_keys_as_parameters = not self._fit_only_completed_map_metrics
return observations_from_data(
experiment=experiment,
data=data,
statuses_to_include=self.statuses_to_fit,
statuses_to_include_map_metric=self.statuses_to_fit_map_metric,
map_keys_as_parameters=map_keys_as_parameters,
)

def _transform_data(
Expand Down
8 changes: 4 additions & 4 deletions ax/modelbridge/map_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Observation,
ObservationData,
ObservationFeatures,
observations_from_map_data,
observations_from_data,
separate_observations,
)
from ax.core.optimization_config import OptimizationConfig
Expand Down Expand Up @@ -256,14 +256,14 @@ def _prepare_observations(
"""
if experiment is None or data is None:
return []
return observations_from_map_data(
return observations_from_data(
experiment=experiment,
map_data=data, # pyre-ignore[6]: Checked in __init__.
map_keys_as_parameters=True,
data=data,
limit_rows_per_metric=self._map_data_limit_rows_per_metric,
limit_rows_per_group=self._map_data_limit_rows_per_group,
statuses_to_include=self.statuses_to_fit,
statuses_to_include_map_metric=self.statuses_to_fit_map_metric,
map_keys_as_parameters=True,
)

def _compute_in_design(
Expand Down

0 comments on commit db972a8

Please sign in to comment.