Skip to content

Commit

Permalink
Rescale runtimes on LCBench early stopping problems (#3367)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #3367

On some of the problems, the typical epoch takes 40+ virtual seconds. This means that the simulator checks whether there is new data 40 * (50 epochs) * (30-50 trials) times, which makes these problems very slow to run.

This diff rescales the runtimes so that the median epoch for each LCBench problem takes one virtual time step. This does have a substantive impact on performance because it means that for trials that run faster than the median, multiple epochs can elapse before we check for early stopping. However, this is realistic.

Reviewed By: ltiao

Differential Revision: D69616468

fbshipit-source-id: 69d3ce1c433b4f5a4a76b53ce365d8b6d2686db3
  • Loading branch information
esantorella authored and facebook-github-bot committed Feb 19, 2025
1 parent 9ebd579 commit f32943f
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 6 deletions.
42 changes: 41 additions & 1 deletion ax/benchmark/problems/surrogate/lcbench/early_stopping.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,46 @@
"volkert": 64.02699279785156,
}

# Per-dataset scaling factors, keyed by LCBench dataset name. `step_runtime`
# multiplies the runtime surrogate's prediction by the entry for its dataset.
# Without rescaling, a typical epoch on some problems takes 40+ virtual
# seconds, which makes simulating those problems very slow.
# Chosen so that for the median parameterization, one step takes one virtual
# second.
RUNTIME_MULTIPLIERS = {
"APSFailure": 0.2498361853616576,
"Amazon_employee_access": 0.5596248586092226,
"Australian": 1.0748285031033658,
"Fashion-MNIST": 0.057873893027107395,
"KDDCup09_appetency": 0.2714829300383819,
"MiniBooNE": 0.20530997463252754,
"adult": 0.4423476551967684,
"airlines": 0.06473793535537586,
"albert": 0.06832237841522835,
"bank-marketing": 0.46517394252532845,
"blood-transfusion-service-center": 1.1101296769694071,
"car": 1.0536256049024968,
"christine": 0.025742718302424954,
"cnae-9": 0.08811760797353926,
"connect-4": 0.33489219890695243,
"covertype": 0.05049155246078877,
"credit-g": 1.0400726314123157,
"dionis": 0.0231601276801126,
"fabert": 0.08971358669025394,
"helena": 0.25008050673472376,
"higgs": 0.26990484596881176,
"jannis": 0.2828372999943685,
"jasmine": 0.7655180467265444,
"jungle_chess_2pcs_raw_endgame_complete": 0.47160243094434906,
"kc1": 1.0143178289557349,
"kr-vs-kp": 0.9390239320512418,
"mfeat-factors": 0.595676967891612,
"nomao": 0.4420599860962263,
"numerai28.6": 0.28377545234818863,
"phoneme": 0.9689051773179346,
"segment": 1.000676324600838,
"shuttle": 0.39362569776573014,
"sylvine": 0.9179851039769921,
"vehicle": 1.048848701347826,
"volkert": 0.28538440509808005,
}


class RegressorProtocol(Protocol):
"""
Expand Down Expand Up @@ -227,7 +267,7 @@ def evaluate_true(self, params: Mapping[str, TParamValue]) -> torch.Tensor:
def step_runtime(self, params: Mapping[str, TParamValue]) -> float:
    """Return the rescaled runtime of a single step for ``params``.

    The runtime surrogate predicts the raw runtime for this parameterization;
    the prediction is then multiplied by the ``RUNTIME_MULTIPLIERS`` entry for
    this problem's dataset.

    Args:
        params: A single parameterization, mapping parameter names to values.

    Returns:
        The surrogate's predicted runtime times the dataset's multiplier.

    Raises:
        KeyError: If ``self.dataset_name`` has no entry in
            ``RUNTIME_MULTIPLIERS``.
    """
    # A one-row frame: the surrogate is queried for one parameterization.
    X = pd.DataFrame.from_records(data=[params])
    Y = self.runtime_surrogate.predict(X=X)  # shape: (1,)
    # The stale unscaled `return Y.item()` used to sit above this line and
    # made the rescaling unreachable; only the scaled value is returned.
    return Y.item() * RUNTIME_MULTIPLIERS[self.dataset_name]


def get_lcbench_early_stopping_benchmark_problem(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,30 @@

from unittest.mock import patch

import numpy as np

from ax.benchmark.problems.surrogate.lcbench.early_stopping import (
BASELINE_VALUES,
get_lcbench_early_stopping_benchmark_problem,
LearningCurveBenchmarkTestFunction,
OPTIMAL_VALUES,
RUNTIME_MULTIPLIERS,
)
from ax.benchmark.problems.surrogate.lcbench.utils import (
BASELINE_VALUES,
DEFAULT_METRIC_NAME,
)
from ax.benchmark.problems.surrogate.lcbench.utils import DEFAULT_METRIC_NAME
from ax.utils.common.testutils import TestCase
from ax.utils.testing.benchmark_stubs import get_mock_lcbench_data
from pyre_extensions import assert_is_instance, none_throws


class TestEarlyStoppingProblem(TestCase):
def setUp(self) -> None:
    """Record the module path of the early-stopping problem factory.

    The tests in this class use the stored path as the prefix of their
    ``patch`` targets.
    """
    super().setUp()
    factory = get_lcbench_early_stopping_benchmark_problem
    self.early_stopping_path = factory.__module__

def test_get_lcbench_early_stopping_problem(self) -> None:
# Just test one problem for speed. We are mocking out the data load
# anyway, so there is nothing to distinguish these problems from each
Expand All @@ -29,13 +42,12 @@ def test_get_lcbench_early_stopping_problem(self) -> None:
seed = 27
dataset_name = "credit-g"

early_stopping_path = get_lcbench_early_stopping_benchmark_problem.__module__
with patch(
f"{early_stopping_path}.load_lcbench_data",
f"{self.early_stopping_path}.load_lcbench_data",
return_value=get_mock_lcbench_data(),
) as mock_load_lcbench_data, patch(
# Fitting a surrogate won't work with this small synthetic data
f"{early_stopping_path}._create_surrogate_regressor"
f"{self.early_stopping_path}._create_surrogate_regressor"
) as mock_create_surrogate_regressor:
problem = get_lcbench_early_stopping_benchmark_problem(
dataset_name=dataset_name,
Expand All @@ -61,3 +73,32 @@ def test_get_lcbench_early_stopping_problem(self) -> None:
self.assertIsNone(problem.step_runtime_function)
self.assertEqual(problem.optimal_value, OPTIMAL_VALUES[dataset_name])
self.assertEqual(problem.baseline_value, BASELINE_VALUES[dataset_name])

def test_step_scaling(self) -> None:
# Checks that the problem's step runtime function returns the runtime
# surrogate's prediction multiplied by the RUNTIME_MULTIPLIERS entry for the
# dataset.
dataset_name = "car"
# Build the problem with data loading and surrogate creation patched out.
with (
patch(
f"{self.early_stopping_path}.load_lcbench_data",
return_value=get_mock_lcbench_data(),
),
patch(
# Fitting a surrogate won't work with this small synthetic data
f"{self.early_stopping_path}._create_surrogate_regressor"
),
):
problem = get_lcbench_early_stopping_benchmark_problem(
dataset_name=dataset_name,
)

# Arbitrary known value that the stubbed surrogate will "predict".
predicted_runtime = 1234.5
test_function = assert_is_instance(
problem.test_function, LearningCurveBenchmarkTestFunction
)
# Replace the surrogate's predict with a stub returning a one-element array,
# so the expected scaled runtime can be computed exactly.
# pyre-fixme[8]: Incompatible attribute type -- not a bound method
test_function.runtime_surrogate.predict = lambda X: np.array(
[predicted_runtime]
)
# The step runtime must equal the stubbed prediction times the multiplier.
self.assertEqual(
none_throws(problem.step_runtime_function)(params={"param": 0}),
predicted_runtime * RUNTIME_MULTIPLIERS[dataset_name],
)
1 change: 1 addition & 0 deletions ax/benchmark/tests/problems/test_lcbench_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


class TestLCBenchBenchmark(TestCase):
@TestCase.ax_long_test(reason="Training random forest regressor")
def test_lcbench_predictions(self) -> None:
self.assertEqual(len(DEFAULT_AND_OPTIMAL_VALUES), 22)
# NOTE: lots of tasks, so testing only one here o/w this is very slow
Expand Down

0 comments on commit f32943f

Please sign in to comment.