Skip to content

Commit

Permalink
Merge pull request #274 from xopt-org/grid_evaluate
Browse files Browse the repository at this point in the history
Grid evaluate
  • Loading branch information
roussel-ryan authored Feb 10, 2025
2 parents 33d1a83 + f24ff70 commit ba6acb9
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 45 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,5 +129,4 @@ private_tests/*
/.virtual_documents
*.csv

# don't include vscode settings
/.vscode
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"numpy",
"pydantic>=2.3",
"pyyaml",
"botorch==0.12.0",
"botorch",
"scipy>=1.10.1",
"pandas",
"ipywidgets",
Expand Down
26 changes: 26 additions & 0 deletions xopt/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,32 @@ def random_evaluate(
result = self.evaluate_data(random_inputs)
return result

def grid_evaluate(
    self,
    n_samples: Union[int, Dict[str, int]],
    custom_bounds: dict = None,
):
    """
    Evaluate the objective on a regular mesh of input points.

    The mesh is produced by ``self.vocs.grid_inputs`` and handed to
    ``self.evaluate_data``; whatever that call returns is returned here.

    Parameters
    ----------
    n_samples : int or dict
        Number of mesh points per axis. A single int applies to every axis;
        a dict gives the count for each variable by name.
    custom_bounds : dict, optional
        Vocs-like ranges forwarded to ``grid_inputs`` to restrict the mesh.

    Returns
    -------
    pd.DataFrame
        The result of ``self.evaluate_data`` for the mesh points.
    """
    return self.evaluate_data(
        self.vocs.grid_inputs(n_samples, custom_bounds=custom_bounds)
    )

def yaml(self, **kwargs):
"""
Serialize the Xopt configuration to a YAML string.
Expand Down
3 changes: 2 additions & 1 deletion xopt/generators/bayesian/custom_botorch/heteroskedastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import torch
from botorch.models import SingleTaskGP
from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import Log, OutcomeTransform
Expand All @@ -21,6 +20,8 @@
from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
from torch import Tensor

# Module-level constant: a 0-dim tensor holding 1e-4.
# NOTE(review): shares its name with the constant exported by
# botorch.models.gp_regression -- presumably the lower bound applied to
# inferred noise; confirm at the use sites.
MIN_INFERRED_NOISE_LEVEL = torch.tensor(1e-4)


class XoptHeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
r"""
Expand Down
11 changes: 9 additions & 2 deletions xopt/numerical_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,20 @@ def optimize(self, function, bounds, n_candidates=1, **kwargs):
if len(bounds) != 2:
raise ValueError("bounds must have the shape [2, ndim]")

candidates, out = optimize_acqf(
# empirical testing showed that botorch slightly overruns the requested max time;
# compensate by passing a slightly reduced time budget to optimize_acqf
if self.max_time is not None:
max_time = self.max_time * 0.8 - 0.01
else:
max_time = None

candidates, _ = optimize_acqf(
acq_function=function,
bounds=bounds,
q=n_candidates,
raw_samples=self.n_restarts,
num_restarts=self.n_restarts,
timeout_sec=self.max_time,
timeout_sec=max_time,
options={"maxiter": self.max_iter},
**kwargs,
)
Expand Down
39 changes: 1 addition & 38 deletions xopt/tests/generators/bayesian/test_model_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import torch
import yaml
from botorch import fit_gpytorch_mll
from botorch.models import HeteroskedasticSingleTaskGP, SingleTaskGP
from botorch.models import SingleTaskGP
from botorch.models.transforms import Normalize, Standardize
from gpytorch import ExactMarginalLogLikelihood
from gpytorch.kernels import PeriodicKernel, PolynomialKernel, ScaleKernel
Expand All @@ -22,7 +22,6 @@
)
from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator
from xopt.generators.bayesian.models.standard import StandardModelConstructor
from xopt.generators.bayesian.utils import get_training_data
from xopt.resources.testing import TEST_VOCS_BASE, TEST_VOCS_DATA
from xopt.vocs import VOCS

Expand Down Expand Up @@ -467,43 +466,7 @@ def test_heteroskedastic(self):
test_data["y1_var"] = test_data["y1"] * 0.1
model = gp_constructor.build_model_from_vocs(test_vocs, test_data)

# validate against botorch HeteroskedasticSingleTaskGP
train_x, train_y, train_yvar = get_training_data(
test_vocs.variable_names, "y1", test_data
)
bounds = torch.vstack(
[
torch.tensor(test_vocs.variables[name])
for name in test_vocs.variable_names
]
).T

# create transform
input_transform = Normalize(len(test_vocs.variable_names), bounds=bounds)

bmodel = HeteroskedasticSingleTaskGP(
train_x,
train_y,
train_yvar,
input_transform=input_transform,
outcome_transform=Standardize(1),
)
mll = ExactMarginalLogLikelihood(bmodel.likelihood, bmodel)

# TODO: model fitting fails sometimes
fit_gpytorch_mll(mll)

assert isinstance(model.models[0], XoptHeteroskedasticSingleTaskGP)
test_x = torch.rand(20, len(test_vocs.variable_names))
with torch.no_grad():
posterior = model.posterior(test_x.unsqueeze(1))
bposterior = bmodel.posterior(test_x.unsqueeze(1))
assert torch.allclose(
posterior.mean[..., 0].flatten(), bposterior.mean.flatten()
)
assert torch.allclose(
posterior.variance[..., 0].flatten(), bposterior.variance.flatten()
)

def test_custom_noise_prior(self):
test_data = deepcopy(TEST_VOCS_DATA)
Expand Down
4 changes: 2 additions & 2 deletions xopt/tests/test_numerical_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ def test_lbfgs_optimizer(self):
assert candidates.shape == torch.Size([ncandidate, ndim])

# test max time
max_time_optimizer = LBFGSOptimizer(max_time=0.01)
max_time_optimizer = LBFGSOptimizer(max_time=1.0)
ndim = 1
bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
for ncandidate in [1, 3]:
start_time = time.time()
candidates = max_time_optimizer.optimize(f, bounds, ncandidate)
assert time.time() - start_time < 0.1
assert time.time() - start_time < 1.0
assert candidates.shape == torch.Size([ncandidate, ndim])

def test_grid_optimizer(self):
Expand Down
58 changes: 58 additions & 0 deletions xopt/tests/test_vocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,64 @@ def test_properties(self):
assert vocs.n_outputs == 2
assert vocs.variable_names == ["x1"]

def test_grid_inputs(self):
    """Exercise VOCS.grid_inputs: defaults, custom bounds, invalid bounds,
    constants switched off, and per-variable point counts."""

    def check_grid(frame, expected_rows, with_constant=True):
        # Shared structural checks for every generated grid.
        assert isinstance(frame, pd.DataFrame)
        expected_cols = 3 if with_constant else 2  # 2 variables (+ 1 constant)
        assert frame.shape == (expected_rows, expected_cols)
        assert "x1" in frame.columns
        assert "x2" in frame.columns
        if with_constant:
            assert "c1" in frame.columns
            assert np.all(frame["c1"] == 5.0)
        else:
            assert "c1" not in frame.columns

    # Two variables on the unit square plus one constant
    vocs = VOCS(
        variables={"x1": [0.0, 1.0], "x2": [0.0, 1.0]},
        constraints={},
        objectives={},
        constants={"c1": 5.0},
        observables=[],
    )
    n = 5

    # Default parameters
    check_grid(vocs.grid_inputs(n=n), n**2)

    # Custom bounds restrict the sampled region
    custom_bounds = {"x1": [0.2, 0.8], "x2": [0.1, 0.9]}
    bounded = vocs.grid_inputs(n=n, custom_bounds=custom_bounds)
    check_grid(bounded, n**2)
    assert np.all(bounded["x1"] >= 0.2) and np.all(bounded["x1"] <= 0.8)
    assert np.all(bounded["x2"] >= 0.1) and np.all(bounded["x2"] <= 0.9)

    # Lower bound above upper bound must be rejected
    invalid_custom_bounds = {
        "x1": [1.2, 0.8],  # Invalid bounds
        "x2": [0.1, 0.9],
    }
    with pytest.raises(ValueError):
        vocs.grid_inputs(n=n, custom_bounds=invalid_custom_bounds)

    # Constants can be left out of the result
    check_grid(
        vocs.grid_inputs(n=n, include_constants=False), n**2, with_constant=False
    )

    # A different number of points for each variable
    n_dict = {"x1": 3, "x2": 4}
    check_grid(vocs.grid_inputs(n=n_dict), 3 * 4)

def test_random_sampling_custom_bounds(self):
vocs = deepcopy(TEST_VOCS_BASE)

Expand Down
103 changes: 103 additions & 0 deletions xopt/vocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,109 @@ def random_inputs(
else:
return pd.DataFrame(inputs).to_dict("records")

def grid_inputs(
    self,
    n: Union[int, Dict[str, int]],
    custom_bounds: dict = None,
    include_constants: bool = True,
) -> pd.DataFrame:
    """
    Generate a meshgrid of inputs.

    Parameters
    ----------
    n : Union[int, Dict[str, int]]
        Number of points to generate along each axis. If an integer is
        provided, the same number of points is used for all variables. If a
        dictionary is provided, it should have variable names as keys and the
        number of points as values.
    custom_bounds : dict, optional
        Custom bounds for the variables. If None, the default bounds from
        `self.variables` are used. The dictionary should have variable names
        as keys and a list of two values [min, max] as values.
    include_constants : bool, optional
        If True, include constant values from `self.constants` in the output
        DataFrame.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the generated meshgrid of inputs. Each column
        corresponds to a variable (or constant), and each row represents a
        point in the grid.

    Raises
    ------
    TypeError
        If `custom_bounds` is not a dictionary.
    ValueError
        If `custom_bounds` fail validation, if a custom range collapses to a
        single value after clipping to `self.variables` (i.e. it lies outside
        the vocs domain), or if `n` is a dict that lacks an entry for one of
        the gridded variables.

    Warns
    -----
    RuntimeWarning
        If `custom_bounds` were changed by clipping to the bounds of
        `self.variables`.

    Notes
    -----
    Constants are added as plain repeated values so that their type is kept
    (strings stay strings, ints stay ints) instead of being coerced to the
    floating point dtype of the grid axes.
    """

    if custom_bounds is None:
        bounds = self.variables
    else:
        variable_bounds = pd.DataFrame(self.variables)

        if not isinstance(custom_bounds, dict):
            raise TypeError("`custom_bounds` must be a dict")

        try:
            validate_variable_bounds(custom_bounds)
        except ValueError as err:
            raise ValueError("specified `custom_bounds` not valid") from err

        # Snapshot of what the caller asked for, normalised to [min, max]
        # lists so it can be compared with the clipped result below.
        requested = {
            name: [value[0], value[1]] for name, value in custom_bounds.items()
        }

        clipped = pd.DataFrame(custom_bounds).clip(
            variable_bounds.iloc[0], variable_bounds.iloc[1], axis=1
        )
        # `to_dict()` yields {name: {0: min, 1: max}}; convert to [min, max]
        # lists *before* comparing, otherwise the comparison against the
        # caller's lists is always unequal and the warning always fires.
        bounds = {
            name: [column[0], column[1]]
            for name, column in clipped.to_dict().items()
        }

        # A range that collapses to a point was entirely outside the domain.
        for name, value in bounds.items():
            if value[0] == value[1]:
                raise ValueError(
                    f"specified `custom_bounds` for {name} is outside vocs domain"
                )

        if bounds != requested:
            warnings.warn(
                "custom bounds were clipped by vocs bounds", RuntimeWarning
            )

    grid_axes = []
    for key, val in bounds.items():
        if isinstance(n, int):
            num_points = n
        elif isinstance(n, dict) and key in n:
            num_points = n[key]
        else:
            raise ValueError(
                f"Number of points for variable '{key}' not specified."
            )
        grid_axes.append(np.linspace(val[0], val[1], num_points))

    mesh = np.meshgrid(*grid_axes)
    inputs = {key: axis.flatten() for key, axis in zip(bounds, mesh)}

    if include_constants and self.constants is not None:
        # Repeat each constant once per grid point without forcing a dtype.
        n_points = mesh[0].size if mesh else 0
        for key, value in self.constants.items():
            inputs[key] = [value] * n_points

    return pd.DataFrame(inputs)

def convert_dataframe_to_inputs(
self, data: pd.DataFrame, include_constants: bool = True
) -> pd.DataFrame:
Expand Down

0 comments on commit ba6acb9

Please sign in to comment.