Merge pull request #274 from xopt-org/grid_evaluate

Grid evaluate
xopt-org · Feb 10, 2025 · ba6acb9 · ba6acb9
2 parents 33d1a83 + f24ff70
commit ba6acb9
Show file tree

Hide file tree

Showing 9 changed files with 202 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -129,5 +129,4 @@ private_tests/*
 /.virtual_documents
 *.csv
 
-# don't include vscode settings
 /.vscode
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
   "numpy",
   "pydantic>=2.3",
   "pyyaml",
-  "botorch==0.12.0",
+  "botorch",
   "scipy>=1.10.1",
   "pandas",
   "ipywidgets",

diff --git a/xopt/base.py b/xopt/base.py
@@ -439,6 +439,32 @@ def random_evaluate(
         result = self.evaluate_data(random_inputs)
         return result
 
+    def grid_evaluate(
+        self,
+        n_samples: Union[int, Dict[str, int]],
+        custom_bounds: dict = None,
+    ):
+        """
+        Evaluate a meshgrid of points using the VOCS and add the results to the internal
+        DataFrame.
+
+        Parameters
+        ----------
+        n_samples : int or dict
+            The number of samples along each axis to evaluate on a meshgrid.
+            If an int is provided, the same number of samples is used for all axes.
+        custom_bounds : dict, optional
+            Dictionary of vocs-like ranges for mesh sampling.
+
+        Returns
+        -------
+        pd.DataFrame
+            The results of the evaluations added to the internal DataFrame.
+        """
+        grid_inputs = self.vocs.grid_inputs(n_samples, custom_bounds=custom_bounds)
+        result = self.evaluate_data(grid_inputs)
+        return result
+
     def yaml(self, **kwargs):
         """
         Serialize the Xopt configuration to a YAML string.

diff --git a/xopt/generators/bayesian/custom_botorch/heteroskedastic.py b/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
@@ -2,7 +2,6 @@
 
 import torch
 from botorch.models import SingleTaskGP
-from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import Log, OutcomeTransform
@@ -21,6 +20,8 @@
 from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
 from torch import Tensor
 
+MIN_INFERRED_NOISE_LEVEL = torch.tensor(1e-4)
+
 
 class XoptHeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
     r"""

diff --git a/xopt/numerical_optimizer.py b/xopt/numerical_optimizer.py
@@ -106,13 +106,20 @@ def optimize(self, function, bounds, n_candidates=1, **kwargs):
         if len(bounds) != 2:
             raise ValueError("bounds must have the shape [2, ndim]")
 
-        candidates, out = optimize_acqf(
+        # empirical testing showed that botorch slightly overruns the requested max time;
+        # compensate by slightly reducing the max time passed to `optimize_acqf`
+        if self.max_time is not None:
+            max_time = self.max_time * 0.8 - 0.01
+        else:
+            max_time = None
+
+        candidates, _ = optimize_acqf(
             acq_function=function,
             bounds=bounds,
             q=n_candidates,
             raw_samples=self.n_restarts,
             num_restarts=self.n_restarts,
-            timeout_sec=self.max_time,
+            timeout_sec=max_time,
             options={"maxiter": self.max_iter},
             **kwargs,
         )

diff --git a/xopt/tests/generators/bayesian/test_model_constructor.py b/xopt/tests/generators/bayesian/test_model_constructor.py
@@ -8,7 +8,7 @@
 import torch
 import yaml
 from botorch import fit_gpytorch_mll
-from botorch.models import HeteroskedasticSingleTaskGP, SingleTaskGP
+from botorch.models import SingleTaskGP
 from botorch.models.transforms import Normalize, Standardize
 from gpytorch import ExactMarginalLogLikelihood
 from gpytorch.kernels import PeriodicKernel, PolynomialKernel, ScaleKernel
@@ -22,7 +22,6 @@
 )
 from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator
 from xopt.generators.bayesian.models.standard import StandardModelConstructor
-from xopt.generators.bayesian.utils import get_training_data
 from xopt.resources.testing import TEST_VOCS_BASE, TEST_VOCS_DATA
 from xopt.vocs import VOCS
 
@@ -467,43 +466,7 @@ def test_heteroskedastic(self):
         test_data["y1_var"] = test_data["y1"] * 0.1
         model = gp_constructor.build_model_from_vocs(test_vocs, test_data)
 
-        # validate against botorch HeteroskedasticSingleTaskGP
-        train_x, train_y, train_yvar = get_training_data(
-            test_vocs.variable_names, "y1", test_data
-        )
-        bounds = torch.vstack(
-            [
-                torch.tensor(test_vocs.variables[name])
-                for name in test_vocs.variable_names
-            ]
-        ).T
-
-        # create transform
-        input_transform = Normalize(len(test_vocs.variable_names), bounds=bounds)
-
-        bmodel = HeteroskedasticSingleTaskGP(
-            train_x,
-            train_y,
-            train_yvar,
-            input_transform=input_transform,
-            outcome_transform=Standardize(1),
-        )
-        mll = ExactMarginalLogLikelihood(bmodel.likelihood, bmodel)
-
-        # TODO: model fitting fails sometimes
-        fit_gpytorch_mll(mll)
-
         assert isinstance(model.models[0], XoptHeteroskedasticSingleTaskGP)
-        test_x = torch.rand(20, len(test_vocs.variable_names))
-        with torch.no_grad():
-            posterior = model.posterior(test_x.unsqueeze(1))
-            bposterior = bmodel.posterior(test_x.unsqueeze(1))
-        assert torch.allclose(
-            posterior.mean[..., 0].flatten(), bposterior.mean.flatten()
-        )
-        assert torch.allclose(
-            posterior.variance[..., 0].flatten(), bposterior.variance.flatten()
-        )
 
     def test_custom_noise_prior(self):
         test_data = deepcopy(TEST_VOCS_DATA)

diff --git a/xopt/tests/test_numerical_optimizer.py b/xopt/tests/test_numerical_optimizer.py
@@ -30,13 +30,13 @@ def test_lbfgs_optimizer(self):
                 assert candidates.shape == torch.Size([ncandidate, ndim])
 
         # test max time
-        max_time_optimizer = LBFGSOptimizer(max_time=0.01)
+        max_time_optimizer = LBFGSOptimizer(max_time=1.0)
         ndim = 1
         bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
         for ncandidate in [1, 3]:
             start_time = time.time()
             candidates = max_time_optimizer.optimize(f, bounds, ncandidate)
-            assert time.time() - start_time < 0.1
+            assert time.time() - start_time < 1.0
             assert candidates.shape == torch.Size([ncandidate, ndim])
 
     def test_grid_optimizer(self):

diff --git a/xopt/tests/test_vocs.py b/xopt/tests/test_vocs.py
@@ -132,6 +132,64 @@ def test_properties(self):
         assert vocs.n_outputs == 2
         assert vocs.variable_names == ["x1"]
 
+    def test_grid_inputs(self):
+        # Define a sample VOCS object
+        vocs = VOCS(
+            variables={"x1": [0.0, 1.0], "x2": [0.0, 1.0]},
+            constraints={},
+            objectives={},
+            constants={"c1": 5.0},
+            observables=[],
+        )
+
+        # Test with default parameters
+        n = 5
+        grid = vocs.grid_inputs(n=n)
+        assert isinstance(grid, pd.DataFrame)
+        assert grid.shape == (n**2, 3)  # 2 variables + 1 constant
+        assert "x1" in grid.columns
+        assert "x2" in grid.columns
+        assert "c1" in grid.columns
+        assert np.all(grid["c1"] == 5.0)
+
+        # Test with custom bounds
+        custom_bounds = {"x1": [0.2, 0.8], "x2": [0.1, 0.9]}
+        grid = vocs.grid_inputs(n=n, custom_bounds=custom_bounds)
+        assert isinstance(grid, pd.DataFrame)
+        assert grid.shape == (n**2, 3)  # 2 variables + 1 constant
+        assert "x1" in grid.columns
+        assert "x2" in grid.columns
+        assert "c1" in grid.columns
+        assert np.all(grid["c1"] == 5.0)
+        assert np.all(grid["x1"] >= 0.2) and np.all(grid["x1"] <= 0.8)
+        assert np.all(grid["x2"] >= 0.1) and np.all(grid["x2"] <= 0.9)
+
+        # Test with invalid custom bounds
+        invalid_custom_bounds = {
+            "x1": [1.2, 0.8],  # Invalid bounds
+            "x2": [0.1, 0.9],
+        }
+        with pytest.raises(ValueError):
+            vocs.grid_inputs(n=n, custom_bounds=invalid_custom_bounds)
+
+        # Test with include_constants=False
+        grid = vocs.grid_inputs(n=n, include_constants=False)
+        assert isinstance(grid, pd.DataFrame)
+        assert grid.shape == (n**2, 2)  # 2 variables
+        assert "x1" in grid.columns
+        assert "x2" in grid.columns
+        assert "c1" not in grid.columns
+
+        # Test with different number of points for each variable
+        n_dict = {"x1": 3, "x2": 4}
+        grid = vocs.grid_inputs(n=n_dict)
+        assert isinstance(grid, pd.DataFrame)
+        assert grid.shape == (3 * 4, 3)  # 2 variables + 1 constant
+        assert "x1" in grid.columns
+        assert "x2" in grid.columns
+        assert "c1" in grid.columns
+        assert np.all(grid["c1"] == 5.0)
+
     def test_random_sampling_custom_bounds(self):
         vocs = deepcopy(TEST_VOCS_BASE)
 

diff --git a/xopt/vocs.py b/xopt/vocs.py
@@ -377,6 +377,109 @@ def random_inputs(
         else:
             return pd.DataFrame(inputs).to_dict("records")
 
+    def grid_inputs(
+        self,
+        n: Union[int, Dict[str, int]],
+        custom_bounds: dict = None,
+        include_constants: bool = True,
+    ) -> pd.DataFrame:
+        """
+        Generate a meshgrid of inputs.
+
+        Parameters
+        ----------
+        n : Union[int, Dict[str, int]]
+            Number of points to generate along each axis. If an integer is provided, the same number of points
+            is used for all variables. If a dictionary is provided, it should have variable names as keys and
+            the number of points as values.
+        custom_bounds : dict, optional
+            Custom bounds for the variables. If None, the default bounds from `self.variables` are used.
+            The dictionary should have variable names as keys and a list of two values [min, max] as values.
+        include_constants : bool, optional
+            If True, include constant values from `self.constants` in the output DataFrame.
+
+        Returns
+        -------
+        pd.DataFrame
+            A DataFrame containing the generated meshgrid of inputs. Each column corresponds to a variable,
+            and each row represents a point in the grid.
+
+        Raises
+        ------
+        TypeError
+            If `custom_bounds` is not a dictionary.
+        ValueError
+            If `custom_bounds` are not valid or if any specified `custom_bounds` are outside the domain of `self.variables`.
+
+        Warns
+        -----
+        RuntimeWarning
+            If `custom_bounds` are clipped by the bounds of `self.variables`.
+
+        Notes
+        -----
+        The function generates a meshgrid of inputs based on the specified bounds. If `custom_bounds` are provided,
+        they are validated and clipped to ensure they lie within the domain of `self.variables`. The resulting meshgrid
+        is flattened and returned as a DataFrame. If `include_constants` is True, constant values from `self.constants`
+        are added to the DataFrame.
+        """
+
+        if custom_bounds is None:
+            bounds = self.variables
+        else:
+            variable_bounds = pd.DataFrame(self.variables)
+
+            if not isinstance(custom_bounds, dict):
+                raise TypeError("`custom_bounds` must be a dict")
+
+            try:
+                validate_variable_bounds(custom_bounds)
+            except ValueError:
+                raise ValueError("specified `custom_bounds` not valid")
+
+            old_custom_bounds = deepcopy(custom_bounds)
+
+            custom_bounds = pd.DataFrame(custom_bounds)
+            custom_bounds = custom_bounds.clip(
+                variable_bounds.iloc[0], variable_bounds.iloc[1], axis=1
+            )
+            bounds = custom_bounds.to_dict()
+
+            for name, value in bounds.items():
+                if value[0] == value[1]:
+                    raise ValueError(
+                        f"specified `custom_bounds` for {name} is outside vocs domain"
+                    )
+
+            if bounds != old_custom_bounds:
+                warnings.warn(
+                    "custom bounds were clipped by vocs bounds", RuntimeWarning
+                )
+
+            for k in bounds.keys():
+                bounds[k] = [bounds[k][i] for i in range(2)]
+
+        grid_axes = []
+        for key, val in bounds.items():
+            if isinstance(n, int):
+                num_points = n
+            elif isinstance(n, dict) and key in n:
+                num_points = n[key]
+            else:
+                raise ValueError(
+                    f"Number of points for variable '{key}' not specified."
+                )
+            grid_axes.append(np.linspace(val[0], val[1], num_points))
+
+        mesh = np.meshgrid(*grid_axes)
+        inputs = {key: mesh[i].flatten() for i, key in enumerate(bounds.keys())}
+
+        if include_constants and self.constants is not None:
+            for key, value in self.constants.items():
+                inputs[key] = np.full_like(next(iter(inputs.values())), value)
+
+        return pd.DataFrame(inputs)
+
     def convert_dataframe_to_inputs(
         self, data: pd.DataFrame, include_constants: bool = True
     ) -> pd.DataFrame: