Skip to content

Commit

Permalink
Refactor data out of constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
anassinator committed Jul 8, 2018
1 parent 2ad0d69 commit 1a192ba
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 90 deletions.
3 changes: 1 addition & 2 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ name = "pypi"

[packages]
numpy = "==1.14.2"
six = "==1.11.0"
torch = "==0.4.0"

[dev-packages]
yapf = "==0.22.0"

[requires]
[requires]
10 changes: 1 addition & 9 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ After installation, :code:`import` and use as follows:
from gp.kernels import RBFKernel, WhiteNoiseKernel
k = RBFKernel() + WhiteNoiseKernel()
gp = GaussianProcess(k, X, Y)
gp = GaussianProcess(k)
gp.set_data(X, Y)
gp.fit()
where :code:`X` and :code:`Y` are your training data's inputs and outputs as
Expand Down
37 changes: 18 additions & 19 deletions examples/sin_x.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions examples/sin_x_y.ipynb

Large diffs are not rendered by default.

171 changes: 116 additions & 55 deletions gp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,73 @@
import numpy as np


class GaussianProcess(torch.nn.Module):
class IGaussianProcess(torch.nn.Module):

"""Base Gaussian Process regressor."""

def __init__(self):
"""Constructs an IGaussianProcess."""
super(IGaussianProcess, self).__init__()
self._is_set = False

@property
def is_set(self):
"""Whether the training data is set or not."""
return self._is_set

def set_data(self, X, Y, normalize_y=True, reg=1e-5):
"""Set the training data.
Args:
X (Tensor): Training inputs.
Y (Tensor): Training outputs.
normalize_y (bool): Whether to normalize the outputs.
"""
raise NotImplementedError

def loss(self):
"""Computes the loss as the negative marginal log likelihood."""
raise NotImplementedError

def forward(self,
x,
return_mean=True,
return_var=False,
return_covar=False,
return_std=False,
**kwargs):
"""Computes the GP estimate.
Args:
x (Tensor): Inputs.
return_mean (bool): Whether to return the mean.
return_covar (bool): Whether to return the full covariance matrix.
return_var (bool): Whether to return the variance.
return_std (bool): Whether to return the standard deviation.
Returns:
Tensor or tuple of Tensors.
The order of the tuple if all outputs are requested is:
(mean, covariance, variance, standard deviation).
"""
raise NotImplementedError

def fit(self, tol=1e-6, reg_factor=10.0, max_reg=1.0, max_iter=1000):
"""Fits the model.
Args:
tol (float): Tolerance.
reg_factor (float): Regularization multiplicative factor.
max_reg (float): Maximum regularization term.
max_iter (int): Maximum number of iterations.
Returns:
Number of iterations.
"""
raise NotImplementedError


class GaussianProcess(IGaussianProcess):

"""Gaussian Process regressor.
Expand All @@ -15,44 +81,20 @@ class GaussianProcess(torch.nn.Module):
`MultiGaussianProcess` instead.
"""

def __init__(self,
kernel,
X,
Y,
sigma_n=None,
eps=1e-6,
reg=1e-5,
normalize_y=True):
def __init__(self, kernel, sigma_n=None, eps=1e-6):
"""Constructs a GaussianProcess.
Args:
kernel (Kernel): Kernel.
X (Tensor): Training inputs.
Y (Tensor): Training outputs.
sigma_n (Tensor): Noise standard deviation.
eps (float): Minimum bound for parameters.
reg (float): Regularization term to guarantee the kernel matrix is
positive definite.
normalize_y (bool): Whether to normalize the outputs.
"""
super(GaussianProcess, self).__init__()
self.kernel = kernel
self.sigma_n = torch.nn.Parameter(
torch.randn(1) if sigma_n is None else sigma_n)
self.reg = torch.nn.Parameter(torch.tensor(reg), requires_grad=False)

self._eps = eps
self._non_normalized_Y = Y

if normalize_y:
Y_mean = torch.mean(Y, dim=0)
Y_variance = torch.std(Y, dim=0)
Y = (Y - Y_mean) / Y_variance

self._X = X
self._Y = Y

self._update_k()
self._is_set = False

def _update_k(self):
"""Updates the K matrix."""
Expand All @@ -63,27 +105,40 @@ def _update_k(self):
var_n = (self.sigma_n**2).clamp(self._eps, 1e5)
K = self.kernel(X, X)
K = (K + K.t()).mul(0.5)
self._K = K + (self.reg + var_n) * torch.eye(X.shape[0])
self._K = K + (self._reg + var_n) * torch.eye(X.shape[0])

# Compute K's inverse and Cholesky factorization.
# We can't use potri() to compute the inverse since it's derivative
# isn't implemented yet.
self._L = torch.potrf(self._K)
self._K_inv = self._K.inverse()

def update(self, X, Y):
"""Update the training data.
def set_data(self, X, Y, normalize_y=True, reg=1e-5):
"""Set the training data.
Args:
X (Tensor): Training inputs.
Y (Tensor): Training outputs.
normalize_y (bool): Whether to normalize the outputs.
"""
self._non_normalized_Y = Y

if normalize_y:
Y_mean = torch.mean(Y, dim=0)
Y_variance = torch.std(Y, dim=0)
Y = (Y - Y_mean) / Y_variance

self._X = X
self._Y = Y
self._reg = reg
self._update_k()
self._is_set = True

def loss(self):
"""Computes the loss as the negative marginal log likelihood."""
if not self._is_set:
raise RuntimeError("You must call set_data() first")

Y = self._Y
self._update_k()
K_inv = self._K_inv
Expand Down Expand Up @@ -117,6 +172,9 @@ def forward(self,
The order of the tuple if all outputs are requested is:
(mean, covariance, variance, standard deviation).
"""
if not self._is_set:
raise RuntimeError("You must call set_data() first")

X = self._X
Y = self._Y
K_inv = self._K_inv
Expand Down Expand Up @@ -150,29 +208,24 @@ def forward(self,

return tuple(outputs)

def fit(self,
tol=1e-6,
reg=1e-5,
reg_factor=10.0,
max_reg=1.0,
max_iter=1000):
def fit(self, tol=1e-6, reg_factor=10.0, max_reg=1.0, max_iter=1000):
"""Fits the model.
Args:
tol (float): Tolerance.
reg (float): Regularization term to guarantee the kernel matrix is
positive definite.
reg_factor (float): Regularization multiplicative factor.
max_reg (float): Maximum regularization term.
max_iter (int): Maximum number of iterations.
Returns:
Number of iterations.
"""
if not self._is_set:
raise RuntimeError("You must call set_data() first")

opt = torch.optim.Adam(p for p in self.parameters() if p.requires_grad)
self.reg = torch.nn.Parameter(torch.tensor(reg), requires_grad=False)

while self.reg <= max_reg:
while self._reg <= max_reg:
try:
curr_loss = np.inf
n_iter = 0
Expand All @@ -193,13 +246,13 @@ def fit(self,
return n_iter
except RuntimeError:
# Increase regularization term until it succeeds.
self.reg *= reg_factor
self._reg *= reg_factor
continue

warnings.warn("exceeded maximum regularization: did not converge")


class MultiGaussianProcess(torch.nn.Module):
class MultiGaussianProcess(IGaussianProcess):

"""
Layer of abstraction to estimate vector-valued functions with a separate
Expand All @@ -209,7 +262,7 @@ class MultiGaussianProcess(torch.nn.Module):
a single set for all dimensions.
"""

def __init__(self, kernels, X, Y, *args, **kwargs):
def __init__(self, kernels, *args, **kwargs):
"""Constructs a MultiGaussianProcess.
Args:
Expand All @@ -221,31 +274,33 @@ def __init__(self, kernels, X, Y, *args, **kwargs):
"""
super(MultiGaussianProcess, self).__init__()
[
self.add_module(
"process_{}".format(i),
GaussianProcess(kernel, X, Y[:, i].reshape(-1, 1), *args,
**kwargs)) for i, kernel in enumerate(kernels)
self.add_module("process_{}".format(i),
GaussianProcess(kernel, *args, **kwargs))
for i, kernel in enumerate(kernels)
]
self._processes = [
getattr(self, "process_{}".format(i)) for i in range(len(kernels))
]
self._X = X
self._Y = Y

def update(self, X, Y):
"""Update the training data.
def set_data(self, X, Y, normalize_y=True):
"""Set the training data.
Args:
X (Tensor): Training inputs.
Y (Tensor): Training outputs.
normalize_y (bool): Whether to normalize the outputs.
"""
self._X = X
self._Y = Y
for i, gp in enumerate(self.processes):
gp.update(X, Y[:, i].reshape(-1, 1))
for i, gp in enumerate(self._processes):
gp.set_data(X, Y[:, i].reshape(-1, 1), normalize_y)
self._is_set = True

def loss(self):
"""Computes the loss as the negative marginal log likelihood."""
if not self._is_set:
raise RuntimeError("You must call set_data() first")

loss = torch.tensor(0.0)
for gp in self._processes:
loss += gp.loss()
Expand All @@ -261,8 +316,9 @@ def fit(self, *args, **kwargs):
Returns:
Total number of iterations.
"""
# Fitting them individually is faster than fitting them together as
# they are completely independent of each other.
if not self._is_set:
raise RuntimeError("You must call set_data() first")

iters = 0
for gp in self._processes:
iters += gp.fit(*args, **kwargs)
Expand All @@ -278,7 +334,12 @@ def forward(self, x, *args, **kwargs):
Returns:
Tensor or tuple of Tensors.
The order of the tuple if all outputs are requested is:
(mean, covariance, variance, standard deviation).
"""
if not self._is_set:
raise RuntimeError("You must call set_data() first")

outputs = np.array([gp(x, *args, **kwargs) for gp in self._processes])

if outputs.ndim > 1:
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def read(fname):
BASE_URL = "https://github.com/anassinator/gp"
INSTALL_REQUIRES = [
"numpy==1.14.2",
"six==1.11.0",
"torch==0.4.0",
]

Expand Down

0 comments on commit 1a192ba

Please sign in to comment.