
Commit 490d780

Merge pull request #354 from cornellius-gp/fix_variational_strategy
Fix variational strategy, add diagonal correction
gpleiss authored Nov 8, 2018
2 parents 64cc318 + 9752592 commit 490d780
Showing 5 changed files with 535 additions and 45 deletions.
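Background (not part of the commit page itself): in an SVGP model the predictive covariance over data points x is built from inducing points z. The "diagonal correction" enabled here is a FITC-style term that adds back the pointwise variance discarded by the low-rank Nyström approximation Q_xx = K_xz K_zz^{-1} K_zx. A sketch in LaTeX of the quantity the new code path appears to compute, with S denoting the variational covariance over the inducing values; the notation is inferred from the diff below, not stated in the commit:

\Sigma_{\mathrm{pred}}
    = K_{xz} K_{zz}^{-1} S \, K_{zz}^{-1} K_{zx}
    + \operatorname{diag}\!\Big( \max\big( 0,\; \operatorname{diag}(K_{xx}) - \operatorname{diag}(K_{xz} K_{zz}^{-1} K_{zx}) \big) \Big)

The second term is clamped at zero, so the added diagonal can only increase the predictive variance and never makes the covariance indefinite.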
445 changes: 445 additions & 0 deletions examples/06_Scalable_GP_Classification_1D/SVGP_Classification_1D.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions examples/06_Scalable_GP_Classification_1D/index.rst
@@ -5,4 +5,5 @@
:maxdepth: 1
:hidden:

SVGP_Classification_1D.ipynb
KISSGP_Classification_1D.ipynb
2 changes: 1 addition & 1 deletion gpytorch/beta_features.py
@@ -37,7 +37,7 @@ class diagonal_correction(_feature_flag):
Add a diagonal correction to scalable inducing point methods
"""

pass
_state = True


class fast_pred_samples(_feature_flag):
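Context on the one-line change above: setting _state = True makes the diagonal_correction beta feature enabled by default. A minimal sketch of how the flag can be queried and temporarily overridden; only the .on() check appears verbatim in this commit, and the context-manager call with a boolean argument is an assumption based on the pattern used by gpytorch's other feature flags:

import gpytorch

# Verbatim pattern from the diff below: the strategy checks the flag at prediction time.
if gpytorch.beta_features.diagonal_correction.on():
    print("diagonal correction is active (the default after this commit)")

# Assumed usage: _feature_flag subclasses act as context managers, so the
# correction could be switched off for a single block of code. The `False`
# argument follows the settings-flag convention and is an assumption here.
with gpytorch.beta_features.diagonal_correction(False):
    assert not gpytorch.beta_features.diagonal_correction.on()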
20 changes: 14 additions & 6 deletions gpytorch/variational/variational_strategy.py
@@ -1,7 +1,9 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import math
import torch
from ..lazy import RootLazyTensor, MatmulLazyTensor
import gpytorch
from ..lazy import RootLazyTensor, PsdSumLazyTensor, DiagLazyTensor
from .. import beta_features
from ..module import Module
from ..distributions import MultivariateNormal
@@ -39,6 +41,8 @@ def __init__(self, model, inducing_points, variational_distribution, learn_induc
super(VariationalStrategy, self).__init__()
object.__setattr__(self, "model", model)

inducing_points = inducing_points.clone()

if inducing_points.dim() == 1:
inducing_points = inducing_points.unsqueeze(-1)

@@ -91,7 +95,7 @@ def forward(self, x):

test_mean = full_mean[n_induc:]
induc_mean = full_mean[:n_induc]
induc_induc_covar = full_covar[:n_induc, :n_induc]
induc_induc_covar = full_covar[:n_induc, :n_induc].add_jitter()
induc_data_covar = full_covar[:n_induc, n_induc:]
data_induc_covar = full_covar[n_induc:, :n_induc]
data_data_covar = full_covar[n_induc:, n_induc:]
@@ -116,16 +120,20 @@

# Compute predictive covariance
predictive_covar = data_data_covar
if beta_features.fast_pred_var.on():
if not self.training and beta_features.fast_pred_var.on():
correction = RootLazyTensor(data_induc_covar.matmul(self.prior_root_inv)).mul(-1)
correction = correction + RootLazyTensor(data_induc_covar.matmul(self.variational_root))
predictive_covar = predictive_covar + correction
else:
induc_data_covar = induc_data_covar.evaluate()
inv_product = induc_induc_covar.inv_matmul(induc_data_covar)
factor = variational_dist.lazy_covariance_matrix.root_decomposition().matmul(inv_product)
right_factor = factor - inv_product
left_factor = (factor - induc_data_covar).transpose(-1, -2)
predictive_covar = predictive_covar + MatmulLazyTensor(left_factor, right_factor)
predictive_covar = RootLazyTensor(factor.transpose(-2, -1))

if gpytorch.beta_features.diagonal_correction.on():
fake_diagonal = (inv_product * induc_data_covar).sum(0)
real_diagonal = data_data_covar.diag()
diag_correction = DiagLazyTensor((real_diagonal - fake_diagonal).clamp(0, math.inf))
predictive_covar = PsdSumLazyTensor(predictive_covar, diag_correction)

return MultivariateNormal(predictive_mean, predictive_covar)
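A self-contained sketch of the arithmetic in the new else-branch above, using dense tensors in place of the lazy-tensor types; variable names mirror the diff (inv_product, fake_diagonal, real_diagonal), but this is an illustration of the computation, not the library code path:

import math
import torch

torch.manual_seed(0)
n_induc, n_data = 5, 8
n_total = n_induc + n_data

# Stand-in for the joint prior covariance over [inducing points; data points].
root = torch.randn(n_total, n_total)
full_covar = root @ root.t() + 1e-3 * torch.eye(n_total)
induc_induc_covar = full_covar[:n_induc, :n_induc]
induc_data_covar = full_covar[:n_induc, n_induc:]
data_data_covar = full_covar[n_induc:, n_induc:]

# inv_product = K_zz^{-1} K_zx, the dense analogue of inv_matmul above.
inv_product = torch.linalg.solve(induc_induc_covar, induc_data_covar)

# Diagonal of the Nystrom approximation Q_xx = K_xz K_zz^{-1} K_zx ...
fake_diagonal = (inv_product * induc_data_covar).sum(0)
# ... versus the exact prior diagonal of K_xx.
real_diagonal = data_data_covar.diag()

# The clamped, nonnegative correction added to the predictive covariance.
diag_correction = (real_diagonal - fake_diagonal).clamp(0, math.inf)
print(diag_correction)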
112 changes: 74 additions & 38 deletions test/examples/test_svgp_gp_classification.py
@@ -3,48 +3,52 @@
from __future__ import print_function
from __future__ import unicode_literals

from math import exp, pi
from math import pi

import os
import random
import torch
import unittest
import gpytorch
from torch import optim
from gpytorch.kernels import RBFKernel, ScaleKernel
from gpytorch.likelihoods import BernoulliLikelihood
from gpytorch.means import ConstantMean
from gpytorch.priors import SmoothedBoxPrior
from gpytorch.distributions import MultivariateNormal


train_x = torch.linspace(-1, 1, 10).unsqueeze(-1)
train_y = torch.sign(torch.cos(train_x * (2 * pi))).squeeze()


class GPClassificationModel(gpytorch.models.AbstractVariationalGP):
def __init__(self):
variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(16)
variational_strategy = gpytorch.variational.VariationalStrategy(
self, torch.randn(16, 1), variational_distribution, learn_inducing_locations=True
)

super(GPClassificationModel, self).__init__(variational_strategy)
self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-5, 5))
self.covar_module = ScaleKernel(
RBFKernel(log_lengthscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1, log_transform=True)),
log_outputscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1, log_transform=True),
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy


def train_data(cuda=False):
train_x = torch.linspace(0, 1, 260)
train_y = torch.cos(train_x * (2 * pi))
if cuda:
return train_x.cuda(), train_y.cuda()
else:
return train_x, train_y


class SVGPRegressionModel(AbstractVariationalGP):
def __init__(self, inducing_points):
variational_distribution = CholeskyVariationalDistribution(inducing_points.size(-1))
variational_strategy = VariationalStrategy(self,
inducing_points,
variational_distribution,
learn_inducing_locations=True)
super(SVGPRegressionModel, self).__init__(variational_strategy)
self.mean_module = gpytorch.means.ConstantMean()
self.covar_module = gpytorch.kernels.ScaleKernel(
gpytorch.kernels.RBFKernel(
log_lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1., sigma=0.1, log_transform=True)
)
)
self.covar_module.base_kernel.initialize(log_lengthscale=-1)

def forward(self, x):
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
latent_pred = MultivariateNormal(mean_x, covar_x)
latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
return latent_pred


class TestSVGPClassification(unittest.TestCase):
class TestSVGPRegression(unittest.TestCase):
def setUp(self):
if os.getenv("UNLOCK_SEED") is None or os.getenv("UNLOCK_SEED").lower() == "false":
self.rng_state = torch.get_rng_state()
@@ -57,39 +61,71 @@ def tearDown(self):
if hasattr(self, "rng_state"):
torch.set_rng_state(self.rng_state)

def test_kissgp_classification_error(self):
model = GPClassificationModel()
likelihood = BernoulliLikelihood()
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.numel())
def test_regression_error(self):
train_x, train_y = train_data()
likelihood = GaussianLikelihood()
model = SVGPRegressionModel(train_x[:25])
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

# Find optimal model hyperparameters
model.train()
likelihood.train()

optimizer = optim.Adam(model.parameters(), lr=0.1)
optimizer = optim.Adam([{'params': model.parameters()}, {'params': likelihood.parameters()}], lr=0.1)
optimizer.n_iter = 0
for _ in range(75):
for _ in range(200):
optimizer.zero_grad()
output = model(train_x)
loss = -mll(output, train_y)
loss.backward()
optimizer.n_iter += 1
optimizer.step()

for _, param in model.named_parameters():
for param in model.parameters():
self.assertTrue(param.grad is not None)
self.assertGreater(param.grad.norm().item(), 0)

for param in likelihood.parameters():
self.assertTrue(param.grad is not None)
self.assertGreater(param.grad.norm().item(), 0)

# Set back to eval mode
model.eval()
likelihood.eval()
test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()
test_preds = likelihood(model(train_x)).mean.squeeze()
mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
assert mean_abs_error.item() < 1e-1

def test_regression_error_cuda(self):
if torch.cuda.is_available():
train_x, train_y = train_data(cuda=True)
likelihood = GaussianLikelihood().cuda()
model = SVGPRegressionModel(train_x[:25]).cuda()
mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

# Find optimal model hyperparameters
model.train()
optimizer = optim.Adam([{'params': model.parameters()}, {'params': likelihood.parameters()}], lr=0.1)
optimizer.n_iter = 0
for _ in range(200):
optimizer.zero_grad()
output = model(train_x)
loss = -mll(output, train_y)
loss.backward()
optimizer.n_iter += 1
optimizer.step()

for param in model.parameters():
self.assertTrue(param.grad is not None)
self.assertGreater(param.grad.norm().item(), 0)
for param in likelihood.parameters():
self.assertTrue(param.grad is not None)
self.assertGreater(param.grad.norm().item(), 0)
optimizer.step()

# Set back to eval mode
model.eval()
test_preds = likelihood(model(train_x)).mean.squeeze()
mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
self.assertLess(mean_abs_error.item(), 1e-1)


if __name__ == "__main__":
