@@ -10,58 +10,47 @@
 These models are often a good starting point and are further documented in the
 tutorials.

-`SingleTaskGP` and `HeteroskedasticSingleTaskGP` are single-task exact GP models,
-differing in how they treat noise. They use relatively strong priors on the Kernel
-hyperparameters, which work best when covariates are normalized to the unit cube
-and outcomes are standardized (zero mean, unit variance). By default, these models
-use a `Standardize` outcome transform, which applies this standardization. However,
-they do not (yet) use an input transform by default.
-
-These models all work in batch mode (each batch having its own hyperparameters).
-When the training observations include multiple outputs, these models use
+`SingleTaskGP` is a single-task exact GP model that uses relatively strong priors on
+the Kernel hyperparameters, which work best when covariates are normalized to the unit
+cube and outcomes are standardized (zero mean, unit variance). By default, this model
+uses a `Standardize` outcome transform, which applies this standardization. However,
+it does not (yet) use an input transform by default.
+
+The `SingleTaskGP` model works in batch mode (each batch having its own hyperparameters).
+When the training observations include multiple outputs, `SingleTaskGP` uses
 batching to model outputs independently.

-These models all support multiple outputs. However, as single-task models,
-`SingleTaskGP` and `HeteroskedasticSingleTaskGP` should be used only when the
-outputs are independent and all use the same training data. If outputs are
-independent and outputs have different training data, use the `ModelListGP`.
-When modeling correlations between outputs, use a multi-task model like `MultiTaskGP`.
+`SingleTaskGP` supports multiple outputs. However, as a single-task model,
+`SingleTaskGP` should be used only when the outputs are independent and all
+use the same training inputs. If outputs are independent but they have different
+training inputs, use the `ModelListGP`. When modeling correlations between outputs,
+use a multi-task model like `MultiTaskGP`.
 """

 from __future__ import annotations

 import warnings
-from typing import NoReturn

 import torch
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
-from botorch.models.transforms.outcome import Log, OutcomeTransform, Standardize
+from botorch.models.transforms.outcome import OutcomeTransform, Standardize
 from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
     get_covar_module_with_dim_scaled_prior,
     get_gaussian_likelihood_with_lognormal_prior,
-    MIN_INFERRED_NOISE_LEVEL,
 )
 from botorch.utils.containers import BotorchContainer
 from botorch.utils.datasets import SupervisedDataset
 from botorch.utils.types import _DefaultType, DEFAULT
-from gpytorch.constraints.constraints import GreaterThan
 from gpytorch.distributions.multivariate_normal import MultivariateNormal
-from gpytorch.likelihoods.gaussian_likelihood import (
-    _GaussianLikelihoodBase,
-    FixedNoiseGaussianLikelihood,
-    GaussianLikelihood,
-)
+from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
 from gpytorch.likelihoods.likelihood import Likelihood
-from gpytorch.likelihoods.noise_models import HeteroskedasticNoise
 from gpytorch.means.constant_mean import ConstantMean
 from gpytorch.means.mean import Mean
-from gpytorch.mlls.noise_model_added_loss_term import NoiseModelAddedLossTerm
 from gpytorch.models.exact_gp import ExactGP
 from gpytorch.module import Module
-from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
 from torch import Tensor

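As the revised module docstring above notes, `SingleTaskGP` standardizes outcomes by default but applies no input transform, and it models multiple outputs independently via batching. The sketch below illustrates that usage on made-up toy data; it is not part of this diff, and it assumes the public `botorch.models` and `botorch.models.transforms` entry points (`SingleTaskGP`, `Normalize`, `Standardize`).

```python
import torch
from botorch.models import SingleTaskGP
from botorch.models.transforms import Normalize, Standardize

train_X = torch.rand(20, 2)  # covariates already lie in the unit cube
# Two outcomes sharing the same training inputs; they are modeled
# independently via batching, as the docstring describes.
train_Y = torch.stack(
    [train_X.sin().sum(dim=-1), train_X.cos().sum(dim=-1)], dim=-1
)

model = SingleTaskGP(
    train_X,
    train_Y,
    # Standardize is already the default outcome transform; it is spelled
    # out here only to make the standardization explicit.
    outcome_transform=Standardize(m=2),
    # No input transform is applied by default, so Normalize is added manually.
    input_transform=Normalize(d=2),
)
```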
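For independent outputs observed at different training inputs, the docstring recommends `ModelListGP` instead. A minimal sketch of that pattern, again with toy data and assuming `ModelListGP` is importable from `botorch.models`:

```python
import torch
from botorch.models import ModelListGP, SingleTaskGP

# Each outcome has its own training inputs, so each gets its own model.
X1, X2 = torch.rand(15, 2), torch.rand(25, 2)
Y1 = X1.sin().sum(dim=-1, keepdim=True)
Y2 = X2.cos().sum(dim=-1, keepdim=True)

model = ModelListGP(SingleTaskGP(X1, Y1), SingleTaskGP(X2, Y2))
posterior = model.posterior(torch.rand(5, 2))  # joint posterior over both outputs
```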
@@ -253,107 +242,3 @@ def forward(self, x: Tensor) -> MultivariateNormal:
         mean_x = self.mean_module(x)
         covar_x = self.covar_module(x)
         return MultivariateNormal(mean_x, covar_x)
-
-
-class HeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
-    r"""A single-task exact GP model using a heteroskedastic noise model.
-
-    This model differs from `SingleTaskGP` with observed observation noise
-    variances (`train_Yvar`) in that it can predict noise levels out of sample.
-    This is achieved by internally wrapping another GP (a `SingleTaskGP`) to model
-    the (log of) the observation noise. Noise levels must be provided to
-    `HeteroskedasticSingleTaskGP` as `train_Yvar`.
-
-    Examples of cases in which noise levels are known include online
-    experimentation and simulation optimization.
-
-    Example:
-        >>> train_X = torch.rand(20, 2)
-        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
-        >>> se = torch.linalg.norm(train_X, dim=1, keepdim=True)
-        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
-        >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
-    """
-
-    def __init__(
-        self,
-        train_X: Tensor,
-        train_Y: Tensor,
-        train_Yvar: Tensor,
-        outcome_transform: OutcomeTransform | None = None,
-        input_transform: InputTransform | None = None,
-    ) -> None:
-        r"""
-        Args:
-            train_X: A `batch_shape x n x d` tensor of training features.
-            train_Y: A `batch_shape x n x m` tensor of training observations.
-            train_Yvar: A `batch_shape x n x m` tensor of observed measurement
-                noise.
-            outcome_transform: An outcome transform that is applied to the
-                training data during instantiation and to the posterior during
-                inference (that is, the `Posterior` obtained by calling
-                `.posterior` on the model will be on the original scale).
-                Note that the noise model internally log-transforms the
-                variances, which will happen after this transform is applied.
-            input_transform: An input transfrom that is applied in the model's
-                forward pass.
-        """
-        if outcome_transform is not None:
-            train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
-        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
-        validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
-        self._set_dimensions(train_X=train_X, train_Y=train_Y)
-        noise_likelihood = GaussianLikelihood(
-            noise_prior=SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log),
-            batch_shape=self._aug_batch_shape,
-            noise_constraint=GreaterThan(
-                MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
-            ),
-        )
-        # Likelihood will always get evaluated with transformed X, so we need to
-        # transform the training data before constructing the noise model.
-        with torch.no_grad():
-            transformed_X = self.transform_inputs(
-                X=train_X, input_transform=input_transform
-            )
-        noise_model = SingleTaskGP(
-            train_X=transformed_X,
-            train_Y=train_Yvar,
-            likelihood=noise_likelihood,
-            outcome_transform=Log(),
-        )
-        likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
-        # This is hacky -- this class used to inherit from SingleTaskGP, but it
-        # shouldn't so this is a quick fix to enable getting rid of that
-        # inheritance
-        SingleTaskGP.__init__(
-            # pyre-fixme[6]: Incompatible parameter type
-            self,
-            train_X=train_X,
-            train_Y=train_Y,
-            likelihood=likelihood,
-            outcome_transform=None,
-            input_transform=input_transform,
-        )
-        self.register_added_loss_term("noise_added_loss")
-        self.update_added_loss_term(
-            "noise_added_loss", NoiseModelAddedLossTerm(noise_model)
-        )
-        if outcome_transform is not None:
-            self.outcome_transform = outcome_transform
-        self.to(train_X)
-
-    # pyre-fixme[15]: Inconsistent override
-    def condition_on_observations(self, *_, **__) -> NoReturn:
-        raise NotImplementedError
-
-    # pyre-fixme[15]: Inconsistent override
-    def subset_output(self, idcs) -> NoReturn:
-        raise NotImplementedError
-
-    def forward(self, x: Tensor) -> MultivariateNormal:
-        if self.training:
-            x = self.transform_inputs(x)
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return MultivariateNormal(mean_x, covar_x)
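With `HeteroskedasticSingleTaskGP` removed, observed noise variances can still be supplied directly to `SingleTaskGP` via `train_Yvar`, which the retained `FixedNoiseGaussianLikelihood` import above supports; unlike the removed model, this treats the variances as fixed and does not fit a second GP to predict noise levels out of sample. A minimal sketch, reusing the toy data from the removed docstring example:

```python
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(20, 2)
train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
se = torch.linalg.norm(train_X, dim=1, keepdim=True)
train_Yvar = 0.1 + se * torch.rand_like(train_Y)

# The provided variances are treated as fixed, known observation noise
# (a FixedNoiseGaussianLikelihood); they are not modeled out of sample.
model = SingleTaskGP(train_X, train_Y, train_Yvar=train_Yvar)
```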