Source code for dmgp.layers.activation

# Copyright (c) 2024 Wenyuan Zhao, Haoyuan Chen
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# GP activations for deep Gaussian processes
#
# @authors: Haoyuan Chen, Wenyuan Zhao
#
# ===============================================================================================


import torch
import torch.nn as nn
from dmgp.kernels.laplace_kernel import LaplaceProductKernel
from dmgp.utils.sparse_design.design_class import HyperbolicCrossDesign, SparseGridDesign
from dmgp.utils.operators.chol_inv import mk_chol_inv, tmk_chol_inv

__all__ = [
    'TMK',
    'AMK',
]


class TMK(nn.Module):
    r"""
    Implements tensor markov GP as an activation layer using sparse grid structure.

    .. math::

        \begin{equation*}
            k\left( \mathbf{x}, X^{SG} \right)R^{-1}
        \end{equation*}

    :param in_features: Size of each input sample.
    :type in_features: int
    :param n_level: Level of sparse grid design. (Default: `2`.)
    :type n_level: int, optional
    :param input_lb: Input lower boundary. (Default: `-2`.)
    :type input_lb: float, optional
    :param input_ub: Input upper boundary. (Default: `2`.)
    :type input_ub: float, optional
    :param kernel: Kernel function of deep GP. When `None` (the default), a new
        `LaplaceProductKernel(lengthscale=1.)` is created for this layer.
    :type kernel: class, dmgp.kernels, optional
    :param design_class: Base design class of sparse grid. (Default: `HyperbolicCrossDesign`.)
    :type design_class: class, dmgp.utils.sparse_design.design_class, optional
    """

    def __init__(self,
                 in_features,
                 n_level=2,
                 input_lb=-2,
                 input_ub=2,
                 kernel=None,
                 design_class=HyperbolicCrossDesign,
                 ):
        super().__init__()

        # Build the default kernel per layer. A kernel instance written directly
        # in the signature is created once, when the signature is evaluated, and
        # would then be the very same object in every layer relying on the default.
        if kernel is None:
            kernel = LaplaceProductKernel(lengthscale=1.)
        self.kernel = kernel

        if in_features == 1:  # one-dimension TMGP
            dyadic_design = design_class(dyadic_sort=True, return_neighbors=True)(
                deg=n_level, input_lb=input_lb, input_ub=input_ub)
            chol_inv = mk_chol_inv(dyadic_design=dyadic_design, markov_kernel=kernel, upper=True)
            design_points = dyadic_design.points.reshape(-1, 1)
        else:  # multi-dimension TMGP
            eta = int(in_features + n_level)
            sg = SparseGridDesign(in_features, eta, input_lb=input_lb, input_ub=input_ub,
                                  design_class=design_class).gen_sg(
                dyadic_sort=True, return_neighbors=True)
            chol_inv = tmk_chol_inv(sparse_grid_design=sg, tensor_markov_kernel=kernel, upper=True)
            design_points = sg.pts_set

        # Buffers (not parameters): stored with the module but not optimised.
        self.register_buffer('design_points',
                             design_points)  # [m,d] size tensor, sparse grid points X^{SG} of dyadic sort
        self.register_buffer('chol_inv',
                             chol_inv)  # [m,m] size tensor, inverse of Cholesky decomposition of kernel(X^{SG},X^{SG})
        self.out_features = design_points.shape[0]  # m, number of design points

    def forward(self, x):
        r"""
        Computes the tensor markov kernel activation of :math:`\mathbf x`.

        :param x: [N, C] size tensor, N is the batch size, C is the feature size of input
        :type x: torch.Tensor.float

        :return: [N, M] size tensor, kernel(input, sparse_grid) @ chol_inv
        """
        # The kernel output is right-multiplied by the [M, M] chol_inv,
        # so its last dimension is M (one column per design point).
        out = self.kernel(x, self.design_points)  # [N, M] size tensor
        out = out @ self.chol_inv  # [N, M] size tensor

        return out
class AMK(nn.Module):
    r"""
    Implements additive markov GP as an activation layer using additive structure.

    .. math::

        \begin{equation*}
            \left\{ k\left( x_i, X^{SG} \right)R^{-1} \right\}^{d}_{i=1}
        \end{equation*}

    :param in_features: Size of each input sample.
    :type in_features: int
    :param n_level: Level of induced points for approximating GP. (Default: `3`.)
    :type n_level: int, optional
    :param input_lb: Input lower boundary. (Default: `-2`.)
    :type input_lb: float, optional
    :param input_ub: Input upper boundary. (Default: `2`.)
    :type input_ub: float, optional
    :param kernel: Kernel function of deep GP. When `None` (the default), a new
        `LaplaceProductKernel(lengthscale=1.)` is created for this layer.
    :type kernel: class, dmgp.kernels, optional
    :param design_class: Base design class of sparse grid. (Default: `HyperbolicCrossDesign`.)
    :type design_class: class, dmgp.utils.sparse_design.design_class, optional
    """

    def __init__(self,
                 in_features,
                 n_level=3,
                 input_lb=-2,
                 input_ub=2,
                 kernel=None,
                 design_class=HyperbolicCrossDesign,
                 ):
        super().__init__()

        # Build the default kernel per layer. A kernel instance written directly
        # in the signature is created once, when the signature is evaluated, and
        # would then be the very same object in every layer relying on the default.
        if kernel is None:
            kernel = LaplaceProductKernel(lengthscale=1.)
        self.kernel = kernel

        # A single one-dimensional design is built here; forward() applies it
        # to every flattened input coordinate.
        dyadic_design = design_class(dyadic_sort=True, return_neighbors=True)(
            deg=n_level, input_lb=input_lb, input_ub=input_ub)
        chol_inv = mk_chol_inv(dyadic_design=dyadic_design, markov_kernel=kernel,
                               upper=True)  # [m, m] size tensor
        design_points = dyadic_design.points.reshape(-1, 1)  # [m, 1] size tensor

        # Buffers (not parameters): stored with the module but not optimised.
        self.register_buffer('design_points',
                             design_points)  # [m,1] size tensor, design points of dyadic sort
        self.register_buffer('chol_inv',
                             chol_inv)  # [m,m] size tensor, inverse of Cholesky decomposition of kernel(X^{SG},X^{SG})
        self.out_features = design_points.shape[0] * in_features  # m*d

    def forward(self, x):
        r"""
        Computes the element-wise markov kernel activation of :math:`\mathbf x`.

        :param x: [N, C, L] size tensor, N is the batch size, C is the channels of input,
            L is the sequence length. All dimensions after the batch dimension are flattened.
        :type x: torch.Tensor.float

        :return: [N, C*L*M] size tensor, kernel(input, sparse_grid) @ chol_inv
        """
        out = torch.flatten(x, start_dim=1).unsqueeze(dim=-1)  # reshape x of size [N, C, L] --> size [N, C*L, 1]
        out = self.kernel(out, self.design_points)  # [N, C*L, M] size tensor
        out = torch.matmul(out, self.chol_inv)  # [N, C*L, M] size tensor
        out = torch.flatten(out, start_dim=1)  # [N, C*L*M] size tensor

        return out