Module TeachMyAgent.teachers.utils.gaussian_torch_distribution
Expand source code
# Taken from https://github.com/psclklnk/spdl
# Copy of the license at TeachMyAgent/teachers/LICENSES/SPDL
import scipy.linalg as scpla
from abc import ABC
import numpy as np
import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
from TeachMyAgent.teachers.utils.torch import get_weights, set_weights, to_float_tensor
import copy
class AbstractDistribution(object):
    """
    Interface for Distributions to represent a generic probability distribution.
    Probability distributions are often used by black box optimization
    algorithms in order to perform exploration in parameter space. In
    literature, they are also known as high level policies.
    """
    def sample(self):
        """
        Draw a sample from the distribution.

        Returns:
            A random vector sampled from the distribution.
        """
        raise NotImplementedError

    def log_pdf(self, x):
        """
        Compute the logarithm of the probability density function in the
        specified point

        Args:
            x (np.ndarray): the point where the log pdf is calculated

        Returns:
            The value of the log pdf in the specified point.
        """
        raise NotImplementedError

    def __call__(self, x):
        """
        Compute the probability density function in the specified point

        Args:
            x (np.ndarray): the point where the pdf is calculated

        Returns:
            The value of the pdf in the specified point.
        """
        # BUG FIX: the original used `raise np.exp(...)`, which never returned
        # the pdf and raised a TypeError (a float is not an exception).
        return np.exp(self.log_pdf(x))
class Distribution(AbstractDistribution):
    """
    Base class for parametric probability distributions used as high level
    policies: black box optimization algorithms rely on them to perform
    exploration directly in parameter space.
    """

    def mle(self, theta, weights=None):
        """
        Fit the distribution to the given points via (weighted) maximum
        likelihood estimation, updating the parameters in place.

        Args:
            theta (np.ndarray): a set of points, one sample per row
            weights (np.ndarray, None): optional per-sample weights; when
                provided, the weighted maximum likelihood estimate is
                computed instead of the plain one. Must contain as many
                elements as theta has rows.
        """
        raise NotImplementedError

    def diff_log(self, theta):
        """
        Compute the gradient of the log probability density function at the
        specified point.

        Args:
            theta (np.ndarray): the point where the gradient of the log pdf
                is evaluated

        Returns:
            The gradient of the log pdf at the specified point.
        """
        raise NotImplementedError

    def diff(self, theta):
        """
        Compute the gradient of the probability density function at the
        specified point. It is obtained from the gradient of the log density
        through the likelihood ratio trick, i.e.:

        .. math::
            \\nabla_{\\rho}p(\\theta)=p(\\theta)\\nabla_{\\rho}\\log p(\\theta)

        Args:
            theta (np.ndarray): the point where the gradient of the pdf is
                evaluated.

        Returns:
            The gradient of the pdf at the specified point.
        """
        pdf_value = self(theta)
        log_gradient = self.diff_log(theta)
        return pdf_value * log_gradient

    def get_parameters(self):
        """
        Getter.

        Returns:
            The current distribution parameters.
        """
        raise NotImplementedError

    def set_parameters(self, rho):
        """
        Setter.

        Args:
            rho (np.ndarray): the vector of new parameters to be used by
                the distribution
        """
        raise NotImplementedError

    @property
    def parameters_size(self):
        """
        Property.

        Returns:
            The size of the distribution parameter vector.
        """
        raise NotImplementedError
class TorchDistribution(AbstractDistribution, ABC):
    """
    Interface for a generic PyTorch distribution, i.e. a distribution whose
    internals are implemented with PyTorch tensors. Methods whose names end
    in '_t' take tensors as input and, where applicable, return tensors;
    their counterparts without the suffix convert to/from numpy.
    """

    def __init__(self, use_cuda):
        """
        Constructor.

        Args:
            use_cuda (bool): whether to use cuda or not.
        """
        self._use_cuda = use_cuda

    def entropy(self):
        """
        Compute the entropy of the policy.

        Returns:
            The value of the entropy of the policy as a numpy value.
        """
        entropy_tensor = self.entropy_t()
        return entropy_tensor.detach().cpu().numpy()

    def entropy_t(self):
        """
        Compute the entropy of the policy.

        Returns:
            The entropy of the policy as a tensor.
        """
        raise NotImplementedError

    def mean(self):
        """
        Compute the mean of the policy.

        Returns:
            The value of the mean of the policy as a numpy value.
        """
        mean_tensor = self.mean_t()
        return mean_tensor.detach().cpu().numpy()

    def mean_t(self):
        """
        Compute the mean of the policy.

        Returns:
            The mean of the policy as a tensor.
        """
        raise NotImplementedError

    def log_pdf(self, x):
        """
        Compute the log pdf at the given point, converting the input to a
        float tensor first and the tensor result back to numpy.
        """
        x_tensor = to_float_tensor(x, self._use_cuda)
        return self.log_pdf_t(x_tensor).detach().cpu().numpy()

    def log_pdf_t(self, x):
        """
        Compute the logarithm of the probability density function at the
        specified point.

        Args:
            x (torch.Tensor): the point where the log pdf is calculated

        Returns:
            The value of the log pdf at the specified point.
        """
        raise NotImplementedError

    def set_weights(self, weights):
        """
        Setter.

        Args:
            weights (np.ndarray): the vector of new weights to be used by
                the distribution
        """
        raise NotImplementedError

    def get_weights(self):
        """
        Getter.

        Returns:
            The current policy weights.
        """
        raise NotImplementedError

    def parameters(self):
        """
        Return the trainable distribution parameters, as expected by torch
        optimizers.

        Returns:
            List of parameters to be optimized.
        """
        raise NotImplementedError

    def reset(self):
        """No-op hook; subclasses may override to reset internal state."""
        pass

    @property
    def use_cuda(self):
        """
        True if the policy is using cuda tensors.
        """
        return self._use_cuda
class GaussianTorchDistribution(TorchDistribution):
    """
    Multivariate Gaussian distribution with trainable mean and Cholesky
    factor, built on torch.distributions.MultivariateNormal.

    The covariance is parameterized by a flattened Cholesky factor whose
    diagonal entries are stored in log-space, so they stay positive under
    unconstrained gradient updates.
    """

    def __init__(self, mu, chol_flat, use_cuda):
        """
        Constructor.

        Args:
            mu (np.ndarray): mean vector of the Gaussian.
            chol_flat (np.ndarray): flattened Cholesky factor of the
                covariance, as produced by `flatten_matrix` (log-diagonal
                first, then the strict lower triangle).
            use_cuda (bool): whether to use cuda or not.
                NOTE(review): the flag is stored but the tensors below are
                created on the default (CPU) device — confirm cuda placement
                is handled by the caller.
        """
        super().__init__(use_cuda)
        self._dim = mu.shape[0]

        self._mu = nn.Parameter(torch.as_tensor(mu, dtype=torch.float32), requires_grad=True)
        self._chol_flat = nn.Parameter(torch.as_tensor(chol_flat, dtype=torch.float32), requires_grad=True)
        self.distribution_t = MultivariateNormal(self._mu, scale_tril=self.to_tril_matrix(self._chol_flat, self._dim))

    def __copy__(self):
        # The constructor wraps its arguments in fresh nn.Parameter objects,
        # so the copy does not share trainable parameters with this instance.
        return GaussianTorchDistribution(self._mu, self._chol_flat, self.use_cuda)

    def __deepcopy__(self, memodict=None):
        return GaussianTorchDistribution(copy.deepcopy(self._mu), copy.deepcopy(self._chol_flat), self.use_cuda)

    @staticmethod
    def to_tril_matrix(chol_flat, dim):
        """
        Rebuild the (dim x dim) lower-triangular Cholesky factor from its
        flattened representation. Works for both numpy arrays and torch
        tensors (the result has the same array type as the input).

        Args:
            chol_flat (np.ndarray or torch.Tensor): first `dim` entries are
                the log of the diagonal, the remainder fills the strict
                lower triangle.
            dim (int): dimensionality of the Gaussian.

        Returns:
            The lower-triangular Cholesky factor.
        """
        if isinstance(chol_flat, np.ndarray):
            chol = np.zeros((dim, dim))
            exp_fun = np.exp
        else:
            chol = torch.zeros((dim, dim))
            exp_fun = torch.exp

        # The diagonal is stored in log-space; exponentiate to recover it.
        d1, d2 = np.diag_indices(dim)
        chol[d1, d2] += exp_fun(chol_flat[0: dim])
        ld1, ld2 = np.tril_indices(dim, k=-1)
        chol[ld1, ld2] += chol_flat[dim:]
        return chol

    @staticmethod
    def flatten_matrix(mat, tril=False):
        """
        Flatten a covariance (or Cholesky) matrix into the representation
        expected by `to_tril_matrix`.

        Args:
            mat (np.ndarray): covariance matrix, or its lower Cholesky
                factor when `tril` is True.
            tril (bool): whether `mat` is already a lower Cholesky factor.

        Returns:
            1-D array: log of the diagonal followed by the strict lower
            triangle.
        """
        if not tril:
            mat = scpla.cholesky(mat, lower=True)

        dim = mat.shape[0]
        d1, d2 = np.diag_indices(dim)
        ld1, ld2 = np.tril_indices(dim, k=-1)
        return np.concatenate((np.log(mat[d1, d2]), mat[ld1, ld2]))

    def entropy_t(self):
        """Entropy of the Gaussian as a tensor."""
        return self.distribution_t.entropy()

    def mean_t(self):
        """Mean of the Gaussian as a tensor."""
        return self.distribution_t.mean

    def log_pdf_t(self, x):
        """Log density of the Gaussian at x (tensor in, tensor out)."""
        return self.distribution_t.log_prob(x)

    def sample(self):
        """Draw a reparameterized (differentiable) sample from the Gaussian."""
        return self.distribution_t.rsample()

    def covariance_matrix(self):
        """
        Covariance matrix of the Gaussian as a numpy array.
        """
        # BUG FIX: added .cpu() so the numpy conversion also works when the
        # tensors live on a cuda device (consistent with entropy()/mean()).
        return self.distribution_t.covariance_matrix.detach().cpu().numpy()

    def set_weights(self, weights):
        """
        Replace mean and flattened Cholesky factor from a single flat vector.

        Args:
            weights (np.ndarray): first `dim` entries are the mean, the rest
                the flattened Cholesky factor.
        """
        set_weights([self._mu], weights[0:self._dim], self._use_cuda)
        set_weights([self._chol_flat], weights[self._dim:], self._use_cuda)
        # This is important - otherwise the changes will not be reflected!
        self.distribution_t = MultivariateNormal(self._mu, scale_tril=self.to_tril_matrix(self._chol_flat, self._dim))

    def get_weights(self):
        """
        Returns:
            Flat numpy vector: mean followed by the flattened Cholesky
            factor.
        """
        mu_weights = get_weights([self._mu])
        chol_flat_weights = get_weights([self._chol_flat])
        return np.concatenate([mu_weights, chol_flat_weights])

    def parameters(self):
        """Trainable parameters: mean and flattened Cholesky factor."""
        return [self._mu, self._chol_flat]
Classes
class AbstractDistribution
-
Interface for Distributions to represent a generic probability distribution. Probability distributions are often used by black box optimization algorithms in order to perform exploration in parameter space. In literature, they are also known as high level policies.
Expand source code
class AbstractDistribution(object): """ Interface for Distributions to represent a generic probability distribution. Probability distributions are often used by black box optimization algorithms in order to perform exploration in parameter space. In literature, they are also known as high level policies. """ def sample(self): """ Draw a sample from the distribution. Returns: A random vector sampled from the distribution. """ raise NotImplementedError def log_pdf(self, x): """ Compute the logarithm of the probability density function in the specified point Args: x (np.ndarray): the point where the log pdf is calculated Returns: The value of the log pdf in the specified point. """ raise NotImplementedError def __call__(self, x): """ Compute the probability density function in the specified point Args: x (np.ndarray): the point where the pdf is calculated Returns: The value of the pdf in the specified point. """ raise np.exp(self.log_pdf(x))
Subclasses
Methods
def log_pdf(self, x)
-
Compute the logarithm of the probability density function in the specified point
Args
x
:np.ndarray
- the point where the log pdf is calculated
Returns
The value of the log pdf in the specified point.
Expand source code
def log_pdf(self, x): """ Compute the logarithm of the probability density function in the specified point Args: x (np.ndarray): the point where the log pdf is calculated Returns: The value of the log pdf in the specified point. """ raise NotImplementedError
def sample(self)
-
Draw a sample from the distribution.
Returns
A random vector sampled from the distribution.
Expand source code
def sample(self): """ Draw a sample from the distribution. Returns: A random vector sampled from the distribution. """ raise NotImplementedError
class Distribution
-
Interface for Distributions to represent a generic probability distribution. Probability distributions are often used by black box optimization algorithms in order to perform exploration in parameter space. In literature, they are also known as high level policies.
Expand source code
class Distribution(AbstractDistribution): """ Interface for Distributions to represent a generic probability distribution. Probability distributions are often used by black box optimization algorithms in order to perform exploration in parameter space. In literature, they are also known as high level policies. """ def mle(self, theta, weights=None): """ Compute the (weighted) maximum likelihood estimate of the points, and update the distribution accordingly. Args: theta (np.ndarray): a set of points, every row is a sample weights (np.ndarray, None): a vector of weights. If specified the weighted maximum likelihood estimate is computed instead of the plain maximum likelihood. The number of elements of this vector must be equal to the number of rows of the theta matrix. """ raise NotImplementedError def diff_log(self, theta): """ Compute the derivative of the gradient of the probability denstity function in the specified point. Args: theta (np.ndarray): the point where the gradient of the log pdf is calculated Returns: The gradient of the log pdf in the specified point. """ raise NotImplementedError def diff(self, theta): """ Compute the derivative of the probability density function, in the specified point. Normally it is computed w.r.t. the derivative of the logarithm of the probability density function, exploiting the likelihood ratio trick, i.e.: .. math:: \\nabla_{\\rho}p(\\theta)=p(\\theta)\\nabla_{\\rho}\\log p(\\theta) Args: theta (np.ndarray): the point where the gradient of the pdf is calculated. Returns: The gradient of the pdf in the specified point. """ return self(theta) * self.diff_log(theta) def get_parameters(self): """ Getter. Returns: The current distribution parameters. """ raise NotImplementedError def set_parameters(self, rho): """ Setter. Args: rho (np.ndarray): the vector of the new parameters to be used by the distribution """ raise NotImplementedError @property def parameters_size(self): """ Property. 
Returns: The size of the distribution parameters. """ raise NotImplementedError
Ancestors
Instance variables
var parameters_size
-
Property.
Returns
The size of the distribution parameters.
Expand source code
@property def parameters_size(self): """ Property. Returns: The size of the distribution parameters. """ raise NotImplementedError
Methods
def diff(self, theta)
-
Compute the derivative of the probability density function, in the specified point. Normally it is computed w.r.t. the derivative of the logarithm of the probability density function, exploiting the likelihood ratio trick, i.e.:
[ \nabla_{\rho}p(\theta)=p(\theta)\nabla_{\rho}\log p(\theta) ]
Args
theta
:np.ndarray
- the point where the gradient of the pdf is
calculated.
Returns
The gradient of the pdf in the specified point.
Expand source code
def diff(self, theta): """ Compute the derivative of the probability density function, in the specified point. Normally it is computed w.r.t. the derivative of the logarithm of the probability density function, exploiting the likelihood ratio trick, i.e.: .. math:: \\nabla_{\\rho}p(\\theta)=p(\\theta)\\nabla_{\\rho}\\log p(\\theta) Args: theta (np.ndarray): the point where the gradient of the pdf is calculated. Returns: The gradient of the pdf in the specified point. """ return self(theta) * self.diff_log(theta)
def diff_log(self, theta)
-
Compute the gradient of the logarithm of the probability density function at the specified point.
Args
theta
:np.ndarray
- the point where the gradient of the log pdf is calculated
Returns
The gradient of the log pdf in the specified point.
Expand source code
def diff_log(self, theta): """ Compute the derivative of the gradient of the probability denstity function in the specified point. Args: theta (np.ndarray): the point where the gradient of the log pdf is calculated Returns: The gradient of the log pdf in the specified point. """ raise NotImplementedError
def get_parameters(self)
-
Getter.
Returns
The current distribution parameters.
Expand source code
def get_parameters(self): """ Getter. Returns: The current distribution parameters. """ raise NotImplementedError
def mle(self, theta, weights=None)
-
Compute the (weighted) maximum likelihood estimate of the points, and update the distribution accordingly.
Args
theta
:np.ndarray
- a set of points, every row is a sample
weights
:np.ndarray, None
- a vector of weights. If specified the weighted maximum likelihood estimate is computed instead of the plain maximum likelihood. The number of elements of this vector must be equal to the number of rows of the theta matrix.
Expand source code
def mle(self, theta, weights=None): """ Compute the (weighted) maximum likelihood estimate of the points, and update the distribution accordingly. Args: theta (np.ndarray): a set of points, every row is a sample weights (np.ndarray, None): a vector of weights. If specified the weighted maximum likelihood estimate is computed instead of the plain maximum likelihood. The number of elements of this vector must be equal to the number of rows of the theta matrix. """ raise NotImplementedError
def set_parameters(self, rho)
-
Setter.
Args
rho
:np.ndarray
- the vector of the new parameters to be used by the distribution
Expand source code
def set_parameters(self, rho): """ Setter. Args: rho (np.ndarray): the vector of the new parameters to be used by the distribution """ raise NotImplementedError
Inherited members
class GaussianTorchDistribution (mu, chol_flat, use_cuda)
-
Interface for a generic PyTorch distribution. A PyTorch distribution is a distribution implemented using PyTorch. Functions ending with '_t' use tensors as input, and also as output when required.
Constructor.
Args
use_cuda
:bool
- whether to use cuda or not.
Expand source code
class GaussianTorchDistribution(TorchDistribution): def __init__(self, mu, chol_flat, use_cuda): super().__init__(use_cuda) self._dim = mu.shape[0] self._mu = nn.Parameter(torch.as_tensor(mu, dtype=torch.float32), requires_grad=True) self._chol_flat = nn.Parameter(torch.as_tensor(chol_flat, dtype=torch.float32), requires_grad=True) self.distribution_t = MultivariateNormal(self._mu, scale_tril=self.to_tril_matrix(self._chol_flat, self._dim)) def __copy__(self): return GaussianTorchDistribution(self._mu, self._chol_flat, self.use_cuda) def __deepcopy__(self, memodict=None): return GaussianTorchDistribution(copy.deepcopy(self._mu), copy.deepcopy(self._chol_flat), self.use_cuda) @staticmethod def to_tril_matrix(chol_flat, dim): if isinstance(chol_flat, np.ndarray): chol = np.zeros((dim, dim)) exp_fun = np.exp else: chol = torch.zeros((dim, dim)) exp_fun = torch.exp d1, d2 = np.diag_indices(dim) chol[d1, d2] += exp_fun(chol_flat[0: dim]) ld1, ld2 = np.tril_indices(dim, k=-1) chol[ld1, ld2] += chol_flat[dim:] return chol @staticmethod def flatten_matrix(mat, tril=False): if not tril: mat = scpla.cholesky(mat, lower=True) dim = mat.shape[0] d1, d2 = np.diag_indices(dim) ld1, ld2 = np.tril_indices(dim, k=-1) return np.concatenate((np.log(mat[d1, d2]), mat[ld1, ld2])) def entropy_t(self): return self.distribution_t.entropy() def mean_t(self): return self.distribution_t.mean def log_pdf_t(self, x): return self.distribution_t.log_prob(x) def sample(self): return self.distribution_t.rsample() def covariance_matrix(self): return self.distribution_t.covariance_matrix.detach().numpy() def set_weights(self, weights): set_weights([self._mu], weights[0:self._dim], self._use_cuda) set_weights([self._chol_flat], weights[self._dim:], self._use_cuda) # This is important - otherwise the changes will not be reflected! 
self.distribution_t = MultivariateNormal(self._mu, scale_tril=self.to_tril_matrix(self._chol_flat, self._dim)) def get_weights(self): mu_weights = get_weights([self._mu]) chol_flat_weights = get_weights([self._chol_flat]) return np.concatenate([mu_weights, chol_flat_weights]) def parameters(self): return [self._mu, self._chol_flat]
Ancestors
Static methods
def flatten_matrix(mat, tril=False)
-
Expand source code
@staticmethod def flatten_matrix(mat, tril=False): if not tril: mat = scpla.cholesky(mat, lower=True) dim = mat.shape[0] d1, d2 = np.diag_indices(dim) ld1, ld2 = np.tril_indices(dim, k=-1) return np.concatenate((np.log(mat[d1, d2]), mat[ld1, ld2]))
def to_tril_matrix(chol_flat, dim)
-
Expand source code
@staticmethod def to_tril_matrix(chol_flat, dim): if isinstance(chol_flat, np.ndarray): chol = np.zeros((dim, dim)) exp_fun = np.exp else: chol = torch.zeros((dim, dim)) exp_fun = torch.exp d1, d2 = np.diag_indices(dim) chol[d1, d2] += exp_fun(chol_flat[0: dim]) ld1, ld2 = np.tril_indices(dim, k=-1) chol[ld1, ld2] += chol_flat[dim:] return chol
Methods
def covariance_matrix(self)
-
Expand source code
def covariance_matrix(self): return self.distribution_t.covariance_matrix.detach().numpy()
Inherited members
class TorchDistribution (use_cuda)
-
Interface for a generic PyTorch distribution. A PyTorch distribution is a distribution implemented using PyTorch. Functions ending with '_t' use tensors as input, and also as output when required.
Constructor.
Args
use_cuda
:bool
- whether to use cuda or not.
Expand source code
class TorchDistribution(AbstractDistribution, ABC): """ Interface for a generic PyTorch distribution. A PyTorch distribution is a distribution implemented using PyTorch. Functions ending with '_t' use tensors as input, and also as output when required. """ def __init__(self, use_cuda): """ Constructor. Args: use_cuda (bool): whether to use cuda or not. """ self._use_cuda = use_cuda def entropy(self): """ Compute the entropy of the policy. Returns: The value of the entropy of the policy. """ return self.entropy_t().detach().cpu().numpy() def entropy_t(self): """ Compute the entropy of the policy. Returns: The tensor value of the entropy of the policy. """ raise NotImplementedError def mean(self): """ Compute the mean of the policy. Returns: The value of the mean of the policy. """ return self.mean_t().detach().cpu().numpy() def mean_t(self): """ Compute the mean of the policy. Returns: The tensor value of the mean of the policy. """ raise NotImplementedError def log_pdf(self, x): x = to_float_tensor(x, self._use_cuda) return self.log_pdf_t(x).detach().cpu().numpy() def log_pdf_t(self, x): """ Compute the logarithm of the probability density function in the specified point Args: x (torch.Tensor): the point where the log pdf is calculated Returns: The value of the log pdf in the specified point. """ raise NotImplementedError def set_weights(self, weights): """ Setter. Args: weights (np.ndarray): the vector of the new weights to be used by the distribution """ raise NotImplementedError def get_weights(self): """ Getter. Returns: The current policy weights. """ raise NotImplementedError def parameters(self): """ Returns the trainable distribution parameters, as expected by torch optimizers. Returns: List of parameters to be optimized. """ raise NotImplementedError def reset(self): pass @property def use_cuda(self): """ True if the policy is using cuda_tensors. """ return self._use_cuda
Ancestors
- AbstractDistribution
- abc.ABC
Subclasses
Instance variables
var use_cuda
-
True if the policy is using cuda_tensors.
Expand source code
@property def use_cuda(self): """ True if the policy is using cuda_tensors. """ return self._use_cuda
Methods
def entropy(self)
-
Compute the entropy of the policy.
Returns
The value of the entropy of the policy.
Expand source code
def entropy(self): """ Compute the entropy of the policy. Returns: The value of the entropy of the policy. """ return self.entropy_t().detach().cpu().numpy()
def entropy_t(self)
-
Compute the entropy of the policy.
Returns
The tensor value of the entropy of the policy.
Expand source code
def entropy_t(self): """ Compute the entropy of the policy. Returns: The tensor value of the entropy of the policy. """ raise NotImplementedError
def get_weights(self)
-
Getter.
Returns
The current policy weights.
Expand source code
def get_weights(self): """ Getter. Returns: The current policy weights. """ raise NotImplementedError
def log_pdf_t(self, x)
-
Compute the logarithm of the probability density function in the specified point
Args
x
:torch.Tensor
- the point where the log pdf is calculated
Returns
The value of the log pdf in the specified point.
Expand source code
def log_pdf_t(self, x): """ Compute the logarithm of the probability density function in the specified point Args: x (torch.Tensor): the point where the log pdf is calculated Returns: The value of the log pdf in the specified point. """ raise NotImplementedError
def mean(self)
-
Compute the mean of the policy.
Returns
The value of the mean of the policy.
Expand source code
def mean(self): """ Compute the mean of the policy. Returns: The value of the mean of the policy. """ return self.mean_t().detach().cpu().numpy()
def mean_t(self)
-
Compute the mean of the policy.
Returns
The tensor value of the mean of the policy.
Expand source code
def mean_t(self): """ Compute the mean of the policy. Returns: The tensor value of the mean of the policy. """ raise NotImplementedError
def parameters(self)
-
Returns the trainable distribution parameters, as expected by torch optimizers.
Returns
List of parameters to be optimized.
Expand source code
def parameters(self): """ Returns the trainable distribution parameters, as expected by torch optimizers. Returns: List of parameters to be optimized. """ raise NotImplementedError
def reset(self)
-
Expand source code
def reset(self): pass
def set_weights(self, weights)
-
Setter.
Args
weights
:np.ndarray
- the vector of the new weights to be used by the distribution
Expand source code
def set_weights(self, weights): """ Setter. Args: weights (np.ndarray): the vector of the new weights to be used by the distribution """ raise NotImplementedError
Inherited members