Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Summary: Addresses #15738, using fritzo's suggestion. This adds a `torch._sample_dirichlet` method in `Distributions.cpp` and `Distributions.cu`.

- For CPU, this introduces no perf hit, since all we do is promote `alpha` to double when drawing the gamma samples (the gamma sampler uses `accscalar_t`, i.e. double on CPU, anyway) and cast back to float32 on return.
- I have added an analogous method for CUDA as well, but the default sampler for CUDA uses `scalar_t` for efficiency, so I have kept it that way.

With this, I no longer see the bias towards 1 reported in #15738 with `float32`, but there is a spurious mode at 0.5, as would be expected. Users need to explicitly use `float64` on GPU to avoid the spurious mode at 0.5. (EDIT: see the note below; it appears that the bias issue is still present for certain builds.)

Added some tests and checked that there is no perf regression. My experience with C++ is very limited, so apologies in advance if I missed something basic.

cc ailzhang, fritzo, fmassa

Pull Request resolved: https://github.com/pytorch/pytorch/pull/17488
Differential Revision: D14410301
Pulled By: ezyang
fbshipit-source-id: 62b2f694b4642685eab06db96d74ce28e05c3992
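As an illustrative check of the precision behaviour described above (not part of the PR; the sample count and threshold are arbitrary), one can draw `Dirichlet([0.5, 0.5])` samples in `float32` and `float64` and compare how much mass collapses onto the corners of the simplex, which is where the `float32` bias reported in #15738 showed up:

import torch
from torch.distributions import Dirichlet

def corner_mass(dtype, device, n=200000, eps=1e-6):
    # Fraction of samples within eps of a simplex corner; the float32 bias
    # in #15738 shows up as excess mass piling onto exactly 0 and 1.
    concentration = torch.tensor([0.5, 0.5], dtype=dtype, device=device)
    first = Dirichlet(concentration).sample((n,))[:, 0]
    return ((first < eps) | (first > 1 - eps)).float().mean().item()

device = "cuda" if torch.cuda.is_available() else "cpu"
for dtype in (torch.float32, torch.float64):
    print(dtype, device, corner_mass(dtype, device))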
import torch
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.distributions import constraints
from torch.distributions.exp_family import ExponentialFamily


# This helper is exposed for testing.
def _Dirichlet_backward(x, concentration, grad_output):
    total = concentration.sum(-1, True).expand_as(concentration)
    # torch._dirichlet_grad computes the derivative of the sample x with
    # respect to the concentration parameter.
    grad = torch._dirichlet_grad(x, concentration, total)
    return grad * (grad_output - (x * grad_output).sum(-1, True))


class _Dirichlet(Function):
    @staticmethod
    def forward(ctx, concentration):
        x = torch._sample_dirichlet(concentration)
        ctx.save_for_backward(x, concentration)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        x, concentration = ctx.saved_tensors
        return _Dirichlet_backward(x, concentration, grad_output)


class Dirichlet(ExponentialFamily):
    r"""
    Creates a Dirichlet distribution parameterized by concentration :attr:`concentration`.

    Example::

        >>> m = Dirichlet(torch.tensor([0.5, 0.5]))
        >>> m.sample()  # Dirichlet distributed with concentration [0.5, 0.5]
        tensor([ 0.1046, 0.8954])

    Args:
        concentration (Tensor): concentration parameter of the distribution
            (often referred to as alpha)
    """
    arg_constraints = {'concentration': constraints.positive}
    support = constraints.simplex
    has_rsample = True

    def __init__(self, concentration, validate_args=None):
        if concentration.dim() < 1:
            raise ValueError("`concentration` parameter must be at least one-dimensional.")
        self.concentration = concentration
        batch_shape, event_shape = concentration.shape[:-1], concentration.shape[-1:]
        super(Dirichlet, self).__init__(batch_shape, event_shape, validate_args=validate_args)

    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(Dirichlet, _instance)
        batch_shape = torch.Size(batch_shape)
        new.concentration = self.concentration.expand(batch_shape + self.event_shape)
        super(Dirichlet, new).__init__(batch_shape, self.event_shape, validate_args=False)
        new._validate_args = self._validate_args
        return new

    def rsample(self, sample_shape=()):
        shape = self._extended_shape(sample_shape)
        concentration = self.concentration.expand(shape)
        return _Dirichlet.apply(concentration)

    def log_prob(self, value):
        if self._validate_args:
            self._validate_sample(value)
        return ((torch.log(value) * (self.concentration - 1.0)).sum(-1) +
                torch.lgamma(self.concentration.sum(-1)) -
                torch.lgamma(self.concentration).sum(-1))

    @property
    def mean(self):
        return self.concentration / self.concentration.sum(-1, True)

    @property
    def variance(self):
        con0 = self.concentration.sum(-1, True)
        return self.concentration * (con0 - self.concentration) / (con0.pow(2) * (con0 + 1))

    def entropy(self):
        k = self.concentration.size(-1)
        a0 = self.concentration.sum(-1)
        return (torch.lgamma(self.concentration).sum(-1) - torch.lgamma(a0) -
                (k - a0) * torch.digamma(a0) -
                ((self.concentration - 1.0) * torch.digamma(self.concentration)).sum(-1))

    @property
    def _natural_params(self):
        return (self.concentration, )

    def _log_normalizer(self, x):
        return x.lgamma().sum(-1) - torch.lgamma(x.sum(-1))
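For completeness, a minimal usage sketch of the reparameterized sampling path implemented above (illustrative only; the shapes and the choice of loss are arbitrary): `rsample` routes through `_Dirichlet.apply`, so gradients reach `concentration` via `_Dirichlet_backward`.

import torch
from torch.distributions import Dirichlet

concentration = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
d = Dirichlet(concentration)

x = d.rsample((4,))           # shape (4, 3); each row lies on the simplex
loss = d.log_prob(x).mean()   # any scalar function of the samples will do
loss.backward()

print(x.sum(-1))              # rows sum to 1
print(concentration.grad)     # populated through _Dirichlet_backward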