import sys
import tempfile
import unittest
from copy import deepcopy
from itertools import product
from functools import reduce
from operator import mul

import torch
import torch.cuda
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.functional import _Reduction
from common_utils import TestCase, to_gpu, freeze_rng_state, is_iterable, \
    TEST_WITH_ROCM
from common_cuda import TEST_CUDA
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors
from torch.autograd import Variable
import torch.backends.cudnn


# tarfile module tries to obtain a file object name in python 3.3
if sys.version_info[:2] == (3, 3):
    TemporaryFile = tempfile.NamedTemporaryFile
else:
    TemporaryFile = tempfile.TemporaryFile

PRECISION = 1e-5


def get_reduction(m):
    # Return the reduction mode of a loss module, falling back to the legacy
    # sizeAverage attribute for modules that predate the 'reduction' argument.
    result = getattr(m, 'reduction', None)
    if result is None:
        result = _Reduction.legacy_get_string(getattr(m, 'sizeAverage', None), True, emit_warning=False)
    assert result is not None
    return result


def get_weight(m):
    # Return the module's weight parameter, also checking the legacy
    # 'weights' attribute name.
    result = getattr(m, 'weight', None)
    if result is not None:
        return result
    return getattr(m, 'weights', None)

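# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): how the two helpers above behave on a
# typical loss module and a typical layer.
def _example_reduction_and_weight():
    assert get_reduction(nn.L1Loss(reduction='sum')) == 'sum'
    assert get_reduction(nn.MSELoss()) == 'mean'   # default reduction string
    linear = nn.Linear(3, 2)
    assert get_weight(linear) is linear.weight     # no legacy 'weights' attribute here
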
module_tests = [
    dict(
        module_name='Linear',
        constructor_args=(10, 8),
        input_size=(4, 10),
        reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8),
    ),
    dict(
        module_name='Linear',
        constructor_args=(10, 8, False),
        input_size=(4, 10),
        desc='no_bias',
        reference_fn=lambda i, p: torch.mm(i, p[0].t())
    ),
    dict(
        module_name='Threshold',
        constructor_args=(2., 1.),
        input_size=(2, 3, 4, 5),
        check_inplace=True,
        desc='threshold_value'
    ),
    dict(
        module_name='Threshold',
        constructor_args=(2., 10.),
        input_size=(2, 3, 4, 5),
        desc='large_value'
    ),
    dict(
        module_name='ReLU',
        input_size=(2, 3, 4, 5),
        check_inplace=True,
    ),
    dict(
        module_name='ReLU6',
        input_size=(2, 3, 4, 5),
        check_inplace=True,
    ),
    dict(
        module_name='RReLU',
        input_size=(1, 2, 2),
        test_cuda=False,
    ),
    dict(
        module_name='RReLU',
        constructor_args=(0.1, 0.9),
        input_size=(4, 4, 5),
        desc='with_up_down',
        test_cuda=False,
    ),
    dict(
        module_name='Hardtanh',
        input_size=(3, 2, 5),
        reference_fn=lambda i, _: i.clamp(-1, 1),
    ),
    dict(
        module_name='Sigmoid',
        input_size=(2, 3, 4, 5)
    ),
    dict(
        module_name='Tanh',
        input_size=(2, 3, 4, 5)
    ),
    dict(
        module_name='Softmax',
        constructor_args=(1,),
        input_size=(10, 20),
        reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1, True).expand(10, 20)),
    ),
    dict(
        module_name='Softmax2d',
        input_size=(1, 3, 10, 20),
        reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1, False)),
    ),
    dict(
        module_name='LogSoftmax',
        constructor_args=(1,),
        input_size=(10, 20),
        reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, True).expand(10, 20)).log_(),
    ),
    dict(
        module_name='LogSoftmax',
        constructor_args=(1,),
        input_size=(1, 3, 10, 20),
        reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, False)).log_(),
        desc='multiparam',
    ),
    dict(
        module_name='ELU',
        constructor_args=(2.,),
        input_size=(3, 2, 5),
        reference_fn=lambda x, _: torch.where(x >= 0, x, 2 * (x.exp() - 1)),
    ),
    # TODO: reference function
    dict(
        module_name='Hardshrink',
        constructor_args=(2.,),
        input_size=(4, 3, 2, 4),
    ),
    dict(
        module_name='LeakyReLU',
        input_size=(3, 2, 5),
        check_inplace=True
    ),
    dict(
        module_name='LeakyReLU',
        constructor_args=(0.5,),
        input_size=(3, 2, 5),
        check_inplace=True,
        desc='with_negval'
    ),
    dict(
        module_name='LogSigmoid',
        input_size=(2, 3, 4),
        reference_fn=lambda i, _: i.sigmoid().log(),
    ),
    dict(
        module_name='Softplus',
        input_size=(10, 20),
        reference_fn=lambda i, _: torch.log(1 + torch.exp(i)),
    ),
    dict(
        module_name='Softplus',
        constructor_args=(2,),
        input_size=(10, 20),
        reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
        desc='beta',
    ),
    dict(
        module_name='Softplus',
        constructor_args=(2, -100),
        input_size=(10, 20),
        reference_fn=(lambda i, _: ((i * 2) > -100).type_as(i) * i +
                      ((i * 2) <= -100).type_as(i) * 1. / 2. * torch.log(1 + torch.exp(2 * i))),
        desc='beta_threshold',
    ),
    dict(
        module_name='Softshrink',
        input_size=(3, 2, 5),
    ),
    dict(
        module_name='Softshrink',
        constructor_args=(1,),
        input_size=(3, 2, 5),
        desc='lambda',
    ),
    dict(
        module_name='CrossMapLRN2d',
        constructor_args=(5, 5e-3, 1e-3, 2),
        input_size=(2, 3, 6, 6),
        check_gradgrad=False,
    ),
    dict(
        module_name='PReLU',
        input_size=(2, 3, 4),
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
        desc='1d',
    ),
    dict(
        module_name='PReLU',
        constructor_args=(3,),
        input_size=(2, 3, 4),
        desc='1d_multiparam',
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
    ),
    dict(
        module_name='PReLU',
        input_size=(2, 3, 4, 5),
        desc='2d',
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
    ),
    dict(
        module_name='PReLU',
        constructor_args=(3,),
        input_size=(2, 3, 4, 5),
        desc='2d_multiparam',
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
    ),
    dict(
        module_name='PReLU',
        input_size=(2, 3, 4, 5, 6),
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
        desc='3d',
    ),
    dict(
        module_name='PReLU',
        constructor_args=(3,),
        input_size=(2, 3, 4, 5, 6),
        desc='3d_multiparam',
        reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
    ),
    dict(
        module_name='Softsign',
        input_size=(3, 2, 5),
        reference_fn=lambda i, _: i.div(1 + torch.abs(i)),
    ),
    dict(
        module_name='Softmin',
        constructor_args=(1,),
        input_size=(10, 20),
    ),
    dict(
        module_name='Softmin',
        constructor_args=(1,),
        input_size=(2, 3, 5, 10),
        desc='multidim',
    ),
    dict(
        module_name='Tanhshrink',
        input_size=(2, 3, 4, 5),
    ),
]
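# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): roughly how one entry of module_tests can
# be exercised by hand -- build the module from constructor_args, run it on a
# random input of input_size, and compare against reference_fn, which receives
# the input and the module's parameters. The real test generator adds many
# more checks (gradients, CUDA, in-place variants, etc.).
def _run_module_test_entry_sketch(entry):
    module_cls = getattr(nn, entry['module_name'])
    module = module_cls(*entry.get('constructor_args', ())).double()
    x = torch.randn(*entry['input_size']).double()
    out = module(x)
    ref_fn = entry.get('reference_fn')
    if ref_fn is not None:
        expected = ref_fn(x, tuple(module.parameters()))
        assert torch.allclose(out, expected, atol=PRECISION)
    return out
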
# Generates rand tensor with non-equal values. This ensures that duplicate
# values won't cause test failures for modules like MaxPooling.
# size should be small, otherwise randperm fails / long overflows.
def _rand_tensor_non_equal(*size):
    total = reduce(mul, size, 1)
    return torch.randperm(total).view(*size).double()


def wrap_functional(fn, **kwargs):
    class FunctionalModule(nn.Module):
        def forward(self, *args):
            return fn(*args, **kwargs)
    return FunctionalModule

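# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): wrap_functional turns a plain functional,
# plus fixed keyword arguments, into a Module *class* whose forward simply
# calls it, so functional ops can be plugged into the module-based tests.
def _example_wrap_functional():
    relu_module_cls = wrap_functional(F.relu)   # returns a class, not an instance
    relu_module = relu_module_cls()
    x = torch.randn(2, 3)
    assert torch.equal(relu_module(x), F.relu(x))
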
def poissonnllloss_no_reduce_test():
    t = torch.randn(10, 10)
    return dict(
        fullname='PoissonNLLLLoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.poisson_nll_loss(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(10, 10),
        pickle=False)

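# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): the *_no_reduce tests in this file all
# pass reduction='none', which keeps the loss elementwise (same shape as the
# input) instead of reducing it to a scalar.
def _example_reduction_none():
    inp = torch.rand(10, 10)
    target = torch.rand(10, 10)
    elementwise = F.l1_loss(inp, target, reduction='none')   # shape (10, 10)
    mean = F.l1_loss(inp, target, reduction='mean')          # scalar
    assert elementwise.shape == inp.shape
    assert torch.allclose(elementwise.mean(), mean)
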
def bceloss_no_reduce_test():
    t = Variable(torch.randn(15, 10).gt(0).double())
    return dict(
        fullname='BCELoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * i.log() + (1 - t) * (1 - i).log()),
        check_gradgrad=False,
        pickle=False)


def bceloss_no_reduce_scalar_test():
    t = torch.randn(()).gt(0).double()
    return dict(
        fullname='BCELoss_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * i.log() + (1 - t) * (1 - i).log()),
        check_gradgrad=False,
        pickle=False)


def bceloss_weights_no_reduce_test():
    t = Variable(torch.randn(15, 10).gt(0).double())
    weights = torch.rand(10)
    return dict(
        fullname='BCELoss_weights_no_reduce',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy(i, t.type_as(i),
                                             weight=weights.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * i.log() + (1 - t) * (1 - i).log()) * weights,
        check_gradgrad=False,
        pickle=False
    )


def bceloss_weights_no_reduce_scalar_test():
    t = torch.randn(()).double()
    weights = torch.rand(())
    return dict(
        fullname='BCELoss_weights_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy(i, t.type_as(i),
                                             weight=weights.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * i.log() + (1 - t) * (1 - i).log()) * weights,
        check_gradgrad=False,
        pickle=False
    )


def bce_with_logistic_legacy_enum_test():
    t = Variable(torch.randn(15, 10).gt(0).double())
    sigmoid = nn.Sigmoid()
    return dict(
        fullname='BCEWithLogitsLoss_legacy_enum',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduce=False)),
        input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()),
        check_gradgrad=False,
        pickle=False,
    )


def bce_with_logistic_no_reduce_test():
    t = Variable(torch.randn(15, 10).gt(0).double())
    sigmoid = nn.Sigmoid()
    return dict(
        fullname='BCEWithLogitsLoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(15, 10).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()),
        check_gradgrad=False,
        pickle=False,
    )


def bce_with_logistic_no_reduce_scalar_test():
    t = torch.randn(()).gt(0).double()
    sigmoid = nn.Sigmoid()
    return dict(
        fullname='BCEWithLogitsLoss_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.binary_cross_entropy_with_logits(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(()).clamp_(2.8e-2, 1 - 2.8e-2),
        reference_fn=lambda i, m: -(t * sigmoid(i).log() + (1 - t) * (1 - sigmoid(i)).log()),
        check_gradgrad=False,
        pickle=False
    )
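# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): F.kl_div expects its *input* to already be
# log-probabilities, which is why the KLDivLoss entries below build inputs with
# torch.rand(...).log(). Pointwise, the unreduced loss is
# target * (log(target) - input).
def _example_kl_div_pointwise():
    log_p = torch.rand(10, 10).clamp_(1e-2, 1).log()
    target = torch.rand(10, 10).clamp_(1e-2, 1)
    loss = F.kl_div(log_p, target, reduction='none')
    expected = target * (target.log() - log_p)
    assert torch.allclose(loss, expected)
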
def kldivloss_with_target_no_reduce_test():
    i = torch.rand(10, 10).log()
    return dict(
        fullname='KLDivLoss_with_target_no_reduce',
        constructor=wrap_functional(
            lambda t: F.kl_div(i.type_as(t), t, reduction='none')),
        input_fn=lambda: torch.rand(10, 10),
        reference_fn=lambda t, _:
            loss_reference_fns['KLDivLoss'](i.type_as(t), t, reduction='none'),
        pickle=False)


def kldivloss_no_reduce_test():
    t = torch.randn(10, 10)
    return dict(
        fullname='KLDivLoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.kl_div(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(10, 10).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['KLDivLoss'](i, t.type_as(i), reduction='none'),
        pickle=False,
    )


def kldivloss_no_reduce_scalar_test():
    t = torch.randn(())
    return dict(
        fullname='KLDivLoss_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.kl_div(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.rand(()).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['KLDivLoss'](i, t.type_as(i), reduction='none'),
        pickle=False)


def l1loss_no_reduce_test():
    t = torch.randn(2, 3, 4)
    return dict(
        fullname='L1Loss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.l1_loss(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.randn(2, 3, 4),
        reference_fn=lambda i, m: (i - t.type_as(i)).abs(),
        pickle=False)


def l1loss_no_reduce_scalar_test():
    t = torch.randn(())
    return dict(
        fullname='L1Loss_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.l1_loss(i, t.type_as(i), reduction='none')),
        input_fn=lambda: torch.randn(()),
        reference_fn=lambda i, m: (i - t.type_as(i)).abs(),
        pickle=False)


def mseloss_no_reduce_test():
    input_size = (2, 3, 4, 5)
    target = torch.randn(*input_size)
    return dict(
        fullname='MSELoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.mse_loss(i, target.type_as(i), reduction='none')),
        input_size=input_size,
        reference_fn=lambda i, m: (i - target).pow(2),
        pickle=False)


def mseloss_no_reduce_scalar_test():
    input_size = ()
    target = torch.randn(input_size)
    return dict(
        fullname='MSELoss_no_reduce_scalar',
        constructor=wrap_functional(
            lambda i: F.mse_loss(i, target.type_as(i), reduction='none')),
        input_size=input_size,
        reference_fn=lambda i, m: (i - target).pow(2),
        pickle=False)

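# Minimal illustrative sketch (not used by the test harness; the function name
# below is a hypothetical addition): the NLLLoss entries below combine
# reduction='none' with ignore_index; positions whose target equals
# ignore_index contribute a loss of exactly zero.
def _example_nll_ignore_index():
    log_probs = F.log_softmax(torch.randn(5, 10), dim=1)
    target = torch.tensor([0, 2, 2, 4, 9])
    loss = F.nll_loss(log_probs, target, reduction='none', ignore_index=2)
    assert loss[1].item() == 0 and loss[2].item() == 0
    assert torch.allclose(loss[0], -log_probs[0, 0])
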
def nllloss_no_reduce_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    kwargs = {'reduction': 'none'}
    return dict(
        fullname='NLLLoss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
        input_fn=lambda: torch.rand(15, 10).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs),
        pickle=False)


def nllloss_no_reduce_ignore_index_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    kwargs = {'ignore_index': 2, 'reduction': 'none'}
    return dict(
        fullname='NLLLoss_no_reduce_ignore_index',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
        input_fn=lambda: torch.rand(15, 10).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs),
        pickle=False)


def nllloss_no_reduce_weights_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    weight = torch.rand(10)

    def kwargs(i):
        return {'weight': weight.type_as(i), 'reduction': 'none'}

    return dict(
        fullname='NLLLoss_no_reduce_weights',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))),
        input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)),
        pickle=False)


def nllloss_no_reduce_weights_ignore_index_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    weight = torch.rand(10)

    def kwargs(i):
        return {'weight': weight.type_as(i), 'reduction': 'none',
                'ignore_index': 2}

    return dict(
        fullname='NLLLoss_no_reduce_weights_ignore_index',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))),
        input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)),
        pickle=False)


def nllloss_no_reduce_weights_ignore_index_neg_test():
    t = Variable(torch.Tensor(15).uniform_().mul(10).floor().long())
    weight = torch.rand(10)

    def kwargs(i):
        return {'weight': weight.type_as(i), 'reduction': 'none',
                'ignore_index': -1}

    return dict(
        fullname='NLLLoss_no_reduce_weights_ignore_index_neg',
        constructor=wrap_functional(
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))),
        input=torch.rand(15, 10).add(1e-2).log(),
        reference_fn=lambda i, _:
            loss_reference_fns['NLLLoss'](i, t.type_as(i).long(), **kwargs(i)),
        pickle=False)

def nllloss2d_no_reduce_test():
|
|
t = Variable(torch.rand(2, 5, 5).mul(3).floor().long())
|
|
kwargs = {'reduction': 'none'}
|
|
return dict(
|
|
fullname='NLLLoss2d_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
|
|
pickle=False)
|
|
|
|
|
|
def nllloss2d_no_reduce_ignore_index_test():
|
|
t = Variable(torch.rand(2, 5, 5).mul(3).floor().long())
|
|
kwargs = {'ignore_index': 1, 'reduction': 'none'}
|
|
return dict(
|
|
fullname='NLLLoss2d_no_reduce_ignore_index',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
|
|
pickle=False)
|
|
|
|
|
|
def nllloss2d_no_reduce_weights_test():
|
|
t = Variable(torch.rand(2, 5, 5).mul(3).floor().long())
|
|
weight = torch.rand(3)
|
|
|
|
def kwargs(i):
|
|
return {'weight': weight.type_as(i), 'reduction': 'none'}
|
|
|
|
return dict(
|
|
fullname='NLLLoss2d_no_reduce_weights',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)),
|
|
pickle=False)
|
|
|
|
|
|
def nlllossNd_no_reduce_test():
|
|
t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
|
|
kwargs = {'reduction': 'none'}
|
|
return dict(
|
|
fullname='NLLLossNd_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
|
|
pickle=False)
|
|
|
|
|
|
def nlllossNd_no_reduce_ignore_index_test():
|
|
t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
|
|
kwargs = {'ignore_index': 1, 'reduction': 'none'}
|
|
return dict(
|
|
fullname='NLLLossNd_no_reduce_ignore_index',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
|
|
pickle=False)
|
|
|
|
|
|
def nlllossNd_no_reduce_weights_test():
|
|
t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
|
|
weight = torch.rand(3)
|
|
|
|
def kwargs(i):
|
|
return {'weight': weight.type_as(i), 'reduction': 'none'}
|
|
|
|
return dict(
|
|
fullname='NLLLossNd_no_reduce_weights',
|
|
constructor=wrap_functional(
|
|
lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i))),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)),
|
|
pickle=False)
|
|
|
|
|
|
def smoothl1loss_no_reduce_test():
|
|
t = torch.randn(2, 3, 4)
|
|
return dict(
|
|
fullname='SmoothL1Loss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.smooth_l1_loss(i, t.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(2, 3, 4),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['SmoothL1Loss'](i, t.type_as(i), reduction='none'),
|
|
pickle=False)
|
|
|
|
|
|
def smoothl1loss_no_reduce_scalar_test():
|
|
t = torch.randn(())
|
|
return dict(
|
|
fullname='SmoothL1Loss_no_reduce_scalar',
|
|
constructor=wrap_functional(
|
|
lambda i: F.smooth_l1_loss(i, t.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(()),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['SmoothL1Loss'](i, t.type_as(i), reduction='none'),
|
|
pickle=False)
|
|
|
|
|
|
def multilabelmarginloss_1d_no_reduce_test():
|
|
t = Variable(torch.rand(10).mul(10).floor().long())
|
|
return dict(
|
|
fullname='MultiLabelMarginLoss_1d_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')),
|
|
input_fn=lambda: torch.randn(10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multilabelmarginloss_index_neg_test():
|
|
t = Variable(torch.clamp(torch.rand(5, 10).add(-.5).mul(20).floor().long(), min=-1))
|
|
return dict(
|
|
fullname='MultiLabelMarginLoss_index_neg',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multilabelmarginloss_no_reduce_test():
|
|
t = Variable(torch.rand(5, 10).mul(10).floor().long())
|
|
return dict(
|
|
fullname='MultiLabelMarginLoss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multilabel_margin_loss(i, t.type_as(i).long(), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiLabelMarginLoss'](i, t.data.type_as(i).long(), reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def hingeembeddingloss_no_reduce_test():
|
|
t = Variable(torch.randn(10).gt(0).double().mul_(2).sub(1))
|
|
return dict(
|
|
fullname='HingeEmbeddingLoss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.hinge_embedding_loss(i, t.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['HingeEmbeddingLoss'](i, t.type_as(i), reduction='none'),
|
|
check_sum_reduction=True,
|
|
pickle=False)
|
|
|
|
|
|
def hingeembeddingloss_margin_no_reduce_test():
|
|
t = Variable(torch.randn(10).gt(0).double().mul_(2).sub(1))
|
|
return dict(
|
|
fullname='HingeEmbeddingLoss_margin_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.hinge_embedding_loss(i, t.type_as(i), margin=0.5, reduction='none')),
|
|
input_fn=lambda: torch.randn(10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['HingeEmbeddingLoss'](i, t.type_as(i), margin=0.5, reduction='none'),
|
|
check_sum_reduction=True,
|
|
pickle=False)
|
|
|
|
|
|
def softmarginloss_no_reduce_test():
|
|
t = torch.randn(5, 5)
|
|
return dict(
|
|
fullname='SoftMarginLoss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.soft_margin_loss(i, t.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 5),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['SoftMarginLoss'](i, t.type_as(i), reduction='none'),
|
|
pickle=False)
|
|
|
|
|
|
def multilabelsoftmarginloss_no_reduce_test():
|
|
t = torch.rand(5, 10).mul(2).floor()
|
|
return dict(
|
|
fullname='MultiLabelSoftMarginLoss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multilabel_soft_margin_loss(i, t.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, m:
|
|
(-(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log())).sum(dim=1) / i.size(1),
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multilabelsoftmarginloss_weights_no_reduce_test():
|
|
t = torch.rand(5, 10).mul(2).floor()
|
|
weights = torch.rand(10)
|
|
return dict(
|
|
fullname='MultiLabelSoftMarginLoss_weights_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multilabel_soft_margin_loss(i, t.type_as(i),
|
|
weight=weights.type_as(i), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, m:
|
|
(-(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()) * weights).sum(dim=1) / i.size(1),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multimarginloss_no_reduce_test():
|
|
t = torch.rand(5).mul(8).floor().long()
|
|
return dict(
|
|
fullname='MultiMarginLoss_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multi_margin_loss(i, t.type_as(i).long(), reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multimarginloss_1d_no_reduce_test():
|
|
t = torch.rand(1).mul(8).floor().long()
|
|
return dict(
|
|
fullname='MultiMarginLoss_1d_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multi_margin_loss(i, t.type_as(i).long(), reduction='none')),
|
|
input_fn=lambda: torch.randn(10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multimarginloss_p_no_reduce_test():
|
|
t = torch.rand(5).mul(8).floor().long()
|
|
return dict(
|
|
fullname='MultiMarginLoss_p_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multi_margin_loss(i, t.type_as(i).long(), p=2, reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10).clamp_(1e-2, 1 - 1e-2),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(), p=2, reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multimarginloss_margin_no_reduce_test():
|
|
t = torch.rand(5).mul(8).floor().long()
|
|
return dict(
|
|
fullname='MultiMarginLoss_margin_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multi_margin_loss(i, t.type_as(i).long(), margin=0.5, reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(),
|
|
margin=0.5, reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def multimarginloss_weights_no_reduce_test():
|
|
t = torch.rand(5).mul(8).floor().long()
|
|
weights = torch.rand(10)
|
|
return dict(
|
|
fullname='MultiMarginLoss_weights_no_reduce',
|
|
constructor=wrap_functional(
|
|
lambda i: F.multi_margin_loss(i, t.type_as(i).long(), weight=weights.type_as(i),
|
|
reduction='none')),
|
|
input_fn=lambda: torch.randn(5, 10),
|
|
reference_fn=lambda i, _:
|
|
loss_reference_fns['MultiMarginLoss'](i, t.data.type_as(i).long(),
|
|
weight=weights, reduction='none'),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
pickle=False)
|
|
|
|
|
|
def fractional_max_pool2d_test(test_case):
|
|
random_samples = torch.DoubleTensor(1, 3, 2).uniform_()
|
|
if test_case == 'ratio':
|
|
return dict(
|
|
constructor=lambda: nn.FractionalMaxPool2d(
|
|
2, output_ratio=0.5, _random_samples=random_samples),
|
|
input_size=(1, 3, 5, 7),
|
|
fullname='FractionalMaxPool2d_ratio')
|
|
elif test_case == 'size':
|
|
return dict(
|
|
constructor=lambda: nn.FractionalMaxPool2d((2, 3), output_size=(
|
|
4, 3), _random_samples=random_samples),
|
|
input_size=(1, 3, 7, 6),
|
|
fullname='FractionalMaxPool2d_size')
|
|
|
|
|
|
def fractional_max_pool3d_test(test_case):
|
|
random_samples = torch.DoubleTensor(2, 4, 3).uniform_()
|
|
if test_case == 'ratio':
|
|
return dict(
|
|
constructor=lambda: nn.FractionalMaxPool3d(
|
|
2, output_ratio=0.5, _random_samples=random_samples),
|
|
input_size=(2, 4, 5, 5, 5),
|
|
fullname='FractionalMaxPool3d_ratio')
|
|
elif test_case == 'size':
|
|
return dict(
|
|
constructor=lambda: nn.FractionalMaxPool3d((2, 2, 2), output_size=(
|
|
4, 4, 4), _random_samples=random_samples),
|
|
input_size=(2, 4, 7, 7, 7),
|
|
fullname='FractionalMaxPool3d_size')
|
|
elif test_case == 'asymsize':
|
|
return dict(
|
|
constructor=lambda: nn.FractionalMaxPool3d((4, 2, 3), output_size=(
|
|
10, 3, 2), _random_samples=random_samples),
|
|
input_size=(2, 4, 16, 7, 5),
|
|
fullname='FractionalMaxPool3d_asymsize')
|
|
|
|
|
|
new_module_tests = [
|
|
poissonnllloss_no_reduce_test(),
|
|
bceloss_no_reduce_test(),
|
|
bceloss_weights_no_reduce_test(),
|
|
bce_with_logistic_legacy_enum_test(),
|
|
bce_with_logistic_no_reduce_test(),
|
|
bceloss_no_reduce_scalar_test(),
|
|
bceloss_weights_no_reduce_scalar_test(),
|
|
bce_with_logistic_no_reduce_scalar_test(),
|
|
kldivloss_with_target_no_reduce_test(),
|
|
kldivloss_no_reduce_test(),
|
|
kldivloss_no_reduce_scalar_test(),
|
|
l1loss_no_reduce_test(),
|
|
l1loss_no_reduce_scalar_test(),
|
|
mseloss_no_reduce_test(),
|
|
mseloss_no_reduce_scalar_test(),
|
|
nllloss_no_reduce_test(),
|
|
nllloss_no_reduce_ignore_index_test(),
|
|
nllloss_no_reduce_weights_test(),
|
|
nllloss_no_reduce_weights_ignore_index_test(),
|
|
nllloss_no_reduce_weights_ignore_index_neg_test(),
|
|
nllloss2d_no_reduce_test(),
|
|
nllloss2d_no_reduce_weights_test(),
|
|
nllloss2d_no_reduce_ignore_index_test(),
|
|
nlllossNd_no_reduce_test(),
|
|
nlllossNd_no_reduce_weights_test(),
|
|
nlllossNd_no_reduce_ignore_index_test(),
|
|
smoothl1loss_no_reduce_test(),
|
|
smoothl1loss_no_reduce_scalar_test(),
|
|
multilabelmarginloss_1d_no_reduce_test(),
|
|
multilabelmarginloss_index_neg_test(),
|
|
multilabelmarginloss_no_reduce_test(),
|
|
hingeembeddingloss_no_reduce_test(),
|
|
hingeembeddingloss_margin_no_reduce_test(),
|
|
softmarginloss_no_reduce_test(),
|
|
multilabelsoftmarginloss_no_reduce_test(),
|
|
multilabelsoftmarginloss_weights_no_reduce_test(),
|
|
multimarginloss_no_reduce_test(),
|
|
multimarginloss_1d_no_reduce_test(),
|
|
multimarginloss_p_no_reduce_test(),
|
|
multimarginloss_margin_no_reduce_test(),
|
|
multimarginloss_weights_no_reduce_test(),
|
|
fractional_max_pool2d_test('ratio'),
|
|
fractional_max_pool2d_test('size'),
|
|
fractional_max_pool3d_test('ratio'),
|
|
fractional_max_pool3d_test('size'),
|
|
fractional_max_pool3d_test('asymsize'),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(10,),
|
|
input_size=(4, 10),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='affine',
|
|
test_cuda=(not TEST_WITH_ROCM),
|
|
),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(5,),
|
|
input_size=(4, 5, 3),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='3d_input',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(10, 1e-3, None),
|
|
input_size=(4, 10),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='affine_simple_average',
|
|
test_cuda=(not TEST_WITH_ROCM),
|
|
),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(10, 1e-3, 0.3, False),
|
|
input_size=(4, 10),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_affine',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(10, 1e-3, 0.3, True, False),
|
|
input_size=(4, 10),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_tracking_stats',
|
|
test_cuda=(not TEST_WITH_ROCM),
|
|
),
|
|
dict(
|
|
module_name='BatchNorm1d',
|
|
constructor_args=(5, 1e-3, 0.3, False),
|
|
input_size=(4, 5, 3),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='3d_input_not_affine',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm2d',
|
|
constructor_args=(3,),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
),
|
|
dict(
|
|
module_name='BatchNorm2d',
|
|
constructor_args=(3, 1e-3, None),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='2d_simple_average',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm2d',
|
|
constructor_args=(3, 1e-3, 0.8),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='momentum',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm2d',
|
|
constructor_args=(3, 1e-3, 0.8, False),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_affine',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm2d',
|
|
constructor_args=(3, 1e-3, 0.8, True, False),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_tracking_stats',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm3d',
|
|
constructor_args=(3,),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
),
|
|
dict(
|
|
module_name='BatchNorm3d',
|
|
constructor_args=(3, 1e-3, None),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='3d_simple_average',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm3d',
|
|
constructor_args=(3, 1e-3, 0.7),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='momentum',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm3d',
|
|
constructor_args=(3, 1e-3, 0.7, False),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_affine',
|
|
),
|
|
dict(
|
|
module_name='BatchNorm3d',
|
|
constructor_args=(3, 1e-3, 0.7, True, False),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='not_tracking_stats',
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm1d',
|
|
constructor_args=(3, 1e-3, 0.3),
|
|
input_size=(4, 3, 15),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm1d',
|
|
constructor_args=(3, 1e-3, 0.3, False, True),
|
|
input_size=(4, 3, 15),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='tracking_stats',
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm2d',
|
|
constructor_args=(3, 1e-3, 0.3),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm2d',
|
|
constructor_args=(3, 1e-3, 0.3, False, True),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='tracking_stats',
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm3d',
|
|
constructor_args=(3, 1e-3, 0.3),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
),
|
|
dict(
|
|
module_name='InstanceNorm3d',
|
|
constructor_args=(3, 1e-3, 0.3, False, True),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='tracking_stats',
|
|
),
|
|
dict(
|
|
module_name='LayerNorm',
|
|
constructor_args=([5], 1e-3),
|
|
input_size=(4, 5, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='1d_elementwise_affine',
|
|
),
|
|
dict(
|
|
module_name='LayerNorm',
|
|
constructor_args=([5], 1e-3, False),
|
|
input_size=(4, 5, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='1d_no_elementwise_affine',
|
|
),
|
|
dict(
|
|
module_name='LayerNorm',
|
|
constructor_args=([2, 2, 5], 1e-3),
|
|
input_size=(4, 2, 2, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='3d_elementwise_affine',
|
|
),
|
|
dict(
|
|
module_name='LayerNorm',
|
|
constructor_args=([2, 2, 5], 1e-3, False),
|
|
input_size=(4, 2, 2, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='3d_no_elementwise_affine',
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(3, 6, 1e-3),
|
|
input_size=(4, 6, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='1d_affine',
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(5, 5, 1e-3, False),
|
|
input_size=(4, 5, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='1d_no_affine_IN', # this setting is equivalent with InstanceNormi
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(1, 5, 1e-3, False),
|
|
input_size=(4, 5, 5),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='1d_no_affine_LN', # this setting is equivalent with LayerNorm
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(3, 6, 1e-3),
|
|
input_size=(4, 6, 2, 3),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='2d_affine',
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(3, 3, 1e-3, False),
|
|
input_size=(4, 3, 2, 3),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='2d_no_affine_IN', # this setting is equivalent with InstanceNorm
|
|
),
|
|
dict(
|
|
module_name='GroupNorm',
|
|
constructor_args=(1, 3, 1e-3, False),
|
|
input_size=(4, 3, 2, 3),
|
|
cudnn=True,
|
|
check_eval=True,
|
|
desc='2d_no_affine_LN', # this setting is equivalent with LayerNorm
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 5, 3),
|
|
input_size=(2, 4, 10),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 5, 3, 2),
|
|
input_size=(2, 4, 10),
|
|
cudnn=True,
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 5, 3, 1, 1),
|
|
input_size=(2, 4, 10),
|
|
cudnn=True,
|
|
desc='pad1',
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 5, 5, 1, 2),
|
|
input_size=(2, 4, 10),
|
|
cudnn=True,
|
|
desc='pad2',
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 4, 3, 1, 1),
|
|
input_size=(1, 4, 1),
|
|
cudnn=True,
|
|
desc='pad1size1',
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(4, 4, 5, 1, 2),
|
|
input_size=(1, 4, 1),
|
|
cudnn=True,
|
|
desc='pad2size1',
|
|
),
|
|
dict(
|
|
fullname='Conv1d_dilated',
|
|
constructor=lambda: nn.Conv1d(4, 5, kernel_size=3, dilation=2),
|
|
input_size=(2, 4, 10),
|
|
),
|
|
dict(
|
|
fullname='Conv1d_groups',
|
|
constructor=lambda: nn.Conv1d(4, 6, kernel_size=3, groups=2),
|
|
input_size=(2, 4, 6),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
fullname='ConvTranspose1d',
|
|
constructor=lambda: nn.ConvTranspose1d(3, 4, kernel_size=3, stride=(3,), padding=1, output_padding=(1,)),
|
|
cudnn=True,
|
|
input_size=(1, 3, 7),
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose1d',
|
|
constructor_args=(3, 4, 3, 2, 1, 1, 1, False),
|
|
input_size=(1, 3, 6),
|
|
cudnn=True,
|
|
desc='no_bias',
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose1d',
|
|
constructor_args=(3, 4, 3, 2, 1, 1, 1, True, 2),
|
|
input_size=(1, 3, 6),
|
|
cudnn=True,
|
|
desc='dilated',
|
|
),
|
|
dict(
|
|
fullname='ConvTranspose1d_groups',
|
|
constructor=lambda: nn.ConvTranspose1d(4, 6, 3, stride=(3,), padding=1, output_padding=(1,), groups=2),
|
|
cudnn=True,
|
|
input_size=(2, 4, 7),
|
|
),
|
|
dict(
|
|
module_name='MaxPool1d',
|
|
constructor_args=(4,),
|
|
input_size=(2, 10, 4),
|
|
),
|
|
dict(
|
|
module_name='MaxPool1d',
|
|
constructor_args=(4, 4),
|
|
input_size=(2, 10, 4),
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 4, (3, 2)),
|
|
input_size=(2, 3, 7, 5),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 4, (3, 3), (2, 2)),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
desc='strided',
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 4, (3, 3), (2, 2), (1, 1)),
|
|
input_size=(2, 3, 6, 6),
|
|
cudnn=True,
|
|
desc='padding',
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 2, (3, 3), (2, 2), (1, 1), (2, 2)),
|
|
input_size=(2, 3, 8, 8),
|
|
cudnn=True,
|
|
desc='dilated',
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 4, (3, 2), 1, 0, 1, 1, False),
|
|
input_size=(2, 3, 6, 5),
|
|
cudnn=True,
|
|
desc='no_bias',
|
|
),
|
|
dict(
|
|
fullname='Conv2d_groups',
|
|
constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2),
|
|
input_size=(2, 4, 6, 5),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
fullname='Conv2d_groups_thnn',
|
|
constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2),
|
|
input_size=(2, 4, 6, 5),
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose2d',
|
|
constructor_args=(3, 4, 3, (3, 2), 1, (1, 1)),
|
|
cudnn=True,
|
|
input_size=(1, 3, 7, 6),
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose2d',
|
|
constructor_args=(3, 4, 3, (2, 3), 1, (1, 1), 1, False, (2, 2)),
|
|
input_size=(1, 3, 6, 7),
|
|
cudnn=True,
|
|
desc='dilated',
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose2d',
|
|
constructor_args=(3, 4, 3, (2, 3), 1, (1, 1), 1, False),
|
|
input_size=(1, 3, 6, 7),
|
|
cudnn=True,
|
|
desc='no_bias',
|
|
),
|
|
dict(
|
|
fullname='ConvTranspose2d_groups',
|
|
constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2),
|
|
input_size=(1, 2, 4, 5),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
fullname='Conv2d_depthwise',
|
|
constructor=lambda: nn.Conv2d(4, 4, (3, 3), groups=4),
|
|
input_size=(2, 4, 6, 6),
|
|
),
|
|
dict(
|
|
fullname='Conv2d_depthwise_with_multiplier',
|
|
constructor=lambda: nn.Conv2d(4, 8, (3, 3), groups=4),
|
|
input_size=(2, 4, 6, 6),
|
|
),
|
|
dict(
|
|
fullname='Conv2d_depthwise_strided',
|
|
constructor=lambda: nn.Conv2d(4, 4, (3, 3), stride=(2, 2), groups=4),
|
|
input_size=(2, 4, 6, 6),
|
|
),
|
|
dict(
|
|
fullname='Conv2d_depthwise_padded',
|
|
constructor=lambda: nn.Conv2d(4, 4, (3, 3), padding=(1, 1), groups=4),
|
|
input_size=(2, 4, 6, 6),
|
|
),
|
|
dict(
|
|
fullname='Conv2d_depthwise_dilated',
|
|
constructor=lambda: nn.Conv2d(4, 4, (2, 2), dilation=(2, 2), groups=4),
|
|
input_size=(2, 4, 5, 5),
|
|
),
|
|
dict(
|
|
module_name='MaxPool2d',
|
|
constructor_args=((3, 3), (2, 2), (1, 1)),
|
|
input_size=(1, 3, 7, 7),
|
|
),
|
|
dict(
|
|
module_name='AvgPool1d',
|
|
constructor_args=(2,),
|
|
input_size=(2, 3, 6),
|
|
),
|
|
dict(
|
|
module_name='AvgPool1d',
|
|
constructor_args=((2,), (2,)),
|
|
input_size=(2, 3, 6),
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='AvgPool1d',
|
|
constructor_args=(2, 2, 1),
|
|
input_size=(2, 3, 6),
|
|
desc='stride_pad',
|
|
),
|
|
dict(
|
|
module_name='AvgPool2d',
|
|
constructor_args=((2, 2),),
|
|
input_size=(2, 3, 6, 6),
|
|
),
|
|
dict(
|
|
module_name='AvgPool2d',
|
|
constructor_args=((2, 2), (2, 2)),
|
|
input_size=(2, 3, 6, 6),
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='AvgPool2d',
|
|
constructor_args=((2, 2), (2, 2), (1, 1)),
|
|
input_size=(2, 3, 6, 6),
|
|
desc='stride_pad',
|
|
),
|
|
dict(
|
|
module_name='LPPool2d',
|
|
constructor_args=(2, 2, 2),
|
|
input_size=(1, 3, 7, 7),
|
|
),
|
|
dict(
|
|
module_name='LPPool2d',
|
|
constructor_args=(1.5, 2),
|
|
input_fn=lambda: torch.rand(1, 3, 7, 7),
|
|
desc='norm',
|
|
),
|
|
dict(
|
|
module_name='LPPool1d',
|
|
constructor_args=(1.5, 2),
|
|
input_fn=lambda: torch.rand(1, 3, 7),
|
|
desc='norm',
|
|
),
|
|
dict(
|
|
module_name='LPPool1d',
|
|
constructor_args=(2, 2, 3),
|
|
input_size=(1, 3, 7),
|
|
),
|
|
dict(
|
|
module_name='LocalResponseNorm',
|
|
constructor_args=(3, ),
|
|
input_size=(1, 5, 7),
|
|
desc='1d',
|
|
),
|
|
dict(
|
|
module_name='LocalResponseNorm',
|
|
constructor_args=(2, ),
|
|
input_size=(1, 5, 7, 7),
|
|
desc='2d_uneven_pad',
|
|
),
|
|
dict(
|
|
module_name='LocalResponseNorm',
|
|
constructor_args=(1, 1., 0.5, 2.),
|
|
input_size=(1, 5, 7, 7, 7),
|
|
desc='3d_custom_params',
|
|
),
|
|
dict(
|
|
module_name='ReflectionPad1d',
|
|
constructor_args=((1, 2),),
|
|
input_size=(2, 3, 8),
|
|
),
|
|
dict(
|
|
module_name='ReflectionPad2d',
|
|
constructor_args=((1, 2, 3, 4),),
|
|
input_size=(2, 3, 8, 8),
|
|
),
|
|
dict(
|
|
module_name='ReplicationPad1d',
|
|
constructor_args=((1, 2),),
|
|
input_size=(2, 3, 4),
|
|
),
|
|
dict(
|
|
module_name='ReplicationPad2d',
|
|
constructor_args=((1, 2, 3, 4),),
|
|
input_size=(2, 3, 4, 4),
|
|
),
|
|
dict(
|
|
module_name='ZeroPad2d',
|
|
constructor_args=((1, 2, 3, 4),),
|
|
input_size=(2, 3, 4, 4)
|
|
),
|
|
dict(
|
|
module_name='ZeroPad2d',
|
|
constructor_args=((-1, -1, -1, -2),),
|
|
input_size=(2, 3, 4, 4),
|
|
desc='negative_dims'
|
|
),
|
|
dict(
|
|
module_name='ConstantPad1d',
|
|
constructor_args=((1, 2), 2.),
|
|
input_size=(2, 3, 4)
|
|
),
|
|
dict(
|
|
module_name='ConstantPad2d',
|
|
constructor_args=((1, 2, 3, 4), 2.),
|
|
input_size=(2, 3, 4, 4)
|
|
),
|
|
dict(
|
|
module_name='ConstantPad3d',
|
|
constructor_args=((1, 2, 3, 4, 1, 0), 2.),
|
|
input_size=(2, 3, 4, 4, 5)
|
|
),
|
|
dict(
|
|
module_name='Conv3d',
|
|
constructor_args=(3, 4, (2, 3, 4)),
|
|
input_size=(2, 3, 3, 4, 5),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
module_name='Conv3d',
|
|
constructor_args=(3, 4, (2, 3, 4), 1, 0, 1, 1, False),
|
|
input_size=(2, 3, 3, 4, 5),
|
|
cudnn=True,
|
|
desc='no_bias',
|
|
),
|
|
dict(
|
|
module_name='Conv3d',
|
|
constructor_args=(3, 4, 2, 2),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
cudnn=True,
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='Conv3d',
|
|
constructor_args=(3, 4, 2, 2, 1),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
cudnn=True,
|
|
desc='stride_padding',
|
|
),
|
|
dict(
|
|
fullname='Conv3d_groups',
|
|
constructor=lambda: nn.Conv3d(4, 6, kernel_size=3, groups=2),
|
|
input_size=(2, 4, 4, 5, 4),
|
|
cudnn=True,
|
|
),
|
|
dict(
|
|
fullname='Conv3d_dilated',
|
|
constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
),
|
|
dict(
|
|
fullname='Conv3d_dilated_strided',
|
|
constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2, stride=2),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose3d',
|
|
constructor_args=(2, 3, (2, 3, 2)),
|
|
cudnn=True,
|
|
input_size=(1, 2, 4, 5, 4),
|
|
),
|
|
dict(
|
|
module_name='ConvTranspose3d',
|
|
constructor_args=(2, 3, (2, 3, 2), 1, 0, 0, 1, True, (2, 2, 2)),
|
|
cudnn=True,
|
|
input_size=(1, 2, 4, 5, 4),
|
|
desc='dilated',
|
|
),
|
|
dict(
|
|
module_name='MaxPool3d',
|
|
constructor_args=((2, 2, 2),),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
),
|
|
dict(
|
|
module_name='MaxPool3d',
|
|
constructor_args=(2, (2, 2, 2)),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='MaxPool3d',
|
|
constructor_args=(2, 2, (1, 1, 1)),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
desc='stride_padding',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=((2, 2, 2),),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=(2, (2, 2, 2)),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
desc='stride',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=(2, 2, (1, 1, 1)),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
desc='stride_pad',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=(4, 2, (1, 2, 1)),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
desc='stride_pad_gpu_fixedkw_output',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=((2, 4, 8), 1, (1, 1, 2)),
|
|
input_size=(2, 3, 2, 4, 8),
|
|
desc='stride_pad_gpu_general_output',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=(3, 1, 0),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
desc='stride1_pad0_gpu_input',
|
|
),
|
|
dict(
|
|
module_name='AvgPool3d',
|
|
constructor_args=(2, 2, (1, 1, 1)),
|
|
input_size=(2, 3, 4, 4, 4),
|
|
desc='stride_pad_gpu_input_nooverlap',
|
|
),
|
|
dict(
|
|
module_name='ReplicationPad3d',
|
|
constructor_args=((1, 2, 3, 4, 5, 6),),
|
|
input_size=(2, 3, 5, 5, 5),
|
|
),
|
|
dict(
|
|
module_name='Embedding',
|
|
constructor_args=(4, 3),
|
|
input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4),
|
|
jacobian_input=False,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='EmbeddingBag',
|
|
constructor_args=(4, 3),
|
|
input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4),
|
|
jacobian_input=False,
|
|
check_gradgrad=False,
|
|
desc='mean',
|
|
),
|
|
dict(
|
|
module_name='EmbeddingBag',
|
|
constructor_args=(4, 3, None, 2., False, 'sum'),
|
|
input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4),
|
|
jacobian_input=False,
|
|
check_gradgrad=False,
|
|
desc='sum',
|
|
),
|
|
dict(
|
|
module_name='EmbeddingBag',
|
|
constructor_args=(4, 3, None, 2., False, 'max'),
|
|
input_fn=lambda: torch.empty(2, 3, dtype=torch.long).random_(4),
|
|
jacobian_input=False,
|
|
check_gradgrad=False,
|
|
desc='max',
|
|
),
|
|
dict(
|
|
fullname='EmbeddingBag_sparse',
|
|
constructor=lambda: nn.EmbeddingBag(4, 3, sparse=True),
|
|
input_fn=lambda: torch.randperm(2).repeat(1, 2),
|
|
jacobian_input=False,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
constructor=lambda: nn.Embedding(4, 3, sparse=True),
|
|
input_fn=lambda: torch.randperm(2).repeat(1, 2),
|
|
jacobian_input=False,
|
|
fullname='Embedding_sparse',
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='PixelShuffle',
|
|
constructor_args=(3,),
|
|
input_size=(1, 9, 4, 4),
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_nearest_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(12, ), scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 3),
|
|
fullname='interpolate_nearest_tuple_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_nearest_scale_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='linear', align_corners=False),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_linear_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, ), scale_factor=None, mode='linear', align_corners=False),
|
|
input_size=(1, 2, 3),
|
|
fullname='interpolate_linear_tuple_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='linear', align_corners=False),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_linear_scale_1d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='linear', align_corners=True),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_linear_1d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='linear', align_corners=True),
|
|
input_size=(1, 2, 4),
|
|
fullname='interpolate_linear_scale_1d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_nearest_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(12, 16), scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 3, 4),
|
|
fullname='interpolate_nearest_tuple_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_nearest_scale_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='bilinear', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None,
|
|
mode='bilinear', align_corners=False),
|
|
input_size=(1, 2, 2, 3),
|
|
fullname='interpolate_bilinear_tuple_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4.,
|
|
mode='bilinear', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_scale_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 2.),
|
|
mode='bilinear', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_scale_tuple_shared_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.),
|
|
mode='bilinear', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_scale_tuple_skewed_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, mode='bilinear', align_corners=True),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_tuple_2d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.),
|
|
mode='bilinear', align_corners=True),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bilinear_scale_tuple_skewed_2d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='bicubic', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None,
|
|
mode='bicubic', align_corners=False),
|
|
input_size=(1, 2, 2, 3),
|
|
fullname='interpolate_bicubic_tuple_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='bicubic', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_scale_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 2.),
|
|
mode='bicubic', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_scale_tuple_shared_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.),
|
|
mode='bicubic', align_corners=False),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_scale_tuple_skewed_2d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6), scale_factor=None, mode='bicubic', align_corners=True),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_tuple_2d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=(2., 1.),
|
|
mode='bicubic', align_corners=True),
|
|
input_size=(1, 2, 4, 4),
|
|
fullname='interpolate_bicubic_scale_tuple_skewed_2d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 4, 4, 4),
|
|
fullname='interpolate_nearest_3d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(12, 16, 16), scale_factor=None, mode='nearest'),
|
|
input_size=(1, 2, 3, 4, 4),
|
|
fullname='interpolate_nearest_tuple_3d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=4., mode='nearest'),
|
|
input_size=(1, 2, 4, 4, 4),
|
|
fullname='interpolate_nearest_scale_3d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=12, scale_factor=None, mode='trilinear', align_corners=False),
|
|
input_size=(1, 2, 4, 4, 4),
|
|
fullname='interpolate_trilinear_3d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6, 6),
|
|
scale_factor=None, mode='trilinear', align_corners=False),
|
|
input_size=(1, 2, 2, 3, 3),
|
|
fullname='interpolate_trilinear_tuple_3d',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=3., mode='trilinear', align_corners=False),
|
|
input_size=(1, 2, 3, 4, 4),
|
|
fullname='interpolate_trilinear_scale_3d',
|
|
# See https://github.com/pytorch/pytorch/issues/5006
|
|
precision=3e-4,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=(4, 6, 6), scale_factor=None,
|
|
mode='trilinear', align_corners=True),
|
|
input_size=(1, 2, 2, 3, 3),
|
|
fullname='interpolate_trilinear_tuple_3d_align_corners',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.interpolate, size=None, scale_factor=3., mode='trilinear', align_corners=True),
|
|
input_size=(1, 2, 3, 4, 4),
|
|
fullname='interpolate_trilinear_scale_3d_align_corners',
|
|
# See https://github.com/pytorch/pytorch/issues/5006
|
|
precision=3e-4,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool1d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: _rand_tensor_non_equal(1, 3, 5),
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool2d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6),
|
|
desc='single',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool2d',
|
|
constructor_args=((3, 4),),
|
|
input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6),
|
|
desc='tuple',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool2d',
|
|
constructor_args=((3, None),),
|
|
input_fn=lambda: _rand_tensor_non_equal(1, 3, 5, 6),
|
|
desc='tuple_none',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool3d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7),
|
|
desc='single',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool3d',
|
|
constructor_args=((3, 4, 5),),
|
|
input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7),
|
|
desc='tuple',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool3d',
|
|
constructor_args=((3, None, 5),),
|
|
input_fn=lambda: _rand_tensor_non_equal(2, 3, 5, 6, 7),
|
|
desc='tuple_none',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool3d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: _rand_tensor_non_equal(2, 3, 12, 9, 3),
|
|
desc='single_nonatomic',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveMaxPool3d',
|
|
constructor_args=((3, 4, 5),),
|
|
input_fn=lambda: _rand_tensor_non_equal(2, 3, 6, 4, 10),
|
|
desc='tuple_nonatomic',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool1d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: torch.rand(1, 3, 5),
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool1d',
|
|
constructor_args=(1,),
|
|
input_fn=lambda: torch.rand(1, 3, 5),
|
|
desc='one_output',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool2d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: torch.rand(1, 3, 5, 6),
|
|
desc='single',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool2d',
|
|
constructor_args=(1,),
|
|
input_fn=lambda: torch.rand(1, 3, 5, 6),
|
|
desc='single_1x1output',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool2d',
|
|
constructor_args=((3, 4),),
|
|
input_fn=lambda: torch.rand(1, 3, 5, 6),
|
|
desc='tuple',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool2d',
|
|
constructor_args=((3, None),),
|
|
input_fn=lambda: torch.rand(1, 3, 5, 6),
|
|
desc='tuple_none',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool3d',
|
|
constructor_args=(3,),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 2, 7),
|
|
desc='single',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool3d',
|
|
constructor_args=((3, 4, 5),),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 3, 7),
|
|
desc='tuple',
|
|
),
|
|
dict(
|
|
module_name='AdaptiveAvgPool3d',
|
|
constructor_args=((None, 4, 5),),
|
|
input_fn=lambda: torch.rand(2, 3, 5, 3, 7),
|
|
desc='tuple_none',
|
|
),
|
|
dict(
|
|
module_name='SELU',
|
|
input_size=(3, 2, 5),
|
|
check_inplace=True
|
|
),
|
|
dict(
|
|
module_name='SELU',
|
|
input_size=(),
|
|
check_inplace=True,
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='CELU',
|
|
input_size=(3, 2, 5),
|
|
constructor_args=(2.,),
|
|
check_inplace=True,
|
|
reference_fn=lambda x, _: torch.where(x >= 0, x, 2. * ((.5 * x).exp() - 1)),
|
|
),
|
|
dict(
|
|
module_name='CELU',
|
|
input_size=(),
|
|
constructor_args=(2.,),
|
|
check_inplace=True,
|
|
reference_fn=lambda x, _: torch.where(x >= 0, x, 2. * ((.5 * x).exp() - 1)),
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='GLU',
|
|
input_size=(5, 6),
|
|
),
|
|
dict(
|
|
module_name='GLU',
|
|
constructor_args=(1,),
|
|
input_size=(5, 6, 7),
|
|
desc='dim',
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=-1),
|
|
input_size=(2, 128), # trigger the last-dim algo in CUDA
|
|
fullname='softmax_lastdim',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=1, dtype=torch.float64),
|
|
input_size=(2, 128),
|
|
fullname='softmax_lastdim_dtype',
|
|
pickle=False,
|
|
test_cuda=False
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=1),
|
|
input_size=(2, 128, 2, 2), # trigger special case of spatial CUDA algo
|
|
fullname='softmax_spatial_special',
|
|
pickle=False,
|
|
test_cuda=(not TEST_WITH_ROCM)
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=1),
|
|
input_size=(2, 2, 4, 4), # regular spatial algorithm
|
|
fullname='softmax_spatial',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=1, dtype=torch.float64),
|
|
input_size=(2, 2, 4, 4), # regular spatial algorithm
|
|
fullname='softmax_spatial_dtype',
|
|
pickle=False,
|
|
test_cuda=False
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=0),
|
|
input_size=(2, 3, 4, 5),
|
|
fullname='softmax_functional_dim0',
|
|
test_cuda=False,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=3),
|
|
input_size=(2, 3, 4, 5),
|
|
fullname='softmax_functional_dim3',
|
|
test_cuda=False,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.softmax, dim=-1),
|
|
input_size=(),
|
|
fullname='softmax_functional_scalar',
|
|
test_cuda=False,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=-1),
|
|
input_size=(2, 128), # trigger the last-dim algo in CUDA
|
|
fullname='log_softmax_lastdim',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=1),
|
|
input_size=(2, 128, 2, 2), # trigger special case of spatial CUDA algo
|
|
fullname='log_softmax_spatial_special',
|
|
pickle=False,
|
|
test_cuda=(not TEST_WITH_ROCM)
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=1),
|
|
input_size=(2, 2, 4, 4), # regular spatial algorithm
|
|
fullname='log_softmax_spatial',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=0),
|
|
input_size=(2, 3, 4, 5),
|
|
fullname='log_softmax_dim0',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=3),
|
|
input_size=(2, 3, 4, 5),
|
|
fullname='log_softmax_dim3',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
constructor=wrap_functional(F.log_softmax, dim=0),
|
|
input_size=(),
|
|
fullname='log_softmax_scalar',
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Unfold',
|
|
constructor=lambda: nn.Unfold((2, 2), (1, 1), (0, 0), (1, 1)),
|
|
input_size=(2, 4, 3, 3),
|
|
check_gradgrad=False,
|
|
test_cuda=True,
|
|
),
|
|
dict(
|
|
fullname='Fold',
|
|
constructor=lambda: nn.Fold((3, 3), (2, 2), (1, 1), (0, 0), (1, 1)),
|
|
input_size=(2, 16, 4),
|
|
check_gradgrad=False,
|
|
test_cuda=True,
|
|
),
|
|
dict(
|
|
fullname='Unfold_int_input',
|
|
constructor=lambda: nn.Unfold(2, 1, 0, 1),
|
|
input_size=(2, 4, 3, 3),
|
|
check_gradgrad=False,
|
|
test_cuda=True,
|
|
),
|
|
dict(
|
|
fullname='Fold_int_input',
|
|
constructor=lambda: nn.Fold(3, 2, 1, 0, 1),
|
|
input_size=(2, 16, 4),
|
|
check_gradgrad=False,
|
|
test_cuda=True,
|
|
),
|
|
dict(
|
|
module_name='Threshold',
|
|
constructor_args=(2., 1.),
|
|
input_size=(),
|
|
check_inplace=True,
|
|
desc='threshold_value_scalar'
|
|
),
|
|
|
|
dict(
|
|
module_name='ReLU',
|
|
input_size=(),
|
|
check_inplace=True,
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='ReLU6',
|
|
input_size=(),
|
|
check_inplace=True,
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='RReLU',
|
|
constructor_args=(0.1, 0.9),
|
|
input_size=(),
|
|
desc='with_up_down_scalar',
|
|
test_cuda=False,
|
|
),
|
|
dict(
|
|
module_name='Hardtanh',
|
|
input_size=(),
|
|
reference_fn=lambda i, _: i.clamp(-1, 1),
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='Sigmoid',
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Tanh',
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Softmax',
|
|
constructor_args=(0,),
|
|
input_size=(),
|
|
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(0, True)),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='LogSoftmax',
|
|
constructor_args=(0,),
|
|
input_size=(),
|
|
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(0, False)).log_(),
|
|
desc='multiparam_scalar',
|
|
),
|
|
dict(
|
|
module_name='ELU',
|
|
constructor_args=(2.,),
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Hardshrink',
|
|
constructor_args=(2.,),
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='LeakyReLU',
|
|
constructor_args=(0.5,),
|
|
input_size=(),
|
|
check_inplace=True,
|
|
desc='with_negval_scalar'
|
|
),
|
|
dict(
|
|
module_name='LogSigmoid',
|
|
input_size=(),
|
|
reference_fn=lambda i, _: i.sigmoid().log(),
|
|
desc='scalar'
|
|
),
|
|
dict(
|
|
module_name='Softplus',
|
|
constructor_args=(2, -100),
|
|
input_size=(),
|
|
reference_fn=(lambda i, _: ((i * 2) > -100).type_as(i) * i +
|
|
((i * 2) <= -100).type_as(i) * 1. / 2. * torch.log(1 + torch.exp(2 * i))),
|
|
desc='beta_threshold_scalar',
|
|
),
|
|
dict(
|
|
module_name='Softshrink',
|
|
constructor_args=(1,),
|
|
input_size=(),
|
|
desc='lambda_scalar',
|
|
),
|
|
dict(
|
|
module_name='PReLU',
|
|
input_size=(),
|
|
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Softsign',
|
|
input_size=(),
|
|
reference_fn=lambda i, _: i.div(1 + torch.abs(i)),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Softmin',
|
|
constructor_args=(0,),
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
module_name='Tanhshrink',
|
|
input_size=(),
|
|
desc='scalar',
|
|
),
|
|
dict(
|
|
fullname='Padding12_1dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(1, 2), mode='circular'),
|
|
input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]),
|
|
reference_fn=lambda i, _: padding1d_circular(i, (1, 2)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding31_1dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(3, 1), mode='circular'),
|
|
input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]),
|
|
reference_fn=lambda i, _: padding1d_circular(i, (3, 1)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding33_1dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(3, 3), mode='circular'),
|
|
input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 2, 3]),
|
|
reference_fn=lambda i, _: padding1d_circular(i, (3, 3)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding1221_2dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(1, 2, 2, 1), mode='circular'),
|
|
input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 1, 2, 3]),
|
|
reference_fn=lambda i, _: padding2d_circular(i, (1, 2, 2, 1)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding2322_2dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(2, 3, 2, 2), mode='circular'),
|
|
input_fn=lambda: torch.arange(6, out=torch.DoubleTensor()).reshape([1, 1, 2, 3]),
|
|
reference_fn=lambda i, _: padding2d_circular(i, (2, 3, 2, 2)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding3331_2dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(3, 3, 3, 1), mode='circular'),
|
|
input_fn=lambda: torch.arange(9, out=torch.DoubleTensor()).reshape([1, 1, 3, 3]),
|
|
reference_fn=lambda i, _: padding2d_circular(i, (3, 3, 3, 1)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding122112_3dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(1, 2, 2, 1, 1, 2), mode='circular'),
|
|
input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]),
|
|
reference_fn=lambda i, _: padding3d_circular(i, (1, 2, 2, 1, 1, 2)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding322112_3dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(3, 2, 2, 1, 1, 2), mode='circular'),
|
|
input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]),
|
|
reference_fn=lambda i, _: padding3d_circular(i, (3, 2, 2, 1, 1, 2)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
dict(
|
|
fullname='Padding332122_3dcircular',
|
|
constructor=wrap_functional(F.pad, pad=(3, 3, 2, 1, 2, 2), mode='circular'),
|
|
input_fn=lambda: torch.arange(12, out=torch.DoubleTensor()).reshape([1, 1, 2, 2, 3]),
|
|
reference_fn=lambda i, _: padding3d_circular(i, (3, 3, 2, 1, 2, 2)),
|
|
skip_double=TEST_WITH_ROCM,
|
|
pickle=False,
|
|
),
|
|
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(3, 4, 2, 2, (1,), 1, 1, True, 'circular'),
|
|
input_size=(2, 3, 5,),
|
|
cudnn=True,
|
|
desc='stride1_pad1circular',
|
|
),
|
|
dict(
|
|
module_name='Conv1d',
|
|
constructor_args=(3, 4, 2, 2, (2,), 1, 1, True, 'circular'),
|
|
input_size=(2, 3, 5,),
|
|
cudnn=True,
|
|
desc='stride1_pad2circular',
|
|
),
|
|
dict(
|
|
module_name='Conv2d',
|
|
constructor_args=(3, 4, (3, 3), (2, 2), (1, 2), 1, 1, True, 'circular'),
|
|
input_size=(2, 3, 3, 3),
|
|
cudnn=True,
|
|
desc='pad2circular'
|
|
),
|
|
dict(
|
|
module_name='Conv3d',
|
|
constructor_args=(3, 4, 2, 2, (1, 2, 3), 1, 1, True, 'circular'),
|
|
input_size=(2, 3, 3, 3, 3),
|
|
cudnn=True,
|
|
desc='stride_pad1circular',
|
|
),
|
|
]
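# Sketch of how an entry above is consumed (names and wiring here are
# illustrative only; the real test generation lives in the harness that
# imports this module): each dict's fields configure a ModuleTest below.
def _module_tests_entry_example():
    entry = dict(module_name='ReLU', input_size=(2, 3, 4, 5), check_inplace=True)
    module = getattr(nn, entry['module_name'])()
    out = module(torch.randn(*entry['input_size']))
    assert out.shape == torch.Size(entry['input_size'])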
def kldivloss_reference(input, target, reduction='mean'):
    safe_target = target * (target > 0).type_as(target)
    safe_target_log = (safe_target + (target <= 0).type_as(target)).log()
    result = safe_target * (safe_target_log - input)
    if reduction == 'mean':
        return result.mean()
    elif reduction == 'sum':
        return result.sum()
    elif reduction == 'batchmean' and result.dim() != 0:
        return result.sum() / result.size(0)
    return result
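# Illustrative usage sketch (not exercised by the harness): for strictly
# positive targets the reference reduces to the pointwise KL term
# target * (log(target) - input), where `input` is already in log-space.
def _kldivloss_reference_example():
    inp = torch.rand(3, 4).log()
    tgt = torch.rand(3, 4) + 0.1  # keep targets strictly positive
    expected = (tgt * (tgt.log() - inp)).mean()
    assert torch.allclose(kldivloss_reference(inp, tgt, reduction='mean'), expected)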
def nlllossNd_reference(input, target, weight=None, ignore_index=-100,
                        reduction='mean'):
    assert input.dim() >= 3
    N = input.size(0)
    C = input.size(1)
    out_size = (N,) + input.size()[2:]
    output = torch.zeros(out_size).type_as(input)

    if weight is None:
        weight = torch.ones(C).type_as(input)
    total_weight = 0
    for tup in product(*[range(size) for size in out_size]):
        t_nx = target[tup]
        norm = 0. if ignore_index == t_nx else weight[t_nx].item()
        input_index = list(tup)
        input_index.insert(1, t_nx)
        output[tup] = -input[tuple(input_index)] * norm
        total_weight += norm

    if reduction == 'mean':
        return output.sum() / total_weight
    elif reduction == 'sum':
        return output.sum()
    return output
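# Hedged cross-check (assumes F.nll_loss semantics for 4-d inputs): with no
# weights and no ignored indices the Nd reference should agree with the
# functional form applied to (N, C, d1, d2) log-probabilities.
def _nlllossNd_reference_example():
    logp = F.log_softmax(torch.randn(2, 3, 4, 4), dim=1)
    tgt = torch.empty(2, 4, 4, dtype=torch.long).random_(3)
    assert torch.allclose(nlllossNd_reference(logp, tgt), F.nll_loss(logp, tgt), atol=1e-6)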
def nllloss_reference(input, target, weight=None, ignore_index=-100,
                      reduction='mean'):

    def nll_loss_helper(input, target, weight, ignore_index):
        if target == ignore_index:
            return (0, 0)
        norm = 1 if weight is None else weight[target]
        result = -input[target] * norm
        return (result, norm)

    losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index)
                          for i, t in zip(input, target)]
    losses, weights = zip(*losses_and_weights)
    losses_tensor = input.new_tensor(losses)
    if reduction == 'mean':
        return sum(losses_tensor) / sum(weights)
    elif reduction == 'sum':
        return sum(losses_tensor)
    else:
        return losses_tensor
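# Minimal sketch of the 1-d case (names here are illustrative only): each row
# contributes -input[row, target[row]], and 'mean' divides by the summed
# per-sample weights (all ones when no weight tensor is given).
def _nllloss_reference_example():
    logits = torch.rand(5, 3).log()
    labels = torch.tensor([0, 2, 1, 1, 0])
    expected = torch.stack([-logits[i, labels[i]] for i in range(5)]).mean()
    assert torch.allclose(nllloss_reference(logits, labels), expected)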
def smoothl1loss_reference(input, target, reduction='mean'):
    abs_diff = (input - target).abs()
    ge_one_mask = (abs_diff >= 1).type_as(abs_diff)
    lt_one_mask = (abs_diff < 1).type_as(abs_diff)
    output = ge_one_mask * (abs_diff - 0.5) + lt_one_mask * 0.5 * (abs_diff ** 2)
    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
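# Spot check (illustrative only): |diff| < 1 takes the quadratic branch
# 0.5 * diff ** 2, |diff| >= 1 takes the linear branch |diff| - 0.5.
def _smoothl1loss_reference_example():
    inp = torch.tensor([0.5, 3.0])
    tgt = torch.tensor([0.0, 0.0])
    expected = torch.tensor([0.125, 2.5])  # [0.5 * 0.5 ** 2, 3.0 - 0.5]
    assert torch.allclose(smoothl1loss_reference(inp, tgt, reduction='none'), expected)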
def _multilabelmarginloss_reference(input, target):
    targets = []
    for target_index in target:
        if target_index < 0:
            break
        targets.append(target_index)

    sum = 0
    for target_index in targets:
        for i in range(0, len(input)):
            if i not in targets:
                sum += max(0, 1 - input[target_index] + input[i])

    return sum


def multilabelmarginloss_reference(input, target, reduction='mean'):
    if input.dim() == 1:
        n = 1
        dim = input.size(0)
        output = input.new(n).zero_()
        output[0] = _multilabelmarginloss_reference(input, target)
    else:
        n = input.size(0)
        dim = input.size(1)
        output = input.new(n).zero_()
        for i in range(0, n):
            output[i] = _multilabelmarginloss_reference(input[i], target[i])

    if reduction == 'mean':
        return output.mean() / dim
    elif reduction == 'sum':
        return output.sum() / dim
    return output / dim
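# Worked example (illustrative): valid target indices stop at the first
# negative entry; every non-target column pays the hinge
# 1 - input[target] + input[other], and the result is divided by dim.
def _multilabelmarginloss_reference_example():
    inp = torch.tensor([0.1, 0.2, 0.4, 0.8])
    tgt = torch.tensor([3, 0, -1, 1])  # targets {3, 0}; -1 terminates the list
    expected = torch.tensor((0.4 + 0.6 + 1.1 + 1.3) / 4)  # four hinge terms over dim=4
    assert torch.allclose(multilabelmarginloss_reference(inp, tgt, reduction='sum'), expected)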
def hingeembeddingloss_reference(input, target, margin=1.0, reduction='mean'):
    margin_clamp = (margin - input).clamp(min=0).type_as(input)
    output = torch.where(target == 1, input, margin_clamp)

    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
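# Illustrative check: target == 1 keeps the input as-is, while target == -1
# contributes clamp(margin - input, min=0) with the default margin of 1.
def _hingeembeddingloss_reference_example():
    inp = torch.tensor([0.2, 0.2])
    tgt = torch.tensor([1., -1.])
    expected = torch.tensor([0.2, 0.8])  # [input, margin - input]
    assert torch.allclose(hingeembeddingloss_reference(inp, tgt, reduction='none'), expected)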
def softmarginloss_reference(input, target, reduction='mean'):
    output = (1 + (-input * target).exp()).log()

    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
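# Illustrative identity: log(1 + exp(-y * x)) is the binary logistic loss,
# so for x = 0 every element contributes exactly log(2).
def _softmarginloss_reference_example():
    inp = torch.zeros(4)
    tgt = torch.tensor([1., -1., 1., -1.])
    expected = torch.tensor(2.).log() * 4
    assert torch.allclose(softmarginloss_reference(inp, tgt, reduction='sum'), expected)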
def _multimarginloss_reference(input, target_idx, p, margin, weight):
    if weight is None:
        weight = input.new(len(input)).fill_(1)

    output = 0
    for i in range(0, len(input)):
        if i != target_idx:
            output += max(0, weight[target_idx] * (margin - input[target_idx] + input[i]) ** p)
    return output


def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reduction='mean'):
    if input.dim() == 1:
        n = 1
        dim = input.size(0)
        return _multimarginloss_reference(input, target[0], p, margin, weight) / dim
    else:
        n = input.size(0)
        dim = input.size(1)
        output = input.new(n)
        for x in range(0, n):
            output[x] = _multimarginloss_reference(input[x], target[x], p, margin, weight)

        if reduction == 'mean':
            return output.mean() / dim
        elif reduction == 'sum':
            return output.sum() / dim
        return output / dim
def cosineembeddingloss_reference(input1, input2, target, margin=0, reduction='mean'):
    def _cos(a, b):
        cos = a.new(a.size(0))
        for i in range(0, a.size(0)):
            cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
        return cos

    output = torch.where(target == 1, 1 - _cos(input1, input2), (_cos(input1, input2) - margin).clamp(min=0))

    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
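# Illustrative check: identical vectors have cosine similarity ~1, so
# target 1 gives ~0 loss and target -1 gives ~1 (cosine minus zero margin).
def _cosineembeddingloss_reference_example():
    a = torch.ones(2, 3)
    tgt = torch.tensor([1., -1.])
    out = cosineembeddingloss_reference(a, a, tgt, margin=0, reduction='none')
    assert torch.allclose(out, torch.tensor([0., 1.]), atol=1e-4)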
def tripletmarginloss_reference(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False,
                                reduction='mean'):
    d_p = torch.pairwise_distance(anchor, positive, p, eps)
    d_n = torch.pairwise_distance(anchor, negative, p, eps)
    if swap:
        d_s = torch.pairwise_distance(positive, negative, p, eps)
        d_n = torch.min(d_n, d_s)

    output = torch.clamp(margin + d_p - d_n, min=0.0)
    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
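# Illustrative check with hand-picked distances (assuming the standard
# pairwise L2 distance): d(anchor, positive) ~ 0 and d(anchor, negative) ~ 2,
# so margin(=1) + d_p - d_n clamps to zero.
def _tripletmarginloss_reference_example():
    anchor = torch.zeros(1, 3)
    positive = torch.zeros(1, 3)
    negative = torch.full((1, 3), 2.0 / (3 ** 0.5))
    out = tripletmarginloss_reference(anchor, positive, negative, reduction='sum')
    assert torch.allclose(out, torch.tensor(0.), atol=1e-3)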
def marginrankingloss_reference(input1, input2, target, margin=0, reduction='mean'):
    output = (-target * (input1 - input2) + margin).clamp(min=0)
    if reduction == 'mean':
        return output.mean()
    elif reduction == 'sum':
        return output.sum()
    return output
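# Illustrative check: target = 1 asks for input1 > input2 by at least the
# margin; the pair (3, 1) already satisfies it, the pair (1, 3) is off by 2.
def _marginrankingloss_reference_example():
    x1 = torch.tensor([3., 1.])
    x2 = torch.tensor([1., 3.])
    tgt = torch.tensor([1., 1.])
    out = marginrankingloss_reference(x1, x2, tgt, margin=0, reduction='none')
    assert torch.allclose(out, torch.tensor([0., 2.]))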
# This directly follows Graves et al.'s paper; in contrast to the production
# implementation, it does not use log-space.
def ctcloss_reference(log_probs, targets, input_lengths, target_lengths, blank=0, reduction='mean'):
    input_lengths = torch.as_tensor(input_lengths, dtype=torch.long)
    target_lengths = torch.as_tensor(target_lengths, dtype=torch.long)
    dt = log_probs.dtype
    log_probs = log_probs.double()  # we need the accuracy as we are not in logspace
    targets = targets.long()
    cum_target_lengths = target_lengths.cumsum(0)
    losses = []
    for i in range(log_probs.size(1)):
        input_length = input_lengths[i].item()
        target_length = target_lengths[i].item()
        cum_target_length = cum_target_lengths[i].item()
        targets_prime = targets.new_full((2 * target_length + 1,), blank)
        if targets.dim() == 2:
            targets_prime[1::2] = targets[i, :target_length]
        else:
            targets_prime[1::2] = targets[cum_target_length - target_length:cum_target_length]
        probs = log_probs[:input_length, i].exp()
        alpha = log_probs.new_zeros((target_length * 2 + 1,))
        alpha[0] = probs[0, blank]
        alpha[1] = probs[0, targets_prime[1]]
        mask_third = (targets_prime[:-2] != targets_prime[2:])
        for t in range(1, input_length):
            alpha_next = alpha.clone()
            alpha_next[1:] += alpha[:-1]
            alpha_next[2:] += torch.where(mask_third, alpha[:-2], alpha.new_zeros(1))
            alpha = probs[t, targets_prime] * alpha_next
        losses.append(-alpha[-2:].sum().log()[None])
    output = torch.cat(losses, 0)
    if reduction == 'mean':
        return (output / target_lengths.to(dtype=output.dtype, device=output.device)).mean()
    elif reduction == 'sum':
        return output.sum()
    output = output.to(dt)
    return output
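# Shape-convention sketch (illustrative, not part of the test definitions):
# log_probs is (T, N, C), targets is (N, S) or a 1-d concatenation, and the
# per-sample lengths select the valid prefix of each sequence.
def _ctcloss_reference_example():
    T, N, C, S = 10, 2, 5, 4
    log_probs = F.log_softmax(torch.randn(T, N, C), dim=-1)
    targets = torch.randint(1, C, (N, S), dtype=torch.long)
    loss = ctcloss_reference(log_probs, targets, [T] * N, [S] * N)
    assert loss.dim() == 0 and loss.item() > 0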
def padding1d_circular(input, pad):
    r"""input:
            [[[0., 1., 2.],
              [3., 4., 5.]]]
        pad: (1, 2)
        output:
            [[[2., 0., 1., 2., 0., 1.],
              [5., 3., 4., 5., 3., 4.]]]
    """
    return torch.cat([input[:, :, -pad[0]:], input,
                      input[:, :, 0:pad[1]]], dim=2)
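# Hedged cross-check (assumes F.pad's 'circular' mode wraps the last
# dimension the same way): the helper should agree with the functional pad
# for the same (left, right) widths.
def _padding1d_circular_example():
    x = torch.arange(6, dtype=torch.double).reshape(1, 2, 3)
    assert torch.allclose(padding1d_circular(x, (1, 2)), F.pad(x, (1, 2), mode='circular'))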
def padding2d_circular(input, pad):
    r"""input:
            [[[[0., 1., 2.],
               [3., 4., 5.]]]]
        pad: (1, 2, 2, 1)
        output:
            [[[[2., 0., 1., 2., 0., 1.],
               [5., 3., 4., 5., 3., 4.],
               [2., 0., 1., 2., 0., 1.],
               [5., 3., 4., 5., 3., 4.],
               [2., 0., 1., 2., 0., 1.]]]]
    """
    input = torch.cat([input[:, :, -pad[2]:], input, input[:, :, 0:pad[3]]], dim=2)
    return torch.cat([input[:, :, :, -pad[0]:], input, input[:, :, :, 0:pad[1]]], dim=3)
def padding3d_circular(input, pad):
    r"""input:
            [[[[[ 0.,  1.,  2.],
                [ 3.,  4.,  5.]],
               [[ 6.,  7.,  8.],
                [ 9., 10., 11.]]]]]
        pad: (1, 2, 2, 1, 1, 2)
        output: [[[[[ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.]],

                   [[ 2.,  0.,  1.,  2.,  0.,  1.],
                    [ 5.,  3.,  4.,  5.,  3.,  4.],
                    [ 2.,  0.,  1.,  2.,  0.,  1.],
                    [ 5.,  3.,  4.,  5.,  3.,  4.],
                    [ 2.,  0.,  1.,  2.,  0.,  1.]],

                   [[ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.]],

                   [[ 2.,  0.,  1.,  2.,  0.,  1.],
                    [ 5.,  3.,  4.,  5.,  3.,  4.],
                    [ 2.,  0.,  1.,  2.,  0.,  1.],
                    [ 5.,  3.,  4.,  5.,  3.,  4.],
                    [ 2.,  0.,  1.,  2.,  0.,  1.]],

                   [[ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.],
                    [11.,  9., 10., 11.,  9., 10.],
                    [ 8.,  6.,  7.,  8.,  6.,  7.]]]]]
    """
    input = torch.cat([input[:, :, -pad[4]:], input, input[:, :, 0:pad[5]]], dim=2)
    input = torch.cat([input[:, :, :, -pad[2]:], input, input[:, :, :, 0:pad[3]]], dim=3)
    return torch.cat([input[:, :, :, :, -pad[0]:], input, input[:, :, :, :, 0:pad[1]]], dim=4)
loss_reference_fns = {
    'KLDivLoss': kldivloss_reference,
    'NLLLoss': nllloss_reference,
    'NLLLossNd': nlllossNd_reference,
    'SmoothL1Loss': smoothl1loss_reference,
    'MultiLabelMarginLoss': multilabelmarginloss_reference,
    'HingeEmbeddingLoss': hingeembeddingloss_reference,
    'SoftMarginLoss': softmarginloss_reference,
    'MultiMarginLoss': multimarginloss_reference,
    'CosineEmbeddingLoss': cosineembeddingloss_reference,
    'TripletMarginLoss': tripletmarginloss_reference,
    'MarginRankingLoss': marginrankingloss_reference,
    'CTCLoss': ctcloss_reference,
}
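# Illustrative lookup (assumption: callers index this dict by module name and
# compare against the corresponding nn module with a matching reduction).
def _loss_reference_fns_example():
    inp, tgt = torch.randn(5, 10), torch.randn(5, 10)
    ref = loss_reference_fns['SmoothL1Loss'](inp, tgt, reduction='mean')
    assert torch.allclose(ref, nn.SmoothL1Loss()(inp, tgt), atol=1e-6)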
criterion_tests = [
|
|
dict(
|
|
module_name='L1Loss',
|
|
input_size=(2, 3, 4),
|
|
target_size=(2, 3, 4),
|
|
reference_fn=lambda i, t, _: 1. / i.numel() *
|
|
sum((a - b).abs().sum() for a, b in zip(i, t)),
|
|
),
|
|
dict(
|
|
module_name='NLLLoss',
|
|
input_fn=lambda: torch.rand(15, 10).log(),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
nllloss_reference(i, t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True
|
|
),
|
|
dict(
|
|
module_name='NLLLoss',
|
|
constructor_args=(None, None, 2),
|
|
input_fn=lambda: torch.rand(15, 10).log(),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
reference_fn=lambda i, t, _: nllloss_reference(i, t, ignore_index=2),
|
|
desc='ignore_index'
|
|
),
|
|
dict(
|
|
module_name='NLLLoss',
|
|
constructor_args_fn=lambda: (torch.rand(10),),
|
|
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
nllloss_reference(i, t, weight=get_weight(m)),
|
|
desc='weights',
|
|
),
|
|
dict(
|
|
module_name='NLLLoss',
|
|
constructor_args_fn=lambda: (torch.rand(10), None, 2),
|
|
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
nllloss_reference(i, t, weight=get_weight(m), ignore_index=2),
|
|
desc='weights_ignore_index'
|
|
),
|
|
dict(
|
|
module_name='NLLLoss',
|
|
constructor_args_fn=lambda: (torch.rand(10), None, -1),
|
|
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10 + 1).floor().long() - 1,
|
|
reference_fn=lambda i, t, m:
|
|
nllloss_reference(i, t, weight=get_weight(m), ignore_index=-1),
|
|
desc='weights_ignore_index_neg'
|
|
),
|
|
dict(
|
|
module_name='KLDivLoss',
|
|
input_fn=lambda: torch.rand(10, 10).log(),
|
|
target_fn=lambda: torch.rand(10, 10),
|
|
reference_fn=lambda i, t, m:
|
|
kldivloss_reference(i, t, get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='MSELoss',
|
|
input_size=(2, 3, 4, 5),
|
|
target_size=(2, 3, 4, 5),
|
|
reference_fn=lambda i, t, m: ((i - t).abs().pow(2).sum() / (i.numel()
|
|
if get_reduction(m) == 'mean' else 1)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='BCELoss',
|
|
input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
|
|
target_fn=lambda: torch.randn(15, 10).gt(0).double(),
|
|
reference_fn=lambda i, t, m: -(t * i.log() + (1 - t) * (1 - i).log()).sum() /
|
|
(i.numel() if get_reduction(m) else 1),
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='BCELoss',
|
|
constructor_args_fn=lambda: (torch.rand(10),),
|
|
input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
|
|
target_fn=lambda: torch.randn(15, 10).gt(0).double(),
|
|
reference_fn=lambda i, t, m: -((t * i.log() + (1 - t) * (1 - i).log()) * get_weight(m)).sum() /
|
|
(i.numel() if get_reduction(m) else 1),
|
|
desc='weights',
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='CrossEntropyLoss',
|
|
input_size=(15, 10),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
),
|
|
dict(
|
|
module_name='CrossEntropyLoss',
|
|
constructor_args_fn=lambda: (torch.rand(10),),
|
|
input_size=(15, 10),
|
|
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
|
|
desc='weights',
|
|
),
|
|
dict(
|
|
module_name='HingeEmbeddingLoss',
|
|
input_size=(10,),
|
|
target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1),
|
|
reference_fn=lambda i, t, m:
|
|
hingeembeddingloss_reference(i, t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='HingeEmbeddingLoss',
|
|
constructor_args=(0.5,),
|
|
input_size=(10,),
|
|
target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1),
|
|
reference_fn=lambda i, t, m:
|
|
hingeembeddingloss_reference(i, t, margin=0.5, reduction=get_reduction(m)),
|
|
desc='margin',
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='MultiLabelMarginLoss',
|
|
input_size=(10,),
|
|
target_fn=lambda: torch.rand(10).mul(10).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multilabelmarginloss_reference(i, t, reduction=get_reduction(m)),
|
|
desc="1d",
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiLabelMarginLoss',
|
|
input_size=(5, 10),
|
|
target_fn=lambda: torch.rand(5, 10).mul(10).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multilabelmarginloss_reference(i, t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiLabelSoftMarginLoss',
|
|
input_size=(5, 10),
|
|
target_fn=lambda: torch.rand(5, 10).mul(2).floor(),
|
|
reference_fn=lambda i, t, m: -(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()).sum() / i.numel(),
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiMarginLoss',
|
|
input_size=(5, 10),
|
|
target_fn=lambda: torch.rand(5).mul(8).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multimarginloss_reference(i, t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiMarginLoss',
|
|
input_size=(10,),
|
|
target_fn=lambda: torch.rand(1).mul(8).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multimarginloss_reference(i, t, reduction=get_reduction(m)),
|
|
desc='1d',
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiMarginLoss',
|
|
constructor_args=(2,),
|
|
input_fn=lambda: torch.rand(5, 10).clamp_(1e-2, 1 - 1e-2),
|
|
target_fn=lambda: torch.rand(5).mul(8).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multimarginloss_reference(i, t, p=2, reduction=get_reduction(m)),
|
|
desc='p',
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiMarginLoss',
|
|
constructor_args=(1, 0.5),
|
|
legacy_constructor_args=(1, None, 0.5),
|
|
input_size=(5, 10),
|
|
target_fn=lambda: torch.rand(5).mul(8).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multimarginloss_reference(i, t, margin=0.5, reduction=get_reduction(m)),
|
|
desc='margin',
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='MultiMarginLoss',
|
|
constructor_args=(1, 1., torch.rand(10)),
|
|
legacy_constructor_args=(1, torch.rand(10)),
|
|
input_size=(5, 10),
|
|
target_fn=lambda: torch.rand(5).mul(8).floor().long(),
|
|
reference_fn=lambda i, t, m:
|
|
multimarginloss_reference(i, t, weight=get_weight(m), reduction=get_reduction(m)),
|
|
desc='weights',
|
|
check_sum_reduction=True,
|
|
check_gradgrad=False,
|
|
),
|
|
dict(
|
|
module_name='SmoothL1Loss',
|
|
input_size=(5, 10),
|
|
target_size=(5, 10),
|
|
check_sum_reduction=True,
|
|
reference_fn=lambda i, t, m:
|
|
smoothl1loss_reference(i, t, reduction=get_reduction(m)),
|
|
),
|
|
dict(
|
|
module_name='SoftMarginLoss',
|
|
input_size=(5, 5),
|
|
target_fn=lambda: torch.randn(5, 5).sign(),
|
|
reference_fn=lambda i, t, m:
|
|
softmarginloss_reference(i, t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='CosineEmbeddingLoss',
|
|
input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)),
|
|
target_fn=lambda: torch.randn(15).sign(),
|
|
reference_fn=lambda i, t, m:
|
|
cosineembeddingloss_reference(i[0], i[1], t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='CosineEmbeddingLoss',
|
|
constructor_args=(0.7,),
|
|
input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)),
|
|
target_fn=lambda: torch.randn(15).sign(),
|
|
reference_fn=lambda i, t, m:
|
|
cosineembeddingloss_reference(i[0], i[1], t, margin=0.7, reduction=get_reduction(m)),
|
|
desc='margin',
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='MarginRankingLoss',
|
|
input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)),
|
|
target_fn=lambda: torch.randn(50).sign(),
|
|
reference_fn=lambda i, t, m:
|
|
marginrankingloss_reference(i[0], i[1], t, reduction=get_reduction(m)),
|
|
check_sum_reduction=True,
|
|
),
|
|
dict(
|
|
module_name='MarginRankingLoss',
|
|
constructor_args=(0.5,),
|
|
input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)),
|
|
target_fn=lambda: torch.randn(50).sign(),
|
|
reference_fn=lambda i, t, m:
|
|
marginrankingloss_reference(i[0], i[1], t, margin=0.5, reduction=get_reduction(m)),
|
|
desc='margin',
|
|
check_sum_reduction=True,
|
|
),
|
|
]
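# Sketch of how a criterion entry is consumed (illustrative wiring only): the
# input/target factories feed the constructed module, and reference_fn
# recomputes the expected loss from the same data plus the module.
def _criterion_tests_entry_example():
    entry = criterion_tests[0]  # the L1Loss entry above
    module = getattr(nn, entry['module_name'])()
    inp = torch.randn(*entry['input_size'])
    tgt = torch.randn(*entry['target_size'])
    expected = entry['reference_fn'](inp, tgt, module)
    assert torch.allclose(module(inp, tgt), expected, atol=1e-6)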
|
|
|
|
|
|
class NNTestCase(TestCase):
|
|
|
|
def _jacobian(self, input, num_out):
|
|
if isinstance(input, tuple):
|
|
return tuple(self._jacobian(elem, num_out) for elem in input)
|
|
elif isinstance(input, list):
|
|
return [self._jacobian(elem, num_out) for elem in input]
|
|
else:
|
|
return torch.zeros(input.nelement(), num_out)
|
|
|
|
def _flatten_tensors(self, x):
|
|
if isinstance(x, torch.Tensor):
|
|
if x.is_sparse:
|
|
return x.to_dense().view(-1)
|
|
else:
|
|
return x.view(-1)
|
|
else:
|
|
return tuple(self._flatten_tensors(a) for a in x)
|
|
|
|
def _zero_grad_input(self, input):
|
|
if isinstance(input, torch.Tensor):
|
|
if input.requires_grad and input.grad is not None:
|
|
input.grad.zero_()
|
|
input.grad.detach_()
|
|
else:
|
|
for i in input:
|
|
self._zero_grad_input(i)
|
|
|
|
def _analytical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True):
|
|
output = self._forward(module, input)
|
|
output_size = output.nelement()
|
|
|
|
if jacobian_input:
|
|
jacobian_inp = self._jacobian(input, output_size)
|
|
flat_jacobian_input = list(iter_tensors(jacobian_inp))
|
|
|
|
if jacobian_parameters:
|
|
num_param = sum(p.numel() for p in self._get_parameters(module)[0])
|
|
jacobian_param = torch.zeros(num_param, output_size)
|
|
|
|
for i in range(output_size):
|
|
param, d_param = self._get_parameters(module)
|
|
# make non grad zeros
|
|
d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param)]
|
|
|
|
d_out = torch.zeros_like(output)
|
|
flat_d_out = d_out.view(-1)
|
|
flat_d_out[i] = 1
|
|
|
|
if jacobian_parameters:
|
|
self._zero_grad_parameters(module)
|
|
# Tensors will accumulate gradient from multiple steps
|
|
if jacobian_input:
|
|
self._zero_grad_input(input)
|
|
d_input = self._backward(module, input, output, d_out)
|
|
|
|
if jacobian_input:
|
|
for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)):
|
|
jacobian_x[:, i] = d_x.contiguous().view(-1)
|
|
if jacobian_parameters:
|
|
jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
|
|
|
|
res = tuple()
|
|
if jacobian_input:
|
|
res += jacobian_inp,
|
|
if jacobian_parameters:
|
|
res += jacobian_param,
|
|
|
|
return res
|
|
|
|
def _numerical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True):
|
|
def fw(input):
|
|
return self._forward(module, input).detach()
|
|
|
|
res = tuple()
|
|
if jacobian_input:
|
|
res += get_numerical_jacobian(fw, input, eps=1e-6),
|
|
if jacobian_parameters:
|
|
param, _ = self._get_parameters(module)
|
|
res += torch.cat([get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param], 0),
|
|
return res
|
|
|
|
def check_jacobian(self, module, input, jacobian_input=True):
|
|
jacobian_parameters = bool(self._get_parameters(module)[0])
|
|
analytical = self._analytical_jacobian(module, input, jacobian_input, jacobian_parameters)
|
|
numerical = self._numerical_jacobian(module, input, jacobian_input, jacobian_parameters)
|
|
analytical_t = list(iter_tensors(analytical))
|
|
numerical_t = list(iter_tensors(numerical))
|
|
|
|
# TODO: compare structure
|
|
self.assertLessEqual(
|
|
max(a.add(-1, n).abs().max() for a, n in zip(analytical_t, numerical_t)),
|
|
PRECISION
|
|
)
|
|
|
|
def check_criterion_jacobian(self, criterion, input, target):
|
|
eps = 1e-6
|
|
self._forward_criterion(criterion, input, target)
|
|
analytical_d_x = self._backward_criterion(criterion, input, target)
|
|
numerical_d_x = deepcopy(analytical_d_x)
|
|
|
|
input_t = iter_tensors(input)
|
|
numerical_t = iter_tensors(numerical_d_x)
|
|
for x, d_x in zip(input_t, numerical_t):
|
|
x = x.view(-1).data
|
|
d_x = d_x.view(-1).data
|
|
for i in range(x.nelement()):
|
|
original = x[i].item()
|
|
x[i] = original + eps
|
|
fx1 = self._forward_criterion(criterion, input, target)
|
|
x[i] = original - eps
|
|
fx2 = self._forward_criterion(criterion, input, target)
|
|
deriv = (fx1 - fx2) / (2. * eps)
|
|
d_x[i] = float(deriv)
|
|
x[i] = original
|
|
|
|
# TODO: check structure
|
|
analytical_t = list(iter_tensors(analytical_d_x))
|
|
numerical_t = list(iter_tensors(numerical_d_x))
|
|
|
|
self.assertLessEqual(
|
|
max(a.add(-1, n).abs().max() for a, n in zip(analytical_t, numerical_t)),
|
|
PRECISION
|
|
)
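# The numerical derivative above is the standard central difference
# (f(x + eps) - f(x - eps)) / (2 * eps); a standalone illustration, kept
# outside the class because it is not used by the harness:
def _central_difference_example():
    def f(x):
        return (x ** 2).sum()
    x = torch.tensor([3.0], dtype=torch.double)
    eps = 1e-6
    approx = (f(x + eps) - f(x - eps)) / (2 * eps)
    assert torch.allclose(approx, torch.tensor(6.0, dtype=torch.double), atol=1e-4)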
|
|
|
|
|
|
class TestBase(object):
|
|
|
|
_required_arg_names = {'constructor_args', 'input', 'extra_args'}
|
|
|
|
def __init__(self, constructor, desc='', reference_fn=None, fullname=None, **kwargs):
|
|
self.desc = desc
|
|
self.fullname = fullname
|
|
self.constructor = constructor
|
|
self.reference_fn = reference_fn
|
|
for name in self._required_arg_names:
|
|
if name not in kwargs and name + '_fn' not in kwargs and name + '_size' not in kwargs:
|
|
if name in {'constructor_args', 'extra_args'}:
|
|
kwargs[name] = tuple()
|
|
else:
|
|
                    raise ValueError("{}: Specify {} by a value, a function to generate it, or its size!"
|
|
.format(self.get_name(), name))
|
|
self._extra_kwargs = kwargs
|
|
self._arg_cache = {}
|
|
|
|
def get_name(self):
|
|
if self.fullname is not None:
|
|
return 'test_' + self.fullname
|
|
|
|
test_name = 'test_' + self.constructor.__name__
|
|
if self.desc:
|
|
test_name += '_' + self.desc
|
|
return test_name
|
|
|
|
def _unpack(self, value):
|
|
if isinstance(value, torch.Tensor):
|
|
return value
|
|
elif is_iterable(value):
|
|
return type(value)(self._unpack(v) for v in value)
|
|
else:
|
|
return value
|
|
|
|
@property
|
|
def constructor_args(self):
|
|
return self._get_arg('constructor_args', True)
|
|
|
|
@property
|
|
def extra_args(self):
|
|
return self._get_arg('extra_args', True)
|
|
|
|
def _get_arg(self, name, unpack):
|
|
assert name in self._required_arg_names
|
|
|
|
if name not in self._arg_cache:
|
|
fn_name = name + '_fn'
|
|
size_name = name + '_size'
|
|
|
|
if name in self._extra_kwargs:
|
|
self._arg_cache[name] = self._extra_kwargs[name]
|
|
elif fn_name in self._extra_kwargs:
|
|
self._arg_cache[name] = self._extra_kwargs[fn_name]()
|
|
else:
|
|
assert size_name in self._extra_kwargs
|
|
|
|
def map_tensor_sizes(sizes):
|
|
if isinstance(sizes, list):
|
|
return [map_tensor_sizes(s) for s in sizes]
|
|
elif isinstance(sizes, torch.Tensor):
|
|
return sizes.double()
|
|
else:
|
|
return torch.randn(sizes)
|
|
|
|
self._arg_cache[name] = map_tensor_sizes(self._extra_kwargs[size_name])
|
|
|
|
return self._unpack(self._arg_cache[name]) if unpack else self._arg_cache[name]
|
|
|
|
def _get_input(self, unpack=True):
|
|
return self._get_arg('input', unpack)
|
|
|
|
def __call__(self, test_case):
|
|
raise NotImplementedError
|
|
|
|
|
|
class ModuleTest(TestBase):
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super(ModuleTest, self).__init__(*args, **kwargs)
|
|
self.jacobian_input = kwargs.get('jacobian_input', True)
|
|
self.should_test_cuda = kwargs.get('test_cuda', True)
|
|
self.should_test_pickle = kwargs.get('pickle', True)
|
|
self.check_gradgrad = kwargs.get('check_gradgrad', True)
|
|
self.FIXME_no_cuda_gradgrad_comparison = \
|
|
kwargs.get('FIXME_no_cuda_gradgrad_comparison', False)
|
|
self.precision = kwargs.get('precision', 2e-4)
|
|
|
|
def __call__(self, test_case):
|
|
module = self.constructor(*self.constructor_args)
|
|
input = self._get_input()
|
|
|
|
if self.reference_fn is not None:
|
|
out = test_case._forward(module, input)
|
|
ref_input = deepcopy(input)
|
|
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
|
|
test_case.assertEqual(out, expected_out)
|
|
self.test_noncontig(test_case, module, input)
|
|
|
|
if self.should_test_pickle:
|
|
# TODO: do this with in-memory files as soon as torch.save will support it
|
|
with TemporaryFile() as f:
|
|
test_case._forward(module, input)
|
|
torch.save(module, f)
|
|
f.seek(0)
|
|
module_copy = torch.load(f)
|
|
test_case.assertEqual(test_case._forward(module, input), test_case._forward(module_copy, input))
|
|
|
|
self._do_test(test_case, module, input)
|
|
|
|
def noncontiguize(self, obj):
|
|
if isinstance(obj, list):
|
|
return [self.noncontiguize(o) for o in obj]
|
|
tensor = obj
|
|
ndim = tensor.dim()
|
|
# Always making only the last dimension noncontiguous is easy to hide
|
|
# bugs because .view(-1) will still work. So try to find a dim with size
|
|
# > 1 and make that non-contiguous, i.e., stack + select on the
|
|
# dimension directly after that.
|
|
dim = ndim
|
|
for d in range(ndim):
|
|
if tensor.size(d) > 1:
|
|
dim = d + 1
|
|
break
|
|
noncontig = torch.stack([torch.empty_like(tensor), tensor], dim).select(dim, 1).detach()
|
|
assert noncontig.numel() == 1 or not noncontig.is_contiguous()
|
|
noncontig.requires_grad = tensor.requires_grad
|
|
return noncontig
|
|
|
|
def test_noncontig(self, test_case, module, input):
|
|
# check no scalars, can't make non-contig
|
|
if isinstance(input, torch.Tensor) and input.dim() == 0:
|
|
return
|
|
if any(i.dim() == 0 for i in input if isinstance(i, torch.Tensor)):
|
|
return
|
|
|
|
test_case._zero_grad_parameters(module)
|
|
test_case._zero_grad_input(input)
|
|
with freeze_rng_state():
|
|
output = test_case._forward(module, input)
|
|
grad_output = output.new(output.shape).normal_()
|
|
output = output.clone()
|
|
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
|
|
d_param = deepcopy(test_case._get_parameters(module)[1])
|
|
|
|
nc_input = self.noncontiguize(input)
|
|
nc_grad_output = self.noncontiguize(grad_output)
|
|
for contig_i, contig_g in product((True, False), repeat=2):
|
|
i = input if contig_i else nc_input
|
|
go = grad_output if contig_g else nc_grad_output
|
|
test_case._zero_grad_parameters(module)
|
|
test_case._zero_grad_input(i)
|
|
with freeze_rng_state():
|
|
out = test_case._forward(module, i)
|
|
grad = test_case._backward(module, i, out, go)
|
|
|
|
test_case.assertEqual(out, output)
|
|
test_case.assertEqual(grad, d_input, 1e-4)
|
|
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
|
|
|
|
def test_cuda(self, test_case):
|
|
if not TEST_CUDA or not self.should_test_cuda:
|
|
raise unittest.SkipTest('Excluded from CUDA tests')
|
|
try:
|
|
cpu_input = self._get_input()
|
|
type_map = {'torch.DoubleTensor': torch.cuda.FloatTensor}
|
|
gpu_input = to_gpu(cpu_input, type_map=type_map)
|
|
|
|
cpu_module = self.constructor(*self.constructor_args)
|
|
gpu_module = self.constructor(*self.constructor_args).float().cuda()
|
|
cpu_param = test_case._get_parameters(cpu_module)
|
|
gpu_param = test_case._get_parameters(gpu_module)
|
|
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
|
|
gpu_p.data.copy_(cpu_p)
|
|
|
|
test_case._zero_grad_input(cpu_input)
|
|
test_case._zero_grad_input(gpu_input)
|
|
test_case._zero_grad_parameters(cpu_module)
|
|
test_case._zero_grad_parameters(gpu_module)
|
|
cpu_output = test_case._forward(cpu_module, cpu_input)
|
|
gpu_output = test_case._forward(gpu_module, gpu_input)
|
|
test_case.assertEqual(cpu_output, gpu_output, self.precision)
|
|
|
|
# Run backwards on CPU and GPU and compare results
|
|
for _ in range(5):
|
|
cpu_gradOutput = cpu_output.clone().normal_()
|
|
gpu_gradOutput = cpu_gradOutput.type('torch.cuda.FloatTensor')
|
|
cpu_gradInput = test_case._backward(cpu_module, cpu_input, cpu_output, cpu_gradOutput)
|
|
gpu_gradInput = test_case._backward(gpu_module, gpu_input, gpu_output, gpu_gradOutput)
|
|
test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
|
|
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
|
|
test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)
|
|
|
|
# Run double-backwards on CPU and GPU and compare results
|
|
if self.check_gradgrad and not self.FIXME_no_cuda_gradgrad_comparison:
|
|
cpu_output = cpu_module(cpu_input)
|
|
gpu_output = gpu_module(gpu_input)
|
|
|
|
cpu_gradOutput = torch.randn_like(cpu_output, requires_grad=True)
|
|
gpu_gradOutput = cpu_gradOutput.type_as(gpu_output).detach()
|
|
gpu_gradOutput.requires_grad = True
|
|
|
|
cpu_gradInputs = torch.autograd.grad(
|
|
cpu_output,
|
|
(cpu_input,) + tuple(cpu_module.parameters()),
|
|
cpu_gradOutput,
|
|
create_graph=True)
|
|
gpu_gradInputs = torch.autograd.grad(
|
|
gpu_output,
|
|
(gpu_input,) + tuple(gpu_module.parameters()),
|
|
gpu_gradOutput,
|
|
create_graph=True)
|
|
|
|
for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
|
|
test_case.assertEqual(cpu_d_i, gpu_d_i, self.precision)
|
|
|
|
# We mix output into the second backwards computation so that
|
|
# torch.autograd.grad doesn't complain that some inputs
|
|
# are unreachable (which can happen if you differentiate
|
|
# only on the gradient.
|
|
cpu_gg = torch.autograd.grad(
|
|
cpu_output.sum() + sum(map(lambda x: x.sum(), cpu_gradInputs)),
|
|
(cpu_input, cpu_gradOutput) + tuple(cpu_module.parameters()),
|
|
retain_graph=True)
|
|
gpu_gg = torch.autograd.grad(
|
|
gpu_output.sum() + sum(map(lambda x: x.sum(), gpu_gradInputs)),
|
|
(gpu_input, gpu_gradOutput) + tuple(gpu_module.parameters()),
|
|
retain_graph=True)
|
|
|
|
test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
|
|
for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
|
|
test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)
|
|
|
|
self.test_noncontig(test_case, gpu_module, gpu_input)
|
|
except NotImplementedError:
|
|
pass
|
|
# TODO: remove this after CUDA scatter_ is implemented
|
|
except AttributeError as e:
|
|
if len(e.args) == 1 and "'FloatTensor' object has no attribute 'scatter_'" in e.args[0]:
|
|
pass
|
|
else:
|
|
raise
|
|
|
|
|
|
class CriterionTest(TestBase):
|
|
|
|
_required_arg_names = TestBase._required_arg_names.union({'target'})
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super(CriterionTest, self).__init__(*args, **kwargs)
|
|
self.should_test_cuda = kwargs.get('test_cuda', True)
|
|
        self.check_forward_only = kwargs.get('check_forward_only', False)
|
|
|
|
def _get_target(self):
|
|
return self._get_arg('target', True)
|
|
|
|
def __call__(self, test_case):
|
|
module = self.constructor(*self.constructor_args)
|
|
input = self._get_input()
|
|
|
|
# Check that these methods don't raise errors
|
|
module.__repr__()
|
|
str(module)
|
|
|
|
target = self._get_target()
|
|
|
|
if self.reference_fn is not None:
|
|
out = test_case._forward_criterion(module, input, target, extra_args=self.extra_args)
|
|
ref_args = (deepcopy(input), deepcopy(target)) + self.extra_args + (module,)
|
|
expected_out = self.reference_fn(*ref_args)
|
|
test_case.assertEqual(out, expected_out)
|
|
|
|
if self.check_forward_only:
|
|
return
|
|
|
|
test_case.check_criterion_jacobian(module, input, target)
|
|
self._do_extra_tests(test_case, module, input, target)
|
|
|
|
def test_cuda(self, test_case):
|
|
if not TEST_CUDA or not self.should_test_cuda:
|
|
raise unittest.SkipTest('Excluded from CUDA tests')
|
|
try:
|
|
cpu_input = self._get_input()
|
|
type_map = {
|
|
'torch.DoubleTensor': torch.cuda.FloatTensor,
|
|
}
|
|
gpu_input = to_gpu(cpu_input, type_map=type_map)
|
|
|
|
cpu_target = self._get_target()
|
|
gpu_target = to_gpu(cpu_target, type_map=type_map)
|
|
|
|
cpu_module = self.constructor(*self.constructor_args)
|
|
gpu_module = self.constructor(*self.constructor_args).float().cuda()
|
|
|
|
cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target)
|
|
gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target)
|
|
test_case.assertEqual(cpu_output, gpu_output, 4e-4)
|
|
|
|
gradOutput = torch.randn(())
|
|
cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target, gradOutput)
|
|
gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target, gradOutput)
|
|
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 4e-4)
|
|
except NotImplementedError:
|
|
pass
|
|
|
|
def _do_extra_tests(self, test_case, module, input, target):
|
|
pass
|