Remove torch/legacy (#11823)

Summary:
Largely unused and hinders current development
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11823

Differential Revision: D9925094

Pulled By: cpuhrsch

fbshipit-source-id: c797f62180e2128f9a567b0c57c8347957470ea5
Christian Puhrsch
2018-09-20 13:57:22 -07:00
committed by Facebook Github Bot
parent 24ec813967
commit d8f6be686d
167 changed files with 1 addition and 13137 deletions

View File

@ -27,7 +27,6 @@ TESTS = [
'distributions',
'indexing',
'jit',
'legacy_nn',
'multiprocessing',
'nccl',
'nn',

File diff suppressed because it is too large

View File

@ -22,7 +22,6 @@ import torch.nn.functional as F
import torch.nn.parallel as dp
import torch.nn.init as init
import torch.nn.utils.rnn as rnn_utils
import torch.legacy.nn as legacy
from torch.nn.utils import clip_grad_norm_, clip_grad_value_
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from torch.autograd import Variable, gradcheck
@ -5821,42 +5820,6 @@ class TestNN(NNTestCase):
expected = m(inp.view(6, 5)).view(2, 3, 8)
self.assertEqual(expected, m(inp))
def test_bilinear(self):
module = nn.Bilinear(10, 10, 8)
module_legacy = legacy.Bilinear(10, 10, 8)
module_legacy.weight.copy_(module.weight.data)
module_legacy.bias.copy_(module.bias.data)
input1 = torch.randn(4, 10)
input2 = torch.randn(4, 10)
output = module(Variable(input1), Variable(input2))
output_legacy = module_legacy.forward([input1, input2])
self.assertEqual(output.data, output_legacy)
input1_1 = torch.tensor(input1, requires_grad=True)
input2_1 = torch.tensor(input2, requires_grad=True)
module.zero_grad()
module_legacy.zeroGradParameters()
output = module(input1_1, input2_1)
grad_output = torch.randn(*output.size())
gi1_legacy, gi2_legacy = module_legacy.backward([input1, input2], grad_output)
output.backward(grad_output)
gi1 = input1_1.grad.data.clone()
gi2 = input2_1.grad.data.clone()
self.assertEqual(gi1, gi1_legacy)
self.assertEqual(gi2, gi2_legacy)
self.assertEqual(module.weight.grad.data, module_legacy.gradWeight)
self.assertEqual(module.bias.grad.data, module_legacy.gradBias)
_assertGradAndGradgradChecks(self, lambda x1, x2: F.bilinear(x1, x2, module.weight, module.bias),
(input1_1, input2_1))
def test_bilinear_no_bias(self):
module = nn.Bilinear(10, 10, 8)
module_no_bias = nn.Bilinear(10, 10, 8, False)

View File

@ -5,7 +5,6 @@ from copy import deepcopy
import torch
from torch._six import inf
import torch.optim as optim
import torch.legacy.optim as old_optim
import torch.nn.functional as F
from torch.optim import SGD
from torch.autograd import Variable
@ -24,44 +23,7 @@ def drosenbrock(tensor):
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))
def wrap_old_fn(old_fn, **config):
def wrapper(closure, params, state):
return old_fn(closure, params, config, state)
return wrapper
class TestOptim(TestCase):
def _test_rosenbrock(self, constructor, old_fn):
params_t = torch.Tensor([1.5, 1.5])
state = {}
params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
optimizer = constructor([params])
solution = torch.Tensor([1, 1])
initial_dist = params.data.dist(solution)
def eval():
optimizer.zero_grad()
loss = rosenbrock(params)
loss.backward()
# loss.backward() will give **slightly** different
# gradients than drosenbrock, because of a different ordering
# of floating point operations. In most cases it doesn't matter,
# but some optimizers are so sensitive that they can temporarily
# diverge up to 1e-4, just to converge again. This makes the
# comparison more stable.
params.grad.data.copy_(drosenbrock(params.data))
return loss
for i in range(2000):
optimizer.step(eval)
old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
params_t, state)
self.assertEqual(params.data, params_t)
self.assertLessEqual(params.data.dist(solution), initial_dist)
def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
params_t = torch.Tensor([1.5, 1.5])
@ -237,16 +199,6 @@ class TestOptim(TestCase):
return [dict(params=bias, **kwargs)]
def test_sgd(self):
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3),
wrap_old_fn(old_optim.sgd, learningRate=1e-3)
)
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
dampening=0, weight_decay=1e-4),
wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
dampening=0, weightDecay=1e-4)
)
self._test_basic_cases(
lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
)
@ -273,14 +225,6 @@ class TestOptim(TestCase):
)
def test_adam(self):
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
)
@ -310,18 +254,6 @@ class TestOptim(TestCase):
optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))
def test_adadelta(self):
self._test_rosenbrock(
lambda params: optim.Adadelta(params),
wrap_old_fn(old_optim.adadelta)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, rho=0.95),
wrap_old_fn(old_optim.adadelta, rho=0.95)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, weight_decay=1e-2),
wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adadelta([weight, bias])
)
@ -333,18 +265,6 @@ class TestOptim(TestCase):
optim.Adadelta(None, lr=1e-2, rho=1.1)
def test_adagrad(self):
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, lr_decay=1e-3),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, learningRateDecay=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
)
@ -367,18 +287,6 @@ class TestOptim(TestCase):
@skipIfRocm
def test_adamax(self):
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1),
wrap_old_fn(old_optim.adamax, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, betas=(0.95, 0.998)),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, beta1=0.95, beta2=0.998)
)
self._test_basic_cases(
lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
)
@ -391,18 +299,6 @@ class TestOptim(TestCase):
optim.Adamax(None, lr=1e-2, betas=(0.0, 1.0))
def test_rmsprop(self):
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
)
self._test_basic_cases(
lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2)
)
@ -415,18 +311,6 @@ class TestOptim(TestCase):
optim.RMSprop(None, lr=1e-2, momentum=-1.0)
def test_asgd(self):
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3),
wrap_old_fn(old_optim.asgd, eta0=1e-3)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3)
)
self._test_basic_cases(
lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
)
@ -440,18 +324,6 @@ class TestOptim(TestCase):
@skipIfRocm
def test_rprop(self):
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3),
wrap_old_fn(old_optim.rprop, stepsize=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, etas=(0.6, 1.1)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, etaminus=0.6, etaplus=1.1)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, step_sizes=(1e-4, 3)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, stepsizemin=1e-4, stepsizemax=3)
)
self._test_basic_cases(
lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
)
@ -464,14 +336,6 @@ class TestOptim(TestCase):
optim.Rprop(None, lr=1e-2, etas=(1.0, 0.5))
def test_lbfgs(self):
self._test_rosenbrock(
lambda params: optim.LBFGS(params),
wrap_old_fn(old_optim.lbfgs)
)
self._test_rosenbrock(
lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
)
self._test_basic_cases(
lambda weight, bias: optim.LBFGS([weight, bias]),
ignore_multidevice=True
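
The removed tests above compared each optimizer against its torch.legacy.optim counterpart on the Rosenbrock function, using the analytic gradient drosenbrock shown earlier. As a quick editor sketch (assuming the standard two-variable Rosenbrock definition, which the test imported from its helpers), autograd reproduces that analytic gradient up to floating-point rounding, which is the small discrepancy the removed comment refers to:

import torch

def rosenbrock(t):
    # standard two-variable Rosenbrock function (assumed definition; the test imported it)
    x, y = t
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2

def drosenbrock(t):
    # analytic gradient, same formula as the removed helper above
    x, y = t
    return torch.stack((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))

params = torch.tensor([1.5, 1.5], requires_grad=True)
rosenbrock(params).backward()
print(torch.allclose(params.grad, drosenbrock(params.detach())))  # True, up to rounding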

View File

@ -441,98 +441,6 @@ class TestFFI(TestCase):
lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))
class TestLuaReader(TestCase):
@staticmethod
def _module_test(name, test):
def do_test(self):
module = test['module']
input = test['input']
grad_output = test['grad_output']
if hasattr(self, '_transform_' + name):
input = getattr(self, '_transform_' + name)(input)
output = module.forward(input)
module.zeroGradParameters()
grad_input = module.backward(input, grad_output)
self.assertEqual(output, test['output'])
self.assertEqual(grad_input, test['grad_input'])
if module.parameters() is not None:
params, d_params = module.parameters()
self.assertEqual(params, test['params'])
self.assertEqual(d_params, test['d_params'])
else:
self.assertFalse('params' in test and test['params'])
self.assertFalse('params' in test and test['d_params'])
return do_test
@staticmethod
def _criterion_test(name, test):
def do_test(self):
module = test['module']
input = test['input']
if name == 'L1Cost':
target = None
else:
target = test['target']
if hasattr(self, '_transform_' + name):
input, target = getattr(self, '_transform_' + name)(input, target)
output = module.forward(input, target)
grad_input = module.backward(input, target)
self.assertEqual(output, test['loss'])
self.assertEqual(grad_input, test['grad_input'])
return do_test
@classmethod
def init(cls):
try:
path = download_file('https://download.pytorch.org/test_data/legacy_modules.t7')
except unittest.SkipTest:
return
long_size = 8 if sys.platform == 'win32' else None
tests = load_lua(path, long_size=long_size)
for name, test in tests['modules'].items():
if name == "HardShrink":
continue
test_name = 'test_' + name.replace('nn.', '')
setattr(cls, test_name, cls._module_test(name, test))
for name, test in tests['criterions'].items():
if name == "HardShrink":
continue
test_name = 'test_' + name.replace('nn.', '')
setattr(cls, test_name, cls._criterion_test(name, test))
def _transform_Index(self, input):
return [input[0], input[1].sub(1)]
def _transform_LookupTable(self, input):
return input.sub(1)
def _transform_MultiLabelMarginCriterion(self, input, target):
return input, target.sub(1)
def _transform_ClassNLLCriterion(self, input, target):
return input, target.sub(1)
def _transform_SpatialClassNLLCriterion(self, input, target):
return input, target.sub(1)
def _transform_ClassSimplexCriterion(self, input, target):
return input, target.sub(1)
def _transform_CrossEntropyCriterion(self, input, target):
return input, target.sub(1)
def _transform_ParallelCriterion(self, input, target):
return input, [target[0].sub(1), target[1]]
def _transform_MultiCriterion(self, input, target):
return input, target.sub(1)
def _transform_MultiMarginCriterion(self, input, target):
return input, target.sub(1)
@unittest.skipIf('SKIP_TEST_BOTTLENECK' in os.environ.keys(), 'SKIP_TEST_BOTTLENECK is set')
class TestBottleneck(TestCase):
def _run(self, command):
@ -700,6 +608,4 @@ class TestONNXUtils(TestCase):
if __name__ == '__main__':
from torch.utils.serialization import load_lua
TestLuaReader.init()
run_tests()

torch/legacy/README.md Normal file
View File

@ -0,0 +1 @@
If you're looking for this legacy code, please consider versions of PyTorch before 0.5.

View File

@ -1,7 +0,0 @@
"""Package containing code ported from Lua torch.
To make it possible to work with existing models and ease the transition
for current Lua torch users, we've created this package. You can find the
``nn`` code in ``torch.legacy.nn``, and ``optim`` in ``torch.legacy.optim``.
The APIs should exactly match Lua torch.
"""

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class Abs(Module):
def __init__(self):
super(Abs, self).__init__()
def updateOutput(self, input):
self._backend.Abs_updateOutput(
self._backend.library_state,
input,
self.output
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.Abs_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput
)
return self.gradInput

View File

@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class AbsCriterion(Criterion):
def __init__(self, sizeAverage=True):
super(AbsCriterion, self).__init__()
self.sizeAverage = sizeAverage
self.output_tensor = torch.Tensor(1)
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.AbsCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.AbsCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,57 +0,0 @@
import math
import torch
from .Module import Module
class Add(Module):
def __init__(self, inputSize, scalar=False):
super(Add, self).__init__()
size = inputSize
if scalar:
assert size == 1
self.scalar = scalar
self.bias = torch.Tensor(size)
self.gradBias = torch.Tensor(size)
self._ones = torch.Tensor((1,))
self.reset()
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.bias.size(0))
self.bias.uniform_(-stdv, stdv)
def updateOutput(self, input):
self.output.resize_as_(input).copy_(input)
if self.scalar:
self.output.add_(self.bias[0])
else:
batchSize = input.size(0)
if self._ones.size(0) != batchSize:
self._ones.resize_(batchSize).fill_(1)
bias = self.bias.view(-1)
output = self.output.view(batchSize, -1)
output.addr_(self._ones, bias)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is not None:
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
if self.gradBias.size(0) == 1:
self.gradBias[0] = self.gradBias[0] + scale * gradOutput.sum()
else:
if input.is_same_size(self.bias):
self.gradBias.add_(scale, gradOutput)
else:
gradOutput = gradOutput.contiguous().view(input.size(0), -1)
self.gradBias.view(-1).addmv_(scale, gradOutput.t(), self._ones)

View File

@ -1,32 +0,0 @@
import torch
from .Module import Module
class AddConstant(Module):
def __init__(self, constant_scalar, inplace=False):
super(AddConstant, self).__init__()
self.constant_scalar = constant_scalar
self.inplace = inplace
def updateOutput(self, input):
if self.inplace:
input.add_(self.constant_scalar)
self.output.set_(input)
else:
self.output.resize_as_(input)
self.output.copy_(input)
self.output.add_(self.constant_scalar)
return self.output
def updateGradInput(self, input, gradOutput):
if self.inplace:
self.gradInput.set_(gradOutput)
# restore previous input value
input.add_(-self.constant_scalar)
else:
self.gradInput.resize_as_(gradOutput)
self.gradInput.copy_(gradOutput)
return self.gradInput

View File

@ -1,95 +0,0 @@
import torch
from .Criterion import Criterion
# TODO: use THNN
class BCECriterion(Criterion):
eps = 1e-12
def __init__(self, weights=None, sizeAverage=True):
if weights is not None and weights.dim() != 1:
raise ValueError("weights input should be 1D Tensor")
super(BCECriterion, self).__init__()
self.sizeAverage = sizeAverage
self.buffer = None
self.weights = weights
def updateOutput(self, input, target):
# - log(input) * target - log(1 - input) * (1 - target)
if input.nelement() != target.nelement():
raise RuntimeError("input and target size mismatch")
if self.buffer is None:
self.buffer = input.new()
buffer = self.buffer
weights = self.weights
buffer.resize_as_(input)
if weights is not None and target.dim() != 1:
weights = self.weights.view(1, target.size(1)).expand_as(target)
# log(input) * target
torch.add(input, self.eps, out=buffer).log_()
if weights is not None:
buffer.mul_(weights)
target_1d = target.contiguous().view(-1)
# don't save a 1-d view of buffer: it should already be contiguous, and it's
# used as non-1d tensor later.
output = torch.dot(target_1d, buffer.contiguous().view(-1))
# log(1 - input) * (1 - target)
torch.mul(input, -1, out=buffer).add_(1 + self.eps).log_()
if weights is not None:
buffer.mul_(weights)
output = output + torch.sum(buffer)
output = output - torch.dot(target_1d, buffer.contiguous().view(-1))
if self.sizeAverage:
output = output / input.nelement()
self.output = - output.item()
return self.output
def updateGradInput(self, input, target):
# - (target - input) / ( input (1 - input) )
# The gradient is slightly incorrect:
# It should have been divided by (input + self.eps) * (1 - input + self.eps),
# but it is divided by input * (1 - input + self.eps) + self.eps.
# This modification requires less memory to compute.
if input.nelement() != target.nelement():
raise RuntimeError("input and target size mismatch")
if self.buffer is None:
self.buffer = input.new()
buffer = self.buffer
weights = self.weights
gradInput = self.gradInput
if weights is not None and target.dim() != 1:
weights = self.weights.view(1, target.size(1)).expand_as(target)
buffer.resize_as_(input)
# - x ( 1 + self.eps -x ) + self.eps
torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps)
gradInput.resize_as_(input)
# y - x
torch.add(target, -1, input, out=gradInput)
# - (y - x) / ( x ( 1 + self.eps -x ) + self.eps )
gradInput.div_(buffer)
if weights is not None:
gradInput.mul_(weights)
if self.sizeAverage:
gradInput.div_(target.nelement())
return gradInput
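
The comments above describe the loss as -log(input) * target - log(1 - input) * (1 - target) with a 1e-12 epsilon for stability. A brief editor sketch (hypothetical values, modern torch) showing that this epsilon-stabilized formula agrees with torch.nn.functional.binary_cross_entropy when the probabilities stay away from 0 and 1:

import torch
import torch.nn.functional as F

eps = 1e-12
x = torch.rand(4, 3)   # predicted probabilities
t = torch.rand(4, 3)   # targets in [0, 1]

# the formula from the criterion above, averaged over elements (sizeAverage=True)
manual = -(t * (x + eps).log() + (1 - t) * (1 - x + eps).log()).mean()
builtin = F.binary_cross_entropy(x, t)   # modern replacement for this criterion
print(torch.allclose(manual, builtin, atol=1e-6))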

View File

@ -1,192 +0,0 @@
"""
This file implements Batch Normalization as described in the paper:
"Batch Normalization: Accelerating Deep Network Training
by Reducing Internal Covariate Shift"
by Sergey Ioffe, Christian Szegedy
This implementation is useful for inputs NOT coming from convolution layers.
For convolution layers, use nn.SpatialBatchNormalization.
The operation implemented is:
y = ( x - mean(x) ) / standard-deviation(x) * gamma + beta
where gamma and beta are learnable parameters.
The learning of gamma and beta is optional.
Usage:
with learnable parameters: nn.BatchNormalization(N [, eps] [, momentum])
where N = dimensionality of input
without learnable parameters: nn.BatchNormalization(N [, eps] [, momentum], False)
eps is a small value added to the standard-deviation to avoid divide-by-zero.
Defaults to 1e-5
During training, this layer keeps a running estimate of its computed mean and std.
The running sum is kept with a default momentum of 0.1 (unless overridden).
At test time, this running mean/std is used to normalize.
"""
import torch
from .Module import Module
from .utils import clear
class BatchNormalization(Module):
# expected dimension of input
nDim = 2
def __init__(self, nOutput, eps=1e-5, momentum=0.1, affine=True):
super(BatchNormalization, self).__init__()
assert nOutput != 0
self.affine = affine
self.eps = eps
self.train = True
self.momentum = momentum
self.running_mean = torch.zeros(nOutput)
self.running_var = torch.ones(nOutput)
self.save_mean = None
self.save_std = None
self._input = None
self._gradOutput = None
if self.affine:
self.weight = torch.Tensor(nOutput)
self.bias = torch.Tensor(nOutput)
self.gradWeight = torch.Tensor(nOutput)
self.gradBias = torch.Tensor(nOutput)
self.reset()
else:
self.weight = None
self.bias = None
self.gradWeight = None
self.gradBias = None
def reset(self):
if self.weight is not None:
self.weight.uniform_()
if self.bias is not None:
self.bias.zero_()
self.running_mean.zero_()
self.running_var.fill_(1)
def _checkInputDim(self, input):
if input.dim() != self.nDim:
raise RuntimeError(
'only mini-batch supported ({}D tensor), got {}D tensor instead'.format(self.nDim, input.dim()))
if input.size(1) != self.running_mean.nelement():
raise RuntimeError('got {}-feature tensor, expected {}'.format(input.size(1), self.running_mean.nelement()))
def _makeContiguous(self, input, gradOutput=None):
if not input.is_contiguous():
if self._input is None:
self._input = input.new()
self._input.resize_as_(input).copy_(input)
input = self._input
if gradOutput is not None:
if not gradOutput.is_contiguous():
if self._gradOutput is None:
self._gradOutput = gradOutput.new()
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
gradOutput = self._gradOutput
return input, gradOutput
def updateOutput(self, input):
self._checkInputDim(input)
input = self._makeContiguous(input)[0]
self.output.resize_as_(input)
if self.save_mean is None:
self.save_mean = input.new()
self.save_mean.resize_as_(self.running_mean)
if self.save_std is None:
self.save_std = input.new()
self.save_std.resize_as_(self.running_var)
self._backend.BatchNormalization_updateOutput(
self._backend.library_state,
input,
self.output,
self.weight,
self.bias,
self.running_mean,
self.running_var,
self.save_mean,
self.save_std,
self.train,
self.momentum,
self.eps
)
return self.output
def _backward(self, input, gradOutput, scale, gradInput=None, gradWeight=None, gradBias=None):
self._checkInputDim(input)
self._checkInputDim(gradOutput)
if not hasattr(self, 'save_mean') or not hasattr(self, 'save_std'):
raise RuntimeError('you have to call updateOutput() at least once before backward()')
input, gradOutput = self._makeContiguous(input, gradOutput)
scale = scale or 1.
if gradInput is not None:
gradInput.resize_as_(gradOutput)
self._backend.BatchNormalization_backward(
self._backend.library_state,
input,
gradOutput,
gradInput,
gradWeight,
gradBias,
self.weight,
self.running_mean,
self.running_var,
self.save_mean,
self.save_std,
self.train,
scale,
self.eps
)
return self.gradInput
def backward(self, input, gradOutput, scale=1.):
return self._backward(input, gradOutput, scale, self.gradInput, self.gradWeight, self.gradBias)
def updateGradInput(self, input, gradOutput):
return self._backward(input, gradOutput, 1., self.gradInput)
def accGradParameters(self, input, gradOutput, scale=1.):
return self._backward(input, gradOutput, scale, None, self.gradWeight, self.gradBias)
def read(self, file, version):
super(BatchNormalization, self).read(self, file)
if version < 2:
if self.running_std:
self.running_var = self.running_std.pow_(-2).add_(-self.eps)
self.running_std = None
def clearState(self):
# first 5 buffers are not present in the current implementation,
# but we keep them for cleaning old saved models
clear(self, [
'buffer',
'buffer2',
'centered',
'std',
'normalized',
'_input',
'_gradOutput',
'save_mean',
'save_std',
])
return super(BatchNormalization, self).clearState()
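
Per the docstring above, the forward pass is y = (x - mean(x)) / standard-deviation(x) * gamma + beta, computed per feature over the batch, with running estimates kept for evaluation. A minimal editor sketch with plain tensor ops (illustrative only; the removed module delegates this to the THNN backend, and real implementations track the running variance with the unbiased estimate):

import torch

x = torch.randn(32, 8)              # (batch, features): the 2D input this module expects
gamma = torch.ones(8)               # learnable scale (weight)
beta = torch.zeros(8)               # learnable shift (bias)
eps = 1e-5
momentum = 0.1

mean = x.mean(dim=0)
var = x.var(dim=0, unbiased=False)  # per-feature batch statistics
y = (x - mean) / (var + eps).sqrt() * gamma + beta

# running estimates updated during training and used instead of batch statistics at eval time
running_mean = (1 - momentum) * torch.zeros(8) + momentum * mean
running_var = (1 - momentum) * torch.ones(8) + momentum * var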

View File

@ -1,137 +0,0 @@
import math
import torch
from .Module import Module
from .utils import clear
class Bilinear(Module):
def _assertInput(self, input):
if len(input) != 2 or not isinstance(input[0], torch.Tensor) or not isinstance(input[1], torch.Tensor):
raise RuntimeError('input should be a table containing two data Tensors')
if input[0].ndimension() != 2 or input[1].ndimension() != 2:
raise RuntimeError('input Tensors should be two-dimensional')
if input[0].size(0) != input[1].size(0):
raise RuntimeError('input Tensors should have the same number of rows')
if input[0].size(1) != self.weight.size(1):
raise RuntimeError('dimensionality of first input is erroneous')
if input[1].size(1) != self.weight.size(2):
raise RuntimeError('dimensionality of second input is erroneous')
def _assertInputGradOutput(self, input, gradOutput):
if input[0].size(0) != gradOutput.size(0):
raise RuntimeError('number of rows in gradOutput does not match input')
if gradOutput.size(1) != self.weight.size(0):
raise RuntimeError('number of columns in gradOutput does not match layer\'s output size')
def __init__(self, inputSize1, inputSize2, outputSize, bias=True):
# set up model:
super(Bilinear, self).__init__()
self.weight = torch.Tensor(outputSize, inputSize1, inputSize2)
self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2)
if bias:
self.bias = torch.Tensor(outputSize)
self.gradBias = torch.Tensor(outputSize)
else:
self.bias = None
self.gradBias = None
self.buff1 = None
self.buff2 = None
self.gradInput = [torch.Tensor(), torch.Tensor()]
self.reset()
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.uniform_(-stdv, stdv)
return self
def updateOutput(self, input):
self._assertInput(input)
# set up buffer:
if self.buff2 is None:
self.buff2 = input[0].new()
self.buff2.resize_as_(input[1])
# compute output scores:
self.output.resize_(input[0].size(0), self.weight.size(0))
for k in range(self.weight.size(0)):
torch.mm(input[0], self.weight[k], out=self.buff2)
self.buff2.mul_(input[1])
torch.sum(self.buff2, 1, True, out=self.output.narrow(1, k, 1))
if self.bias is not None:
self.output.add_(self.bias.view(1, self.bias.nelement()).expand_as(self.output))
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is None:
return
self._assertInputGradOutput(input, gradOutput)
# compute d output / d input:
self.gradInput[0].resize_as_(input[0]).fill_(0)
self.gradInput[1].resize_as_(input[1]).fill_(0)
# first slice of weight tensor (k = 1)
self.gradInput[0].addmm_(input[1], self.weight[0].t())
self.gradInput[0].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[0].size(0),
self.gradInput[0].size(1)))
self.gradInput[1].addmm_(input[0], self.weight[0])
self.gradInput[1].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[1].size(0),
self.gradInput[1].size(1)))
# remaining slices of weight tensor
if self.weight.size(0) > 1:
if self.buff1 is None:
self.buff1 = input[0].new()
self.buff1.resize_as_(input[0])
for k in range(1, self.weight.size(0)):
torch.mm(input[1], self.weight[k].t(), out=self.buff1)
self.buff1.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[0].size(0),
self.gradInput[0].size(1)))
self.gradInput[0].add_(self.buff1)
torch.mm(input[0], self.weight[k], out=self.buff2)
self.buff2.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[1].size(0),
self.gradInput[1].size(1)))
self.gradInput[1].add_(self.buff2)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
self._assertInputGradOutput(input, gradOutput)
# make sure we have buffer:
if self.buff1 is None:
self.buff1 = input[0].new()
self.buff1.resize_as_(input[0])
# accumulate parameter gradients:
for k in range(self.weight.size(0)):
torch.mul(input[0], gradOutput.narrow(1, k, 1).expand_as(input[0]), out=self.buff1)
self.gradWeight[k].addmm_(self.buff1.t(), input[1])
if self.bias is not None:
self.gradBias.add_(scale, gradOutput.sum(0, keepdim=False))
def __repr__(self):
return str(type(self)) + \
'({}x{} -> {}) {}'.format(
self.weight.size(1), self.weight.size(2), self.weight.size(0),
(' without bias' if self.bias is None else '')
)
def clearState(self):
clear(self, 'buff1', 'buff2')
return super(Bilinear, self).clearState()

View File

@ -1,36 +0,0 @@
import torch
from .Module import Module
class CAddTable(Module):
def __init__(self, inplace=False):
super(CAddTable, self).__init__()
self.inplace = inplace
self.gradInput = []
def updateOutput(self, input):
if self.inplace:
self.output.set_(input[0])
else:
self.output.resize_as_(input[0]).copy_(input[0])
for i in range(1, len(input)):
self.output.add_(input[i])
return self.output
def updateGradInput(self, input, gradOutput):
for i in range(len(input)):
if i >= len(self.gradInput):
assert i == len(self.gradInput)
self.gradInput.append(input[0].new())
if self.inplace:
self.gradInput[i].set_(gradOutput)
else:
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
del self.gradInput[len(input):]
return self.gradInput

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class CDivTable(Module):
def __init__(self, ):
super(CDivTable, self).__init__()
self.gradInput = []
def updateOutput(self, input):
self.output.resize_as_(input[0]).copy_(input[0])
self.output.div_(input[1])
return self.output
def updateGradInput(self, input, gradOutput):
while len(self.gradInput) < 2:
self.gradInput.append(input[0].new())
gradOutput = gradOutput.contiguous().view_as(input[0])
self.gradInput[0].resize_as_(input[0]).copy_(gradOutput).div_(input[1])
self.gradInput[1].resize_as_(input[1]).zero_().addcdiv_(-1, self.gradInput[0], input[1]).mul_(input[0])
del self.gradInput[len(input):]
return self.gradInput

View File

@ -1,117 +0,0 @@
import math
import torch
from .Module import Module
from .utils import clear, contiguousView
class CMul(Module):
def __init__(self, *args):
super(CMul, self).__init__()
if len(args) == 1 and isinstance(args[0], torch.Size):
self.size = args[0]
else:
self.size = torch.Size(args)
self.weight = torch.Tensor(self.size)
self.gradWeight = torch.Tensor(self.size)
self.output.resize_(self.size)
self.reset()
self._output = None
self._weight = None
self._expand = None
self._repeat = None
self._gradOutput = None
self._gradInput = None
self._input = None
self._gradWeight = None
self._sum = None
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.nelement())
self.weight.uniform_(-stdv, stdv)
def updateOutput(self, input):
# lazy-initialize
if self._output is None:
self._output = input.new()
self._weight = input.new()
self._expand = input.new()
self._repeat = input.new()
self.output.resize_as_(input).copy_(input)
batchSize = input.size(0)
# TODO: expand_as_, view_
self._output = self.output.view(batchSize, -1)
self._weight = self.weight.view(1, -1)
self._expand = self._weight.expand_as(self._output)
if torch.typename(input) == 'torch.cuda.FloatTensor':
self._repeat.resize_as_(self._expand).copy_(self._expand)
self._output.mul_(self._repeat)
else:
self._output.mul_(self._expand)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is None:
return
if self._gradOutput is None:
self._gradOutput = input.new()
self._gradInput = input.new()
self.gradInput.resize_as_(input).zero_()
batchSize = input.size(0)
contiguousView(self._gradOutput, gradOutput, batchSize, -1)
contiguousView(self._gradInput, self.gradInput, batchSize, -1)
self._weight = self.weight.view(1, -1)
self._expand = self._weight.expand_as(self._gradOutput)
if torch.typename(input) == 'torch.cuda.FloatTensor':
self._repeat.resize_as_(self._expand).copy_(self._expand)
self._gradInput.addcmul_(1, self._repeat, self._gradOutput)
else:
self._gradInput.addcmul_(1, self._expand, self._gradOutput)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
if self._input is None:
self._input = input.new()
self._gradWeight = input.new()
self._sum = input.new()
batchSize = input.size(0)
contiguousView(self._input, input, batchSize, -1)
contiguousView(self._gradOutput, gradOutput, batchSize, -1)
self._gradWeight = self.gradWeight.view(1, -1)
torch.mul(self._input, self._gradOutput, out=self._repeat)
torch.sum(self._repeat, 0, True, out=self._sum)
self._gradWeight.add_(scale, self._sum)
def type(self, type=None, tensorCache=None):
if type:
self.clearState()
return super(CMul, self).type(type, tensorCache)
def clearState(self):
clear(self, [
'_input',
'_output',
'_weight',
'_gradWeight',
'_expand',
'_repeat',
'_sum',
])
return super(CMul, self).clearState()

View File

@ -1,49 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class CMulTable(Module):
def __init__(self, ):
super(CMulTable, self).__init__()
self.gradInput = []
def updateOutput(self, input):
self.output.resize_as_(input[0]).copy_(input[0])
for i in range(1, len(input)):
self.output.mul_(input[i])
return self.output
def updateGradInput_efficient(self, input, gradOutput):
if self.tout is None:
self.tout = input[0].new()
self.tout.resize_as_(self.output)
for i in range(len(input)):
if len(self.gradInput) <= i:
assert i == len(self.gradInput)
self.gradInput.append(input[0].new())
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
self.tout.copy_(self.output).div_(input[i])
self.gradInput[i].mul_(self.tout)
self.gradInput = self.gradInput[:len(input)]
return self.gradInput
def updateGradInput(self, input, gradOutput):
for i in range(len(input)):
if len(self.gradInput) <= i:
assert i == len(self.gradInput)
self.gradInput.append(input[0].new())
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
for j in range(len(input)):
if i != j:
self.gradInput[i].mul_(input[j])
self.gradInput = self.gradInput[:len(input)]
return self.gradInput
def clearState(self):
clear(self, 'tout')
return super(CMulTable, self).clearState()

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class CSubTable(Module):
def __init__(self, ):
super(CSubTable, self).__init__()
self.gradInput = [torch.Tensor(), torch.Tensor()]
def updateOutput(self, input):
self.output.resize_as_(input[0]).copy_(input[0])
self.output.add_(-1, input[1])
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput[0] is None:
self.gradInput[0] = input[0].new()
if self.gradInput[1] is None:
self.gradInput[1] = input[1].new()
self.gradInput[0].resize_as_(input[0]).copy_(gradOutput)
self.gradInput[1].resize_as_(input[1]).copy_(gradOutput).mul_(-1)
self.gradInput = self.gradInput[:2]
return self.gradInput

View File

@ -1,8 +0,0 @@
import torch
from .HardTanh import HardTanh
class Clamp(HardTanh):
def __init__(self, min_value, max_value):
super(Clamp, self).__init__(min_value, max_value)

View File

@ -1,53 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class ClassNLLCriterion(Criterion):
def __init__(self, weights=None, sizeAverage=True, ignore_index=-100):
super(ClassNLLCriterion, self).__init__()
self.sizeAverage = sizeAverage
self.ignore_index = ignore_index
if weights is not None:
assert weights.dim() == 1
self.weights = weights
self.output_tensor = torch.zeros(1)
self.total_weight_tensor = torch.ones(1)
def updateOutput(self, input, target):
self.ignore_index = getattr(self, "ignore_index", -100)
target = target.long()
self._backend.ClassNLLCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
self.weights,
self.total_weight_tensor,
self.ignore_index,
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
self.gradInput.resize_as_(input).zero_()
target = target.long()
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.ClassNLLCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
self.weights,
self.total_weight_tensor,
self.ignore_index,
)
return self.gradInput

View File

@ -1,108 +0,0 @@
import math
import torch
from torch.nn.functional import _Reduction
from .MSECriterion import MSECriterion
"""
This file implements a criterion for multi-class classification.
It learns an embedding per class, where each class' embedding
is a point on an (N-1)-dimensional simplex, where N is
the number of classes.
For example usage of this class, look at doc/criterion.md
Reference: http://arxiv.org/abs/1506.08230
"""
class ClassSimplexCriterion(MSECriterion):
def __init__(self, nClasses):
super(ClassSimplexCriterion, self).__init__()
self.nClasses = nClasses
# embedding the simplex in a space of dimension strictly greater than
# the minimum possible (nClasses-1) is critical for effective training.
simp = self._regsplex(nClasses - 1)
self.simplex = torch.cat((simp, torch.zeros(simp.size(0), nClasses - simp.size(1))), 1)
self._target = torch.Tensor(nClasses)
self.output_tensor = None
def _regsplex(self, n):
"""
regsplex returns the coordinates of the vertices of a
regular simplex centered at the origin.
The Euclidean norms of the vectors specifying the vertices are
all equal to 1. The input n is the dimension of the vectors;
the simplex has n+1 vertices.
input:
n # dimension of the vectors specifying the vertices of the simplex
output:
a # tensor dimensioned (n+1, n) whose rows are
vectors specifying the vertices
reference:
http://en.wikipedia.org/wiki/Simplex#Cartesian_coordinates_for_regular_n-dimensional_simplex_in_Rn
"""
a = torch.zeros(n + 1, n)
for k in range(n):
# determine the last nonzero entry in the vector for the k-th vertex
if k == 0:
a[k][k] = 1
else:
a[k][k] = math.sqrt(1 - a[k:k + 1, 0:k + 1].norm() ** 2)
# fill_ the k-th coordinates for the vectors of the remaining vertices
c = (a[k][k] ** 2 - 1 - 1 / n) / a[k][k]
a[k + 1:n + 2, k:k + 1].fill_(c)
return a
# handle target being both 1D tensor, and
# target being 2D tensor (2D tensor means don't do anything)
def _transformTarget(self, target):
assert target.dim() == 1
nSamples = target.size(0)
self._target.resize_(nSamples, self.nClasses)
for i in range(nSamples):
self._target[i].copy_(self.simplex[int(target[i])])
def updateOutput(self, input, target):
self._transformTarget(target)
assert input.nelement() == self._target.nelement()
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.MSECriterion_updateOutput(
self._backend.library_state,
input,
self._target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
assert input.nelement() == self._target.nelement()
implicit_gradOutput = torch.Tensor([1]).type(input.type())
self._backend.MSECriterion_updateGradInput(
self._backend.library_state,
input,
self._target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput
def getPredictions(self, input):
return torch.mm(input, self.simplex.t())
def getTopPrediction(self, input):
prod = self.getPredictions(input)
_, maxs = prod.max(prod.ndimension() - 1)
return maxs.view(-1)
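
The docstring above describes embedding each class as a vertex of a regular simplex and regressing onto those vertices with MSE. A small editor sketch (plain modern torch, hypothetical shapes; the real criterion additionally zero-pads the vertices to nClasses dimensions) reproducing the _regsplex recurrence and using the vertices as targets:

import math
import torch
import torch.nn.functional as F

def regular_simplex(n):
    # vertices of a regular simplex centered at the origin (same recurrence as _regsplex above)
    a = torch.zeros(n + 1, n)
    for k in range(n):
        if k == 0:
            a[k][k] = 1
        else:
            a[k][k] = math.sqrt(1 - a[k:k + 1, 0:k + 1].norm() ** 2)
        c = (a[k][k] ** 2 - 1 - 1 / n) / a[k][k]
        a[k + 1:n + 2, k:k + 1].fill_(c)
    return a

verts = regular_simplex(2)                     # 3 classes -> unit-norm vertices of an equilateral triangle
targets = verts[torch.tensor([0, 2, 1])]       # per-sample targets for class labels [0, 2, 1]
loss = F.mse_loss(torch.randn(3, 2), targets)  # the criterion is MSE against these fixed targets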

View File

@ -1,106 +0,0 @@
import torch
from .Container import Container
class Concat(Container):
def __init__(self, dimension):
super(Concat, self).__init__()
self.outputSize = torch.Size()
self.dimension = dimension
def updateOutput(self, input):
outs = []
for i in range(len(self.modules)):
currentOutput = self.modules[i].updateOutput(input)
outs.append(currentOutput)
if i == 0:
size = list(currentOutput.size())
else:
size[self.dimension] += currentOutput.size(self.dimension)
self.outputSize = torch.Size(size)
self.output.resize_(self.outputSize)
offset = 0
for i, module in enumerate(self.modules):
currentOutput = outs[i]
self.output.narrow(self.dimension, offset, currentOutput.size(self.dimension)).copy_(currentOutput)
offset = offset + currentOutput.size(self.dimension)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input)
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
currentGradInput = module.updateGradInput(input, gradOutput.narrow(
self.dimension, offset, currentOutput.size(self.dimension)))
# if the module does not produce a gradInput (for example the first layer), ignore it and move on.
if currentGradInput:
if i == 0:
self.gradInput.copy_(currentGradInput)
else:
self.gradInput.add_(currentGradInput)
offset = offset + currentOutput.size(self.dimension)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
module.accGradParameters(
input,
gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)),
scale)
offset = offset + currentOutput.size(self.dimension)
def backward(self, input, gradOutput, scale=1):
self.gradInput.resize_as_(input)
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
currentGradInput = module.backward(input, gradOutput.narrow(
self.dimension, offset, currentOutput.size(self.dimension)), scale)
# if the module does not produce a gradInput (for example the first layer), ignore it and move on.
if currentGradInput is not None:
if i == 0:
self.gradInput.copy_(currentGradInput)
else:
self.gradInput.add_(currentGradInput)
offset = offset + currentOutput.size(self.dimension)
return self.gradInput
def accUpdateGradParameters(self, input, gradOutput, lr):
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
module.accUpdateGradParameters(
input,
gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)),
lr)
offset = offset + currentOutput.size(self.dimension)
def __repr__(self):
tab = ' '
line = '\n'
next = ' |`-> '
ext = ' | '
extlast = ' '
last = ' +. -> '
res = torch.typename(self)
res += ' {' + line + tab + 'input'
for i in range(len(self.modules)):
if i == len(self.modules) - 1:
res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + extlast)
else:
res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext)
res += line + tab + last + 'output'
res += line + '}'
return res

View File

@ -1,112 +0,0 @@
import torch
from .Container import Container
class ConcatTable(Container):
def __init__(self, ):
super(ConcatTable, self).__init__()
self.modules = []
self.output = []
def updateOutput(self, input):
self.output = [module.updateOutput(input) for module in self.modules]
return self.output
def _map_list(self, l1, l2, f):
for i, v in enumerate(l2):
if isinstance(v, list):
res = self._map_list(l1[i] if i < len(l1) else [], v, f)
if i >= len(l1):
assert i == len(l1)
l1.append(res)
else:
l1[i] = res
else:
f(l1, i, v)
for i in range(len(l1) - 1, len(l2) - 1, -1):
del l1[i]
return l1
def _backward(self, method, input, gradOutput, scale=1):
isTable = isinstance(input, list)
wasTable = isinstance(self.gradInput, list)
if isTable:
for i, module in enumerate(self.modules):
if method == 'updateGradInput':
currentGradInput = module.updateGradInput(input, gradOutput[i])
elif method == 'backward':
currentGradInput = module.backward(input, gradOutput[i], scale)
if not isinstance(currentGradInput, list):
raise RuntimeError("currentGradInput is not a table!")
if len(input) != len(currentGradInput):
raise RuntimeError("table size mismatch")
if i == 0:
self.gradInput = self.gradInput if wasTable else []
def fn(l, i, v):
if i >= len(l):
assert len(l) == i
l.append(v.clone())
else:
l[i].resize_as_(v)
l[i].copy_(v)
self._map_list(self.gradInput, currentGradInput, fn)
else:
def fn(l, i, v):
if i < len(l):
l[i].add_(v)
else:
assert len(l) == i
l.append(v.clone())
self._map_list(self.gradInput, currentGradInput, fn)
else:
self.gradInput = self.gradInput if not wasTable else input.clone()
for i, module in enumerate(self.modules):
if method == 'updateGradInput':
currentGradInput = module.updateGradInput(input, gradOutput[i])
elif method == 'backward':
currentGradInput = module.backward(input, gradOutput[i], scale)
if i == 0:
self.gradInput.resize_as_(currentGradInput).copy_(currentGradInput)
else:
self.gradInput.add_(currentGradInput)
return self.gradInput
def updateGradInput(self, input, gradOutput):
return self._backward('updateGradInput', input, gradOutput)
def backward(self, input, gradOutput, scale=1):
return self._backward('backward', input, gradOutput, scale)
def accGradParameters(self, input, gradOutput, scale=1):
for i, module in enumerate(self.modules):
self.rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale)
def accUpdateGradParameters(self, input, gradOutput, lr):
for i, module in enumerate(self.modules):
self.rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr)
def __repr__(self):
tab = ' '
line = '\n'
next = ' |`-> '
ext = ' | '
extlast = ' '
last = ' +. -> '
res = torch.typename(self)
res = res + ' {' + line + tab + 'input'
for i in range(len(self.modules)):
if i == len(self.modules) - 1:
res = res + line + tab + next + '(' + str(i) + '): ' + \
str(self.modules[i]).replace(line, line + tab + extlast)
else:
res = res + line + tab + next + '(' + str(i) + '): ' + \
str(self.modules[i]).replace(line, line + tab + ext)
res = res + line + tab + last + 'output'
res = res + line + '}'
return res

View File

@ -1,66 +0,0 @@
import torch
from .Module import Module
from .utils import clear
from functools import wraps
import sys
class Container(Module):
def __init__(self, *args):
super(Container, self).__init__(*args)
self.modules = []
def add(self, module):
self.modules.append(module)
return self
def get(self, index):
return self.modules[index]
def size(self):
return len(self.modules)
def applyToModules(self, func):
for module in self.modules:
func(module)
def zeroGradParameters(self):
self.applyToModules(lambda m: m.zeroGradParameters())
def updateParameters(self, learningRate):
self.applyToModules(lambda m: m.updateParameters(learningRate))
def training(self):
self.applyToModules(lambda m: m.training())
super(Container, self).training()
def evaluate(self, ):
self.applyToModules(lambda m: m.evaluate())
super(Container, self).evaluate()
def share(self, mlp, *args):
for module, other_module in zip(self.modules, mlp.modules):
module.share(other_module, *args)
def reset(self, stdv=None):
self.applyToModules(lambda m: m.reset(stdv))
def parameters(self):
w = []
gw = []
for module in self.modules:
mparam = module.parameters()
if mparam is not None:
w.extend(mparam[0])
gw.extend(mparam[1])
if not w:
return
return w, gw
def clearState(self):
clear(self, 'output')
clear(self, 'gradInput')
for module in self.modules:
module.clearState()
return self

View File

@ -1,21 +0,0 @@
import torch
from .Module import Module
class Contiguous(Module):
def updateOutput(self, input):
if not input.is_contiguous():
self.output.resize_as_(input).copy_(input)
else:
self.output.set_(input)
return self.output
def updateGradInput(self, input, gradOutput):
if not gradOutput.is_contiguous():
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
else:
self.gradInput.set_(gradOutput)
return self.gradInput

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class Copy(Module):
def __init__(self, intype, outtype, dontCast=False):
self.dontCast = dontCast
super(Copy, self).__init__()
self.gradInput = intype()
self.output = outtype()
def updateOutput(self, input):
self.output.resize_(input.size()).copy_(input)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_(gradOutput.size()).copy_(gradOutput)
return self.gradInput
def type(self, type=None, tensorCache=None):
if type and self.dontCast:
return self
return super(Copy, self).type(type, tensorCache)

View File

@ -1,153 +0,0 @@
import math
import torch
from .Module import Module
from .utils import clear
class Cosine(Module):
def __init__(self, inputSize, outputSize):
super(Cosine, self).__init__()
self.weight = torch.Tensor(outputSize, inputSize)
self.gradWeight = torch.Tensor(outputSize, inputSize)
self.reset()
self._weight = None
self._sum = None
self._gradOutput = None
self._sum = None
self._weightNorm = None
self._inputNorm = None
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.size(0))
self.weight.uniform_(-stdv, stdv)
def updateOutput(self, input):
assert input.dim() == 2
inputSize = self.weight.size(1)
outputSize = self.weight.size(0)
if self._weightNorm is None:
self._weightNorm = self.weight.new()
if self._inputNorm is None:
self._inputNorm = self.weight.new()
# y_j = (w_j * x) / ( || w_j || * || x || )
torch.norm(self.weight, 2, 1, out=self._weightNorm, keepdim=True).add_(1e-12)
batchSize = input.size(0)
nelement = self.output.nelement()
self.output.resize_(batchSize, outputSize)
if self.output.nelement() != nelement:
self.output.zero_()
self.output.addmm_(0., 1., input, self.weight.t())
torch.norm(input, 2, 1, out=self._inputNorm, keepdim=True).add_(1e-12)
self.output.div_(self._weightNorm.view(1, outputSize).expand_as(self.output))
self.output.div_(self._inputNorm.expand_as(self.output))
return self.output
def updateGradInput(self, input, gradOutput):
assert input.dim() == 2
if self.gradInput is None:
return
inputSize = self.weight.size(1)
outputSize = self.weight.size(0)
"""
dy_j / dx_i = w_ji / (|| w_j || * || x ||) - y_j * x_i / || x ||^2
"""
nelement = self.gradInput.nelement()
self.gradInput.resize_as_(input)
if self.gradInput.nelement() != nelement:
self.gradInput.zero_()
inputNorm = self._inputNorm.expand_as(input)
weightNorm = self._weightNorm.view(1, outputSize).expand_as(gradOutput)
if self._gradOutput is None:
self._gradOutput = gradOutput.new()
if self._sum is None:
self._sum = input.new()
self.gradInput.copy_(input).div_(inputNorm)
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
self._gradOutput.mul_(self.output)
torch.sum(self._gradOutput, 1, out=self._sum, keepdim=True)
self.gradInput.mul_(self._sum.expand_as(input))
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
self._gradOutput.div_(weightNorm)
self.gradInput.addmm_(-1, 1, self._gradOutput, self.weight)
self.gradInput.div_(inputNorm)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
assert input.dim() == 2
inputSize = self.weight.size(1)
outputSize = self.weight.size(0)
"""
dy_j / dw_ji = x_i / (|| w_j || * || x ||) - y_j * w_ji / || w_j ||^2
"""
if self._weight is None:
self._weight = self.weight.new()
if self._sum is None:
self._sum = input.new()
self._weight.resize_as_(self.weight).copy_(self.weight)
if self._gradOutput is None:
self._gradOutput = gradOutput.new()
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
self._gradOutput.mul_(self.output)
torch.sum(self._gradOutput, 0, out=self._sum, keepdim=True)
grad = self._sum[0]
grad.div_(self._weightNorm.select(1, 0))
self._weight.mul_(grad.view(outputSize, 1).expand_as(self._weight))
input_ = self._gradOutput
input_.resize_as_(input).copy_(input)
input_.div_(self._inputNorm.expand_as(input))
self._weight.addmm_(-1, 1, gradOutput.t(), input_)
self._weight.div_(self._weightNorm.expand_as(self._weight))
self.gradWeight.add_(self._weight)
def type(self, type=None, tensorCache=None):
if type is not None:
# prevent premature memory allocations
self._input = None
self._weight = None
self._inputNorm = None
self._weightNorm = None
self._gradOutput = None
self._sum = None
return super(Cosine, self).type(type, tensorCache)
def clearState(self):
clear(self, [
'_input',
'_weight',
'_gradOutput',
'_sum',
'_inputNorm',
'_weightNorm',
])
return super(Cosine, self).clearState()
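
The derivative formulas above follow from the forward pass y_j = (w_j · x) / (|| w_j || * || x ||): each output is the cosine similarity between an input row and a weight row. An editor sketch of the same computation with modern ops (hypothetical shapes; equivalent up to how the 1e-12 epsilon is applied):

import torch
import torch.nn.functional as F

x = torch.randn(4, 10)   # (batch, inputSize)
w = torch.randn(8, 10)   # (outputSize, inputSize), as in Cosine(10, 8)

# cosine similarity of every input row against every weight row -> (4, 8)
y = F.normalize(x, dim=1, eps=1e-12) @ F.normalize(w, dim=1, eps=1e-12).t()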

View File

@ -1,108 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class CosineDistance(Module):
def __init__(self, ):
super(CosineDistance, self).__init__()
self.gradInput = [torch.Tensor(), torch.Tensor()]
self._input1 = None
self._input2 = None
self.buffer = None
self.w1 = None
self.w22 = None
self.w = None
self.w32 = None
self.ones = None
def _makeContiguous(self, input1, input2):
if not input1.is_contiguous():
if self._input1 is None:
self._input1 = input1.new()
self._input1.resize_as_(input1).copy_(input1)
input1 = self._input1
if not input2.is_contiguous():
if self._input2 is None:
self._input2 = input2.new()
self._input2.resize_as_(input2).copy_(input2)
input2 = self._input2
return input1, input2
def updateOutput(self, input):
input1, input2 = input[0], input[1]
input1, input2 = self._makeContiguous(input1, input2)
if self.buffer is None:
self.buffer = input1.new()
self.w1 = input1.new()
self.w22 = input1.new()
self.w = input1.new()
self.w32 = input1.new()
self.ones = input1.new()
torch.mul(input1, input2, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w1, keepdim=True)
epsilon = 1e-12
torch.mul(input1, input1, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
self.w22.reciprocal_()
self.w.resize_as_(self.w22).copy_(self.w22)
torch.mul(input2, input2, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
self.w32.reciprocal_()
self.w.mul_(self.w32)
self.w.sqrt_()
torch.mul(self.w1, self.w, out=self.output)
self.output.resize_(input1.size(0))
return self.output
def updateGradInput(self, input, gradOutput):
v1 = input[0]
v2 = input[1]
v1, v2 = self._makeContiguous(v1, v2)
if len(self.gradInput) != 2:
if self.gradInput[0] is None:
self.gradInput[0] = v1.new()
if self.gradInput[1] is None:
self.gradInput[1] = v1.new()
self.gradInput = self.gradInput[:2]
gw1 = self.gradInput[0]
gw2 = self.gradInput[1]
gw1.resize_as_(v1).copy_(v2)
gw2.resize_as_(v1).copy_(v1)
torch.mul(self.w1, self.w22, out=self.buffer)
gw1.addcmul_(-1, self.buffer.expand_as(v1), v1)
gw1.mul_(self.w.expand_as(v1))
torch.mul(self.w1, self.w32, out=self.buffer)
gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
gw2.mul_(self.w.expand_as(v1))
go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
gw1.mul_(go)
gw2.mul_(go)
return self.gradInput
def clearState(self):
clear(self, [
'buffer',
'w1',
'w22',
'w',
'w32',
'ones',
])
return super(CosineDistance, self).clearState()

View File

@ -1,117 +0,0 @@
import torch
from .Criterion import Criterion
class CosineEmbeddingCriterion(Criterion):
def __init__(self, margin=0, sizeAverage=True):
super(CosineEmbeddingCriterion, self).__init__()
self.margin = margin
self.sizeAverage = sizeAverage
self.gradInput = [torch.Tensor(), torch.Tensor()]
self.buffer = None
self.w1 = None
self.w22 = None
self.w = None
self.w32 = None
self._outputs = None
self._idx = None
def updateOutput(self, input, y):
input1, input2 = input[0], input[1]
# keep backward compatibility
if self.buffer is None:
self.buffer = input1.new()
self.w1 = input1.new()
self.w22 = input1.new()
self.w = input1.new()
self.w32 = input1.new()
self._outputs = input1.new()
# comparison operators behave differently from cuda/c implementations
# TODO: verify name
if input1.type() == 'torch.cuda.FloatTensor':
self._idx = torch.cuda.ByteTensor()
else:
self._idx = torch.ByteTensor()
torch.mul(input1, input2, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w1, keepdim=True)
epsilon = 1e-12
torch.mul(input1, input1, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
# self._outputs is also used as a temporary buffer
self._outputs.resize_as_(self.w22).fill_(1)
torch.div(self._outputs, self.w22, out=self.w22)
self.w.resize_as_(self.w22).copy_(self.w22)
torch.mul(input2, input2, out=self.buffer)
torch.sum(self.buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
torch.div(self._outputs, self.w32, out=self.w32)
self.w.mul_(self.w32)
self.w.sqrt_()
torch.mul(self.w1, self.w, out=self._outputs)
self._outputs = self._outputs.select(1, 0)
torch.eq(y, -1, out=self._idx)
self._outputs[self._idx] = self._outputs[self._idx].add_(-self.margin).clamp_(min=0)
torch.eq(y, 1, out=self._idx)
self._outputs[self._idx] = self._outputs[self._idx].mul_(-1).add_(1)
self.output = self._outputs.sum().item()
if self.sizeAverage:
self.output = self.output / y.size(0)
return self.output
def updateGradInput(self, input, y):
v1 = input[0]
v2 = input[1]
gw1 = self.gradInput[0]
gw2 = self.gradInput[1]
gw1.resize_as_(v1).copy_(v2)
gw2.resize_as_(v1).copy_(v1)
torch.mul(self.w1, self.w22, out=self.buffer)
gw1.addcmul_(-1, self.buffer.expand_as(v1), v1)
gw1.mul_(self.w.expand_as(v1))
torch.mul(self.w1, self.w32, out=self.buffer)
gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
gw2.mul_(self.w.expand_as(v1))
# self._idx = self._outputs <= 0
torch.le(self._outputs, 0, out=self._idx)
self._idx = self._idx.view(-1, 1).expand(gw1.size())
gw1[self._idx] = 0
gw2[self._idx] = 0
torch.eq(y, 1, out=self._idx)
self._idx = self._idx.view(-1, 1).expand(gw2.size())
gw1[self._idx] = gw1[self._idx].mul_(-1)
gw2[self._idx] = gw2[self._idx].mul_(-1)
if self.sizeAverage:
gw1.div_(y.size(0))
gw2.div_(y.size(0))
return self.gradInput
def type(self, type=None, tensorCache=None):
if not type:
return self._type
self._idx = None
super(CosineEmbeddingCriterion, self).type(type, tensorCache)
# comparison operators behave differently from cuda/c implementations
if type == 'torch.cuda.FloatTensor':
self._idx = torch.cuda.ByteTensor()
else:
self._idx = torch.ByteTensor()
return self
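
# Hedged reference sketch (plain torch, not the removed module) of the per-sample loss
# that updateOutput above computes: 1 - cos(x1, x2) when y == 1, and
# max(0, cos(x1, x2) - margin) when y == -1; the mean mirrors sizeAverage=True.
# Function and variable names here are illustrative.
import torch
def cosine_embedding_sketch(x1, x2, y, margin=0.0):
    cos = (x1 * x2).sum(1) / ((x1 * x1).sum(1) * (x2 * x2).sum(1)).clamp(min=1e-12).sqrt()
    per_sample = torch.where(y == 1, 1 - cos, (cos - margin).clamp(min=0))
    return per_sample.mean()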

View File

@ -1,44 +0,0 @@
import torch
from .Module import Module
from .utils import recursiveType
import torch._thnn
class Criterion(object):
def __init__(self):
self.gradInput = torch.Tensor()
self.output = 0
self._backend = torch._thnn.type2backend[self.gradInput.type()]
def updateOutput(self, input, target):
raise NotImplementedError
def forward(self, input, target):
return self.updateOutput(input, target)
def backward(self, input, target):
return self.updateGradInput(input, target)
def updateGradInput(self, input, target):
raise NotImplementedError
def clone(self):
raise NotImplementedError
def type(self, type, tensorCache=None):
# find all tensors and convert them
for key, param in self.__dict__.items():
setattr(self, key, recursiveType(param, type, tensorCache or {}))
self._backend = torch._thnn.type2backend[type]
return self
def float(self):
return self.type('torch.FloatTensor')
def double(self):
return self.type('torch.DoubleTensor')
def cuda(self):
return self.type('torch.cuda.FloatTensor')

View File

@ -1,18 +0,0 @@
import torch
from .Module import Module
class CriterionTable(Module):
def __init__(self, criterion):
super(CriterionTable, self).__init__()
self.criterion = criterion
self.gradInput = [criterion.gradInput]
def updateOutput(self, input):
self.output = self.criterion.updateOutput(*input)
return self.output
def updateGradInput(self, input, grad_output):
self.criterion.updateGradInput(*input)
return self.gradInput

View File

@ -1,29 +0,0 @@
import torch
from .Criterion import Criterion
from .LogSoftMax import LogSoftMax
from .ClassNLLCriterion import ClassNLLCriterion
class CrossEntropyCriterion(Criterion):
def __init__(self, weights=None):
super(CrossEntropyCriterion, self).__init__()
self.lsm = LogSoftMax()
self.nll = ClassNLLCriterion(weights)
def updateOutput(self, input, target):
input = input.squeeze()
target = target.squeeze()
self.lsm.updateOutput(input)
self.nll.updateOutput(self.lsm.output, target)
self.output = self.nll.output
return self.output
def updateGradInput(self, input, target):
size = input.size()
input = input.squeeze()
target = target.squeeze()
self.nll.updateGradInput(self.lsm.output, target)
self.lsm.updateGradInput(input, self.nll.gradInput)
self.gradInput = self.lsm.gradInput.view(size)
return self.gradInput
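
# Hedged comparison sketch (not part of the removed file), assuming the current
# torch.nn.functional API: the same composition -- log-softmax followed by NLL --
# matches the fused cross_entropy call.
import torch
import torch.nn.functional as F
logits = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))
composed = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(composed, F.cross_entropy(logits, target)))  # True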

View File

@ -1,106 +0,0 @@
####################################
# DepthConcat
# Concatenates the output of Convolutions along the depth dimension
# (nOutputFrame). This is used to implement the DepthConcat layer
# of the "Going deeper with convolutions" paper:
# http://arxiv.org/pdf/1409.4842v1.pdf
# The normal Concat Module can't be used since the spatial dimensions
# of tensors to be concatenated may have different values. To deal with
# this, we select the largest spatial dimensions and add zero-padding
# around the smaller dimensions.
####################################
import math
import torch
from .Concat import Concat
class DepthConcat(Concat):
def windowNarrow(self, output, currentOutput, offset):
outputWindow = output.narrow(self.dimension, offset, currentOutput.size(self.dimension))
for dim in range(len(self.outputSize)):
currentSize = currentOutput.size(dim)
if dim != self.dimension and self.outputSize[dim] != currentSize:
# 5x5 vs 3x3 -> start = floor((5-3)/2) = 1 (1 pad each side)
# 9x9 vs 5x5 -> start = floor((9-5)/2) = 2 (2 pad each side)
# 9x9 vs 4x4 -> start = floor((9-4)/2) = 2 (2 pad before, 3 after)
start = int(math.floor(((self.outputSize[dim] - currentSize) / 2)))
outputWindow = outputWindow.narrow(dim, start, currentSize)
return outputWindow
def updateOutput(self, input):
outs = []
for i in range(len(self.modules)):
currentOutput = self.modules[i].updateOutput(input)
outs.append(currentOutput)
if i == 0:
size = list(currentOutput.size())
else:
size[self.dimension] += currentOutput.size(self.dimension)
for dim in range(len(self.outputSize)):
if dim != self.dimension:
# take the maximum size (shouldn't change anything for batch dim)
size[dim] = max(size[dim], currentOutput.size(dim))
self.outputSize = torch.Size(size)
self.output.resize_(self.outputSize).zero_() # zero for padding
offset = 0
for i, module in enumerate(self.modules):
currentOutput = outs[i]
outputWindow = self.windowNarrow(self.output, currentOutput, offset)
outputWindow.copy_(currentOutput)
offset = offset + currentOutput.size(self.dimension)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input)
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
currentGradInput = module.updateGradInput(input, gradOutputWindow)
if i == 0:
self.gradInput.copy_(currentGradInput)
else:
self.gradInput.add_(currentGradInput)
offset += currentOutput.size(self.dimension)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
module.accGradParameters(input, gradOutputWindow, scale)
offset += currentOutput.size(self.dimension)
def backward(self, input, gradOutput, scale=1):
self.gradInput.resize_as_(input)
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
currentGradInput = module.backward(input, gradOutputWindow)
if i == 0:
self.gradInput.copy_(currentGradInput)
else:
self.gradInput.add_(currentGradInput)
offset = offset + currentOutput.size(self.dimension)
return self.gradInput
def accUpdateGradParameters(self, input, gradOutput, lr):
offset = 0
for i, module in enumerate(self.modules):
currentOutput = module.output
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
module.accUpdateGradParameters(input, gradOutputWindow, lr)
offset = offset + currentOutput.size(self.dimension)
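
# A minimal sketch (not part of the removed file; shapes are made up) of the centered
# zero-padding that windowNarrow performs: a 3x3 map is copied into the middle of a
# 5x5 zero slot, leaving a 1-pixel border on each side.
import torch
full = torch.zeros(1, 5, 5)      # padded slot in the concatenated output
small = torch.ones(1, 3, 3)      # smaller module output to be centered
window = full
for dim in (1, 2):               # spatial dims; dim 0 is the concat (depth) dim here
    start = (full.size(dim) - small.size(dim)) // 2   # floor((5 - 3) / 2) = 1
    window = window.narrow(dim, start, small.size(dim))
window.copy_(small)              # narrow() returns a view, so this writes into `full`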

View File

@ -1,38 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class DistKLDivCriterion(Criterion):
def __init__(self, sizeAverage=True):
super(DistKLDivCriterion, self).__init__()
self.sizeAverage = sizeAverage
self.output_tensor = torch.Tensor(1)
def updateOutput(self, input, target):
assert input.is_same_size(target)
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.DistKLDivCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
assert input.is_same_size(target)
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.DistKLDivCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,49 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class DotProduct(Module):
def __init__(self):
super(DotProduct, self).__init__()
self.gradInput = [torch.Tensor(), torch.Tensor()]
self.buffer = None
def updateOutput(self, input):
input1, input2 = input[0], input[1]
if self.buffer is None:
self.buffer = input1.new()
torch.mul(input1, input2, out=self.buffer)
torch.sum(self.buffer, 1, True, out=self.output)
self.output.resize_(input1.size(0))
return self.output
def updateGradInput(self, input, gradOutput):
v1 = input[0]
v2 = input[1]
not_batch = False
if len(self.gradInput) != 2:
if self.gradInput[0] is None:
self.gradInput[0] = input[0].new()
if self.gradInput[1] is None:
self.gradInput[1] = input[1].new()
self.gradInput = self.gradInput[:2]
gw1 = self.gradInput[0]
gw2 = self.gradInput[1]
gw1.resize_as_(v1).copy_(v2)
gw2.resize_as_(v2).copy_(v1)
go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
gw1.mul_(go)
gw2.mul_(go)
return self.gradInput
def clearState(self):
clear(self, 'buffer')
return super(DotProduct, self).clearState()

View File

@ -1,48 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class Dropout(Module):
def __init__(self, p=0.5, inplace=False):
super(Dropout, self).__init__()
self.p = p
self.inplace = inplace
self.train = True
self.noise = torch.Tensor()
def updateOutput(self, input):
if self.inplace:
self.output.set_(input)
else:
self.output.resize_as_(input).copy_(input)
if self.p > 0 and self.train:
self.noise.resize_as_(input)
self.noise.bernoulli_(1 - self.p)
self.noise.div_(1 - self.p)
self.output.mul_(self.noise)
return self.output
def updateGradInput(self, input, gradOutput):
if self.inplace:
self.gradInput.set_(gradOutput)
else:
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
if self.p > 0 and self.train:
self.gradInput.mul_(self.noise) # simply mask the gradients with the noise vector
return self.gradInput
def setp(self, p):
self.p = p
def __repr__(self):
return super(Dropout, self).__repr__() + '({:.4f})'.format(self.p)
def clearState(self):
clear(self, 'noise')
return super(Dropout, self).clearState()

View File

@ -1,44 +0,0 @@
# -*- coding: utf8 -*-
import torch
from .Module import Module
class ELU(Module):
"""
Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter
Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
http.//arxiv.org/pdf/1511.07289.pdf
"""
def __init__(self, alpha=1., inplace=False):
assert type(alpha) == float
super(ELU, self).__init__()
self.alpha = alpha
self.inplace = inplace
def updateOutput(self, input):
self._backend.ELU_updateOutput(
self._backend.library_state,
input,
self.output,
self.alpha,
1.0,
1.0,
self.inplace
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.ELU_updateGradInput(
self._backend.library_state,
gradOutput,
self.gradInput,
self.output,
self.alpha,
1.0,
1.0
)
return self.gradInput
def __repr__(self):
return '{}(alpha={:.3f})'.format(str(type(self)), self.alpha)
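
# Reference sketch (not part of the removed file) of the definition the backend call
# implements, written with plain torch ops from the current API:
# elu(x) = x for x > 0, alpha * (exp(x) - 1) otherwise.
import torch
def elu_sketch(x, alpha=1.0):
    return torch.where(x > 0, x, alpha * (torch.exp(x) - 1))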

View File

@ -1,172 +0,0 @@
import math
import torch
from .Module import Module
from .utils import clear
class Euclidean(Module):
def __init__(self, inputSize, outputSize):
super(Euclidean, self).__init__()
self.weight = torch.Tensor(inputSize, outputSize)
self.gradWeight = torch.Tensor(inputSize, outputSize)
# state
self.gradInput.resize_(inputSize)
self.output.resize_(outputSize)
self.fastBackward = True
self.reset()
self._input = None
self._weight = None
self._expand = None
self._expand2 = None
self._repeat = None
self._repeat2 = None
self._div = None
self._output = None
self._gradOutput = None
self._expand3 = None
self._sum = None
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.size(0))
self.weight.uniform_(-stdv, stdv)
def _view(self, res, src, *args):
if src.is_contiguous():
res.set_(src.view(*args))
else:
res.set_(src.contiguous().view(*args))
def updateOutput(self, input):
# lazy initialize buffers
if self._input is None:
self._input = input.new()
if self._weight is None:
self._weight = self.weight.new()
if self._expand is None:
self._expand = self.output.new()
if self._expand2 is None:
self._expand2 = self.output.new()
if self._repeat is None:
self._repeat = self.output.new()
if self._repeat2 is None:
self._repeat2 = self.output.new()
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
# y_j = || w_j - x || = || x - w_j ||
assert input.dim() == 2
batchSize = input.size(0)
self._view(self._input, input, batchSize, inputSize, 1)
self._expand = self._input.expand(batchSize, inputSize, outputSize)
# make the expanded tensor contiguous (requires lots of memory)
self._repeat.resize_as_(self._expand).copy_(self._expand)
self._weight = self.weight.view(1, inputSize, outputSize)
self._expand2 = self._weight.expand_as(self._repeat)
if torch.typename(input) == 'torch.cuda.FloatTensor':
# TODO: after adding new allocators this can be changed
# requires lots of memory, but minimizes cudaMallocs and loops
self._repeat2.resize_as_(self._expand2).copy_(self._expand2)
self._repeat.add_(-1, self._repeat2)
else:
self._repeat.add_(-1, self._expand2)
torch.norm(self._repeat, 2, 1, True, out=self.output)
self.output.resize_(batchSize, outputSize)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is None:
return
if self._div is None:
self._div = input.new()
if self._output is None:
self._output = self.output.new()
if self._gradOutput is None:
self._gradOutput = input.new()
if self._expand3 is None:
self._expand3 = input.new()
if not self.fastBackward:
self.updateOutput(input)
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
"""
dy_j -2 * (w_j - x) x - w_j
---- = ---------------- = -------
dx 2 || w_j - x || y_j
"""
# to prevent div by zero (NaN) bugs
self._output.resize_as_(self.output).copy_(self.output).add_(0.0000001)
self._view(self._gradOutput, gradOutput, gradOutput.size())
torch.div(gradOutput, self._output, out=self._div)
assert input.dim() == 2
batchSize = input.size(0)
self._div.resize_(batchSize, 1, outputSize)
self._expand3 = self._div.expand(batchSize, inputSize, outputSize)
if torch.typename(input) == 'torch.cuda.FloatTensor':
self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
self._repeat2.mul_(self._repeat)
else:
torch.mul(self._repeat, self._expand3, out=self._repeat2)
torch.sum(self._repeat2, 2, True, out=self.gradInput)
self.gradInput.resize_as_(input)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
"""
dy_j 2 * (w_j - x) w_j - x
---- = --------------- = -------
dw_j 2 || w_j - x || y_j
"""
# assumes a preceding call to updateGradInput
assert input.dim() == 2
if self._sum is None:
self._sum = input.new()
torch.sum(self._repeat2, 0, True, out=self._sum)
self._sum.resize_(inputSize, outputSize)
self.gradWeight.add_(-scale, self._sum)
def type(self, type=None, tensorCache=None):
if type:
# prevent premature memory allocations
self.clearState()
return super(Euclidean, self).type(type, tensorCache)
def clearState(self):
clear(self, [
'_input',
'_output',
'_gradOutput',
'_weight',
'_div',
'_sum',
'_expand',
'_expand2',
'_expand3',
'_repeat',
'_repeat2',
])
return super(Euclidean, self).clearState()
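
# A hedged sanity check (plain torch autograd from the current API, not the removed
# module) of the gradient formula quoted in the docstrings above:
# d||x - w|| / dx = (x - w) / ||x - w||.
import torch
x = torch.randn(10, requires_grad=True)
w = torch.randn(10)
y = (x - w).norm(2)
y.backward()
print(torch.allclose(x.grad, (x.detach() - w) / y.detach()))  # True away from x == w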

View File

@ -1,11 +0,0 @@
import torch
from .Module import Module
class Exp(Module):
def updateOutput(self, input):
return torch.exp(input, out=self.output)
def updateGradInput(self, input, gradOutput):
return torch.mul(self.output, gradOutput, out=self.gradInput)

View File

@ -1,85 +0,0 @@
import torch
from .Module import Module
class FlattenTable(Module):
def __init__(self):
super(FlattenTable, self).__init__()
self.output = []
self.input_map = []
self.gradInput = []
def _flatten(self, output, input):
if isinstance(input, list):
input_map = []
# forward DFS order
for i in range(len(input)):
input_map.append(self._flatten(output, input[i]))
else:
input_map = len(output)
output.append(input)
return input_map
def _checkMapping(self, output, input, input_map):
if isinstance(input, list):
if len(input) != len(input_map):
return False
# forward DFS order
for i in range(len(input)):
if not self._checkMapping(output, input[i], input_map[i]):
return False
return True
else:
return output[input_map] is input
# During BPROP we have to build a gradInput with the same shape as the
# input. This is a recursive function to build up a gradInput
def _inverseFlatten(self, gradOutput, input_map):
if isinstance(input_map, list):
gradInput = []
for i in range(len(input_map)):
gradInput.append(self._inverseFlatten(gradOutput, input_map[i]))
return gradInput
else:
return gradOutput[input_map]
def updateOutput(self, input):
assert isinstance(input, list)
# to avoid rebuilding the flattened table on every updateOutput call,
# we do a DFS pass over the existing output table and the inputs to
# see if it needs to be rebuilt.
if not self._checkMapping(self.output, input, self.input_map):
self.output = []
self.input_map = self._flatten(self.output, input)
return self.output
def updateGradInput(self, input, gradOutput):
assert isinstance(input, list)
assert isinstance(gradOutput, list)
# If the input changes between the updateOutput and updateGradInput call,
# we may have to rebuild the input_map. However, let's assume that
# the input_map is valid and that forward has already been called.
# However, we should check that the gradInput is valid:
if not self._checkMapping(gradOutput, self.gradInput, self.input_map):
self.gradInput = self._inverseFlatten(gradOutput, self.input_map)
return self.gradInput
def type(self, type=None, tensorCache=None):
if not type:
return self._type
# This function just stores references so we don't need to do any type
# conversions. Just force the tables to be empty.
self.clearState()
def clearState(self):
self.input_map = []
return super(FlattenTable, self).clearState()

View File

@ -1,22 +0,0 @@
import torch
from .Module import Module
class GradientReversal(Module):
def __init__(self, lambd=1):
super(GradientReversal, self).__init__()
self.lambd = lambd
def setLambda(self, lambd):
self.lambd = lambd
def updateOutput(self, input):
self.output.set_(input)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(gradOutput)
self.gradInput.copy_(gradOutput)
self.gradInput.mul_(-self.lambd)
return self.gradInput

View File

@ -1,29 +0,0 @@
import torch
from .Module import Module
class HardShrink(Module):
def __init__(self, lambd=0.5):
assert type(lambd) == float
super(HardShrink, self).__init__()
self.lambd = lambd
def updateOutput(self, input):
self._backend.HardShrink_updateOutput(
self._backend.library_state,
input,
self.output,
self.lambd
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.HardShrink_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.lambd
)
return self.gradInput

View File

@ -1,35 +0,0 @@
import torch
from .Module import Module
class HardTanh(Module):
def __init__(self, min_value=-1, max_value=1, inplace=False):
super(HardTanh, self).__init__()
self.min_val = min_value
self.max_val = max_value
self.inplace = inplace
assert self.max_val > self.min_val
def updateOutput(self, input):
self._backend.HardTanh_updateOutput(
self._backend.library_state,
input,
self.output,
self.min_val,
self.max_val,
self.inplace
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.HardTanh_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.min_val,
self.max_val,
self.inplace
)
return self.gradInput

View File

@ -1,37 +0,0 @@
import torch
from .Criterion import Criterion
class HingeEmbeddingCriterion(Criterion):
def __init__(self, margin=1, sizeAverage=True):
super(HingeEmbeddingCriterion, self).__init__()
self.margin = margin
self.sizeAverage = sizeAverage
self.buffer = None
def updateOutput(self, input, y):
if self.buffer is None:
self.buffer = input.new()
self.buffer.resize_as_(input).copy_(input)
self.buffer[torch.eq(y, -1.)] = 0
self.output = self.buffer.sum().item()
self.buffer.fill_(self.margin).add_(-1, input)
self.buffer.clamp_(min=0)
self.buffer[torch.eq(y, 1.)] = 0
self.output = self.output + self.buffer.sum().item()
if self.sizeAverage:
self.output = self.output / input.nelement()
return self.output
def updateGradInput(self, input, y):
self.gradInput.resize_as_(input).copy_(y)
self.gradInput[torch.mul(torch.eq(y, -1), torch.gt(input, self.margin))] = 0
if self.sizeAverage:
self.gradInput.mul_(1. / input.nelement())
return self.gradInput

View File

@ -1,17 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class Identity(Module):
def updateOutput(self, input):
self.output = input
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput = gradOutput
return self.gradInput
def clearState(self):
clear(self, 'gradInput')

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class Index(Module):
def __init__(self, dimension):
super(Index, self).__init__()
self.dimension = dimension
self.gradInput = [self.gradInput]
def updateOutput(self, input):
t = input[0]
index = input[1]
torch.index_select(t, self.dimension, index, out=self.output)
return self.output
def updateGradInput(self, input, gradOutput):
t = input[0]
index = input[1]
gradInput = self.gradInput[0] # no gradient for the index tensor
gradInput.resize_as_(t).zero_()
gradInput.index_add_(self.dimension, index, gradOutput)
return self.gradInput

View File

@ -1,62 +0,0 @@
import torch
from .Module import Module
class JoinTable(Module):
def __init__(self, dimension):
super(JoinTable, self).__init__()
self.size = torch.Size()
self.dimension = dimension
self.gradInput = []
def _getPositiveDimension(self, input):
dimension = self.dimension
if dimension < 0:
dimension = input[0].dim() + dimension
return dimension
def updateOutput(self, input):
dim = self._getPositiveDimension(input)
for i in range(len(input)):
currentOutput = input[i]
if i == 0:
size = list(currentOutput.size())
else:
size[dim] += currentOutput.size(dim)
self.size = torch.Size(size)
self.output.resize_(self.size)
# TODO: use cat?
offset = 0
for i in range(len(input)):
currentOutput = input[i]
self.output.narrow(dim, offset, currentOutput.size(dim)).copy_(currentOutput)
offset += currentOutput.size(dim)
return self.output
def updateGradInput(self, input, gradOutput):
dim = self._getPositiveDimension(input)
for i in range(len(input)):
if len(self.gradInput) < i + 1:
self.gradInput.append(input[i].new())
self.gradInput[i].resize_as_(input[i])
self.gradInput = self.gradInput[:len(input)]
offset = 0
for i in range(len(input)):
currentOutput = input[i]
currentGradInput = gradOutput.narrow(dim, offset, currentOutput.size(dim))
self.gradInput[i].copy_(currentGradInput)
offset = offset + currentOutput.size(dim)
return self.gradInput
def type(self, type=None, tensorCache=None):
self.gradInput = []
return super(JoinTable, self).type(type, tensorCache)

View File

@ -1,36 +0,0 @@
import torch
from .Criterion import Criterion
from .utils import clear
class L1Cost(Criterion):
def __init__(self):
super(L1Cost, self).__init__()
self.output_tensor = torch.Tensor(1)
def updateOutput(self, input, target=None):
assert target is None
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.L1Cost_updateOutput(
self._backend.library_state,
input,
self.output_tensor
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target=None):
assert target is None
self._backend.L1Cost_updateGradInput(
self._backend.library_state,
input,
None,
self.gradInput
)
return self.gradInput
def clearState(self):
clear(self, 'output_tensor')
return super(L1Cost, self).clearState()

View File

@ -1,36 +0,0 @@
import torch
from .Criterion import Criterion
class L1HingeEmbeddingCriterion(Criterion):
def __init__(self, margin=1):
super(L1HingeEmbeddingCriterion, self).__init__()
self.margin = float(margin)
self.gradInput = [torch.Tensor(), torch.Tensor()]
def updateOutput(self, input, y):
self.output = float(input[0].dist(input[1], 1))
if y == -1:
self.output = max(0, self.margin - self.output)
return self.output
def _mathsign(t):
return 1 if t > 0 else -1
def updateGradInput(self, input, y):
self.gradInput[0].resize_as_(input[0])
self.gradInput[1].resize_as_(input[1])
self.gradInput[0].copy_(input[0])
self.gradInput[0].add_(-1, input[1])
dist = self.gradInput[0].norm(1)
self.gradInput[0].sign_()
if y == -1: # just to avoid a mul by 1
if dist > self.margin:
self.gradInput[0].zero_()
else:
self.gradInput[0].mul_(-1)
self.gradInput[1].zero_().add_(-1, self.gradInput[0])
return self.gradInput

View File

@ -1,37 +0,0 @@
import torch
from .Module import Module
# This module acts as an L1 latent state regularizer, adding the
# [gradOutput] to the gradient of the L1 loss. The [input] is copied to
# the [output].
class L1Penalty(Module):
def __init__(self, l1weight, sizeAverage=False, provideOutput=True):
super(L1Penalty, self).__init__()
self.l1weight = l1weight
self.sizeAverage = sizeAverage
self.provideOutput = provideOutput
def updateOutput(self, input):
m = self.l1weight
if self.sizeAverage:
m = m / input.nelement()
loss = m * input.norm(1)
self.loss = loss
self.output = input
return self.output
def updateGradInput(self, input, gradOutput):
m = self.l1weight
if self.sizeAverage:
m = m / input.nelement()
self.gradInput.resize_as_(input).copy_(input).sign_().mul_(m)
if self.provideOutput:
self.gradInput.add_(gradOutput)
return self.gradInput
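
# Hedged check (plain torch autograd from the current API, not the removed module) of
# the extra gradient this penalty contributes, as described in the comment above:
# d/dx (l1weight * ||x||_1) = l1weight * sign(x).
import torch
x = torch.randn(5, requires_grad=True)
l1weight = 0.01
(l1weight * x.norm(1)).backward()
print(torch.allclose(x.grad, l1weight * x.detach().sign()))  # True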

View File

@ -1,43 +0,0 @@
import torch
from .Module import Module
class LeakyReLU(Module):
def __init__(self, negval=1 / 100, inplace=False):
super(LeakyReLU, self).__init__()
if isinstance(negval, bool):
inplace = negval
self.negval = 1 / 100
else:
self.negval = negval
# default for inplace is False
self.inplace = inplace
if self.negval < 0:
# TODO: warning here
self.inplace = False
def updateOutput(self, input):
self._backend.LeakyReLU_updateOutput(
self._backend.library_state,
input,
self.output,
self.negval,
self.inplace
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.LeakyReLU_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.negval,
self.inplace
)
return self.gradInput
def __repr__(self):
return str(type(self)) + '({:.4f})'.format(self.negval)

View File

@ -1,87 +0,0 @@
import math
import torch
from .Module import Module
from .utils import clear
class Linear(Module):
def __init__(self, inputSize, outputSize, bias=True):
super(Linear, self).__init__()
self.weight = torch.Tensor(outputSize, inputSize)
self.gradWeight = torch.Tensor(outputSize, inputSize)
self.bias = torch.Tensor(outputSize) if bias else None
self.gradBias = torch.Tensor(outputSize) if bias else None
self.reset()
self.addBuffer = None
def noBias(self):
self.bias = None
self.gradBias = None
return self
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.uniform_(-stdv, stdv)
return self
def _updateAddBuffer(self, input):
nframe = input.size(0)
if self.addBuffer is None:
self.addBuffer = input.new()
if self.addBuffer.nelement() != nframe:
self.addBuffer.resize_(nframe).fill_(1)
def updateOutput(self, input):
assert input.dim() == 2
nframe = input.size(0)
nelement = self.output.nelement()
self.output.resize_(nframe, self.weight.size(0))
if self.output.nelement() != nelement:
self.output.zero_()
self._updateAddBuffer(input)
self.output.addmm_(0, 1, input, self.weight.t())
if self.bias is not None:
self.output.addr_(self.addBuffer, self.bias)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is None:
return
nelement = self.gradInput.nelement()
self.gradInput.resize_as_(input)
if self.gradInput.nelement() != nelement:
self.gradInput.zero_()
assert input.dim() == 2
self.gradInput.addmm_(0, 1, gradOutput, self.weight)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
assert input.dim() == 2
self.gradWeight.addmm_(scale, gradOutput.t(), input)
if self.bias is not None:
# update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput
self._updateAddBuffer(input)
self.gradBias.addmv_(scale, gradOutput.t(), self.addBuffer)
def clearState(self):
clear(self, 'addBuffer')
return super(Linear, self).clearState()
def __repr__(self):
return super(Linear, self).__repr__() + \
'({} -> {})'.format(self.weight.size(1), self.weight.size(0)) + \
(' without bias' if self.bias is None else '')

View File

@ -1,18 +0,0 @@
import torch
from .Module import Module
class Log(Module):
def updateOutput(self, input):
self.output.resize_as_(input)
self.output.copy_(input)
self.output.log_()
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input)
self.gradInput.fill_(1)
self.gradInput.div_(input)
self.gradInput.mul_(gradOutput)
return self.gradInput

View File

@ -1,35 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class LogSigmoid(Module):
def __init__(self):
super(LogSigmoid, self).__init__()
self.buffer = None
def updateOutput(self, input):
if self.buffer is None:
self.buffer = input.new()
self._backend.LogSigmoid_updateOutput(
self._backend.library_state,
input,
self.output,
self.buffer
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.LogSigmoid_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.buffer
)
return self.gradInput
def clearState(self):
clear(self, 'buffer')
return super(LogSigmoid, self).clearState()

View File

@ -1,29 +0,0 @@
import torch
from .Module import Module
class LogSoftMax(Module):
def __init__(self, dim=None):
super(LogSoftMax, self).__init__()
if dim is not None:
self.dim = dim
def _get_dim(self, input):
return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)
def updateOutput(self, input):
self.output = torch.log_softmax(
input,
self._get_dim(input)
)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput = torch.log_softmax_backward_data(
gradOutput,
self.output,
self._get_dim(input),
input
)
return self.gradInput

View File

@ -1,152 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class LookupTable(Module):
def __init__(self, nIndex, nOutput, paddingValue=-1, maxNorm=None, normType=None):
super(LookupTable, self).__init__()
self.weight = torch.Tensor(nIndex, nOutput)
self.gradWeight = torch.Tensor(nIndex, nOutput).zero_()
self.paddingValue = paddingValue
self.maxNorm = maxNorm
self.normType = normType
self.shouldScaleGradByFreq = False
self._gradOutput = None
self._sorted = None
self._indices = None
self._count = torch.IntTensor()
self._input = torch.LongTensor()
self.reset()
def accUpdateOnly(self):
self.gradWeight = None
return self
def setPadding(self, paddingValue):
self.paddingValue = paddingValue
return self
def setMaxNorm(self, maxNorm):
self.maxNorm = maxNorm
return self
def setNormType(self, normType):
self.normType = normType
return self
def scaleGradByFreq(self):
self.shouldScaleGradByFreq = True
return self
def reset(self, stdv=1):
self.weight.normal_(0, stdv)
def _makeInputContiguous(self, input):
# make sure input is a contiguous torch.LongTensor
if not input.is_contiguous() or input.type() != self._input.type():
self.copiedInput = True
self._input.resize_(input.size()).copy_(input)
return self._input
else:
self.copiedInput = False
return input
def updateOutput(self, input):
self.renorm(input)
input = self._makeInputContiguous(input)
if input.dim() == 1:
torch.index_select(self.weight, 0, input, out=self.output)
elif input.dim() == 2:
torch.index_select(self.weight, 0, input.view(-1), out=self.output)
self.output = self.output.view(input.size(0), input.size(1), self.weight.size(1))
else:
raise RuntimeError("input must be a vector or matrix")
return self.output
def updateGradInput(self, input, gradOutput):
# the input can be of any type (in the forward pass it is converted
# to a LongTensor anyway), so we need to allocate
# new memory each time the user changes the input type
if self.gradInput.type() != input.type():
self.gradInput = input.new()
if not self.gradInput.is_same_size(input):
self.gradInput.resize_as_(input).zero_()
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
input = self._input if self.copiedInput else input
if input.dim() == 2:
input = input.view(-1)
elif input.dim() != 1:
raise RuntimeError("input must be a vector or matrix")
if not gradOutput.is_contiguous():
if self._gradOutput is None:
self._gradOutput = gradOutput.new()
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
gradOutput = self._gradOutput
self._backend.LookupTable_accGradParameters(
self._backend.library_state,
input,
gradOutput,
self.gradWeight,
self._count,
self._sorted,
self._indices,
self.shouldScaleGradByFreq,
self.paddingValue or 0,
scale
)
def renorm(self, input):
if self.maxNorm is None:
return
# copy input into _input, so _input is contiguous.
# The copied _input will be modified in the C code.
self._input.resize_(input.size()).copy_(input)
row_idx = self._input
if row_idx.dim() == 2:
row_idx = row_idx.view(-1)
elif row_idx.dim() != 1:
raise RuntimeError("input must be a vector or matrix")
# "row_idx" and "weight" will be modified in the C code
self._backend.LookupTable_renorm(
self._backend.library_state,
row_idx,
self.weight,
self.maxNorm,
self.normType or 2
)
def type(self, type=None, tensorCache=None):
if type is None:
return self._type
super(LookupTable, self).type(type, tensorCache)
if type == 'torch.cuda.FloatTensor':
# CUDA uses _sorted and _indices temporary tensors
self._sorted = torch.cuda.LongTensor()
self._indices = torch.cuda.LongTensor()
self._count = torch.cuda.LongTensor()
self._input = torch.cuda.LongTensor()
else:
# self._count and self._input should only be converted if using Cuda
self._count = torch.IntTensor()
self._input = torch.LongTensor()
return self
def clearState(self):
clear(self, '_count', '_input', '_sorted', '_indices', '_gradOutput')
return super(LookupTable, self).clearState()

View File

@ -1,72 +0,0 @@
import torch
from .Module import Module
class MM(Module):
def __init__(self, transA=False, transB=False):
super(MM, self).__init__()
self.transA = transA
self.transB = transB
self.gradInput = [torch.Tensor(), torch.Tensor()]
def updateOutput(self, input):
assert len(input) == 2
a, b = input
assert a.ndimension() == 2 or a.ndimension() == 3
assert a.dim() == b.dim()
if a.ndimension() == 2:
if self.transA:
a = a.t()
if self.transB:
b = b.t()
self.output.resize_(a.size(0), b.size(1))
torch.mm(a, b, out=self.output)
else:
if self.transA:
a = a.transpose(1, 2)
if self.transB:
b = b.transpose(1, 2)
self.output.resize_(a.size(0), a.size(1), b.size(2))
torch.bmm(a, b, out=self.output)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput[0] is None:
self.gradInput[0] = input[0].new()
if self.gradInput[1] is None:
self.gradInput[1] = input[1].new()
assert len(input) == 2
a, b = input
self.gradInput[0].resize_as_(a)
self.gradInput[1].resize_as_(b)
assert gradOutput.ndimension() == 2 or gradOutput.ndimension() == 3
assert a.dim() == b.dim() == gradOutput.dim()
if gradOutput.ndimension() == 2:
h_dim, w_dim = 0, 1
f = "mm"
else:
h_dim, w_dim = 1, 2
f = "bmm"
if self.transA == self.transB:
a = a.transpose(h_dim, w_dim)
b = b.transpose(h_dim, w_dim)
if self.transA:
getattr(torch, f)(b, gradOutput.transpose(h_dim, w_dim), out=self.gradInput[0])
else:
getattr(torch, f)(gradOutput, b, out=self.gradInput[0])
if self.transB:
getattr(torch, f)(gradOutput.transpose(h_dim, w_dim), a, out=self.gradInput[1])
else:
getattr(torch, f)(a, gradOutput, out=self.gradInput[1])
return self.gradInput

View File

@ -1,37 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class MSECriterion(Criterion):
def __init__(self, sizeAverage=True):
super(MSECriterion, self).__init__()
self.sizeAverage = sizeAverage
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.MSECriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
implicit_gradOutput = torch.Tensor([1]).type(input.type())
self._backend.MSECriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,67 +0,0 @@
import torch
from .Module import Module
class MV(Module):
"""Module to perform matrix vector multiplication on two minibatch inputs,
producing a minibatch.
"""
def __init__(self, trans=False):
super(MV, self).__init__()
self.trans = trans
self.gradInput = [torch.Tensor(), torch.Tensor()]
def updateOutput(self, input):
M, v = input
assert M.ndimension() == 2 or M.ndimension() == 3
if M.ndimension() == 2:
assert v.ndimension() == 1
if self.trans:
M = M.transpose(0, 1)
self.output.resize_(M.size(0))
torch.mv(M, v, out=self.output)
else:
assert v.ndimension() == 2
if self.trans:
M = M.transpose(1, 2)
self.output.resize_(M.size(0), M.size(1), 1)
torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))
return self.output
def updateGradInput(self, input, gradOutput):
M, v = input
self.gradInput[0].resize_as_(M)
self.gradInput[1].resize_as_(v)
gradOutput = gradOutput.contiguous()
assert gradOutput.ndimension() == 1 or gradOutput.ndimension() == 2
if gradOutput.ndimension() == 2:
assert M.ndimension() == 3
assert v.ndimension() == 2
bdim = M.size(0)
odim = M.size(1)
idim = M.size(2)
if self.trans:
torch.bmm(v.view(bdim, odim, 1), gradOutput.view(bdim, 1, idim), out=self.gradInput[0])
torch.bmm(M, gradOutput.view(bdim, idim, 1), out=self.gradInput[1].view(bdim, odim, 1))
else:
torch.bmm(gradOutput.view(bdim, odim, 1), v.view(bdim, 1, idim), out=self.gradInput[0])
torch.bmm(M.transpose(1, 2), gradOutput.view(bdim, odim, 1), out=self.gradInput[1].view(bdim, idim, 1))
else:
assert M.ndimension() == 2
assert v.ndimension() == 1
if self.trans:
torch.ger(v, gradOutput, out=self.gradInput[0])
self.gradInput[1] = M * gradOutput
else:
torch.ger(gradOutput, v, out=self.gradInput[0])
self.gradInput[1] = M.t() * gradOutput
return self.gradInput
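
# Illustrative sketch (plain torch ops, not the removed module): the batched,
# non-transposed forward of MV is just a batched matrix-vector product. Shapes are made up.
import torch
M = torch.randn(8, 5, 3)    # minibatch of matrices
v = torch.randn(8, 3)       # minibatch of vectors
out = torch.bmm(M, v.unsqueeze(2)).squeeze(2)   # shape (8, 5)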

View File

@ -1,36 +0,0 @@
import torch
from .Criterion import Criterion
class MarginCriterion(Criterion):
def __init__(self, margin=1, sizeAverage=True):
super(MarginCriterion, self).__init__()
self.sizeAverage = sizeAverage
self.margin = margin
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.MarginCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
self.sizeAverage,
self.margin
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
self._backend.MarginCriterion_updateGradInput(
self._backend.library_state,
input,
target,
self.gradInput,
self.sizeAverage,
self.margin
)
return self.gradInput

View File

@ -1,75 +0,0 @@
import torch
from .Criterion import Criterion
class MarginRankingCriterion(Criterion):
def __init__(self, margin=0, sizeAverage=True):
super(MarginRankingCriterion, self).__init__()
self.margin = margin
self.sizeAverage = sizeAverage
self.gradInput = [torch.Tensor(), torch.Tensor()]
self._output = None
self.dist = None
self.mask = None
def updateOutput(self, input, y):
if input[0].size(0) == 1:
self.output = max(0, -y * (input[0][0] - input[1][0]) + self.margin)
else:
if self._output is None:
self._output = input[0].clone()
self._output.resize_as_(input[0])
self._output.copy_(input[0])
self._output.add_(-1, input[1])
self._output.mul_(-1).mul_(y)
self._output.add_(self.margin)
self._output.clamp_(min=0)
self.output = self._output.sum().item()
if self.sizeAverage:
self.output = self.output / y.size(0)
return self.output
def updateGradInput(self, input, y):
if input[0].size(0) == 1:
dist = -y * (input[0][0] - input[1][0]) + self.margin
if dist < 0:
self.gradInput[0][0] = 0
self.gradInput[1][0] = 0
else:
self.gradInput[0][0] = -y
self.gradInput[1][0] = y
else:
if self.dist is None:
self.dist = input[0].new()
self.dist = self.dist.resize_as_(input[0]).copy_(input[0])
dist = self.dist
dist.add_(-1, input[1])
dist.mul_(-1).mul_(y)
dist.add_(self.margin)
self.mask = dist > 0
mask = self.mask
torch.ge(dist, 0, out=mask)
self.gradInput[0].resize_(dist.size())
self.gradInput[1].resize_(dist.size())
self.gradInput[0].copy_(mask)
self.gradInput[0].mul_(-1).mul_(y)
self.gradInput[1].copy_(mask)
self.gradInput[1].mul_(y)
if self.sizeAverage:
self.gradInput[0].div_(y.size(0))
self.gradInput[1].div_(y.size(0))
return self.gradInput
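
# Reference sketch (plain torch, not the removed module) of the per-pair loss computed
# above: max(0, -y * (x1 - x2) + margin), averaged over the batch as with sizeAverage=True.
# The function name is illustrative.
import torch
def margin_ranking_sketch(x1, x2, y, margin=0.0):
    return (-y * (x1 - x2) + margin).clamp(min=0).mean()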

View File

@ -1,64 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class MaskedSelect(Module):
def __init__(self):
super(MaskedSelect, self).__init__()
self._maskIndices = torch.LongTensor()
self._maskIndexBuffer = torch.LongTensor()
self._maskIndexBufferCPU = torch.FloatTensor()
self._gradBuffer = torch.Tensor()
self._gradMask = torch.ByteTensor()
def updateOutput(self, input):
input, mask = input
torch.masked_select(input, mask, out=self.output)
return self.output
def updateGradInput(self, input, gradOutput):
input, mask = input
if input.type() == 'torch.cuda.FloatTensor':
torch.arange(0, mask.nelement(), out=self._maskIndexBufferCPU).resize_(mask.size())
self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU)
else:
torch.arange(0, mask.nelement(), out=self._maskIndexBuffer).resize_(mask.size())
torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices)
self._gradBuffer.resize_(input.nelement()).zero_()
self._gradBuffer.scatter_(0, self._maskIndices, gradOutput)
self._gradBuffer.resize_(input.size())
self.gradInput = [self._gradBuffer, self._gradMask.resize_(mask.size()).fill_(0)]
return self.gradInput
def type(self, type=None, tensorCache=None):
if type is None:
return self._type
self._gradBuffer = self._gradBuffer.type(type)
self.gradInput = self.gradInput.type(type)
self.output = self.output.type(type)
# These casts apply when switching between cuda/non-cuda types
if type != 'torch.cuda.FloatTensor':
self._maskIndexBuffer = self._maskIndexBuffer.long()
self._maskIndices = self._maskIndices.long()
self._gradMask = self._gradMask.byte()
else:
self._maskIndexBuffer = self._maskIndexBuffer.cuda()
self._maskIndices = self._maskIndices.cuda()
self._gradMask = self._gradMask.cuda()
self._type = type
return self
def clearState(self):
return clear(self, ['output',
'gradInput',
'_maskIndexBuffer',
'_maskIndexBufferCPU',
'_maskIndices',
'_gradBuffer',
'_gradMask'])

View File

@ -1,67 +0,0 @@
import torch
from .Module import Module
from .utils import clear, addSingletondimension
class Max(Module):
def __init__(self, dimension=0):
super(Max, self).__init__()
self.dimension = dimension
self._output = None
self._indices = None
def _getPositiveDimension(self, input):
dimension = self.dimension
if dimension < 0:
dimension = input.dim() + dimension
return dimension
def _lazyInit(self):
if self._output is None:
self._output = self.output.new()
if self._indices is None:
self._indices = \
(torch.cuda.LongTensor() if self.output.is_cuda else torch.LongTensor())
def updateOutput(self, input):
self._lazyInit()
dimension = self._getPositiveDimension(input)
torch.max(input, dimension, out=(self._output, self._indices), keepdim=True)
if input.dim() > 1:
self.output.set_(self._output.select(dimension, 0))
else:
self.output.set_(self._output)
return self.output
def updateGradInput(self, input, gradOutput):
self._lazyInit()
dimension = self._getPositiveDimension(input)
if input.dim() > 1:
gradOutputView = addSingletondimension(gradOutput, dimension)
else:
gradOutputView = gradOutput
self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView)
return self.gradInput
def type(self, type, tensorCache=None):
# torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
if type == 'torch.cuda.FloatTensor':
indices, self._indices = self._indices, None
super(Max, self).type(type, tensorCache)
self._indices = indices.type('torch.cuda.LongTensor') if indices is not None else None
else:
# self._indices must be a LongTensor. Setting it to None temporarily avoids
# unnecessary memory allocations.
indices, self._indices = self._indices, None
super(Max, self).type(type, tensorCache)
self._indices = indices.long() if indices is not None else None
return self
def clearState(self):
clear(self, '_indices', '_output')
return super(Max, self).clearState()

View File

@ -1,16 +0,0 @@
import torch
from .Sum import Sum
"""
This file is still here because of backward compatibility.
Please use instead "nn.Sum(dimension, nInputDims, sizeAverage)"
"""
class Mean(Sum):
def __init__(self, dimension):
super(Mean, self).__init__(dimension, True)

View File

@ -1,68 +0,0 @@
import torch
from .Module import Module
from .utils import clear, addSingletondimension
class Min(Module):
def __init__(self, dimension=0):
super(Min, self).__init__()
self.dimension = dimension
self._output = None
self._indices = None
def _getPositiveDimension(self, input):
dimension = self.dimension
if dimension < 0:
dimension = input.dim() + dimension
return dimension
def _lazyInit(self):
if self._output is None:
self._output = self.output.new()
if self._indices is None:
self._indices = \
(torch.cuda.LongTensor() if self.output.type() == 'torch.cuda.FloatTensor'
else torch.LongTensor())
def updateOutput(self, input):
self._lazyInit()
dimension = self._getPositiveDimension(input)
torch.min(input, dimension, out=(self._output, self._indices), keepdim=True)
if input.dim() > 1:
self.output.set_(self._output.select(dimension, 0))
else:
self.output.set_(self._output)
return self.output
def updateGradInput(self, input, gradOutput):
self._lazyInit()
dimension = self._getPositiveDimension(input)
if input.dim() > 1:
gradOutputView = addSingletondimension(gradOutput, dimension)
else:
gradOutputView = gradOutput
self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView)
return self.gradInput
def type(self, type, tensorCache=None):
# torch.min expects a LongTensor as indices, whereas cutorch.min expects a CudaTensor.
if type == 'torch.cuda.FloatTensor':
indices, self._indices = self._indices, None
super(Min, self).type(type, tensorCache)
self._indices = indices.type('torch.cuda.LongTensor') if indices is not None else None
else:
# self._indices must be a LongTensor. Setting it to None temporarily avoids
# unnecessary memory allocations.
indices, self._indices = self._indices, None
super(Min, self).type(type, tensorCache)
self._indices = indices.long() if indices is not None else None
return self
def clearState(self):
clear(self, '_indices', '_output')
return super(Min, self).clearState()

View File

@ -1,168 +0,0 @@
import torch
from .Module import Module
from .utils import clear, recursiveResizeAs
class MixtureTable(Module):
def __init__(self, dim=1):
super(MixtureTable, self).__init__()
self.dim = dim
self.size = torch.Size()
self.size2 = torch.Size()
self.batchSize = 0
self.backwardSetup = False
self.gradInput = []
self._gaterView = None
self._expert = None
self._expertView = None
self._sum = None
self._expertView2 = None
self._expert2 = None
self.table = False
def updateOutput(self, input):
gaterInput, expertInputs = input
# buffers
if self._gaterView is None:
self._gaterView = input[0].new()
if self._expert is None:
self._expert = input[0].new()
if self._expertView is None:
self._expertView = input[0].new()
self.dimG = 1
batchSize = gaterInput.size(0)
if self.table or isinstance(expertInputs, list):
self.table = True
if gaterInput.size(self.dimG) != len(expertInputs):
raise RuntimeError("Should be one gater output per expert")
expertInput = expertInputs[0]
if self.batchSize != batchSize:
size = [1] * (expertInput.dim() + 1)
if self.dimG > 0:
size[0] = gaterInput.size(0)
size[self.dim] = gaterInput.size(self.dimG)
self.size = torch.Size(size)
self.output.resize_as_(expertInput)
self.backwardSetup = False
self.batchSize = batchSize
self._gaterView = gaterInput.view(self.size)
self.output.zero_()
# multiply accumulate gater outputs by their commensurate expert
for i, expertInput in enumerate(expertInputs):
gate = self._gaterView.select(self.dim, i).expand_as(expertInput)
self.output.addcmul_(expertInput, gate)
else:
if self.batchSize != batchSize:
size = [1] * expertInputs.dim()
if self.dimG > 0:
size[0] = gaterInput.size(0)
size[self.dim] = gaterInput.size(self.dimG)
self.size = torch.Size(size)
self.output.resize_as_(expertInputs.select(self.dim, 0))
self.batchSize = batchSize
self.backwardSetup = False
self._gaterView = gaterInput.view(self.size)
torch.mul(self._gaterView.expand_as(expertInputs), expertInputs, out=self._expert)
torch.sum(self._expert, self.dim, True, out=self.output)
self.output.resize_as_(expertInputs.select(self.dim, 0))
return self.output
def updateGradInput(self, input, gradOutput):
gaterInput, expertInputs = input
recursiveResizeAs(self.gradInput, input)
gaterGradInput, expertGradInputs = self.gradInput
# buffers
if self._sum is None:
self._sum = input[0].new()
if self._expertView2 is None:
self._expertView2 = input[0].new()
if self._expert2 is None:
self._expert2 = input[0].new()
if self.table:
if not self.backwardSetup:
for i, expertInput in enumerate(expertInputs):
expertGradInput = expertGradInputs[i] or expertInput.clone()
expertGradInput.resize_as_(expertInput)
expertGradInputs[i] = expertGradInput
gaterGradInput.resize_as_(gaterInput)
self.backwardSetup = True
# like CMulTable, but with broadcasting
for i, expertGradInput in enumerate(expertGradInputs):
# gater updateGradInput
torch.mul(gradOutput, expertInputs[i], out=self._expert)
if self.dimG == 0:
self._expertView = self._expert.view(-1)
else:
self._expertView = self._expert.view(gradOutput.size(0), -1)
torch.sum(self._expertView, self.dimG, True, out=self._sum)
if self.dimG == 0:
gaterGradInput[i] = self._sum.select(self.dimG, 0)
else:
gaterGradInput.select(self.dimG, i).copy_(self._sum.select(self.dimG, 0))
# expert updateGradInput
gate = self._gaterView.select(self.dim, i).expand_as(expertGradInput)
expertGradInput.mul_(gate, gradOutput)
else:
if not self.backwardSetup:
size2 = list(expertInputs.size())
size2[self.dim] = 1
self.size2 = torch.Size(size2)
gaterGradInput.resize_as_(gaterInput)
self.backwardSetup = True
# gater updateGradInput
self._expertView = gradOutput.contiguous().view(torch.Size(self.size2))
gradOutput = self._expertView.expand_as(expertInputs)
torch.mul(gradOutput, expertInputs, out=self._expert)
expert = self._expert.transpose(self.dim, self.dimG)
if not expert.is_contiguous():
self._expert2.resize_as_(expert)
self._expert2.copy_(expert)
expert = self._expert2
if self.dimG == 0:
self._expertView2 = expert.view(gaterInput.size(0), -1)
else:
self._expertView2 = expert.view(gaterInput.size(0), gaterInput.size(1), -1)
torch.sum(self._expertView2, self.dimG + 1, True, out=gaterGradInput)
gaterGradInput.resize_as_(gaterInput)
# expert updateGradInput
torch.mul(self._gaterView.expand_as(expertInputs), gradOutput, out=expertGradInputs)
return self.gradInput
def type(self, type, tensorCache=None):
self._gaterView = None
self._expert = None
self._expertView = None
self._sum = None
self._expert2 = None
self._expertView2 = None
return super(MixtureTable, self).type(type, tensorCache)
def clearState(self, ):
clear(self, [
'_gaterView',
'_expert',
'_expertView',
'_sum',
'_expert2',
'_expertView2',
])
return super(MixtureTable, self).clearState()
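
# Hedged sketch (plain torch ops, not the removed module) of the non-table forward path
# with dim=1: each expert output is weighted by its gater value and summed over the
# expert dimension. Shapes are illustrative.
import torch
gater = torch.softmax(torch.randn(4, 3), dim=1)    # (batch, n_experts)
experts = torch.randn(4, 3, 7)                     # (batch, n_experts, features)
mixed = (gater.unsqueeze(2) * experts).sum(dim=1)  # (batch, features)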

View File

@ -1,296 +0,0 @@
import torch
import torch._thnn
from .utils import clear, recursiveType
class Module(object):
def __init__(self):
self.gradInput = torch.Tensor()
self.output = torch.Tensor()
self._type = self.output.type()
self._backend = torch._thnn.type2backend[self.output.type()]
def __repr__(self):
return 'nn.' + self.__class__.__name__
def parameters(self):
has_weight = hasattr(self, 'weight') and self.weight is not None
has_bias = hasattr(self, 'bias') and self.bias is not None
if has_weight and has_bias:
return [self.weight, self.bias], [self.gradWeight, self.gradBias]
elif has_weight:
return [self.weight], [self.gradWeight]
elif has_bias:
return [self.bias], [self.gradBias]
else:
return
def updateOutput(self, input):
return self.output
def forward(self, input):
return self.updateOutput(input)
def backward(self, input, gradOutput, scale=1):
self.updateGradInput(input, gradOutput)
self.accGradParameters(input, gradOutput, scale)
return self.gradInput
def backwardUpdate(self, input, gradOutput, lr):
self.updateGradInput(input, gradOutput)
self.accUpdateGradParameters(input, gradOutput, lr)
return self.gradInput
def updateGradInput(self, input, gradOutput):
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
pass
def accUpdateGradParameters(self, input, gradOutput, lr):
has_weight = hasattr(self, 'weight') and self.weight is not None
has_bias = hasattr(self, 'bias') and self.bias is not None
if has_weight:
gradWeight = self.gradWeight
self.gradWeight = self.weight
if has_bias:
gradBias = self.gradBias
self.gradBias = self.bias
self.accGradParameters(input, gradOutput, -lr)
if has_weight:
self.gradWeight = gradWeight
if has_bias:
self.gradBias = gradBias
def sharedAccUpdateGradParameters(self, input, gradOutput, lr):
if self.parameters():
self.zeroGradParameters()
self.accGradParameters(input, gradOutput, 1)
self.updateParameters(lr)
def zeroGradParameters(self):
params = self.parameters()
if params is not None:
for grad in params[1]:
grad.zero_()
def updateParameters(self, learningRate):
if self.parameters() is not None:
params, gradParams = self.parameters()
if params:
for p, gp in zip(params, gradParams):
p.add_(-learningRate, gp)
def training(self):
self.train = True
def evaluate(self):
self.train = False
# TODO
def share(self, mlp, *arg):
raise NotImplementedError
def clone(self, *arg):
raise NotImplementedError
def type(self, type=None, tensorCache=None):
if type is None:
return self._type
tensorCache = tensorCache or {}
# find all tensors and convert them
for key, param in self.__dict__.items():
setattr(self, key, recursiveType(param, type, tensorCache))
self._backend = torch._thnn.type2backend[type]
self._type = type
return self
def float(self, *args):
return self.type('torch.FloatTensor', *args)
def double(self, *args):
return self.type('torch.DoubleTensor', *args)
def cuda(self, *args):
return self.type('torch.cuda.FloatTensor', *args)
def reset(self):
pass
def write(self, f):
raise NotImplementedError
def read(self, f):
raise NotImplementedError
# This function is not easy to understand. It works as follows:
#
# - gather all parameter tensors for this module (and children);
# count all parameter values (floats)
# - create one ginormous memory area (Storage object) with room for all
# parameters
# - remap each parameter tensor to point to an area within the ginormous
# Storage, and copy it there
#
# It has the effect of making all parameters point to the same memory area,
# which is then returned.
#
# The purpose is to allow operations over all parameters (such as momentum
# updates and serialization), but it assumes that all parameters are of
# the same type (and, in the case of CUDA, on the same device), which
# is not always True. Use for_each() to iterate over this module and
# children instead.
#
# Module._flattenTensorBuffer can be used by other packages (e.g. cunn)
# to specify the type of temporary buffers. For example, the temporary
# buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage.
#
# TODO: This logically belongs to torch.Tensor, not nn.
_flattenTensorBuffer = {}
def _flatten(self, parameters=[]):
# returns True if tensor occupies a contiguous region of memory (no holes)
def isCompact(tensor):
# isn't it enough to check if strides == size.cumprod(0)?
sortedStride, perm = torch.sort(torch.LongTensor(tensor.stride()), 0, True)
sortedSize = torch.LongTensor(list(tensor.size())).index_select(0, perm)
nRealDim = int(torch.clamp(sortedStride, 0, 1).sum())
sortedStride = sortedStride.narrow(0, 0, nRealDim).clone()
sortedSize = sortedSize.narrow(0, 0, nRealDim).clone()
t = tensor.new().set_(tensor.storage(), 0,
tuple(sortedSize),
tuple(sortedStride))
return t.is_contiguous()
if not parameters:
return torch.Tensor()
Tensor = parameters[0].new
BufferTensor = Module._flattenTensorBuffer.get(type(parameters[0]), Tensor)
# 1. construct the set of all unique storages referenced by parameter tensors
storages = {}
num_parameters = 0
parameterMeta = []
for i, param in enumerate(parameters):
storage = param.storage()
key = storage._cdata
if key not in storages:
storages[key] = (storage, num_parameters)
num_parameters = num_parameters + storage.size()
parameterMeta.append({
'storage_offset': param.storage_offset() + storages[key][1],
'size': param.size(),
'stride': param.stride()
})
# 2. construct a single tensor that will hold all the parameters
flatParameters = BufferTensor(num_parameters).zero_()
# 3. determine if there are elements in the storage that none of the
# parameter tensors reference ('holes')
tensorsCompact = True
for meta in parameterMeta:
tmp = BufferTensor().set_(flatParameters.storage(), meta['storage_offset'], meta['size'], meta['stride'])
tmp.fill_(1)
tensorsCompact = tensorsCompact and isCompact(tmp)
maskParameters = flatParameters.byte().clone()
compactOffsets = flatParameters.long().cumsum(0)
used_parameters = compactOffsets[-1]
# 4. copy storages into the flattened parameter tensor
for storageAndOffset in storages.values():
storage, offset = storageAndOffset
flatParameters[slice(offset, offset + storage.size())].copy_(Tensor().set_(storage))
# 5. allow garbage collection
storages = None
for param in parameters:
param.set_()
# 6. compact the flattened parameters if there were holes
if used_parameters != num_parameters:
assert tensorsCompact
flatParameters = BufferTensor(used_parameters).copy_(
flatParameters.masked_select(maskParameters))
for meta in parameterMeta:
meta['storage_offset'] = compactOffsets[meta['storage_offset']]
if BufferTensor != Tensor:
flatParameters = Tensor(flatParameters.nelement()).copy_(flatParameters)
# 7. fix up the parameter tensors to point at the flattened parameters
for param, meta in zip(parameters, parameterMeta):
param.set_(flatParameters.storage(),
meta['storage_offset'],
meta['size'],
meta['stride'])
return flatParameters
def flattenParameters(self):
_params = self.parameters()
if _params is None:
return
parameters, gradParameters = _params
p, g = self._flatten(parameters), self._flatten(gradParameters)
assert p.nelement() == g.nelement()
if parameters:
for param, grad in zip(parameters, gradParameters):
assert param.storage_offset() == grad.storage_offset()
return p, g
def apply(self, callback):
callback(self)
if hasattr(self, 'modules'):
for module in self.modules:
module.apply(callback)
def findModules(self, cls, container=None):
nodes = []
containers = []
if isinstance(self, cls):
nodes.append(self)
containers.append(container)
# Recurse on nodes with 'modules'
if hasattr(self, 'modules'):
for child in self.modules:
child_nodes, child_containers = child.findModules(cls, self)
assert len(child_nodes) == len(child_containers)
# add the list items from our child to our list (i.e. return a
# flattened table of the return nodes).
nodes.extend(child_nodes)
containers.extend(child_containers)
return nodes, containers
def listModules(self):
# include self first
modules = [self]
if hasattr(self, 'modules'):
for child in self.modules:
modules.extend(child.listModules())
return modules
def clearState(self):
return clear(self, 'output', 'gradInput')
def replace(self, callback):
out = callback(self)
# TODO: not out.modules?
if hasattr(self, 'modules'):
for i, module in enumerate(self.modules):
self.modules[i] = module.replace(callback)
return out
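# A small, hedged sketch, separate from the Module class above, of what the
# parameter flattening it implements is for: several tensors are remapped so
# they all view one contiguous storage, and a single in-place update then
# touches every parameter. Shapes and the 0.1 step are arbitrary choices.
import torch

params = [torch.randn(3, 4), torch.randn(4)]
flat = torch.zeros(sum(p.numel() for p in params))
offset = 0
for p in params:
    flat[offset:offset + p.numel()].copy_(p.reshape(-1))
    p.set_(flat.storage(), offset, p.size())   # p now views flat's storage
    offset += p.numel()
flat.add_(-0.1)                                # updates both tensors at once
assert params[0].storage().data_ptr() == flat.storage().data_ptr()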

View File

@ -1,33 +0,0 @@
import math
import torch
from .Module import Module
class Mul(Module):
def __init__(self):
super(Mul, self).__init__()
self.weight = torch.Tensor(1)
self.gradWeight = torch.Tensor(1)
self.reset()
def reset(self, stdv=None):
if stdv is not None:
stdv = stdv * math.sqrt(3)
else:
stdv = 1. / math.sqrt(self.weight.size(0))
self.weight.uniform_(-stdv, stdv)
def updateOutput(self, input):
self.output.resize_as_(input).copy_(input)
self.output.mul_(self.weight[0])
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input).zero_()
self.gradInput.add_(self.weight[0], gradOutput)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
self.gradWeight[0] = (self.gradWeight[0] +
scale * input.contiguous().view(-1).dot(gradOutput.contiguous().view(-1)))

View File

@ -1,37 +0,0 @@
import torch
from .Module import Module
class MulConstant(Module):
def __init__(self, constant_scalar, inplace=False):
super(MulConstant, self).__init__()
self.constant_scalar = constant_scalar
self.inplace = inplace
def updateOutput(self, input):
if self.inplace:
input.mul_(self.constant_scalar)
self.output.set_(input)
else:
self.output.resize_as_(input)
self.output.copy_(input)
self.output.mul_(self.constant_scalar)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is None:
return
if self.inplace:
gradOutput.mul_(self.constant_scalar)
self.gradInput.set_(gradOutput)
# restore previous input value
input.div_(self.constant_scalar)
else:
self.gradInput.resize_as_(gradOutput)
self.gradInput.copy_(gradOutput)
self.gradInput.mul_(self.constant_scalar)
return self.gradInput

View File

@ -1,41 +0,0 @@
import torch
from .Criterion import Criterion
from .utils import recursiveResizeAs, recursiveFill, recursiveAdd
class MultiCriterion(Criterion):
def __init__(self, ):
super(MultiCriterion, self).__init__()
self.criterions = []
self.weights = torch.DoubleStorage()
def add(self, criterion, weight=1):
self.criterions.append(criterion)
new_weights = torch.DoubleStorage(len(self.criterions))
for i, v in enumerate(self.weights):
new_weights[i] = v
new_weights[len(self.criterions) - 1] = weight
self.weights = new_weights
return self
def updateOutput(self, input, target):
self.output = 0
for i in range(len(self.criterions)):
self.output = self.output + self.weights[i] * self.criterions[i].updateOutput(input, target)
return self.output
def updateGradInput(self, input, target):
self.gradInput = recursiveResizeAs(self.gradInput, input)[0]
recursiveFill(self.gradInput, 0)
for i in range(len(self.criterions)):
recursiveAdd(self.gradInput, self.weights[i], self.criterions[i].updateGradInput(input, target))
return self.gradInput
def type(self, type):
for criterion in self.criterions:
criterion.type(type)
return super(MultiCriterion, self).type(type)

View File

@ -1,41 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class MultiLabelMarginCriterion(Criterion):
def __init__(self, sizeAverage=True):
super(MultiLabelMarginCriterion, self).__init__()
self.sizeAverage = sizeAverage
self.isTarget = torch.Tensor()
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
target = target.long()
self._backend.MultiLabelMarginCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
self.isTarget,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
target = target.long()
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.MultiLabelMarginCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
self.isTarget,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,41 +0,0 @@
import torch
from .Criterion import Criterion
from .Sigmoid import Sigmoid
from .BCECriterion import BCECriterion
class MultiLabelSoftMarginCriterion(Criterion):
"""
A MultiLabel multiclass criterion based on sigmoid:
the loss is:
l(x, y) = - sum_i (y[i] * log(p[i]) + (1 - y[i]) * log(1 - p[i]))
where p[i] = exp(x[i]) / (1 + exp(x[i]))
and with weights:
l(x, y) = - sum_i weights[i] (y[i] * log(p[i]) + (1 - y[i]) * log (1 - p[i]))
"""
def __init__(self, weights=None):
super(MultiLabelSoftMarginCriterion, self).__init__()
self.lsm = Sigmoid()
self.nll = BCECriterion(weights)
def updateOutput(self, input, target):
input = input if input.nelement() == 1 else input.squeeze()
target = target if target.nelement() == 1 else target.squeeze()
self.lsm.updateOutput(input)
self.nll.updateOutput(self.lsm.output, target)
self.output = self.nll.output
return self.output
def updateGradInput(self, input, target):
size = input.size()
input = input if input.nelement() == 1 else input.squeeze()
target = target if target.nelement() == 1 else target.squeeze()
self.nll.updateGradInput(self.lsm.output, target)
self.lsm.updateGradInput(input, self.nll.gradInput)
self.gradInput = self.lsm.gradInput.view(size)
return self.gradInput
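# A hedged illustration, separate from the criterion above, of the loss written
# in its docstring, using plain tensor ops; x (logits) and y (0/1 targets) are
# arbitrary, and the mean() mirrors the default size-averaging only roughly.
import torch

x = torch.randn(4, 3)                      # logits
y = torch.randint(0, 2, (4, 3)).float()    # multi-label 0/1 targets
p = torch.sigmoid(x)                       # p[i] = exp(x[i]) / (1 + exp(x[i]))
loss = -(y * p.log() + (1 - y) * (1 - p).log()).mean()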

View File

@ -1,51 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class MultiMarginCriterion(Criterion):
def __init__(self, p=1, weights=None, margin=1, sizeAverage=True):
super(MultiMarginCriterion, self).__init__()
if p != 1 and p != 2:
raise ValueError("only p == 1 and p == 2 supported")
self.p = p
self.margin = margin
self.sizeAverage = sizeAverage
if weights is not None:
assert weights.dim() == 1
self.weights = weights
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
target = target.long()
self._backend.MultiMarginCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
self.p,
self.weights,
self.margin,
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
target = target.long()
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.MultiMarginCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
self.p,
self.weights,
self.margin,
)
return self.gradInput

View File

@ -1,31 +0,0 @@
import torch
from .Module import Module
class Narrow(Module):
def __init__(self, dimension, offset, length=1):
super(Narrow, self).__init__()
self.dimension = dimension
self.index = offset
self.length = length
def updateOutput(self, input):
length = self.length
if length < 0:
length = input.size(self.dimension) - self.index + self.length + 1
output = input.narrow(self.dimension, self.index, length)
self.output = self.output.type_as(output)
self.output.resize_as_(output).copy_(output)
return self.output
def updateGradInput(self, input, gradOutput):
length = self.length
if length < 0:
length = input.size(self.dimension) - self.index + self.length + 1
self.gradInput = self.gradInput.type_as(input)
self.gradInput.resize_as_(input).zero_()
self.gradInput.narrow(self.dimension, self.index, length).copy_(gradOutput)
return self.gradInput

View File

@ -1,41 +0,0 @@
import torch
from .Module import Module
from .utils import clear, recursiveResizeAs, recursiveFill
class NarrowTable(Module):
def __init__(self, offset, length=1):
super(NarrowTable, self).__init__()
self.offset = offset
self.length = length
self.output = []
self.gradInput = []
def updateOutput(self, input):
self.output[:] = [input[self.offset + i] for i in range(self.length)]
return self.output
def updateGradInput(self, input, gradOutput):
if len(self.gradInput) != len(input):
self.gradInput[:] = [None for i in range(len(input))]
assert len(gradOutput) == self.length
for i in range(self.length):
self.gradInput[self.offset + i] = gradOutput[i]
for i in range(len(input)):
if i < self.offset or i >= self.offset + self.length:
gi = self.gradInput[i]
if gi is None:
gi = input[i].new()
self.gradInput[i] = recursiveResizeAs(gi, input[i])[0]
recursiveFill(self.gradInput[i], 0)
return self.gradInput
def type(self, type=None, tensorCache=None):
if not type:
return self._type
clear(self, 'output', 'gradInput')
return super(NarrowTable, self).type(type, tensorCache)

View File

@ -1,155 +0,0 @@
import torch
from torch._six import inf
from .Module import Module
from .utils import clear
class Normalize(Module):
def __init__(self, p, eps=1e-10):
super(Normalize, self).__init__()
assert p > 0
self.p = p
self.eps = eps
self._output = None
self.norm = None
self.buffer = None
self._indices = None
self.normp = None
self._gradInput = None
self.cross = None
self.buffer2 = None
def updateOutput(self, input):
assert input.dim() == 2
input_size = input.size()
if self._output is None:
self._output = input.new()
if self.norm is None:
self.norm = input.new()
if self.buffer is None:
self.buffer = input.new()
self._output.resize_as_(input)
# specialization for the infinity norm
if self.p == inf:
if not self._indices:
self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
else torch.LongTensor()
torch.abs(input, out=self.buffer)
torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
self.norm.add_(self.eps)
else:
if self.normp is None:
self.normp = input.new()
if self.p % 2 != 0:
torch.abs(input, out=self.buffer).pow_(self.p)
else:
torch.pow(input, self.p, out=self.buffer)
torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
torch.pow(self.normp, 1. / self.p, out=self.norm)
torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
self.output = self._output.view(input_size)
return self.output
def updateGradInput(self, input, gradOutput):
assert input.dim() == 2
assert gradOutput.dim() == 2
input_size = input.size()
n = input.size(0) # batch size
d = input.size(1) # dimensionality of vectors
if self._gradInput is None:
self._gradInput = input.new()
if self.cross is None:
self.cross = input.new()
# compute diagonal term with gradOutput
self._gradInput.resize_(n, d)
if self.p == inf:
# specialization for the inf case
torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput)
self.buffer.resize_as_(input).zero_()
self.cross.resize_(n, 1)
torch.gather(input, 1, self._indices, out=self.cross)
self.cross.div_(self.norm)
self.buffer.scatter_(1, self._indices, self.cross)
else:
torch.mul(self.normp.view(n, 1).expand(n, d), gradOutput, out=self._gradInput)
# small optimizations for different p
# buffer = input*|input|^(p-2)
# for non-even p, need to add absolute value
if self.p % 2 != 0:
if self.p < 2:
# add eps to avoid possible division by 0
torch.abs(input, out=self.buffer).add_(self.eps).pow_(self.p - 2).mul_(input)
else:
torch.abs(input, out=self.buffer).pow_(self.p - 2).mul_(input)
# special case for p == 2, pow(x, 0) = 1
elif self.p == 2:
self.buffer.copy_(input)
else:
# p is even and > 2, pow(x, p) is always positive
torch.pow(input, self.p - 2, out=self.buffer).mul_(input)
# compute cross term in two steps
self.cross.resize_(n, 1)
# instead of having a huge temporary matrix (b1*b2),
# do the computations as b1*(b2*gradOutput). This avoids redundant
# computation and also a huge buffer of size n*d^2
if self.buffer2 is None:
self.buffer2 = input.new() # nxd
torch.mul(input, gradOutput, out=self.buffer2)
torch.sum(self.buffer2, 1, out=self.cross, keepdim=True)
self.buffer.mul_(self.cross.expand_as(self.buffer))
self._gradInput.add_(-1, self.buffer)
# reuse cross buffer for normalization
if self.p == inf:
torch.mul(self.norm, self.norm, out=self.cross)
else:
torch.mul(self.normp, self.norm, out=self.cross)
self._gradInput.div_(self.cross.expand(n, d))
self.gradInput = self._gradInput.view(input_size)
return self.gradInput
def __repr__(self):
return super(Normalize, self).__repr__() + '({})'.format(self.p)
def type(self, type, tensorCache=None):
if not type:
return self._type
# torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
if type == 'torch.cuda.FloatTensor':
super(Normalize, self).type(type, tensorCache)
else:
# self._indices must be a LongTensor. Setting it to nil temporarily avoids
# unnecessary memory allocations.
indices, self._indices = self._indices, None
super(Normalize, self).type(type, tensorCache)
self._indices = indices.long() if indices else None
return self
def clearState(self):
clear(self, [
'_output',
'_indices',
'_gradInput',
'buffer',
'norm',
'normp',
'cross',
])
return super(Normalize, self).clearState()
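# A hedged sketch, separate from the Normalize module above: its forward pass
# is row-wise Lp normalization, shown here with plain ops for p = 2; the eps
# placement mirrors the normp buffer above, and the input values are arbitrary.
import torch

x = torch.randn(5, 7)
p, eps = 2, 1e-10
normp = x.abs().pow(p).sum(1, keepdim=True).add(eps)
out = x / normp.pow(1.0 / p)               # rows now have (almost) unit Lp norm
# torch.nn.functional.normalize(x, p=p, dim=1) computes the same quantity,
# differing only in how the small eps is applied.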

View File

@ -1,48 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class PReLU(Module):
def __init__(self, nOutputPlane=0):
super(PReLU, self).__init__()
# if no argument provided, use shared model (weight is scalar)
self.nOutputPlane = nOutputPlane
self.weight = torch.Tensor(nOutputPlane or 1).fill_(0.25)
self.gradWeight = torch.Tensor(nOutputPlane or 1)
def updateOutput(self, input):
self._backend.PReLU_updateOutput(
self._backend.library_state,
input,
self.output,
self.weight
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.PReLU_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.weight
)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
self._backend.PReLU_accGradParameters(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.weight,
self.gradWeight,
scale
)
return self.gradWeight
def clearState(self):
clear(self, 'gradWeightBuf', 'gradWeightBuf2')
return super(PReLU, self).clearState()

View File

@ -1,74 +0,0 @@
import torch
from .Module import Module
class Padding(Module):
# pad puts in [pad] amount of [value] over dimension [dim], starting at
# index [index] in that dimension. If pad<0, index counts from the left.
# If pad>0, index counts from the right. index = 1 pads before index 1.
# index = 2 pads starting before index 2 and after index 1 in dimension [dim]
# When nInputDim is provided, inputs larger than that value will be considered batches
# where the actual dim to be padded will be dimension dim + 1.
def __init__(self, dim, pad, value=0, index=0, nInputDim=0):
self.value = value
self.index = index
self.dim = dim
self.pad = pad
self.nInputDim = nInputDim
self.outputSize = torch.Size()
super(Padding, self).__init__()
def updateOutput(self, input):
dim = self.dim
if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim:
dim = dim + 1
outputSize = list(input.size())
outputSize[dim] += abs(self.pad)
self.outputSize = torch.Size(outputSize)
self.output.resize_(self.outputSize)
self.output.fill_(self.value)
index = self.index
pad = self.pad
if pad > 0:
index = input.size(dim) - index
else:
pad = -pad
if index == 0:
self.output.narrow(dim, pad, input.size(dim)).copy_(input)
elif index == input.size(dim):
self.output.narrow(dim, 0, input.size(dim)).copy_(input)
else:
self.output.narrow(dim, 0, index).copy_(input.narrow(dim, 0, index))
self.output.narrow(dim, index + pad, input.size(dim) -
index).copy_(input.narrow(dim, index, input.size(dim) - index))
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input)
dim = self.dim
if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim:
dim = dim + 1
index = self.index
pad = self.pad
if pad > 0:
index = input.size(dim) - index
else:
pad = -pad
if index == 0:
self.gradInput.copy_(gradOutput.narrow(dim, pad, input.size(dim)))
elif index == input.size(dim):
self.gradInput.copy_(gradOutput.narrow(dim, 0, input.size(dim)))
else:
self.gradInput.narrow(dim, 0, index).copy_(gradOutput.narrow(dim, 0, index))
self.gradInput.narrow(dim, index, input.size(
dim) - index).copy_(gradOutput.narrow(dim, index + pad, input.size(dim) - index))
return self.gradInput
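# A hedged example, separate from the Padding module above, of its effect for
# dim=0, pad=2, value=0, index=0 on an arbitrary 3x2 input: two rows of zeros
# are appended after the input (a negative pad would place them before it).
import torch

x = torch.ones(3, 2)
y = torch.cat([x, torch.zeros(2, 2)], dim=0)   # what Padding(0, 2) produces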

View File

@ -1,83 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class PairwiseDistance(Module):
def __init__(self, p):
super(PairwiseDistance, self).__init__()
assert p % 1 == 0
self.gradInput = []
self.diff = torch.Tensor()
self.norm = p
self.outExpand = None
self.grad = None
self.ones = None
def updateOutput(self, input):
self.output.resize_(1)
assert input[0].dim() == 2
if self.diff is None:
self.diff = input[0].new()
torch.add(input[0], -1, input[1], out=self.diff).abs_()
self.output.resize_(input[0].size(0))
self.output.zero_()
self.output.add_(self.diff.pow_(self.norm).sum(1, keepdim=False))
self.output.pow_(1. / self.norm)
return self.output
def updateGradInput(self, input, gradOutput):
assert input[0].dim() == 2
if len(self.gradInput) != 2:
self.gradInput[:] = [None, None]
if self.gradInput[0] is None:
self.gradInput[0] = input[0].new()
self.gradInput[0].resize_(input[0].size())
if self.gradInput[1] is None:
self.gradInput[1] = input[1].new()
self.gradInput[1].resize_(input[1].size())
self.gradInput[0].copy_(input[0])
self.gradInput[0].add_(-1, input[1])
if self.norm == 1:
self.gradInput[0].sign_()
else:
# Note: derivative of p-norm:
# d/dx_k(||x||_p) = (x_k * abs(x_k)^(p-2)) / (||x||_p)^(p-1)
if self.norm > 2:
self.gradInput[0].mul_(self.gradInput[0].abs().pow_(self.norm - 2))
if self.outExpand is None:
self.outExpand = self.output.new()
self.outExpand.resize_(self.output.size(0), 1)
self.outExpand.copy_(self.output.view(self.output.size(0), 1))
self.outExpand.add_(1e-6) # Prevent divide by zero errors
self.outExpand.pow_(-(self.norm - 1))
self.gradInput[0].mul_(self.outExpand.expand(self.gradInput[0].size(0),
self.gradInput[0].size(1)))
if self.grad is None:
self.grad = gradOutput.new()
if self.ones is None:
self.ones = gradOutput.new()
self.grad.resize_as_(input[0]).zero_()
self.ones.resize_(input[0].size(1)).fill_(1)
self.grad.addr_(gradOutput, self.ones)
self.gradInput[0].mul_(self.grad)
self.gradInput[1].zero_().add_(-1, self.gradInput[0])
return self.gradInput
def clearState(self):
clear(self, 'diff', 'outExpand', 'grad', 'ones')
return super(PairwiseDistance, self).clearState()
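# A hedged check, separate from the module above, that the p-norm derivative
# quoted in its updateGradInput comment matches autograd on the same forward
# expression; p = 2 and the input shapes are arbitrary.
import torch

p = 2
a = torch.randn(4, 3, requires_grad=True)
b = torch.randn(4, 3)
dist = (a - b).abs().pow(p).sum(1).pow(1.0 / p)   # same forward as updateOutput
dist.sum().backward()
# a.grad equals (a - b) * |a - b|**(p - 2) / dist**(p - 1), row by row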

View File

@ -1,105 +0,0 @@
import torch
from .Container import Container
class Parallel(Container):
def __init__(self, inputDimension, outputDimension):
super(Parallel, self).__init__()
self.inputDimension = inputDimension
self.outputDimension = outputDimension
self.totalOutputSize = None
def updateOutput(self, input):
nModule = input.size(self.inputDimension)
outputs = []
for i in range(nModule):
currentInput = input.select(self.inputDimension, i)
currentOutput = self.modules[i].updateOutput(currentInput)
outputs.append(currentOutput)
outputSize = currentOutput.size(self.outputDimension)
if i == 0:
totalOutputSize = list(currentOutput.size())
else:
totalOutputSize[self.outputDimension] += outputSize
self.totalOutputSize = torch.Size(totalOutputSize)
self.output.resize_(self.totalOutputSize)
offset = 0
for i in range(nModule):
currentOutput = outputs[i]
outputSize = currentOutput.size(self.outputDimension)
self.output.narrow(self.outputDimension, offset, outputSize).copy_(currentOutput)
offset = offset + currentOutput.size(self.outputDimension)
return self.output
def updateGradInput(self, input, gradOutput):
nModule = input.size(self.inputDimension)
self.gradInput.resize_as_(input)
offset = 0
for i in range(nModule):
module = self.modules[i]
currentInput = input.select(self.inputDimension, i)
currentOutput = module.output
outputSize = currentOutput.size(self.outputDimension)
currentGradOutput = gradOutput.narrow(self.outputDimension, offset, outputSize)
currentGradInput = module.updateGradInput(currentInput, currentGradOutput)
self.gradInput.select(self.inputDimension, i).copy_(currentGradInput)
offset = offset + outputSize
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
nModule = input.size(self.inputDimension)
offset = 0
for i in range(nModule):
module = self.modules[i]
currentOutput = module.output
outputSize = currentOutput.size(self.outputDimension)
module.accGradParameters(
input.select(self.inputDimension, i),
gradOutput.narrow(self.outputDimension, offset, outputSize),
scale)
offset += outputSize
def accUpdateGradParameters(self, input, gradOutput, lr):
nModule = input.size(self.inputDimension)
offset = 0
for i in range(nModule):
module = self.modules[i]
currentOutput = module.output
module.accUpdateGradParameters(
input.select(self.inputDimension, i),
gradOutput.narrow(self.outputDimension, offset, currentOutput.size(self.outputDimension)),
lr)
offset = offset + currentOutput.size(self.outputDimension)
def __repr__(self):
tab = ' '
line = '\n'
next = ' |`-> '
ext = ' | '
extlast = ' '
last = ' ... -> '
res = torch.typename(self)
res += ' {' + line + tab + 'input'
for i in range(len(self.modules)):
if i == len(self.modules) - 1:
res += line + tab + next + '(' + str(i) + '): ' + \
str(self.modules[i]).replace(line, line + tab + extlast)
else:
res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext)
res += line + tab + last + 'output'
res += line + '}'
return res

View File

@ -1,39 +0,0 @@
import torch
from .Criterion import Criterion
from .utils import recursiveResizeAs, recursiveFill, recursiveAdd
class ParallelCriterion(Criterion):
def __init__(self, repeatTarget=False):
super(ParallelCriterion, self).__init__()
self.criterions = []
self.weights = []
self.gradInput = []
self.repeatTarget = repeatTarget
def add(self, criterion, weight=1):
self.criterions.append(criterion)
self.weights.append(weight)
return self
def updateOutput(self, input, target):
self.output = 0
for i, criterion in enumerate(self.criterions):
current_target = target if self.repeatTarget else target[i]
self.output += self.weights[i] * criterion.updateOutput(input[i], current_target)
return self.output
def updateGradInput(self, input, target):
self.gradInput = recursiveResizeAs(self.gradInput, input)[0]
recursiveFill(self.gradInput, 0)
for i, criterion in enumerate(self.criterions):
current_target = target if self.repeatTarget else target[i]
recursiveAdd(self.gradInput[i], self.weights[i], criterion.updateGradInput(input[i], current_target))
return self.gradInput
def type(self, type=None, tensorCache=None):
self.gradInput = []
return super(ParallelCriterion, self).type(type, tensorCache)

View File

@ -1,60 +0,0 @@
import torch
from .Container import Container
class ParallelTable(Container):
def __init__(self, ):
super(ParallelTable, self).__init__()
self.modules = []
self.output = []
self.gradInput = []
def updateOutput(self, input):
for i in range(len(self.modules)):
tmp = self.modules[i].updateOutput(input[i])
if len(self.output) <= i:
self.output.append(tmp)
else:
self.output[i] = tmp
return self.output
def updateGradInput(self, input, gradOutput):
for i, module in enumerate(self.modules):
tmp = module.updateGradInput(input[i], gradOutput[i])
if len(self.gradInput) <= i:
self.gradInput.append(tmp)
else:
self.gradInput[i] = tmp
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
for i, module in enumerate(self.modules):
module.accGradParameters(input[i], gradOutput[i], scale)
def accUpdateGradParameters(self, input, gradOutput, lr=1):
for i, module in enumerate(self.modules):
module.accUpdateGradParameters(input[i], gradOutput[i], lr)
def __repr__(self):
tab = ' '
line = '\n'
next = ' |`-> '
ext = ' | '
extlast = ' '
last = ' ... -> '
res = torch.typename(self)
res = res + ' {' + line + tab + 'input'
for i in range(len(self.modules)):
if i == len(self.modules) - 1:
res = res + line + tab + next + '(' + str(i) + '): ' + \
str(self.modules[i]).replace(line, line + tab + extlast)
else:
res = res + line + tab + next + '(' + str(i) + '): ' + \
str(self.modules[i]).replace(line, line + tab + ext)
res = res + line + tab + last + 'output'
res = res + line + '}'
return res

View File

@ -1,115 +0,0 @@
import torch
from .Module import Module
from .Identity import Identity
from .LookupTable import LookupTable
from .Sequential import Sequential
from .ParallelTable import ParallelTable
from .MM import MM
class PartialLinear(Module):
"""
PartialLinear is a Linear layer that allows the user to set a collection of
column indices. When the column indices are set, the layer will behave like a
Linear layer that only has those columns. Meanwhile, all parameters are
preserved, so resetting the PartialLinear layer will result in a module that
behaves just like a regular Linear layer.
This module is useful, for instance, when you want to do forward-backward on
only a subset of a Linear layer during training but use the full Linear layer
at test time.
"""
def __init__(self, inputsize, outputsize, bias=True):
super(PartialLinear, self).__init__()
# define the layer as a small network:
pt = ParallelTable()
pt.add(Identity()).add(LookupTable(outputsize, inputsize))
self.network = Sequential().add(pt).add(MM(False, True))
if bias:
self.bias = torch.zeros(1, outputsize)
self.gradBias = torch.zeros(1, outputsize)
else:
self.bias = self.gradBias = None
# set partition:
self.inputsize = inputsize
self.outputsize = outputsize
self.allcolumns = torch.arange(0, self.outputsize).long()
self.resetPartition()
self.addBuffer = None
self.buffer = None
def setPartition(self, indices):
self.partition = indices.type(self.allcolumns.type())
return self
def resetPartition(self):
self.partition = self.allcolumns
return self
def parameters(self):
return [self.network.get(0).get(1).weight, self.bias], \
[self.network.get(0).get(1).gradWeight, self.gradBias]
# should return only the relevant partition?
def updateOutput(self, input):
self.output.set_(self.network.forward([input, self.partition]))
if self.bias is not None:
self.output.add_(torch.index_select(self.bias, 1, self.partition).expand_as(self.output))
if self.addBuffer is None:
self.addBuffer = input.new()
if self.addBuffer.nelement() != input.size(0):
self.addBuffer.resize_(input.size(0)).fill_(1)
return self.output
def updateGradInput(self, input, gradOutput):
if self.gradInput is not None:
self.network.updateGradInput([input, self.partition], gradOutput)
self.gradInput.set_(self.network.gradInput[0])
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
self.network.accGradParameters([input, self.partition], gradOutput, scale)
if self.bias is not None:
if self.buffer is None:
self.buffer = input.new()
self.buffer.resize_(gradOutput.size(1))
torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
self.gradBias.index_add_(
1, self.partition, self.buffer.view(1, self.buffer.nelement())
)
def accUpdateGradParameters(self, input, gradOutput, lr):
gradWeight = self.network.get(0).get(1).gradWeight
gradBias = self.gradBias
self.network.get(0).get(1).gradWeight = self.network.get(0).get(1).weight
self.gradBias = self.bias
self.accGradParameters(input, gradOutput, -lr)
self.network.get(0).get(1).gradWeight = gradWeight
self.gradBias = gradBias
def zeroGradParameters(self):
self.network.zeroGradParameters()
self.gradBias.zero_()
def updateParameters(self, learningRate):
self.network.updateParameters(learningRate)
self.bias.add_(-learningRate, self.gradBias)
def type(self, type=None, tensorCache=None):
result = super(PartialLinear, self).type(type, tensorCache)
self.partition = self.partition.long()
self.allcolumns = self.allcolumns.long()
if type == 'torch.cuda.FloatTensor':
self.allcolumns = self.allcolumns.cuda()
self.partition = self.partition.cuda()
return result
def __repr__(self):
return super(PartialLinear, self).__repr__() + \
'({} -> {})'.format(self.inputsize, self.outputsize) + \
(' without bias' if self.bias is None else '')
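# A hedged sketch, separate from PartialLinear above, of the idea described in
# its docstring: evaluate a Linear layer on only a chosen subset of its output
# columns; W, b, x and the column indices are arbitrary stand-ins.
import torch

W = torch.randn(10, 5)              # full weight, shape (outputsize, inputsize)
b = torch.zeros(10)
x = torch.randn(4, 5)
cols = torch.tensor([1, 3, 7])      # the "partition" of output columns
y = x.matmul(W[cols].t()) + b[cols]     # the full layer restricted to cols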

View File

@ -1,20 +0,0 @@
import torch
from .Module import Module
class Power(Module):
def __init__(self, p):
super(Power, self).__init__()
self.pow = p
def updateOutput(self, input):
self.output.resize_as_(input).copy_(input)
self.output.pow_(self.pow)
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input).copy_(input)
self.gradInput.pow_(self.pow - 1)
self.gradInput.mul_(gradOutput).mul_(self.pow)
return self.gradInput

View File

@ -1,51 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class RReLU(Module):
def __init__(self, lower=1. / 8, upper=1. / 3, inplace=False):
super(RReLU, self).__init__()
self.lower = lower
self.upper = upper
self.inplace = inplace
assert self.lower <= self.upper and self.lower >= 0 and self.upper >= 0
self.noise = torch.Tensor()
self.train = True
def updateOutput(self, input):
self._backend.RReLU_updateOutput(
self._backend.library_state,
input,
self.output,
self.noise,
self.lower,
self.upper,
self.train,
self.inplace,
torch.default_generator if not input.is_cuda else 0
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.RReLU_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.noise,
self.lower,
self.upper,
self.train,
self.inplace
)
return self.gradInput
def __repr__(self):
return super(RReLU, self).__repr__() + '({:.4f}, {:.4f})'.format(self.lower, self.upper)
def clearState(self):
clear(self, 'noise')
return super(RReLU, self).clearState()

View File

@ -1,8 +0,0 @@
import torch
from .Threshold import Threshold
class ReLU(Threshold):
def __init__(self, inplace=False):
super(ReLU, self).__init__(0, 0, inplace)

View File

@ -1,28 +0,0 @@
import torch
from .Module import Module
class ReLU6(Module):
def __init__(self, inplace=False):
super(ReLU6, self).__init__()
self.inplace = inplace
def updateOutput(self, input):
self._backend.HardTanh_updateOutput(
self._backend.library_state,
input,
self.output,
0, 6, self.inplace
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.HardTanh_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
0, 6, self.inplace
)
return self.gradInput

View File

@ -1,33 +0,0 @@
import torch
from .Module import Module
class Replicate(Module):
def __init__(self, nf, dim=0):
super(Replicate, self).__init__()
self.nfeatures = nf
self.dim = dim
assert self.dim >= 0
def updateOutput(self, input):
assert self.dim < input.dim()
size = list(input.size())
size.insert(self.dim, self.nfeatures)
stride = list(input.stride())
stride.insert(self.dim, 0)
self.output.set_(input.storage(), input.storage_offset(),
torch.Size(size), tuple(stride))
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput.resize_as_(input).zero_()
size = list(input.size())
size.insert(self.dim, 1)
gradInput = self.gradInput.view(*size)
torch.sum(gradOutput, self.dim, True, out=gradInput)
return self.gradInput

View File

@ -1,53 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class Reshape(Module):
def __init__(self, *args):
super(Reshape, self).__init__()
if len(args) == 1 and isinstance(args[0], torch.Size):
self.size = args[0]
else:
self.size = torch.Size(args)
self.nelement = 1
for s in self.size:
self.nelement *= s
self._input = None
self._gradOutput = None
def updateOutput(self, input):
if not input.is_contiguous():
if self._input is None:
self._input = input.new()
self._input.resize_as_(input)
self._input.copy_(input)
input = self._input
batchsize = [input.size(0)] + list(self.size)
self.output = input.view(torch.Size(batchsize))
return self.output
def updateGradInput(self, input, gradOutput):
if not gradOutput.is_contiguous():
if self._gradOutput is None:
self._gradOutput = gradOutput.new()
self._gradOutput.resize_as_(gradOutput)
self._gradOutput.copy_(gradOutput)
gradOutput = self._gradOutput
self.gradInput = gradOutput.view_as(input)
return self.gradInput
def __repr__(self):
return super(Reshape, self).__repr__() + \
'({})'.format('x'.join(map(lambda x: str(x), self.size)))
def clearState(self):
clear(self, '_input', '_gradOutput')
return super(Reshape, self).clearState()

View File

@ -1,23 +0,0 @@
import torch
from .Module import Module
class Select(Module):
def __init__(self, dimension, index):
super(Select, self).__init__()
self.dimension = dimension
self.index = index
def updateOutput(self, input):
index = self.index if self.index >= 0 else input.size(self.dimension) + self.index
output = input.select(self.dimension, index)
self.output.resize_as_(output)
return self.output.copy_(output)
def updateGradInput(self, input, gradOutput):
index = self.index if self.index >= 0 else input.size(self.dimension) + self.index
self.gradInput.resize_as_(input)
self.gradInput.zero_()
self.gradInput.select(self.dimension, index).copy_(gradOutput)
return self.gradInput

View File

@ -1,56 +0,0 @@
import torch
from .Module import Module
from .utils import recursiveCopy, clear
class SelectTable(Module):
def __init__(self, index):
super(SelectTable, self).__init__()
self.index = index
self.gradInput = []
def updateOutput(self, input):
# handle negative indices
index = self.index if self.index >= 0 else len(input) + self.index
assert len(input) > index
self.output = input[index]
return self.output
def _zeroTableCopy(self, l1, l2):
for i, v in enumerate(l2):
if isinstance(v, list):
if len(l1) > i:
l1[i] = self._zeroTableCopy(l1[i], l2[i])
else:
l1.append(self._zeroTableCopy([], l2[i]))
else:
if i >= len(l1):
l1.append(v.new().resize_as_(v).zero_())
else:
l1[i].resize_as_(v)
l1[i].zero_()
del l1[len(l2):]
return l1
def updateGradInput(self, input, gradOutput):
# make gradInput a zeroed copy of input
self._zeroTableCopy(self.gradInput, input)
# handle negative indices
index = self.index if self.index >= 0 else len(input) + self.index
# copy into gradInput[index] (necessary for variable sized inputs)
assert self.gradInput[index] is not None
recursiveCopy(self.gradInput[index], gradOutput)
return self.gradInput
def type(self, type, tensorCache=None):
del self.gradInput[:]
if isinstance(self.output, list):
del self.output[:]
return super(SelectTable, self).type(type, tensorCache)
def __repr__(self):
return super(SelectTable, self).__repr__() + '({})'.format(self.index)
def clearState(self):
clear(self, 'gradInput')

View File

@ -1,86 +0,0 @@
import torch
from .Container import Container
class Sequential(Container):
def __len__(self):
return len(self.modules)
def add(self, module):
if len(self.modules) == 0:
self.gradInput = module.gradInput
self.modules.append(module)
self.output = module.output
return self
def insert(self, module, index):
self.modules.insert(index, module)
self.output = self.modules[-1].output
self.gradInput = self.modules[0].gradInput
def remove(self, index=-1):
del self.modules[index]
if len(self.modules) > 0:
self.output = self.modules[-1].output
self.gradInput = self.modules[0].gradInput
else:
self.output = torch.Tensor()
self.gradInput = torch.Tensor()
def updateOutput(self, input):
currentOutput = input
for i, module in enumerate(self.modules):
currentOutput = module.updateOutput(currentOutput)
self.output = currentOutput
return self.output
def _iter_with_prev(self):
return zip(self.modules[-2::-1], self.modules[-1:0:-1])
def updateGradInput(self, input, gradOutput):
currentGradOutput = gradOutput
for prev, current in self._iter_with_prev():
currentGradOutput = current.updateGradInput(prev.output, currentGradOutput)
self.gradInput = self.modules[0].updateGradInput(input, currentGradOutput)
return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
currentGradOutput = gradOutput
for prev, current in self._iter_with_prev():
current.accGradParameters(prev.output, currentGradOutput, scale)
currentGradOutput = current.gradInput
self.modules[0].accGradParameters(input, currentGradOutput, scale)
def backward(self, input, gradOutput, scale=1):
currentGradOutput = gradOutput
for prev, current in self._iter_with_prev():
currentGradOutput = current.backward(prev.output, currentGradOutput, scale)
# currentModule.gradInput = currentGradOutput
self.gradInput = self.modules[0].backward(input, currentGradOutput, scale)
return self.gradInput
def accUpdateGradParameters(self, input, gradOutput, lr):
currentGradOutput = gradOutput
for prev, current in self._iter_with_prev():
current.accUpdateGradParameters(prev.output, currentGradOutput, lr)
currentGradOutput = current.gradInput
self.modules[0].accUpdateGradParameters(input, currentGradOutput, lr)
def __repr__(self):
tab = ' '
line = '\n'
next = ' -> '
res = 'nn.Sequential'
res = res + ' {' + line + tab + '[input'
for i in range(len(self.modules)):
res = res + next + '(' + str(i) + ')'
res = res + next + 'output]'
for i in range(len(self.modules)):
res = res + line + tab + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab)
res = res + line + '}'
return res

View File

@ -1,22 +0,0 @@
import torch
from .Module import Module
class Sigmoid(Module):
def updateOutput(self, input):
self._backend.Sigmoid_updateOutput(
self._backend.library_state,
input,
self.output
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.Sigmoid_updateGradInput(
self._backend.library_state,
gradOutput,
self.gradInput,
self.output
)
return self.gradInput

View File

@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class SmoothL1Criterion(Criterion):
def __init__(self, sizeAverage=True):
super(SmoothL1Criterion, self).__init__()
self.sizeAverage = sizeAverage
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.SmoothL1Criterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.SmoothL1Criterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion
class SoftMarginCriterion(Criterion):
def __init__(self, ):
super(SoftMarginCriterion, self).__init__()
self.sizeAverage = True
self.output_tensor = None
def updateOutput(self, input, target):
if self.output_tensor is None:
self.output_tensor = input.new(1)
self._backend.SoftMarginCriterion_updateOutput(
self._backend.library_state,
input,
target,
self.output_tensor,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
self.output = self.output_tensor[0].item()
return self.output
def updateGradInput(self, input, target):
implicit_gradOutput = torch.ones(1).type_as(input)
self._backend.SoftMarginCriterion_updateGradInput(
self._backend.library_state,
input,
target,
implicit_gradOutput,
self.gradInput,
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
)
return self.gradInput

View File

@ -1,25 +0,0 @@
import torch
from .Module import Module
class SoftMax(Module):
def __init__(self, dim=None):
super(SoftMax, self).__init__()
if dim is not None:
self.dim = dim
def _get_dim(self, input):
return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)
def updateOutput(self, input):
self.output = torch.softmax(input, self._get_dim(input))
return self.output
def updateGradInput(self, input, gradOutput):
self.gradInput = torch.softmax_backward_data(
gradOutput,
self.output,
self._get_dim(input),
input)
return self.gradInput

View File

@ -1,43 +0,0 @@
import torch
from .Module import Module
from .utils import clear
class SoftMin(Module):
def __init__(self, dim=None):
super(SoftMin, self).__init__()
self.mininput = None
if dim is not None:
self.dim = dim
def _get_dim(self, input):
return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)
def updateOutput(self, input):
if self.mininput is None:
self.mininput = input.new()
self.mininput.resize_as_(input).copy_(input).mul_(-1)
self.output = torch.softmax(
self.mininput,
self._get_dim(input)
)
return self.output
def updateGradInput(self, input, gradOutput):
if self.mininput is None:
self.mininput = input.new()
self.mininput.resize_as_(input).copy_(input).mul_(-1)
self.gradInput = torch.softmax_backward_data(
gradOutput,
self.output,
self._get_dim(input),
self.mininput
)
self.gradInput.mul_(-1)
return self.gradInput
def clearState(self):
clear(self, 'mininput')
return super(SoftMin, self).clearState()

View File

@ -1,38 +0,0 @@
import torch
from .Module import Module
class SoftPlus(Module):
def __init__(self, beta=1, threshold=20):
super(SoftPlus, self).__init__()
self.beta = beta # Beta controls sharpness of transfer function
self.threshold = threshold # Avoid floating point issues with exp(x), x>20
def updateOutput(self, input):
# f(x) = 1/beta * log(1 + exp(beta * x))
self._backend.SoftPlus_updateOutput(
self._backend.library_state,
input,
self.output,
self.beta,
self.threshold
)
return self.output
def updateGradInput(self, input, gradOutput):
# d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
# SINCE
# y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
# THEREFORE:
# d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
self._backend.SoftPlus_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.output,
self.beta,
self.threshold
)
return self.gradInput
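# A hedged numeric check, separate from the SoftPlus module above, of the
# formulas in its comments: f(x) = (1/beta) * log(1 + exp(beta*x)) and
# df/dx = sigmoid(beta*x); beta and the input are arbitrary.
import torch

beta = 1.5
x = torch.randn(6, requires_grad=True)
y = torch.log1p(torch.exp(beta * x)) / beta
y.sum().backward()
assert torch.allclose(x.grad, torch.sigmoid(beta * x))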

View File

@ -1,28 +0,0 @@
import torch
from .Module import Module
class SoftShrink(Module):
def __init__(self, lambd=0.5):
super(SoftShrink, self).__init__()
self.lambd = lambd
def updateOutput(self, input):
self._backend.SoftShrink_updateOutput(
self._backend.library_state,
input,
self.output,
self.lambd
)
return self.output
def updateGradInput(self, input, gradOutput):
self._backend.SoftShrink_updateGradInput(
self._backend.library_state,
input,
gradOutput,
self.gradInput,
self.lambd
)
return self.gradInput

Some files were not shown because too many files have changed in this diff.