mirror of https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00

commit d8f6be686d (parent 24ec813967), committed by Facebook Github Bot

Remove torch/legacy (#11823)

Summary: Largely unused and hinders current development.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11823
Differential Revision: D9925094
Pulled By: cpuhrsch
fbshipit-source-id: c797f62180e2128f9a567b0c57c8347957470ea5
@@ -27,7 +27,6 @@ TESTS = [
    'distributions',
    'indexing',
    'jit',
    'legacy_nn',
    'multiprocessing',
    'nccl',
    'nn',

(File diff suppressed because it is too large)
@@ -22,7 +22,6 @@ import torch.nn.functional as F
import torch.nn.parallel as dp
import torch.nn.init as init
import torch.nn.utils.rnn as rnn_utils
import torch.legacy.nn as legacy
from torch.nn.utils import clip_grad_norm_, clip_grad_value_
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from torch.autograd import Variable, gradcheck
@@ -5821,42 +5820,6 @@ class TestNN(NNTestCase):
        expected = m(inp.view(6, 5)).view(2, 3, 8)
        self.assertEqual(expected, m(inp))

    def test_bilinear(self):
        module = nn.Bilinear(10, 10, 8)
        module_legacy = legacy.Bilinear(10, 10, 8)

        module_legacy.weight.copy_(module.weight.data)
        module_legacy.bias.copy_(module.bias.data)

        input1 = torch.randn(4, 10)
        input2 = torch.randn(4, 10)

        output = module(Variable(input1), Variable(input2))
        output_legacy = module_legacy.forward([input1, input2])

        self.assertEqual(output.data, output_legacy)

        input1_1 = torch.tensor(input1, requires_grad=True)
        input2_1 = torch.tensor(input2, requires_grad=True)

        module.zero_grad()
        module_legacy.zeroGradParameters()

        output = module(input1_1, input2_1)
        grad_output = torch.randn(*output.size())
        gi1_legacy, gi2_legacy = module_legacy.backward([input1, input2], grad_output)
        output.backward(grad_output)
        gi1 = input1_1.grad.data.clone()
        gi2 = input2_1.grad.data.clone()

        self.assertEqual(gi1, gi1_legacy)
        self.assertEqual(gi2, gi2_legacy)
        self.assertEqual(module.weight.grad.data, module_legacy.gradWeight)
        self.assertEqual(module.bias.grad.data, module_legacy.gradBias)

        _assertGradAndGradgradChecks(self, lambda x1, x2: F.bilinear(x1, x2, module.weight, module.bias),
                                     (input1_1, input2_1))

    def test_bilinear_no_bias(self):
        module = nn.Bilinear(10, 10, 8)
        module_no_bias = nn.Bilinear(10, 10, 8, False)
@@ -5,7 +5,6 @@ from copy import deepcopy
import torch
from torch._six import inf
import torch.optim as optim
import torch.legacy.optim as old_optim
import torch.nn.functional as F
from torch.optim import SGD
from torch.autograd import Variable
@@ -24,44 +23,7 @@ def drosenbrock(tensor):
    return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))


def wrap_old_fn(old_fn, **config):
    def wrapper(closure, params, state):
        return old_fn(closure, params, config, state)
    return wrapper

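The tests below minimize rosenbrock, whose definition sits just above the context shown in this hunk; drosenbrock above returns its analytic gradient. For reference, a minimal sketch of the standard two-variable Rosenbrock function these correspond to (illustrative, assuming the usual definition):

def rosenbrock(tensor):
    # f(x, y) = (1 - x)^2 + 100 * (y - x^2)^2; drosenbrock above is its gradient
    x, y = tensor
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
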

class TestOptim(TestCase):
    def _test_rosenbrock(self, constructor, old_fn):
        params_t = torch.Tensor([1.5, 1.5])
        state = {}

        params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
        optimizer = constructor([params])

        solution = torch.Tensor([1, 1])
        initial_dist = params.data.dist(solution)

        def eval():
            optimizer.zero_grad()
            loss = rosenbrock(params)
            loss.backward()
            # loss.backward() will give **slightly** different
            # gradients than drosenbrock, because of a different ordering
            # of floating point operations. In most cases it doesn't matter,
            # but some optimizers are so sensitive that they can temporarily
            # diverge up to 1e-4, just to converge again. This makes the
            # comparison more stable.
            params.grad.data.copy_(drosenbrock(params.data))
            return loss

        for i in range(2000):
            optimizer.step(eval)
            old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
                   params_t, state)
            self.assertEqual(params.data, params_t)

        self.assertLessEqual(params.data.dist(solution), initial_dist)

    def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
        params_t = torch.Tensor([1.5, 1.5])
@@ -237,16 +199,6 @@ class TestOptim(TestCase):
        return [dict(params=bias, **kwargs)]

    def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                     dampening=0, weight_decay=1e-4),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                        dampening=0, weightDecay=1e-4)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
@@ -273,14 +225,6 @@ class TestOptim(TestCase):
        )

    def test_adam(self):
        self._test_rosenbrock(
            lambda params: optim.Adam(params, lr=1e-2),
            wrap_old_fn(old_optim.adam, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
        )
@@ -310,18 +254,6 @@ class TestOptim(TestCase):
            optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))

    def test_adadelta(self):
        self._test_rosenbrock(
            lambda params: optim.Adadelta(params),
            wrap_old_fn(old_optim.adadelta)
        )
        self._test_rosenbrock(
            lambda params: optim.Adadelta(params, rho=0.95),
            wrap_old_fn(old_optim.adadelta, rho=0.95)
        )
        self._test_rosenbrock(
            lambda params: optim.Adadelta(params, weight_decay=1e-2),
            wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta([weight, bias])
        )
@@ -333,18 +265,6 @@ class TestOptim(TestCase):
            optim.Adadelta(None, lr=1e-2, rho=1.1)

    def test_adagrad(self):
        self._test_rosenbrock(
            lambda params: optim.Adagrad(params, lr=1e-1),
            wrap_old_fn(old_optim.adagrad, learningRate=1e-1)
        )
        self._test_rosenbrock(
            lambda params: optim.Adagrad(params, lr=1e-1, lr_decay=1e-3),
            wrap_old_fn(old_optim.adagrad, learningRate=1e-1, learningRateDecay=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.Adagrad(params, lr=1e-1, weight_decay=1e-2),
            wrap_old_fn(old_optim.adagrad, learningRate=1e-1, weightDecay=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
        )
@@ -367,18 +287,6 @@ class TestOptim(TestCase):

    @skipIfRocm
    def test_adamax(self):
        self._test_rosenbrock(
            lambda params: optim.Adamax(params, lr=1e-1),
            wrap_old_fn(old_optim.adamax, learningRate=1e-1)
        )
        self._test_rosenbrock(
            lambda params: optim.Adamax(params, lr=1e-1, weight_decay=1e-2),
            wrap_old_fn(old_optim.adamax, learningRate=1e-1, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.Adamax(params, lr=1e-1, betas=(0.95, 0.998)),
            wrap_old_fn(old_optim.adamax, learningRate=1e-1, beta1=0.95, beta2=0.998)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
        )
@@ -391,18 +299,6 @@ class TestOptim(TestCase):
            optim.Adamax(None, lr=1e-2, betas=(0.0, 1.0))

    def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2)
        )
@@ -415,18 +311,6 @@ class TestOptim(TestCase):
            optim.RMSprop(None, lr=1e-2, momentum=-1.0)

    def test_asgd(self):
        self._test_rosenbrock(
            lambda params: optim.ASGD(params, lr=1e-3),
            wrap_old_fn(old_optim.asgd, eta0=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
            wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8)
        )
        self._test_rosenbrock(
            lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
            wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
        )
@@ -440,18 +324,6 @@ class TestOptim(TestCase):

    @skipIfRocm
    def test_rprop(self):
        self._test_rosenbrock(
            lambda params: optim.Rprop(params, lr=1e-3),
            wrap_old_fn(old_optim.rprop, stepsize=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.Rprop(params, lr=1e-3, etas=(0.6, 1.1)),
            wrap_old_fn(old_optim.rprop, stepsize=1e-3, etaminus=0.6, etaplus=1.1)
        )
        self._test_rosenbrock(
            lambda params: optim.Rprop(params, lr=1e-3, step_sizes=(1e-4, 3)),
            wrap_old_fn(old_optim.rprop, stepsize=1e-3, stepsizemin=1e-4, stepsizemax=3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
        )
@@ -464,14 +336,6 @@ class TestOptim(TestCase):
            optim.Rprop(None, lr=1e-2, etas=(1.0, 0.5))

    def test_lbfgs(self):
        self._test_rosenbrock(
            lambda params: optim.LBFGS(params),
            wrap_old_fn(old_optim.lbfgs)
        )
        self._test_rosenbrock(
            lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
            wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.LBFGS([weight, bias]),
            ignore_multidevice=True
@@ -441,98 +441,6 @@ class TestFFI(TestCase):
                          lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))


class TestLuaReader(TestCase):

    @staticmethod
    def _module_test(name, test):
        def do_test(self):
            module = test['module']
            input = test['input']
            grad_output = test['grad_output']
            if hasattr(self, '_transform_' + name):
                input = getattr(self, '_transform_' + name)(input)
            output = module.forward(input)
            module.zeroGradParameters()
            grad_input = module.backward(input, grad_output)
            self.assertEqual(output, test['output'])
            self.assertEqual(grad_input, test['grad_input'])
            if module.parameters() is not None:
                params, d_params = module.parameters()
                self.assertEqual(params, test['params'])
                self.assertEqual(d_params, test['d_params'])
            else:
                self.assertFalse('params' in test and test['params'])
                self.assertFalse('params' in test and test['d_params'])
        return do_test

    @staticmethod
    def _criterion_test(name, test):
        def do_test(self):
            module = test['module']
            input = test['input']
            if name == 'L1Cost':
                target = None
            else:
                target = test['target']
            if hasattr(self, '_transform_' + name):
                input, target = getattr(self, '_transform_' + name)(input, target)

            output = module.forward(input, target)
            grad_input = module.backward(input, target)
            self.assertEqual(output, test['loss'])
            self.assertEqual(grad_input, test['grad_input'])
        return do_test

    @classmethod
    def init(cls):
        try:
            path = download_file('https://download.pytorch.org/test_data/legacy_modules.t7')
        except unittest.SkipTest:
            return
        long_size = 8 if sys.platform == 'win32' else None
        tests = load_lua(path, long_size=long_size)
        for name, test in tests['modules'].items():
            if name == "HardShrink":
                continue
            test_name = 'test_' + name.replace('nn.', '')
            setattr(cls, test_name, cls._module_test(name, test))
        for name, test in tests['criterions'].items():
            if name == "HardShrink":
                continue
            test_name = 'test_' + name.replace('nn.', '')
            setattr(cls, test_name, cls._criterion_test(name, test))

    def _transform_Index(self, input):
        return [input[0], input[1].sub(1)]

    def _transform_LookupTable(self, input):
        return input.sub(1)

    def _transform_MultiLabelMarginCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_ClassNLLCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_SpatialClassNLLCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_ClassSimplexCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_CrossEntropyCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_ParallelCriterion(self, input, target):
        return input, [target[0].sub(1), target[1]]

    def _transform_MultiCriterion(self, input, target):
        return input, target.sub(1)

    def _transform_MultiMarginCriterion(self, input, target):
        return input, target.sub(1)


@unittest.skipIf('SKIP_TEST_BOTTLENECK' in os.environ.keys(), 'SKIP_TEST_BOTTLENECK is set')
class TestBottleneck(TestCase):
    def _run(self, command):
@@ -700,6 +608,4 @@ class TestONNXUtils(TestCase):


if __name__ == '__main__':
    from torch.utils.serialization import load_lua
    TestLuaReader.init()
    run_tests()
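TestLuaReader above builds one test per entry of a serialized Lua archive read with load_lua. A minimal sketch of that reader, assuming a build that still ships torch.utils.serialization (the file path and the 'nn.Tanh' entry name are illustrative):

from torch.utils.serialization import load_lua

tests = load_lua('legacy_modules.t7', long_size=None)  # long_size=8 was needed on Windows
case = tests['modules']['nn.Tanh']                     # hypothetical archive entry
output = case['module'].forward(case['input'])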
torch/legacy/README.md (new file, 1 line)
@@ -0,0 +1 @@
If you're looking for this legacy code please consider versions of PyTorch before 0.5
@@ -1,7 +0,0 @@
"""Package containing code ported from Lua torch.

To make it possible to work with existing models and ease the transition
for current Lua torch users, we've created this package. You can find the
``nn`` code in ``torch.legacy.nn``, and ``optim`` in ``torch.legacy.optim``.
The APIs should exactly match Lua torch.
"""
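For readers migrating off this package, a minimal side-by-side sketch of the two calling conventions, assuming a pre-0.5 build where torch.legacy is still importable (the choice of Linear is illustrative):

import torch
import torch.nn as nn
import torch.legacy.nn as legacy_nn

x = torch.randn(4, 10)

old = legacy_nn.Linear(10, 8)      # Lua-style: explicit forward/backward, camelCase methods
y_old = old.forward(x)
old.zeroGradParameters()

new = nn.Linear(10, 8)             # autograd-style: __call__ runs forward, grads via backward()
y_new = new(x)
new.zero_grad()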
@@ -1,25 +0,0 @@
import torch
from .Module import Module


class Abs(Module):

    def __init__(self):
        super(Abs, self).__init__()

    def updateOutput(self, input):
        self._backend.Abs_updateOutput(
            self._backend.library_state,
            input,
            self.output
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.Abs_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput
        )
        return self.gradInput
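The module above is an elementwise absolute value; a minimal autograd sketch of the same forward/backward behaviour:

import torch

x = torch.randn(5, requires_grad=True)
y = x.abs()                      # what updateOutput computes
y.backward(torch.ones_like(y))   # updateGradInput: x.grad is sign(x) * gradOutput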
@@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class AbsCriterion(Criterion):

    def __init__(self, sizeAverage=True):
        super(AbsCriterion, self).__init__()
        self.sizeAverage = sizeAverage
        self.output_tensor = torch.Tensor(1)

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        self._backend.AbsCriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.AbsCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        return self.gradInput
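With sizeAverage=True this criterion is the mean absolute error, which modern code expresses with torch.nn.L1Loss; a minimal sketch (shapes illustrative):

import torch
import torch.nn as nn

inp, target = torch.randn(3, 5), torch.randn(3, 5)
loss = nn.L1Loss(reduction='mean')(inp, target)  # matches AbsCriterion(sizeAverage=True)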
@ -1,57 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Add(Module):
|
||||
|
||||
def __init__(self, inputSize, scalar=False):
|
||||
super(Add, self).__init__()
|
||||
size = inputSize
|
||||
if scalar:
|
||||
assert size == 1
|
||||
self.scalar = scalar
|
||||
self.bias = torch.Tensor(size)
|
||||
self.gradBias = torch.Tensor(size)
|
||||
|
||||
self._ones = torch.Tensor((1,))
|
||||
|
||||
self.reset()
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.bias.size(0))
|
||||
|
||||
self.bias.uniform_(-stdv, stdv)
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_as_(input).copy_(input)
|
||||
if self.scalar:
|
||||
self.output.add_(self.bias[0])
|
||||
else:
|
||||
batchSize = input.size(0)
|
||||
if self._ones.size(0) != batchSize:
|
||||
self._ones.resize_(batchSize).fill_(1)
|
||||
|
||||
bias = self.bias.view(-1)
|
||||
output = self.output.view(batchSize, -1)
|
||||
output.addr_(self._ones, bias)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput is not None:
|
||||
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
if self.gradBias.size(0) == 1:
|
||||
self.gradBias[0] = self.gradBias[0] + scale * gradOutput.sum()
|
||||
else:
|
||||
if input.is_same_size(self.bias):
|
||||
self.gradBias.add_(scale, gradOutput)
|
||||
else:
|
||||
gradOutput = gradOutput.contiguous().view(input.size(0), -1)
|
||||
self.gradBias.view(-1).addmv_(scale, gradOutput.t(), self._ones)
|
@ -1,32 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class AddConstant(Module):
|
||||
|
||||
def __init__(self, constant_scalar, inplace=False):
|
||||
super(AddConstant, self).__init__()
|
||||
self.constant_scalar = constant_scalar
|
||||
self.inplace = inplace
|
||||
|
||||
def updateOutput(self, input):
|
||||
if self.inplace:
|
||||
input.add_(self.constant_scalar)
|
||||
self.output.set_(input)
|
||||
else:
|
||||
self.output.resize_as_(input)
|
||||
self.output.copy_(input)
|
||||
self.output.add_(self.constant_scalar)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.inplace:
|
||||
self.gradInput.set_(gradOutput)
|
||||
# restore previous input value
|
||||
input.add_(-self.constant_scalar)
|
||||
else:
|
||||
self.gradInput.resize_as_(gradOutput)
|
||||
self.gradInput.copy_(gradOutput)
|
||||
|
||||
return self.gradInput
|
@ -1,95 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
|
||||
# TODO: use THNN
|
||||
|
||||
|
||||
class BCECriterion(Criterion):
|
||||
eps = 1e-12
|
||||
|
||||
def __init__(self, weights=None, sizeAverage=True):
|
||||
if weights is not None and weights.dim() != 1:
|
||||
raise ValueError("weights input should be 1D Tensor")
|
||||
|
||||
super(BCECriterion, self).__init__()
|
||||
self.sizeAverage = sizeAverage
|
||||
self.buffer = None
|
||||
self.weights = weights
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
# - log(input) * target - log(1 - input) * (1 - target)
|
||||
if input.nelement() != target.nelement():
|
||||
raise RuntimeError("input and target size mismatch")
|
||||
|
||||
if self.buffer is None:
|
||||
self.buffer = input.new()
|
||||
|
||||
buffer = self.buffer
|
||||
weights = self.weights
|
||||
|
||||
buffer.resize_as_(input)
|
||||
|
||||
if weights is not None and target.dim() != 1:
|
||||
weights = self.weights.view(1, target.size(1)).expand_as(target)
|
||||
|
||||
# log(input) * target
|
||||
torch.add(input, self.eps, out=buffer).log_()
|
||||
if weights is not None:
|
||||
buffer.mul_(weights)
|
||||
|
||||
target_1d = target.contiguous().view(-1)
|
||||
# don't save a 1-d view of buffer: it should already be contiguous, and it's
|
||||
# used as non-1d tensor later.
|
||||
output = torch.dot(target_1d, buffer.contiguous().view(-1))
|
||||
|
||||
# log(1 - input) * (1 - target)
|
||||
torch.mul(input, -1, out=buffer).add_(1 + self.eps).log_()
|
||||
if weights is not None:
|
||||
buffer.mul_(weights)
|
||||
|
||||
output = output + torch.sum(buffer)
|
||||
output = output - torch.dot(target_1d, buffer.contiguous().view(-1))
|
||||
|
||||
if self.sizeAverage:
|
||||
output = output / input.nelement()
|
||||
|
||||
self.output = - output.item()
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
# - (target - input) / ( input (1 - input) )
|
||||
# The gradient is slightly incorrect:
|
||||
# It should have been divided by (input + self.eps) (1 - input + self.eps)
|
||||
# but it is divided by input (1 - input + self.eps) + self.eps
|
||||
# This modification requires less memory to be computed.
|
||||
if input.nelement() != target.nelement():
|
||||
raise RuntimeError("input and target size mismatch")
|
||||
|
||||
if self.buffer is None:
|
||||
self.buffer = input.new()
|
||||
|
||||
buffer = self.buffer
|
||||
weights = self.weights
|
||||
gradInput = self.gradInput
|
||||
|
||||
if weights is not None and target.dim() != 1:
|
||||
weights = self.weights.view(1, target.size(1)).expand_as(target)
|
||||
|
||||
buffer.resize_as_(input)
|
||||
# - x ( 1 + self.eps -x ) + self.eps
|
||||
torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps)
|
||||
|
||||
gradInput.resize_as_(input)
|
||||
# y - x
|
||||
torch.add(target, -1, input, out=gradInput)
|
||||
# - (y - x) / ( x ( 1 + self.eps -x ) + self.eps )
|
||||
gradInput.div_(buffer)
|
||||
|
||||
if weights is not None:
|
||||
gradInput.mul_(weights)
|
||||
|
||||
if self.sizeAverage:
|
||||
gradInput.div_(target.nelement())
|
||||
|
||||
return gradInput
|
@ -1,192 +0,0 @@
|
||||
"""
|
||||
This file implements Batch Normalization as described in the paper:
|
||||
"Batch Normalization: Accelerating Deep Network Training
|
||||
by Reducing Internal Covariate Shift"
|
||||
by Sergey Ioffe, Christian Szegedy
|
||||
|
||||
This implementation is useful for inputs NOT coming from convolution layers.
|
||||
For convolution layers, use nn.SpatialBatchNormalization.
|
||||
|
||||
The operation implemented is:
|
||||
y = ( x - mean(x) )
|
||||
########## * gamma + beta
|
||||
standard-deviation(x)
|
||||
where gamma and beta are learnable parameters.
|
||||
|
||||
The learning of gamma and beta is optional.
|
||||
|
||||
Usage:
|
||||
with learnable parameters: nn.BatchNormalization(N [, eps] [, momentum])
|
||||
where N = dimensionality of input
|
||||
without learnable parameters: nn.BatchNormalization(N [, eps] [, momentum], False)
|
||||
|
||||
eps is a small value added to the standard-deviation to avoid divide-by-zero.
|
||||
Defaults to 1e-5
|
||||
|
||||
In training time, this layer keeps a running estimate of it's computed mean and std.
|
||||
The running sum is kept with a default momentum of 0.1 (unless over-ridden)
|
||||
In test time, this running mean/std is used to normalize.
|
||||
"""
|
||||
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class BatchNormalization(Module):
|
||||
# expected dimension of input
|
||||
nDim = 2
|
||||
|
||||
def __init__(self, nOutput, eps=1e-5, momentum=0.1, affine=True):
|
||||
super(BatchNormalization, self).__init__()
|
||||
assert nOutput != 0
|
||||
|
||||
self.affine = affine
|
||||
self.eps = eps
|
||||
self.train = True
|
||||
self.momentum = momentum
|
||||
self.running_mean = torch.zeros(nOutput)
|
||||
self.running_var = torch.ones(nOutput)
|
||||
|
||||
self.save_mean = None
|
||||
self.save_std = None
|
||||
self._input = None
|
||||
self._gradOutput = None
|
||||
|
||||
if self.affine:
|
||||
self.weight = torch.Tensor(nOutput)
|
||||
self.bias = torch.Tensor(nOutput)
|
||||
self.gradWeight = torch.Tensor(nOutput)
|
||||
self.gradBias = torch.Tensor(nOutput)
|
||||
self.reset()
|
||||
else:
|
||||
self.weight = None
|
||||
self.bias = None
|
||||
self.gradWeight = None
|
||||
self.gradBias = None
|
||||
|
||||
def reset(self):
|
||||
if self.weight is not None:
|
||||
self.weight.uniform_()
|
||||
|
||||
if self.bias is not None:
|
||||
self.bias.zero_()
|
||||
|
||||
self.running_mean.zero_()
|
||||
self.running_var.fill_(1)
|
||||
|
||||
def _checkInputDim(self, input):
|
||||
if input.dim() != self.nDim:
|
||||
raise RuntimeError(
|
||||
'only mini-batch supported ({}D tensor), got {}D tensor instead'.format(self.nDim, input.dim()))
|
||||
if input.size(1) != self.running_mean.nelement():
|
||||
raise RuntimeError('got {}-feature tensor, expected {}'.format(input.size(1), self.running_mean.nelement()))
|
||||
|
||||
def _makeContiguous(self, input, gradOutput=None):
|
||||
if not input.is_contiguous():
|
||||
if self._input is None:
|
||||
self._input = input.new()
|
||||
self._input.resize_as_(input).copy_(input)
|
||||
input = self._input
|
||||
|
||||
if gradOutput is not None:
|
||||
if not gradOutput.is_contiguous():
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = gradOutput.new()
|
||||
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
gradOutput = self._gradOutput
|
||||
|
||||
return input, gradOutput
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._checkInputDim(input)
|
||||
|
||||
input = self._makeContiguous(input)[0]
|
||||
|
||||
self.output.resize_as_(input)
|
||||
if self.save_mean is None:
|
||||
self.save_mean = input.new()
|
||||
self.save_mean.resize_as_(self.running_mean)
|
||||
if self.save_std is None:
|
||||
self.save_std = input.new()
|
||||
self.save_std.resize_as_(self.running_var)
|
||||
|
||||
self._backend.BatchNormalization_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.weight,
|
||||
self.bias,
|
||||
self.running_mean,
|
||||
self.running_var,
|
||||
self.save_mean,
|
||||
self.save_std,
|
||||
self.train,
|
||||
self.momentum,
|
||||
self.eps
|
||||
)
|
||||
|
||||
return self.output
|
||||
|
||||
def _backward(self, input, gradOutput, scale, gradInput=None, gradWeight=None, gradBias=None):
|
||||
self._checkInputDim(input)
|
||||
self._checkInputDim(gradOutput)
|
||||
if not hasattr(self, 'save_mean') or not hasattr(self, 'save_std'):
|
||||
raise RuntimeError('you have to call updateOutput() at least once before backward()')
|
||||
|
||||
input, gradOutput = self._makeContiguous(input, gradOutput)
|
||||
|
||||
scale = scale or 1.
|
||||
if gradInput is not None:
|
||||
gradInput.resize_as_(gradOutput)
|
||||
|
||||
self._backend.BatchNormalization_backward(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
gradInput,
|
||||
gradWeight,
|
||||
gradBias,
|
||||
self.weight,
|
||||
self.running_mean,
|
||||
self.running_var,
|
||||
self.save_mean,
|
||||
self.save_std,
|
||||
self.train,
|
||||
scale,
|
||||
self.eps
|
||||
)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def backward(self, input, gradOutput, scale=1.):
|
||||
return self._backward(input, gradOutput, scale, self.gradInput, self.gradWeight, self.gradBias)
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
return self._backward(input, gradOutput, 1., self.gradInput)
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1.):
|
||||
return self._backward(input, gradOutput, scale, None, self.gradWeight, self.gradBias)
|
||||
|
||||
def read(self, file, version):
|
||||
super(BatchNormalization, self).read(self, file)
|
||||
if version < 2:
|
||||
if self.running_std:
|
||||
self.running_var = self.running_std.pow_(-2).add_(-self.eps)
|
||||
self.running_std = None
|
||||
|
||||
def clearState(self):
|
||||
# first 5 buffers are not present in the current implementation,
|
||||
# but we keep them for cleaning old saved models
|
||||
clear(self, [
|
||||
'buffer',
|
||||
'buffer2',
|
||||
'centered',
|
||||
'std',
|
||||
'normalized',
|
||||
'_input',
|
||||
'_gradOutput',
|
||||
'save_mean',
|
||||
'save_std',
|
||||
])
|
||||
return super(BatchNormalization, self).clearState()
|
@ -1,137 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Bilinear(Module):
|
||||
|
||||
def _assertInput(self, input):
|
||||
if len(input) != 2 or not isinstance(input[0], torch.Tensor) or not isinstance(input[1], torch.Tensor):
|
||||
raise RuntimeError('input should be a table containing two data Tensors')
|
||||
if input[0].ndimension() != 2 or input[1].ndimension() != 2:
|
||||
raise RuntimeError('input Tensors should be two-dimensional')
|
||||
if input[0].size(0) != input[1].size(0):
|
||||
raise RuntimeError('input Tensors should have the same number of rows')
|
||||
if input[0].size(1) != self.weight.size(1):
|
||||
raise RuntimeError('dimensionality of first input is erroneous')
|
||||
if input[1].size(1) != self.weight.size(2):
|
||||
raise RuntimeError('dimensionality of second input is erroneous')
|
||||
|
||||
def _assertInputGradOutput(self, input, gradOutput):
|
||||
if input[0].size(0) != gradOutput.size(0):
|
||||
raise RuntimeError('number of rows in gradOutput does not match input')
|
||||
if gradOutput.size(1) != self.weight.size(0):
|
||||
raise RuntimeError('number of columns in gradOutput does not match layer\'s output size')
|
||||
|
||||
def __init__(self, inputSize1, inputSize2, outputSize, bias=True):
|
||||
# set up model:
|
||||
super(Bilinear, self).__init__()
|
||||
self.weight = torch.Tensor(outputSize, inputSize1, inputSize2)
|
||||
self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2)
|
||||
if bias:
|
||||
self.bias = torch.Tensor(outputSize)
|
||||
self.gradBias = torch.Tensor(outputSize)
|
||||
else:
|
||||
self.bias = None
|
||||
self.gradBias = None
|
||||
|
||||
self.buff1 = None
|
||||
self.buff2 = None
|
||||
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
self.reset()
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.weight.size(1))
|
||||
|
||||
self.weight.uniform_(-stdv, stdv)
|
||||
if self.bias is not None:
|
||||
self.bias.uniform_(-stdv, stdv)
|
||||
return self
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._assertInput(input)
|
||||
|
||||
# set up buffer:
|
||||
if self.buff2 is None:
|
||||
self.buff2 = input[0].new()
|
||||
self.buff2.resize_as_(input[1])
|
||||
|
||||
# compute output scores:
|
||||
self.output.resize_(input[0].size(0), self.weight.size(0))
|
||||
for k in range(self.weight.size(0)):
|
||||
torch.mm(input[0], self.weight[k], out=self.buff2)
|
||||
self.buff2.mul_(input[1])
|
||||
torch.sum(self.buff2, 1, True, out=self.output.narrow(1, k, 1))
|
||||
|
||||
if self.bias is not None:
|
||||
self.output.add_(self.bias.view(1, self.bias.nelement()).expand_as(self.output))
|
||||
|
||||
return self.output
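updateOutput above loops over output features and computes, for each k, the bilinear form input1 · weight[k] · input2ᵀ plus a bias; a minimal einsum sketch of the same computation (shapes illustrative):

import torch

x1, x2 = torch.randn(4, 10), torch.randn(4, 12)
W, b = torch.randn(8, 10, 12), torch.randn(8)
# output[n, k] = b[k] + sum_ij x1[n, i] * W[k, i, j] * x2[n, j]
y = torch.einsum('ni,kij,nj->nk', x1, W, x2) + b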
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput is None:
|
||||
return
|
||||
|
||||
self._assertInputGradOutput(input, gradOutput)
|
||||
# compute d output / d input:
|
||||
self.gradInput[0].resize_as_(input[0]).fill_(0)
|
||||
self.gradInput[1].resize_as_(input[1]).fill_(0)
|
||||
|
||||
#: first slice of weight tensor (k = 1)
|
||||
self.gradInput[0].addmm_(input[1], self.weight[0].t())
|
||||
self.gradInput[0].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[0].size(0),
|
||||
self.gradInput[0].size(1)))
|
||||
self.gradInput[1].addmm_(input[0], self.weight[0])
|
||||
self.gradInput[1].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[1].size(0),
|
||||
self.gradInput[1].size(1)))
|
||||
|
||||
#: remaining slices of weight tensor
|
||||
if self.weight.size(0) > 1:
|
||||
if self.buff1 is None:
|
||||
self.buff1 = input[0].new()
|
||||
self.buff1.resize_as_(input[0])
|
||||
|
||||
for k in range(1, self.weight.size(0)):
|
||||
torch.mm(input[1], self.weight[k].t(), out=self.buff1)
|
||||
self.buff1.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[0].size(0),
|
||||
self.gradInput[0].size(1)))
|
||||
self.gradInput[0].add_(self.buff1)
|
||||
|
||||
torch.mm(input[0], self.weight[k], out=self.buff2)
|
||||
self.buff2.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[1].size(0),
|
||||
self.gradInput[1].size(1)))
|
||||
self.gradInput[1].add_(self.buff2)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
self._assertInputGradOutput(input, gradOutput)
|
||||
|
||||
# make sure we have buffer:
|
||||
if self.buff1 is None:
|
||||
self.buff1 = input[0].new()
|
||||
self.buff1.resize_as_(input[0])
|
||||
|
||||
# accumulate parameter gradients:
|
||||
for k in range(self.weight.size(0)):
|
||||
torch.mul(input[0], gradOutput.narrow(1, k, 1).expand_as(input[0]), out=self.buff1)
|
||||
self.gradWeight[k].addmm_(self.buff1.t(), input[1])
|
||||
|
||||
if self.bias is not None:
|
||||
self.gradBias.add_(scale, gradOutput.sum(0, keepdim=False))
|
||||
|
||||
def __repr__(self):
|
||||
return str(type(self)) + \
|
||||
'({}x{} -> {}) {}'.format(
|
||||
self.weight.size(1), self.weight.size(2), self.weight.size(0),
|
||||
(' without bias' if self.bias is None else '')
|
||||
)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'buff1', 'buff2')
|
||||
return super(Bilinear, self).clearState()
|
@ -1,36 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class CAddTable(Module):
|
||||
|
||||
def __init__(self, inplace=False):
|
||||
super(CAddTable, self).__init__()
|
||||
self.inplace = inplace
|
||||
self.gradInput = []
|
||||
|
||||
def updateOutput(self, input):
|
||||
if self.inplace:
|
||||
self.output.set_(input[0])
|
||||
else:
|
||||
self.output.resize_as_(input[0]).copy_(input[0])
|
||||
|
||||
for i in range(1, len(input)):
|
||||
self.output.add_(input[i])
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
for i in range(len(input)):
|
||||
if i >= len(self.gradInput):
|
||||
assert i == len(self.gradInput)
|
||||
self.gradInput.append(input[0].new())
|
||||
|
||||
if self.inplace:
|
||||
self.gradInput[i].set_(gradOutput)
|
||||
else:
|
||||
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
|
||||
|
||||
del self.gradInput[len(input):]
|
||||
|
||||
return self.gradInput
|
@ -1,25 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class CDivTable(Module):
|
||||
|
||||
def __init__(self, ):
|
||||
super(CDivTable, self).__init__()
|
||||
self.gradInput = []
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_as_(input[0]).copy_(input[0])
|
||||
self.output.div_(input[1])
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
while len(self.gradInput) < 2:
|
||||
self.gradInput.append(input[0].new())
|
||||
gradOutput = gradOutput.contiguous().view_as(input[0])
|
||||
self.gradInput[0].resize_as_(input[0]).copy_(gradOutput).div_(input[1])
|
||||
self.gradInput[1].resize_as_(input[1]).zero_().addcdiv_(-1, self.gradInput[0], input[1]).mul_(input[0])
|
||||
|
||||
del self.gradInput[len(input):]
|
||||
|
||||
return self.gradInput
|
@ -1,117 +0,0 @@
|
||||
import math
|
||||
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear, contiguousView
|
||||
|
||||
|
||||
class CMul(Module):
|
||||
|
||||
def __init__(self, *args):
|
||||
super(CMul, self).__init__()
|
||||
|
||||
if len(args) == 1 and isinstance(args[0], torch.Size):
|
||||
self.size = args[0]
|
||||
else:
|
||||
self.size = torch.Size(args)
|
||||
|
||||
self.weight = torch.Tensor(self.size)
|
||||
self.gradWeight = torch.Tensor(self.size)
|
||||
self.output.resize_(self.size)
|
||||
self.reset()
|
||||
|
||||
self._output = None
|
||||
self._weight = None
|
||||
self._expand = None
|
||||
self._repeat = None
|
||||
self._gradOutput = None
|
||||
self._gradInput = None
|
||||
self._input = None
|
||||
self._gradWeight = None
|
||||
self._sum = None
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.weight.nelement())
|
||||
|
||||
self.weight.uniform_(-stdv, stdv)
|
||||
|
||||
def updateOutput(self, input):
|
||||
# lazy-initialize
|
||||
if self._output is None:
|
||||
self._output = input.new()
|
||||
self._weight = input.new()
|
||||
self._expand = input.new()
|
||||
self._repeat = input.new()
|
||||
|
||||
self.output.resize_as_(input).copy_(input)
|
||||
batchSize = input.size(0)
|
||||
# TODO: expand_as_, view_
|
||||
self._output = self.output.view(batchSize, -1)
|
||||
self._weight = self.weight.view(1, -1)
|
||||
self._expand = self._weight.expand_as(self._output)
|
||||
|
||||
if torch.typename(input) == 'torch.cuda.FloatTensor':
|
||||
self._repeat.resize_as_(self._expand).copy_(self._expand)
|
||||
self._output.mul_(self._repeat)
|
||||
else:
|
||||
self._output.mul_(self._expand)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput is None:
|
||||
return
|
||||
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = input.new()
|
||||
self._gradInput = input.new()
|
||||
|
||||
self.gradInput.resize_as_(input).zero_()
|
||||
batchSize = input.size(0)
|
||||
contiguousView(self._gradOutput, gradOutput, batchSize, -1)
|
||||
contiguousView(self._gradInput, self.gradInput, batchSize, -1)
|
||||
self._weight = self.weight.view(1, -1)
|
||||
self._expand = self._weight.expand_as(self._gradOutput)
|
||||
|
||||
if torch.typename(input) == 'torch.cuda.FloatTensor':
|
||||
self._repeat.resize_as_(self._expand).copy_(self._expand)
|
||||
self._gradInput.addcmul_(1, self._repeat, self._gradOutput)
|
||||
else:
|
||||
self._gradInput.addcmul_(1, self._expand, self._gradOutput)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
if self._input is None:
|
||||
self._input = input.new()
|
||||
self._gradWeight = input.new()
|
||||
self._sum = input.new()
|
||||
|
||||
batchSize = input.size(0)
|
||||
contiguousView(self._input, input, batchSize, -1)
|
||||
contiguousView(self._gradOutput, gradOutput, batchSize, -1)
|
||||
self._gradWeight = self.gradWeight.view(1, -1)
|
||||
|
||||
torch.mul(self._input, self._gradOutput, out=self._repeat)
|
||||
torch.sum(self._repeat, 0, True, out=self._sum)
|
||||
self._gradWeight.add_(scale, self._sum)
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type:
|
||||
self.clearState()
|
||||
return super(CMul, self).type(type, tensorCache)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, [
|
||||
'_input',
|
||||
'_output',
|
||||
'_weight',
|
||||
'_gradWeight',
|
||||
'_expand',
|
||||
'_repeat',
|
||||
'_sum',
|
||||
])
|
||||
return super(CMul, self).clearState()
|
@ -1,49 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class CMulTable(Module):
|
||||
|
||||
def __init__(self, ):
|
||||
super(CMulTable, self).__init__()
|
||||
self.gradInput = []
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_as_(input[0]).copy_(input[0])
|
||||
for i in range(1, len(input)):
|
||||
self.output.mul_(input[i])
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput_efficient(self, input, gradOutput):
|
||||
if self.tout is None:
|
||||
self.tout = input[0].new()
|
||||
self.tout.resize_as_(self.output)
|
||||
for i in range(len(input)):
|
||||
if len(self.gradInput) <= i:
|
||||
assert i == len(self.gradInput)
|
||||
self.gradInput.append(input[0].new())
|
||||
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
|
||||
self.tout.copy_(self.output).div_(input[i])
|
||||
self.gradInput[i].mul_(self.tout)
|
||||
|
||||
self.gradInput = self.gradInput[:len(input)]
|
||||
return self.gradInput
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
for i in range(len(input)):
|
||||
if len(self.gradInput) <= i:
|
||||
assert i == len(self.gradInput)
|
||||
self.gradInput.append(input[0].new())
|
||||
self.gradInput[i].resize_as_(input[i]).copy_(gradOutput)
|
||||
for j in range(len(input)):
|
||||
if i != j:
|
||||
self.gradInput[i].mul_(input[j])
|
||||
|
||||
self.gradInput = self.gradInput[:len(input)]
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'tout')
|
||||
return super(CMulTable, self).clearState()
|
@ -1,25 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class CSubTable(Module):
|
||||
|
||||
def __init__(self, ):
|
||||
super(CSubTable, self).__init__()
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_as_(input[0]).copy_(input[0])
|
||||
self.output.add_(-1, input[1])
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput[0] is None:
|
||||
self.gradInput[0] = input[0].new()
|
||||
if self.gradInput[1] is None:
|
||||
self.gradInput[1] = input[1].new()
|
||||
self.gradInput[0].resize_as_(input[0]).copy_(gradOutput)
|
||||
self.gradInput[1].resize_as_(input[1]).copy_(gradOutput).mul_(-1)
|
||||
|
||||
self.gradInput = self.gradInput[:2]
|
||||
return self.gradInput
|
@ -1,8 +0,0 @@
|
||||
import torch
|
||||
from .HardTanh import HardTanh
|
||||
|
||||
|
||||
class Clamp(HardTanh):
|
||||
|
||||
def __init__(self, min_value, max_value):
|
||||
super(Clamp, self,).__init__(min_value, max_value)
|
@ -1,53 +0,0 @@
|
||||
import torch
|
||||
from torch.nn.functional import _Reduction
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class ClassNLLCriterion(Criterion):
|
||||
|
||||
def __init__(self, weights=None, sizeAverage=True, ignore_index=-100):
|
||||
super(ClassNLLCriterion, self).__init__()
|
||||
self.sizeAverage = sizeAverage
|
||||
self.ignore_index = ignore_index
|
||||
|
||||
if weights is not None:
|
||||
assert weights.dim() == 1
|
||||
self.weights = weights
|
||||
|
||||
self.output_tensor = torch.zeros(1)
|
||||
self.total_weight_tensor = torch.ones(1)
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
self.ignore_index = getattr(self, "ignore_index", -100)
|
||||
target = target.long()
|
||||
self._backend.ClassNLLCriterion_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
target,
|
||||
self.output_tensor,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
self.weights,
|
||||
self.total_weight_tensor,
|
||||
self.ignore_index,
|
||||
)
|
||||
self.output = self.output_tensor[0].item()
|
||||
return self.output
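The modern counterpart is torch.nn.NLLLoss applied to log-probabilities, with mean reduction matching sizeAverage=True; a minimal sketch (shapes illustrative):

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))
loss = nn.NLLLoss(reduction='mean')(F.log_softmax(logits, dim=1), target)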
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
self.gradInput.resize_as_(input).zero_()
|
||||
target = target.long()
|
||||
implicit_gradOutput = torch.ones(1).type_as(input)
|
||||
|
||||
self._backend.ClassNLLCriterion_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
target,
|
||||
implicit_gradOutput,
|
||||
self.gradInput,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
self.weights,
|
||||
self.total_weight_tensor,
|
||||
self.ignore_index,
|
||||
)
|
||||
|
||||
return self.gradInput
|
@ -1,108 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from torch.nn.functional import _Reduction
|
||||
from .MSECriterion import MSECriterion
|
||||
|
||||
"""
|
||||
This file implements a criterion for multi-class classification.
|
||||
It learns an embedding per class, where each class' embedding
|
||||
is a point on an (N-1)-dimensional simplex, where N is
|
||||
the number of classes.
|
||||
For example usage of this class, look at.c/criterion.md
|
||||
|
||||
Reference: http.//arxiv.org/abs/1506.08230
|
||||
"""
|
||||
|
||||
|
||||
class ClassSimplexCriterion(MSECriterion):
|
||||
|
||||
def __init__(self, nClasses):
|
||||
super(ClassSimplexCriterion, self).__init__()
|
||||
self.nClasses = nClasses
|
||||
|
||||
# embedding the simplex in a space of dimension strictly greater than
|
||||
# the minimum possible (nClasses-1) is critical for effective training.
|
||||
simp = self._regsplex(nClasses - 1)
|
||||
self.simplex = torch.cat((simp, torch.zeros(simp.size(0), nClasses - simp.size(1))), 1)
|
||||
self._target = torch.Tensor(nClasses)
|
||||
|
||||
self.output_tensor = None
|
||||
|
||||
def _regsplex(self, n):
|
||||
"""
|
||||
regsplex returns the coordinates of the vertices of a
|
||||
regular simplex centered at the origin.
|
||||
The Euclidean norms of the vectors specifying the vertices are
|
||||
all equal to 1. The input n is the dimension of the vectors;
|
||||
the simplex has n+1 vertices.
|
||||
|
||||
input:
|
||||
n # dimension of the vectors specifying the vertices of the simplex
|
||||
|
||||
output:
|
||||
a # tensor dimensioned (n+1, n) whose rows are
|
||||
vectors specifying the vertices
|
||||
|
||||
reference:
|
||||
http.//en.wikipedia.org/wiki/Simplex#Cartesian_coordinates_for_regular_n-dimensional_simplex_in_Rn
|
||||
"""
|
||||
a = torch.zeros(n + 1, n)
|
||||
|
||||
for k in range(n):
|
||||
# determine the last nonzero entry in the vector for the k-th vertex
|
||||
if k == 0:
|
||||
a[k][k] = 1
|
||||
else:
|
||||
a[k][k] = math.sqrt(1 - a[k:k + 1, 0:k + 1].norm() ** 2)
|
||||
|
||||
# fill_ the k-th coordinates for the vectors of the remaining vertices
|
||||
c = (a[k][k] ** 2 - 1 - 1 / n) / a[k][k]
|
||||
a[k + 1:n + 2, k:k + 1].fill_(c)
|
||||
|
||||
return a
|
||||
|
||||
# handle target being both 1D tensor, and
|
||||
# target being 2D tensor (2D tensor means don't: anything)
|
||||
def _transformTarget(self, target):
|
||||
assert target.dim() == 1
|
||||
nSamples = target.size(0)
|
||||
self._target.resize_(nSamples, self.nClasses)
|
||||
for i in range(nSamples):
|
||||
self._target[i].copy_(self.simplex[int(target[i])])
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
self._transformTarget(target)
|
||||
|
||||
assert input.nelement() == self._target.nelement()
|
||||
if self.output_tensor is None:
|
||||
self.output_tensor = input.new(1)
|
||||
self._backend.MSECriterion_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self._target,
|
||||
self.output_tensor,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
)
|
||||
self.output = self.output_tensor[0].item()
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
assert input.nelement() == self._target.nelement()
|
||||
implicit_gradOutput = torch.Tensor([1]).type(input.type())
|
||||
self._backend.MSECriterion_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self._target,
|
||||
implicit_gradOutput,
|
||||
self.gradInput,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
)
|
||||
return self.gradInput
|
||||
|
||||
def getPredictions(self, input):
|
||||
return torch.mm(input, self.simplex.t())
|
||||
|
||||
def getTopPrediction(self, input):
|
||||
prod = self.getPredictions(input)
|
||||
_, maxs = prod.max(prod.ndimension() - 1)
|
||||
return maxs.view(-1)
|
@ -1,106 +0,0 @@
|
||||
import torch
|
||||
from .Container import Container
|
||||
|
||||
|
||||
class Concat(Container):
|
||||
|
||||
def __init__(self, dimension):
|
||||
super(Concat, self).__init__()
|
||||
self.outputSize = torch.Size()
|
||||
self.dimension = dimension
|
||||
|
||||
def updateOutput(self, input):
|
||||
outs = []
|
||||
for i in range(len(self.modules)):
|
||||
currentOutput = self.modules[i].updateOutput(input)
|
||||
outs.append(currentOutput)
|
||||
if i == 0:
|
||||
size = list(currentOutput.size())
|
||||
else:
|
||||
size[self.dimension] += currentOutput.size(self.dimension)
|
||||
self.outputSize = torch.Size(size)
|
||||
self.output.resize_(self.outputSize)
|
||||
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = outs[i]
|
||||
self.output.narrow(self.dimension, offset, currentOutput.size(self.dimension)).copy_(currentOutput)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_as_(input)
|
||||
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
currentGradInput = module.updateGradInput(input, gradOutput.narrow(
|
||||
self.dimension, offset, currentOutput.size(self.dimension)))
|
||||
|
||||
# if the module does not produce a gradInput (for example first layer), ignore it and move on.
|
||||
if currentGradInput:
|
||||
if i == 0:
|
||||
self.gradInput.copy_(currentGradInput)
|
||||
else:
|
||||
self.gradInput.add_(currentGradInput)
|
||||
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
module.accGradParameters(
|
||||
input,
|
||||
gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)),
|
||||
scale)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
def backward(self, input, gradOutput, scale=1):
|
||||
self.gradInput.resize_as_(input)
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
currentGradInput = module.backward(input, gradOutput.narrow(
|
||||
self.dimension, offset, currentOutput.size(self.dimension)), scale)
|
||||
# if the module does not produce a gradInput (for example first layer), ignore it and move on.
|
||||
if currentGradInput is not None:
|
||||
if i == 0:
|
||||
self.gradInput.copy_(currentGradInput)
|
||||
else:
|
||||
self.gradInput.add_(currentGradInput)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accUpdateGradParameters(self, input, gradOutput, lr):
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
module.accUpdateGradParameters(
|
||||
input,
|
||||
gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)),
|
||||
lr)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
def __tostring__(self):
|
||||
tab = ' '
|
||||
line = '\n'
|
||||
next = ' |`-> '
|
||||
ext = ' | '
|
||||
extlast = ' '
|
||||
last = ' +. -> '
|
||||
res = torch.type(self)
|
||||
res += ' {' + line + tab + 'input'
|
||||
for i in range(len(self.modules)):
|
||||
if i == len(self.modules) - 1:
|
||||
res += line + tab + next + '(' + i + '): ' + str(self.modules[i]).replace(line, line + tab + extlast)
|
||||
else:
|
||||
res += line + tab + next + '(' + i + '): ' + str(self.modules[i]).replace(line, line + tab + ext)
|
||||
|
||||
res += line + tab + last + 'output'
|
||||
res += line + '}'
|
||||
return res
|
@ -1,112 +0,0 @@
|
||||
import torch
|
||||
from .Container import Container
|
||||
|
||||
|
||||
class ConcatTable(Container):
|
||||
|
||||
def __init__(self, ):
|
||||
super(ConcatTable, self).__init__()
|
||||
self.modules = []
|
||||
self.output = []
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output = [module.updateOutput(input) for module in self.modules]
|
||||
return self.output
|
||||
|
||||
def _map_list(self, l1, l2, f):
|
||||
for i, v in enumerate(l2):
|
||||
if isinstance(v, list):
|
||||
res = self._map_list(l1[i] if i < len(l1) else [], v, f)
|
||||
if i >= len(l1):
|
||||
assert i == len(l1)
|
||||
l1.append(res)
|
||||
else:
|
||||
l1[i] = res
|
||||
else:
|
||||
f(l1, i, v)
|
||||
for i in range(len(l1) - 1, len(l2) - 1, -1):
|
||||
del l1[i]
|
||||
return l1
|
||||
|
||||
def _backward(self, method, input, gradOutput, scale=1):
|
||||
isTable = isinstance(input, list)
|
||||
wasTable = isinstance(self.gradInput, list)
|
||||
if isTable:
|
||||
for i, module in enumerate(self.modules):
|
||||
if method == 'updateGradInput':
|
||||
currentGradInput = module.updateGradInput(input, gradOutput[i])
|
||||
elif method == 'backward':
|
||||
currentGradInput = module.backward(input, gradOutput[i], scale)
|
||||
if not isinstance(currentGradInput, list):
|
||||
raise RuntimeError("currentGradInput is not a table!")
|
||||
|
||||
if len(input) != len(currentGradInput):
|
||||
raise RuntimeError("table size mismatch")
|
||||
|
||||
if i == 0:
|
||||
self.gradInput = self.gradInput if wasTable else []
|
||||
|
||||
def fn(l, i, v):
|
||||
if i >= len(l):
|
||||
assert len(l) == i
|
||||
l.append(v.clone())
|
||||
else:
|
||||
l[i].resize_as_(v)
|
||||
l[i].copy_(v)
|
||||
self._map_list(self.gradInput, currentGradInput, fn)
|
||||
else:
|
||||
def fn(l, i, v):
|
||||
if i < len(l):
|
||||
l[i].add_(v)
|
||||
else:
|
||||
assert len(l) == i
|
||||
l.append(v.clone())
|
||||
self._map_list(self.gradInput, currentGradInput, fn)
|
||||
else:
|
||||
self.gradInput = self.gradInput if not wasTable else input.clone()
|
||||
for i, module in enumerate(self.modules):
|
||||
if method == 'updateGradInput':
|
||||
currentGradInput = module.updateGradInput(input, gradOutput[i])
|
||||
elif method == 'backward':
|
||||
currentGradInput = module.backward(input, gradOutput[i], scale)
|
||||
if i == 0:
|
||||
self.gradInput.resize_as_(currentGradInput).copy_(currentGradInput)
|
||||
else:
|
||||
self.gradInput.add_(currentGradInput)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
return self._backward('updateGradInput', input, gradOutput)
|
||||
|
||||
def backward(self, input, gradOutput, scale=1):
|
||||
return self._backward('backward', input, gradOutput, scale)
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
for i, module in enumerate(self.modules):
|
||||
self.rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale)
|
||||
|
||||
def accUpdateGradParameters(self, input, gradOutput, lr):
|
||||
for i, module in enumerate(self.modules):
|
||||
self.rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr)
|
||||
|
||||
def __repr__(self):
|
||||
tab = ' '
|
||||
line = '\n'
|
||||
next = ' |`-> '
|
||||
ext = ' | '
|
||||
extlast = ' '
|
||||
last = ' +. -> '
|
||||
res = torch.typename(self)
|
||||
res = res + ' {' + line + tab + 'input'
|
||||
for i in range(len(self.modules)):
|
||||
if i == len(self.modules) - 1:
|
||||
res = res + line + tab + next + '(' + str(i) + '): ' + \
|
||||
str(self.modules[i]).replace(line, line + tab + extlast)
|
||||
else:
|
||||
res = res + line + tab + next + '(' + str(i) + '): ' + \
|
||||
str(self.modules[i]).replace(line, line + tab + ext)
|
||||
|
||||
res = res + line + tab + last + 'output'
|
||||
res = res + line + '}'
|
||||
return res
|
@ -1,66 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
from functools import wraps
|
||||
import sys
|
||||
|
||||
|
||||
class Container(Module):
|
||||
|
||||
def __init__(self, *args):
|
||||
super(Container, self).__init__(*args)
|
||||
self.modules = []
|
||||
|
||||
def add(self, module):
|
||||
self.modules.append(module)
|
||||
return self
|
||||
|
||||
def get(self, index):
|
||||
return self.modules[index]
|
||||
|
||||
def size(self):
|
||||
return len(self.modules)
|
||||
|
||||
def applyToModules(self, func):
|
||||
for module in self.modules:
|
||||
func(module)
|
||||
|
||||
def zeroGradParameters(self):
|
||||
self.applyToModules(lambda m: m.zeroGradParameters())
|
||||
|
||||
def updateParameters(self, learningRate):
|
||||
self.applyToModules(lambda m: m.updateParameters(learningRate))
|
||||
|
||||
def training(self):
|
||||
self.applyToModules(lambda m: m.training())
|
||||
super(Container, self).training()
|
||||
|
||||
def evaluate(self, ):
|
||||
self.applyToModules(lambda m: m.evaluate())
|
||||
super(Container, self).evaluate()
|
||||
|
||||
def share(self, mlp, *args):
|
||||
for module, other_module in zip(self.modules, mlp.modules):
|
||||
module.share(other_module, *args)
|
||||
|
||||
def reset(self, stdv=None):
|
||||
self.applyToModules(lambda m: m.reset(stdv))
|
||||
|
||||
def parameters(self):
|
||||
w = []
|
||||
gw = []
|
||||
for module in self.modules:
|
||||
mparam = module.parameters()
|
||||
if mparam is not None:
|
||||
w.extend(mparam[0])
|
||||
gw.extend(mparam[1])
|
||||
if not w:
|
||||
return
|
||||
return w, gw
|
||||
|
||||
def clearState(self):
|
||||
clear('output')
|
||||
clear('gradInput')
|
||||
for module in self.modules:
|
||||
module.clearState()
|
||||
return self
|
@ -1,21 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Contiguous(Module):
|
||||
|
||||
def updateOutput(self, input):
|
||||
if not input.is_contiguous():
|
||||
self.output.resize_as_(input).copy_(input)
|
||||
else:
|
||||
self.output.set_(input)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if not gradOutput.is_contiguous():
|
||||
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
else:
|
||||
self.gradInput.set_(gradOutput)
|
||||
|
||||
return self.gradInput
|
@ -1,25 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Copy(Module):
|
||||
|
||||
def __init__(self, intype, outtype, dontCast=False):
|
||||
self.dontCast = dontCast
|
||||
super(Copy, self).__init__()
|
||||
self.gradInput = intype()
|
||||
self.output = outtype()
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_(input.size()).copy_(input)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_(gradOutput.size()).copy_(gradOutput)
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type and self.dontCast:
|
||||
return self
|
||||
|
||||
return super(Copy, self).type(type, tensorCache)
|
@ -1,153 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Cosine(Module):
|
||||
|
||||
def __init__(self, inputSize, outputSize):
|
||||
super(Cosine, self).__init__()
|
||||
self.weight = torch.Tensor(outputSize, inputSize)
|
||||
self.gradWeight = torch.Tensor(outputSize, inputSize)
|
||||
self.reset()
|
||||
|
||||
self._weight = None
|
||||
self._sum = None
|
||||
self._gradOutput = None
|
||||
self._sum = None
|
||||
self._weightNorm = None
|
||||
self._inputNorm = None
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.weight.size(0))
|
||||
self.weight.uniform_(-stdv, stdv)
|
||||
|
||||
def updateOutput(self, input):
|
||||
assert input.dim() == 2
|
||||
|
||||
inputSize = self.weight.size(1)
|
||||
outputSize = self.weight.size(0)
|
||||
|
||||
if self._weightNorm is None:
|
||||
self._weightNorm = self.weight.new()
|
||||
if self._inputNorm is None:
|
||||
self._inputNorm = self.weight.new()
|
||||
|
||||
# y_j = (w_j * x) / ( || w_j || * || x || )
|
||||
|
||||
torch.norm(self.weight, 2, 1, out=self._weightNorm, keepdim=True).add_(1e-12)
|
||||
|
||||
batchSize = input.size(0)
|
||||
nelement = self.output.nelement()
|
||||
self.output.resize_(batchSize, outputSize)
|
||||
if self.output.nelement() != nelement:
|
||||
self.output.zero_()
|
||||
|
||||
self.output.addmm_(0., 1., input, self.weight.t())
|
||||
|
||||
torch.norm(input, 2, 1, out=self._inputNorm, keepdim=True).add_(1e-12)
|
||||
self.output.div_(self._weightNorm.view(1, outputSize).expand_as(self.output))
|
||||
self.output.div_(self._inputNorm.expand_as(self.output))
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
assert input.dim() == 2
|
||||
|
||||
if self.gradInput is None:
|
||||
return
|
||||
|
||||
inputSize = self.weight.size(1)
|
||||
outputSize = self.weight.size(0)
|
||||
|
||||
"""
|
||||
dy_j          w_ji                    x_i
---- = -------------------  -  y_j ---------
dx_i   || w_j || * || x ||          || x ||^2
|
||||
"""
|
||||
|
||||
nelement = self.gradInput.nelement()
|
||||
self.gradInput.resize_as_(input)
|
||||
if self.gradInput.nelement() != nelement:
|
||||
self.gradInput.zero_()
|
||||
|
||||
inputNorm = self._inputNorm.expand_as(input)
|
||||
weightNorm = self._weightNorm.view(1, outputSize).expand_as(gradOutput)
|
||||
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = gradOutput.new()
|
||||
if self._sum is None:
|
||||
self._sum = input.new()
|
||||
|
||||
self.gradInput.copy_(input).div_(inputNorm)
|
||||
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
self._gradOutput.mul_(self.output)
|
||||
torch.sum(self._gradOutput, 1, out=self._sum, keepdim=True)
|
||||
self.gradInput.mul_(self._sum.expand_as(input))
|
||||
|
||||
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
self._gradOutput.div_(weightNorm)
|
||||
self.gradInput.addmm_(-1, 1, self._gradOutput, self.weight)
|
||||
self.gradInput.div_(inputNorm)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
assert input.dim() == 2
|
||||
inputSize = self.weight.size(1)
|
||||
outputSize = self.weight.size(0)
|
||||
|
||||
"""
|
||||
 dy_j           x_i                    w_ji
----- = -------------------  -  y_j -----------
dw_ji   || w_j || * || x ||          || w_j ||^2
|
||||
"""
|
||||
|
||||
if self._weight is None:
|
||||
self._weight = self.weight.new()
|
||||
if self._sum is None:
|
||||
self._sum = input.new()
|
||||
|
||||
self._weight.resize_as_(self.weight).copy_(self.weight)
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = gradOutput.new()
|
||||
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
self._gradOutput.mul_(self.output)
|
||||
torch.sum(self._gradOutput, 0, out=self._sum, keepdim=True)
|
||||
grad = self._sum[0]
|
||||
grad.div_(self._weightNorm.select(1, 0))
|
||||
self._weight.mul_(grad.view(outputSize, 1).expand_as(self._weight))
|
||||
|
||||
input_ = self._gradOutput
|
||||
input_.resize_as_(input).copy_(input)
|
||||
input_.div_(self._inputNorm.expand_as(input))
|
||||
self._weight.addmm_(-1, 1, gradOutput.t(), input_)
|
||||
|
||||
self._weight.div_(self._weightNorm.expand_as(self._weight))
|
||||
self.gradWeight.add_(self._weight)
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type is not None:
|
||||
# prevent premature memory allocations
|
||||
self._input = None
|
||||
self._weight = None
|
||||
self._inputNorm = None
|
||||
self._weightNorm = None
|
||||
self._gradOutput = None
|
||||
self._sum = None
|
||||
|
||||
return super(Cosine, self).type(type, tensorCache)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, [
|
||||
'_input',
|
||||
'_weight',
|
||||
'_gradOutput',
|
||||
'_sum',
|
||||
'_inputNorm',
|
||||
'_weightNorm',
|
||||
])
|
||||
return super(Cosine, self).clearState()
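# A quick numerical cross-check of the forward pass above, assuming the modern
# functional API (shapes are illustrative): y[b, j] = <w_j, x_b> / (||w_j|| * ||x_b||).
import torch
import torch.nn.functional as F

x = torch.randn(4, 10)
W = torch.randn(8, 10)
eps = 1e-12
y = (x @ W.t()) / ((W.norm(dim=1) + eps) * (x.norm(dim=1, keepdim=True) + eps))
y_ref = F.cosine_similarity(x.unsqueeze(1), W.unsqueeze(0), dim=2, eps=eps)
print(torch.allclose(y, y_ref, atol=1e-6))  # True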
|
@ -1,108 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class CosineDistance(Module):
|
||||
|
||||
def __init__(self, ):
|
||||
super(CosineDistance, self).__init__()
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
|
||||
self._input1 = None
|
||||
self._input2 = None
|
||||
self.buffer = None
|
||||
self.w1 = None
|
||||
self.w22 = None
|
||||
self.w = None
|
||||
self.w32 = None
|
||||
self.ones = None
|
||||
|
||||
def _makeContiguous(self, input1, input2):
|
||||
if not input1.is_contiguous():
|
||||
if self._input1 is None:
|
||||
self._input1 = input1.new()
|
||||
self._input1.resize_as_(input1).copy_(input1)
|
||||
input1 = self._input1
|
||||
|
||||
if not input2.is_contiguous():
|
||||
if self._input2 is None:
|
||||
self._input2 = input2.new()
|
||||
self._input2.resize_as_(input2).copy_(input2)
|
||||
input2 = self._input2
|
||||
|
||||
return input1, input2
|
||||
|
||||
def updateOutput(self, input):
|
||||
input1, input2 = input[0], input[1]
|
||||
input1, input2 = self._makeContiguous(input1, input2)
|
||||
|
||||
if self.buffer is None:
|
||||
self.buffer = input1.new()
|
||||
self.w1 = input1.new()
|
||||
self.w22 = input1.new()
|
||||
self.w = input1.new()
|
||||
self.w32 = input1.new()
|
||||
self.ones = input1.new()
|
||||
|
||||
torch.mul(input1, input2, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w1, keepdim=True)
|
||||
|
||||
epsilon = 1e-12
|
||||
torch.mul(input1, input1, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
|
||||
self.w22.reciprocal_()
|
||||
self.w.resize_as_(self.w22).copy_(self.w22)
|
||||
|
||||
torch.mul(input2, input2, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
|
||||
self.w32.reciprocal_()
|
||||
self.w.mul_(self.w32)
|
||||
self.w.sqrt_()
|
||||
|
||||
torch.mul(self.w1, self.w, out=self.output)
|
||||
self.output.resize_(input1.size(0))
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
v1 = input[0]
|
||||
v2 = input[1]
|
||||
v1, v2 = self._makeContiguous(v1, v2)
|
||||
|
||||
if len(self.gradInput) != 2:
|
||||
if self.gradInput[0] is None:
|
||||
self.gradInput[0] = v1.new()
|
||||
if self.gradInput[1] is None:
|
||||
self.gradInput[1] = v1.new()
|
||||
self.gradInput = self.gradInput[:2]
|
||||
|
||||
gw1 = self.gradInput[0]
|
||||
gw2 = self.gradInput[1]
|
||||
gw1.resize_as_(v1).copy_(v2)
|
||||
gw2.resize_as_(v1).copy_(v1)
|
||||
|
||||
torch.mul(self.w1, self.w22, out=self.buffer)
|
||||
gw1.addcmul_(-1, self.buffer.expand_as(v1), v1)
|
||||
gw1.mul_(self.w.expand_as(v1))
|
||||
|
||||
torch.mul(self.w1, self.w32, out=self.buffer)
|
||||
gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
|
||||
gw2.mul_(self.w.expand_as(v1))
|
||||
|
||||
go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
|
||||
gw1.mul_(go)
|
||||
gw2.mul_(go)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, [
|
||||
'buffer',
|
||||
'w1',
|
||||
'w22',
|
||||
'w',
|
||||
'w32',
|
||||
'ones',
|
||||
])
|
||||
return super(CosineDistance, self).clearState()
|
@ -1,117 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class CosineEmbeddingCriterion(Criterion):
|
||||
|
||||
def __init__(self, margin=0, sizeAverage=True):
|
||||
super(CosineEmbeddingCriterion, self).__init__()
|
||||
self.margin = margin
|
||||
self.sizeAverage = sizeAverage
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
self.buffer = None
|
||||
self.w1 = None
|
||||
self.w22 = None
|
||||
self.w = None
|
||||
self.w32 = None
|
||||
self._outputs = None
|
||||
self._idx = None
|
||||
|
||||
def updateOutput(self, input, y):
|
||||
input1, input2 = input[0], input[1]
|
||||
|
||||
# keep backward compatibility
|
||||
if self.buffer is None:
|
||||
self.buffer = input1.new()
|
||||
self.w1 = input1.new()
|
||||
self.w22 = input1.new()
|
||||
self.w = input1.new()
|
||||
self.w32 = input1.new()
|
||||
self._outputs = input1.new()
|
||||
|
||||
# comparison operators behave differently from cuda/c implementations
|
||||
# TODO: verify name
|
||||
if input1.type() == 'torch.cuda.FloatTensor':
|
||||
self._idx = torch.cuda.ByteTensor()
|
||||
else:
|
||||
self._idx = torch.ByteTensor()
|
||||
|
||||
torch.mul(input1, input2, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w1, keepdim=True)
|
||||
|
||||
epsilon = 1e-12
|
||||
torch.mul(input1, input1, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w22, keepdim=True).add_(epsilon)
|
||||
# self._outputs is also used as a temporary buffer
|
||||
self._outputs.resize_as_(self.w22).fill_(1)
|
||||
torch.div(self._outputs, self.w22, out=self.w22)
|
||||
self.w.resize_as_(self.w22).copy_(self.w22)
|
||||
|
||||
torch.mul(input2, input2, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, out=self.w32, keepdim=True).add_(epsilon)
|
||||
torch.div(self._outputs, self.w32, out=self.w32)
|
||||
self.w.mul_(self.w32)
|
||||
self.w.sqrt_()
|
||||
|
||||
torch.mul(self.w1, self.w, out=self._outputs)
|
||||
self._outputs = self._outputs.select(1, 0)
|
||||
|
||||
torch.eq(y, -1, out=self._idx)
|
||||
self._outputs[self._idx] = self._outputs[self._idx].add_(-self.margin).clamp_(min=0)
|
||||
torch.eq(y, 1, out=self._idx)
|
||||
self._outputs[self._idx] = self._outputs[self._idx].mul_(-1).add_(1)
|
||||
|
||||
self.output = self._outputs.sum().item()
|
||||
|
||||
if self.sizeAverage:
|
||||
self.output = self.output / y.size(0)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, y):
|
||||
v1 = input[0]
|
||||
v2 = input[1]
|
||||
|
||||
gw1 = self.gradInput[0]
|
||||
gw2 = self.gradInput[1]
|
||||
gw1.resize_as_(v1).copy_(v2)
|
||||
gw2.resize_as_(v1).copy_(v1)
|
||||
|
||||
torch.mul(self.w1, self.w22, out=self.buffer)
|
||||
gw1.addcmul_(-1, self.buffer.expand_as(v1), v1)
|
||||
gw1.mul_(self.w.expand_as(v1))
|
||||
|
||||
torch.mul(self.w1, self.w32, out=self.buffer)
|
||||
gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
|
||||
gw2.mul_(self.w.expand_as(v1))
|
||||
|
||||
# self._idx = self._outputs <= 0
|
||||
torch.le(self._outputs, 0, out=self._idx)
|
||||
self._idx = self._idx.view(-1, 1).expand(gw1.size())
|
||||
gw1[self._idx] = 0
|
||||
gw2[self._idx] = 0
|
||||
|
||||
torch.eq(y, 1, out=self._idx)
|
||||
self._idx = self._idx.view(-1, 1).expand(gw2.size())
|
||||
gw1[self._idx] = gw1[self._idx].mul_(-1)
|
||||
gw2[self._idx] = gw2[self._idx].mul_(-1)
|
||||
|
||||
if self.sizeAverage:
|
||||
gw1.div_(y.size(0))
|
||||
gw2.div_(y.size(0))
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if not type:
|
||||
return self._type
|
||||
|
||||
self._idx = None
|
||||
super(CosineEmbeddingCriterion, self).type(type, tensorCache)
|
||||
# comparison operators behave differently from cuda/c implementations
|
||||
if type == 'torch.cuda.FloatTensor':
|
||||
self._idx = torch.cuda.ByteTensor()
|
||||
else:
|
||||
self._idx = torch.ByteTensor()
|
||||
|
||||
return self
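# A rough cross-check of the loss above, assuming the modern functional API:
# for y = 1 the per-pair term is 1 - cos(x1, x2); for y = -1 it is max(0, cos(x1, x2) - margin).
# Shapes and the margin value are illustrative.
import torch
import torch.nn.functional as F

x1, x2 = torch.randn(6, 10), torch.randn(6, 10)
y = torch.tensor([1, -1, 1, -1, 1, -1])
cos = F.cosine_similarity(x1, x2, dim=1)
manual = torch.where(y == 1, 1 - cos, cos.clamp(min=0)).mean()  # margin = 0
print(torch.allclose(manual, F.cosine_embedding_loss(x1, x2, y, margin=0.0), atol=1e-6))  # True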
|
@ -1,44 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import recursiveType
|
||||
import torch._thnn
|
||||
|
||||
|
||||
class Criterion(object):
|
||||
|
||||
def __init__(self):
|
||||
self.gradInput = torch.Tensor()
|
||||
self.output = 0
|
||||
self._backend = torch._thnn.type2backend[self.gradInput.type()]
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
raise NotImplementedError
|
||||
|
||||
def forward(self, input, target):
|
||||
return self.updateOutput(input, target)
|
||||
|
||||
def backward(self, input, target):
|
||||
return self.updateGradInput(input, target)
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
raise NotImplementedError
|
||||
|
||||
def clone(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def type(self, type, tensorCache=None):
|
||||
# find all tensors and convert them
|
||||
for key, param in self.__dict__.items():
|
||||
setattr(self, key, recursiveType(param, type, tensorCache or {}))
|
||||
|
||||
self._backend = torch._thnn.type2backend[type]
|
||||
return self
|
||||
|
||||
def float(self):
|
||||
return self.type('torch.FloatTensor')
|
||||
|
||||
def double(self):
|
||||
return self.type('torch.DoubleTensor')
|
||||
|
||||
def cuda(self):
|
||||
return self.type('torch.cuda.FloatTensor')
|
@ -1,18 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class CriterionTable(Module):
|
||||
|
||||
def __init__(self, criterion):
|
||||
super(CriterionTable, self).__init__()
|
||||
self.criterion = criterion
|
||||
self.gradInput = [criterion.gradInput]
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output = self.criterion.updateOutput(*input)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, grad_output):
|
||||
self.criterion.updateGradInput(*input)
|
||||
return self.gradInput
|
@ -1,29 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
from .LogSoftMax import LogSoftMax
|
||||
from .ClassNLLCriterion import ClassNLLCriterion
|
||||
|
||||
|
||||
class CrossEntropyCriterion(Criterion):
|
||||
|
||||
def __init__(self, weights=None):
|
||||
super(CrossEntropyCriterion, self).__init__()
|
||||
self.lsm = LogSoftMax()
|
||||
self.nll = ClassNLLCriterion(weights)
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
input = input.squeeze()
|
||||
target = target.squeeze()
|
||||
self.lsm.updateOutput(input)
|
||||
self.nll.updateOutput(self.lsm.output, target)
|
||||
self.output = self.nll.output
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
size = input.size()
|
||||
input = input.squeeze()
|
||||
target = target.squeeze()
|
||||
self.nll.updateGradInput(self.lsm.output, target)
|
||||
self.lsm.updateGradInput(input, self.nll.gradInput)
|
||||
self.gradInput = self.lsm.gradInput.view(size)
|
||||
return self.gradInput
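# Sketch of the decomposition used above (assuming the modern functional API):
# cross-entropy is log-softmax followed by negative log-likelihood.
import torch
import torch.nn.functional as F

logits = torch.randn(4, 5)
target = torch.randint(0, 5, (4,))
manual = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(manual, F.cross_entropy(logits, target)))  # True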
|
@ -1,106 +0,0 @@
|
||||
####################################
|
||||
# DepthConcat
|
||||
# Concatenates the output of Convolutions along the depth dimension
|
||||
# (nOutputFrame). This is used to implement the DepthConcat layer
|
||||
# of the Going deeper with convolutions paper:
# http://arxiv.org/pdf/1409.4842v1.pdf
|
||||
# The normal Concat Module can't be used since the spatial dimensions
|
||||
# of tensors to be concatenated may have different values. To deal with
|
||||
# this, we select the largest spatial dimensions and add zero-padding
|
||||
# around the smaller dimensions.
|
||||
####################################
|
||||
|
||||
import math
|
||||
import torch
|
||||
from .Concat import Concat
|
||||
|
||||
|
||||
class DepthConcat(Concat):
|
||||
|
||||
def windowNarrow(self, output, currentOutput, offset):
|
||||
outputWindow = output.narrow(self.dimension, offset, currentOutput.size(self.dimension))
|
||||
for dim in range(len(self.outputSize)):
|
||||
currentSize = currentOutput.size(dim)
|
||||
if dim != self.dimension and self.outputSize[dim] != currentSize:
|
||||
# 5x5 vs 3x3 -> start = (5-3)//2 = 1 (1 pad each side)
# 9x9 vs 5x5 -> start = (9-5)//2 = 2 (2 pad each side)
# 9x9 vs 4x4 -> start = floor((9-4)/2) = 2 (2 pad before, 3 after)
|
||||
start = int(math.floor(((self.outputSize[dim] - currentSize) / 2)))
|
||||
outputWindow = outputWindow.narrow(dim, start, currentSize)
|
||||
return outputWindow
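# A tiny illustrative helper (not part of the module) showing the centering arithmetic
# used by windowNarrow above: the window of length `small` starts at floor((full - small) / 2),
# so any odd leftover padding goes after the window.
import math

def center_offset(full, small):
    return int(math.floor((full - small) / 2))

print(center_offset(5, 3))  # 1 -> one zero-padded row/column on each side
print(center_offset(9, 4))  # 2 -> two before the window, three after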
|
||||
|
||||
def updateOutput(self, input):
|
||||
outs = []
|
||||
for i in range(len(self.modules)):
|
||||
currentOutput = self.modules[i].updateOutput(input)
|
||||
outs.append(currentOutput)
|
||||
if i == 0:
|
||||
size = list(currentOutput.size())
|
||||
else:
|
||||
size[self.dimension] += currentOutput.size(self.dimension)
|
||||
for dim in range(len(self.outputSize)):
|
||||
if dim != self.dimension:
|
||||
# take the maximum size (shouldn't change anything for batch dim)
|
||||
size[dim] = max(size[dim], currentOutput.size(dim))
|
||||
|
||||
self.outputSize = torch.Size(size)
|
||||
self.output.resize_(self.outputSize).zero_() # zero for padding
|
||||
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = outs[i]
|
||||
outputWindow = self.windowNarrow(self.output, currentOutput, offset)
|
||||
outputWindow.copy_(currentOutput)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_as_(input)
|
||||
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
|
||||
currentGradInput = module.updateGradInput(input, gradOutputWindow)
|
||||
if i == 0:
|
||||
self.gradInput.copy_(currentGradInput)
|
||||
else:
|
||||
self.gradInput.add_(currentGradInput)
|
||||
|
||||
offset += currentOutput.size(self.dimension)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
|
||||
module.accGradParameters(input, gradOutputWindow, scale)
|
||||
offset += currentOutput.size(self.dimension)
|
||||
|
||||
def backward(self, input, gradOutput, scale=1):
|
||||
self.gradInput.resize_as_(input)
|
||||
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
|
||||
currentGradInput = module.backward(input, gradOutputWindow)
|
||||
if i == 0:
|
||||
self.gradInput.copy_(currentGradInput)
|
||||
else:
|
||||
self.gradInput.add_(currentGradInput)
|
||||
|
||||
offset = offset + currentOutput.size(self.dimension)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accUpdateGradParameters(self, input, gradOutput, lr):
|
||||
offset = 0
|
||||
for i, module in enumerate(self.modules):
|
||||
currentOutput = module.output
|
||||
gradOutputWindow = self.windowNarrow(gradOutput, currentOutput, offset)
|
||||
module.accUpdateGradParameters(input, gradOutputWindow, lr)
|
||||
offset = offset + currentOutput.size(self.dimension)
|
@ -1,38 +0,0 @@
|
||||
import torch
|
||||
from torch.nn.functional import _Reduction
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class DistKLDivCriterion(Criterion):
|
||||
|
||||
def __init__(self, sizeAverage=True):
|
||||
super(DistKLDivCriterion, self).__init__()
|
||||
self.sizeAverage = sizeAverage
|
||||
self.output_tensor = torch.Tensor(1)
|
||||
|
||||
def updateOutput(self, input, target):
|
||||
assert input.is_same_size(target)
|
||||
if self.output_tensor is None:
|
||||
self.output_tensor = input.new(1)
|
||||
self._backend.DistKLDivCriterion_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
target,
|
||||
self.output_tensor,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
)
|
||||
self.output = self.output_tensor[0].item()
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
assert input.is_same_size(target)
|
||||
implicit_gradOutput = torch.ones(1).type_as(input)
|
||||
self._backend.DistKLDivCriterion_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
target,
|
||||
implicit_gradOutput,
|
||||
self.gradInput,
|
||||
_Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
|
||||
)
|
||||
return self.gradInput
|
@ -1,49 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class DotProduct(Module):
|
||||
|
||||
def __init__(self):
|
||||
super(DotProduct, self).__init__()
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
self.buffer = None
|
||||
|
||||
def updateOutput(self, input):
|
||||
input1, input2 = input[0], input[1]
|
||||
|
||||
if self.buffer is None:
|
||||
self.buffer = input1.new()
|
||||
|
||||
torch.mul(input1, input2, out=self.buffer)
|
||||
torch.sum(self.buffer, 1, True, out=self.output)
|
||||
self.output.resize_(input1.size(0))
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
v1 = input[0]
|
||||
v2 = input[1]
|
||||
not_batch = False
|
||||
|
||||
if len(self.gradInput) != 2:
|
||||
if self.gradInput[0] is None:
|
||||
self.gradInput[0] = input[0].new()
|
||||
if self.gradInput[1] is None:
|
||||
self.gradInput[1] = input[1].new()
|
||||
self.gradInput = self.gradInput[:2]
|
||||
|
||||
gw1 = self.gradInput[0]
|
||||
gw2 = self.gradInput[1]
|
||||
gw1.resize_as_(v1).copy_(v2)
|
||||
gw2.resize_as_(v2).copy_(v1)
|
||||
|
||||
go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
|
||||
gw1.mul_(go)
|
||||
gw2.mul_(go)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'buffer')
|
||||
return super(DotProduct, self).clearState()
|
@ -1,48 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Dropout(Module):
|
||||
|
||||
def __init__(self, p=0.5, inplace=False):
|
||||
super(Dropout, self).__init__()
|
||||
self.p = p
|
||||
self.inplace = inplace
|
||||
self.train = True
|
||||
self.noise = torch.Tensor()
|
||||
|
||||
def updateOutput(self, input):
|
||||
if self.inplace:
|
||||
self.output.set_(input)
|
||||
else:
|
||||
self.output.resize_as_(input).copy_(input)
|
||||
|
||||
if self.p > 0 and self.train:
|
||||
self.noise.resize_as_(input)
|
||||
self.noise.bernoulli_(1 - self.p)
|
||||
self.noise.div_(1 - self.p)
|
||||
self.output.mul_(self.noise)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.inplace:
|
||||
self.gradInput.set_(gradOutput)
|
||||
else:
|
||||
self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
|
||||
if self.p > 0 and self.train:
|
||||
self.gradInput.mul_(self.noise) # simply mask the gradients with the noise vector
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def setp(self, p):
|
||||
self.p = p
|
||||
|
||||
def __repr__(self):
|
||||
return super(Dropout, self).__repr__() + '({:.4f})'.format(self.p)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'noise')
|
||||
return super(Dropout, self).clearState()
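# A minimal sketch of the inverted-dropout scaling used above (p is illustrative):
# masking with Bernoulli(1 - p) and dividing by (1 - p) keeps the expected activation unchanged.
import torch

p = 0.5
x = torch.ones(1_000_000)
noise = torch.empty_like(x).bernoulli_(1 - p).div_(1 - p)
print((x * noise).mean())  # ~1.0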
|
@ -1,44 +0,0 @@
|
||||
# -*- coding: utf8 -*-
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class ELU(Module):
|
||||
"""
|
||||
Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter
|
||||
Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
|
||||
http://arxiv.org/pdf/1511.07289.pdf
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=1., inplace=False):
|
||||
assert type(alpha) == float
|
||||
super(ELU, self).__init__()
|
||||
self.alpha = alpha
|
||||
self.inplace = inplace
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._backend.ELU_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.alpha,
|
||||
1.0,
|
||||
1.0,
|
||||
self.inplace
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.ELU_updateGradInput(
|
||||
self._backend.library_state,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.output,
|
||||
self.alpha,
|
||||
1.0,
|
||||
1.0
|
||||
)
|
||||
return self.gradInput
|
||||
|
||||
def __repr__(self):
|
||||
return '{}(alpha={:.3f})'.format(str(type(self)), self.alpha)
|
@ -1,172 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Euclidean(Module):
|
||||
|
||||
def __init__(self, inputSize, outputSize):
|
||||
super(Euclidean, self).__init__()
|
||||
|
||||
self.weight = torch.Tensor(inputSize, outputSize)
|
||||
self.gradWeight = torch.Tensor(inputSize, outputSize)
|
||||
|
||||
# state
|
||||
self.gradInput.resize_(inputSize)
|
||||
self.output.resize_(outputSize)
|
||||
|
||||
self.fastBackward = True
|
||||
self.reset()
|
||||
|
||||
self._input = None
|
||||
self._weight = None
|
||||
self._expand = None
|
||||
self._expand2 = None
|
||||
self._repeat = None
|
||||
self._repeat2 = None
|
||||
self._div = None
|
||||
self._output = None
|
||||
self._gradOutput = None
|
||||
self._expand3 = None
|
||||
self._sum = None
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.weight.size(0))
|
||||
|
||||
self.weight.uniform_(-stdv, stdv)
|
||||
|
||||
def _view(self, res, src, *args):
|
||||
if src.is_contiguous():
|
||||
res.set_(src.view(*args))
|
||||
else:
|
||||
res.set_(src.contiguous().view(*args))
|
||||
|
||||
def updateOutput(self, input):
|
||||
# lazy initialize buffers
|
||||
if self._input is None:
|
||||
self._input = input.new()
|
||||
if self._weight is None:
|
||||
self._weight = self.weight.new()
|
||||
if self._expand is None:
|
||||
self._expand = self.output.new()
|
||||
if self._expand2 is None:
|
||||
self._expand2 = self.output.new()
|
||||
if self._repeat is None:
|
||||
self._repeat = self.output.new()
|
||||
if self._repeat2 is None:
|
||||
self._repeat2 = self.output.new()
|
||||
|
||||
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
|
||||
|
||||
# y_j = || w_j - x || = || x - w_j ||
|
||||
assert input.dim() == 2
|
||||
|
||||
batchSize = input.size(0)
|
||||
self._view(self._input, input, batchSize, inputSize, 1)
|
||||
self._expand = self._input.expand(batchSize, inputSize, outputSize)
|
||||
# make the expanded tensor contiguous (requires lots of memory)
|
||||
self._repeat.resize_as_(self._expand).copy_(self._expand)
|
||||
|
||||
self._weight = self.weight.view(1, inputSize, outputSize)
|
||||
self._expand2 = self._weight.expand_as(self._repeat)
|
||||
|
||||
if torch.typename(input) == 'torch.cuda.FloatTensor':
|
||||
# TODO: after adding new allocators this can be changed
|
||||
# requires lots of memory, but minimizes cudaMallocs and loops
|
||||
self._repeat2.resize_as_(self._expand2).copy_(self._expand2)
|
||||
self._repeat.add_(-1, self._repeat2)
|
||||
else:
|
||||
self._repeat.add_(-1, self._expand2)
|
||||
|
||||
torch.norm(self._repeat, 2, 1, True, out=self.output)
|
||||
self.output.resize_(batchSize, outputSize)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput is None:
|
||||
return
|
||||
|
||||
if self._div is None:
|
||||
self._div = input.new()
|
||||
if self._output is None:
|
||||
self._output = self.output.new()
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = input.new()
|
||||
if self._expand3 is None:
|
||||
self._expand3 = input.new()
|
||||
|
||||
if not self.fastBackward:
|
||||
self.updateOutput(input)
|
||||
|
||||
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
|
||||
|
||||
"""
|
||||
dy_j    -2 * (w_j - x)      x - w_j
---- = ----------------  =  -------
 dx    2 || w_j - x ||        y_j
|
||||
"""
|
||||
|
||||
# to prevent div by zero (NaN) bugs
|
||||
self._output.resize_as_(self.output).copy_(self.output).add_(0.0000001)
|
||||
self._view(self._gradOutput, gradOutput, gradOutput.size())
|
||||
torch.div(gradOutput, self._output, out=self._div)
|
||||
assert input.dim() == 2
|
||||
batchSize = input.size(0)
|
||||
|
||||
self._div.resize_(batchSize, 1, outputSize)
|
||||
self._expand3 = self._div.expand(batchSize, inputSize, outputSize)
|
||||
|
||||
if torch.typename(input) == 'torch.cuda.FloatTensor':
|
||||
self._repeat2.resize_as_(self._expand3).copy_(self._expand3)
|
||||
self._repeat2.mul_(self._repeat)
|
||||
else:
|
||||
torch.mul(self._repeat, self._expand3, out=self._repeat2)
|
||||
|
||||
torch.sum(self._repeat2, 2, True, out=self.gradInput)
|
||||
self.gradInput.resize_as_(input)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
inputSize, outputSize = self.weight.size(0), self.weight.size(1)
|
||||
|
||||
"""
|
||||
dy_j    2 * (w_j - x)      w_j - x
---- = ---------------  =  -------
dw_j   2 || w_j - x ||       y_j
|
||||
"""
|
||||
# assumes a preceding call to updateGradInput
|
||||
assert input.dim() == 2
|
||||
if self._sum is None:
|
||||
self._sum = input.new()
|
||||
torch.sum(self._repeat2, 0, True, out=self._sum)
|
||||
self._sum.resize_(inputSize, outputSize)
|
||||
self.gradWeight.add_(-scale, self._sum)
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type:
|
||||
# prevent premature memory allocations
|
||||
self.clearState()
|
||||
|
||||
return super(Euclidean, self).type(type, tensorCache)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, [
|
||||
'_input',
|
||||
'_output',
|
||||
'_gradOutput',
|
||||
'_weight',
|
||||
'_div',
|
||||
'_sum',
|
||||
'_expand',
|
||||
'_expand2',
|
||||
'_expand3',
|
||||
'_repeat',
|
||||
'_repeat2',
|
||||
])
|
||||
return super(Euclidean, self).clearState()
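# A rough cross-check of the forward pass above, assuming a weight of shape
# (inputSize, outputSize) as in the module: y[b, j] = || x_b - w_j ||_2.
import torch

x = torch.randn(4, 10)
weight = torch.randn(10, 8)
y = (x.unsqueeze(2) - weight.unsqueeze(0)).norm(dim=1)  # (4, 8)
y_ref = torch.cdist(x, weight.t())                      # pairwise Euclidean distances
print(torch.allclose(y, y_ref, atol=1e-4))  # True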
|
@ -1,11 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Exp(Module):
|
||||
|
||||
def updateOutput(self, input):
|
||||
return torch.exp(input, out=self.output)
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
return torch.mul(self.output, gradOutput, out=self.gradInput)
|
@ -1,85 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class FlattenTable(Module):
|
||||
|
||||
def __init__(self):
|
||||
super(FlattenTable, self).__init__()
|
||||
|
||||
self.output = []
|
||||
self.input_map = []
|
||||
self.gradInput = []
|
||||
|
||||
def _flatten(self, output, input):
|
||||
if isinstance(input, list):
|
||||
input_map = []
|
||||
# forward DFS order
|
||||
for i in range(len(input)):
|
||||
input_map.append(self._flatten(output, input[i]))
|
||||
else:
|
||||
input_map = len(output)
|
||||
output.append(input)
|
||||
|
||||
return input_map
|
||||
|
||||
def _checkMapping(self, output, input, input_map):
|
||||
if isinstance(input, list):
|
||||
if len(input) != len(input_map):
|
||||
return False
|
||||
|
||||
# forward DFS order
|
||||
for i in range(len(input)):
|
||||
if not self._checkMapping(output, input[i], input_map[i]):
|
||||
return False
|
||||
|
||||
return True
|
||||
else:
|
||||
return output[input_map] is input
|
||||
|
||||
# During BPROP we have to build a gradInput with the same shape as the
|
||||
# input. This is a recursive function to build up a gradInput
|
||||
def _inverseFlatten(self, gradOutput, input_map):
|
||||
if isinstance(input_map, list):
|
||||
gradInput = []
|
||||
for i in range(len(input_map)):
|
||||
gradInput.append(self._inverseFlatten(gradOutput, input_map[i]))
|
||||
|
||||
return gradInput
|
||||
else:
|
||||
return gradOutput[input_map]
|
||||
|
||||
def updateOutput(self, input):
|
||||
assert isinstance(input, list)
|
||||
# To avoid rebuilding the flattened table on every updateOutput call,
# we do a DFS pass over the existing output table and the inputs to
# see if it needs to be rebuilt.
|
||||
if not self._checkMapping(self.output, input, self.input_map):
|
||||
self.output = []
|
||||
self.input_map = self._flatten(self.output, input)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
assert isinstance(input, list)
|
||||
assert isinstance(gradOutput, list)
|
||||
# If the input changes between the updateOutput and updateGradInput calls,
# we may have to rebuild the input_map! However, let's assume that
|
||||
# the input_map is valid and that forward has already been called.
|
||||
|
||||
# However, we should check that the gradInput is valid:
|
||||
if not self._checkMapping(gradOutput, self.gradInput, self.input_map):
|
||||
self.gradInput = self._inverseFlatten(gradOutput, self.input_map)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if not type:
|
||||
return self._type
|
||||
# This function just stores references so we don't need to do any type
|
||||
# conversions. Just force the tables to be empty.
|
||||
self.clearState()
|
||||
|
||||
def clearState(self):
|
||||
self.input_map = []
|
||||
return super(FlattenTable, self).clearState()
|
@ -1,22 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class GradientReversal(Module):
|
||||
|
||||
def __init__(self, lambd=1):
|
||||
super(GradientReversal, self).__init__()
|
||||
self.lambd = lambd
|
||||
|
||||
def setLambda(self, lambd):
|
||||
self.lambd = lambd
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.set_(input)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_as_(gradOutput)
|
||||
self.gradInput.copy_(gradOutput)
|
||||
self.gradInput.mul_(-self.lambd)
|
||||
return self.gradInput
|
@ -1,29 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class HardShrink(Module):
|
||||
|
||||
def __init__(self, lambd=0.5):
|
||||
assert type(lambd) == float
|
||||
super(HardShrink, self).__init__()
|
||||
self.lambd = lambd
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._backend.HardShrink_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.lambd
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.HardShrink_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.lambd
|
||||
)
|
||||
return self.gradInput
|
@ -1,35 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class HardTanh(Module):
|
||||
|
||||
def __init__(self, min_value=-1, max_value=1, inplace=False):
|
||||
super(HardTanh, self).__init__()
|
||||
self.min_val = min_value
|
||||
self.max_val = max_value
|
||||
self.inplace = inplace
|
||||
assert self.max_val > self.min_val
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._backend.HardTanh_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.min_val,
|
||||
self.max_val,
|
||||
self.inplace
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.HardTanh_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.min_val,
|
||||
self.max_val,
|
||||
self.inplace
|
||||
)
|
||||
return self.gradInput
|
@ -1,37 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class HingeEmbeddingCriterion(Criterion):
|
||||
|
||||
def __init__(self, margin=1, sizeAverage=True):
|
||||
super(HingeEmbeddingCriterion, self).__init__()
|
||||
self.margin = margin
|
||||
self.sizeAverage = sizeAverage
|
||||
self.buffer = None
|
||||
|
||||
def updateOutput(self, input, y):
|
||||
if self.buffer is None:
|
||||
self.buffer = input.new()
|
||||
self.buffer.resize_as_(input).copy_(input)
|
||||
self.buffer[torch.eq(y, -1.)] = 0
|
||||
self.output = self.buffer.sum().item()
|
||||
|
||||
self.buffer.fill_(self.margin).add_(-1, input)
|
||||
self.buffer.clamp_(min=0)
|
||||
self.buffer[torch.eq(y, 1.)] = 0
|
||||
self.output = self.output + self.buffer.sum().item()
|
||||
|
||||
if self.sizeAverage:
|
||||
self.output = self.output / input.nelement()
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, y):
|
||||
self.gradInput.resize_as_(input).copy_(y)
|
||||
self.gradInput[torch.mul(torch.eq(y, -1), torch.gt(input, self.margin))] = 0
|
||||
|
||||
if self.sizeAverage:
|
||||
self.gradInput.mul_(1. / input.nelement())
|
||||
|
||||
return self.gradInput
|
@ -1,17 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Identity(Module):
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output = input
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput = gradOutput
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'gradInput')
|
@ -1,25 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Index(Module):
|
||||
|
||||
def __init__(self, dimension):
|
||||
super(Index, self).__init__()
|
||||
self.dimension = dimension
|
||||
self.gradInput = [self.gradInput]
|
||||
|
||||
def updateOutput(self, input):
|
||||
t = input[0]
|
||||
index = input[1]
|
||||
torch.index_select(t, self.dimension, index, out=self.output)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
t = input[0]
|
||||
index = input[1]
|
||||
|
||||
gradInput = self.gradInput[0] # no gradient for the index tensor
|
||||
gradInput.resize_as_(t).zero_()
|
||||
gradInput.index_add_(self.dimension, index, gradOutput)
|
||||
return self.gradInput
|
@ -1,62 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class JoinTable(Module):
|
||||
|
||||
def __init__(self, dimension):
|
||||
super(JoinTable, self).__init__()
|
||||
self.size = torch.Size()
|
||||
self.dimension = dimension
|
||||
self.gradInput = []
|
||||
|
||||
def _getPositiveDimension(self, input):
|
||||
dimension = self.dimension
|
||||
if dimension < 0:
|
||||
dimension = input[0].dim() + dimension
|
||||
|
||||
return dimension
|
||||
|
||||
def updateOutput(self, input):
|
||||
dim = self._getPositiveDimension(input)
|
||||
|
||||
for i in range(len(input)):
|
||||
currentOutput = input[i]
|
||||
if i == 0:
|
||||
size = list(currentOutput.size())
|
||||
else:
|
||||
size[dim] += currentOutput.size(dim)
|
||||
|
||||
self.size = torch.Size(size)
|
||||
self.output.resize_(self.size)
|
||||
|
||||
# TODO: use cat?
|
||||
offset = 0
|
||||
for i in range(len(input)):
|
||||
currentOutput = input[i]
|
||||
self.output.narrow(dim, offset, currentOutput.size(dim)).copy_(currentOutput)
|
||||
offset += currentOutput.size(dim)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
dim = self._getPositiveDimension(input)
|
||||
|
||||
for i in range(len(input)):
|
||||
if len(self.gradInput) < i + 1:
|
||||
self.gradInput.append(input[i].new())
|
||||
self.gradInput[i].resize_as_(input[i])
|
||||
self.gradInput = self.gradInput[:len(input)]
|
||||
|
||||
offset = 0
|
||||
for i in range(len(input)):
|
||||
currentOutput = input[i]
|
||||
currentGradInput = gradOutput.narrow(dim, offset, currentOutput.size(dim))
|
||||
self.gradInput[i].copy_(currentGradInput)
|
||||
offset = offset + currentOutput.size(dim)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
self.gradInput = []
|
||||
return super(JoinTable, self).type(type, tensorCache)
|
@ -1,36 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class L1Cost(Criterion):
|
||||
|
||||
def __init__(self):
|
||||
super(L1Cost, self).__init__()
|
||||
self.output_tensor = torch.Tensor(1)
|
||||
|
||||
def updateOutput(self, input, target=None):
|
||||
assert target is None
|
||||
if self.output_tensor is None:
|
||||
self.output_tensor = input.new(1)
|
||||
self._backend.L1Cost_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output_tensor
|
||||
)
|
||||
self.output = self.output_tensor[0].item()
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target=None):
|
||||
assert target is None
|
||||
self._backend.L1Cost_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
None,
|
||||
self.gradInput
|
||||
)
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'output_tensor')
|
||||
return super(L1Cost, self).clearState()
|
@ -1,36 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class L1HingeEmbeddingCriterion(Criterion):
|
||||
|
||||
def __init__(self, margin=1):
|
||||
super(L1HingeEmbeddingCriterion, self).__init__()
|
||||
self.margin = float(margin)
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
|
||||
def updateOutput(self, input, y):
|
||||
self.output = float(input[0].dist(input[1], 1))
|
||||
if y == -1:
|
||||
self.output = max(0, self.margin - self.output)
|
||||
|
||||
return self.output
|
||||
|
||||
def _mathsign(t):
|
||||
return 1 if t > 0 else -1
|
||||
|
||||
def updateGradInput(self, input, y):
|
||||
self.gradInput[0].resize_as_(input[0])
|
||||
self.gradInput[1].resize_as_(input[1])
|
||||
self.gradInput[0].copy_(input[0])
|
||||
self.gradInput[0].add_(-1, input[1])
|
||||
dist = self.gradInput[0].norm(1)
|
||||
self.gradInput[0].sign_()
|
||||
if y == -1: # just to avoid a mul by 1
|
||||
if dist > self.margin:
|
||||
self.gradInput[0].zero_()
|
||||
else:
|
||||
self.gradInput[0].mul_(-1)
|
||||
|
||||
self.gradInput[1].zero_().add_(-1, self.gradInput[0])
|
||||
return self.gradInput
|
@ -1,37 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
# This module acts as an L1 latent state regularizer, adding the
|
||||
# [gradOutput] to the gradient of the L1 loss. The [input] is copied to
|
||||
# the [output].
|
||||
|
||||
|
||||
class L1Penalty(Module):
|
||||
|
||||
def __init__(self, l1weight, sizeAverage=False, provideOutput=True):
|
||||
super(L1Penalty, self).__init__()
|
||||
self.l1weight = l1weight
|
||||
self.sizeAverage = sizeAverage
|
||||
self.provideOutput = provideOutput
|
||||
|
||||
def updateOutput(self, input):
|
||||
m = self.l1weight
|
||||
if self.sizeAverage:
|
||||
m = m / input.nelement()
|
||||
|
||||
loss = m * input.norm(1)
|
||||
self.loss = loss
|
||||
self.output = input
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
m = self.l1weight
|
||||
if self.sizeAverage:
|
||||
m = m / input.nelement()
|
||||
|
||||
self.gradInput.resize_as_(input).copy_(input).sign_().mul_(m)
|
||||
|
||||
if self.provideOutput:
|
||||
self.gradInput.add_(gradOutput)
|
||||
|
||||
return self.gradInput
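# Sketch of the gradient the module above adds: l1weight * sign(input), optionally
# divided by the number of elements (values below are illustrative).
import torch

l1weight = 0.01
x = torch.randn(3, 4, requires_grad=True)
loss = l1weight * x.norm(1)
loss.backward()
print(torch.allclose(x.grad, l1weight * x.sign()))  # True almost surely (sign is undefined at 0)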
|
@ -1,43 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class LeakyReLU(Module):
|
||||
|
||||
def __init__(self, negval=1 / 100, inplace=False):
|
||||
super(LeakyReLU, self).__init__()
|
||||
if isinstance(negval, bool):
|
||||
inplace = negval
|
||||
self.negval = 1 / 100
|
||||
else:
|
||||
self.negval = negval
|
||||
|
||||
# default for inplace is False
|
||||
self.inplace = inplace
|
||||
if self.negval < 0:
|
||||
# TODO: warning here
|
||||
self.inplace = False
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._backend.LeakyReLU_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.negval,
|
||||
self.inplace
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.LeakyReLU_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.negval,
|
||||
self.inplace
|
||||
)
|
||||
return self.gradInput
|
||||
|
||||
def __repr__(self):
|
||||
return str(type(self)) + '({:.4f})'.format(self.negval)
|
@ -1,87 +0,0 @@
|
||||
import math
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Linear(Module):
|
||||
|
||||
def __init__(self, inputSize, outputSize, bias=True):
|
||||
super(Linear, self).__init__()
|
||||
self.weight = torch.Tensor(outputSize, inputSize)
|
||||
self.gradWeight = torch.Tensor(outputSize, inputSize)
|
||||
self.bias = torch.Tensor(outputSize) if bias else None
|
||||
self.gradBias = torch.Tensor(outputSize) if bias else None
|
||||
self.reset()
|
||||
|
||||
self.addBuffer = None
|
||||
|
||||
def noBias(self):
|
||||
self.bias = None
|
||||
self.gradBias = None
|
||||
return self
|
||||
|
||||
def reset(self, stdv=None):
|
||||
if stdv is not None:
|
||||
stdv = stdv * math.sqrt(3)
|
||||
else:
|
||||
stdv = 1. / math.sqrt(self.weight.size(1))
|
||||
|
||||
self.weight.uniform_(-stdv, stdv)
|
||||
if self.bias is not None:
|
||||
self.bias.uniform_(-stdv, stdv)
|
||||
|
||||
return self
|
||||
|
||||
def _updateAddBuffer(self, input):
|
||||
nframe = input.size(0)
|
||||
if self.addBuffer is None:
|
||||
self.addBuffer = input.new()
|
||||
if self.addBuffer.nelement() != nframe:
|
||||
self.addBuffer.resize_(nframe).fill_(1)
|
||||
|
||||
def updateOutput(self, input):
|
||||
assert input.dim() == 2
|
||||
nframe = input.size(0)
|
||||
nelement = self.output.nelement()
|
||||
self.output.resize_(nframe, self.weight.size(0))
|
||||
if self.output.nelement() != nelement:
|
||||
self.output.zero_()
|
||||
|
||||
self._updateAddBuffer(input)
|
||||
self.output.addmm_(0, 1, input, self.weight.t())
|
||||
if self.bias is not None:
|
||||
self.output.addr_(self.addBuffer, self.bias)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
if self.gradInput is None:
|
||||
return
|
||||
|
||||
nelement = self.gradInput.nelement()
|
||||
self.gradInput.resize_as_(input)
|
||||
if self.gradInput.nelement() != nelement:
|
||||
self.gradInput.zero_()
|
||||
|
||||
assert input.dim() == 2
|
||||
self.gradInput.addmm_(0, 1, gradOutput, self.weight)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
assert input.dim() == 2
|
||||
self.gradWeight.addmm_(scale, gradOutput.t(), input)
|
||||
if self.bias is not None:
|
||||
# update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput
|
||||
self._updateAddBuffer(input)
|
||||
self.gradBias.addmv_(scale, gradOutput.t(), self.addBuffer)
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'addBuffer')
|
||||
return super(Linear, self).clearState()
|
||||
|
||||
def __repr__(self):
|
||||
return super(Linear, self).__repr__() + \
|
||||
'({} -> {})'.format(self.weight.size(1), self.weight.size(0)) + \
|
||||
(' without bias' if self.bias is None else '')
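# A rough migration sketch: the removed legacy Linear maps onto torch.nn.Linear
# (weight of shape (outputSize, inputSize), optional bias); sizes are illustrative.
import torch
import torch.nn as nn

m = nn.Linear(10, 8, bias=True)
x = torch.randn(4, 10)
y = x @ m.weight.t() + m.bias  # what updateOutput above computes via addmm_/addr_
print(torch.allclose(y, m(x), atol=1e-6))  # True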
|
@ -1,18 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Log(Module):
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_as_(input)
|
||||
self.output.copy_(input)
|
||||
self.output.log_()
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_as_(input)
|
||||
self.gradInput.fill_(1)
|
||||
self.gradInput.div_(input)
|
||||
self.gradInput.mul_(gradOutput)
|
||||
return self.gradInput
|
@ -1,35 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class LogSigmoid(Module):
|
||||
|
||||
def __init__(self):
|
||||
super(LogSigmoid, self).__init__()
|
||||
self.buffer = None
|
||||
|
||||
def updateOutput(self, input):
|
||||
if self.buffer is None:
|
||||
self.buffer = input.new()
|
||||
self._backend.LogSigmoid_updateOutput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
self.output,
|
||||
self.buffer
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.LogSigmoid_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.buffer
|
||||
)
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'buffer')
|
||||
return super(LogSigmoid, self).clearState()
|
@ -1,29 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class LogSoftMax(Module):
|
||||
|
||||
def __init__(self, dim=None):
|
||||
super(LogSoftMax, self).__init__()
|
||||
if dim is not None:
|
||||
self.dim = dim
|
||||
|
||||
def _get_dim(self, input):
|
||||
return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output = torch.log_softmax(
|
||||
input,
|
||||
self._get_dim(input)
|
||||
)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput = torch.log_softmax_backward_data(
|
||||
gradOutput,
|
||||
self.output,
|
||||
self._get_dim(input),
|
||||
input
|
||||
)
|
||||
return self.gradInput
|
@ -1,152 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class LookupTable(Module):
|
||||
|
||||
def __init__(self, nIndex, nOutput, paddingValue=-1, maxNorm=None, normType=None):
|
||||
super(LookupTable, self).__init__()
|
||||
self.weight = torch.Tensor(nIndex, nOutput)
|
||||
self.gradWeight = torch.Tensor(nIndex, nOutput).zero_()
|
||||
self.paddingValue = paddingValue
|
||||
self.maxNorm = maxNorm
|
||||
self.normType = normType
|
||||
self.shouldScaleGradByFreq = False
|
||||
|
||||
self._gradOutput = None
|
||||
self._sorted = None
|
||||
self._indices = None
|
||||
|
||||
self._count = torch.IntTensor()
|
||||
self._input = torch.LongTensor()
|
||||
|
||||
self.reset()
|
||||
|
||||
def accUpdateOnly(self):
|
||||
self.gradWeight = None
|
||||
return self
|
||||
|
||||
def setPadding(self, paddingValue):
|
||||
self.paddingValue = paddingValue
|
||||
return self
|
||||
|
||||
def setMaxNorm(self, maxNorm):
|
||||
self.maxNorm = maxNorm
|
||||
return self
|
||||
|
||||
def setNormType(self, normType):
|
||||
self.normType = normType
|
||||
return self
|
||||
|
||||
def scaleGradByFreq(self):
|
||||
self.shouldScaleGradByFreq = True
|
||||
return self
|
||||
|
||||
def reset(self, stdv=1):
|
||||
self.weight.normal_(0, stdv)
|
||||
|
||||
def _makeInputContiguous(self, input):
|
||||
# make sure input is a contiguous torch.LongTensor
|
||||
if not input.is_contiguous() or input.type() != self._input.type():
|
||||
self.copiedInput = True
|
||||
self._input.resize_(input.size()).copy_(input)
|
||||
return self._input
|
||||
else:
|
||||
self.copiedInput = False
|
||||
return input
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.renorm(input)
|
||||
input = self._makeInputContiguous(input)
|
||||
if input.dim() == 1:
|
||||
torch.index_select(self.weight, 0, input, out=self.output)
|
||||
elif input.dim() == 2:
|
||||
torch.index_select(self.weight, 0, input.view(-1), out=self.output)
|
||||
self.output = self.output.view(input.size(0), input.size(1), self.weight.size(1))
|
||||
else:
|
||||
raise RuntimeError("input must be a vector or matrix")
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
# The input can be of any type (in the forward pass it is converted
# to LongTensor anyway), so we need to allocate new memory each time
# the user changes the input type.
|
||||
if self.gradInput.type() != input.type():
|
||||
self.gradInput = input.new()
|
||||
|
||||
if not self.gradInput.is_same_size(input):
|
||||
self.gradInput.resize_as_(input).zero_()
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
input = self._input if self.copiedInput else input
|
||||
if input.dim() == 2:
|
||||
input = input.view(-1)
|
||||
elif input.dim() != 1:
|
||||
raise RuntimeError("input must be a vector or matrix")
|
||||
|
||||
if not gradOutput.is_contiguous():
|
||||
if self._gradOutput is None:
|
||||
self._gradOutput = gradOutput.new()
|
||||
self._gradOutput.resize_as_(gradOutput).copy_(gradOutput)
|
||||
gradOutput = self._gradOutput
|
||||
|
||||
self._backend.LookupTable_accGradParameters(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradWeight,
|
||||
self._count,
|
||||
self._sorted,
|
||||
self._indices,
|
||||
self.shouldScaleGradByFreq,
|
||||
self.paddingValue or 0,
|
||||
scale
|
||||
)
|
||||
|
||||
def renorm(self, input):
|
||||
if self.maxNorm is None:
|
||||
return
|
||||
|
||||
# copy input into _input, so _input is contiguous.
|
||||
# The copied _input will be modified in the C code.
|
||||
self._input.resize_(input.size()).copy_(input)
|
||||
row_idx = self._input
|
||||
if row_idx.dim() == 2:
|
||||
row_idx = row_idx.view(-1)
|
||||
elif row_idx.dim() != 1:
|
||||
raise RuntimeError("input must be a vector or matrix")
|
||||
|
||||
# "row_idx" and "weight" will be modified in the C code
|
||||
self._backend.LookupTable_renorm(
|
||||
self._backend.library_state,
|
||||
row_idx,
|
||||
self.weight,
|
||||
self.maxNorm,
|
||||
self.normType or 2
|
||||
)
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type is None:
|
||||
return self._type
|
||||
super(LookupTable, self).type(type, tensorCache)
|
||||
|
||||
if type == 'torch.cuda.FloatTensor':
|
||||
# CUDA uses _sorted and _indices temporary tensors
|
||||
self._sorted = torch.cuda.LongTensor()
|
||||
self._indices = torch.cuda.LongTensor()
|
||||
self._count = torch.cuda.LongTensor()
|
||||
self._input = torch.cuda.LongTensor()
|
||||
else:
|
||||
# self._count and self._input should only be converted if using Cuda
|
||||
self._count = torch.IntTensor()
|
||||
self._input = torch.LongTensor()
|
||||
|
||||
return self
|
||||
|
||||
def clearState(self):
|
||||
clear(self, '_count', '_input', '_sorted', '_indices', '_gradOutput')
|
||||
return super(LookupTable, self).clearState()
|
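Migration note (editorial, not part of the diff): the lookup, renorm and frequency-scaled gradient behaviour above is covered by torch.nn.Embedding via its max_norm, norm_type, scale_grad_by_freq and padding_idx arguments. A minimal sketch, assuming current torch.nn APIs; the sizes are illustrative:

import torch
import torch.nn as nn

# nn.Embedding re-normalizes the rows touched by a lookup when max_norm is set,
# mirroring LookupTable.renorm; scale_grad_by_freq mirrors shouldScaleGradByFreq.
emb = nn.Embedding(num_embeddings=1000, embedding_dim=64,
                   max_norm=1.0, norm_type=2, scale_grad_by_freq=True, padding_idx=0)
idx = torch.randint(0, 1000, (4, 10))   # batch of index vectors
out = emb(idx)                          # shape (4, 10, 64)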
@ -1,72 +0,0 @@
import torch
from .Module import Module


class MM(Module):

    def __init__(self, transA=False, transB=False):
        super(MM, self).__init__()
        self.transA = transA
        self.transB = transB
        self.gradInput = [torch.Tensor(), torch.Tensor()]

    def updateOutput(self, input):
        assert len(input) == 2
        a, b = input
        assert a.ndimension() == 2 or a.ndimension() == 3
        assert a.dim() == b.dim()

        if a.ndimension() == 2:
            if self.transA:
                a = a.t()
            if self.transB:
                b = b.t()
            self.output.resize_(a.size(0), b.size(1))
            torch.mm(a, b, out=self.output)
        else:
            if self.transA:
                a = a.transpose(1, 2)
            if self.transB:
                b = b.transpose(1, 2)

            self.output.resize_(a.size(0), a.size(1), b.size(2))
            torch.bmm(a, b, out=self.output)

        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.gradInput[0] is None:
            self.gradInput[0] = input[0].new()
        if self.gradInput[1] is None:
            self.gradInput[1] = input[1].new()

        assert len(input) == 2
        a, b = input
        self.gradInput[0].resize_as_(a)
        self.gradInput[1].resize_as_(b)

        assert gradOutput.ndimension() == 2 or gradOutput.ndimension() == 3
        assert a.dim() == b.dim() == gradOutput.dim()

        if gradOutput.ndimension() == 2:
            h_dim, w_dim = 0, 1
            f = "mm"
        else:
            h_dim, w_dim = 1, 2
            f = "bmm"

        if self.transA == self.transB:
            a = a.transpose(h_dim, w_dim)
            b = b.transpose(h_dim, w_dim)

        if self.transA:
            getattr(torch, f)(b, gradOutput.transpose(h_dim, w_dim), out=self.gradInput[0])
        else:
            getattr(torch, f)(gradOutput, b, out=self.gradInput[0])

        if self.transB:
            getattr(torch, f)(gradOutput.transpose(h_dim, w_dim), a, out=self.gradInput[1])
        else:
            getattr(torch, f)(a, gradOutput, out=self.gradInput[1])

        return self.gradInput
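Migration note (editorial, not part of the diff): the 2-D/3-D branching that legacy MM implements by hand reduces to torch.mm / torch.bmm. A minimal sketch of the forward computation, assuming the transA/transB semantics shown above; the helper name is illustrative:

import torch

def mm_forward(a, b, transA=False, transB=False):
    # mirrors MM.updateOutput: optional transposition, then (batched) matrix multiply
    if a.dim() == 3:
        a = a.transpose(1, 2) if transA else a
        b = b.transpose(1, 2) if transB else b
        return torch.bmm(a, b)
    a = a.t() if transA else a
    b = b.t() if transB else b
    return torch.mm(a, b)

out = mm_forward(torch.randn(8, 5, 3), torch.randn(8, 3, 4))   # shape (8, 5, 4)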
@ -1,37 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class MSECriterion(Criterion):

    def __init__(self, sizeAverage=True):
        super(MSECriterion, self).__init__()
        self.sizeAverage = sizeAverage
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        self._backend.MSECriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        implicit_gradOutput = torch.Tensor([1]).type(input.type())

        self._backend.MSECriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        return self.gradInput
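Migration note (editorial, not part of the diff): the THNN-backed MSECriterion maps onto torch.nn.functional.mse_loss, whose default mean reduction matches sizeAverage=True; autograd replaces the explicit updateGradInput. A minimal sketch:

import torch
import torch.nn.functional as F

input = torch.randn(4, 10, requires_grad=True)
target = torch.randn(4, 10)
loss = F.mse_loss(input, target)   # mean over all elements, like sizeAverage=True
loss.backward()                    # gradients come from autograd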
@ -1,67 +0,0 @@
import torch
from .Module import Module


class MV(Module):
    """Module to perform matrix vector multiplication on two minibatch inputs,
    producing a minibatch.
    """

    def __init__(self, trans=False):
        super(MV, self).__init__()

        self.trans = trans
        self.gradInput = [torch.Tensor(), torch.Tensor()]

    def updateOutput(self, input):
        M, v = input
        assert M.ndimension() == 2 or M.ndimension() == 3

        if M.ndimension() == 2:
            assert v.ndimension() == 1
            if self.trans:
                M = M.transpose(0, 1)
            self.output.resize_(M.size(0))
            torch.mv(M, v, out=self.output)
        else:
            assert v.ndimension() == 2
            if self.trans:
                M = M.transpose(1, 2)
            self.output.resize_(M.size(0), M.size(1), 1)
            torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))

        return self.output

    def updateGradInput(self, input, gradOutput):
        M, v = input
        self.gradInput[0].resize_as_(M)
        self.gradInput[1].resize_as_(v)
        gradOutput = gradOutput.contiguous()

        assert gradOutput.ndimension() == 1 or gradOutput.ndimension() == 2

        if gradOutput.ndimension() == 2:
            assert M.ndimension() == 3
            assert v.ndimension() == 2
            bdim = M.size(0)
            odim = M.size(1)
            idim = M.size(2)

            if self.trans:
                torch.bmm(v.view(bdim, odim, 1), gradOutput.view(bdim, 1, idim), out=self.gradInput[0])
                torch.bmm(M, gradOutput.view(bdim, idim, 1), out=self.gradInput[1].view(bdim, odim, 1))
            else:
                torch.bmm(gradOutput.view(bdim, odim, 1), v.view(bdim, 1, idim), out=self.gradInput[0])
                torch.bmm(M.transpose(1, 2), gradOutput.view(bdim, odim, 1), out=self.gradInput[1].view(bdim, idim, 1))
        else:
            assert M.ndimension() == 2
            assert v.ndimension() == 1

            if self.trans:
                torch.ger(v, gradOutput, out=self.gradInput[0])
                self.gradInput[1] = M * gradOutput
            else:
                torch.ger(gradOutput, v, out=self.gradInput[0])
                self.gradInput[1] = M.t() * gradOutput

        return self.gradInput
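Migration note (editorial, not part of the diff): a dedicated MV module is no longer needed because torch.matmul broadcasts batched matrix-vector products; the trans flag becomes an explicit transpose. A minimal sketch of the forward pass:

import torch

M = torch.randn(8, 5, 3)    # batch of matrices
v = torch.randn(8, 3)       # batch of vectors
out = torch.matmul(M, v.unsqueeze(-1)).squeeze(-1)   # shape (8, 5), like legacy MV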
@ -1,36 +0,0 @@
import torch
from .Criterion import Criterion


class MarginCriterion(Criterion):

    def __init__(self, margin=1, sizeAverage=True):
        super(MarginCriterion, self).__init__()
        self.sizeAverage = True
        self.margin = margin
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        self._backend.MarginCriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            self.sizeAverage,
            self.margin
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        self._backend.MarginCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            self.gradInput,
            self.sizeAverage,
            self.margin
        )
        return self.gradInput
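Migration note (editorial, not part of the diff): torch.nn has no drop-in replacement named MarginCriterion, but the same two-class hinge loss over ±1 targets is a one-liner with tensor ops. A minimal sketch, assuming margin=1 and mean reduction as above; the helper name is illustrative:

import torch

def margin_loss(input, target, margin=1.0):
    # hinge loss max(0, margin - y * x), averaged as with sizeAverage=True
    return torch.clamp(margin - target * input, min=0).mean()

loss = margin_loss(torch.randn(16), torch.sign(torch.randn(16)))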
@ -1,75 +0,0 @@
|
||||
import torch
|
||||
from .Criterion import Criterion
|
||||
|
||||
|
||||
class MarginRankingCriterion(Criterion):
|
||||
|
||||
def __init__(self, margin=0, sizeAverage=True):
|
||||
super(MarginRankingCriterion, self).__init__()
|
||||
self.margin = margin
|
||||
self.sizeAverage = sizeAverage
|
||||
self.gradInput = [torch.Tensor(), torch.Tensor()]
|
||||
|
||||
self._output = None
|
||||
self.dist = None
|
||||
self.mask = None
|
||||
|
||||
def updateOutput(self, input, y):
|
||||
if input[0].size(0) == 1:
|
||||
self.output = max(0, -y * (input[0][0] - input[1][0]) + self.margin)
|
||||
else:
|
||||
if self._output is None:
|
||||
self._output = input[0].clone()
|
||||
self._output.resize_as_(input[0])
|
||||
self._output.copy_(input[0])
|
||||
|
||||
self._output.add_(-1, input[1])
|
||||
self._output.mul_(-1).mul_(y)
|
||||
self._output.add_(self.margin)
|
||||
|
||||
self._output.clamp_(min=0)
|
||||
|
||||
self.output = self._output.sum().item()
|
||||
|
||||
if self.sizeAverage:
|
||||
self.output = self.output / y.size(0)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, y):
|
||||
if input[0].size(0) == 1:
|
||||
dist = -y * (input[0][0] - input[1][0]) + self.margin
|
||||
if dist < 0:
|
||||
self.gradInput[0][0] = 0
|
||||
self.gradInput[1][0] = 0
|
||||
else:
|
||||
self.gradInput[0][0] = -y
|
||||
self.gradInput[1][0] = y
|
||||
else:
|
||||
if self.dist is None:
|
||||
self.dist = input[0].new()
|
||||
self.dist = self.dist.resize_as_(input[0]).copy_(input[0])
|
||||
dist = self.dist
|
||||
|
||||
dist.add_(-1, input[1])
|
||||
dist.mul_(-1).mul_(y)
|
||||
dist.add_(self.margin)
|
||||
|
||||
self.mask = dist > 0
|
||||
mask = self.mask
|
||||
|
||||
torch.ge(dist, 0, out=mask)
|
||||
|
||||
self.gradInput[0].resize_(dist.size())
|
||||
self.gradInput[1].resize_(dist.size())
|
||||
|
||||
self.gradInput[0].copy_(mask)
|
||||
self.gradInput[0].mul_(-1).mul_(y)
|
||||
self.gradInput[1].copy_(mask)
|
||||
self.gradInput[1].mul_(y)
|
||||
|
||||
if self.sizeAverage:
|
||||
self.gradInput[0].div_(y.size(0))
|
||||
self.gradInput[1].div_(y.size(0))
|
||||
|
||||
return self.gradInput
|
@ -1,64 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class MaskedSelect(Module):
|
||||
|
||||
def __init__(self):
|
||||
super(MaskedSelect, self).__init__()
|
||||
self._maskIndices = torch.LongTensor()
|
||||
self._maskIndexBuffer = torch.LongTensor()
|
||||
self._maskIndexBufferCPU = torch.FloatTensor()
|
||||
self._gradBuffer = torch.Tensor()
|
||||
self._gradMask = torch.ByteTensor()
|
||||
|
||||
def updateOutput(self, input):
|
||||
input, mask = input
|
||||
torch.masked_select(input, mask, out=self.output)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
input, mask = input
|
||||
if input.type() == 'torch.cuda.FloatTensor':
|
||||
torch.arange(0, mask.nelement(), out=self._maskIndexBufferCPU).resize_(mask.size())
|
||||
self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU)
|
||||
else:
|
||||
torch.arange(0, mask.nelement(), out=self._maskIndexBuffer).resize_(mask.size())
|
||||
|
||||
torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices)
|
||||
self._gradBuffer.resize_(input.nelement()).zero_()
|
||||
self._gradBuffer.scatter_(0, self._maskIndices, gradOutput)
|
||||
self._gradBuffer.resize_(input.size())
|
||||
self.gradInput = [self._gradBuffer, self._gradMask.resize_(mask.size()).fill_(0)]
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type is None:
|
||||
return self._type
|
||||
|
||||
self._gradBuffer = self._gradBuffer.type(type)
|
||||
self.gradInput = self.gradInput.type(type)
|
||||
self.output = self.output.type(type)
|
||||
|
||||
# These casts apply when switching between cuda/non-cuda types
|
||||
if type != 'torch.cuda.FloatTensor':
|
||||
self._maskIndexBuffer = self._maskIndexBuffer.long()
|
||||
self._maskIndices = self._maskIndices.long()
|
||||
self._gradMask = self._gradMask.byte()
|
||||
else:
|
||||
self._maskIndexBuffer = self._maskIndexBuffer.cuda()
|
||||
self._maskIndices = self._maskIndices.cuda()
|
||||
self._gradMask = self._gradMask.cuda()
|
||||
|
||||
self._type = type
|
||||
return self
|
||||
|
||||
def clearState(self):
|
||||
return clear(self, ['output',
|
||||
'gradInput',
|
||||
'_maskIndexBuffer',
|
||||
'_maskIndexBufferCPU',
|
||||
'_maskIndices',
|
||||
'_gradBuffer',
|
||||
'_gradMask'])
|
@ -1,67 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear, addSingletondimension
|
||||
|
||||
|
||||
class Max(Module):
|
||||
|
||||
def __init__(self, dimension=0):
|
||||
super(Max, self).__init__()
|
||||
self.dimension = dimension
|
||||
self._output = None
|
||||
self._indices = None
|
||||
|
||||
def _getPositiveDimension(self, input):
|
||||
dimension = self.dimension
|
||||
if dimension < 0:
|
||||
dimension = input.dim() + dimension
|
||||
|
||||
return dimension
|
||||
|
||||
def _lazyInit(self):
|
||||
if self._output is None:
|
||||
self._output = self.output.new()
|
||||
if self._indices is None:
|
||||
self._indices = \
|
||||
(torch.cuda.LongTensor() if self.output.is_cuda else torch.LongTensor())
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._lazyInit()
|
||||
dimension = self._getPositiveDimension(input)
|
||||
torch.max(input, dimension, out=(self._output, self._indices), keepdim=True)
|
||||
if input.dim() > 1:
|
||||
self.output.set_(self._output.select(dimension, 0))
|
||||
else:
|
||||
self.output.set_(self._output)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._lazyInit()
|
||||
dimension = self._getPositiveDimension(input)
|
||||
if input.dim() > 1:
|
||||
gradOutputView = addSingletondimension(gradOutput, dimension)
|
||||
else:
|
||||
gradOutputView = gradOutput
|
||||
|
||||
self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView)
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type, tensorCache=None):
|
||||
# torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
|
||||
if type == 'torch.cuda.FloatTensor':
|
||||
indices, self._indices = self._indices, None
|
||||
super(Max, self).type(type, tensorCache)
|
||||
self._indices = indices.type('torch.cuda.LongTensor') if indices is not None else None
|
||||
else:
|
||||
# self._indices must be a LongTensor. Setting it to nil temporarily avoids
|
||||
# unnecessary memory allocations.
|
||||
indices, self._indices = self._indices, None
|
||||
super(Max, self).type(type, tensorCache)
|
||||
self._indices = indices.long() if indices is not None else None
|
||||
|
||||
return self
|
||||
|
||||
def clearState(self):
|
||||
clear(self, '_indices', '_output')
|
||||
return super(Max, self).clearState()
|
@ -1,16 +0,0 @@
import torch
from .Sum import Sum

"""

This file is still here because of backward compatibility.

Please use instead "nn.Sum(dimension, nInputDims, sizeAverage)"

"""


class Mean(Sum):

    def __init__(self, dimension):
        super(Mean, self).__init__(dimension, True)
@ -1,68 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear, addSingletondimension
|
||||
|
||||
|
||||
class Min(Module):
|
||||
|
||||
def __init__(self, dimension=0):
|
||||
super(Min, self).__init__()
|
||||
self.dimension = dimension
|
||||
self._output = None
|
||||
self._indices = None
|
||||
|
||||
def _getPositiveDimension(self, input):
|
||||
dimension = self.dimension
|
||||
if dimension < 0:
|
||||
dimension = input.dim() + dimension
|
||||
|
||||
return dimension
|
||||
|
||||
def _lazyInit(self):
|
||||
if self._output is None:
|
||||
self._output = self.output.new()
|
||||
if self._indices is None:
|
||||
self._indices = \
|
||||
(torch.cuda.LongTensor() if self.output.type() == 'torch.cuda.FloatTensor'
|
||||
else torch.LongTensor())
|
||||
|
||||
def updateOutput(self, input):
|
||||
self._lazyInit()
|
||||
dimension = self._getPositiveDimension(input)
|
||||
torch.min(input, dimension, out=(self._output, self._indices), keepdim=True)
|
||||
if input.dim() > 1:
|
||||
self.output.set_(self._output.select(dimension, 0))
|
||||
else:
|
||||
self.output.set_(self._output)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._lazyInit()
|
||||
dimension = self._getPositiveDimension(input)
|
||||
if input.dim() > 1:
|
||||
gradOutputView = addSingletondimension(gradOutput, dimension)
|
||||
else:
|
||||
gradOutputView = gradOutput
|
||||
|
||||
self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView)
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type, tensorCache=None):
|
||||
# torch.min expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
|
||||
if type == 'torch.cuda.FloatTensor':
|
||||
indices, self._indices = self._indices, None
|
||||
super(Min, self).type(type, tensorCache)
|
||||
self._indices = indices.type('torch.cuda.LongTensor') if indices is not None else None
|
||||
else:
|
||||
# self._indices must be a LongTensor. Setting it to nil temporarily avoids
|
||||
# unnecessary memory allocations.
|
||||
indices, self._indices = self._indices, None
|
||||
super(Min, self).type(type, tensorCache)
|
||||
self._indices = indices.long() if indices is not None else None
|
||||
|
||||
return self
|
||||
|
||||
def clearState(self):
|
||||
clear(self, '_indices', '_output')
|
||||
return super(Min, self).clearState()
|
@ -1,168 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear, recursiveResizeAs
|
||||
|
||||
|
||||
class MixtureTable(Module):
|
||||
|
||||
def __init__(self, dim=1):
|
||||
super(MixtureTable, self).__init__()
|
||||
self.dim = dim
|
||||
self.size = torch.Size()
|
||||
self.size2 = torch.Size()
|
||||
self.batchSize = 0
|
||||
self.backwardSetup = False
|
||||
self.gradInput = []
|
||||
|
||||
self._gaterView = None
|
||||
self._expert = None
|
||||
self._expertView = None
|
||||
self._sum = None
|
||||
self._expertView2 = None
|
||||
self._expert2 = None
|
||||
self.table = False
|
||||
|
||||
def updateOutput(self, input):
|
||||
gaterInput, expertInputs = input
|
||||
|
||||
# buffers
|
||||
if self._gaterView is None:
|
||||
self._gaterView = input[0].new()
|
||||
if self._expert is None:
|
||||
self._expert = input[0].new()
|
||||
if self._expertView is None:
|
||||
self._expertView = input[0].new()
|
||||
|
||||
self.dimG = 1
|
||||
batchSize = gaterInput.size(0)
|
||||
|
||||
if self.table or isinstance(expertInputs, list):
|
||||
self.table = True
|
||||
if gaterInput.size(self.dimG) != len(expertInputs):
|
||||
raise RuntimeError("Should be one gater output per expert")
|
||||
|
||||
expertInput = expertInputs[0]
|
||||
if self.batchSize != batchSize:
|
||||
size = [1] * (expertInput.dim() + 1)
|
||||
if self.dimG > 0:
|
||||
size[0] = gaterInput.size(0)
|
||||
size[self.dim] = gaterInput.size(self.dimG)
|
||||
self.size = torch.Size(size)
|
||||
self.output.resize_as_(expertInput)
|
||||
self.backwardSetup = False
|
||||
self.batchSize = batchSize
|
||||
|
||||
self._gaterView = gaterInput.view(self.size)
|
||||
self.output.zero_()
|
||||
# multiply accumulate gater outputs by their commensurate expert
|
||||
for i, expertInput in enumerate(expertInputs):
|
||||
gate = self._gaterView.select(self.dim, i).expand_as(expertInput)
|
||||
self.output.addcmul_(expertInput, gate)
|
||||
else:
|
||||
if self.batchSize != batchSize:
|
||||
size = [1] * expertInputs.dim()
|
||||
if self.dimG > 0:
|
||||
size[0] = gaterInput.size(0)
|
||||
size[self.dim] = gaterInput.size(self.dimG)
|
||||
self.size = torch.Size(size)
|
||||
self.output.resize_as_(expertInputs.select(self.dim, 0))
|
||||
self.batchSize = batchSize
|
||||
self.backwardSetup = False
|
||||
|
||||
self._gaterView = gaterInput.view(self.size)
|
||||
torch.mul(self._gaterView.expand_as(expertInputs), expertInputs, out=self._expert)
|
||||
torch.sum(self._expert, self.dim, True, out=self.output)
|
||||
self.output.resize_as_(expertInputs.select(self.dim, 0))
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
gaterInput, expertInputs = input
|
||||
recursiveResizeAs(self.gradInput, input)
|
||||
gaterGradInput, expertGradInputs = self.gradInput
|
||||
|
||||
# buffers
|
||||
if self._sum is None:
|
||||
self._sum = input[0].new()
|
||||
if self._expertView2 is None:
|
||||
self._expertView2 = input[0].new()
|
||||
if self._expert2 is None:
|
||||
self._expert2 = input[0].new()
|
||||
|
||||
if self.table:
|
||||
if not self.backwardSetup:
|
||||
for i, expertInput in enumerate(expertInputs):
|
||||
expertGradInput = expertGradInputs[i] or expertInput.clone()
|
||||
expertGradInput.resize_as_(expertInput)
|
||||
expertGradInputs[i] = expertGradInput
|
||||
|
||||
gaterGradInput.resize_as_(gaterInput)
|
||||
self.backwardSetup = True
|
||||
|
||||
# like CMulTable, but with broadcasting
|
||||
for i, expertGradInput in enumerate(expertGradInputs):
|
||||
# gater updateGradInput
|
||||
torch.mul(gradOutput, expertInputs[i], out=self._expert)
|
||||
if self.dimG == 0:
|
||||
self._expertView = self._expert.view(-1)
|
||||
else:
|
||||
self._expertView = self._expert.view(gradOutput.size(0), -1)
|
||||
|
||||
torch.sum(self._expertView, self.dimG, True, out=self._sum)
|
||||
if self.dimG == 0:
|
||||
gaterGradInput[i] = self._sum.select(self.dimG, 0)
|
||||
else:
|
||||
gaterGradInput.select(self.dimG, i).copy_(self._sum.select(self.dimG, 0))
|
||||
|
||||
# expert updateGradInput
|
||||
gate = self._gaterView.select(self.dim, i).expand_as(expertGradInput)
|
||||
expertGradInput.mul_(gate, gradOutput)
|
||||
else:
|
||||
if not self.backwardSetup:
|
||||
size2 = list(expertInputs.size())
|
||||
size2[self.dim] = 1
|
||||
self.size2 = torch.Size(size2)
|
||||
gaterGradInput.resize_as_(gaterInput)
|
||||
self.backwardSetup = True
|
||||
|
||||
# gater updateGradInput
|
||||
self._expertView = gradOutput.contiguous().view(torch.Size(self.size2))
|
||||
gradOutput = self._expertView.expand_as(expertInputs)
|
||||
torch.mul(gradOutput, expertInputs, out=self._expert)
|
||||
expert = self._expert.transpose(self.dim, self.dimG)
|
||||
if not expert.is_contiguous():
|
||||
self._expert2.resize_as_(expert)
|
||||
self._expert2.copy_(expert)
|
||||
expert = self._expert2
|
||||
if self.dimG == 0:
|
||||
self._expertView2 = expert.view(gaterInput.size(0), -1)
|
||||
else:
|
||||
self._expertView2 = expert.view(gaterInput.size(0), gaterInput.size(1), -1)
|
||||
|
||||
torch.sum(self._expertView2, self.dimG + 1, True, out=gaterGradInput)
|
||||
gaterGradInput.resize_as_(gaterInput)
|
||||
|
||||
# expert updateGradInput
|
||||
torch.mul(self._gaterView.expand_as(expertInputs), gradOutput, out=expertGradInputs)
|
||||
|
||||
return self.gradInput
|
||||
|
||||
def type(self, type, tensorCache=None):
|
||||
self._gaterView = None
|
||||
self._expert = None
|
||||
self._expertView = None
|
||||
self._sum = None
|
||||
self._expert2 = None
|
||||
self._expertView2 = None
|
||||
return super(MixtureTable, self).type(type, tensorCache)
|
||||
|
||||
def clearState(self, ):
|
||||
clear(self, [
|
||||
'_gaterView',
|
||||
'_expert',
|
||||
'_expertView',
|
||||
'_sum',
|
||||
'_expert2',
|
||||
'_expertView2',
|
||||
])
|
||||
return super(MixtureTable, self).clearState()
|
@ -1,296 +0,0 @@
|
||||
import torch
|
||||
import torch._thnn
|
||||
from .utils import clear, recursiveType
|
||||
|
||||
|
||||
class Module(object):
|
||||
|
||||
def __init__(self):
|
||||
self.gradInput = torch.Tensor()
|
||||
self.output = torch.Tensor()
|
||||
self._type = self.output.type()
|
||||
self._backend = torch._thnn.type2backend[self.output.type()]
|
||||
|
||||
def __repr__(self):
|
||||
return 'nn.' + self.__class__.__name__
|
||||
|
||||
def parameters(self):
|
||||
has_weight = hasattr(self, 'weight') and self.weight is not None
|
||||
has_bias = hasattr(self, 'bias') and self.bias is not None
|
||||
if has_weight and has_bias:
|
||||
return [self.weight, self.bias], [self.gradWeight, self.gradBias]
|
||||
elif has_weight:
|
||||
return [self.weight], [self.gradWeight]
|
||||
elif has_bias:
|
||||
return [self.bias], [self.gradBias]
|
||||
else:
|
||||
return
|
||||
|
||||
def updateOutput(self, input):
|
||||
return self.output
|
||||
|
||||
def forward(self, input):
|
||||
return self.updateOutput(input)
|
||||
|
||||
def backward(self, input, gradOutput, scale=1):
|
||||
self.updateGradInput(input, gradOutput)
|
||||
self.accGradParameters(input, gradOutput, scale)
|
||||
return self.gradInput
|
||||
|
||||
def backwardUpdate(self, input, gradOutput, lr):
|
||||
self.updateGradInput(input, gradOutput)
|
||||
self.accUpdateGradParameters(input, gradOutput, lr)
|
||||
return self.gradInput
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
return self.gradInput
|
||||
|
||||
def accGradParameters(self, input, gradOutput, scale=1):
|
||||
pass
|
||||
|
||||
def accUpdateGradParameters(self, input, gradOutput, lr):
|
||||
has_weight = hasattr(self, 'weight') and self.weight is not None
|
||||
has_bias = hasattr(self, 'bias') and self.bias is not None
|
||||
if has_weight:
|
||||
gradWeight = self.gradWeight
|
||||
self.gradWeight = self.weight
|
||||
if has_bias:
|
||||
gradBias = self.gradBias
|
||||
self.gradBias = self.bias
|
||||
self.accGradParameters(input, gradOutput, -lr)
|
||||
if has_weight:
|
||||
self.gradWeight = gradWeight
|
||||
if has_bias:
|
||||
self.gradBias = gradBias
|
||||
|
||||
def sharedAccUpdateGradParameters(self, input, gradOutput, lr):
|
||||
if self.parameters():
|
||||
self.zeroGradParameters()
|
||||
self.accGradParameters(input, gradOutput, 1)
|
||||
self.updateParameters(lr)
|
||||
|
||||
def zeroGradParameters(self):
|
||||
params = self.parameters()
|
||||
if params is not None:
|
||||
for grad in params[1]:
|
||||
grad.zero_()
|
||||
|
||||
def updateParameters(self, learningRate):
|
||||
if self.parameters() is not None:
|
||||
params, gradParams = self.parameters()
|
||||
if params:
|
||||
for p, gp in zip(params, gradParams):
|
||||
p.add_(-learningRate, gp)
|
||||
|
||||
def training(self):
|
||||
self.train = True
|
||||
|
||||
def evaluate(self):
|
||||
self.train = False
|
||||
|
||||
# TODO
|
||||
def share(self, mlp, *arg):
|
||||
raise NotImplementedError
|
||||
|
||||
def clone(self, *arg):
|
||||
raise NotImplementedError
|
||||
|
||||
def type(self, type=None, tensorCache=None):
|
||||
if type is None:
|
||||
return self._type
|
||||
|
||||
tensorCache = tensorCache or {}
|
||||
|
||||
# find all tensors and convert them
|
||||
for key, param in self.__dict__.items():
|
||||
setattr(self, key, recursiveType(param, type, tensorCache))
|
||||
|
||||
self._backend = torch._thnn.type2backend[type]
|
||||
self._type = type
|
||||
return self
|
||||
|
||||
def float(self, *args):
|
||||
return self.type('torch.FloatTensor', *args)
|
||||
|
||||
def double(self, *args):
|
||||
return self.type('torch.DoubleTensor', *args)
|
||||
|
||||
def cuda(self, *args):
|
||||
return self.type('torch.cuda.FloatTensor', *args)
|
||||
|
||||
def reset(self):
|
||||
pass
|
||||
|
||||
def write(self, f):
|
||||
raise NotImplementedError
|
||||
|
||||
def read(self, f):
|
||||
raise NotImplementedError
|
||||
|
||||
# This function is not easy to understand. It works as follows:
|
||||
#
|
||||
# - gather all parameter tensors for this module (and children);
|
||||
# count all parameter values (floats)
|
||||
# - create one ginormous memory area (Storage object) with room for all
|
||||
# parameters
|
||||
# - remap each parameter tensor to point to an area within the ginormous
|
||||
# Storage, and copy it there
|
||||
#
|
||||
# It has the effect of making all parameters point to the same memory area,
|
||||
# which is: returned.
|
||||
#
|
||||
# The purpose is to allow operations over all parameters (such as momentum
|
||||
# updates and serialization), but it assumes that all parameters are of
|
||||
# the same type (and, in the case of CUDA, on the same device), which
|
||||
# is not always True. Use for_each() to iterate over this module and
|
||||
# children instead.
|
||||
#
|
||||
# Module._flattenTensorBuffer can be used by other packages (e.g. cunn)
|
||||
# to specify the type of temporary buffers. For example, the temporary
|
||||
# buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage.
|
||||
#
|
||||
# TODO: This logically belongs to torch.Tensor, not nn.
|
||||
_flattenTensorBuffer = {}
|
||||
|
||||
def _flatten(self, parameters=[]):
|
||||
|
||||
# returns True if tensor occupies a contiguous region of memory (no holes)
|
||||
def isCompact(tensor):
|
||||
# isn't it enough to check if strides == size.cumprod(0)?
|
||||
sortedStride, perm = torch.sort(torch.LongTensor(tensor.stride()), 0, True)
|
||||
sortedSize = torch.LongTensor(list(tensor.size())).index_select(0, perm)
|
||||
nRealDim = int(torch.clamp(sortedStride, 0, 1).sum())
|
||||
sortedStride = sortedStride.narrow(0, 0, nRealDim).clone()
|
||||
sortedSize = sortedSize.narrow(0, 0, nRealDim).clone()
|
||||
t = tensor.new().set_(tensor.storage(), 0,
|
||||
tuple(sortedSize),
|
||||
tuple(sortedStride))
|
||||
return t.is_contiguous()
|
||||
|
||||
if not parameters:
|
||||
return torch.Tensor()
|
||||
|
||||
Tensor = parameters[0].new
|
||||
BufferTensor = Module._flattenTensorBuffer.get(type(parameters[0]), Tensor)
|
||||
|
||||
# 1. construct the set of all unique storages referenced by parameter tensors
|
||||
storages = {}
|
||||
num_parameters = 0
|
||||
parameterMeta = []
|
||||
for i, param in enumerate(parameters):
|
||||
storage = param.storage()
|
||||
key = storage._cdata
|
||||
|
||||
if key not in storages:
|
||||
storages[key] = (storage, num_parameters)
|
||||
num_parameters = num_parameters + storage.size()
|
||||
|
||||
parameterMeta.append({
|
||||
'storage_offset': param.storage_offset() + storages[key][1],
|
||||
'size': param.size(),
|
||||
'stride': param.stride()
|
||||
})
|
||||
|
||||
# 2. construct a single tensor that will hold all the parameters
|
||||
flatParameters = BufferTensor(num_parameters).zero_()
|
||||
|
||||
# 3. determine if there are elements in the storage that none of the
|
||||
# parameter tensors reference ('holes')
|
||||
tensorsCompact = True
|
||||
for meta in parameterMeta:
|
||||
tmp = BufferTensor().set_(flatParameters.storage(), meta['storage_offset'], meta['size'], meta['stride'])
|
||||
tmp.fill_(1)
|
||||
tensorsCompact = tensorsCompact and isCompact(tmp)
|
||||
|
||||
maskParameters = flatParameters.byte().clone()
|
||||
compactOffsets = flatParameters.long().cumsum(0)
|
||||
used_parameters = compactOffsets[-1]
|
||||
|
||||
# 4. copy storages into the flattened parameter tensor
|
||||
for storageAndOffset in storages.values():
|
||||
storage, offset = storageAndOffset
|
||||
flatParameters[slice(offset, offset + storage.size())].copy_(Tensor().set_(storage))
|
||||
|
||||
# 5. allow garbage collection
|
||||
storages = None
|
||||
for param in parameters:
|
||||
param.set_()
|
||||
|
||||
# 6. compact the flattened parameters if there were holes
|
||||
if used_parameters != num_parameters:
|
||||
assert tensorsCompact
|
||||
|
||||
flatParameters = BufferTensor(used_parameters).copy_(
|
||||
flatParameters.masked_select(maskParameters))
|
||||
for meta in parameterMeta:
|
||||
meta['storage_offset'] = compactOffsets[meta['storage_offset']]
|
||||
|
||||
if BufferTensor != Tensor:
|
||||
flatParameters = Tensor(flatParameters.nelement()).copy_(flatParameters)
|
||||
|
||||
# 7. fix up the parameter tensors to point at the flattened parameters
|
||||
for param, meta in zip(parameters, parameterMeta):
|
||||
param.set_(flatParameters.storage(),
|
||||
meta['storage_offset'],
|
||||
meta['size'],
|
||||
meta['stride'])
|
||||
|
||||
return flatParameters
|
||||
|
||||
def flattenParameters(self):
|
||||
_params = self.parameters()
|
||||
if _params is None:
|
||||
return
|
||||
parameters, gradParameters = _params
|
||||
p, g = self._flatten(parameters), self._flatten(gradParameters)
|
||||
|
||||
assert p.nelement() == g.nelement()
|
||||
if parameters:
|
||||
for param, grad in zip(parameters, gradParameters):
|
||||
assert param.storage_offset() == grad.storage_offset()
|
||||
|
||||
return p, g
|
||||
|
||||
def apply(self, callback):
|
||||
callback(self)
|
||||
if hasattr(self, 'modules'):
|
||||
for module in self.modules:
|
||||
module.apply(callback)
|
||||
|
||||
def findModules(self, cls, container=None):
|
||||
nodes = []
|
||||
containers = []
|
||||
if isinstance(self, cls):
|
||||
nodes.append(self)
|
||||
containers.append(container)
|
||||
|
||||
# Recurse on nodes with 'modules'
|
||||
if hasattr(self, 'modules'):
|
||||
for child in self.modules:
|
||||
child_nodes, child_containers = child.findModules(cls, self)
|
||||
assert len(child_nodes) == len(child_containers)
|
||||
# add the list items from our child to our list (i.e. return a
|
||||
# flattened table of the return nodes).
|
||||
nodes.extend(child_nodes)
|
||||
containers.extend(child_containers)
|
||||
|
||||
return nodes, containers
|
||||
|
||||
def listModules(self):
|
||||
# include self first
|
||||
modules = [self]
|
||||
if hasattr(self, 'modules'):
|
||||
for child in self.modules:
|
||||
modules.extend(child.listModules())
|
||||
return modules
|
||||
|
||||
def clearState(self):
|
||||
return clear(self, 'output', 'gradInput')
|
||||
|
||||
def replace(self, callback):
|
||||
out = callback(self)
|
||||
# TODO: not out.modules?
|
||||
if hasattr(self, 'modules'):
|
||||
for i, module in enumerate(self.modules):
|
||||
self.modules[i] = module.replace(callback)
|
||||
return out
|
@ -1,33 +0,0 @@
import math
import torch
from .Module import Module


class Mul(Module):

    def __init__(self):
        super(Mul, self).__init__()
        self.weight = torch.Tensor(1)
        self.gradWeight = torch.Tensor(1)
        self.reset()

    def reset(self, stdv=None):
        if stdv is not None:
            stdv = stdv * math.sqrt(3)
        else:
            stdv = 1. / math.sqrt(self.weight.size(0))
        self.weight.uniform_(-stdv, stdv)

    def updateOutput(self, input):
        self.output.resize_as_(input).copy_(input)
        self.output.mul_(self.weight[0])
        return self.output

    def updateGradInput(self, input, gradOutput):
        self.gradInput.resize_as_(input).zero_()
        self.gradInput.add_(self.weight[0], gradOutput)
        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        self.gradWeight[0] = (self.gradWeight[0] +
                              scale * input.contiguous().view(-1).dot(gradOutput.contiguous().view(-1)))
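Migration note (editorial, not part of the diff): a learnable scalar gain is written today as an nn.Module holding a single nn.Parameter, with autograd supplying the gradient that accGradParameters accumulated by hand. A minimal sketch; the class name and the ones-initialisation are illustrative:

import torch
import torch.nn as nn

class ScalarScale(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(1))   # plays the role of Mul.weight

    def forward(self, input):
        return input * self.weight

out = ScalarScale()(torch.randn(4, 10))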
@ -1,37 +0,0 @@
import torch
from .Module import Module


class MulConstant(Module):

    def __init__(self, constant_scalar, inplace=False):
        super(MulConstant, self).__init__()
        self.constant_scalar = constant_scalar
        self.inplace = inplace

    def updateOutput(self, input):
        if self.inplace:
            input.mul_(self.constant_scalar)
            self.output.set_(input)
        else:
            self.output.resize_as_(input)
            self.output.copy_(input)
            self.output.mul_(self.constant_scalar)

        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.gradInput is None:
            return

        if self.inplace:
            gradOutput.mul_(self.constant_scalar)
            self.gradInput.set_(gradOutput)
            # restore previous input value
            input.div_(self.constant_scalar)
        else:
            self.gradInput.resize_as_(gradOutput)
            self.gradInput.copy_(gradOutput)
            self.gradInput.mul_(self.constant_scalar)

        return self.gradInput
@ -1,41 +0,0 @@
import torch
from .Criterion import Criterion
from .utils import recursiveResizeAs, recursiveFill, recursiveAdd


class MultiCriterion(Criterion):

    def __init__(self, ):
        super(MultiCriterion, self).__init__()
        self.criterions = []
        self.weights = torch.DoubleStorage()

    def add(self, criterion, weight=1):
        self.criterions.append(criterion)
        new_weights = torch.DoubleStorage(len(self.criterions))
        for i, v in enumerate(self.weights):
            new_weights[i] = v
        new_weights[len(self.criterions) - 1] = weight
        self.weights = new_weights
        return self

    def updateOutput(self, input, target):
        self.output = 0
        for i in range(len(self.criterions)):
            self.output = self.output + self.weights[i] * self.criterions[i].updateOutput(input, target)

        return self.output

    def updateGradInput(self, input, target):
        self.gradInput = recursiveResizeAs(self.gradInput, input)[0]
        recursiveFill(self.gradInput, 0)
        for i in range(len(self.criterions)):
            recursiveAdd(self.gradInput, self.weights[i], self.criterions[i].updateGradInput(input, target))

        return self.gradInput

    def type(self, type):
        for criterion in self.criterions:
            criterion.type(type)

        return super(MultiCriterion, self).type(type)
@ -1,41 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class MultiLabelMarginCriterion(Criterion):

    def __init__(self, sizeAverage=True):
        super(MultiLabelMarginCriterion, self).__init__()
        self.sizeAverage = sizeAverage
        self.isTarget = torch.Tensor()
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        target = target.long()
        self._backend.MultiLabelMarginCriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            self.isTarget,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        target = target.long()
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.MultiLabelMarginCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            self.isTarget,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        return self.gradInput
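Migration note (editorial, not part of the diff): this criterion survives as torch.nn.MultiLabelMarginLoss / torch.nn.functional.multilabel_margin_loss, which expects index targets padded with -1. A minimal sketch:

import torch
import torch.nn.functional as F

input = torch.randn(3, 5, requires_grad=True)
# each row lists target class indices, padded with -1 after the last valid label
target = torch.tensor([[1, 3, -1, -1, -1],
                       [0, -1, -1, -1, -1],
                       [2, 4, 1, -1, -1]])
loss = F.multilabel_margin_loss(input, target)
loss.backward()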
@ -1,41 +0,0 @@
import torch
from .Criterion import Criterion
from .Sigmoid import Sigmoid
from .BCECriterion import BCECriterion


class MultiLabelSoftMarginCriterion(Criterion):
    """
    A MultiLabel multiclass criterion based on sigmoid:

    the loss is:
        l(x, y) = - sum_i (y[i] * log(p[i]) + (1 - y[i]) * log(1 - p[i]))
    where p[i] = exp(x[i]) / (1 + exp(x[i]))

    and with weights:
        l(x, y) = - sum_i weights[i] * (y[i] * log(p[i]) + (1 - y[i]) * log(1 - p[i]))

    """

    def __init__(self, weights=None):
        super(MultiLabelSoftMarginCriterion, self).__init__()
        self.lsm = Sigmoid()
        self.nll = BCECriterion(weights)

    def updateOutput(self, input, target):
        input = input if input.nelement() == 1 else input.squeeze()
        target = target if target.nelement() == 1 else target.squeeze()
        self.lsm.updateOutput(input)
        self.nll.updateOutput(self.lsm.output, target)
        self.output = self.nll.output
        return self.output

    def updateGradInput(self, input, target):
        size = input.size()
        input = input if input.nelement() == 1 else input.squeeze()
        target = target if target.nelement() == 1 else target.squeeze()
        self.nll.updateGradInput(self.lsm.output, target)
        self.lsm.updateGradInput(input, self.nll.gradInput)
        self.gradInput = self.lsm.gradInput.view(size)
        return self.gradInput
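Migration note (editorial, not part of the diff): the Sigmoid + BCE composition above exists directly as torch.nn.functional.multilabel_soft_margin_loss (numerically, BCEWithLogitsLoss covers the same ground). A minimal sketch:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 5, requires_grad=True)
target = torch.randint(0, 2, (4, 5)).float()   # multi-hot labels
loss = F.multilabel_soft_margin_loss(logits, target)
loss.backward()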
@ -1,51 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class MultiMarginCriterion(Criterion):

    def __init__(self, p=1, weights=None, margin=1, sizeAverage=True):
        super(MultiMarginCriterion, self).__init__()
        if p != 1 and p != 2:
            raise ValueError("only p == 1 and p == 2 supported")
        self.p = p
        self.margin = margin
        self.sizeAverage = sizeAverage
        if weights is not None:
            assert weights.dim() == 1
        self.weights = weights
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        target = target.long()
        self._backend.MultiMarginCriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
            self.p,
            self.weights,
            self.margin,
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        target = target.long()
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.MultiMarginCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
            self.p,
            self.weights,
            self.margin,
        )
        return self.gradInput
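Migration note (editorial, not part of the diff): the modern counterpart is torch.nn.functional.multi_margin_loss, with the same p, margin and optional per-class weight arguments. A minimal sketch:

import torch
import torch.nn.functional as F

input = torch.randn(4, 10, requires_grad=True)
target = torch.randint(0, 10, (4,))
loss = F.multi_margin_loss(input, target, p=1, margin=1.0)   # weight=None by default
loss.backward()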
@ -1,31 +0,0 @@
import torch
from .Module import Module


class Narrow(Module):

    def __init__(self, dimension, offset, length=1):
        super(Narrow, self).__init__()
        self.dimension = dimension
        self.index = offset
        self.length = length

    def updateOutput(self, input):
        length = self.length
        if length < 0:
            length = input.size(self.dimension) - self.index + self.length + 1

        output = input.narrow(self.dimension, self.index, length)
        self.output = self.output.type_as(output)
        self.output.resize_as_(output).copy_(output)
        return self.output

    def updateGradInput(self, input, gradOutput):
        length = self.length
        if length < 0:
            length = input.size(self.dimension) - self.index + self.length + 1

        self.gradInput = self.gradInput.type_as(input)
        self.gradInput.resize_as_(input).zero_()
        self.gradInput.narrow(self.dimension, self.index, length).copy_(gradOutput)
        return self.gradInput
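Migration note (editorial, not part of the diff): Narrow is subsumed by Tensor.narrow (or plain slicing), which is differentiable, so no module wrapper is needed. A minimal sketch matching updateOutput above:

import torch

x = torch.randn(4, 10, requires_grad=True)
y = x.narrow(1, 2, 5)      # dimension=1, offset=2, length=5; same as x[:, 2:7]
y.sum().backward()         # gradient flows only into the narrowed slice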
@ -1,41 +0,0 @@
import torch
from .Module import Module
from .utils import clear, recursiveResizeAs, recursiveFill


class NarrowTable(Module):

    def __init__(self, offset, length=1):
        super(NarrowTable, self).__init__()
        self.offset = offset
        self.length = length
        self.output = []
        self.gradInput = []

    def updateOutput(self, input):
        self.output[:] = [input[self.offset + i] for i in range(self.length)]
        return self.output

    def updateGradInput(self, input, gradOutput):
        if len(self.gradInput) != len(input):
            self.gradInput[:] = [None for i in range(len(input))]

        assert len(gradOutput) == self.length
        for i in range(self.length):
            self.gradInput[self.offset + i] = gradOutput[i]

        for i in range(len(input)):
            if i < self.offset or i >= self.offset + self.length:
                gi = self.gradInput[i]
                if gi is None:
                    gi = input[i].new()
                self.gradInput[i] = recursiveResizeAs(gi, input[i])[0]
                recursiveFill(self.gradInput[i], 0)

        return self.gradInput

    def type(self, type=None, tensorCache=None):
        if not type:
            return self._type
        clear(self, 'output', 'gradInput')
        return super(NarrowTable, self).type(self, type, tensorCache)
@ -1,155 +0,0 @@
|
||||
import torch
|
||||
from torch._six import inf
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class Normalize(Module):
|
||||
|
||||
def __init__(self, p, eps=1e-10):
|
||||
super(Normalize, self).__init__()
|
||||
assert p > 0
|
||||
self.p = p
|
||||
self.eps = eps
|
||||
|
||||
self._output = None
|
||||
self.norm = None
|
||||
self.buffer = None
|
||||
self._indices = None
|
||||
self.normp = None
|
||||
self._gradInput = None
|
||||
self.cross = None
|
||||
self.buffer2 = None
|
||||
|
||||
def updateOutput(self, input):
|
||||
assert input.dim() == 2
|
||||
input_size = input.size()
|
||||
|
||||
if self._output is None:
|
||||
self._output = input.new()
|
||||
if self.norm is None:
|
||||
self.norm = input.new()
|
||||
if self.buffer is None:
|
||||
self.buffer = input.new()
|
||||
|
||||
self._output.resize_as_(input)
|
||||
|
||||
# specialization for the infinity norm
|
||||
if self.p == inf:
|
||||
if not self._indices:
|
||||
self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
|
||||
else torch.LongTensor()
|
||||
|
||||
torch.abs(input, out=self.buffer)
|
||||
torch.max(self._indices, self.buffer, 1, out=self.norm, keepdim=True)
|
||||
self.norm.add_(self.eps)
|
||||
else:
|
||||
if self.normp is None:
|
||||
self.normp = input.new()
|
||||
if self.p % 2 != 0:
|
||||
torch.abs(input, out=self.buffer).pow_(self.p)
|
||||
else:
|
||||
torch.pow(input, self.p, out=self.buffer)
|
||||
|
||||
torch.sum(self.buffer, 1, out=self.normp, keepdim=True).add_(self.eps)
|
||||
torch.pow(self.normp, 1. / self.p, out=self.norm)
|
||||
|
||||
torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output)
|
||||
|
||||
self.output = self._output.view(input_size)
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
assert input.dim() == 2
|
||||
assert gradOutput.dim() == 2
|
||||
|
||||
input_size = input.size()
|
||||
n = input.size(0) # batch size
|
||||
d = input.size(1) # dimensionality of vectors
|
||||
|
||||
if self._gradInput is None:
|
||||
self._gradInput = input.new()
|
||||
if self.cross is None:
|
||||
self.cross = input.new()
|
||||
# compute diagonal term with gradOutput
|
||||
self._gradInput.resize_(n, d)
|
||||
if self.p == inf:
|
||||
# specialization for the inf case
|
||||
torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput)
|
||||
self.buffer.resize_as_(input).zero_()
|
||||
self.cross.resize_(n, 1)
|
||||
torch.gather(input, 1, self._indices, out=self.cross)
|
||||
self.cross.div_(self.norm)
|
||||
self.buffer.scatter_(1, self._indices, self.cross)
|
||||
else:
|
||||
torch.mul(self.normp.view(n, 1).expand(n, d), gradOutput, out=self._gradInput)
|
||||
# small optimizations for different p
|
||||
# buffer = input*|input|^(p-2)
|
||||
# for non-even p, need to add absolute value
|
||||
if self.p % 2 != 0:
|
||||
if self.p < 2:
|
||||
# add eps to avoid possible division by 0
|
||||
torch.abs(input, out=self.buffer).add_(self.eps).pow_(self.p - 2).mul_(input)
|
||||
else:
|
||||
torch.abs(input, out=self.buffer).pow_(self.p - 2).mul_(input)
|
||||
# special case for p == 2, pow(x, 0) = 1
|
||||
elif self.p == 2:
|
||||
self.buffer.copy_(input)
|
||||
else:
|
||||
# p is even and > 2, pow(x, p) is always positive
|
||||
torch.pow(input, self.p - 2, out=self.buffer).mul_(input)
|
||||
|
||||
# compute cross term in two steps
|
||||
self.cross.resize_(n, 1)
|
||||
|
||||
# instead of having a huge temporary matrix (b1*b2),
|
||||
#: the computations as b1*(b2*gradOutput). This avoids redundant
|
||||
# computation and also a huge buffer of size n*d^2
|
||||
if self.buffer2 is None:
|
||||
self.buffer2 = input.new() # nxd
|
||||
torch.mul(input, gradOutput, out=self.buffer2)
|
||||
torch.sum(self.buffer2, 1, out=self.cross, keepdim=True)
|
||||
|
||||
self.buffer.mul_(self.cross.expand_as(self.buffer))
|
||||
self._gradInput.add_(-1, self.buffer)
|
||||
|
||||
# reuse cross buffer for normalization
|
||||
if self.p == inf:
|
||||
torch.mul(self.norm, self.norm, out=self.cross)
|
||||
else:
|
||||
torch.mul(self.normp, self.norm, out=self.cross)
|
||||
|
||||
self._gradInput.div_(self.cross.expand(n, d))
|
||||
|
||||
self.gradInput = self._gradInput.view(input_size)
|
||||
return self.gradInput
|
||||
|
||||
def __repr__(self):
|
||||
return super(Normalize, self).__repr__() + '({})'.format(self.p)
|
||||
|
||||
def type(self, type, tensorCache=None):
|
||||
if not type:
|
||||
return self._type
|
||||
# torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
|
||||
if type == 'torch.cuda.FloatTensor':
|
||||
super(Normalize, self).type(type, tensorCache)
|
||||
else:
|
||||
# self._indices must be a LongTensor. Setting it to nil temporarily avoids
|
||||
# unnecessary memory allocations.
|
||||
indices, self._indices = self._indices, None
|
||||
super(Normalize, self).type(type, tensorCache)
|
||||
self._indices = indices.long() if indices else None
|
||||
|
||||
return self
|
||||
|
||||
def clearState(self):
|
||||
clear(self, [
|
||||
'_output',
|
||||
'_indices',
|
||||
'_gradInput',
|
||||
'buffer',
|
||||
'norm',
|
||||
'normp',
|
||||
'cross',
|
||||
])
|
||||
return super(Normalize, self).clearState()
|
@ -1,48 +0,0 @@
import torch
from .Module import Module
from .utils import clear


class PReLU(Module):

    def __init__(self, nOutputPlane=0):
        super(PReLU, self).__init__()
        # if no argument provided, use shared model (weight is scalar)
        self.nOutputPlane = nOutputPlane
        self.weight = torch.Tensor(nOutputPlane or 1).fill_(0.25)
        self.gradWeight = torch.Tensor(nOutputPlane or 1)

    def updateOutput(self, input):
        self._backend.PReLU_updateOutput(
            self._backend.library_state,
            input,
            self.output,
            self.weight
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.PReLU_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.weight
        )
        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        self._backend.PReLU_accGradParameters(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.weight,
            self.gradWeight,
            scale
        )
        return self.gradWeight

    def clearState(self):
        clear(self, 'gradWeightBuf', 'gradWeightBuf2')
        return super(PReLU, self).clearState()
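Migration note (editorial, not part of the diff): torch.nn.PReLU keeps the same semantics, including the shared weight initialised to 0.25 when num_parameters is 1. A minimal sketch:

import torch
import torch.nn as nn

prelu = nn.PReLU(num_parameters=1, init=0.25)   # shared weight, like nOutputPlane=0
out = prelu(torch.randn(4, 8))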
@ -1,74 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
|
||||
|
||||
class Padding(Module):
|
||||
# pad puts in [pad] amount of [value] over dimension [dim], starting at
|
||||
# index [index] in that dimension. If pad<0, index counts from the left.
|
||||
# If pad>0 index counts from the right index = 1 pads before index 1.
|
||||
# index = 2 pads starting before index 2 and after index 1 in dimension [dim]
|
||||
# When nInputDim is provided, inputs larger than that value will be considered batches
|
||||
# where the actual dim to be padded will be dimension dim + 1.
|
||||
|
||||
def __init__(self, dim, pad, value=0, index=0, nInputDim=0):
|
||||
self.value = value
|
||||
self.index = index
|
||||
self.dim = dim
|
||||
self.pad = pad
|
||||
self.nInputDim = nInputDim
|
||||
self.outputSize = torch.Size()
|
||||
super(Padding, self).__init__()
|
||||
|
||||
def updateOutput(self, input):
|
||||
dim = self.dim
|
||||
if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim:
|
||||
dim = dim + 1
|
||||
|
||||
outputSize = list(input.size())
|
||||
outputSize[dim] += abs(self.pad)
|
||||
self.outputSize = torch.Size(outputSize)
|
||||
|
||||
self.output.resize_(self.outputSize)
|
||||
self.output.fill_(self.value)
|
||||
index = self.index
|
||||
pad = self.pad
|
||||
if pad > 0:
|
||||
index = input.size(dim) - index
|
||||
else:
|
||||
pad = -pad
|
||||
|
||||
if index == 0:
|
||||
self.output.narrow(dim, pad, input.size(dim)).copy_(input)
|
||||
elif index == input.size(dim):
|
||||
self.output.narrow(dim, 0, input.size(dim)).copy_(input)
|
||||
else:
|
||||
self.output.narrow(dim, 0, index).copy_(input.narrow(dim, 0, index))
|
||||
self.output.narrow(dim, index + pad, input.size(dim) -
|
||||
index).copy_(input.narrow(dim, index, input.size(dim) - index))
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self.gradInput.resize_as_(input)
|
||||
dim = self.dim
|
||||
|
||||
if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim:
|
||||
dim = dim + 1
|
||||
|
||||
index = self.index
|
||||
pad = self.pad
|
||||
if pad > 0:
|
||||
index = input.size(dim) - index
|
||||
else:
|
||||
pad = -pad
|
||||
|
||||
if index == 0:
|
||||
self.gradInput.copy_(gradOutput.narrow(dim, pad, input.size(dim)))
|
||||
elif index == input.size(dim):
|
||||
self.gradInput.copy_(gradOutput.narrow(dim, 0, input.size(dim)))
|
||||
else:
|
||||
self.gradInput.narrow(dim, 0, index).copy_(gradOutput.narrow(dim, 0, index))
|
||||
self.gradInput.narrow(dim, index, input.size(
|
||||
dim) - index).copy_(gradOutput.narrow(dim, index + pad, input.size(dim) - index))
|
||||
|
||||
return self.gradInput
|
@ -1,83 +0,0 @@
|
||||
import torch
|
||||
from .Module import Module
|
||||
from .utils import clear
|
||||
|
||||
|
||||
class PairwiseDistance(Module):
|
||||
|
||||
def __init__(self, p):
|
||||
super(PairwiseDistance, self).__init__()
|
||||
assert p % 1 == 0
|
||||
self.gradInput = []
|
||||
self.diff = torch.Tensor()
|
||||
self.norm = p
|
||||
|
||||
self.outExpand = None
|
||||
self.grad = None
|
||||
self.ones = None
|
||||
|
||||
def updateOutput(self, input):
|
||||
self.output.resize_(1)
|
||||
assert input[0].dim() == 2
|
||||
|
||||
if self.diff is None:
|
||||
self.diff = input[0].new()
|
||||
|
||||
torch.add(input[0], -1, input[1], out=self.diff).abs_()
|
||||
|
||||
self.output.resize_(input[0].size(0))
|
||||
self.output.zero_()
|
||||
self.output.add_(self.diff.pow_(self.norm).sum(1, keepdim=False))
|
||||
self.output.pow_(1. / self.norm)
|
||||
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
assert input[0].dim() == 2
|
||||
|
||||
if len(self.gradInput) != 2:
|
||||
self.gradInput[:] = [None, None]
|
||||
|
||||
if self.gradInput[0] is None:
|
||||
self.gradInput[0] = input[0].new()
|
||||
self.gradInput[0].resize_(input[0].size())
|
||||
if self.gradInput[1] is None:
|
||||
self.gradInput[1] = input[1].new()
|
||||
self.gradInput[1].resize_(input[1].size())
|
||||
self.gradInput[0].copy_(input[0])
|
||||
self.gradInput[0].add_(-1, input[1])
|
||||
|
||||
if self.norm == 1:
|
||||
self.gradInput[0].sign_()
|
||||
else:
|
||||
# Note: derivative of p-norm:
|
||||
# d/dx_k(||x||_p) = (x_k * abs(x_k)^(p-2)) / (||x||_p)^(p-1)
|
||||
if self.norm > 2:
|
||||
self.gradInput[0].mul_(self.gradInput[0].abs().pow_(self.norm - 2))
|
||||
|
||||
if self.outExpand is None:
|
||||
self.outExpand = self.output.new()
|
||||
self.outExpand.resize_(self.output.size(0), 1)
|
||||
self.outExpand.copy_(self.output.view(self.output.size(0), 1))
|
||||
self.outExpand.add_(1e-6) # Prevent divide by zero errors
|
||||
self.outExpand.pow_(-(self.norm - 1))
|
||||
self.gradInput[0].mul_(self.outExpand.expand(self.gradInput[0].size(0),
|
||||
self.gradInput[0].size(1)))
|
||||
|
||||
if self.grad is None:
|
||||
self.grad = gradOutput.new()
|
||||
if self.ones is None:
|
||||
self.ones = gradOutput.new()
|
||||
|
||||
self.grad.resize_as_(input[0]).zero_()
|
||||
self.ones.resize_(input[0].size(1)).fill_(1)
|
||||
|
||||
self.grad.addr_(gradOutput, self.ones)
|
||||
self.gradInput[0].mul_(self.grad)
|
||||
|
||||
self.gradInput[1].zero_().add_(-1, self.gradInput[0])
|
||||
return self.gradInput
|
||||
|
||||
def clearState(self):
|
||||
clear(self, 'diff', 'outExpand', 'grad', 'ones')
|
||||
return super(PairwiseDistance, self).clearState()
|
@ -1,105 +0,0 @@
import torch
from .Container import Container


class Parallel(Container):

    def __init__(self, inputDimension, outputDimension):
        super(Parallel, self).__init__()
        self.inputDimension = inputDimension
        self.outputDimension = outputDimension
        self.totalOutputSize = None

    def updateOutput(self, input):
        nModule = input.size(self.inputDimension)
        outputs = []

        for i in range(nModule):
            currentInput = input.select(self.inputDimension, i)
            currentOutput = self.modules[i].updateOutput(currentInput)
            outputs.append(currentOutput)
            outputSize = currentOutput.size(self.outputDimension)

            if i == 0:
                totalOutputSize = list(currentOutput.size())
            else:
                totalOutputSize[self.outputDimension] += outputSize

        self.totalOutputSize = torch.Size(totalOutputSize)
        self.output.resize_(self.totalOutputSize)

        offset = 0
        for i in range(nModule):
            currentOutput = outputs[i]
            outputSize = currentOutput.size(self.outputDimension)
            self.output.narrow(self.outputDimension, offset, outputSize).copy_(currentOutput)
            offset = offset + currentOutput.size(self.outputDimension)

        return self.output

    def updateGradInput(self, input, gradOutput):
        nModule = input.size(self.inputDimension)
        self.gradInput.resize_as_(input)

        offset = 0
        for i in range(nModule):
            module = self.modules[i]
            currentInput = input.select(self.inputDimension, i)
            currentOutput = module.output
            outputSize = currentOutput.size(self.outputDimension)
            currentGradOutput = gradOutput.narrow(self.outputDimension, offset, outputSize)

            currentGradInput = module.updateGradInput(currentInput, currentGradOutput)

            self.gradInput.select(self.inputDimension, i).copy_(currentGradInput)
            offset = offset + outputSize

        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        nModule = input.size(self.inputDimension)

        offset = 0
        for i in range(nModule):
            module = self.modules[i]
            currentOutput = module.output
            outputSize = currentOutput.size(self.outputDimension)

            module.accGradParameters(
                input.select(self.inputDimension, i),
                gradOutput.narrow(self.outputDimension, offset, outputSize),
                scale)
            offset += outputSize

    def accUpdateGradParameters(self, input, gradOutput, lr):
        nModule = input.size(self.inputDimension)

        offset = 0
        for i in range(nModule):
            module = self.modules[i]
            currentOutput = module.output
            module.accUpdateGradParameters(
                input.select(self.inputDimension, i),
                gradOutput.narrow(self.outputDimension, offset, currentOutput.size(self.outputDimension)),
                lr)
            offset = offset + currentOutput.size(self.outputDimension)

    def __repr__(self):
        tab = '  '
        line = '\n'
        next = '  |`-> '
        ext = '  |    '
        extlast = '   '
        last = '   ... -> '
        res = torch.typename(self)
        res += ' {' + line + tab + 'input'
        for i in range(len(self.modules)):
            if i == len(self.modules) - 1:
                res += line + tab + next + '(' + str(i) + '): ' + \
                    str(self.modules[i]).replace(line, line + tab + extlast)
            else:
                res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext)

        res += line + tab + last + 'output'
        res += line + '}'
        return res
@ -1,39 +0,0 @@
import torch
from .Criterion import Criterion
from .utils import recursiveResizeAs, recursiveFill, recursiveAdd


class ParallelCriterion(Criterion):

    def __init__(self, repeatTarget=False):
        super(ParallelCriterion, self).__init__()
        self.criterions = []
        self.weights = []
        self.gradInput = []
        self.repeatTarget = repeatTarget

    def add(self, criterion, weight=1):
        self.criterions.append(criterion)
        self.weights.append(weight)
        return self

    def updateOutput(self, input, target):
        self.output = 0
        for i, criterion in enumerate(self.criterions):
            current_target = target if self.repeatTarget else target[i]
            self.output += self.weights[i] * criterion.updateOutput(input[i], current_target)

        return self.output

    def updateGradInput(self, input, target):
        self.gradInput = recursiveResizeAs(self.gradInput, input)[0]
        recursiveFill(self.gradInput, 0)
        for i, criterion in enumerate(self.criterions):
            current_target = target if self.repeatTarget else target[i]
            recursiveAdd(self.gradInput[i], self.weights[i], criterion.updateGradInput(input[i], current_target))

        return self.gradInput

    def type(self, type=None, tensorCache=None):
        self.gradInput = []
        return super(ParallelCriterion, self).type(type, tensorCache)
@ -1,60 +0,0 @@
import torch
from .Container import Container


class ParallelTable(Container):

    def __init__(self, ):
        super(ParallelTable, self).__init__()
        self.modules = []
        self.output = []
        self.gradInput = []

    def updateOutput(self, input):
        for i in range(len(self.modules)):
            tmp = self.modules[i].updateOutput(input[i])
            if len(self.output) <= i:
                self.output.append(tmp)
            else:
                self.output[i] = tmp

        return self.output

    def updateGradInput(self, input, gradOutput):
        for i, module in enumerate(self.modules):
            tmp = module.updateGradInput(input[i], gradOutput[i])
            if len(self.gradInput) <= i:
                self.gradInput.append(tmp)
            else:
                self.gradInput[i] = tmp

        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        for i, module in enumerate(self.modules):
            module.accGradParameters(input[i], gradOutput[i], scale)

    def accUpdateGradParameters(self, input, gradOutput, lr=1):
        for i, module in enumerate(self.modules):
            module.accUpdateGradParameters(input[i], gradOutput[i], lr)

    def __repr__(self):
        tab = '  '
        line = '\n'
        next = '  |`-> '
        ext = '  |    '
        extlast = '   '
        last = '   ... -> '
        res = torch.typename(self)
        res = res + ' {' + line + tab + 'input'
        for i in range(len(self.modules)):
            if i == len(self.modules) - 1:
                res = res + line + tab + next + '(' + str(i) + '): ' + \
                    str(self.modules[i]).replace(line, line + tab + extlast)
            else:
                res = res + line + tab + next + '(' + str(i) + '): ' + \
                    str(self.modules[i]).replace(line, line + tab + ext)

        res = res + line + tab + last + 'output'
        res = res + line + '}'
        return res
@ -1,115 +0,0 @@
import torch
from .Module import Module
from .Identity import Identity
from .LookupTable import LookupTable
from .Sequential import Sequential
from .ParallelTable import ParallelTable
from .MM import MM


class PartialLinear(Module):
    """
    PartialLinear is a Linear layer that allows the user to set a collection of
    column indices. When the column indices are set, the layer will behave like a
    Linear layer that only has those columns. Meanwhile, all parameters are
    preserved, so resetting the PartialLinear layer will result in a module that
    behaves just like a regular Linear layer.

    This module is useful, for instance, when you want to do forward-backward on
    only a subset of a Linear layer during training but use the full Linear layer
    at test time.
    """

    def __init__(self, inputsize, outputsize, bias=True):
        super(PartialLinear, self).__init__()

        # define the layer as a small network:
        pt = ParallelTable()
        pt.add(Identity()).add(LookupTable(outputsize, inputsize))
        self.network = Sequential().add(pt).add(MM(False, True))
        if bias:
            self.bias = torch.zeros(1, outputsize)
            self.gradBias = torch.zeros(1, outputsize)
        else:
            self.bias = self.gradBias = None

        # set partition:
        self.inputsize = inputsize
        self.outputsize = outputsize
        self.allcolumns = torch.arange(0, self.outputsize).long()
        self.resetPartition()
        self.addBuffer = None
        self.buffer = None

    def setPartition(self, indices):
        self.partition = indices.type(self.allcolumns.type())
        return self

    def resetPartition(self):
        self.partition = self.allcolumns
        return self

    def parameters(self):
        return [self.network.get(0).get(1).weight, self.bias], \
            [self.network.get(0).get(1).gradWeight, self.gradBias]
        # should return only the relevant partition?

    def updateOutput(self, input):
        self.output.set_(self.network.forward([input, self.partition]))
        if self.bias is not None:
            self.output.add_(torch.index_select(self.bias, 1, self.partition).expand_as(self.output))
            if self.addBuffer is None:
                self.addBuffer = input.new()
            if self.addBuffer.nelement() != input.size(0):
                self.addBuffer.resize_(input.size(0)).fill_(1)

        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.gradInput is not None:
            self.network.updateGradInput([input, self.partition], gradOutput)
            self.gradInput.set_(self.network.gradInput[0])

        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        self.network.accGradParameters([input, self.partition], gradOutput, scale)
        if self.bias is not None:
            if self.buffer is None:
                self.buffer = input.new()
            self.buffer.resize_(gradOutput.size(1))
            torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
            self.gradBias.index_add_(
                1, self.partition, self.buffer.view(1, self.buffer.nelement())
            )

    def accUpdateGradParameters(self, input, gradOutput, lr):
        gradWeight = self.network.get(0).get(1).gradWeight
        gradBias = self.gradBias
        self.network.get(0).get(1).gradWeight = self.network.get(0).get(1).weight
        self.gradBias = self.bias
        self.accGradParameters(input, gradOutput, -lr)
        self.network.get(0).get(1).gradWeight = gradWeight
        self.gradBias = gradBias

    def zeroGradParameters(self):
        self.network.zeroGradParameters()
        self.gradBias.zero_()

    def updateParameters(self, learningRate):
        self.network.updateParameters(learningRate)
        self.bias.add_(-learningRate, self.gradBias)

    def type(self, type=None, tensorCache=None):
        result = super(PartialLinear, self).type(type, tensorCache)
        self.partition = self.partition.long()
        self.allcolumns = self.allcolumns.long()
        if type == 'torch.cuda.FloatTensor':
            self.allcolumns = self.allcolumns.cuda()
            self.partition = self.partition.cuda()
        return result

    def __repr__(self):
        return super(PartialLinear, self).__repr__() + \
            '({} -> {})'.format(self.inputsize, self.outputsize) + \
            (' without bias' if self.bias is None else '')
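A minimal usage sketch for the PartialLinear module above, assuming a build that still ships the torch.legacy.nn package this commit removes; the layer sizes and partition indices below are made up for illustration:

    import torch
    import torch.legacy.nn as legacy_nn

    layer = legacy_nn.PartialLinear(5, 10)           # 5 inputs -> 10 output columns, with bias

    # Forward-backward against only three of the ten output columns during training.
    layer.setPartition(torch.LongTensor([1, 4, 7]))
    subset_out = layer.forward(torch.randn(2, 5))    # shape (2, 3): columns 1, 4, 7

    # Restore the full layer for evaluation; no parameters were copied or discarded.
    layer.resetPartition()
    full_out = layer.forward(torch.randn(2, 5))      # shape (2, 10)

Because the partition only selects rows of the underlying LookupTable weight, switching between the partial and full views is what the docstring means by "all parameters are preserved".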
@ -1,20 +0,0 @@
import torch
from .Module import Module


class Power(Module):

    def __init__(self, p):
        super(Power, self).__init__()
        self.pow = p

    def updateOutput(self, input):
        self.output.resize_as_(input).copy_(input)
        self.output.pow_(self.pow)
        return self.output

    def updateGradInput(self, input, gradOutput):
        self.gradInput.resize_as_(input).copy_(input)
        self.gradInput.pow_(self.pow - 1)
        self.gradInput.mul_(gradOutput).mul_(self.pow)
        return self.gradInput
@ -1,51 +0,0 @@
import torch
from .Module import Module
from .utils import clear


class RReLU(Module):

    def __init__(self, lower=1. / 8, upper=1. / 3, inplace=False):
        super(RReLU, self).__init__()
        self.lower = lower
        self.upper = upper
        self.inplace = inplace

        assert self.lower <= self.upper and self.lower >= 0 and self.upper >= 0
        self.noise = torch.Tensor()
        self.train = True

    def updateOutput(self, input):
        self._backend.RReLU_updateOutput(
            self._backend.library_state,
            input,
            self.output,
            self.noise,
            self.lower,
            self.upper,
            self.train,
            self.inplace,
            torch.default_generator if not input.is_cuda else 0
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.RReLU_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.noise,
            self.lower,
            self.upper,
            self.train,
            self.inplace
        )
        return self.gradInput

    def __repr__(self):
        return super(RReLU, self).__repr__() + '({:.4f}, {:.4f})'.format(self.lower, self.upper)

    def clearState(self):
        clear(self, 'noise')
        return super(RReLU, self).clearState()
@ -1,8 +0,0 @@
import torch
from .Threshold import Threshold


class ReLU(Threshold):

    def __init__(self, inplace=False):
        super(ReLU, self).__init__(0, 0, inplace)
@ -1,28 +0,0 @@
import torch
from .Module import Module


class ReLU6(Module):

    def __init__(self, inplace=False):
        super(ReLU6, self).__init__()
        self.inplace = inplace

    def updateOutput(self, input):
        self._backend.HardTanh_updateOutput(
            self._backend.library_state,
            input,
            self.output,
            0, 6, self.inplace
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.HardTanh_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            0, 6, self.inplace
        )
        return self.gradInput
@ -1,33 +0,0 @@
import torch
from .Module import Module


class Replicate(Module):

    def __init__(self, nf, dim=0):
        super(Replicate, self).__init__()
        self.nfeatures = nf
        self.dim = dim
        assert self.dim >= 0

    def updateOutput(self, input):
        assert self.dim < input.dim()

        size = list(input.size())
        size.insert(self.dim, self.nfeatures)

        stride = list(input.stride())
        stride.insert(self.dim, 0)

        self.output.set_(input.storage(), input.storage_offset(),
                         torch.Size(size), tuple(stride))
        return self.output

    def updateGradInput(self, input, gradOutput):
        self.gradInput.resize_as_(input).zero_()
        size = list(input.size())
        size.insert(self.dim, 1)

        gradInput = self.gradInput.view(*size)
        torch.sum(gradOutput, self.dim, True, out=gradInput)
        return self.gradInput
@ -1,53 +0,0 @@
import torch
from .Module import Module
from .utils import clear


class Reshape(Module):

    def __init__(self, *args):
        super(Reshape, self).__init__()

        if len(args) == 1 and isinstance(args[0], torch.Size):
            self.size = args[0]
        else:
            self.size = torch.Size(args)

        self.nelement = 1
        for s in self.size:
            self.nelement *= s

        self._input = None
        self._gradOutput = None

    def updateOutput(self, input):
        if not input.is_contiguous():
            if self._input is None:
                self._input = input.new()
            self._input.resize_as_(input)
            self._input.copy_(input)
            input = self._input

        batchsize = [input.size(0)] + list(self.size)
        self.output = input.view(torch.Size(batchsize))

        return self.output

    def updateGradInput(self, input, gradOutput):
        if not gradOutput.is_contiguous():
            if self._gradOutput is None:
                self._gradOutput = gradOutput.new()
            self._gradOutput.resize_as_(gradOutput)
            self._gradOutput.copy_(gradOutput)
            gradOutput = self._gradOutput

        self.gradInput = gradOutput.view_as(input)
        return self.gradInput

    def __repr__(self):
        return super(Reshape, self).__repr__() + \
            '({})'.format('x'.join(map(lambda x: str(x), self.size)))

    def clearState(self):
        clear(self, '_input', '_gradOutput')
        return super(Reshape, self).clearState()
@ -1,23 +0,0 @@
import torch
from .Module import Module


class Select(Module):

    def __init__(self, dimension, index):
        super(Select, self).__init__()
        self.dimension = dimension
        self.index = index

    def updateOutput(self, input):
        index = self.index if self.index >= 0 else input.size(self.dimension) + self.index
        output = input.select(self.dimension, index)
        self.output.resize_as_(output)
        return self.output.copy_(output)

    def updateGradInput(self, input, gradOutput):
        index = self.index if self.index >= 0 else input.size(self.dimension) + self.index
        self.gradInput.resize_as_(input)
        self.gradInput.zero_()
        self.gradInput.select(self.dimension, index).copy_(gradOutput)
        return self.gradInput
@ -1,56 +0,0 @@
import torch
from .Module import Module
from .utils import recursiveCopy, clear


class SelectTable(Module):

    def __init__(self, index):
        super(SelectTable, self).__init__()
        self.index = index
        self.gradInput = []

    def updateOutput(self, input):
        # handle negative indices
        index = self.index if self.index >= 0 else len(input) + self.index
        assert len(input) > index
        self.output = input[index]
        return self.output

    def _zeroTableCopy(self, l1, l2):
        for i, v in enumerate(l2):
            if isinstance(v, list):
                if len(l1) > i:
                    l1[i] = self._zeroTableCopy(l1[i], l2[i])
                else:
                    l1.append(self._zeroTableCopy([], l2[i]))
            else:
                if i >= len(l1):
                    l1.append(v.new().resize_as_(v).zero_())
                else:
                    l1[i].resize_as_(v)
                    l1[i].zero_()
        del l1[len(l2):]
        return l1

    def updateGradInput(self, input, gradOutput):
        # make gradInput a zeroed copy of input
        self._zeroTableCopy(self.gradInput, input)
        # handle negative indices
        index = self.index if self.index >= 0 else len(input) + self.index
        # copy into gradInput[index] (necessary for variable sized inputs)
        assert self.gradInput[index] is not None
        recursiveCopy(self.gradInput[index], gradOutput)
        return self.gradInput

    def type(self, type, tensorCache=None):
        del self.gradInput[:]
        if isinstance(self.output, list):
            del self.output[:]
        return super(SelectTable, self).type(type, tensorCache)

    def __repr__(self):
        return super(SelectTable, self).__repr__() + '({})'.format(self.index)

    def clearState(self):
        clear(self, 'gradInput')
@ -1,86 +0,0 @@
import torch
from .Container import Container


class Sequential(Container):

    def __len__(self):
        return len(self.modules)

    def add(self, module):
        if len(self.modules) == 0:
            self.gradInput = module.gradInput

        self.modules.append(module)
        self.output = module.output
        return self

    def insert(self, module, index):
        self.modules.insert(index, module)
        self.output = self.modules[-1].output
        self.gradInput = self.modules[0].gradInput

    def remove(self, index=-1):
        del self.modules[index]

        if len(self.modules) > 0:
            self.output = self.modules[-1].output
            self.gradInput = self.modules[0].gradInput
        else:
            self.output = torch.Tensor()
            self.gradInput = torch.Tensor()

    def updateOutput(self, input):
        currentOutput = input
        for i, module in enumerate(self.modules):
            currentOutput = module.updateOutput(currentOutput)
        self.output = currentOutput
        return self.output

    def _iter_with_prev(self):
        return zip(self.modules[-2::-1], self.modules[-1:0:-1])

    def updateGradInput(self, input, gradOutput):
        currentGradOutput = gradOutput
        for prev, current in self._iter_with_prev():
            currentGradOutput = current.updateGradInput(prev.output, currentGradOutput)
        self.gradInput = self.modules[0].updateGradInput(input, currentGradOutput)
        return self.gradInput

    def accGradParameters(self, input, gradOutput, scale=1):
        currentGradOutput = gradOutput
        for prev, current in self._iter_with_prev():
            current.accGradParameters(prev.output, currentGradOutput, scale)
            currentGradOutput = current.gradInput
        self.modules[0].accGradParameters(input, currentGradOutput, scale)

    def backward(self, input, gradOutput, scale=1):
        currentGradOutput = gradOutput
        for prev, current in self._iter_with_prev():
            currentGradOutput = current.backward(prev.output, currentGradOutput, scale)
            # currentModule.gradInput = currentGradOutput
        self.gradInput = self.modules[0].backward(input, currentGradOutput, scale)
        return self.gradInput

    def accUpdateGradParameters(self, input, gradOutput, lr):
        currentGradOutput = gradOutput
        for prev, current in self._iter_with_prev():
            current.accUpdateGradParameters(prev.output, currentGradOutput, lr)
            currentGradOutput = current.gradInput
        self.modules[0].accUpdateGradParameters(input, currentGradOutput, lr)

    def __repr__(self):
        tab = '  '
        line = '\n'
        next = ' -> '
        res = 'nn.Sequential'
        res = res + ' {' + line + tab + '[input'
        for i in range(len(self.modules)):
            res = res + next + '(' + str(i) + ')'

        res = res + next + 'output]'
        for i in range(len(self.modules)):
            res = res + line + tab + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab)

        res = res + line + '}'
        return res
@ -1,22 +0,0 @@
import torch
from .Module import Module


class Sigmoid(Module):

    def updateOutput(self, input):
        self._backend.Sigmoid_updateOutput(
            self._backend.library_state,
            input,
            self.output
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.Sigmoid_updateGradInput(
            self._backend.library_state,
            gradOutput,
            self.gradInput,
            self.output
        )
        return self.gradInput
@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class SmoothL1Criterion(Criterion):

    def __init__(self, sizeAverage=True):
        super(SmoothL1Criterion, self).__init__()
        self.sizeAverage = sizeAverage
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        self._backend.SmoothL1Criterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.SmoothL1Criterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        return self.gradInput
@ -1,36 +0,0 @@
import torch
from torch.nn.functional import _Reduction
from .Criterion import Criterion


class SoftMarginCriterion(Criterion):

    def __init__(self, ):
        super(SoftMarginCriterion, self).__init__()
        self.sizeAverage = True
        self.output_tensor = None

    def updateOutput(self, input, target):
        if self.output_tensor is None:
            self.output_tensor = input.new(1)
        self._backend.SoftMarginCriterion_updateOutput(
            self._backend.library_state,
            input,
            target,
            self.output_tensor,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        self.output = self.output_tensor[0].item()
        return self.output

    def updateGradInput(self, input, target):
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.SoftMarginCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            _Reduction.legacy_get_enum(self.sizeAverage, True, emit_warning=False),
        )
        return self.gradInput
@ -1,25 +0,0 @@
import torch
from .Module import Module


class SoftMax(Module):

    def __init__(self, dim=None):
        super(SoftMax, self).__init__()
        if dim is not None:
            self.dim = dim

    def _get_dim(self, input):
        return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)

    def updateOutput(self, input):
        self.output = torch.softmax(input, self._get_dim(input))
        return self.output

    def updateGradInput(self, input, gradOutput):
        self.gradInput = torch.softmax_backward_data(
            gradOutput,
            self.output,
            self._get_dim(input),
            input)
        return self.gradInput
@ -1,43 +0,0 @@
import torch
from .Module import Module
from .utils import clear


class SoftMin(Module):

    def __init__(self, dim=None):
        super(SoftMin, self).__init__()
        self.mininput = None
        if dim is not None:
            self.dim = dim

    def _get_dim(self, input):
        return getattr(self, 'dim', 0 if input.dim() == 1 or input.dim() == 3 else 1)

    def updateOutput(self, input):
        if self.mininput is None:
            self.mininput = input.new()
        self.mininput.resize_as_(input).copy_(input).mul_(-1)
        self.output = torch.softmax(
            self.mininput,
            self._get_dim(input)
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.mininput is None:
            self.mininput = input.new()
        self.mininput.resize_as_(input).copy_(input).mul_(-1)
        self.gradInput = torch.softmax_backward_data(
            gradOutput,
            self.output,
            self._get_dim(input),
            self.mininput
        )

        self.gradInput.mul_(-1)
        return self.gradInput

    def clearState(self):
        clear(self, 'mininput')
        return super(SoftMin, self).clearState()
@ -1,38 +0,0 @@
import torch
from .Module import Module


class SoftPlus(Module):

    def __init__(self, beta=1, threshold=20):
        super(SoftPlus, self).__init__()
        self.beta = beta            # Beta controls sharpness of transfer function
        self.threshold = threshold  # Avoid floating point issues with exp(x), x > 20

    def updateOutput(self, input):
        # f(x) = 1/beta * log(1 + exp(beta * x))
        self._backend.SoftPlus_updateOutput(
            self._backend.library_state,
            input,
            self.output,
            self.beta,
            self.threshold
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        # d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
        # SINCE
        # y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
        # THEREFORE:
        # d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
        self._backend.SoftPlus_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.output,
            self.beta,
            self.threshold
        )
        return self.gradInput
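Writing out the comment block in updateGradInput above, with k = beta and y = f(x):

    f(x) = \tfrac{1}{k}\log\bigl(1 + e^{kx}\bigr), \qquad
    f'(x) = \frac{e^{kx}}{1 + e^{kx}}, \qquad
    e^{ky} = 1 + e^{kx} \;\Rightarrow\;
    f'(x) = \frac{e^{ky} - 1}{e^{ky}} = 1 - e^{-ky}

so the derivative can be expressed through the saved output y alone, which is the identity the comment records.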
@ -1,28 +0,0 @@
import torch
from .Module import Module


class SoftShrink(Module):

    def __init__(self, lambd=0.5):
        super(SoftShrink, self).__init__()
        self.lambd = lambd

    def updateOutput(self, input):
        self._backend.SoftShrink_updateOutput(
            self._backend.library_state,
            input,
            self.output,
            self.lambd
        )
        return self.output

    def updateGradInput(self, input, gradOutput):
        self._backend.SoftShrink_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.lambd
        )
        return self.gradInput
Some files were not shown because too many files have changed in this diff.