Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-23 14:59:34 +08:00
Compare commits
70 Commits
aaf41c61a6
dd844f741b
7117a9012e
1bdc28161a
5e150caf38
c0c62d099a
b9ece39685
7c44506441
937ba581d7
2ae54f1194
a217fefee1
5221745c21
000ca44b16
8f3d44033b
7cc14c595a
797544c47a
0426f2f3ec
336eeee895
593f867e3e
385913be1c
6aaa14f5fe
07f5b21ef1
e454870396
2822013437
72c1982734
0de2ea305a
d899385a3d
c6d6cbe8a6
85e82e85d8
a1534cc37d
8c8dc791ef
63edca44f2
8d90ab2d9b
bd5303010d
16d2c3d7b3
407a92dc26
0a893abc7b
34fa5e0dc7
712686ce91
518864a7e0
d9dccfdd71
4d37ef878c
126e77d5c6
53eec78bea
a4edaec81a
92481b59d3
6c77fa9121
aeb7a72620
73d232ee45
c0c65bf915
f6cee952af
e74184f679
3884d36176
e7c6886a00
ed8e92f63d
fb97df5d65
e9b05c71b4
7926324385
1527b37c26
de4659659b
a96a8c8336
691aa19b88
6b07dc9e22
ac9312e9f8
91a17b702b
a9785bba44
fc354a0d6e
262611fcd3
b8a34f3033
6328981fcf
.gitignore (vendored): 1 line changed
@@ -2,6 +2,7 @@ build/
 dist/
 torch.egg-info/
 */**/__pycache__
+torch/version.py
 torch/csrc/generic/TensorMethods.cpp
 torch/lib/*.so*
 torch/lib/*.dylib*
setup.py: 50 lines changed
@@ -1,6 +1,8 @@
 from setuptools import setup, Extension, distutils, Command, find_packages
 import setuptools.command.build_ext
 import setuptools.command.install
+import setuptools.command.develop
+import setuptools.command.build_py
 import distutils.unixccompiler
 import distutils.command.build
 import distutils.command.clean
@@ -94,6 +96,28 @@ class build_module(Command):
         self.run_command('build_ext')
 
 
+class build_py(setuptools.command.build_py.build_py):
+
+    def run(self):
+        self.create_version_file()
+        setuptools.command.build_py.build_py.run(self)
+
+    @staticmethod
+    def create_version_file():
+        global version, cwd
+        print('-- Building version ' + version)
+        version_path = os.path.join(cwd, 'torch', 'version.py')
+        with open(version_path, 'w') as f:
+            f.write("__version__ = '{}'\n".format(version))
+
+
+class develop(setuptools.command.develop.develop):
+
+    def run(self):
+        build_py.create_version_file()
+        setuptools.command.develop.develop.run(self)
+
+
 class build_ext(setuptools.command.build_ext.build_ext):
 
     def run(self):
@@ -220,14 +244,23 @@ main_sources = [
     "torch/csrc/Exceptions.cpp",
     "torch/csrc/Tensor.cpp",
     "torch/csrc/Storage.cpp",
+    "torch/csrc/DynamicTypes.cpp",
     "torch/csrc/byte_order.cpp",
     "torch/csrc/utils.cpp",
     "torch/csrc/utils/object_ptr.cpp",
     "torch/csrc/allocators.cpp",
     "torch/csrc/serialization.cpp",
     "torch/csrc/autograd/init.cpp",
-    "torch/csrc/autograd/variable.cpp",
-    "torch/csrc/autograd/function.cpp",
+    "torch/csrc/autograd/engine.cpp",
+    "torch/csrc/autograd/function.cpp",
+    "torch/csrc/autograd/variable.cpp",
+    "torch/csrc/autograd/grad_buffer.cpp",
+    "torch/csrc/autograd/python_function.cpp",
+    "torch/csrc/autograd/python_cpp_function.cpp",
+    "torch/csrc/autograd/python_variable.cpp",
+    "torch/csrc/autograd/python_engine.cpp",
+    "torch/csrc/autograd/functions/batch_normalization.cpp",
+    "torch/csrc/autograd/functions/init.cpp",
     "torch/csrc/nn/THNN_generic.cpp",
 ]
@@ -266,6 +299,7 @@ if WITH_CUDA:
     extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
     extra_compile_args += ['-DWITH_CUDA']
+    extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
     main_libraries += ['cudart']
     main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
     main_sources += [
         "torch/csrc/cuda/Module.cpp",
@@ -352,18 +386,28 @@ if WITH_CUDA:
     )
     extensions.append(THCUNN)
 
-version = "0.1"
+version = '0.1.9'
+if os.getenv('PYTORCH_BUILD_VERSION'):
+    assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
+    version = os.getenv('PYTORCH_BUILD_VERSION') \
+        + '_' + os.getenv('PYTORCH_BUILD_NUMBER')
+else:
+    try:
+        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
+        version += '+' + sha[:7]
+    except subprocess.CalledProcessError:
+        pass
 
 setup(name="torch", version=version,
       ext_modules=extensions,
       cmdclass={
           'build': build,
+          'build_py': build_py,
           'build_ext': build_ext,
           'build_deps': build_deps,
           'build_module': build_module,
+          'develop': develop,
           'install': install,
           'clean': clean,
       },
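The version logic above is self-contained enough to exercise on its own. A minimal sketch of the same selection rules (the environment variable names and the short-SHA suffix come from the diff; the function wrapper is illustrative):

import os
import subprocess

def compute_version(base='0.1.9', cwd='.'):
    # A CI-provided build version wins; otherwise the short git SHA is
    # appended to the base version, as in the setup.py change above.
    if os.getenv('PYTORCH_BUILD_VERSION'):
        assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
        return os.getenv('PYTORCH_BUILD_VERSION') + '_' + os.getenv('PYTORCH_BUILD_NUMBER')
    try:
        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                      cwd=cwd).decode('ascii').strip()
        return base + '+' + sha[:7]
    except (OSError, subprocess.CalledProcessError):
        return base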
@@ -2,11 +2,13 @@ import sys
 import tempfile
 import unittest
 from copy import deepcopy
+from itertools import product
 
 import torch
 import torch.cuda
 from torch.autograd import Variable
-from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
+from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous, \
+    freeze_rng_state
 import torch.backends.cudnn
 
 # tarfile module tries to obtain a file object name in python 3.3
@@ -336,7 +338,8 @@ class NNTestCase(TestCase):
 
     def _zero_grad_input(self, input):
         if isinstance(input, Variable):
-            input.grad.data.zero_()
+            if input.requires_grad:
+                input.grad.data.zero_()
         elif torch.is_tensor(input):
             return
         else:
@@ -516,6 +519,8 @@ class ModuleTest(TestBase):
             expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
             test_case.assertEqual(out, expected_out)
 
+        self.test_noncontig(test_case, module, input)
+
         # TODO: do this with in-memory files as soon as torch.save will support it
         with TemporaryFile() as f:
             test_case._forward(module, input)
@@ -526,6 +531,51 @@ class ModuleTest(TestBase):
 
         self._do_test(test_case, module, input)
 
+    def noncontiguize(self, obj):
+        if isinstance(obj, list):
+            return [self.noncontiguize(o) for o in obj]
+        tensor = obj.data if isinstance(obj, Variable) else obj
+        ndim = tensor.dim()
+        noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
+        assert noncontig.numel() == 1 or not noncontig.is_contiguous()
+        if isinstance(obj, Variable):
+            return Variable(noncontig, requires_grad=obj.requires_grad)
+        return noncontig
+
+    def test_noncontig(self, test_case, module, input):
+        test_case._zero_grad_parameters(module)
+        test_case._zero_grad_input(input)
+        with freeze_rng_state():
+            output = test_case._forward(module, input)
+        grad_output = output
+        if isinstance(grad_output, Variable):
+            grad_output = grad_output.data.clone()
+        else:
+            grad_output = grad_output.clone()
+        output = output.clone()
+        grad_output.normal_()
+        d_input = deepcopy(test_case._backward(module, input, output, grad_output))
+        d_param = deepcopy(test_case._get_parameters(module)[1])
+
+        nc_input = self.noncontiguize(input)
+        nc_grad_output = self.noncontiguize(grad_output)
+        for contig_i, contig_g in product((True, False), repeat=2):
+            i = input if contig_i else nc_input
+            go = grad_output if contig_g else nc_grad_output
+            test_case._zero_grad_parameters(module)
+            test_case._zero_grad_input(i)
+            with freeze_rng_state():
+                try:
+                    out = test_case._forward(module, i)
+                except Exception:
+                    # Some modules will fail because of non contiguous inputs and we're ok with that
+                    continue
+                grad = test_case._backward(module, i, out, go)
+
+            test_case.assertEqual(out, output)
+            test_case.assertEqual(grad, d_input, 1e-4)
+            test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
+
     def test_cuda(self, test_case):
         if not TEST_CUDA or not self.should_test_cuda:
             raise unittest.SkipTest('Excluded from CUDA tests')
@@ -536,8 +586,6 @@ class ModuleTest(TestBase):
 
             cpu_module = self.constructor(*self.constructor_args)
             gpu_module = self.constructor(*self.constructor_args).float().cuda()
-            test_case._zero_grad_parameters(cpu_module)
-            test_case._zero_grad_parameters(gpu_module)
             cpu_param = test_case._get_parameters(cpu_module)
             gpu_param = test_case._get_parameters(gpu_module)
             for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
@@ -547,6 +595,10 @@ class ModuleTest(TestBase):
                 gpu_p = gpu_p.data
                 gpu_p.copy_(cpu_p)
 
+            test_case._zero_grad_input(cpu_input)
+            test_case._zero_grad_input(gpu_input)
+            test_case._zero_grad_parameters(cpu_module)
+            test_case._zero_grad_parameters(gpu_module)
             cpu_output = test_case._forward(cpu_module, cpu_input)
             gpu_output = test_case._forward(gpu_module, gpu_input)
             test_case.assertEqual(cpu_output, gpu_output, 2e-4)
@@ -560,6 +612,8 @@ class ModuleTest(TestBase):
             test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
             for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
                 test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
 
+            self.test_noncontig(test_case, gpu_module, gpu_input)
         except NotImplementedError:
             pass
         # TODO: remove this after CUDA scatter_ is implemented
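The non-contiguity trick in noncontiguize() deserves a standalone look: stacking a zeroed clone with the real tensor along a new trailing dimension and selecting index 1 yields a tensor with the same values but strided, non-contiguous storage. A small sketch of just that mechanism:

import torch

def make_noncontig(tensor):
    ndim = tensor.dim()
    # Interleave a throwaway zero copy with the real data, then select the
    # real slice: the result aliases every other element of the new buffer.
    return torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)

x = torch.randn(3, 4)
nc = make_noncontig(x)
assert not nc.is_contiguous()  # same values, strided storage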
@@ -74,6 +74,7 @@ class TestAutograd(TestCase):
             counter[0] += inc
 
         z = x ** 2 + x * 2 + x * y + y
+        x.register_hook(lambda *args: bw_hook(0, *args))
         test = z.register_hook(lambda *args: bw_hook(1, *args))
         z.backward(torch.ones(5, 5), retain_variables=True)
         self.assertEqual(counter[0], 1)
@@ -253,6 +254,24 @@ class TestAutograd(TestCase):
         y._backward_hooks['test'] = error
         b.backward(torch.ones(5, 5))
 
+    def test_previous_functions(self):
+        x = Variable(torch.randn(5, 5), requires_grad=True)
+        y = Variable(torch.randn(5, 5), requires_grad=True)
+
+        a = x + y
+        self.assertIsNotNone(a.creator)
+        previous_functions = a.creator.previous_functions
+        self.assertEqual(len(previous_functions), 2)
+        self.assertIs(previous_functions[0][0], x)
+        self.assertEqual(previous_functions[0][1], 0)
+        self.assertIs(previous_functions[1][0], y)
+        self.assertEqual(previous_functions[1][1], 0)
+
+        b = a + 5
+        previous_functions = b.creator.previous_functions
+        self.assertEqual(len(previous_functions), 1)
+        self.assertIs(previous_functions[0][0], a.creator)
+
     def test_inplace(self):
         x = Variable(torch.ones(5, 5), requires_grad=True)
         y = Variable(torch.ones(5, 5) * 4, requires_grad=True)
@@ -408,13 +427,30 @@ class TestAutograd(TestCase):
         y = x * 2
         y = y.detach()
         self.assertFalse(y.requires_grad)
-        self.assertFalse(y.creator.requires_grad)
+        self.assertIsNone(y.creator)
         z = x + y
         z.sum().backward()
         # This is an incorrect gradient, but we assume that's what the user
         # wanted. detach() is an advanced option.
         self.assertEqual(x.grad.data, torch.ones(10, 10))
 
+        # detach() should preserve volatile flag
+        x = Variable(torch.randn(10, 10), volatile=True)
+        y = x * 2
+        y = y.detach()
+        self.assertTrue(y.volatile)
+
+        # in-place detach
+        x = Variable(torch.randn(10, 10), requires_grad=True)
+        y = Variable(torch.randn(10, 10), requires_grad=True)
+        a = x * 2
+        (y + a).sum().backward(retain_variables=True)
+        a.detach_()
+        self.assertFalse(a.requires_grad)
+        (y + a).sum().backward()  # this won't backprop to x
+        self.assertEqual(x.grad.data, torch.ones(10, 10) * 2)
+        self.assertEqual(y.grad.data, torch.ones(10, 10) * 2)
+
     def test_type_conversions(self):
         import torch.cuda
         x = Variable(torch.randn(5, 5))
@@ -435,6 +471,15 @@ class TestAutograd(TestCase):
         self.assertIs(type(x2.data), torch.cuda.FloatTensor)
         self.assertIs(x2.get_device(), 1)
 
+    def test_isolated_node(self):
+        x = Variable(torch.randn(5, 5), requires_grad=True)
+        y = Variable(torch.randn(5, 5), requires_grad=True)
+
+        a = x + y
+        b = torch.max(a, 1)[1].repeat(1, 5).double()
+        o = (b + a).sum()
+        o.backward()
+
     def test_return_leaf(self):
         class Identity(Function):
 
@@ -646,6 +691,18 @@ class TestAutograd(TestCase):
 
         self.assertGreater(x.grad.data.abs().sum(), 0)
 
+    def test_stochastic_require_grad(self):
+        # This tests a DSD function sequence (D=deterministic, S=stochastic),
+        # where all functions require grad.
+        x = Variable(torch.randn(2, 10), requires_grad=True)
+        y = Variable(torch.randn(2, 10), requires_grad=True)
+        z = torch.normal(x + 2, 2)
+        o = z + y
+        z.reinforce(torch.randn(2, 10))
+        o.sum().backward()
+        self.assertEqual(y.grad.data, torch.ones(2, 10))
+        self.assertGreater(x.grad.data.abs().sum(), 0)
+
     def test_stochastic_sequence(self):
         x = Variable(torch.rand(10).clamp_(0, 1), requires_grad=True)
         b = x.bernoulli()
@@ -819,8 +876,8 @@ function_tests = [
     (Norm, (3, 0), ((S, S, S),), '3_dim'),
     (Addcmul, (), ((S, S), (S, S), (S, S))),
     (Addcmul, (0.6,), ((S, S), (S, S), (S, S)), 'scale'),
-    (Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 1e-2)),
-    (Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 1e-2), 'scale'),
+    (Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 5e-2)),
+    (Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 5e-2), 'scale'),
     (IndexAdd, (0,), ((S, S), index_variable(2, S), (2, S))),
     # (IndexCopy, (0,), ((S, S), index_variable(2, S), (2, S)) ),
     (IndexFill, (0, 2), ((S, S), index_variable(2, S))),
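The detach() semantics these tests pin down can be shown in a few lines; this sketch uses the same 0.1.x-era Variable API (creator, .grad.data) as the tests:

import torch
from torch.autograd import Variable

x = Variable(torch.ones(10, 10), requires_grad=True)
y = (x * 2).detach()          # new Variable: no creator, never requires grad
assert y.creator is None and not y.requires_grad
z = (x + y).sum()
z.backward()                  # gradient flows only through the x + y edge
assert x.grad.data.equal(torch.ones(10, 10))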
@@ -233,6 +233,7 @@ tests = [
     ('triu', medium_2d, lambda t: [],),
     ('triu', medium_2d, lambda t: [2], 'positive'),
     ('triu', medium_2d, lambda t: [-2], 'negative'),
+    ('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
     ('view', small_3d, lambda t: [100, 10],),
     ('view_as', small_3d, lambda t: [t(100, 10)],),
     ('zero', small_3d, lambda t: [],),
@@ -338,21 +339,21 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
 
 class TestCuda(TestCase):
 
+    @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
     def test_autogpu(self):
-        if torch.cuda.device_count() > 1:
-            x = torch.randn(5, 5).cuda()
-            y = torch.randn(5, 5).cuda()
-            self.assertEqual(x.get_device(), 0)
-            self.assertEqual(x.get_device(), 0)
-            with torch.cuda.device(1):
-                z = torch.randn(5, 5).cuda()
-                self.assertEqual(z.get_device(), 1)
-                q = x.add(y)
-                self.assertEqual(q.get_device(), 0)
-                w = torch.randn(5, 5).cuda()
-                self.assertEqual(w.get_device(), 1)
-            z = z.cuda()
-            self.assertEqual(z.get_device(), 0)
+        x = torch.randn(5, 5).cuda()
+        y = torch.randn(5, 5).cuda()
+        self.assertEqual(x.get_device(), 0)
+        self.assertEqual(x.get_device(), 0)
+        with torch.cuda.device(1):
+            z = torch.randn(5, 5).cuda()
+            self.assertEqual(z.get_device(), 1)
+            q = x.add(y)
+            self.assertEqual(q.get_device(), 0)
+            w = torch.randn(5, 5).cuda()
+            self.assertEqual(w.get_device(), 1)
+        z = z.cuda()
+        self.assertEqual(z.get_device(), 0)
 
     @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
     def test_copy_device(self):
@@ -512,6 +513,13 @@ class TestCuda(TestCase):
         self.assertEqual(x, y)
         self.assertEqual(torch.cuda.initial_seed(), 2)
 
+    @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
+    def test_cat_autogpu(self):
+        x = torch.randn(4, 4).cuda(1)
+        y = torch.randn(4, 4).cuda(1)
+        z = torch.cat([x, y], 0)
+        self.assertEqual(z.get_device(), x.get_device())
+
     def test_serialization(self):
         x = torch.randn(4, 4).cuda()
         with tempfile.NamedTemporaryFile() as f:
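test_autogpu encodes one rule: a new CUDA tensor is allocated on the current device, torch.cuda.device() changes the current device only inside the with block, and operation results follow their inputs rather than the guard. A compact sketch assuming at least two GPUs:

import torch

x = torch.randn(5, 5).cuda()        # allocated on device 0
assert x.get_device() == 0
with torch.cuda.device(1):
    z = torch.randn(5, 5).cuda()    # current device is 1 inside the block
    assert z.get_device() == 1
    q = x.add(x)                    # result follows its inputs, not the guard
    assert q.get_device() == 0
z = z.cuda()                        # outside the block: moves back to device 0
assert z.get_device() == 0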
@@ -27,8 +27,8 @@ class TestTensorDataset(TestCase):
         l = torch.randn(15)
         source = TensorDataset(t, l)
         for i in range(15):
-            self.assertEqual(t[i:i + 1], source[i][0])
-            self.assertEqual(l[i:i + 1], source[i][1])
+            self.assertEqual(t[i], source[i][0])
+            self.assertEqual(l[i], source[i][1])
 
 
 class ErrorDataset(Dataset):
@@ -52,7 +52,7 @@ class TestDataLoader(TestCase):
         for i, (sample, target) in enumerate(loader):
             idx = i * batch_size
             self.assertEqual(sample, self.data[idx:idx + batch_size])
-            self.assertEqual(target, self.labels[idx:idx + batch_size].view(-1, 1))
+            self.assertEqual(target, self.labels[idx:idx + batch_size])
         self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
 
     def _test_shuffle(self, loader):
@@ -66,7 +66,7 @@ class TestDataLoader(TestCase):
                     self.assertFalse(found_data[data_point_idx])
                     found_data[data_point_idx] += 1
                     break
-            self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
+            self.assertEqual(target, self.labels[data_point_idx])
             found_labels[data_point_idx] += 1
         self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
         self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
@@ -1154,6 +1154,15 @@ class TestNN(NNTestCase):
         module.__repr__()
         str(module)
 
+    def test_accUpdateGradParameters(self):
+        module = nn.LookupTable(5, 3)
+        module.weight.fill_(2)
+        input = torch.LongTensor([1, 3])
+        output = module.updateOutput(input)
+        module.backwardUpdate(input, output, 0.1)
+        self.assertEqual(module.weight[0, 0], 2)
+        self.assertEqual(module.weight[3, 0], 1.8)
+
     def _build_net(self):
         return (nn.Sequential()
                 .add(nn.Concat(0)
@@ -19,6 +19,7 @@ HAS_SHM_FILES = os.path.isdir('/dev/shm')
 TEST_CUDA_IPC = torch.cuda.is_available() and \
     sys.version_info[0] == 3 and \
     sys.platform != 'darwin'
+TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
 
 
 def simple_fill(queue, event):
@@ -79,9 +80,8 @@ def autograd_sharing(queue, ready, master_modified):
     is_ok = var.data.equal(expected_var)
     var.data[:] = torch.ones(5, 5)
 
-    if var.grad is not None:
-        is_ok &= var.grad.data.equal(torch.ones(5, 5) * 4)
-        var.grad.data[:] = torch.ones(5, 5)
+    is_ok &= var.grad.data.equal(torch.zeros(5, 5))
+    var.grad.data[:] = torch.ones(5, 5)
 
     queue.put(is_ok)
 
@@ -289,6 +289,7 @@ class TestMultiprocessing(TestCase):
         self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
 
     @unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
+    @unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
     def test_cuda_small_tensors(self):
         # Check multiple small tensors which will likely use the same
         # underlying cached allocation
@@ -357,20 +358,19 @@ class TestMultiprocessing(TestCase):
         queue = mp.Queue()
         p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
         p.start()
+        var.grad.data.zero_()
         queue.put(var)
 
         ready.wait()
         var.data[0, 0] = 1000
-        if var.grad is not None:
-            var.grad.data[:] = torch.ones(5, 5) * 4
+        var.grad.data[:] = torch.ones(5, 5) * 4
         master_modified.set()
 
         worker_ok = queue.get()
         self.assertTrue(worker_ok)
 
         self.assertEqual(var.data, torch.ones(5, 5))
-        if var.grad is not None:
-            self.assertEqual(var.grad.data, torch.ones(5, 5))
+        self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
         p.join()
 
     def test_variable_sharing(self):
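The handshake above relies on torch.multiprocessing sharing tensor storage between processes, so writes on one side are visible on the other. A minimal sketch of that mechanism alone, without the test's event choreography:

import torch
import torch.multiprocessing as mp

def worker(q):
    t = q.get()
    t[:] = torch.ones(5, 5)   # visible to the parent: the storage is shared

if __name__ == '__main__':
    t = torch.zeros(5, 5)
    t.share_memory_()          # move the storage to shared memory
    q = mp.Queue()
    p = mp.Process(target=worker, args=(q,))
    p.start()
    q.put(t)
    p.join()
    assert t.equal(torch.ones(5, 5))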
test/test_nn.py: 159 lines changed
@@ -4,7 +4,7 @@ import random
 import unittest
 import contextlib
 from copy import deepcopy
-from itertools import repeat
+from itertools import repeat, product
 from functools import wraps
 
 import torch.nn as nn
@@ -166,10 +166,6 @@ class NewCriterionTest(InputVariableMixin, CriterionTest):
 
 
 class TestNN(NNTestCase):
-    # # protip: uncomment this line to figure out which test is segfaulting
-    # def setUp(self):
-    #     print("In method", self._testMethodName)
-    #     super(TestNN, self).setUp()
 
     def _forward(self, module, input):
         with freeze_rng_state():
@@ -345,6 +341,24 @@ class TestNN(NNTestCase):
         expected_grad = torch.ones(5, 5).mm(module.weight.data) * 2
         self.assertEqual(input.grad.data, expected_grad)
 
+    def test_zero_grad(self):
+        module = nn.Linear(5, 5)
+        for p in module.parameters():
+            p.requires_grad = False
+        module.zero_grad()
+
+        module.weight.requires_grad = True
+        module.weight.grad.data.fill_(1)
+        module.zero_grad()
+        self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
+
+        module.bias.requires_grad = True
+        module.weight.grad.data.fill_(1)
+        module.bias.grad.data.fill_(1)
+        module.zero_grad()
+        self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
+        self.assertEqual(module.bias.grad.data, module.bias.data.clone().zero_())
+
     def test_volatile(self):
         module = nn.Conv2d(2, 5, kernel_size=3, padding=1)
         input = torch.randn(1, 2, 10, 10)
@@ -1052,9 +1066,33 @@ class TestNN(NNTestCase):
         self.assertEqual(output1, output2)
         self.assertEqual(hidden1, hidden2)
 
+    def _test_rnn_retain_variables(self, dtype):
+        rnn = nn.LSTM(10, 20, num_layers=2).type(dtype)
+        input = Variable(torch.randn(5, 6, 10).type(dtype), requires_grad=True)
+        output = rnn(input)
+        output[0].sum().backward(retain_variables=True)
+        grads = [input.grad.data.clone()] + [p.grad.data.clone() for p in rnn.parameters()]
+        rnn.zero_grad()
+        input.grad.data.zero_()
+        output[0].sum().backward(retain_variables=True)
+        grads2 = [input.grad.data] + [p.grad.data for p in rnn.parameters()]
+        self.assertEqual(grads, grads2)
+
+    def test_rnn_retain_variables(self):
+        self._test_rnn_retain_variables(torch.DoubleTensor)
+
     @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
     def test_rnn_retain_variables_cuda(self):
-        try:
-            torch.backends.cudnn.enabled = False
-            self._test_rnn_retain_variables(torch.cuda.FloatTensor)
-        finally:
-            torch.backends.cudnn.enabled = True
+        self._test_rnn_retain_variables(torch.cuda.FloatTensor)
 
     def _test_RNN_cpu_vs_cudnn(self, dropout):
 
-        def forward_backward(cuda, rnn, input_val, hx_val, weights_val):
+        def forward_backward(cuda, rnn, input_val, hx_val, grad_output, grad_hy, weights_val):
             is_lstm = type(rnn) == nn.LSTM
 
             for x_layer, y_layer in zip(rnn.all_weights, weights_val):
@@ -1076,16 +1114,15 @@ class TestNN(NNTestCase):
                     hx[1].data = hx[1].data.cuda()
                 else:
                     hx.data = hx.data.cuda()
+                grad_output = grad_output.cuda()
+                grad_hy = grad_hy.cuda()
 
             output, hy = rnn(input, hx)
-            # FIXME this is because of a pytorch bug
             if is_lstm:
-                fake_loss = 0 * (hy[0] + hy[1]).sum()
+                torch.autograd.backward([output + 0, hy[0] + 0, hy[1] + 0], [grad_output, grad_hy, grad_hy + 1])
             else:
-                fake_loss = 0 * hy.sum()
-
-            loss = output.sum() + fake_loss
-            loss.backward()
+                torch.autograd.backward([output + 0, hy + 0], [grad_output, grad_hy])
 
             return {'output': output.data,
                     'hy': hy[0].data if is_lstm else hy.data,
@@ -1101,6 +1138,10 @@ class TestNN(NNTestCase):
         seq_length = 7
         batch = 5
 
+        def make_noncontig(tensor):
+            ndim = tensor.dim()
+            return torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
+
         def compare_cpu_gpu(outputs_cpu, outputs_gpu):
             self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
             for key in outputs_cpu.keys():
@@ -1113,49 +1154,58 @@ class TestNN(NNTestCase):
             self.assertEqual(cpu_weight.grad.data, gpu_weight.grad.data, prec=5e-5)
 
         for module in (nn.RNN, nn.LSTM, nn.GRU):
-            for bias in (True, False):
-                for bidirectional in (False, True):
-                    for batch_first in (False, True):
-                        num_directions = 2 if bidirectional else 1
-                        if batch_first:
-                            input_val = torch.randn(batch, seq_length, input_size)
-                        else:
-                            input_val = torch.randn(seq_length, batch, input_size)
-                        hx_val = torch.randn(num_layers * num_directions, batch, hidden_size)
-
-                        rnn = module(input_size,
-                                     hidden_size,
-                                     num_layers,
-                                     bias=bias,
-                                     dropout=dropout,
-                                     bidirectional=bidirectional,
-                                     batch_first=batch_first)
-                        outputs_cpu = forward_backward(
-                            False, rnn, input_val, hx_val, rnn.all_weights)
-
-                        rnn_gpu = module(input_size,
-                                         hidden_size,
-                                         num_layers,
-                                         bias=bias,
-                                         dropout=dropout,
-                                         bidirectional=bidirectional,
-                                         batch_first=batch_first)
-                        outputs_gpu = forward_backward(
-                            True, rnn_gpu, input_val, hx_val, rnn.all_weights)
-
-                        compare_cpu_gpu(outputs_cpu, outputs_gpu)
+            for bias, bidirectional, batch_first, contig in product((True, False), repeat=4):
+                num_directions = 2 if bidirectional else 1
+                if batch_first:
+                    input_val = torch.randn(batch, seq_length, input_size)
+                    grad_output = torch.randn(batch, seq_length, hidden_size * num_directions)
+                else:
+                    input_val = torch.randn(seq_length, batch, input_size)
+                    grad_output = torch.randn(seq_length, batch, hidden_size * num_directions)
+                hx_val = torch.randn(num_layers * num_directions, batch, hidden_size)
+                grad_hy = torch.randn(num_layers * num_directions, batch, hidden_size)
+
+                if not contig:
+                    grad_output = make_noncontig(grad_output)
+                    grad_hy = make_noncontig(grad_hy)
+                    input_var = make_noncontig(input_val)
+                    hx_val = make_noncontig(hx_val)
+
+                rnn = module(input_size,
+                             hidden_size,
+                             num_layers,
+                             bias=bias,
+                             dropout=dropout,
+                             bidirectional=bidirectional,
+                             batch_first=batch_first)
+                outputs_cpu = forward_backward(
+                    False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
+
+                rnn_gpu = module(input_size,
+                                 hidden_size,
+                                 num_layers,
+                                 bias=bias,
+                                 dropout=dropout,
+                                 bidirectional=bidirectional,
+                                 batch_first=batch_first)
+                outputs_gpu = forward_backward(
+                    True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
+
+                compare_cpu_gpu(outputs_cpu, outputs_gpu)
 
         for nonlinearity in ('tanh', 'relu'):
             hx_val = torch.randn(num_layers, batch, hidden_size)
             input_val = torch.randn(seq_length, batch, input_size)
+            grad_output = torch.randn(seq_length, batch, hidden_size * num_directions)
+            grad_hy = torch.randn(num_layers * num_directions, batch, hidden_size)
 
             rnn = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
-            outputs_cpu = forward_backward(False, rnn, input_val, hx_val, rnn.all_weights)
+            outputs_cpu = forward_backward(False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
 
             rnn_gpu = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
-            outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, rnn.all_weights)
+            outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
 
             compare_cpu_gpu(outputs_cpu, outputs_gpu)
@@ -1274,6 +1324,22 @@ class TestNN(NNTestCase):
         output.backward(grad_output)
         self.assertEqual(grad_output, grad_output_clone)
 
+    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
+    def test_noncontig_conv_grad(self):
+        # FIXME: remove after adding non-contiguous grad tests for all modules
+        module = nn.Conv2d(3, 5, kernel_size=3, padding=1).cuda()
+        input = Variable(torch.randn(2, 3, 10, 10).cuda(), requires_grad=True)
+        output = module(input)
+
+        grad = torch.randn(2, 2, 5, 10, 10).cuda()[:, 1]
+        assert not grad.is_contiguous()
+        output.backward(grad, retain_variables=True)
+        result = output.grad.data.clone()
+        output.grad.data.zero_()
+
+        output.backward(grad.contiguous())
+        self.assertEqual(result, output.grad.data)
+
     def test_pixel_shuffle(self):
         batch_size = random.randint(1, 3)
         upscale_factor = random.randint(2, 5)
@@ -1565,6 +1631,13 @@ new_module_tests = [
         input_size=(2, 3, 3, 4, 5),
         cudnn=True,
     ),
+    dict(
+        module_name='Conv3d',
+        constructor_args=(3, 4, (2, 3, 4), 1, 0, 1, 1, False),
+        input_size=(2, 3, 3, 4, 5),
+        cudnn=True,
+        desc='no_bias'
+    ),
     dict(
         module_name='Conv3d',
         constructor_args=(3, 4, 2, 2),
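One refactor above is worth calling out: four nested for loops over booleans were collapsed into a single itertools.product loop with a new contig axis. The pattern in isolation:

from itertools import product

# Equivalent to four nested loops over the same booleans, one level deep:
for bias, bidirectional, batch_first, contig in product((True, False), repeat=4):
    config = dict(bias=bias, bidirectional=bidirectional,
                  batch_first=batch_first, contig=contig)
    # ... build the RNN under test from `config` and compare CPU vs GPU ...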
@@ -2152,6 +2152,9 @@ class TestTorch(TestCase):
         self.assertEqual((tensor_view - tensor).abs().max(), 0)
         self.assertEqual(empty.view_as(empty), empty)
         self.assertEqual(empty.view(0), empty)
+        self.assertRaises(RuntimeError, lambda: tensor.view(15, 0))
+        self.assertRaises(RuntimeError, lambda: tensor.view(7, -1))
+        self.assertRaises(RuntimeError, lambda: tensor.view(15, -1, -1))
 
     def test_expand(self):
         result = torch.Tensor()
@@ -2600,6 +2603,8 @@ class TestTorch(TestCase):
         y = x.clone().unsqueeze_(2)
         self.assertEqual(y, x.contiguous().view(2, 4, 1))
 
+        self.assertRaises(RuntimeError, lambda: torch.Tensor().unsqueeze(0))
+
     def test_iter(self):
         x = torch.randn(5, 5)
         for i, sub in enumerate(x):
@@ -2835,8 +2840,13 @@ class TestTorch(TestCase):
         self.assertEqual(x[0], 1)
         self.assertEqual(x[1], 2)
         self.assertEqual(x[2], 3)
         self.assertEqual(len(x), 3)
+        self.assertRaises(TypeError, lambda: torch.Size(torch.ones(3)))
 
+        self.assertIsInstance(x * 2, torch.Size)
+        self.assertIsInstance(x[:-1], torch.Size)
+        self.assertIsInstance(x + x, torch.Size)
 
 
 if __name__ == '__main__':
     run_tests()
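The new assertIsInstance checks correspond to the wrap_tuple_fn change in torch/csrc/Size.cpp at the end of this diff: tuple operations on a torch.Size now return torch.Size rather than a plain tuple. Behaviour sketch:

import torch

s = torch.randn(2, 3, 4).size()
assert isinstance(s * 2, torch.Size)     # repetition stays a Size
assert isinstance(s[:-1], torch.Size)    # slicing stays a Size
assert isinstance(s + s, torch.Size)     # concatenation stays a Size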
@@ -10,6 +10,7 @@ on an NVIDIA GPU with compute capability >= 2.0.
 
 import sys
 from ._utils import _import_dotted_name
+from .version import __version__
 
 __all__ = [
     'typename', 'is_tensor', 'is_storage', 'set_default_tensor_type',
@@ -75,10 +76,20 @@ def typename(o):
 
 
 def is_tensor(obj):
+    r"""Returns True if `obj` is a pytorch tensor.
+
+    Args:
+        obj (Object): Object to test
+    """
     return obj.__class__ in _tensor_classes
 
 
 def is_storage(obj):
+    r"""Returns True if `obj` is a pytorch storage object.
+
+    Args:
+        obj (Object): Object to test
+    """
    return obj.__class__ in _storage_classes
@@ -1632,6 +1632,20 @@ Fills this tensor with numbers sampled from the uniform distribution:
 P(x) = \dfrac{1}{to - from}
 """)
 
+add_docstr(torch._C.FloatTensorBase.unsqueeze,
+"""
+unsqueeze(dim)
+
+See :func:`torch.unsqueeze`
+""")
+
+add_docstr(torch._C.FloatTensorBase.unsqueeze_,
+"""
+unsqueeze_(dim)
+
+In-place version of :meth:`~Tensor.unsqueeze`
+""")
+
 add_docstr(torch._C.FloatTensorBase.var,
 """
 var() -> float
@@ -1639,6 +1653,31 @@ var() -> float
 See :func:`torch.var`
 """)
 
+add_docstr(torch._C.FloatTensorBase.view,
+"""
+view(*args) -> Tensor
+
+Returns a new tensor with the same data but different size.
+
+The returned tensor shares the same data and must have the same number
+of elements, but may have a different size. A tensor must be
+:func:`contiguous` to be viewed.
+
+Args:
+    args (torch.Size or int...): Desired size
+
+Example:
+    >>> x = torch.randn(4, 4)
+    >>> x.size()
+    torch.Size([4, 4])
+    >>> y = x.view(16)
+    >>> y.size()
+    torch.Size([16])
+    >>> z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
+    >>> z.size()
+    torch.Size([2, 8])
+""")
+
 add_docstr(torch._C.FloatTensorBase.zero_,
 """
 zero_()
@@ -58,7 +58,10 @@ for t in ['Float', 'Double']:
     type2backend.backends['torch.{}Tensor'.format(t)] = backend
     type2backend.backends[getattr(torch, '{}Tensor'.format(t))] = backend
 
-backend = Backend('Cuda', 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
-type2backend.backends['THNNCudaBackend'] = backend
-type2backend.backends['torch.cuda.FloatTensor'] = backend
-type2backend.backends[torch.cuda.FloatTensor] = backend
+for t in ['Half', '', 'Double']:
+    backend = Backend('Cuda' + t, 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
+    type2backend.backends['THNNCuda{}Backend'.format(t)] = backend
+    py_name = 'Float' if t == '' else t
+    type2backend.backends['torch.cuda.{}Tensor'.format(py_name)] = backend
+    type2backend.backends[getattr(torch.cuda, '{}Tensor'.format(py_name))] = backend
@@ -3621,7 +3621,6 @@ Example::
 >>> y = torch.squeeze(x, 1)
 >>> y.size()
 (2L, 2L, 1L, 2L)
-
 """)
 
 add_docstr(torch._C.std,
@@ -4214,6 +4213,33 @@ Example::
 
 """)
 
+add_docstr(torch._C.unsqueeze,
+"""
+unsqueeze(input, dim, out=None)
+
+Returns a new tensor with a dimension of size one inserted at the
+specified position.
+
+The returned tensor shares the same underlying data with this tensor.
+
+Args:
+    input (Tensor): the input `Tensor`
+    dim (int): The index at which to insert the singleton dimension
+    out (Tensor, optional): The result `Tensor`
+
+Example:
+    >>> x = torch.Tensor([1, 2, 3, 4])
+    >>> torch.unsqueeze(x, 0)
+     1  2  3  4
+    [torch.FloatTensor of size 1x4]
+    >>> torch.unsqueeze(x, 1)
+     1
+     2
+     3
+     4
+    [torch.FloatTensor of size 4x1]
+""")
+
 add_docstr(torch._C.var,
 """
 .. function:: var(input) -> float
@@ -2,7 +2,6 @@ import torch
 import torch._C as _C
 import torch.utils.hooks as hooks
 from collections import OrderedDict
-from itertools import chain
 
 
 class Function(_C._FunctionBase):
@@ -98,9 +97,9 @@ class Function(_C._FunctionBase):
         **This should be called at most once, only from inside the**
         :func:`forward` **method, and all arguments should be outputs.**
 
-        This will mark outputs as non requiring gradient, increasing the
+        This will mark outputs as not requiring gradients, increasing the
         efficiency of backward computation. You still need to accept a gradient
-        for this output in :meth:`~Function.backward`, but it's always going to
+        for each output in :meth:`~Function.backward`, but it's always going to
         be ``None``.
 
         This is used e.g. for indices returned from a max :class:`Function`.
@@ -204,11 +203,17 @@ class NestedIOFunction(Function):
         nested_variables = _unflatten(flat_output, self._nested_output)
         return nested_variables
 
+    def _do_backward(self, gradients, retain_variables):
+        self.retain_variables = retain_variables
+        result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
+        if not retain_variables:
+            del self._nested_output
+            del self._to_save_nested
+        return result
+
     def backward(self, *gradients):
         nested_gradients = _unflatten(gradients, self._nested_output)
-        del self._nested_output
         result = self.backward_extended(*nested_gradients)
-        del self._to_save_nested
         return tuple(_iter_None_tensors(result))
 
     __call__ = _do_forward
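mark_non_differentiable, whose docstring is corrected above, fits into an old-style (0.1.x) autograd Function roughly like this. A hedged sketch with hypothetical names, not code from this diff:

import torch
from torch.autograd import Function, Variable

class IdentityWithMask(Function):
    # Illustrative only: the second output is an integer mask, which
    # cannot be differentiated, so it is marked non-differentiable.
    def forward(self, x):
        mask = x.gt(0)
        self.mark_non_differentiable(mask)
        return x.clone(), mask

    def backward(self, grad_output, grad_mask):
        # grad_mask is always None: mask was marked non-differentiable
        return grad_output

v = Variable(torch.randn(5), requires_grad=True)
out, mask = IdentityWithMask()(v)
out.sum().backward()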
@@ -56,30 +56,6 @@ class Variable(_C._VariableBase):
         'is_cuda',
     }
 
-    @property
-    def grad(self):
-        if self.requires_grad and self._grad is None:
-            # TODO: this won't have to be zeroed in the future
-            self._grad = Variable(self.data.new(self.data.size()).zero_())
-        return self._grad
-
-    @property
-    def requires_grad(self):
-        return self._requires_grad
-
-    @requires_grad.setter
-    def requires_grad(self, value):
-        if self.creator is not None:
-            if value is False:
-                hint = (" If you want to use a computed variable in a subgraph "
-                        "that doesn't require differentiation use "
-                        "var_no_grad = var.detach().")
-            else:
-                hint = ''
-            raise RuntimeError("you can only change requires_grad flags of "
-                               "leaf variables." + hint)
-        self._requires_grad = value
-
     def __getattr__(self, name):
         if name in self._fallthrough_methods:
             return getattr(self.data, name)
@@ -108,19 +84,30 @@ class Variable(_C._VariableBase):
         if self.creator is not None:
             raise RuntimeError("Only Variables created explicitly by the user "
                                "(graph leaves) support the deepcopy protocol at the moment")
-        result = type(self)(self.data.clone(), requires_grad=self.requires_grad,
-                            volatile=self.volatile)
+        result = type(self)(self.data.clone())
+        result.requires_grad = self.requires_grad
+        result.volatile = self.volatile
         memo[id(self)] = result
         return result
 
     def __reduce_ex__(self, proto):
+        state = (self.requires_grad, self.volatile, self._backward_hooks)
         if proto > 1:
-            return super(Variable, self).__reduce_ex__(proto)
+            return type(self), (self.data,), state
         if sys.version_info[0] == 2:
             from copy_reg import __newobj__
         else:
             from copyreg import __newobj__
-        return __newobj__, (type(self),), self.__getstate__()
+        return __newobj__, (type(self), self.data), state
 
     def __setstate__(self, state):
+        if len(state) == 5:
+            # legacy serialization of Variable
+            self.data = state[0]
+            state = (state[3], state[4], state[2])
         if self.creator is not None:
             raise RuntimeError('__setstate__ can be only called on leaf variables')
         self.requires_grad, self.volatile, self._backward_hooks = state
 
     def __repr__(self):
         return 'Variable containing:' + self.data.__repr__()
@@ -225,8 +212,25 @@ class Variable(_C._VariableBase):
         self.creator._reinforce(reward)
 
     def detach(self):
-        """Detaches the Variable from the graph that created it."""
-        return NoGrad()(self)
+        """Returns a new Variable, detached from the current graph.
+
+        Result will never require gradient. If the input is volatile, the output
+        will be volatile too.
+
+        .. note::
+
+          Returned Variable uses the same data tensor, as the original one, and
+          in-place modifications on either of them will be seen, and may trigger
+          errors in correctness checks.
+        """
+        result = NoGrad()(self)  # this is needed, because it merges version counters
+        result._creator = None
+        return result
+
+    def detach_(self):
+        """Detaches the Variable from the graph that created it, making it a leaf."""
+        self._creator = None
+        self.requires_grad = False
 
     def contiguous(self):
         self.data = self.data.contiguous()
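The two methods added above differ as follows: detach() returns a new leaf Variable sharing the same data tensor, while detach_() severs the Variable itself from its graph. A short sketch against the same 0.1.x API:

import torch
from torch.autograd import Variable

x = Variable(torch.randn(4), requires_grad=True)
a = x * 2

d = a.detach()          # new Variable: shares a.data, creator is None
assert d.creator is None and not d.requires_grad

a.detach_()             # a itself becomes a leaf from here on
assert a.creator is None and not a.requires_grad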
@@ -198,7 +198,7 @@ def forward(fn, input, hx, weight, output, hy):
             'input must have 3 dimensions, got {}'.format(input.dim()))
     if fn.input_size != input.size(2):
         raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
-            fn.input_size
+            fn.input_size, input.size(2)
         ))
     if fn.dropout != 0 and cudnn.version() < 5103:
         raise RuntimeError('dropout supported only in cudnn v5.1 and above')
@@ -206,6 +206,9 @@ def forward(fn, input, hx, weight, output, hy):
     fn.seq_length, fn.mini_batch, fn.input_size = input.size()
     hidden_size = _hidden_size(fn)
     output_size = _output_size(fn)
 
+    assert hx.is_contiguous()
+    assert cx is None or cx.is_contiguous()
     x = input.contiguous()
     output.resize_(*output_size)
     hy.resize_(*hidden_size)
@@ -319,6 +322,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
     hidden_size = _hidden_size(fn)
     output_size = _output_size(fn)
 
+    assert hx.is_contiguous()
+    assert cx is None or cx.is_contiguous()
     x = input.contiguous()
     dy = grad_output.contiguous()
     y = output
@@ -351,6 +356,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
     if dcy is not None and tuple(dcy.size()) != hidden_size:
         raise RuntimeError('Expected d_cell size {}, got {}'.format(
             hidden_size, dcy.size()))
+    if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
+        raise RuntimeError('Gradients aren\'t CUDA tensors')
 
     check_error(cudnn.lib.cudnnRNNBackwardData(
         handle,
@@ -395,6 +402,7 @@ def backward_weight(fn, input, hx, output, weight, grad_weight):
         hx, cx = hx
     else:
         cx = None
 
     if fn.batch_first:
         input = input.transpose(0, 1)
+        output = output.transpose(0, 1)
@@ -407,12 +415,12 @@ def backward_weight(fn, input, hx, output, weight, grad_weight):
     if tuple(input.size()) != input_size:
         raise RuntimeError('Expected input size {}, got {}'.format(
             input_size, tuple(input.size())))
     if not fn.train:
         raise RuntimeError('backward_weight can only be called when training!')
     if tuple(hx.size()) != hidden_size:
         raise RuntimeError('Expected input size {}, got {}'.format(
             hidden_size, hx.size()))
 
     assert hx.is_contiguous()
     assert cx is None or cx.is_contiguous()
     x = input.contiguous()
     y = output
     dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()
torch/csrc/DynamicTypes.cpp (new file): 161 lines
@@ -0,0 +1,161 @@
#include "DynamicTypes.h"

#include "THP.h"
#include <vector>
#include <unordered_map>
#include <THPP/tensors/THTensor.hpp>
#include <THPP/tensors/THSTensor.hpp>

#ifdef WITH_CUDA
#include <THC/THC.h>
#include <THPP/tensors/THCTensor.hpp>
extern THCState* state;
#endif


using namespace thpp;

namespace torch {

struct TensorType {
  Type data_type;
  bool is_cuda;
  bool is_sparse;

  friend bool operator==(const TensorType &t1, const TensorType &t2)
  {
    return (t1.data_type == t2.data_type &&
            t1.is_cuda == t2.is_cuda &&
            t1.is_sparse == t2.is_sparse);
  }

  friend bool operator!=(const TensorType &t1, const TensorType &t2)
  {
    return !(t1 == t2);
  }
};

struct TensorTypeHasher
{
  std::size_t operator()(const TensorType& k) const
  {
    size_t hash = static_cast<size_t>(k.data_type);
    hash = (hash << 8) + k.is_cuda;
    hash = (hash << 1) + k.is_sparse;
    return hash;
  }
};

static std::unordered_map<std::string, Type> type_names = {
  {"Float", Type::FLOAT},
  {"Double", Type::DOUBLE},
  {"Half", Type::HALF},
  {"Byte", Type::UCHAR},
  {"Char", Type::CHAR},
  {"Short", Type::SHORT},
  {"Int", Type::INT},
  {"Long", Type::LONG},
};
static std::unordered_map<PyTypeObject*, TensorType> pytype_to_tensortype;
static std::unordered_map<TensorType, PyTypeObject*, TensorTypeHasher> tensortype_to_pytype;

void registerPyTypeObject(PyTypeObject *pytype, const std::string& name, bool is_cuda, bool is_sparse)
{
  TensorType type;
  type.data_type = type_names.at(name);
  type.is_cuda = is_cuda;
  type.is_sparse = is_sparse;

  pytype_to_tensortype[pytype] = type;
  tensortype_to_pytype[type] = pytype;
}

PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor)
{
  TensorType type;
  type.data_type = tensor.type();
  type.is_cuda = tensor.isCuda();
  type.is_sparse = tensor.isSparse();

  return tensortype_to_pytype.at(type);
}

static std::unique_ptr<Tensor> createTensor(void *tensor, Type type, bool is_cuda, bool is_sparse)
{
  if (is_cuda) {
#ifdef WITH_CUDA
    if (type == Type::UCHAR) {
      return std::unique_ptr<Tensor>(new THCTensor<unsigned char>(state, (THCudaByteTensor*)tensor));
    } else if (type == Type::CHAR) {
      return std::unique_ptr<Tensor>(new THCTensor<char>(state, (THCudaCharTensor*)tensor));
    } else if (type == Type::SHORT) {
      return std::unique_ptr<Tensor>(new THCTensor<short>(state, (THCudaShortTensor*)tensor));
    } else if (type == Type::INT) {
      return std::unique_ptr<Tensor>(new THCTensor<int>(state, (THCudaIntTensor*)tensor));
    } else if (type == Type::LONG) {
      return std::unique_ptr<Tensor>(new THCTensor<long>(state, (THCudaLongTensor*)tensor));
    } else if (type == Type::FLOAT) {
      return std::unique_ptr<Tensor>(new THCTensor<float>(state, (THCudaTensor*)tensor));
    } else if (type == Type::DOUBLE) {
      return std::unique_ptr<Tensor>(new THCTensor<double>(state, (THCudaDoubleTensor*)tensor));
    } else if (type == Type::HALF) {
      return std::unique_ptr<Tensor>(new THCTensor<half>(state, (THCudaHalfTensor*)tensor));
    }
#else
    throw std::runtime_error("Compiled without CUDA support");
#endif
  } else if (is_sparse) {
    if (type == Type::UCHAR) {
      return std::unique_ptr<Tensor>(new THSTensor<unsigned char>((THSByteTensor*)tensor));
    } else if (type == Type::CHAR) {
      return std::unique_ptr<Tensor>(new THSTensor<char>((THSCharTensor*)tensor));
    } else if (type == Type::SHORT) {
      return std::unique_ptr<Tensor>(new THSTensor<short>((THSShortTensor*)tensor));
    } else if (type == Type::INT) {
      return std::unique_ptr<Tensor>(new THSTensor<int>((THSIntTensor*)tensor));
    } else if (type == Type::LONG) {
      return std::unique_ptr<Tensor>(new THSTensor<long>((THSLongTensor*)tensor));
    } else if (type == Type::FLOAT) {
      return std::unique_ptr<Tensor>(new THSTensor<float>((THSFloatTensor*)tensor));
    } else if (type == Type::DOUBLE) {
      return std::unique_ptr<Tensor>(new THSTensor<double>((THSDoubleTensor*)tensor));
    }
  } else if (type == Type::UCHAR) {
    return std::unique_ptr<Tensor>(new THTensor<unsigned char>((THByteTensor*)tensor));
  } else if (type == Type::CHAR) {
    return std::unique_ptr<Tensor>(new THTensor<char>((THCharTensor*)tensor));
  } else if (type == Type::SHORT) {
    return std::unique_ptr<Tensor>(new THTensor<short>((THShortTensor*)tensor));
  } else if (type == Type::INT) {
    return std::unique_ptr<Tensor>(new THTensor<int>((THIntTensor*)tensor));
  } else if (type == Type::LONG) {
    return std::unique_ptr<Tensor>(new THTensor<long>((THLongTensor*)tensor));
  } else if (type == Type::FLOAT) {
    return std::unique_ptr<Tensor>(new THTensor<float>((THFloatTensor*)tensor));
  } else if (type == Type::DOUBLE) {
    return std::unique_ptr<Tensor>(new THTensor<double>((THDoubleTensor*)tensor));
  }
  throw std::invalid_argument("Unsupported tensor type");
}

std::unique_ptr<Tensor> createTensor(PyObject *data)
{
  auto tensor_type = pytype_to_tensortype.at(Py_TYPE(data));
  auto type = tensor_type.data_type;
  auto tensor = ((THPVoidTensor *)data)->cdata;
  auto wrapper = createTensor(tensor, type, tensor_type.is_cuda, tensor_type.is_sparse);
  wrapper->retain();
  return wrapper;
}

PyObject* createPyObject(const thpp::Tensor& tensor)
{
  auto type = getPyTypeObject(tensor);
  PyObject *obj = type->tp_alloc(type, 0);
  if (obj) {
    ((THPVoidTensor*)obj)->cdata = (THVoidTensor *)const_cast<thpp::Tensor&>(tensor).retain().cdata();
  }
  return obj;
}

} // namespace
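DynamicTypes.cpp above implements a two-way registry keyed on (data type, is_cuda, is_sparse). For orientation, the same idea in a few lines of Python (hypothetical names, not part of the codebase):

# pytype <-> (dtype, is_cuda, is_sparse), mirroring the two unordered_maps above
pytype_to_tensortype = {}
tensortype_to_pytype = {}

def register_py_type_object(pytype, name, is_cuda, is_sparse):
    key = (name, is_cuda, is_sparse)
    pytype_to_tensortype[pytype] = key
    tensortype_to_pytype[key] = pytype

def get_py_type_object(name, is_cuda, is_sparse):
    # lookup in the reverse direction, as getPyTypeObject does in C++
    return tensortype_to_pytype[(name, is_cuda, is_sparse)]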
torch/csrc/DynamicTypes.h (new file): 25 lines
@@ -0,0 +1,25 @@
#pragma once

// Provides conversions between Python tensor objects and thpp::Tensors.

#include <memory>
#include <Python.h>
#include <THPP/THPP.h>

namespace torch {

// Register a PyTypeObject* with the given attributes
void registerPyTypeObject(
    PyTypeObject *pytype, const std::string& name,
    bool is_cuda, bool is_sparse);

// Gets the PyTypeObject* corresponding to the Tensor
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor);

// Creates a Tensor from a Python tensor object
std::unique_ptr<thpp::Tensor> createTensor(PyObject *data);

// Creates Python tensor object from a Tensor
PyObject* createPyObject(const thpp::Tensor& tensor);

} // namespace torch
@@ -5,8 +5,6 @@
 #include <stdexcept>
 #include <string>
 
-#include "THP.h"
-
 #define HANDLE_TH_ERRORS \
   try {
@@ -21,6 +19,11 @@
 extern PyObject *THPException_FatalError;
 
 #ifdef _THP_CORE
 
+// Throwing this exception means that the python error flags have been already
+// set and control should be immediately returned to the interpreter.
+class python_error : public std::exception {};
+
 struct THException: public std::exception {
   THException(const char* msg): msg(msg) {};
@@ -33,25 +33,23 @@ static bool THPModule_loadClasses(PyObject *self)
     THPUtils_setError("class loader couldn't access torch module");
     return false;
   }
-  PyObject* module_dict = PyModule_GetDict(torch_module);
 
-  ASSERT_NOT_NULL(tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_tensor_classes"));
+  ASSERT_NOT_NULL(tensor_classes = PyObject_GetAttrString(torch_module, (char*)"_tensor_classes"));
+  if (!THPDoubleTensor_postInit(torch_module)) return false;
+  if (!THPFloatTensor_postInit(torch_module)) return false;
+  if (!THPLongTensor_postInit(torch_module)) return false;
+  if (!THPIntTensor_postInit(torch_module)) return false;
+  if (!THPShortTensor_postInit(torch_module)) return false;
+  if (!THPCharTensor_postInit(torch_module)) return false;
+  if (!THPByteTensor_postInit(torch_module)) return false;
 
-  ASSERT_NOT_NULL(THPDoubleStorageClass = PyMapping_GetItemString(module_dict,(char*)"DoubleStorage"));
-  ASSERT_NOT_NULL(THPFloatStorageClass = PyMapping_GetItemString(module_dict,(char*)"FloatStorage"));
-  ASSERT_NOT_NULL(THPLongStorageClass = PyMapping_GetItemString(module_dict,(char*)"LongStorage"));
-  ASSERT_NOT_NULL(THPIntStorageClass = PyMapping_GetItemString(module_dict,(char*)"IntStorage"));
-  ASSERT_NOT_NULL(THPShortStorageClass = PyMapping_GetItemString(module_dict,(char*)"ShortStorage"));
-  ASSERT_NOT_NULL(THPCharStorageClass = PyMapping_GetItemString(module_dict,(char*)"CharStorage"));
-  ASSERT_NOT_NULL(THPByteStorageClass = PyMapping_GetItemString(module_dict,(char*)"ByteStorage"));
 
-  ASSERT_NOT_NULL(THPDoubleTensorClass = PyMapping_GetItemString(module_dict,(char*)"DoubleTensor"));
-  ASSERT_NOT_NULL(THPFloatTensorClass = PyMapping_GetItemString(module_dict,(char*)"FloatTensor"));
-  ASSERT_NOT_NULL(THPLongTensorClass = PyMapping_GetItemString(module_dict,(char*)"LongTensor"));
-  ASSERT_NOT_NULL(THPIntTensorClass = PyMapping_GetItemString(module_dict,(char*)"IntTensor"));
-  ASSERT_NOT_NULL(THPShortTensorClass = PyMapping_GetItemString(module_dict,(char*)"ShortTensor"));
-  ASSERT_NOT_NULL(THPCharTensorClass = PyMapping_GetItemString(module_dict,(char*)"CharTensor"));
-  ASSERT_NOT_NULL(THPByteTensorClass = PyMapping_GetItemString(module_dict,(char*)"ByteTensor"));
+  ASSERT_NOT_NULL(THPDoubleStorageClass = PyObject_GetAttrString(torch_module,(char*)"DoubleStorage"));
+  ASSERT_NOT_NULL(THPFloatStorageClass = PyObject_GetAttrString(torch_module,(char*)"FloatStorage"));
+  ASSERT_NOT_NULL(THPLongStorageClass = PyObject_GetAttrString(torch_module,(char*)"LongStorage"));
+  ASSERT_NOT_NULL(THPIntStorageClass = PyObject_GetAttrString(torch_module,(char*)"IntStorage"));
+  ASSERT_NOT_NULL(THPShortStorageClass = PyObject_GetAttrString(torch_module,(char*)"ShortStorage"));
+  ASSERT_NOT_NULL(THPCharStorageClass = PyObject_GetAttrString(torch_module,(char*)"CharStorage"));
+  ASSERT_NOT_NULL(THPByteStorageClass = PyObject_GetAttrString(torch_module,(char*)"ByteStorage"));
 
   return true;
 #undef ASSERT_NOT_NULL
@@ -92,6 +90,7 @@ static PyObject * THPModule_initExtension(PyObject *self, PyObject *shm_manager_
   libshm_init(THPUtils_bytesAsString(shm_manager_path));
   if (!THPModule_loadClasses(self)) return NULL;
   if (!THPModule_assignStateless(self)) return NULL;
+  if (!THPAutograd_initFunctions(self)) return NULL;
   return PyBool_FromLong(true);
 }
 
@@ -243,6 +242,7 @@ IMPLEMENT_STATELESS(topk)
 IMPLEMENT_STATELESS(t)
 IMPLEMENT_STATELESS(transpose)
 IMPLEMENT_STATELESS(squeeze)
+IMPLEMENT_STATELESS(unsqueeze)
 IMPLEMENT_STATELESS(renorm)
 IMPLEMENT_STATELESS(dist)
 IMPLEMENT_STATELESS(linspace)
@@ -593,6 +593,7 @@ static PyMethodDef TorchMethods[] = {
   {"t", (PyCFunction)THPModule_t, METH_VARARGS | METH_KEYWORDS, NULL},
   {"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS | METH_KEYWORDS, NULL},
   {"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS | METH_KEYWORDS, NULL},
+  {"unsqueeze", (PyCFunction)THPModule_unsqueeze, METH_VARARGS | METH_KEYWORDS, NULL},
  {"nonzero", (PyCFunction)THPModule_nonzero, METH_VARARGS | METH_KEYWORDS, NULL},
  {"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS | METH_KEYWORDS, NULL},
  {"dist", (PyCFunction)THPModule_dist, METH_VARARGS | METH_KEYWORDS, NULL},

@@ -6,20 +6,16 @@ PyObject* sparse_tensor_classes;
// SPARSE MODULE INITIALIZATION
////////////////////////////////////////////////////////////////////////////////

static bool THSPModule_loadClasses(PyObject *module_dict)
static bool THSPModule_loadClasses(PyObject *sparse_module)
{
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
  ASSERT_NOT_NULL(sparse_tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_sparse_tensor_classes"));
  ASSERT_NOT_NULL(THSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
  ASSERT_NOT_NULL(THSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
  ASSERT_NOT_NULL(THSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
  ASSERT_NOT_NULL(THSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
  ASSERT_NOT_NULL(THSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
  ASSERT_NOT_NULL(THSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
  ASSERT_NOT_NULL(THSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));

  if (!THSPDoubleTensor_postInit(sparse_module)) return false;
  if (!THSPFloatTensor_postInit(sparse_module)) return false;
  if (!THSPLongTensor_postInit(sparse_module)) return false;
  if (!THSPIntTensor_postInit(sparse_module)) return false;
  if (!THSPShortTensor_postInit(sparse_module)) return false;
  if (!THSPCharTensor_postInit(sparse_module)) return false;
  if (!THSPByteTensor_postInit(sparse_module)) return false;
  return true;
#undef ASSERT_NOT_NULL
}

static bool THSPModule_assignStateless()
@@ -50,18 +46,11 @@ static bool THSPModule_assignStateless()
// Callback for python part. Used for additional initialization of python classes
PyObject *THSPModule_initExtension(PyObject *self)
{
#define ASSERT_TRUE(cond) if (!(cond)) { Py_RETURN_FALSE; }
  PyObject *module = PyImport_ImportModule("torch.sparse");
  if (!module) {
    THPUtils_setError("class loader couldn't access torch.sparse module");
    return NULL;
  }

  PyObject* module_dict = PyModule_GetDict(module);
  ASSERT_TRUE(THSPModule_loadClasses(module_dict));
  ASSERT_TRUE(THSPModule_assignStateless());
  Py_RETURN_TRUE;
#undef ASSERT_TRUE
  if (!module) return NULL;
  if (!THSPModule_loadClasses(module)) return NULL;
  if (!THSPModule_assignStateless()) return NULL;
  Py_RETURN_NONE;
}

////////////////////////////////////////////////////////////////////////////////
@@ -80,19 +69,19 @@ bool THPModule_isSparseTensor(PyObject *obj)
#define IMPLEMENT_SPARSE_STATELESS(name) \
static PyObject * TH_CONCAT_2(THSPModule_, name)(PyObject *_unused, PyObject *args, PyObject *kwargs) \
{ \
  PyObject *tensor = THSPFloatTensorClass; \
  PyObject *key, *value; \
  Py_ssize_t pos = 0; \
  for (int i = 0; i < PyTuple_Size(args); i++) { \
    PyObject *item = PyTuple_GET_ITEM(args, i); \
    if (THPModule_isTensor(item) || THPVariable_CheckType(item, THPModule_isSparseTensor)) { \
    if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
      tensor = item; \
      goto dispatch; \
    } \
  } \
  if (kwargs) { \
    while (PyDict_Next(kwargs, &pos, &key, &value)) { \
      if (THPModule_isTensor(value) || THPVariable_CheckType(value, THPModule_isSparseTensor)) { \
      if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
        tensor = value; \
        goto dispatch; \
      } \
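The IMPLEMENT_SPARSE_STATELESS macro scans positional and then keyword arguments for the first tensor-like value and dispatches on its type, falling back to a default class. A simplified standalone model of that scan, with toy value types in plain C++ instead of the CPython API:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Value { bool is_tensor; std::string type_name; };

// Scan args, then kwargs, for the first tensor; fall back to a default type.
// This mirrors the macro's "find a tensor, goto dispatch" control flow.
static std::string dispatch_type(const std::vector<Value>& args,
                                 const std::map<std::string, Value>& kwargs) {
  for (const auto& v : args)
    if (v.is_tensor) return v.type_name;
  for (const auto& kv : kwargs)
    if (kv.second.is_tensor) return kv.second.type_name;
  return "FloatTensor";  // default dispatch class
}

int main() {
  std::vector<Value> args = {{false, "int"}, {true, "DoubleTensor"}};
  std::cout << dispatch_type(args, {}) << "\n";  // prints DoubleTensor
}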

@@ -54,6 +54,50 @@ static PyObject * THPSize_repr(THPSize *self)
#endif
}

extern PyTypeObject THPSizeType;

template<typename FnType, FnType fn, typename ...Args>
static PyObject* wrap_tuple_fn(Args ... args)
{
  PyObject *result = (*fn)(std::forward<Args>(args)...);
  if (!result) return NULL;
  if (PyTuple_Check(result)) {
    return PyObject_CallFunctionObjArgs((PyObject*)&THPSizeType, result, NULL);
  }
  Py_INCREF(result);
  return result;
}

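wrap_tuple_fn forwards to the original PyTuple slot function and re-wraps any tuple result in THPSizeType, so slicing or concatenating a torch.Size yields another torch.Size instead of a plain tuple. The same wrap-and-rebox idea in a standalone C++17 template, with toy types standing in for the CPython slots:

#include <iostream>
#include <utility>

struct Tuple { int n; };
struct Size : Tuple {};  // "subclass" whose operations should stay a Size

// A base operation that returns the base type, like PyTuple's sq_concat.
static Tuple concat(Tuple a, Tuple b) { return Tuple{a.n + b.n}; }

// Wrap a function pointer: call it, then re-wrap the result in the subclass.
template <typename FnType, FnType fn, typename... Args>
static Size wrap_fn(Args... args) {
  Tuple result = (*fn)(std::forward<Args>(args)...);
  return Size{{result.n}};  // re-box, mirroring the THPSizeType call
}

int main() {
  Size s = wrap_fn<decltype(&concat), &concat>(Tuple{2}, Tuple{3});
  std::cout << s.n << "\n";  // 5, and still a Size
}

Delegating the remaining slots straight to PyTuple_Type, as the tables below do, keeps every untouched operation byte-for-byte identical to the tuple behavior.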
static auto sq_concat = PyTuple_Type.tp_as_sequence->sq_concat;
static auto sq_repeat = PyTuple_Type.tp_as_sequence->sq_repeat;
#if PY_MAJOR_VERSION == 2
static auto sq_slice = PyTuple_Type.tp_as_sequence->sq_slice;
#endif
static auto mp_subscript = PyTuple_Type.tp_as_mapping->mp_subscript;


static PySequenceMethods THPSize_as_sequence = {
  PyTuple_Type.tp_as_sequence->sq_length,
  wrap_tuple_fn<decltype(&sq_concat), &sq_concat>,
  wrap_tuple_fn<decltype(&sq_repeat), &sq_repeat>,
  PyTuple_Type.tp_as_sequence->sq_item,
#if PY_MAJOR_VERSION == 2
  wrap_tuple_fn<decltype(&sq_slice), &sq_slice>,
#else
  0,                                     /* sq_slice */
#endif
  0,                                     /* sq_ass_item */
  0,                                     /* sq_ass_slice */
  PyTuple_Type.tp_as_sequence->sq_contains
};

static PyMappingMethods THPSize_as_mapping = {
  PyTuple_Type.tp_as_mapping->mp_length,
  wrap_tuple_fn<decltype(&mp_subscript), &mp_subscript>,
  0
};


PyTypeObject THPSizeType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "torch.Size",                          /* tp_name */
@@ -66,8 +110,8 @@ PyTypeObject THPSizeType = {
  0,                                     /* tp_reserved */
  (reprfunc)THPSize_repr,                /* tp_repr */
  0,                                     /* tp_as_number */
  0,                                     /* tp_as_sequence */
  0,                                     /* tp_as_mapping */
  &THPSize_as_sequence,                  /* tp_as_sequence */
  &THPSize_as_mapping,                   /* tp_as_mapping */
  0,                                     /* tp_hash */
  0,                                     /* tp_call */
  0,                                     /* tp_str */

@@ -9,6 +9,7 @@

#include "THP.h"
#include "copy_utils.h"
#include "DynamicTypes.h"

#include "generic/Tensor.cpp"
#include <TH/THGenerateAllTypes.h>

@@ -2,9 +2,10 @@
#define THP_AUTOGRAD_H

PyObject * THPAutograd_initExtension(PyObject *_unused);
bool THPAutograd_initFunctions(PyObject* module);

#include "variable.h"
#include "function.h"
#include "engine.h"
#include "torch/csrc/autograd/python_function.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/python_engine.h"

#endif

@@ -1,342 +1,177 @@
#include <Python.h>
#include <structmember.h>
#include "torch/csrc/autograd/engine.h"

#include <vector>
#include <unordered_map>
#include <deque>
#include <set>
#include <unordered_set>
#include <string>
#include <THPP/THPP.h>

#include "THP.h"
using thpp::Tensor;

PyObject *THPEngineClass = NULL;
namespace torch { namespace autograd {

// used for topological sort
using dependencies_type = std::unordered_map<THPFunction *, int>;
// stores gradient buffers
using grad_list_type = std::vector<THPObjectPtr>;
// used for need_copy set (to ensure correct gradient buffering)
using buffer_set_type = std::set<std::pair<size_t, int>>;
// gradient buffer - a list of gradient tensors + id
struct grad_buffer_type: public grad_list_type {
  template<typename... Args>
  grad_buffer_type(size_t buffer_id, Args&&... args):
    grad_list_type(std::forward<Args>(args)...),
    buffer_id(buffer_id) {};
  grad_buffer_type(grad_buffer_type &&other):
    grad_list_type(std::move(other)),
    buffer_id(other.buffer_id) {};
  grad_buffer_type& operator=(grad_buffer_type &&other) {
    grad_list_type::operator=(std::move(other));
    buffer_id = other.buffer_id;
    return *this;
  };

  size_t buffer_id;
};
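grad_buffer_type is essentially a per-function array of gradient slots plus an identifier used by the need_copy bookkeeping. A stripped-down standalone model of such a slot buffer, with plain doubles in place of tensor objects (names are illustrative, not the real GradBuffer API):

#include <cassert>
#include <cstddef>
#include <vector>

// One gradient slot per function output; slot 0 of a two-output function
// collects the gradient flowing into its first output, and so on.
struct GradBufferSketch {
  size_t buffer_id;
  std::vector<double> slots;

  GradBufferSketch(size_t id, size_t num_outputs)
    : buffer_id(id), slots(num_outputs, 0.0) {}

  void add_grad(size_t output_nr, double g) {
    assert(output_nr < slots.size());
    slots[output_nr] += g;  // accumulate when a slot receives two gradients
  }
};

int main() {
  GradBufferSketch buf(/*id=*/1, /*num_outputs=*/2);
  buf.add_grad(0, 0.5);
  buf.add_grad(0, 0.25);  // second contribution to the same output
  assert(buf.slots[0] == 0.75);
}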
// used for the queue of nodes ready for processing
using ready_queue_type = std::deque<std::pair<THPFunction *, grad_buffer_type>>;

// Computes graph dependencies (using a super simple topological sort)
void THPEngine_compute_dependencies(std::vector<THPFunction*> queue,
    dependencies_type& dependencies, ready_queue_type& ready)
{
  std::set<THPFunction *> seen;
  while (queue.size() > 0) {
    THPFunction *fn = queue.back(); queue.pop_back();
    for (int i = 0; i < fn->num_inputs; i++) {
      THPFunction *prev_fn = (THPFunction*)fn->previous_functions[i].get();
      // We can ignore variables (their backprop is called every time we have
      // gradient ready).
      if (THPVariable_Check((PyObject*)prev_fn))
        continue;
      // Stochastic functions are ready for backward immediately
      if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass) &&
          prev_fn->requires_grad &&
          seen.count(prev_fn) == 0) {
        ready.emplace_back(prev_fn, grad_buffer_type(0));
      } else if (fn->requires_grad && prev_fn->requires_grad) {
        dependencies[prev_fn] += 1;
auto Engine::compute_dependencies(function_queue queue, ready_queue_type& ready) -> dependencies_type {
  // First, search the graph and find all stochastic functions. Append them to the queue.
  std::unordered_set<Function*> seen;
  function_queue search_queue(queue);
  while (search_queue.size() > 0) {
    auto fn = search_queue.back(); search_queue.pop_back();
    for (auto& prev_fn_pair : fn->previous_functions) {
      auto& prev_fn = prev_fn_pair.first;
      Function* prev_ptr = prev_fn.get();
      if (!prev_ptr) continue;
      if (prev_ptr->is_stochastic && prev_ptr->requires_grad && seen.count(prev_ptr) == 0) {
        ready.emplace_back(prev_fn, GradBuffer(0));
        queue.push_back(prev_ptr);
      }
      if (seen.count(prev_fn) == 0) {
        seen.insert(prev_fn);
        queue.push_back(prev_fn);
      if (seen.count(prev_ptr) == 0) {
        seen.insert(prev_ptr);
        search_queue.push_back(prev_ptr);
      }
    }
  }
}

// Frees backward dependency and returns true if prev_fn is ready for backward
bool THPEngine_free_backward_dependency(dependencies_type &dependencies,
    THPFunction *prev_fn)
{
  int deps = --dependencies[prev_fn];
  if (deps < 0) {
    std::string msg = "dependencies is negative: ";
    msg += Py_TYPE((PyObject*)prev_fn)->tp_name;
    throw std::runtime_error(msg);
  }
  if (deps == 0) {
    dependencies.erase(prev_fn);
    return true;
  }
  return false;
}
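Both the old and the new version implement the same idea: walk the graph once, count how many consumers each function has, then decrement that count as gradients arrive; a function becomes ready when its count hits zero. A self-contained model of that reference-counted topological order (toy Node type, not the real Function class):

#include <cassert>
#include <unordered_map>
#include <vector>

struct Node { std::vector<Node*> prev; };

// One DFS pass counts, for every predecessor, how many edges point at it.
static std::unordered_map<Node*, int> compute_dependencies(std::vector<Node*> queue) {
  std::unordered_map<Node*, int> deps;
  std::vector<Node*> seen(queue);
  while (!queue.empty()) {
    Node* fn = queue.back(); queue.pop_back();
    for (Node* prev : fn->prev) {
      deps[prev] += 1;
      bool unseen = true;
      for (Node* s : seen) if (s == prev) { unseen = false; break; }
      if (unseen) { seen.push_back(prev); queue.push_back(prev); }
    }
  }
  return deps;
}

// Decrement on each delivered gradient; zero means every consumer reported in.
static bool free_dependency(std::unordered_map<Node*, int>& deps, Node* fn) {
  auto it = deps.find(fn);
  assert(it != deps.end() && it->second > 0);
  if (--it->second == 0) { deps.erase(it); return true; }
  return false;
}

int main() {
  Node a, b, root;                 // a and b both feed into root's inputs
  root.prev = {&a, &a, &b};        // a is consumed twice
  auto deps = compute_dependencies({&root});
  assert(!free_dependency(deps, &a));  // first of two gradients for a
  assert(free_dependency(deps, &a));   // now a is ready
  assert(free_dependency(deps, &b));
}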

// Accumulates d_prev_fn gradient tensor into output_idx position of prev_grad buffer
bool THPEngine_add_grad(buffer_set_type &need_copy, grad_buffer_type &prev_grad,
    int output_nr, PyObject *d_prev_fn)
{
  // TODO: we should probably clean up need_copy, because most tensors will
  // probably never hit the else clause
  auto set_key = std::make_pair(prev_grad.buffer_id, output_nr);
  if (!prev_grad[output_nr]) {
    Py_INCREF(d_prev_fn);
    prev_grad[output_nr] = d_prev_fn;
    need_copy.insert(set_key);
  } else {
    PyObject *grad_tensor = prev_grad[output_nr];
    if (need_copy.count(set_key) != 0) {
      grad_tensor = PyObject_CallMethod(grad_tensor, "clone", "");
      if (!grad_tensor)
        return false;
      need_copy.erase(set_key);
      prev_grad[output_nr] = grad_tensor;
    }
    THPObjectPtr result = PyObject_CallMethod(grad_tensor, "add_", "O", d_prev_fn);
    if (!result)
      return false;
  }
  return true;
}
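THPEngine_add_grad avoids copying on the common path: the first gradient is stored by reference, and the stored tensor is cloned only when a second gradient must be accumulated into a slot the buffer does not exclusively own. A simplified copy-on-write sketch of that policy using shared ownership (plain vectors instead of tensors):

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

using Tensor = std::vector<double>;

// Accumulate grad into slot; clone the stored tensor first if it is shared,
// so the in-place add never mutates a tensor someone else still references.
static void add_grad(std::shared_ptr<Tensor>& slot, std::shared_ptr<Tensor> grad) {
  if (!slot) { slot = std::move(grad); return; }  // first gradient: no copy
  if (slot.use_count() > 1)                       // shared: clone before add
    slot = std::make_shared<Tensor>(*slot);
  for (size_t i = 0; i < slot->size(); ++i)
    (*slot)[i] += (*grad)[i];
}

int main() {
  auto g1 = std::make_shared<Tensor>(Tensor{1.0, 2.0});
  auto g2 = std::make_shared<Tensor>(Tensor{0.5, 0.5});
  std::shared_ptr<Tensor> slot;
  add_grad(slot, g1);             // stored by reference, no copy yet
  add_grad(slot, g2);             // g1 is still referenced, so clone first
  assert((*g1)[0] == 1.0);        // original gradient tensor is untouched
  assert((*slot)[0] == 1.5);
}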
// Main backward function
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
{
  PyObject *variables = NULL;
  PyObject *grad_variables = NULL;
  unsigned char retain_variables = 0;
  size_t next_buf_id = 0;
  const char *accepted_kwargs[] = {"variables", "grad_variables",
      "retain_variables", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
        &variables, &grad_variables, &retain_variables))
    return NULL;
  PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;

  THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
      "retain_variables argument is expected to be a bool, but got %s",
      THPUtils_typename(retain_variables_obj));
  THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
      "be a tuple, but got %s", THPUtils_typename(variables));
  THPUtils_assert(PyTuple_Check(grad_variables), "variables argument is "
      "expected to be a tuple, but got %s", THPUtils_typename(grad_variables));

  Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
  Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
  THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
      "gradients", num_variables, num_gradients);

  ready_queue_type ready;
  std::unordered_map<THPFunction *, grad_buffer_type> not_ready;
  // Now, queue contains all nodes that will start propagating gradients. We no longer have
  // to expand functions that don't require grad.
  dependencies_type dependencies;
  buffer_set_type need_copy;
  seen.clear();
  // Just to make sure that they will never be added to the queue again
  seen.insert(queue.begin(), queue.end());
  while (queue.size() > 0) {
    auto fn = std::move(queue.back()); queue.pop_back();
    // This is needed only to filter out backward roots that don't require grad
    if (!fn->requires_grad) continue;
    for (auto& prev_fn_pair : fn->previous_functions) {
      Function* prev_ptr = prev_fn_pair.first.get();
      if (!prev_ptr) continue;
      if (dynamic_cast<Variable*>(prev_ptr)) continue;
      if (!prev_ptr->requires_grad) continue;
      if (prev_ptr->is_stochastic) continue; // Stochastic nodes were in the queue already
      dependencies[prev_ptr] += 1;
      if (seen.count(prev_ptr) == 0) {
        seen.insert(prev_ptr);
        queue.push_back(prev_ptr);
      }
    }
  }
  return dependencies;
}

auto Engine::backward(const variable_list& variables,
                      tensor_list& grad_variables,
                      bool retain_variables) -> void {
  function_queue creators;
  ready_queue_type ready;

  bool did_leaf_backward = false;
  std::vector<THPFunction*> creators;
  for (int i = 0; i < num_variables; i++) {
    THPVariable *variable = (THPVariable*)PyTuple_GET_ITEM(variables, i);
    PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
    THPUtils_assert(THPVariable_Check((PyObject*)variable), "element %d of variables "
        "tuple is not a Variable", i);
    // If someone calls .backward() on a leaf, it's simple...
    if (variable->creator == NULL) {
      if (variable->requires_grad) {
        THPObjectPtr result = PyObject_CallMethod((PyObject*)variable,
            "_do_backward", "(O)O", grad, retain_variables_obj);
        if (!result) return NULL;
  int size = variables.size();
  for (int i = 0; i < size; ++i) {
    auto& var = variables[i];
    auto& grad = grad_variables[i];
    if (!var->creator) {
      // If someone calls .backward() on a leaf, it's simple...
      if (var->requires_grad) {
        var->backward(std::make_shared<Variable>(std::move(grad), false, true));
        did_leaf_backward = true;
      }
      continue;
    }
    THPFunction *creator = (THPFunction*)variable->creator;
    creators.push_back(creator);
    // Initialize the queue
    if (creator->requires_grad) {
      grad_buffer_type buf(next_buf_id++, creator->num_outputs);
      Py_INCREF(grad);
      buf[variable->output_nr] = grad;
      ready.emplace_front(creator, std::move(buf));
    } else {
    creators.push_back(var->creator.get());
    if (var->creator->requires_grad) {
      GradBuffer buf(var->creator->num_outputs);
      buf.addGrad(var->output_nr, Variable::of(std::move(grad)));
      ready.emplace_front(var->creator, std::move(buf));
    }
    }
  }

  THPEngine_compute_dependencies(std::move(creators), dependencies, ready);
  auto dependencies = compute_dependencies(std::move(creators), ready);

  THPUtils_assert(did_leaf_backward || ready.size() > 0, "there are no graph "
      "nodes that require computing gradients");
  if (!did_leaf_backward && ready.size() == 0) {
    throw std::runtime_error(
        "there are no graph nodes that require computing gradients");
  }
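Seeding the backward pass works the same way in both versions: leaves are handled immediately, while every creator with a gradient gets a fresh buffer with the output gradient placed at its output_nr slot, and is pushed onto the ready queue. A compact standalone sketch of that seeding step with toy types:

#include <cassert>
#include <deque>
#include <utility>
#include <vector>

struct Fn { int num_outputs; bool requires_grad; };
struct Var { Fn* creator; int output_nr; bool requires_grad; };
using Buffer = std::vector<double>;

int main() {
  Fn f{2, true};
  Var out0{&f, 0, true}, leaf{nullptr, 0, true};
  std::vector<std::pair<Var*, double>> roots = {{&out0, 1.0}, {&leaf, 1.0}};

  std::deque<std::pair<Fn*, Buffer>> ready;
  bool did_leaf_backward = false;
  for (auto& [var, grad] : roots) {
    if (!var->creator) {             // leaf: its gradient is applied directly
      did_leaf_backward = did_leaf_backward || var->requires_grad;
      continue;
    }
    Buffer buf(var->creator->num_outputs, 0.0);
    buf[var->output_nr] = grad;      // gradient enters at this output slot
    ready.emplace_front(var->creator, std::move(buf));
  }
  assert(did_leaf_backward && ready.size() == 1);
}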
  std::unordered_map<Function*, GradBuffer> not_ready;
  while (ready.size() > 0) {
    std::pair<THPFunction *, grad_buffer_type> ready_pair =
        std::move(ready.back()); ready.pop_back();
    THPFunction *fn = ready_pair.first;
    grad_buffer_type &fn_grad_buffer = ready_pair.second;
    auto ready_pair = std::move(ready.back()); ready.pop_back();
    auto& fn = ready_pair.first;

    // Prepare a tuple for a call to _do_backward
    THPObjectPtr grad_tuple = PyTuple_New(fn_grad_buffer.size());
    if (!grad_tuple) return NULL;
    for (unsigned int i = 0; i < fn_grad_buffer.size(); i++) {
      PyObject *_grad;
      if (fn_grad_buffer[i]) {
        _grad = fn_grad_buffer[i].release();
      } else {
        _grad = Py_None;
        Py_INCREF(_grad);
      }
      PyTuple_SET_ITEM(grad_tuple.get(), i, _grad);
    auto grad_inputs = fn->apply(GradBuffer::variables(std::move(ready_pair.second)));
    if (!retain_variables) {
      fn->releaseVariables();
    }

    // Call _do_backward and make sure grad_input is sound
    THPObjectPtr grad_input = PyObject_CallMethod((PyObject*)fn, "_do_backward",
        "OO", grad_tuple.get(), retain_variables_obj);
    if (!grad_input)
      return NULL;
    THPUtils_assert(PyTuple_Check(grad_input), "error, _do_backward should "
        "return a tuple, but got %s", THPUtils_typename(grad_input));
    int num_grads = PyTuple_GET_SIZE(grad_input.get());
    if (grad_inputs.size() != fn->previous_functions.size()) {
      std::string msg("Function returned an invalid number of gradients - expected ");
      msg += fn->previous_functions.size();
      msg += ", but got ";
      msg += grad_inputs.size();
      throw std::runtime_error(msg);
    }

    // Process tensors inside grad_input
    for (int i = 0; i < num_grads; i++) {
      PyObject *prev_obj = fn->previous_functions[i].get();
      PyObject *grad_prev = PyTuple_GET_ITEM(grad_input.get(), i);
    int size = grad_inputs.size();
    for (int i = 0; i < size; ++i) {
      auto& grad_input = grad_inputs[i];
      auto& prev_fn = fn->previous_functions[i].first;
      int output_nr = fn->previous_functions[i].second;

      // A shortcut for variables - there's no need to buffer gradients for them
      // as their _do_backward is super fast (and we can save memory).
      // FIXME: this might call leaf variable hooks multiple times
      if (THPVariable_Check(prev_obj)) {
        THPVariable *prev_var = (THPVariable*)prev_obj;
        if (prev_var->requires_grad) {
          THPObjectPtr ret = PyObject_CallMethod(prev_obj, "_do_backward",
              "(O)O", grad_prev, retain_variables_obj);
          if (!ret) return NULL;
      // null inputs have no previous_function and we skip them here
      if (!prev_fn) {
        continue;
      }

      if (auto var = dynamic_cast<Variable*>(prev_fn.get())) {
        if (var->requires_grad) {
          var->backward(grad_input);
        }
        continue;
      }

      // No need to do any work for functions that don't require gradients
      THPFunction *prev_fn = (THPFunction*)prev_obj;
      if (!prev_fn->requires_grad)
        continue;
      // Stochastic functions are immediately ready
      if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass))
      // Stochastic functions are placed in the ready queue by
      // compute_dependencies, so we can skip them here.
      if (prev_fn->is_stochastic || !prev_fn->requires_grad) {
        continue;
      }

      // Check if the function is ready for backward and see if it has any
      // buffers allocated
      int output_idx = fn->previous_functions[i].output_nr;
      bool is_ready = THPEngine_free_backward_dependency(dependencies, prev_fn);
      auto not_ready_it = not_ready.find(prev_fn);
      // Check if the function is ready for backward
      bool is_ready = false;
      auto it = dependencies.find(prev_fn.get());
      if (it == dependencies.end()) {
        throw std::runtime_error("dependency not found");
      } else if (--it->second == 0) {
        dependencies.erase(it);
        is_ready = true;
      }

      auto not_ready_it = not_ready.find(prev_fn.get());
      if (is_ready) {
        // this is only a temporary, so no need for a correct id
        grad_buffer_type prev_buffer(-1);
        if (not_ready_it == not_ready.end()) {
          // The function is ready and no buffers have been allocated for it.
          prev_buffer = grad_buffer_type(next_buf_id++, prev_fn->num_outputs);
          Py_INCREF(grad_prev);
          prev_buffer[output_idx] = grad_prev;
          // The function is ready and no buffers have been allocated for it
          GradBuffer prev_buffer(prev_fn->num_outputs);
          prev_buffer.addGrad(output_nr, std::move(grad_input));
          ready.emplace_front(prev_fn, std::move(prev_buffer));
        } else {
          // The function is ready and it already has a buffer allocated.
          prev_buffer = std::move(not_ready_it->second);
          auto prev_buffer = std::move(not_ready_it->second);
          not_ready.erase(not_ready_it);
          if (!THPEngine_add_grad(need_copy, prev_buffer, output_idx, grad_prev))
            return NULL;
          prev_buffer.addGrad(output_nr, std::move(grad_input));
          ready.emplace_front(prev_fn, std::move(prev_buffer));
        }
        // Put the function into the ready queue.
        ready.emplace_front(prev_fn, std::move(prev_buffer));
      } else {
        // Allocate a buffer if necessary
        // Allocate a buffer if necessary and accumulate gradient
        if (not_ready_it == not_ready.end()) {
          int num_prev_fn_outputs = prev_fn->num_outputs;
          std::tie(not_ready_it, std::ignore) =
              not_ready.emplace(prev_fn, grad_buffer_type(next_buf_id++, num_prev_fn_outputs));
          GradBuffer prev_buffer(prev_fn->num_outputs);
          prev_buffer.addGrad(output_nr, std::move(grad_input));
          not_ready.emplace(prev_fn.get(), std::move(prev_buffer));
        } else {
          auto &prev_buffer = not_ready_it->second;
          prev_buffer.addGrad(output_nr, std::move(grad_input));
        }
        // Accumulate the gradient into the buffer
        grad_buffer_type &grad_buffer = not_ready_it->second;
        if (!THPEngine_add_grad(need_copy, grad_buffer, output_idx, grad_prev))
          return NULL;
      }
    }
  }

  if (!not_ready.empty()) {
    std::string names;
    for (auto &it : not_ready) {
      if (!names.empty()) names += ", ";
      names += Py_TYPE((PyObject *)it.first)->tp_name;
    }
    THPUtils_assert(not_ready.empty(),
        "could not compute gradients for some functions (%s)", names.c_str());
    throw std::runtime_error("could not compute gradients for some functions");
  }

  Py_RETURN_NONE;
}
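The loop above maintains two collections: not_ready maps a function to a partially filled gradient buffer, and ready queues functions whose dependency count reached zero together with their completed buffer. A minimal model of moving a buffer from not_ready to ready as gradients are delivered (toy types; deliver is a hypothetical helper):

#include <cassert>
#include <deque>
#include <unordered_map>
#include <utility>
#include <vector>

struct Fn { int num_outputs; };
using Buffer = std::vector<double>;

int main() {
  Fn f{1};
  std::unordered_map<Fn*, int> deps = {{&f, 2}};     // two pending gradients
  std::unordered_map<Fn*, Buffer> not_ready;
  std::deque<std::pair<Fn*, Buffer>> ready;

  auto deliver = [&](Fn* fn, int output_nr, double g) {
    bool is_ready = (--deps[fn] == 0);               // free one dependency
    auto it = not_ready.find(fn);
    if (it == not_ready.end()) {
      Buffer buf(fn->num_outputs, 0.0);
      buf[output_nr] += g;
      if (is_ready) ready.emplace_front(fn, std::move(buf));
      else          not_ready.emplace(fn, std::move(buf));
    } else {
      it->second[output_nr] += g;                    // accumulate in place
      if (is_ready) {
        ready.emplace_front(fn, std::move(it->second));
        not_ready.erase(it);
      }
    }
  };

  deliver(&f, 0, 0.5);   // buffered: one dependency still outstanding
  deliver(&f, 0, 0.5);   // second gradient arrives; f moves to ready
  assert(not_ready.empty() && ready.size() == 1);
  assert(ready.front().second[0] == 1.0);
}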

PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  return type->tp_alloc(type, 0);
}

static struct PyMethodDef THPEngine_methods[] = {
  {(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
  {NULL}
};


PyTypeObject THPEngineType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "torch._C._EngineBase",                /* tp_name */
  sizeof(THPEngine),                     /* tp_basicsize */
  0,                                     /* tp_itemsize */
  0,                                     /* tp_dealloc */
  0,                                     /* tp_print */
  0,                                     /* tp_getattr */
  0,                                     /* tp_setattr */
  0,                                     /* tp_reserved */
  0,                                     /* tp_repr */
  0,                                     /* tp_as_number */
  0,                                     /* tp_as_sequence */
  0,                                     /* tp_as_mapping */
  0,                                     /* tp_hash */
  0,                                     /* tp_call */
  0,                                     /* tp_str */
  0,                                     /* tp_getattro */
  0,                                     /* tp_setattro */
  0,                                     /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  NULL,                                  /* tp_doc */
  0,                                     /* tp_traverse */
  0,                                     /* tp_clear */
  0,                                     /* tp_richcompare */
  0,                                     /* tp_weaklistoffset */
  0,                                     /* tp_iter */
  0,                                     /* tp_iternext */
  THPEngine_methods,                     /* tp_methods */
  0,                                     /* tp_members */
  0,                                     /* tp_getset */
  0,                                     /* tp_base */
  0,                                     /* tp_dict */
  0,                                     /* tp_descr_get */
  0,                                     /* tp_descr_set */
  0,                                     /* tp_dictoffset */
  0,                                     /* tp_init */
  0,                                     /* tp_alloc */
  THPEngine_new                          /* tp_new */
};


bool THPEngine_initModule(PyObject *module)
{
  if (PyType_Ready(&THPEngineType) < 0)
    return false;
  Py_INCREF(&THPEngineType);
  PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
  return true;
}
}} // namespace torch::autograd

@@ -1,10 +1,35 @@
#ifndef THP_ENGINE_H
#define THP_ENGINE_H
#pragma once

struct THPEngine {
    PyObject_HEAD
// Engine implements backpropagation from output variables and their gradients
// to "root" variables (variables created by the user with requires_grad=True).

#include <deque>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/grad_buffer.h"

namespace torch { namespace autograd {

struct Engine {
  using ready_queue_type = std::deque<std::pair<std::shared_ptr<Function>, GradBuffer>>;
  using function_queue = std::vector<Function*>;
  using dependencies_type = std::unordered_map<Function*, int>;

  // Given a list of output variables and their gradients, computes the
  // gradients of "root" variables by backpropagation.
  static void backward(
      const variable_list& variables,
      tensor_list& grad_variables,
      bool retain_variables);

private:
  static dependencies_type compute_dependencies(
      function_queue queue,
      ready_queue_type& ready);
};

bool THPEngine_initModule(PyObject *module);

#endif
}} // namespace torch::autograd

@@ -1,976 +1,31 @@
#include <Python.h>
#include <structmember.h>
#include "function.h"

#include <unordered_map>
#include <unordered_set>
#include <exception>
#include <THPP/THPP.h>

#include "THP.h"
#include "variable.h"

#ifdef WITH_CUDA
#include "cuda/AutoGPU.h"
#endif
namespace torch { namespace autograd {

// Throwing this exception means that the python error flags have been already
// set and control should be immediately returned to the interpreter.
class python_error : public std::exception {};

#define THPFunction_assert(condition, ...) \
  if (!(condition)) { THPUtils_setError(__VA_ARGS__); throw python_error(); }


PyObject *THPFunctionClass = NULL;
PyObject *THPStochasticFunctionClass = NULL;

// Traverse and clear are required for supporting Python's GC cycle handling.
static int THPFunction_traverse(THPFunction *self, visitproc visit, void *arg)
{
  Py_VISIT(self->needs_input_grad);
  Py_VISIT(self->backward_hooks);
  for (int i = 0; i < self->num_inputs; i++)
    Py_VISIT(self->previous_functions[i].get());
  if (self->saved_variables) {
    for (unsigned int i = 0; i < self->saved_variables->size(); i++)
      Py_VISIT(std::get<0>(self->saved_variables->at(i)));
  }
  if (self->output_backward_hooks) {
    for (int i = 0; i < self->num_inputs; i++)
      Py_VISIT(self->output_backward_hooks[i].get());
  }

  Py_VISIT(self->to_save);
  Py_VISIT(self->shared_pairs);
  Py_VISIT(self->non_differentiable);
  Py_VISIT(self->dirty_tensors);

  return 0;
}

static int THPFunction_clear(THPFunction *self)
{
  self->num_inputs = 0;
  self->num_outputs = 0;

  Py_CLEAR(self->needs_input_grad);
  Py_CLEAR(self->backward_hooks);

  Py_CLEAR(self->to_save);
  Py_CLEAR(self->shared_pairs);
  Py_CLEAR(self->non_differentiable);
  Py_CLEAR(self->dirty_tensors);

  THPFunctionPtr *previous_functions = self->previous_functions;
  self->previous_functions = NULL;
  delete[] previous_functions;

  auto saved_variables = self->saved_variables;
  self->saved_variables = NULL;
  delete saved_variables;

  auto output_backward_hooks = self->output_backward_hooks;
  self->output_backward_hooks = NULL;
  delete[] output_backward_hooks;

  auto output_info = self->output_info;
  self->output_info = NULL;
  delete output_info;

  return 0;
}
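THPFunction_clear detaches each owned pointer before deleting it, so the object is already in a consistent empty state if anything re-enters it during teardown (the same reason Py_CLEAR exists). The same swap-then-delete idiom in isolation:

#include <cassert>

struct Holder {
  int* data = nullptr;

  // Detach first, then delete: a re-entrant call during deletion sees a
  // consistent, already-cleared object instead of a dangling pointer.
  void clear() {
    int* tmp = data;
    data = nullptr;
    delete tmp;
  }
};

int main() {
  Holder h;
  h.data = new int(42);
  h.clear();
  assert(h.data == nullptr);
  h.clear();  // safe to call again: deleting nullptr is a no-op
}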

static void THPFunction_dealloc(THPFunction* self)
{
  PyObject_GC_UnTrack(self);
  THPFunction_clear(self);
  Py_TYPE(self)->tp_free((PyObject*)self);
}

PyObject *THPFunction_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  THPFunction *self = (THPFunction*)type->tp_alloc(type, 0);
  if (!self)
    return NULL;
  // Python zero-initializes the object memory, so there's no need to initialize
  // most fields
  self->num_outputs = -1;
  return (PyObject*)self;
}

////////////////////////////////////////////////////////////////////////////////
// Forward
////////////////////////////////////////////////////////////////////////////////

using t2var_type = std::unordered_map<PyObject *, THPVariable *>;

static void _mark_dirty(THPFunction *self, t2var_type &t2var,
    std::unordered_set<PyObject *> &dirty_inputs)
{
  // Increase versions of modified tensors
  if (!self->dirty_tensors) return;

  THPFunction_assert(PyTuple_Check(self->dirty_tensors), "autograd "
      "internal error: dirty_tensors attribute is expected to be a tuple "
      "but is %s", THPUtils_typename(self->dirty_tensors));
  Py_ssize_t num_dirty = PyTuple_GET_SIZE(self->dirty_tensors);
  for (int i = 0; i < num_dirty; i++) {
    PyObject *tensor = PyTuple_GET_ITEM(self->dirty_tensors, i);
    dirty_inputs.insert(tensor);
    THPVariable *variable;
    try {
      variable = t2var.at(tensor);
    } catch (std::out_of_range &e) {
      THPFunction_assert(THPModule_isTensor(tensor), "mark_dirty can "
          "only accept tensors, but argument %d is of type %s", i,
          THPUtils_typename(tensor));
      THPFunction_assert(false, "mark_dirty only accepts input tensors, but "
          "argument %d isn't one", i);
    }
    auto &v_counter = *variable->version_counter;
    THPFunction_assert(v_counter.var_refcnt() == 1, "in-place operations can be "
        "only used on variables that don't share storage with any other "
        "variables, but detected that there are %d objects sharing it",
        v_counter.var_refcnt());
    v_counter++;
  }
  // We're not going to ever need this so let's remove references now
  Py_DECREF(self->dirty_tensors);
  self->dirty_tensors = NULL;
}
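_mark_dirty enforces that in-place ops only touch tensors with sole ownership, then bumps a version counter; anything that saved the tensor earlier can later detect the mutation. A tiny standalone version-counter model (all names here are illustrative, not the real THPVariableVersion API):

#include <cassert>
#include <memory>

// A counter shared by every view of the same storage; in-place ops bump it.
struct VersionCounter { int version = 0; };

struct VariableSketch {
  std::shared_ptr<VersionCounter> counter = std::make_shared<VersionCounter>();
  void mark_dirty() { counter->version++; }       // in-place modification
};

struct SavedVariable {
  std::shared_ptr<VersionCounter> counter;
  int expected_version;
  bool still_valid() const { return counter->version == expected_version; }
};

int main() {
  VariableSketch v;
  SavedVariable saved{v.counter, v.counter->version};  // snapshot at save time
  assert(saved.still_valid());
  v.mark_dirty();                                      // e.g. an in-place add
  assert(!saved.still_valid());                        // mutation is detected
}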

static void _wrap_outputs(THPFunction *self, t2var_type &t2var,
    std::unordered_set<PyObject *> &dirty_inputs, PyObject *raw_output,
    PyObject *outputs)
{
  // Wrap outputs in Variables
  Py_ssize_t num_outputs = PyTuple_GET_SIZE(raw_output);
  self->output_info = new std::vector<output_info_type>(num_outputs);
  auto &output_info = *self->output_info;
  for (int i = 0; i < num_outputs; i++) {
    PyObject *output = PyTuple_GET_ITEM(raw_output, i);
    THPVariable *output_var;
    auto it = t2var.find(output);
    if (it == t2var.end()) {
      // A completely new tensor - just wrap it and continue
      output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self, self->requires_grad);
    } else {
      // If one of the outputs was also an input tensor it's a bit more complicated.
      THPVariable *input_var = it->second;
      if (input_var->creator) {
        // If it's not a leaf we want to move it in the graph so backprop
        // will be computed correctly:
        // creator <- variable <- self  ==>  creator <- self <- variable
        Py_INCREF(input_var);
        output_var = input_var;
        Py_DECREF(input_var->creator);
        Py_INCREF(self);
        input_var->creator = (PyObject*)self;
auto Function::flags(const variable_list& inputs) -> FunctionFlags {
  int num_inputs = inputs.size();
  FunctionFlags f;
  f.requires_grad = false;
  f.is_volatile = false;
  f.previous_functions.resize(num_inputs);
  for (int i = 0; i != num_inputs; ++i) {
    auto& var = inputs[i];
    if (var) {
      f.requires_grad |= var->requires_grad;
      f.is_volatile |= var->is_volatile;
      if (var->creator) {
        f.previous_functions[i] = std::make_pair<>(var->creator, var->output_nr);
      } else {
        // If the Variable has been changed, we have to move it after the
        // current function to ensure the gradient is computed correctly.
        // There are two cases now:
        // 1. If it requires grad, it is an error, and this will be caught
        //    when its _do_backward is called, because it won't be a leaf anymore.
        //    Also we'll change its version.
        // 2. If it doesn't require grad, we can safely move it in the graph,
        //    because its _do_backward will never be called.
        if (dirty_inputs.count(output) > 0) {
          Py_INCREF(input_var);
          output_var = input_var;
          Py_INCREF(self);
          output_var->creator = (PyObject*)self;
          if (!output_var->requires_grad && self->requires_grad) {
            // Now, there's another subtlety. We move the input in the graph
            // and we change its requires_grad to True. However, remember
            // that we're still holding a reference to it as a previous
            // function. The backward engine will think that it was really a
            // leaf that initially did require grad and call its _do_backward,
            // and that will throw. Because of this, we need to allocate
            // a dummy leaf that doesn't require grad and put it as our
            // previous function.
            output_var->requires_grad = self->requires_grad;
            PyObject* dummy_prev_fn = THPVariable_New(output, NULL, false);
            if (!dummy_prev_fn) throw python_error();
            self->previous_functions[i] = THPFunctionPtr(dummy_prev_fn, 0);
          }
        } else {
          // An input has been returned, but it wasn't modified. It's better
          // not to move the Variable, because there are some legitimate cases
          // where making it non-leaf would break stuff (e.g. broadcast). Also,
          // returning the input Variable is not a good option either,
          // because if someone registers hooks on it, they will fire with grads
          // from all usages, not only from usages of this output. This is why
          // we'll return a copy and join their version counters. This has
          // a side-effect of making in-place ops on any of these Variables an
          // immediate error, but it would be raised anyway once someone
          // calls backward.
          output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self,
              self->requires_grad);
          if (!output_var) throw python_error();
          output_var->version_counter->join_with(*input_var->version_counter);
        }
      }
    }
    if (!output_var) throw python_error();

    torch::THPVoidTensor *output_obj = (torch::THPVoidTensor*)output_var->data;
    torch::THVoidTensor *output_tensor = output_obj->cdata;
    long ndim = output_tensor->nDimension;
    int device_id = -1;
    THPObjectPtr is_cuda = PyObject_GetAttrString(output_var->data, "is_cuda");
    if (is_cuda.get() == Py_True) {
      THPObjectPtr device_id_obj = PyObject_CallMethod(output_var->data,
          "get_device", "");
      THPFunction_assert(THPUtils_checkLong(device_id_obj), "get_device "
          "should return an int, but got %s", THPUtils_typename(device_id_obj));
      device_id = THPUtils_unpackLong(device_id_obj);
    }
    output_info[i] = std::make_tuple(
      (PyObject*)Py_TYPE(output_var->data),
      device_id,
      std::vector<long>(output_tensor->size, output_tensor->size + ndim)
    );
    t2var[output] = output_var;
    output_var->output_nr = i;
    PyTuple_SET_ITEM(outputs, i, (PyObject*)output_var);
  }
}
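Function::flags folds each input's requires_grad and is_volatile into the new function's flags and records (creator, output_nr) edges; volatility wins over requires_grad, as the f.requires_grad &= !f.is_volatile line later in this diff shows. A standalone reduction of the same rule with toy types:

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

struct Fn;
struct Var { bool requires_grad; bool is_volatile; Fn* creator; int output_nr; };

struct Flags {
  bool requires_grad = false;
  bool is_volatile = false;
  std::vector<std::pair<Fn*, int>> previous;  // (creator, output slot) edges
};

static Flags flags(const std::vector<Var*>& inputs) {
  Flags f;
  f.previous.resize(inputs.size());
  for (size_t i = 0; i < inputs.size(); ++i) {
    Var* var = inputs[i];
    if (!var) continue;
    f.requires_grad |= var->requires_grad;
    f.is_volatile |= var->is_volatile;
    f.previous[i] = {var->creator, var->output_nr};
  }
  f.requires_grad = f.requires_grad && !f.is_volatile;  // volatility wins
  return f;
}

int main() {
  Var a{true, false, nullptr, 0}, b{false, true, nullptr, 0};
  assert(flags({&a}).requires_grad);
  assert(!flags({&a, &b}).requires_grad);  // a volatile input disables grad
}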

static void _save_variables(THPFunction *self, t2var_type &t2var)
{
  if (!self->to_save) return;

  THPFunction_assert(PyTuple_Check(self->to_save), "autograd internal "
      "error: to_save attribute is expected to be a tuple but is %s",
      THPUtils_typename(self->to_save));
  Py_ssize_t num_saved = PyTuple_GET_SIZE(self->to_save);
  self->saved_variables = new std::vector<saved_var_info_type>();
  self->saved_variables->reserve(num_saved);
  for (int i = 0; i < num_saved; i++) {
    PyObject *tensor = PyTuple_GET_ITEM(self->to_save, i);
    if (tensor == Py_None) {
      Py_INCREF(tensor);
      self->saved_variables->emplace_back(tensor, 0, nullptr);
      continue;
    }

    THPVariable *variable;
    try {
      variable = t2var.at(tensor);
    } catch(std::out_of_range &e) {
      THPFunction_assert(THPModule_isTensor(tensor),
          "save_for_backward can only save tensors, but argument %d is of "
          "type %s", i, THPUtils_typename(tensor));
      THPFunction_assert(false, "save_for_backward can only save input or output "
          "tensors, but argument %d doesn't satisfy this condition", i);
    }

    Py_INCREF(tensor);
    self->saved_variables->emplace_back(
      tensor,
      **variable->version_counter,
      std::unique_ptr<THPVariableVersion>(variable->version_counter->new_saved_ref())
    );
  }
  // Free .to_save
  Py_DECREF(self->to_save);
  self->to_save = NULL;
}

static void _join_version_counters(THPFunction *self, t2var_type &t2var)
{
  if (!self->shared_pairs) return;
  THPFunction_assert(PyTuple_Check(self->shared_pairs), "autograd internal "
      "error: shared_pairs attribute is expected to be a tuple but is %s",
      THPUtils_typename(self->shared_pairs));
  Py_ssize_t num_shared = PyTuple_GET_SIZE(self->shared_pairs);
  for (int i = 0; i < num_shared; i++) {
    PyObject *shared_tuple = PyTuple_GET_ITEM(self->shared_pairs, i);
    THPFunction_assert(PyTuple_Check(shared_tuple), "mark_shared_storages "
        "accepts a number of pairs, but one of the arguments is of type %s",
        THPUtils_typename(shared_tuple));
    THPFunction_assert(PyTuple_GET_SIZE(shared_tuple) == 2,
        "mark_shared_storages accepts pairs, but argument %d is a tuple of "
        "%d elements", i, PyTuple_GET_SIZE(shared_tuple));

    // Now we're sure it's really a pair!
    THPVariable *v1, *v2;
    try {
      v1 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 0));
      v2 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 1));
    } catch(std::out_of_range &e) {
      // One of the tuple items wasn't present in t2var, so there are two cases:
      // 1. it's not a tensor
      // 2. it's not an input nor an output
      PyObject *t1 = PyTuple_GET_ITEM(shared_tuple, 0);
      PyObject *t2 = PyTuple_GET_ITEM(shared_tuple, 1);
      THPFunction_assert(THPModule_isTensor(t1) && THPModule_isTensor(t2),
          "mark_shared_storages accepts pairs of tensors, but one of them "
          "contains %s and %s", THPUtils_typename(t1), THPUtils_typename(t2));
      THPFunction_assert(false, "mark_shared_storages only accepts pairs of input "
          "and output tensors, but argument %d doesn't satisfy this "
          "condition", i);
    }
    v2->version_counter->join_with(*v1->version_counter);
  }
  // Free .shared_pairs
  Py_DECREF(self->shared_pairs);
  self->shared_pairs = NULL;
}

static void _mark_non_differentiable(THPFunction *self, t2var_type &t2var)
{
  if (!self->non_differentiable) return;

  THPFunction_assert(PyTuple_Check(self->non_differentiable), "autograd "
      "internal error: non_differentiable attribute is expected to be a "
      "tuple but is %s", THPUtils_typename(self->non_differentiable));
  Py_ssize_t num_nondiff = PyTuple_GET_SIZE(self->non_differentiable);
  for (int i = 0; i < num_nondiff; i++) {
    PyObject *t = PyTuple_GET_ITEM(self->non_differentiable, i);
    THPVariable *var;
    try {
      var = t2var.at(t);
      THPFunction_assert(var->creator == (PyObject*)self,
          "mark_non_differentiable only accepts output tensors, but "
          "argument %d isn't an output", i);
    } catch (std::out_of_range &e) {
      THPFunction_assert(THPModule_isTensor(t), "mark_non_differentiable "
          "only accepts tensor arguments, but got %s", THPUtils_typename(t));
      THPFunction_assert(false, "mark_non_differentiable only accepts function "
          "outputs");
    }
    var->requires_grad = 0;
  }
  Py_DECREF(self->non_differentiable);
  self->non_differentiable = NULL;
}

static bool _ensure_tuple(THPObjectPtr& obj)
{
  if (PyTuple_Check(obj.get()))
    return false;

  PyObject *tuple = PyTuple_New(1);
  if (!tuple) throw python_error();
  PyTuple_SET_ITEM(tuple, 0, obj.release());
  obj = tuple;
  return true;
}
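mark_shared_storages joins the version counters of an input and an output that alias the same storage, so a write through either one is visible to saved references of both. A compact model of joining counters (VariableSketch and bump are illustrative stand-ins):

#include <cassert>
#include <memory>

struct VersionCounter { int version = 0; };

struct VariableSketch {
  std::shared_ptr<VersionCounter> counter = std::make_shared<VersionCounter>();
  // After join_with, both variables share one counter object.
  void join_with(VariableSketch& other) { counter = other.counter; }
  void bump() { counter->version++; }
};

int main() {
  VariableSketch input, output;          // output aliases input's storage
  output.join_with(input);
  input.bump();                          // in-place write through the input
  assert(output.counter->version == 1);  // visible through the output too
}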

PyObject *THPFunction_do_forward(THPFunction *self, PyObject *inputs)
{
  try {
    Py_ssize_t num_inputs = inputs ? PyTuple_GET_SIZE(inputs) : 0;

    // Unpack inputs and check if they require gradients or are volatile
    THPObjectPtr unpacked_inputs = PyTuple_New(num_inputs);
    self->needs_input_grad = PyTuple_New(num_inputs);
    self->requires_grad = false;
    bool is_volatile = false;
    for (int i = 0; i < num_inputs; i++) {
      PyObject *input = PyTuple_GET_ITEM(inputs, i);
      THPUtils_assert(THPVariable_Check(input), "expected a Variable argument, "
          "but got %s", THPUtils_typename(input));
      THPVariable *variable = (THPVariable*)input;

      // Unpack the variable - SET_ITEM steals a reference so INCREF it
      Py_INCREF(variable->data);
      PyTuple_SET_ITEM(unpacked_inputs.get(), i, variable->data);

      // We can't move this to C, because it's going to be accessed from user code.
      PyTuple_SET_ITEM(self->needs_input_grad, i, PyBool_FromLong(variable->requires_grad));

      is_volatile = is_volatile || variable->is_volatile;
      self->requires_grad = self->requires_grad || variable->requires_grad;
    }

    // Now we're ready to call a forward (implemented in Python)
    THPObjectPtr forward_fn = PyObject_GetAttrString((PyObject*)self, "forward");
    THPUtils_assert(forward_fn.get(), "function %s doesn't implement a required "
        "'forward' method", THPUtils_typename((PyObject*)self));
    THPObjectPtr raw_output = PyObject_CallObject(forward_fn, unpacked_inputs);
    if (!raw_output) return NULL;
    // Wrap output in a tuple, if it's not one already
    bool unpack_output = _ensure_tuple(raw_output);
    int num_outputs = PyTuple_GET_SIZE(raw_output.get());

    THPObjectPtr outputs = PyTuple_New(num_outputs);
    if (!outputs) return NULL;
    if (is_volatile) {
      // If one of the inputs is volatile let's take a fast path - we want
      // to minimize the overhead of inference
      for (int i = 0; i < num_outputs; i++) {
        PyObject *output = PyTuple_GET_ITEM(raw_output.get(), i);
        THPVariable *output_var = (THPVariable*)THPVariable_NewVolatile(output);
        if (!output_var) return NULL;
        output_var->output_nr = i;
        PyTuple_SET_ITEM(outputs.get(), i, (PyObject*)output_var);
      }
    } else {
      // We're not volatile, so there's a lot of bookkeeping to do...
      self->num_inputs = num_inputs;
      self->num_outputs = num_outputs;
      t2var_type t2var;

      // Save previous functions and initialize t2var map
      self->previous_functions = new THPFunctionPtr[num_inputs];
      for (int i = 0; i < num_inputs; i++) {
        THPVariable *input_var = (THPVariable*)PyTuple_GET_ITEM(inputs, i);
        t2var.emplace(input_var->data, input_var);

        // Save previous function in a helper class (that has a smart pointer to
        // the object and remembers which output we used).
        PyObject *prev_fn = input_var->creator ? input_var->creator : (PyObject*)input_var;
        Py_INCREF(prev_fn);
        self->previous_functions[i] = THPFunctionPtr(prev_fn, input_var->output_nr);
      }

      std::unordered_set<PyObject *> dirty_inputs;
      _mark_dirty(self, t2var, dirty_inputs);
      _wrap_outputs(self, t2var, dirty_inputs, raw_output, outputs);
      _join_version_counters(self, t2var);
      if (self->requires_grad ||
          PyObject_IsInstance((PyObject*)self, THPStochasticFunctionClass)) {
        _save_variables(self, t2var);
        _mark_non_differentiable(self, t2var);
      }
    }

    // Unpack the output, unless .forward() returned a tuple
    if (unpack_output) {
      PyObject *output = PyTuple_GET_ITEM(outputs.get(), 0);
      Py_INCREF(output);
      return output;
    }

    return outputs.release();

  } catch (python_error& e) {
    return NULL;
  } catch (std::exception& e) {
    THPUtils_setError(e.what());
    return NULL;
  }
}
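THPFunction_do_forward follows a fixed shape: unwrap Variables to raw tensors, call the user-defined forward, then wrap the raw outputs back into Variables that record this function as their creator. The skeleton of that round-trip with toy types (do_forward here is a hypothetical stand-in, not the real entry point):

#include <cassert>
#include <functional>
#include <vector>

struct FnNode;
struct VarSketch { double data; FnNode* creator; };
struct FnNode { int num_inputs = 0; };

using Forward = std::function<std::vector<double>(const std::vector<double>&)>;

static std::vector<VarSketch> do_forward(FnNode& fn, Forward forward,
                                         const std::vector<VarSketch>& inputs) {
  std::vector<double> raw;                 // 1. unwrap variables to raw data
  for (const auto& v : inputs) raw.push_back(v.data);
  fn.num_inputs = (int)raw.size();
  std::vector<double> out = forward(raw);  // 2. run the user-defined forward
  std::vector<VarSketch> outputs;          // 3. rewrap, recording the creator
  for (double o : out) outputs.push_back({o, &fn});
  return outputs;
}

int main() {
  FnNode fn;
  auto doubler = [](const std::vector<double>& xs) {
    std::vector<double> ys;
    for (double x : xs) ys.push_back(2 * x);
    return ys;
  };
  auto outs = do_forward(fn, doubler, {{3.0, nullptr}});
  assert(outs[0].data == 6.0 && outs[0].creator == &fn);
}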

////////////////////////////////////////////////////////////////////////////////
// Backward
////////////////////////////////////////////////////////////////////////////////

// We need a reference to a smart pointer that will outlive the duration of
// a function call, so that the char* pointer is valid even after it returns
static char* _try_get_name(PyObject *hook, THPObjectPtr& tmp) {
  tmp = PyObject_GetAttrString(hook, "__name__");
#if PY_MAJOR_VERSION == 2
  if (tmp && PyString_Check(tmp.get())) {
    return PyString_AS_STRING(tmp.get());
  }
#else
  if (tmp && PyUnicode_Check(tmp.get())) {
    tmp = PyUnicode_AsASCIIString(tmp.get());
    return PyBytes_AS_STRING(tmp.get());
  }
#endif
  return NULL;
}

#define OPTIONAL_HOOK_NAME \
  hook_name ? "'" : "", \
  hook_name ? hook_name : "", \
  hook_name ? "' " : ""

static void _ensure_correct_hook_result_single(PyObject *original,
    PyObject *returned, PyObject *hook)
{
#if PY_MAJOR_VERSION == 2
  static PyObject *IS_SAME_SIZE_NAME = PyString_FromString("is_same_size");
#else
  static PyObject *IS_SAME_SIZE_NAME = PyUnicode_FromString("is_same_size");
#endif
  THPObjectPtr tmp;
  // Check that the type matches
  if (Py_TYPE(original) != Py_TYPE(returned)) {
    char *hook_name = _try_get_name(hook, tmp);
    THPUtils_setError("backward hook %s%s%shas changed the type of "
        "grad_input (was %s, but got %s)",
        OPTIONAL_HOOK_NAME,
        THPUtils_typename(original),
        THPUtils_typename(returned)
    );
    throw python_error();
  }

  // Special case - None gradient. The type matches so it's everything we
  // had to check.
  if (original == Py_None) return;

  THPVariable *original_var = (THPVariable*)original;
  THPVariable *returned_var = (THPVariable*)returned;

  // Check that data types match
  if (Py_TYPE(original_var->data) != Py_TYPE(returned_var->data)) {
    char *hook_name = _try_get_name(hook, tmp);
    THPUtils_setError("backward hook %s%s%shas changed the type of "
        "grad_input data (was %s, but got %s)",
        OPTIONAL_HOOK_NAME,
        THPUtils_typename(original_var->data),
        THPUtils_typename(returned_var->data)
    );
    throw python_error();
  }

  // Check that the size matches
  THPObjectPtr is_same_size = PyObject_CallMethodObjArgs(original,
      IS_SAME_SIZE_NAME, returned, NULL);
  if (is_same_size.get() != Py_True) {
    char *hook_name = _try_get_name(hook, tmp);
    THPUtils_setError("backward hook %s%s%shas changed the size of "
        "grad_input",
        OPTIONAL_HOOK_NAME
    );
    throw python_error();
  }
}

static void _ensure_correct_hook_result(THPObjectPtr& grad_input,
    THPObjectPtr& result, PyObject *hook)
{
  THPObjectPtr tmp;
  // Check that the tuple sizes match
  if (PyTuple_GET_SIZE(result.get()) != PyTuple_GET_SIZE(grad_input.get())) {
    char *hook_name = _try_get_name(hook, tmp);
    THPUtils_setError("backward hook %s%s%sreturned an incorrect number "
        "of gradients (got %ld, but expected %ld)",
        OPTIONAL_HOOK_NAME,
        PyTuple_GET_SIZE(result.get()),
        PyTuple_GET_SIZE(grad_input.get())
    );
    throw python_error();
  }

  Py_ssize_t size = PyTuple_GET_SIZE(grad_input.get());
  for (int i = 0; i < size; i++) {
    PyObject *original = PyTuple_GET_ITEM(grad_input.get(), i);
    PyObject *returned = PyTuple_GET_ITEM(result.get(), i);
    _ensure_correct_hook_result_single(original, returned, hook);
  }
}
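A hook may replace a gradient, but only with a value of the same type and shape; the checks above reject anything else with a descriptive error naming the hook. A distilled standalone validator of that contract:

#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

struct Grad { std::string type_name; std::vector<long> sizes; };

// Reject a hook result whose type or shape differs from the original grad.
static void ensure_correct_hook_result(const Grad& original, const Grad& returned,
                                       const std::string& hook_name) {
  if (original.type_name != returned.type_name)
    throw std::runtime_error("backward hook '" + hook_name +
                             "' has changed the type of grad_input");
  if (original.sizes != returned.sizes)
    throw std::runtime_error("backward hook '" + hook_name +
                             "' has changed the size of grad_input");
}

int main() {
  Grad g{"FloatTensor", {2, 3}};
  ensure_correct_hook_result(g, g, "scale_hook");  // identical: accepted
  bool threw = false;
  try {
    ensure_correct_hook_result(g, Grad{"FloatTensor", {3, 2}}, "bad_hook");
  } catch (const std::runtime_error&) { threw = true; }
  assert(threw);
}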

static void _call_output_hooks(THPFunction *self, THPObjectPtr& grad_output)
{
  if (!self->output_backward_hooks) return;

  PyObject *key, *value;
  Py_ssize_t pos = 0;
  // We can't reuse the tuple we got, so allocate a new one.
  THPObjectPtr new_grad_output = PyTuple_New(self->num_outputs);
  if (!new_grad_output) throw python_error();

  // FIXME: until multiple backward only
  bool updated_gradient = false;
  for (int i = 0; i < self->num_outputs; i++) {
    // Copy grad to a new tuple
    PyObject *old_grad = PyTuple_GET_ITEM(grad_output.get(), i);
    // FIXME: no need to pack them again after changing grads to Variables
    PyObject *old_grad_var;
    if (old_grad == Py_None) {
      old_grad_var = Py_None;
      Py_INCREF(Py_None);
    } else {
      old_grad_var = THPVariable_NewVolatile(old_grad);
      if (!old_grad_var) throw python_error();
    }
    PyTuple_SET_ITEM(new_grad_output.get(), i, old_grad_var);

    // Make sure that we're really going to operate on a dict
    PyObject *hook_dict = self->output_backward_hooks[i];
    if (!hook_dict) continue;
    THPFunction_assert(PyDict_Check(hook_dict), "backward_hooks "
        "attribute has to be a dictionary");

    while (PyDict_Next(hook_dict, &pos, &key, &value)) {
      THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
          old_grad_var, NULL);
      if (!result) throw python_error();

      // If the hook returns something other than None, we treat that as a sign
      // to replace this grad with the return value.
      if (result.get() != Py_None) {
        updated_gradient = true;

        // Check all possible inconsistencies of the output that we can detect
        // (sizes, types, etc.)
        _ensure_correct_hook_result_single(old_grad_var, result, value);

        // Replace the old gradient
        PyTuple_SET_ITEM(new_grad_output.get(), i, result.release());
        Py_XDECREF(old_grad_var);
        old_grad_var = PyTuple_GET_ITEM(new_grad_output.get(), i);
        f.previous_functions[i] = std::make_pair<>(var, 0);
      }
    }
  }

  // FIXME: no need to do this after multiple backward
  if (updated_gradient) {
    THPObjectPtr unpacked_grad_output = PyTuple_New(self->num_outputs);
    if (!unpacked_grad_output) throw python_error();
    for (int i = 0; i < self->num_outputs; i++) {
      PyObject *grad = PyTuple_GET_ITEM(new_grad_output.get(), i);
      if (grad == Py_None) {
        Py_INCREF(Py_None);
        PyTuple_SET_ITEM(unpacked_grad_output.get(), i, Py_None);
      } else {
        THPVariable *var = (THPVariable*)grad;
        Py_INCREF(var->data);
        PyTuple_SET_ITEM(unpacked_grad_output.get(), i, var->data);
      }
    }
    grad_output = unpacked_grad_output.release();
  }
  f.requires_grad &= !f.is_volatile;
  return f;
}

static void _call_function_hooks(THPFunction *self, THPObjectPtr& grad_input, THPObjectPtr& grad_output)
{
  if (!self->backward_hooks) return;

  PyObject *key, *value;
  Py_ssize_t pos = 0;

  THPFunction_assert(PyDict_Check(self->backward_hooks), "backward_hooks "
      "attribute has to be a dictionary");

  // FIXME: until multiple backward only
  bool updated_gradient = false;
  THPObjectPtr packed_grad_input = PyTuple_New(self->num_inputs);
  if (!packed_grad_input.get()) throw python_error();
  for (int i = 0; i < self->num_inputs; i++) {
    PyObject *tensor = PyTuple_GET_ITEM(grad_input.get(), i);
    PyObject *var;
    if (tensor == Py_None) {
      var = Py_None;
      Py_INCREF(Py_None);
    } else {
      var = THPVariable_NewVolatile(tensor);
    }
    if (!var) throw python_error();
    PyTuple_SET_ITEM(packed_grad_input.get(), i, var);
  }
  THPObjectPtr packed_grad_output = PyTuple_New(self->num_outputs);
  if (!packed_grad_output.get()) throw python_error();
  for (int i = 0; i < self->num_outputs; i++) {
    PyObject *tensor = PyTuple_GET_ITEM(grad_output.get(), i);
    PyObject *var;
    if (tensor == Py_None) {
      var = Py_None;
      Py_INCREF(Py_None);
    } else {
      var = THPVariable_NewVolatile(tensor);
    }
    if (!var) throw python_error();
    PyTuple_SET_ITEM(packed_grad_output.get(), i, var);
  }

  while (PyDict_Next(self->backward_hooks, &pos, &key, &value)) {
    THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
        packed_grad_input.get(), packed_grad_output.get(), NULL);
    if (!result) throw python_error();

    // If the hook returns something other than None, we treat that as a sign
    // to replace grad_input with its return value.
    if (result.get() != Py_None) {
      updated_gradient = true;
      // Make sure we're working with a tuple
      _ensure_tuple(result);
      // Check all possible inconsistencies of the output that we can detect
      // (sizes, types, etc.)
      _ensure_correct_hook_result(packed_grad_input, result, value);
      packed_grad_input = result.release();
    }
  }

  // FIXME: until multiple backward only
  if (updated_gradient) {
    THPObjectPtr unpacked_grad_input = PyTuple_New(self->num_inputs);
    if (!unpacked_grad_input) throw python_error();
    for (int i = 0; i < self->num_inputs; i++) {
      PyObject *grad = PyTuple_GET_ITEM(packed_grad_input.get(), i);
      if (grad == Py_None) {
        Py_INCREF(Py_None);
        PyTuple_SET_ITEM(unpacked_grad_input.get(), i, Py_None);
      } else {
        THPVariable *var = (THPVariable*)grad;
        Py_INCREF(var->data);
        PyTuple_SET_ITEM(unpacked_grad_input.get(), i, var->data);
      }
    }
    grad_input = unpacked_grad_input.release();
  }
}
static void _prepare_grad_output(THPFunction *self, THPObjectPtr& raw_grad_output)
{
#ifdef WITH_CUDA
  THCPAutoGPU gpu_guard(-1);
#endif
  int num_grad_output = PyTuple_GET_SIZE(raw_grad_output.get());
  // First, check if any of grad_outputs is None. If not, there's nothing to do
  bool has_none = false;
  for (int i = 0; i < num_grad_output; i++) {
    if (PyTuple_GET_ITEM(raw_grad_output.get(), i) == Py_None) {
      has_none = true;
      break;
    }
  }
  if (!has_none)
    return;

  THPObjectPtr grad_output;
  grad_output = PyTuple_New(num_grad_output);
  if (!grad_output) throw python_error();

  // Look for Nones and replace them with new buffers
  for (int i = 0; i < num_grad_output; i++) {
    PyObject *grad = PyTuple_GET_ITEM(raw_grad_output.get(), i);
    if (grad == Py_None) {
      auto &info = (*self->output_info)[i];
      PyObject *tensor_cls = std::get<0>(info);
#ifdef WITH_CUDA
      gpu_guard.setDevice(std::get<1>(info));
#endif
      std::vector<long> &sizes = std::get<2>(info);
      THPObjectPtr grad_size = THPSize_New(sizes.size(), sizes.data());
      THPObjectPtr new_grad = PyObject_CallFunctionObjArgs(tensor_cls, grad_size.get(), NULL);
      if (!new_grad) throw python_error();
      THPObjectPtr result = PyObject_CallMethod(new_grad.get(), "zero_", "");
      if (!result) throw python_error();
      grad = new_grad.release();
    } else {
      Py_INCREF(grad);
    }
    PyTuple_SET_ITEM(grad_output.get(), i, grad);
  }
  raw_grad_output = grad_output.release();
}

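The effect of `_prepare_grad_output`, sketched in Python (hypothetical names; the real code also restores the recorded GPU device before allocating): any None among the incoming gradients is replaced by a zero tensor of the class and shape recorded for that output at forward time.

    def prepare_grad_output(raw_grad_output, output_info):
        # output_info[i] is (tensor_class, gpu_id, sizes), saved during forward
        prepared = []
        for grad, (tensor_cls, gpu_id, sizes) in zip(raw_grad_output, output_info):
            if grad is None:
                # Unused output: substitute a zero-filled buffer of the right size
                grad = tensor_cls(*sizes).zero_()
            prepared.append(grad)
        return tuple(prepared)
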
static void _trim_grad_input(THPFunction *self, THPObjectPtr& grad_input)
{
  int num_grads = PyTuple_GET_SIZE(grad_input.get());
  int num_prev_fns = self->num_inputs;
  if (num_grads > num_prev_fns) {
    // Check that all extra grads are None
    bool all_none = true;
    for (int i = num_prev_fns; i < num_grads; i++) {
      all_none = (PyTuple_GET_ITEM(grad_input.get(), i) == Py_None);
      if (!all_none) break;
    }
    // If so, slice the tuple
    if (all_none) {
      num_grads = num_prev_fns;
      grad_input = PyTuple_GetSlice(grad_input.get(), 0, num_grads);
      if (!grad_input) throw python_error();
    }
  }
}

PyObject * THPFunction_do_backward(THPFunction *self, PyObject *args)
{
  try {
    Py_ssize_t num_args = args ? PyTuple_GET_SIZE(args) : 0;
    THPUtils_assert(num_args == 2, "_do_backward expects exactly two arguments");
    PyObject *raw_grad_output = PyTuple_GET_ITEM(args, 0);
    PyObject *retain_variables = PyTuple_GET_ITEM(args, 1);
    if (!PyTuple_Check(raw_grad_output) || !PyBool_Check(retain_variables)) {
      THPUtils_invalidArguments(args, NULL, "_do_backward", 1, "(tuple, bool)");
      return NULL;
    }

    // Some of the outputs might have been unused, so we have to allocate
    // zero-filled buffers instead
    Py_INCREF(raw_grad_output);
    THPObjectPtr grad_output = raw_grad_output;
    _prepare_grad_output(self, grad_output);

    // Call output hooks (this can modify grad_output!)
    _call_output_hooks(self, grad_output);

    // self.backward(*grad_output)
    THPObjectPtr backward_fn = PyObject_GetAttrString((PyObject*)self, "backward");
    THPUtils_assert(backward_fn.get(), "function %s doesn't implement a required "
        "'backward' method", THPUtils_typename((PyObject*)self));
    THPObjectPtr grad_input = PyObject_CallObject(backward_fn, grad_output.get());
    if (!grad_input) return NULL;
    _ensure_tuple(grad_input);

    // We allow functions to return more gradients than there were outputs,
    // if and only if the additional ones are all None
    _trim_grad_input(self, grad_input);
    int num_grads = PyTuple_GET_SIZE(grad_input.get());
    int num_prev_fns = self->num_inputs;
    THPUtils_assert(num_grads == num_prev_fns, "%s returned an invalid number of "
        "gradient tensors (expected %d, but got %d)", THPUtils_typename(self),
        num_prev_fns, num_grads);

    // Call function hooks (this can modify grad_input!)
    _call_function_hooks(self, grad_input, grad_output);

    // Free buffers only if they're never going to be used again
    if (retain_variables == Py_False) {
      delete self->saved_variables;
      self->saved_variables = nullptr;
      self->has_freed_buffers = 1;
    }

    return grad_input.release();

  } catch (python_error& e) {
    return NULL;
  } catch (std::exception& e) {
    THPUtils_setError(e.what());
    return NULL;
  }
}

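Putting the pieces together, `_do_backward` runs a fixed sequence. The Python sketch below mirrors the C++ control flow above, reusing the `prepare_grad_output` and `call_function_hooks` sketches given earlier (illustrative only, not an accessible API):

    def do_backward(fn, raw_grad_output, retain_variables):
        grad_output = prepare_grad_output(raw_grad_output, fn.output_info)
        grad_output = call_output_hooks(fn, grad_output)   # may modify grad_output
        grad_input = fn.backward(*grad_output)             # user-defined derivative
        if not isinstance(grad_input, tuple):
            grad_input = (grad_input,)                     # _ensure_tuple
        grad_input = trim_grad_input(fn, grad_input)       # drop trailing Nones
        assert len(grad_input) == fn.num_inputs
        grad_input = call_function_hooks(fn.backward_hooks or {},
                                         grad_input, grad_output)
        if not retain_variables:
            fn.saved_variables = None                      # free buffers for good
        return grad_input
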
////////////////////////////////////////////////////////////////////////////////
// Other methods / attributes
////////////////////////////////////////////////////////////////////////////////

PyObject* THPFunction__register_hook_dict(THPFunction *self, PyObject *_var)
{
  THPUtils_assert(THPVariable_Check(_var), "_register_hook_dict expected a variable");
  THPVariable *var = (THPVariable*)_var;

  if (!self->output_backward_hooks)
    self->output_backward_hooks = new THPObjectPtr[self->num_inputs];
  Py_INCREF(var->backward_hooks);
  self->output_backward_hooks[var->output_nr] = var->backward_hooks;

  Py_RETURN_NONE;
}

PyObject *THPFunction_saved_tensors(THPFunction *self, void *_unused)
{
  THPUtils_assert(!self->has_freed_buffers, "Trying to backward through the "
      "graph a second time, but the buffers have already been freed. Please "
      "specify retain_variables=True when calling backward for the first time.");
  if (!self->saved_variables)
    return PyTuple_New(0);

  int num_saved = self->saved_variables->size();
  THPObjectPtr saved_tensors = PyTuple_New(num_saved);
  if (!saved_tensors)
    return NULL;
  for (int i = 0; i < num_saved; i++) {
    saved_var_info_type &tuple = (*self->saved_variables)[i];
    PyObject *tensor = std::get<0>(tuple);
    if (tensor != Py_None) {
      int expected_version = std::get<1>(tuple);
      int current_version = **(std::get<2>(tuple));
      THPUtils_assert(expected_version == current_version, "one of the variables "
          "needed for gradient computation has been modified by an "
          "inplace operation");
    }
    Py_INCREF(tensor);
    PyTuple_SET_ITEM(saved_tensors.get(), i, tensor);
  }
  return saved_tensors.release();
}

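This version check is what produces the familiar in-place error. A sketch of the failure mode from the Python side (hypothetical function, old-style `torch.autograd.Function` API assumed):

    from torch.autograd import Function

    class Square(Function):            # hypothetical example, not in this diff
        def forward(self, x):
            self.save_for_backward(x)  # records (tensor, version) pairs
            return x * x

        def backward(self, grad_out):
            x, = self.saved_tensors   # raises if x was modified in place
            return 2 * x * grad_out

Mutating `x` in place (e.g. `x.add_(1)`) between forward and backward bumps its version counter past the saved `expected_version`, so the getter above refuses to unpack it.
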
PyObject *THPFunction_previous_functions(THPFunction *self, void *_unused)
{
  THPObjectPtr previous_functions = PyTuple_New(self->num_inputs);
  if (!previous_functions)
    return NULL;
  for (int i = 0; i < self->num_inputs; i++) {
    THPObjectPtr fn_tuple = PyTuple_New(2);
    if (!fn_tuple)
      return NULL;
    Py_INCREF(self->previous_functions[i].get());
    PyTuple_SET_ITEM(fn_tuple.get(), 0, self->previous_functions[i].get());
    PyTuple_SET_ITEM(fn_tuple.get(), 1, PyInt_FromLong(self->previous_functions[i].output_nr));
    PyTuple_SET_ITEM(previous_functions.get(), i, fn_tuple.release());
  }
  return previous_functions.release();
}

typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);

static struct PyGetSetDef THPFunction_properties[] = {
  {"saved_tensors", (getter)THPFunction_saved_tensors, NULL, NULL, NULL},
  {"previous_functions", (getter)THPFunction_previous_functions, NULL, NULL, NULL},
  {NULL}
};

static struct PyMemberDef THPFunction_members[] = {
  {(char*)"_backward_hooks", T_OBJECT, offsetof(THPFunction, backward_hooks), 0, NULL},
  {(char*)"to_save", T_OBJECT, offsetof(THPFunction, to_save), 0, NULL},
  {(char*)"shared_pairs", T_OBJECT, offsetof(THPFunction, shared_pairs), 0, NULL},
  {(char*)"non_differentiable", T_OBJECT, offsetof(THPFunction, non_differentiable), 0, NULL},
  {(char*)"dirty_tensors", T_OBJECT, offsetof(THPFunction, dirty_tensors), 0, NULL},
  {(char*)"needs_input_grad", T_OBJECT, offsetof(THPFunction, needs_input_grad), 0, NULL},
  {(char*)"requires_grad", T_BOOL, offsetof(THPFunction, requires_grad), 0, NULL},
  {(char*)"num_inputs", T_INT, offsetof(THPFunction, num_inputs), 0, NULL},
  {(char*)"num_outputs", T_INT, offsetof(THPFunction, num_outputs), 0, NULL},
  {NULL}
};

static struct PyMethodDef THPFunction_methods[] = {
  {(char*)"_do_forward", (PyCFunction)THPFunction_do_forward, METH_VARARGS, NULL},
  {(char*)"_do_backward", (PyCFunction)THPFunction_do_backward, METH_VARARGS, NULL},
  {(char*)"_register_hook_dict", (PyCFunction)THPFunction__register_hook_dict, METH_O, NULL},
  {NULL}
};

PyTypeObject THPFunctionType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "torch._C._FunctionBase",              /* tp_name */
  sizeof(THPFunction),                   /* tp_basicsize */
  0,                                     /* tp_itemsize */
  (destructor)THPFunction_dealloc,       /* tp_dealloc */
  0,                                     /* tp_print */
  0,                                     /* tp_getattr */
  0,                                     /* tp_setattr */
  0,                                     /* tp_reserved */
  0,                                     /* tp_repr */
  0,                                     /* tp_as_number */
  0,                                     /* tp_as_sequence */
  0,                                     /* tp_as_mapping */
  0,                                     /* tp_hash */
  0,                                     /* tp_call */
  0,                                     /* tp_str */
  0,                                     /* tp_getattro */
  0,                                     /* tp_setattro */
  0,                                     /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
  NULL,                                  /* tp_doc */
  (traverseproc)THPFunction_traverse,    /* tp_traverse */
  (inquiry)THPFunction_clear,            /* tp_clear */
  0,                                     /* tp_richcompare */
  0,                                     /* tp_weaklistoffset */
  0,                                     /* tp_iter */
  0,                                     /* tp_iternext */
  THPFunction_methods,                   /* tp_methods */
  THPFunction_members,                   /* tp_members */
  THPFunction_properties,                /* tp_getset */
  0,                                     /* tp_base */
  0,                                     /* tp_dict */
  0,                                     /* tp_descr_get */
  0,                                     /* tp_descr_set */
  0,                                     /* tp_dictoffset */
  0,                                     /* tp_init */
  0,                                     /* tp_alloc */
  THPFunction_new                        /* tp_new */
};

bool THPFunction_initModule(PyObject *module)
{
  if (PyType_Ready(&THPFunctionType) < 0)
    return false;
  Py_INCREF(&THPFunctionType);
  PyModule_AddObject(module, "_FunctionBase", (PyObject *)&THPFunctionType);
  return true;
}

}} // namespace torch::autograd

torch/csrc/autograd/function.h
@ -1,61 +1,73 @@

The old contents, removed by this hunk:

#ifndef THP_FUNCTION_H
#define THP_FUNCTION_H

struct THPFunction;

struct THPFunctionPtr: public THPObjectPtr {
  THPFunctionPtr(): THPObjectPtr(nullptr), output_nr(-1) {};

  THPFunctionPtr(PyObject *fn, int output_nr):
      THPObjectPtr(fn), output_nr(output_nr) {};

  THPFunctionPtr(THPFunction *fn, int output_nr):
      THPObjectPtr((PyObject*)fn), output_nr(output_nr) {};

  THPFunctionPtr(THPFunctionPtr &&other):
      THPObjectPtr(std::move(other)), output_nr(other.output_nr) {}

  THPPointer& operator =(THPFunctionPtr &&other) {
    output_nr = other.output_nr;
    THPObjectPtr::operator=(std::move(other));
    return *this;
  }

  int output_nr;
};

// (class, gpu id, sizes)
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
// (tensor, version when saved, version counter)
// or
// (None, 0, nullptr)
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<THPVariableVersion>>;

struct THPFunction {
    PyObject_HEAD

    PyObject *needs_input_grad;
    PyObject *backward_hooks;
    THPObjectPtr *output_backward_hooks;

    PyObject *to_save;
    PyObject *shared_pairs;
    PyObject *non_differentiable;
    PyObject *dirty_tensors;

    THPFunctionPtr *previous_functions;
    std::vector<output_info_type> *output_info;
    std::vector<saved_var_info_type> *saved_variables;
    int num_inputs;
    int num_outputs;
    char requires_grad;
    char has_freed_buffers;
};

bool THPFunction_initModule(PyObject *module);
extern PyObject *THPFunctionClass;
extern PyObject *THPStochasticFunctionClass;

#define THPFunction_Check(obj) PyObject_IsInstance(obj, THPFunctionClass)

#endif

The new contents, added by this hunk:

#pragma once

// Function is an abstract class that represents a single operation from one
// or more variables to one or more variables.
//
// Subclasses may represent "forward" or "backward" operations (i.e. functions
// and their derivatives). Some functions may be used as both.

#include <memory>
#include <THPP/THPP.h>
#include <vector>

#include "torch/csrc/autograd/saved_variable.h"

namespace torch { namespace autograd {

struct Function;
struct Variable;

using tensor_list = std::vector<std::unique_ptr<thpp::Tensor>>;
using variable_list = std::vector<std::shared_ptr<Variable>>;
using function_list = std::vector<std::pair<std::shared_ptr<Function>, int>>;

// State used to create "backward" functions
struct FunctionFlags {
  bool requires_grad;
  bool is_volatile;
  function_list previous_functions;
};

struct Function {
  Function()
    : num_outputs(0)
    , previous_functions()
    , requires_grad(false)
    , is_volatile(false)
    , is_stochastic(false)
    {}

  Function(FunctionFlags flags)
    : num_outputs(0)
    , previous_functions(std::move(flags.previous_functions))
    , requires_grad(flags.requires_grad)
    , is_volatile(flags.is_volatile)
    , is_stochastic(false)
    {}

  Function(const Function& other) = delete;
  Function(Function&& other) = delete;
  virtual ~Function() {}

  // Implements the operation
  virtual variable_list apply(const variable_list& inputs) = 0;

  // Computes requires_grad, is_volatile, and previous_functions from a list
  // of input variables
  static FunctionFlags flags(const variable_list& inputs);

  // Releases saved variables if the operation won't be reused
  virtual inline void releaseVariables() {}

  // These variables are usually only meaningful for "backward" functions.
  // num_outputs is the number of outputs of the corresponding "forward"
  // function; it's actually the number of inputs of this function.
  int num_outputs;
  function_list previous_functions;
  bool requires_grad;
  bool is_volatile;
  bool is_stochastic;
};

}} // namespace torch::autograd

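`Function::flags` is only declared here. Based on how its result is consumed (note `f.requires_grad &= !f.is_volatile` in the .cpp above), its propagation rule can be sketched in Python roughly as follows (hypothetical helper, not the C++ implementation):

    def compute_flags(inputs):
        # inputs: sequence of Variables (None stands for an absent optional input)
        present = [v for v in inputs if v is not None]
        is_volatile = any(v.volatile for v in present)
        # Volatility dominates: a volatile result never requires grad.
        requires_grad = any(v.requires_grad for v in present) and not is_volatile
        previous_functions = [(v.creator if v.creator else v, v.output_nr)
                              for v in present]
        return requires_grad, is_volatile, previous_functions
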
166
torch/csrc/autograd/functions/batch_normalization.cpp
Normal file
@ -0,0 +1,166 @@
#include "batch_normalization.h"

#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/nn/THNN_generic.h"

#ifdef WITH_CUDNN
#include "torch/csrc/cudnn/BatchNorm.h"
#include "torch/csrc/cudnn/Handles.h"
#include "torch/csrc/cudnn/Types.h"
extern THCState* state;
#endif

namespace torch { namespace autograd {

using thpp::Tensor;

auto BatchNormForward::apply(const variable_list& inputs) -> variable_list {
  if (inputs.size() != 3) throw std::runtime_error("expected three inputs");

  auto& input = inputs[0];
  auto& weight = inputs[1];
  auto& bias = inputs[2];

  bool use_cudnn = false;
#ifdef WITH_CUDNN
  use_cudnn = (input->data->isCuda()
               && input->data->type() != thpp::Type::HALF
               && weight && bias);
#endif

  auto output = input->data->newTensor();
  output->resizeAs(*input->data);

  std::unique_ptr<Tensor> save_mean(output->newTensor());
  save_mean->resizeAs(*running_mean);
  std::unique_ptr<Tensor> save_std(output->newTensor());
  save_std->resizeAs(*running_var);

  if (use_cudnn) {
#ifdef WITH_CUDNN
    torch::cudnn::cudnn_batch_norm_forward(
        state,
        torch::cudnn::getCudnnHandle(),
        torch::cudnn::getCudnnDataType(*input->data),
        (THVoidTensor*)input->data->cdata(),
        (THVoidTensor*)output->cdata(),
        (THVoidTensor*)weight->data->cdata(),
        (THVoidTensor*)bias->data->cdata(),
        (THVoidTensor*)running_mean->cdata(),
        (THVoidTensor*)running_var->cdata(),
        (THVoidTensor*)save_mean->cdata(),
        (THVoidTensor*)save_std->cdata(),
        training,
        momentum,
        eps);
#endif
  } else {
    torch::nn::BatchNormalization_updateOutput(
        input->data.get(),
        output.get(),
        weight ? weight->data.get() : nullptr,
        bias ? bias->data.get() : nullptr,
        running_mean.get(),
        running_var.get(),
        save_mean.get(),
        save_std.get(),
        training,
        momentum,
        eps);
  }

  auto creator = std::make_shared<BatchNormBackward>(
      flags(inputs),
      std::unique_ptr<thpp::Tensor>(running_mean->clone_shallow()),
      std::unique_ptr<thpp::Tensor>(running_var->clone_shallow()),
      std::move(save_mean),
      std::move(save_std),
      input->save(),
      Variable::save_opt(weight.get()),
      Variable::save_opt(bias.get()),
      training,
      momentum,
      eps);
  variable_list results(1);
  results[0] = std::make_shared<Variable>(std::move(output), creator);
  return results;
};

auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_list {
  auto& input = this->input.unpack();
  auto& weight = this->weight.unpack();
  auto& bias = this->bias.unpack();

  bool use_cudnn = false;
#ifdef WITH_CUDNN
  use_cudnn = (input->isCuda()
               && input->type() != thpp::Type::HALF
               && weight && bias && training);
#endif

  std::unique_ptr<Tensor> grad_input = input->newTensor();
  grad_input->resizeAs(*input);

  std::unique_ptr<Tensor> grad_weight;
  if (weight) {
    grad_weight = weight->newTensor();
    grad_weight->resizeAs(*weight);
    if (!use_cudnn) {
      grad_weight->zero();
    }
  }

  std::unique_ptr<Tensor> grad_bias;
  if (bias) {
    grad_bias = bias->newTensor();
    grad_bias->resizeAs(*bias);
    if (!use_cudnn) {
      grad_bias->zero();
    }
  }

  if (use_cudnn) {
#ifdef WITH_CUDNN
    torch::cudnn::cudnn_batch_norm_backward(
        state,
        torch::cudnn::getCudnnHandle(),
        torch::cudnn::getCudnnDataType(*input),
        (THVoidTensor*)input->cdata(),
        (THVoidTensor*)grad_outputs[0]->data->cdata(),
        (THVoidTensor*)grad_input->cdata(),
        (THVoidTensor*)grad_weight->cdata(),
        (THVoidTensor*)grad_bias->cdata(),
        (THVoidTensor*)weight->cdata(),
        (THVoidTensor*)running_mean->cdata(),
        (THVoidTensor*)running_var->cdata(),
        (THVoidTensor*)save_mean->cdata(),
        (THVoidTensor*)save_std->cdata(),
        training,
        eps);
#endif
  } else {
    torch::nn::BatchNormalization_backward(
        input.get(),
        grad_outputs[0]->data.get(),
        grad_input.get(),
        grad_weight.get(),
        grad_bias.get(),
        weight.get(),
        running_mean.get(),
        running_var.get(),
        save_mean.get(),
        save_std.get(),
        training,
        1.0,
        eps);
  }

  variable_list results(3);
  results[0] = Variable::of(std::move(grad_input));
  results[1] = Variable::of(std::move(grad_weight));
  results[2] = Variable::of(std::move(grad_bias));
  return results;
};

}} // namespace torch::autograd

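The file above follows the template every C++ autograd function in this commit uses: the forward `apply()` computes the result, then installs a backward `Function` (carrying `flags(inputs)` plus saved state) as the output Variable's creator. A runnable toy analogue in Python (illustrative names only, not this repo's API):

    from collections import namedtuple
    Variable = namedtuple("Variable", "data creator")

    class MulConstant:
        """Forward op: y = c * x. Mirrors the shape of BatchNormForward::apply."""
        def __init__(self, c): self.c = c
        def apply(self, inputs):
            (x,) = inputs
            backward = MulConstantBackward(self.c)        # saved state travels here
            return [Variable(x.data * self.c, backward)]  # backward becomes creator

    class MulConstantBackward:
        """Backward op: dL/dx = c * dL/dy."""
        def __init__(self, c): self.c = c
        def apply(self, grad_outputs):
            (g,) = grad_outputs
            return [g * self.c]
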
72
torch/csrc/autograd/functions/batch_normalization.h
Normal file
@ -0,0 +1,72 @@
#pragma once

#include <memory>
#include <THPP/THPP.h>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"

namespace torch { namespace autograd {

struct BatchNormForward : public Function {
  BatchNormForward(
      std::unique_ptr<thpp::Tensor> running_mean,
      std::unique_ptr<thpp::Tensor> running_var,
      bool training,
      double momentum,
      double eps)
    : running_mean(std::move(running_mean))
    , running_var(std::move(running_var))
    , training(training)
    , momentum(momentum)
    , eps(eps) {}

  virtual variable_list apply(const variable_list& inputs) override;

  std::unique_ptr<thpp::Tensor> running_mean;
  std::unique_ptr<thpp::Tensor> running_var;
  bool training;
  double momentum;
  double eps;
};

struct BatchNormBackward : public Function {
  BatchNormBackward(
      FunctionFlags flags,
      std::unique_ptr<thpp::Tensor> running_mean,
      std::unique_ptr<thpp::Tensor> running_var,
      std::unique_ptr<thpp::Tensor> save_mean,
      std::unique_ptr<thpp::Tensor> save_std,
      SavedVariable input,
      SavedVariable weight,
      SavedVariable bias,
      bool training,
      double momentum,
      double eps)
    : Function(std::move(flags))
    , running_mean(std::move(running_mean))
    , running_var(std::move(running_var))
    , save_mean(std::move(save_mean))
    , save_std(std::move(save_std))
    , input(std::move(input))
    , weight(std::move(weight))
    , bias(std::move(bias))
    , training(training)
    , momentum(momentum)
    , eps(eps) {}

  virtual variable_list apply(const variable_list& gradOutputs) override;

  std::unique_ptr<thpp::Tensor> running_mean;
  std::unique_ptr<thpp::Tensor> running_var;
  std::unique_ptr<thpp::Tensor> save_mean;
  std::unique_ptr<thpp::Tensor> save_std;
  SavedVariable input;
  SavedVariable weight;
  SavedVariable bias;
  bool training;
  double momentum;
  double eps;
};

}} // namespace torch::autograd

55
torch/csrc/autograd/functions/init.cpp
Normal file
@ -0,0 +1,55 @@
#include <Python.h>
#include "batch_normalization.h"
#include "torch/csrc/autograd/python_cpp_function.h"

using namespace torch::autograd;

static PyTypeObject BatchNormClass;
static PyTypeObject BatchNormBackwardClass;

struct BatchNormCtor {
  BatchNormForward* operator()(PyObject* args) {
    std::unique_ptr<thpp::Tensor> running_mean;
    std::unique_ptr<thpp::Tensor> running_var;
    char training;
    double momentum;
    double eps;

    if (!PyArg_ParseTuple(args, "O&O&Bdd:BatchNorm",
          TensorConverter, &running_mean,
          TensorConverter, &running_var,
          &training, &momentum, &eps)) {
      return NULL;
    }

    return new BatchNormForward(
        std::move(running_mean),
        std::move(running_var),
        (bool)training,
        momentum,
        eps);
  }
};

struct NoCtor {
  Function* operator()(PyObject* args) {
    throw std::runtime_error("Cannot construct");
  }
};

template<typename C, typename T>
static void addClass(PyObject* module, PyTypeObject& type, const char* name)
{
  createForwardFunctionPyTypeObject<T>(type, name);
  Py_INCREF(&type);
  PyModule_AddObject(module, name, (PyObject*)&type);
  registerCppFunction(typeid(C), &type);
}

bool THPAutograd_initFunctions(PyObject* _unused)
{
  THPObjectPtr module = PyImport_ImportModule("torch.nn._functions.thnn");
  addClass<BatchNormForward, BatchNormCtor>(module, BatchNormClass, "BatchNorm");
  addClass<BatchNormBackward, NoCtor>(module, BatchNormBackwardClass, "BatchNormBackward");
  return true;
}

46
torch/csrc/autograd/grad_buffer.cpp
Normal file
@ -0,0 +1,46 @@
#include "torch/csrc/autograd/grad_buffer.h"

#ifdef WITH_CUDA
#include "torch/csrc/cuda/AutoGPU.h"
#endif

namespace torch { namespace autograd {

GradBuffer::GradBuffer(size_t size)
  : buffer(size)
  {}

auto GradBuffer::addGrad(size_t pos, std::shared_ptr<Variable>&& var) -> void {
  auto& item = buffer[pos];
  if (!var) {
    return;
  }
  auto& tensor = var->data;
  if (!item.first) {
    buffer[pos] = std::make_pair<>(std::move(tensor), true);
  } else {
#ifdef WITH_CUDA
    THCPAutoGPU auto_gpu(tensor->getDevice());
#endif
    if (item.second) {
      item.first.reset(item.first->clone());
      item.second = false;
    }
    item.first->cadd(*item.first, *tensor);
  }
}

auto GradBuffer::variables(GradBuffer&& g) -> std::vector<std::shared_ptr<Variable>> {
  auto buffer = std::move(g.buffer);
  int size = buffer.size();
  std::vector<std::shared_ptr<Variable>> result(size);
  for (int i = 0; i != size; ++i) {
    if (buffer[i].first) {
      result[i] = std::make_shared<Variable>(
          std::move(buffer[i].first), false, true);
    }
  }
  return result;
}

}} // namespace torch::autograd

31
torch/csrc/autograd/grad_buffer.h
Normal file
@ -0,0 +1,31 @@
#pragma once

// The GradBuffer class accumulates a list of gradients for use by a
// "backward" function. It implements logic to avoid modifying the passed
// gradients in-place.

#include <vector>
#include <utility>
#include <memory>
#include <THPP/THPP.h>

#include "torch/csrc/autograd/variable.h"

namespace torch { namespace autograd {

struct GradBuffer {
  explicit GradBuffer(size_t size);
  GradBuffer(const GradBuffer& other) = delete;
  GradBuffer(GradBuffer&& other) = default;

  // Accumulates the gradient "var" at the specified index
  void addGrad(size_t idx, std::shared_ptr<Variable>&& var);

  // Returns the gradients as a list of variables. Destroys this GradBuffer.
  static std::vector<std::shared_ptr<Variable>> variables(GradBuffer&& buffer);

private:
  std::vector<std::pair<std::unique_ptr<thpp::Tensor>, bool>> buffer;
};

}} // namespace torch::autograd

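The bool in each buffer pair marks whether the stored tensor is still borrowed from the incoming Variable. The first gradient for a slot is kept by reference; only when a second gradient arrives is it cloned and accumulated into, a copy-on-write scheme. A Python rendering of `addGrad` (illustrative, assuming torch-style tensors):

    class GradBufferSketch:
        """Mirrors GradBuffer's copy-on-write accumulation (illustrative)."""
        def __init__(self, size):
            self.buffer = [None] * size        # entries: [tensor, still_borrowed]

        def add_grad(self, pos, var):
            if var is None:
                return
            entry = self.buffer[pos]
            if entry is None:
                # First gradient for this slot: borrow it, don't copy yet.
                self.buffer[pos] = [var.data, True]
            else:
                if entry[1]:
                    # About to mutate a borrowed tensor: clone first, so the
                    # originally passed gradient is never modified in place.
                    entry[0] = entry[0].clone()
                    entry[1] = False
                entry[0].add_(var.data)
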
133
torch/csrc/autograd/python_cpp_function.cpp
Normal file
@ -0,0 +1,133 @@
#include "torch/csrc/autograd/python_cpp_function.h"

#include <Python.h>
#include <memory>
#include <stdio.h>
#include <THPP/THPP.h>
#include <typeindex>
#include <unordered_map>

#include "torch/csrc/autograd/python_function.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"

using namespace torch::autograd;

namespace torch { namespace autograd {

namespace {

PyObject* THPCppFunction_call(PyObject* self, PyObject* args, PyObject *kwargs)
{
  if (kwargs && PyDict_Size(kwargs) != 0) {
    return PyErr_Format(PyExc_TypeError, "keyword arguments are not supported");
  }

  int num_inputs = PyTuple_GET_SIZE(args);
  variable_list vars(num_inputs);
  for (int i = 0; i != num_inputs; ++i) {
    PyObject* arg = PyTuple_GET_ITEM(args, i);
    if (arg == Py_None) {
      continue;
    }
    if (!THPVariable_Check(arg)) {
      return PyErr_Format(PyExc_TypeError, "argument %d is not a Variable", i);
    }
    vars[i] = ((THPVariable*)arg)->cdata;
  }

  variable_list output;

  HANDLE_TH_ERRORS {
    AutoNoGIL nogil;
    output = ((THPCppFunction*)self)->cdata->apply(vars);
  }
  END_HANDLE_TH_ERRORS

  int num_outputs = output.size();
  if (num_outputs == 1) {
    // assume we want to unpack one element tuples for now
    return THPVariable_Wrap(output[0]);
  }

  THPObjectPtr tuple = PyTuple_New(num_outputs);
  for (int i = 0; i != num_outputs; ++i) {
    PyTuple_SET_ITEM(tuple.get(), i, THPVariable_Wrap(output[i]));
  }
  return tuple.release();
}

void THPCppFunction_dealloc(PyObject* self)
{
  ((THPCppFunction*)self)->cdata.~shared_ptr();
  Py_TYPE(self)->tp_free(self);
}

} // namespace

int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address)
{
  try {
    *address = createTensor(obj);
  } catch (std::exception& e) {
    PyErr_Format(PyExc_TypeError,
        "expected a tensor, got %s", Py_TYPE(obj)->tp_name);
    return 0;
  }
  return 1;
}

PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name)
{
  type.tp_flags = Py_TPFLAGS_DEFAULT;
  type.tp_name = name;
  type.tp_basicsize = sizeof(THPCppFunction);
  type.tp_call = THPCppFunction_call;
  type.tp_dealloc = THPCppFunction_dealloc;
  if (PyType_Ready(&type) < 0) {
    auto msg = std::string("Unable to instantiate PyTypeObject for ") + name;
    throw std::runtime_error(msg);
  }
  return &type;
}

static std::unordered_map<std::type_index, THPObjectPtr> cpp_function_types;

PyObject* functionToPyObject(std::shared_ptr<Function> cdata)
{
  if (auto pfw = dynamic_cast<PyFunction*>(cdata.get())) {
    PyObject* obj = pfw->obj;
    Py_INCREF(obj);
    return obj;
  }

  if (auto var = std::dynamic_pointer_cast<Variable>(cdata)) {
    return THPVariable_Wrap(var);
  }

  auto it = cpp_function_types.find(std::type_index(typeid(*cdata)));
  if (it == cpp_function_types.end()) {
    return PyErr_Format(PyExc_TypeError,
        "Don't know how to create Python object for %s", typeid(*cdata).name());
  }

  PyTypeObject* type = (PyTypeObject*)it->second.get();
  THPObjectPtr obj = type->tp_alloc(type, 0);
  if (!obj) return NULL;
  THPCppFunction* f = (THPCppFunction*)obj.get();
  new (&f->cdata) std::shared_ptr<Function>(cdata);
  if (!f->cdata) {
    return NULL;
  }
  return obj.release();
}

void registerCppFunction(const std::type_info& type, PyTypeObject* pytype)
{
  Py_INCREF((PyObject*)pytype);
  cpp_function_types[std::type_index(type)] = THPObjectPtr((PyObject*)pytype);
}

}} // namespace torch::autograd

45
torch/csrc/autograd/python_cpp_function.h
Normal file
@ -0,0 +1,45 @@
#pragma once

#include <Python.h>
#include <memory>
#include <typeinfo>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/utils/object_ptr.h"

namespace torch { namespace autograd {

struct THPCppFunction {
  PyObject_HEAD
  std::shared_ptr<Function> cdata;
};

template<typename Ctor>
PyObject* CppFunction_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
  THPObjectPtr obj = type->tp_alloc(type, 0);
  if (!obj) return NULL;
  THPCppFunction* f = (THPCppFunction*)obj.get();
  new (&f->cdata) std::shared_ptr<Function>(Ctor()(args));
  if (!f->cdata) {
    return NULL;
  }
  return obj.release();
}

PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name);

template<typename Ctor>
PyTypeObject* createForwardFunctionPyTypeObject(PyTypeObject& type, const char* name)
{
  type.tp_new = &CppFunction_pynew<Ctor>;
  return _initFunctionPyTypeObject(type, name);
}

// conversion utilities for PyArg_ParseTuple
int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address);

void registerCppFunction(const std::type_info& type, PyTypeObject* pytype);
PyObject* functionToPyObject(std::shared_ptr<Function> cdata);

}} // namespace torch::autograd

129
torch/csrc/autograd/python_engine.cpp
Normal file
@ -0,0 +1,129 @@
#include "torch/csrc/autograd/python_engine.h"

#include "torch/csrc/autograd/engine.h"
#include "torch/csrc/THP.h"
#include "torch/csrc/DynamicTypes.h"

using namespace torch::autograd;

struct THPEngine {
    PyObject_HEAD
};

PyObject *THPEngineClass = NULL;

// Main backward function
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
{
  PyObject *variables = NULL;
  PyObject *grad_variables = NULL;
  unsigned char retain_variables = 0;
  const char *accepted_kwargs[] = {"variables", "grad_variables",
      "retain_variables", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
        &variables, &grad_variables, &retain_variables))
    return NULL;
  PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;

  THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
      "retain_variables argument is expected to be a bool, but got %s",
      THPUtils_typename(retain_variables_obj));
  THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
      "be a tuple, but got %s", THPUtils_typename(variables));
  THPUtils_assert(PyTuple_Check(grad_variables), "grad_variables argument is "
      "expected to be a tuple, but got %s", THPUtils_typename(grad_variables));

  Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
  Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
  THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
      "gradients", num_variables, num_gradients);

  variable_list vars(num_variables);
  tensor_list grads(num_variables);
  for (int i = 0; i < num_variables; i++) {
    PyObject *variable = PyTuple_GET_ITEM(variables, i);
    THPUtils_assert(THPVariable_Check(variable), "element %d of variables "
        "tuple is not a Variable", i);
    vars[i] = ((THPVariable*)variable)->cdata;

    PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
    if (THPModule_isTensor(grad)) {
      grads[i] = torch::createTensor(grad);
    } else {
      THPUtils_assert(grad == Py_None,
          "element %d of gradients tuple is not a Tensor or None", i);
    }
  }

  try {
    Engine::backward(vars, grads, retain_variables);
  } catch (python_error &e) {
    return nullptr;
  } catch (std::exception &e) {
    PyErr_SetString(PyExc_RuntimeError, e.what());
    return nullptr;
  }

  Py_RETURN_NONE;
}

PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  return type->tp_alloc(type, 0);
}

static struct PyMethodDef THPEngine_methods[] = {
  {(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
  {NULL}
};


PyTypeObject THPEngineType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "torch._C._EngineBase",                /* tp_name */
  sizeof(THPEngine),                     /* tp_basicsize */
  0,                                     /* tp_itemsize */
  0,                                     /* tp_dealloc */
  0,                                     /* tp_print */
  0,                                     /* tp_getattr */
  0,                                     /* tp_setattr */
  0,                                     /* tp_reserved */
  0,                                     /* tp_repr */
  0,                                     /* tp_as_number */
  0,                                     /* tp_as_sequence */
  0,                                     /* tp_as_mapping */
  0,                                     /* tp_hash */
  0,                                     /* tp_call */
  0,                                     /* tp_str */
  0,                                     /* tp_getattro */
  0,                                     /* tp_setattro */
  0,                                     /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  NULL,                                  /* tp_doc */
  0,                                     /* tp_traverse */
  0,                                     /* tp_clear */
  0,                                     /* tp_richcompare */
  0,                                     /* tp_weaklistoffset */
  0,                                     /* tp_iter */
  0,                                     /* tp_iternext */
  THPEngine_methods,                     /* tp_methods */
  0,                                     /* tp_members */
  0,                                     /* tp_getset */
  0,                                     /* tp_base */
  0,                                     /* tp_dict */
  0,                                     /* tp_descr_get */
  0,                                     /* tp_descr_set */
  0,                                     /* tp_dictoffset */
  0,                                     /* tp_init */
  0,                                     /* tp_alloc */
  THPEngine_new                          /* tp_new */
};

bool THPEngine_initModule(PyObject *module)
{
  if (PyType_Ready(&THPEngineType) < 0)
    return false;
  Py_INCREF(&THPEngineType);
  PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
  return true;
}

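For orientation, this is roughly how the binding is reached from Python. The sketch assumes the module wiring in this commit; the keyword names match `accepted_kwargs` above, and each gradient is a Tensor or None (None entries become zero-filled buffers downstream):

    import torch
    from torch.autograd import Variable

    x = Variable(torch.ones(5), requires_grad=True)
    y = x * 2

    engine = torch._C._ImperativeEngine()
    # variables and grad_variables are parallel tuples of equal length
    engine.run_backward(variables=(y,), grad_variables=(torch.ones(5),),
                        retain_variables=False)
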
5
torch/csrc/autograd/python_engine.h
Normal file
@ -0,0 +1,5 @@
#pragma once

#include <Python.h>

bool THPEngine_initModule(PyObject *module);

1084
torch/csrc/autograd/python_function.cpp
Normal file
File diff suppressed because it is too large
59
torch/csrc/autograd/python_function.h
Normal file
@ -0,0 +1,59 @@
#pragma once

#include <Python.h>
#include <vector>
#include <utility>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/utils/object_ptr.h"

// (class, gpu id, sizes)
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
// (tensor, version when saved, version counter)
// or
// (None, 0, nullptr)
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<torch::autograd::VariableVersion>>;

namespace torch { namespace autograd {

struct PyFunction : public Function {
  PyFunction(PyObject* obj) : obj(obj) {}

  virtual variable_list apply(const variable_list& inputs) override;
  virtual void releaseVariables() override;

  PyObject* obj;
};

}} // namespace torch::autograd

struct THPFunction {
    PyObject_HEAD

    PyObject *needs_input_grad;
    PyObject *backward_hooks;
    THPObjectPtr *output_backward_hooks;

    PyObject *to_save;
    PyObject *shared_pairs;
    PyObject *non_differentiable;
    PyObject *dirty_tensors;

    std::vector<output_info_type> *output_info;
    std::vector<saved_var_info_type> *saved_variables;
    int num_inputs;
    char has_freed_buffers;

    torch::autograd::PyFunction cdata;
};

bool THPFunction_initModule(PyObject *module);
extern PyObject *THPFunctionClass;
extern PyObject *THPStochasticFunctionClass;

std::shared_ptr<torch::autograd::PyFunction> THPFunction_asFunction(THPFunction* self);

inline bool THPFunction_Check(PyObject* obj) {
  return PyObject_IsInstance(obj, THPFunctionClass);
}

404
torch/csrc/autograd/python_variable.cpp
Normal file
@ -0,0 +1,404 @@
#include "torch/csrc/autograd/python_variable.h"

#include <structmember.h>

#include "THP.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Types.h"
#include "torch/csrc/autograd/python_cpp_function.h"
#include "torch/csrc/cuda/AutoGPU.h"
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/Exceptions.h"
#include <THPP/tensors/THTensor.hpp>


using namespace torch::autograd;

PyObject *THPVariableClass = NULL;

static PyObject* THPVariable_NewWithVar(PyTypeObject* type, std::shared_ptr<Variable> var)
{
  PyObject* obj = type->tp_alloc(type, 0);
  if (obj) {
    auto v = (THPVariable*) obj;
    new (&v->cdata) std::shared_ptr<Variable>(std::move(var));
  }
  return obj;
}

PyObject * THPVariable_Wrap(const std::shared_ptr<Variable>& var)
{
  if (var->pyobj) {
    Py_INCREF(var->pyobj);
  } else {
    var->pyobj = THPVariable_NewWithVar((PyTypeObject *)THPVariableClass, var);
  }
  return var->pyobj;
}

// This function DOES NOT steal a reference to data and creator
// To create a leaf Variable pass NULL as creator.
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile)
{
  THPUtils_assert(THPModule_isTensor(data), "data must be a Tensor");
  THPUtils_assert(!creator || THPFunction_Check(creator), "creator must be a Function");
  auto v = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
  PyObject* obj = THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, v);
  if (obj) {
    v->pyobj = obj;
    v->creator = THPFunction_asFunction((THPFunction*)creator);
    ((THPVariable*)obj)->data = data;
    Py_INCREF(data);
  }
  return obj;
}

// This function DOES NOT steal a reference to data
PyObject * THPVariable_NewVolatile(PyObject *data)
{
  return THPVariable_New(data, nullptr, false, true);
}

static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
{
  Py_VISIT(self->data);
  Py_VISIT(self->backward_hooks);
  return 0;
}

static int THPVariable_clear(THPVariable *self)
{
  Py_CLEAR(self->data);
  Py_CLEAR(self->backward_hooks);
  return 0;
}

static void THPVariable_dealloc(THPVariable* self)
{
  PyObject_GC_UnTrack(self);
  Py_XDECREF(self->data);
  Py_XDECREF(self->backward_hooks);
  self->cdata->pyobj = nullptr;
  self->cdata.~shared_ptr<Variable>();
  Py_TYPE(self)->tp_free((PyObject*)self);
}

PyObject *THPVariable_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
  THPObjectPtr _data;
  PyObject *data = NULL;
  PyObject *creator = NULL;
  char is_volatile = 0;
  char requires_grad = 0;

  const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OObb", (char**)accepted_args,
        &data, &creator, &is_volatile, &requires_grad))
    return NULL;

  if (creator == Py_None)
    creator = NULL;

  if (data == NULL || data == Py_None) {
    // For legacy serialization code, create an empty tensor temporarily.
    thpp::THTensor<float> tensor;
    _data = torch::createPyObject(tensor);
    data = _data.get();
  }

  THPUtils_assert(!(is_volatile && requires_grad),
      "Variable can't be volatile and require_grad at the same time!");
  THPUtils_assert(!creator || THPFunction_Check(creator),
      "Variable creator has to be a Function object or None, but got %s",
      THPUtils_typename(creator));
  THPUtils_assert(THPModule_isTensor(data), "Variable data has to "
      "be a tensor, but got %s", THPUtils_typename(data));

  auto var = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
  PyObject* self = THPVariable_NewWithVar(type, var);
  if (self) {
    var->pyobj = self;
    var->creator = THPFunction_asFunction((THPFunction*)creator);
    ((THPVariable*)self)->cdata = var;
    ((THPVariable*)self)->data = data;
    Py_INCREF(data);
  }

  return self;
}

int THPVariable_pyinit(PyObject *self, PyObject *args, PyObject *kwds)
{
  // Ensures that calls to Variable() and subclasses contain data argument.
  // The 'data' argument is optional in __new__ to handle legacy serialized
  // Variables.
  PyObject *data;
  PyObject *creator = NULL;
  char is_volatile = 0;
  char requires_grad = 0;

  const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Obb", (char**)accepted_args,
        &data, &creator, &is_volatile, &requires_grad))
    return -1;

  return 0;
}

typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);

PyObject *THPVariable_get_version(THPVariable *self)
{
  auto& var = *self->cdata;
  return PyInt_FromLong(**var.version_counter);
}

PyObject *THPVariable_get_creator(THPVariable *self)
{
  auto& var = *self->cdata;
  if (!var.creator) {
    Py_RETURN_NONE;
  }
  return functionToPyObject(var.creator);
}

int THPVariable_set_creator(THPVariable *self, PyObject *obj)
{
  THPUtils_assertRet(-1, obj == Py_None, "_creator can be only set to None");
  self->cdata->creator = nullptr;
  return 0;
}

PyObject * THPVariable_get_data(THPVariable *self)
{
  if (!self->data) {
    auto& var = *self->cdata;
    PyTypeObject* type = torch::getPyTypeObject(*var.data);
    self->data = type->tp_alloc(type, 0);
    if (self->data) {
      ((torch::THPVoidTensor*)self->data)->cdata =
          (torch::THVoidTensor *)var.data->retain().cdata();
    }
  }
  Py_INCREF(self->data);
  return self->data;
}

int THPVariable_set_data(THPVariable *self, PyObject *data)
{
  THPUtils_assertRet(-1, THPModule_isTensor(data), "Variable data has to "
      "be a tensor, but got %s", THPUtils_typename(data));
  Py_INCREF(data);
  Py_XDECREF(self->data);
  self->data = data;
  auto& var = *self->cdata;
  auto tensor = torch::createTensor(data);
  var.data.swap(tensor);
  return 0;
}

PyObject *THPVariable_get_raw_grad(THPVariable *self)
{
  auto& var = *self->cdata;
  if (!var.grad) {
    Py_RETURN_NONE;
  }
  return THPVariable_Wrap(var.grad);
}

int THPVariable_set_raw_grad(THPVariable *self, PyObject *data)
{
  auto& var = *self->cdata;
  if (data == Py_None) {
    var.grad.reset();
    return 0;
  }
  THPUtils_assertRet(-1, THPVariable_Check(data),
      "expected Variable or None (got %s)", THPUtils_typename(data));
  var.grad = ((THPVariable*)data)->cdata;
  return 0;
}

PyObject *THPVariable_get_grad(THPVariable *self)
{
  auto& var = *self->cdata;
  if (!var.grad) {
#ifdef WITH_CUDA
    THCPAutoGPU __guard(var.data->getDevice());
#endif
    auto grad = var.data->newTensor();
    grad->resizeAs(*var.data).zero();
    var.grad = std::make_shared<Variable>(std::move(grad), 0, 1);
  }
  return THPVariable_Wrap(var.grad);
}

PyObject *THPVariable_get_volatile(THPVariable *self)
{
  auto& var = *self->cdata;
  return PyBool_FromLong(var.is_volatile);
}

int THPVariable_set_volatile(THPVariable *self, PyObject *obj)
{
  THPUtils_assertRet(-1, PyBool_Check(obj), "volatile must be a bool");
  THPUtils_assertRet(-1, !self->cdata->creator,
      "volatile can only be set on leaf variables");
  auto& var = *self->cdata;
  var.is_volatile = (obj == Py_True);
  return 0;
}

PyObject *THPVariable_get_output_nr(THPVariable *self)
{
  auto& var = *self->cdata;
  return PyInt_FromLong(var.output_nr);
}

PyObject *THPVariable_get_requires_grad(THPVariable *self)
{
  auto& var = *self->cdata;
  return PyBool_FromLong(var.requires_grad);
}

int THPVariable_set_requires_grad(THPVariable *self, PyObject *obj)
{
  THPUtils_assertRet(-1, PyBool_Check(obj), "requires_grad must be a bool");
  auto& var = *self->cdata;
  if (var.creator) {
    const char *hint = "";
    if (obj == Py_False) {
      hint = " If you want to use a computed variable in a subgraph "
             "that doesn't require differentiation use "
             "var_no_grad = var.detach().";
    }
    THPUtils_setError("you can only change requires_grad flags of leaf variables.%s", hint);
    return -1;
  }
  var.requires_grad = (obj == Py_True);
  return 0;
}

struct PyVariableHook : public VariableHook {
  PyVariableHook(PyObject* dict) : dict(dict) {
    Py_INCREF(dict);
  }
  ~PyVariableHook() {
    AutoGIL gil;
    Py_DECREF(dict);
  }

  std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& _grad) override {
    AutoGIL gil;

    THPObjectPtr grad = THPVariable_Wrap(_grad);
    if (!grad) throw python_error();

    PyObject *key, *value;
    Py_ssize_t pos = 0;
    while (PyDict_Next(dict, &pos, &key, &value)) {
      THPObjectPtr res = PyObject_CallFunctionObjArgs(value, grad.get(), nullptr);
      if (!res) throw python_error();
      if (res == Py_None) continue;
      if (!PyObject_IsInstance(res.get(), THPVariableClass)) {
        PyErr_Format(PyExc_TypeError, "expected Variable, but hook returned '%s'",
            THPUtils_typename(res.get()));
        throw python_error();
      }
      grad = std::move(res);
    }
    return ((THPVariable*)grad.get())->cdata;
  }

  PyObject* dict;
};

PyObject *THPVariable_get_backwards_hooks(THPVariable *self)
{
  if (self->backward_hooks) {
    Py_INCREF(self->backward_hooks);
    return self->backward_hooks;
  }
  Py_RETURN_NONE;
}

int THPVariable_set_backwards_hooks(THPVariable *self, PyObject *obj)
{
  if (obj == Py_None) {
    obj = nullptr;
  }
  Py_XINCREF(obj);
  Py_XDECREF(self->backward_hooks);
  self->backward_hooks = obj;
  if (obj) {
    self->cdata->backward_hook.reset(new PyVariableHook(obj));
  } else {
    self->cdata->backward_hook.reset();
  }
  return 0;
}

static struct PyGetSetDef THPVariable_properties[] = {
  {"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
  {"creator", (getter)THPVariable_get_creator, NULL, NULL, NULL},
  {"_creator", (getter)THPVariable_get_creator, (setter)THPVariable_set_creator, NULL, NULL},
  {"data", (getter)THPVariable_get_data, (setter)THPVariable_set_data, NULL, NULL},
  {"_grad", (getter)THPVariable_get_raw_grad, (setter)THPVariable_set_raw_grad, NULL, NULL},
  {"grad", (getter)THPVariable_get_grad, NULL, NULL, NULL},
  {"volatile", (getter)THPVariable_get_volatile, (setter)THPVariable_set_volatile, NULL, NULL},
  {"output_nr", (getter)THPVariable_get_output_nr, NULL, NULL, NULL},
  {"requires_grad", (getter)THPVariable_get_requires_grad, (setter)THPVariable_set_requires_grad, NULL, NULL},
  {"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, NULL, NULL},
  {NULL}
};

PyTypeObject THPVariableType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "torch._C._VariableBase",              /* tp_name */
  sizeof(THPVariable),                   /* tp_basicsize */
  0,                                     /* tp_itemsize */
  (destructor)THPVariable_dealloc,       /* tp_dealloc */
  0,                                     /* tp_print */
  0,                                     /* tp_getattr */
  0,                                     /* tp_setattr */
  0,                                     /* tp_reserved */
  0,                                     /* tp_repr */
  0,                                     /* tp_as_number */
  0,                                     /* tp_as_sequence */
  0,                                     /* tp_as_mapping */
  0,                                     /* tp_hash */
  0,                                     /* tp_call */
  0,                                     /* tp_str */
  0,                                     /* tp_getattro */
  0,                                     /* tp_setattro */
  0,                                     /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
  NULL,                                  /* tp_doc */
  (traverseproc)THPVariable_traverse,    /* tp_traverse */
  (inquiry)THPVariable_clear,            /* tp_clear */
  0,                                     /* tp_richcompare */
  0,                                     /* tp_weaklistoffset */
  0,                                     /* tp_iter */
  0,                                     /* tp_iternext */
  0,                                     /* tp_methods */
  0,                                     /* tp_members */
  THPVariable_properties,                /* tp_getset */
  0,                                     /* tp_base */
  0,                                     /* tp_dict */
  0,                                     /* tp_descr_get */
  0,                                     /* tp_descr_set */
  0,                                     /* tp_dictoffset */
  THPVariable_pyinit,                    /* tp_init */
  0,                                     /* tp_alloc */
  THPVariable_pynew                      /* tp_new */
};

bool THPVariable_initModule(PyObject *module)
{
  if (PyType_Ready(&THPVariableType) < 0)
    return false;
  Py_INCREF(&THPVariableType);
  PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
  return true;
}

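`PyVariableHook` gives a Variable's `_backward_hooks` the same replace-or-pass-through semantics as the function hooks earlier, but over a single gradient: each hook receives the current grad Variable, and a non-None return value becomes the new grad. The equivalent Python loop, sketched with hypothetical names:

    def run_variable_hooks(hooks, grad):
        # hooks: dict of name -> callable, as stored in _backward_hooks
        for hook in hooks.values():
            result = hook(grad)
            if result is not None:   # None means "leave the gradient unchanged"
                grad = result        # must be a Variable, enforced in the C++
        return grad
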
25
torch/csrc/autograd/python_variable.h
Normal file
@ -0,0 +1,25 @@
#pragma once

#include <Python.h>
#include <memory>

#include "torch/csrc/autograd/variable.h"

struct THPVariable {
    PyObject_HEAD
    std::shared_ptr<torch::autograd::Variable> cdata;
    PyObject* data;
    PyObject* backward_hooks;
};

bool THPVariable_initModule(PyObject *module);
extern PyObject *THPVariableClass;
PyObject * THPVariable_NewVolatile(PyObject *data);
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile=false);
PyObject * THPVariable_Wrap(const std::shared_ptr<torch::autograd::Variable>& var);
PyObject * THPVariable_get_data(THPVariable *self);

inline bool THPVariable_Check(PyObject *obj)
{
  return THPVariableClass && PyObject_IsInstance(obj, THPVariableClass);
}

31
torch/csrc/autograd/saved_variable.h
Normal file
@ -0,0 +1,31 @@
#pragma once

#include <THPP/THPP.h>
#include <memory>

namespace torch { namespace autograd {

struct VariableVersion;

struct SavedVariable {
  SavedVariable()
    : data()
    , expected_version(-1)
    , version() {}

  SavedVariable(
      std::unique_ptr<thpp::Tensor> data,
      int expected_version,
      std::unique_ptr<VariableVersion> version)
    : data(std::move(data))
    , expected_version(expected_version)
    , version(std::move(version)) {}

  std::unique_ptr<thpp::Tensor> data;
  int expected_version;
  std::unique_ptr<VariableVersion> version;

  std::unique_ptr<thpp::Tensor>& unpack();
};

}} // namespace torch::autograd

@ -1,276 +1,105 @@
|
||||
#include <Python.h>
-#include <structmember.h>
+#include "torch/csrc/autograd/variable.h"

#include "THP.h"
#ifdef WITH_CUDA
#include "torch/csrc/cuda/AutoGPU.h"
#endif

-PyObject *THPVariableClass = NULL;
+using namespace torch;
+using namespace thpp;

-constexpr size_t CACHE_SIZE = 100000;
-static THPVariable *cached_variables[CACHE_SIZE];
-static size_t num_cached;
+namespace torch { namespace autograd {

-// This helper steals a reference to data and creator
-static inline THPVariable * pop_cache(PyObject *data, PyObject *creator, char requires_grad)
+Variable::Variable(
+    std::unique_ptr<thpp::Tensor> data,
+    bool requires_grad,
+    bool is_volatile)
+  : data(std::move(data))
+  , creator(nullptr)
+  , grad(nullptr)
+  , version_counter(new VariableVersion())
+  , output_nr(0)
+  , backward_hook()
+  , pyobj(nullptr)
{
-  THPVariable *self = cached_variables[--num_cached];
-  PyObject_Init((PyObject*)self, Py_TYPE(self));
-  PyObject_GC_Track(self);

-  self->is_volatile = 0;
-  self->version_counter = new THPVariableVersion();
-  self->grad = NULL;
-  self->backward_hooks = NULL;
-  self->requires_grad = requires_grad;

-  self->data = data;
-  self->creator = creator;
-  return self;
+  if (!this->data) {
+    throw std::runtime_error("Variable data is NULL");
+  }
+  this->is_volatile = is_volatile;
+  this->requires_grad = requires_grad;
}

-// This function DOES NOT steal a reference to data
-PyObject * THPVariable_NewVolatile(PyObject *data)
+Variable::Variable(
+    std::unique_ptr<thpp::Tensor> data,
+    std::shared_ptr<Function> creator)
+  : data(std::move(data))
+  , creator(creator)
+  , grad(nullptr)
+  , version_counter(new VariableVersion())
+  , output_nr(creator->num_outputs++)
+  , backward_hook()
+  , pyobj(nullptr)
{
-  THPVariable *variable;
-  if (num_cached > 0) {
-    Py_INCREF(data);
-    variable = pop_cache(data, NULL, 0);
+  if (!this->data) {
+    throw std::runtime_error("Variable data is NULL");
+  }
+  this->is_volatile = creator->is_volatile;
+  this->requires_grad = creator->requires_grad;
+  previous_functions.resize(1);
+  previous_functions[0] = std::make_pair<>(creator, output_nr);
}

+bool Variable::is_cuda()
+{
+  return data->isCuda();
+}

+auto Variable::backward(std::shared_ptr<Variable> gradOutput) -> void {
+  if (backward_hook) {
+    gradOutput = (*backward_hook)(gradOutput);
+  }
+#ifdef WITH_CUDA
+  THCPAutoGPU auto_gpu(gradOutput->data->getDevice());
+#endif
+  if (!grad) {
+    std::unique_ptr<Tensor> data(gradOutput->data->clone());
+    grad = std::make_shared<Variable>(std::move(data), false, true);
  } else {
-    variable = (THPVariable*)PyObject_CallFunctionObjArgs(THPVariableClass, data, NULL);
  }
-  if (variable) ((THPVariable*)variable)->is_volatile = 1;
-  return (PyObject*)variable;
}

-// This function DOES NOT steal a reference to data and creator
-// To create a leaf Variable pass NULL as creator.
-PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad)
-{
-  if (num_cached > 0) {
-    Py_INCREF(data);
-    Py_XINCREF(creator);
-    return (PyObject*)pop_cache(data, creator, requires_grad);
-  }
-  // We can't pass a NULL creator to this Python call, because Py_BuildValue
-  // will raise an error (it tries to be overly smart by setting its own error
-  // if there's no flag set at the moment and we're giving NULL to some
-  // function).
-  creator = creator ? creator : Py_None;
-  return PyObject_CallFunction(THPVariableClass, "OObb", data, creator, (char)0, requires_grad);
-}

-static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
-{
-  Py_VISIT(self->creator);
-  Py_VISIT(self->data);
-  Py_VISIT(self->grad);
-  Py_VISIT(self->backward_hooks);
-  return 0;
-}

-static int THPVariable_clear(THPVariable *self)
-{
-  Py_CLEAR(self->creator);
-  Py_CLEAR(self->data);
-  Py_CLEAR(self->grad);
-  Py_CLEAR(self->backward_hooks);
-  return 0;
-}

-static void THPVariable_dealloc(THPVariable* self)
-{
-  PyObject_GC_UnTrack(self);
-  Py_XDECREF(self->creator);
-  Py_XDECREF(self->data);
-  Py_XDECREF(self->grad);
-  Py_XDECREF(self->backward_hooks);
-  delete self->version_counter;
-  self->version_counter = nullptr;

-  // We don't want to cache any subclasses
-  if ((PyObject*)Py_TYPE(self) == THPVariableClass && num_cached < CACHE_SIZE) {
-    cached_variables[num_cached++] = self;
-    // Variable class is defined in Python code, and as such has a
-    // Py_TPFLAGS_HEAPTYPE flag set, so python DECREFs the class at each
-    // object dealloc.
-    Py_INCREF(Py_TYPE(self));
  } else {
-    Py_TYPE(self)->tp_free((PyObject*)self);
+    grad->data->cadd(*grad->data, *gradOutput->data);
  }
}

-PyObject *THPVariable_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
-{
-  THPVariable *self;
-  if ((PyObject*)type != THPVariableClass || num_cached == 0) {
-    self = (THPVariable*)type->tp_alloc(type, 0);
-    if (!self) return NULL;
-    self->version_counter = new THPVariableVersion();
-  } else {
-    self = pop_cache(NULL, NULL, 0);
+auto Variable::apply(const variable_list& gradOutputs) -> variable_list {
+  if (creator || **version_counter != 0) {
+    throw std::runtime_error("leaf variable was used in an inplace operation");
  }
-  return (PyObject*)self;
+  if (gradOutputs.size() != 1) {
+    throw std::runtime_error("incorrect number of gradOutputs");
  }
+  backward(gradOutputs[0]);
+  return variable_list();
}

-int THPVariable_init(THPVariable *self, PyObject *args, PyObject *kwargs)
-{
-  const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
-  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Obb", (char**)accepted_args,
-        &self->data, &self->creator, &self->is_volatile,
-        &self->requires_grad))
-    return -1;
-  Py_INCREF(self->data);
-  if (self->creator == Py_None)
-    self->creator = NULL;
-  Py_XINCREF(self->creator);
-  THPUtils_assertRet(-1, !(self->is_volatile && self->requires_grad),
-      "Variable can't be volatile and require_grad at the same time!");
-  THPUtils_assertRet(-1, !self->creator || THPFunction_Check(self->creator),
-      "Variable creator has to be a Function object or None, but got %s",
-      THPUtils_typename(self->creator));
-  THPUtils_assertRet(-1, THPModule_isTensor(self->data), "Variable data has to "
-      "be a tensor, but got %s", THPUtils_typename(self->data));
-  return 0;
+auto Variable::save() const -> SavedVariable {
+  return SavedVariable(
+      std::unique_ptr<Tensor>(data->clone_shallow()),
+      **version_counter,
+      std::unique_ptr<VariableVersion>(version_counter->new_saved_ref()));
}

-PyObject * THPVariable_getstate(THPVariable *self)
-{
-  THPUtils_assert(!self->creator, "serialization of non-leaf variables is not "
-      "implemented yet");
-  THPObjectPtr state = PyTuple_New(5);
-  if (!state)
-    return NULL;

-  Py_INCREF(self->data);
-  PyTuple_SET_ITEM(state.get(), 0, self->data);

-  PyObject *grad = self->grad ? self->grad : Py_None;
-  Py_INCREF(grad);
-  PyTuple_SET_ITEM(state.get(), 1, grad);

-  PyObject *backward_hooks = self->backward_hooks ? self->backward_hooks : Py_None;
-  Py_INCREF(backward_hooks);
-  PyTuple_SET_ITEM(state.get(), 2, backward_hooks);

-  PyTuple_SET_ITEM(state.get(), 3, PyBool_FromLong(self->requires_grad));
-  PyTuple_SET_ITEM(state.get(), 4, PyBool_FromLong(self->is_volatile));

-  return state.release();
+auto Variable::save_opt(Variable* var) -> SavedVariable {
+  return var ? var->save() : SavedVariable();
}

-PyObject * THPVariable_setstate(THPVariable *self, PyObject *state)
-{
-  THPUtils_assert(!self->creator, "__setstate__ can be only called on leaf "
-      "variables");
-  THPUtils_assert(PyTuple_Check(state), "__setstate__ expects state to be a "
-      "tuple");
-  Py_ssize_t size = PyTuple_GET_SIZE(state);
-  THPUtils_assert(size == 5, "__setstate__ expects state tuple to have 5 "
-      "elements, but it has %d", size);

-#define LOAD(NAME, IDX) \
-  Py_XDECREF(self->NAME); \
-  self->NAME = PyTuple_GET_ITEM(state, IDX) == Py_None ? NULL : PyTuple_GET_ITEM(state, IDX); \
-  Py_XINCREF(self->NAME);
-  THPUtils_assert(THPModule_isTensor(PyTuple_GET_ITEM(state, 0)), "first "
-      "element of variable state tuple has to be a tensor");
-  LOAD(data, 0);

-  LOAD(grad, 1);
-  LOAD(backward_hooks, 2);
-#undef LOAD

-  PyObject *requires_grad_obj = PyTuple_GET_ITEM(state, 3);
-  PyObject *is_volatile_obj = PyTuple_GET_ITEM(state, 4);
-  THPUtils_assert(PyBool_Check(requires_grad_obj), "requires_grad "
-      "found in state was expected to be a bool, but got %s",
-      THPUtils_typename(requires_grad_obj));
-  THPUtils_assert(PyBool_Check(is_volatile_obj), "is_volatile "
-      "found in state was expected to be a bool, but got %s",
-      THPUtils_typename(is_volatile_obj));
-  self->requires_grad = requires_grad_obj == Py_True ? 1 : 0;
-  self->is_volatile = is_volatile_obj == Py_True ? 1 : 0;

-  Py_RETURN_NONE;
+auto SavedVariable::unpack() -> std::unique_ptr<thpp::Tensor>& {
+  if (data) {
+    int current_version = **version;
+    if (expected_version != current_version) {
+      throw std::runtime_error("one of the variables "
+          "needed for gradient computation has been modified by an "
+          "inplace operation");
+    }
+  }
+  return data;
}

-typedef PyObject *(*getter)(PyObject *, void *);
-typedef int (*setter)(PyObject *, PyObject *, void *);

-PyObject *THPVariable_get_version(THPVariable *self)
-{
-  return PyInt_FromLong(**self->version_counter);
-}

-static struct PyGetSetDef THPVariable_properties[] = {
-  {"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
-  {NULL}
-};

-static struct PyMemberDef THPVariable_members[] = {
-  {(char*)"creator", T_OBJECT, offsetof(THPVariable, creator), 0, NULL},
-  {(char*)"data", T_OBJECT, offsetof(THPVariable, data), 0, NULL},
-  {(char*)"_grad", T_OBJECT, offsetof(THPVariable, grad), 0, NULL},
-  {(char*)"volatile", T_BOOL, offsetof(THPVariable, is_volatile), 0, NULL},
-  {(char*)"output_nr", T_INT, offsetof(THPVariable, output_nr), 0, NULL},
-  {(char*)"_backward_hooks",T_OBJECT, offsetof(THPVariable, backward_hooks), 0, NULL},
-  {(char*)"_requires_grad", T_BOOL, offsetof(THPVariable, requires_grad), 0, NULL},
-  {NULL}
-};

-static struct PyMethodDef THPVariable_methods[] = {
-  {"__getstate__", (PyCFunction)THPVariable_getstate, METH_NOARGS, NULL},
-  {"__setstate__", (PyCFunction)THPVariable_setstate, METH_O, NULL},
-  {NULL}
-};


-PyTypeObject THPVariableType = {
-  PyVarObject_HEAD_INIT(NULL, 0)
-  "torch._C._VariableBase",              /* tp_name */
-  sizeof(THPVariable),                   /* tp_basicsize */
-  0,                                     /* tp_itemsize */
-  (destructor)THPVariable_dealloc,       /* tp_dealloc */
-  0,                                     /* tp_print */
-  0,                                     /* tp_getattr */
-  0,                                     /* tp_setattr */
-  0,                                     /* tp_reserved */
-  0,                                     /* tp_repr */
-  0,                                     /* tp_as_number */
-  0,                                     /* tp_as_sequence */
-  0,                                     /* tp_as_mapping */
-  0,                                     /* tp_hash */
-  0,                                     /* tp_call */
-  0,                                     /* tp_str */
-  0,                                     /* tp_getattro */
-  0,                                     /* tp_setattro */
-  0,                                     /* tp_as_buffer */
-  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
-  NULL,                                  /* tp_doc */
-  (traverseproc)THPVariable_traverse,    /* tp_traverse */
-  (inquiry)THPVariable_clear,            /* tp_clear */
-  0,                                     /* tp_richcompare */
-  0,                                     /* tp_weaklistoffset */
-  0,                                     /* tp_iter */
-  0,                                     /* tp_iternext */
-  THPVariable_methods,                   /* tp_methods */
-  THPVariable_members,                   /* tp_members */
-  THPVariable_properties,                /* tp_getset */
-  0,                                     /* tp_base */
-  0,                                     /* tp_dict */
-  0,                                     /* tp_descr_get */
-  0,                                     /* tp_descr_set */
-  0,                                     /* tp_dictoffset */
-  (initproc)THPVariable_init,            /* tp_init */
-  0,                                     /* tp_alloc */
-  THPVariable_new                        /* tp_new */
-};


-bool THPVariable_initModule(PyObject *module)
-{
-  if (PyType_Ready(&THPVariableType) < 0)
-    return false;
-  Py_INCREF(&THPVariableType);
-  PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
-  return true;
-}
+}} // namespace torch::autograd
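The Variable::backward body in the hunk above fixes the gradient-accumulation contract: the first incoming gradient is cloned into grad, and every later one is accumulated with cadd. A simplified standalone sketch of that first-clone-then-accumulate policy (not the PyTorch API):

    #include <memory>
    #include <vector>

    struct Grad { std::vector<float> data; };

    // First gradient initializes the buffer; later ones add into it in place.
    void accumulate(std::unique_ptr<Grad>& grad, const Grad& incoming) {
      if (!grad) {
        grad.reset(new Grad(incoming));          // clone on first arrival
      } else {
        for (size_t i = 0; i < grad->data.size(); ++i)
          grad->data[i] += incoming.data[i];     // cadd-style in-place add
      }
    }

    int main() {
      std::unique_ptr<Grad> grad;
      accumulate(grad, Grad{{1.0f, 2.0f}});      // grad = {1, 2}
      accumulate(grad, Grad{{0.5f, 0.5f}});      // grad = {1.5, 2.5}
    }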
@@ -1,8 +1,57 @@
-#ifndef THP_VARIABLE_H
-#define THP_VARIABLE_H
+#pragma once

-struct THPVariableVersion {
-  THPVariableVersion() {
+#include <memory>
+#include <functional>
+#include <THPP/THPP.h>
+
+#include "torch/csrc/autograd/function.h"
+#include "torch/csrc/autograd/saved_variable.h"
+#include "torch/csrc/Types.h"
+
+namespace torch { namespace autograd {
+
+struct VariableHook;
+struct VariableVersion;
+
+struct Variable : public Function {
+  Variable(
+      std::unique_ptr<thpp::Tensor> data,
+      std::shared_ptr<Function> creator);
+  Variable(
+      std::unique_ptr<thpp::Tensor> data,
+      bool requires_grad,
+      bool is_volatile);
+
+  bool is_cuda();
+  bool is_sparse();
+  void backward(std::shared_ptr<Variable> gradOutput);
+  virtual variable_list apply(const variable_list& gradOutputs) override;
+
+  SavedVariable save() const;
+  static SavedVariable save_opt(Variable* var);
+
+  static inline std::shared_ptr<Variable> of(std::unique_ptr<thpp::Tensor> data) {
+    if (!data) {
+      return std::shared_ptr<Variable>();
+    }
+    return std::make_shared<Variable>(std::move(data), 0, 0);
+  }
+
+  std::unique_ptr<thpp::Tensor> data;
+  std::shared_ptr<Function> creator;
+  std::shared_ptr<Variable> grad;
+  std::unique_ptr<VariableVersion> version_counter;
+  int output_nr;
+  std::unique_ptr<VariableHook> backward_hook;
+  PyObject *pyobj;  // weak reference
+};
+
+struct VariableHook {
+  virtual std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& grad) = 0;
+};
+
+struct VariableVersion {
+  VariableVersion() {
    saved_ref = false;
    version_block = new int[3];
    version_block[0] = 0; // version
@@ -16,15 +65,15 @@ struct THPVariableVersion {

  int var_refcnt() { return version_block[2]; }

-  void join_with(THPVariableVersion &other) {
+  void join_with(VariableVersion &other) {
    cleanup();
    version_block = other.version_block;
    version_block[1]++;
    version_block[2]++;
  }

-  THPVariableVersion* new_saved_ref() {
-    auto new_ver = new THPVariableVersion();
+  VariableVersion* new_saved_ref() {
+    auto new_ver = new VariableVersion();
    new_ver->cleanup();
    new_ver->version_block = version_block;
    version_block[1]++;
@@ -39,36 +88,10 @@ struct THPVariableVersion {
    version_block = nullptr;
  }

-  ~THPVariableVersion() { cleanup(); }
+  ~VariableVersion() { cleanup(); }

  int *version_block;
  bool saved_ref;
};

-struct THPVariable {
-  PyObject_HEAD
-  PyObject *creator;
-  PyObject *data;
-  PyObject *grad;
-  PyObject *backward_hooks;
-  THPVariableVersion *version_counter;
-  int output_nr;
-  char is_volatile;
-  char requires_grad;
-};

-bool THPVariable_initModule(PyObject *module);
-extern PyObject *THPVariableClass;
-PyObject * THPVariable_NewVolatile(PyObject *data);
-PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad);

-#define THPVariable_Check(obj) \
-  (THPVariableClass && \
-   PyObject_IsInstance(obj, THPVariableClass))

-#define THPVariable_CheckType(obj, func) \
-  (THPVariableClass && \
-   (PyObject_IsInstance(obj, THPVariableClass) && \
-    func(((THPVariable*)obj)->data)))

-#endif
+}} // namespace torch::autograd
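VariableVersion above keeps a heap-allocated int[3] block ([0] version, [1] block refcount, [2] live-variable refcount) that join_with() and new_saved_ref() share between counters, so one in-place bump is visible to every holder. A standalone simplification (refcounted cleanup omitted) showing the aliasing:

    #include <cassert>

    struct MiniVersion {
      int* version_block;            // [0]=version, [1]=refcount, [2]=var refcount
      MiniVersion() : version_block(new int[3]{0, 1, 1}) {}
      void join_with(MiniVersion& other) {
        delete[] version_block;      // the real code decrements refcounts instead
        version_block = other.version_block;
        version_block[1]++;
        version_block[2]++;
      }
      int operator*() const { return version_block[0]; }
    };

    int main() {
      MiniVersion a, b;
      b.join_with(a);                // b now aliases a's block
      a.version_block[0]++;          // an in-place op bumps the shared version
      assert(*a == 1 && *b == 1);    // both counters observe the bump
    }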
@@ -15,26 +15,26 @@ THCState *state;
// Class pointer cache
////////////////////////////////////////////////////////////////////////////////

-static bool THCPModule_loadClasses(PyObject *module_dict)
+static bool THCPModule_loadClasses(PyObject *torch_module)
{
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
-  ASSERT_NOT_NULL(THCPDoubleStorageClass = PyMapping_GetItemString(module_dict, (char*)"DoubleStorage"));
-  ASSERT_NOT_NULL(THCPFloatStorageClass  = PyMapping_GetItemString(module_dict, (char*)"FloatStorage"));
-  ASSERT_NOT_NULL(THCPHalfStorageClass   = PyMapping_GetItemString(module_dict, (char*)"HalfStorage"));
-  ASSERT_NOT_NULL(THCPLongStorageClass   = PyMapping_GetItemString(module_dict, (char*)"LongStorage"));
-  ASSERT_NOT_NULL(THCPIntStorageClass    = PyMapping_GetItemString(module_dict, (char*)"IntStorage"));
-  ASSERT_NOT_NULL(THCPShortStorageClass  = PyMapping_GetItemString(module_dict, (char*)"ShortStorage"));
-  ASSERT_NOT_NULL(THCPCharStorageClass   = PyMapping_GetItemString(module_dict, (char*)"CharStorage"));
-  ASSERT_NOT_NULL(THCPByteStorageClass   = PyMapping_GetItemString(module_dict, (char*)"ByteStorage"));
+  ASSERT_NOT_NULL(THCPDoubleStorageClass = PyObject_GetAttrString(torch_module, (char*)"DoubleStorage"));
+  ASSERT_NOT_NULL(THCPFloatStorageClass  = PyObject_GetAttrString(torch_module, (char*)"FloatStorage"));
+  ASSERT_NOT_NULL(THCPHalfStorageClass   = PyObject_GetAttrString(torch_module, (char*)"HalfStorage"));
+  ASSERT_NOT_NULL(THCPLongStorageClass   = PyObject_GetAttrString(torch_module, (char*)"LongStorage"));
+  ASSERT_NOT_NULL(THCPIntStorageClass    = PyObject_GetAttrString(torch_module, (char*)"IntStorage"));
+  ASSERT_NOT_NULL(THCPShortStorageClass  = PyObject_GetAttrString(torch_module, (char*)"ShortStorage"));
+  ASSERT_NOT_NULL(THCPCharStorageClass   = PyObject_GetAttrString(torch_module, (char*)"CharStorage"));
+  ASSERT_NOT_NULL(THCPByteStorageClass   = PyObject_GetAttrString(torch_module, (char*)"ByteStorage"));

-  ASSERT_NOT_NULL(THCPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
-  ASSERT_NOT_NULL(THCPHalfTensorClass   = PyMapping_GetItemString(module_dict, (char*)"HalfTensor"));
-  ASSERT_NOT_NULL(THCPFloatTensorClass  = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
-  ASSERT_NOT_NULL(THCPLongTensorClass   = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
-  ASSERT_NOT_NULL(THCPIntTensorClass    = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
-  ASSERT_NOT_NULL(THCPShortTensorClass  = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
-  ASSERT_NOT_NULL(THCPCharTensorClass   = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
-  ASSERT_NOT_NULL(THCPByteTensorClass   = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
+  if (!THCPDoubleTensor_postInit(torch_module)) return false;
+  if (!THCPFloatTensor_postInit(torch_module)) return false;
+  if (!THCPHalfTensor_postInit(torch_module)) return false;
+  if (!THCPLongTensor_postInit(torch_module)) return false;
+  if (!THCPIntTensor_postInit(torch_module)) return false;
+  if (!THCPShortTensor_postInit(torch_module)) return false;
+  if (!THCPCharTensor_postInit(torch_module)) return false;
+  if (!THCPByteTensor_postInit(torch_module)) return false;

  return true;
#undef ASSERT_NOT_NULL
@@ -255,7 +255,7 @@ PyObject * THCPModule_getLibPath(PyObject *_unused)
// Cuda module initialization
////////////////////////////////////////////////////////////////////////////////

-bool THCPModule_initCuda(PyObject *module_dict) {
+bool THCPModule_initCuda(PyObject *torch_module) {
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
  state = THCState_alloc();
  THCState_setDeviceAllocator(state, THCCachingAllocator_get());
@@ -264,21 +264,21 @@ bool THCPModule_initCuda(PyObject *module_dict) {

#ifdef USE_MAGMA
  THCMagma_init(state);
-  ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(true)) != -1);
+  ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(true)) != -1);
#else
-  ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(false)) != -1);
+  ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(false)) != -1);
#endif

#ifdef CUDA_HALF_TENSOR
-  ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(true)) != -1);
+  ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(true)) != -1);
#else
-  ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(false)) != -1);
+  ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(false)) != -1);
#endif

-  ASSERT_TRUE(THCPModule_loadClasses(module_dict));
+  ASSERT_TRUE(THCPModule_loadClasses(torch_module));
  ASSERT_TRUE(THCPModule_assignStateless());

-  ASSERT_TRUE(PyDict_SetItemString(module_dict, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);
+  ASSERT_TRUE(PyObject_SetAttrString(torch_module, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);

  // TODO: register THCudaShutdown handler at exit
  return true;
@@ -293,6 +293,5 @@ PyObject * THCPModule_initExtension(PyObject *self)
    THPUtils_setError("class loader couldn't access torch module");
    return NULL;
  }
-  PyObject* module_dict = PyModule_GetDict(torch_module);
-  return PyBool_FromLong(THCPModule_initCuda(module_dict));
+  return PyBool_FromLong(THCPModule_initCuda(torch_module));
}
@@ -10,6 +10,7 @@

#include "override_macros.h"
#include "torch/csrc/copy_utils.h"
+#include "DynamicTypes.h"

#define THC_GENERIC_FILE "torch/csrc/generic/Tensor.cpp"
#include <THC/THCGenerateAllTypes.h>
@@ -62,6 +62,8 @@ void cudnn_batch_norm_forward(
    THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
    double exponential_average_factor, double epsilon)
{
+  assertSameGPU(dataType, input, output, weight, bias, running_mean, running_var,
+      save_mean, save_var);
  cudnnBatchNormMode_t mode;
  if (input->nDimension == 2) {
    mode = CUDNN_BATCHNORM_PER_ACTIVATION;
@@ -120,6 +122,8 @@ void cudnn_batch_norm_backward(
    THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
    double epsilon)
{
+  assertSameGPU(dataType, input, grad_output, grad_input, grad_weight, grad_bias, weight,
+      running_mean, running_var, save_mean, save_var);
  cudnnBatchNormMode_t mode;
  if (input->nDimension == 2) {
    mode = CUDNN_BATCHNORM_PER_ACTIVATION;
@@ -143,7 +147,7 @@ void cudnn_batch_norm_backward(
  THVoidTensor_assertContiguous(save_mean);
  THVoidTensor_assertContiguous(save_var);
  CHECK(cudnnBatchNormalizationBackward(
-    handle, mode, &one, &zero, &one, &one,
+    handle, mode, &one, &zero, &one, &zero,
    idesc.desc, tensorPointer(dataType, input),
    odesc.desc, tensorPointer(dataType, grad_output),
    gdesc.desc, tensorPointer(dataType, grad_input),
@@ -285,6 +285,7 @@ void cudnn_convolution_forward(
    THVoidTensor* input, THVoidTensor* weight, THVoidTensor* output,
    Convolution* info, bool benchmark)
{
+  assertSameGPU(dataType, input, weight, output);
  int groups = info->groups;

  cudnnConvolutionFwdAlgo_t fwdAlg;
@@ -309,6 +310,7 @@ void cudnn_convolution_add_bias(
    THVoidTensor* bias, THVoidTensor* output,
    Convolution* info)
{
+  assertSameGPU(dataType, bias, output);
  CHECK_ARG(output->nDimension <= 5);
  TensorDescriptor& bdesc = info->bdesc;

@@ -329,6 +331,7 @@ void cudnn_convolution_backward_data(
    THVoidTensor* gradOutput, THVoidTensor* gradInput, THVoidTensor* weight,
    Convolution* info, bool benchmark)
{
+  assertSameGPU(dataType, gradOutput, gradInput, weight);
  int groups = info->params.groups;

  cudnnConvolutionBwdDataAlgo_t bwdDataAlg;
@@ -353,6 +356,7 @@ void cudnn_convolution_backward_filter(
    THVoidTensor* gradOutput, THVoidTensor* input, THVoidTensor* gradWeight,
    Convolution* info, bool benchmark)
{
+  assertSameGPU(dataType, gradOutput, input, gradWeight);
  int groups = info->params.groups;

  cudnnConvolutionBwdFilterAlgo_t bwdFilterAlg;
@@ -380,6 +384,7 @@ void cudnn_convolution_backward_bias(
    THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
    THVoidTensor* gradOutput, THVoidTensor* gradBias, Convolution* info)
{
+  assertSameGPU(dataType, gradOutput, gradBias);
  Constant one(dataType, 1);
  Constant zero(dataType, 0);
  void* gradOutput_ptr = tensorPointer(dataType, gradOutput, 0, 1, 0);
@@ -1,17 +1,42 @@
#ifndef THP_CUDNN_EXCEPTIONS_INC
#define THP_CUDNN_EXCEPTIONS_INC

+#include <THC/THC.h>
#include <cudnn.h>
#include <string>
#include <stdexcept>
#include <sstream>

+#include "Types.h"
+
+#define CHECK_ARG(cond) _CHECK_ARG(cond, #cond, __FILE__, __LINE__)
+
+extern THCState* state;
+
namespace torch { namespace cudnn {

+template<typename ...T>
+void assertSameGPU(cudnnDataType_t dataType, T* ... tensors) {
+  static_assert(std::is_same<THVoidTensor, typename std::common_type<T...>::type>::value,
+      "all arguments to assertSameGPU have to be THVoidTensor*");
+  int is_same;
+  if (dataType == CUDNN_DATA_FLOAT) {
+    is_same = THCudaTensor_checkGPU(state, sizeof...(T),
+        reinterpret_cast<THCudaTensor*>(tensors)...);
+  } else if (dataType == CUDNN_DATA_HALF) {
+    is_same = THCudaHalfTensor_checkGPU(state, sizeof...(T),
+        reinterpret_cast<THCudaHalfTensor*>(tensors)...);
+  } else if (dataType == CUDNN_DATA_DOUBLE) {
+    is_same = THCudaDoubleTensor_checkGPU(state, sizeof...(T),
+        reinterpret_cast<THCudaDoubleTensor*>(tensors)...);
+  } else {
+    throw std::runtime_error("unknown cuDNN data type");
+  }
+  if (!is_same) {
+    throw std::runtime_error("tensors are on different GPUs");
+  }
+}
+
class cudnn_exception : public std::runtime_error {
public:
  cudnnStatus_t status;
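The assertSameGPU template above leans on two C++ tricks: std::common_type inside a static_assert to force every pack element to THVoidTensor*, and sizeof...(T) to pass the argument count to a varargs-style checker. A self-contained sketch of the same pattern with hypothetical types:

    #include <type_traits>
    #include <cstdarg>

    struct Buf { int device; };

    // C-style varargs checker, mirroring THCudaTensor_checkGPU's shape.
    static int all_on_one_device(int n, ...) {
      va_list args; va_start(args, n);
      int dev = -1, ok = 1;
      for (int i = 0; i < n; i++) {
        Buf* b = va_arg(args, Buf*);
        if (dev == -1) dev = b->device;
        else if (b->device != dev) ok = 0;
      }
      va_end(args);
      return ok;
    }

    template<typename ...T>
    bool same_device(T* ... bufs) {
      static_assert(std::is_same<Buf, typename std::common_type<T...>::type>::value,
          "all arguments must be Buf*");
      return all_on_one_device(sizeof...(T), bufs...) != 0;
    }

    int main() {
      Buf a{0}, b{0}, c{1};
      return same_device(&a, &b) && !same_device(&a, &c) ? 0 : 1;
    }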
@@ -20,6 +20,20 @@ cudnnDataType_t getCudnnDataType(PyObject *tensorClass)
  throw std::runtime_error(msg);
}

+cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor)
+{
+  if (tensor.type() == thpp::Type::FLOAT) {
+    return CUDNN_DATA_FLOAT;
+  } else if (tensor.type() == thpp::Type::DOUBLE) {
+    return CUDNN_DATA_DOUBLE;
+  } else if (tensor.type() == thpp::Type::HALF) {
+    return CUDNN_DATA_HALF;
+  }
+  std::string msg("getCudnnDataType() not supported for ");
+  msg += (int)tensor.type();
+  throw std::runtime_error(msg);
+}
+
PyObject * getTensorClass(PyObject *args)
{
  for (int i = 0; i < PyTuple_Size(args); i++) {
@@ -6,11 +6,13 @@
#include <string>
#include <cudnn.h>
#include "../Types.h"
+#include <THPP/THPP.h>

namespace torch { namespace cudnn {

PyObject * getTensorClass(PyObject *args);
cudnnDataType_t getCudnnDataType(PyObject *tensorClass);
+cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor);
void _THVoidTensor_assertContiguous(THVoidTensor *tensor, const std::string& name);

#define THVoidTensor_assertContiguous(tensor) \
@@ -227,3 +227,16 @@ bool THSPTensor_(init)(PyObject *module)
  PyModule_AddObject(module, THSPTensorBaseStr, (PyObject *)&THSPTensorType);
  return true;
}
+
+bool THSPTensor_(postInit)(PyObject *module)
+{
+  THSPTensorClass = PyObject_GetAttrString(module, TH_CONCAT_STRING_2(Real,Tensor));
+  if (!THSPTensorClass) return false;
+  bool is_cuda = false;
+#ifdef THC_GENERIC_FILE
+  is_cuda = true;
+#endif
+  const char *type_name = TH_CONCAT_STRING_2(Real,);
+  torch::registerPyTypeObject((PyTypeObject*)THSPTensorClass, type_name, is_cuda, true);
+  return true;
+}
@@ -867,6 +867,20 @@ bool THPTensor_(init)(PyObject *module)
  return true;
}

+bool THPTensor_(postInit)(PyObject *module)
+{
+  THPTensorClass = PyObject_GetAttrString(module,(char*)TH_CONCAT_STRING_2(Real,Tensor));
+  if (!THPTensorClass) return false;
+
+  bool is_cuda = false;
+#ifdef THC_GENERIC_FILE
+  is_cuda = true;
+#endif
+  const char *type_name = TH_CONCAT_STRING_2(Real,);
+  torch::registerPyTypeObject((PyTypeObject*)THPTensorClass, type_name, is_cuda, false);
+  return true;
+}
+
#undef NUMPY_TYPE_ENUM

#endif
@@ -49,6 +49,8 @@ extern PyTypeObject THSPTensorStatelessType;
bool THPTensor_(init)(PyObject *module);
#ifndef THD_GENERIC_FILE
bool THSPTensor_(init)(PyObject *module);
+bool THPTensor_(postInit)(PyObject *module);
+bool THSPTensor_(postInit)(PyObject *module);
#endif

extern PyTypeObject THPTensorType;
@@ -408,6 +408,28 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
    - long dim
]]

+[[
+  name: unsqueeze
+  with_stateless: True
+  return: argument 0
+  cname: unsqueeze1d
+  arguments:
+    - arg: THTensor* result
+      output: True
+    - THTensor* self
+    - long dim
+]]
+
+[[
+  name: unsqueeze_
+  return: self
+  cname: unsqueeze1d
+  arguments:
+    - THTensor* self
+    - THTensor* self
+    - long dim
+]]
+
[[
  name: nonzero
  with_stateless: True
@@ -434,6 +456,16 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
    - THTensor* self
]]

+[[
+  name: view
+  cname: newView
+  return: THTensor*
+  arguments:
+    - THTensor* self
+    - arg: THSize* size
+      long_args: True
+]]
+
[[
  name: resizeAs_
  python_name: resize_as_
@@ -572,10 +604,10 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
]]
static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
{
-#if IS_CUDA && THCP_AUTO_GPU
-  THCPAutoGPU __autogpu_guard = THCPAutoGPU(args);
-#endif
  HANDLE_TH_ERRORS
+#if IS_CUDA
+  THCPAutoGPU __autogpu_guard(-1);
+#endif
  Py_ssize_t _argcount = args ? PyTuple_Size(args) : 0;
  std::vector<THPObjectPtr> items;
  std::vector<THTensor *> item_tensors;
@@ -608,6 +640,10 @@ static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
    dimension = 0;
  }

+#if IS_CUDA
+  __autogpu_guard.setDevice(THTensor_(getDevice)(LIBRARY_STATE item_tensors[0]));
+#endif
+
  result = (THPTensor *)THPTensor_(NewEmpty)();
  if (!result) return NULL;
@@ -39,11 +39,19 @@ void THPStorage_(writeFileRaw)(THStorage *self, int fd)
  SYSCHECK(write(fd, &self->size, sizeof(long)));
  // fast track for bytes and little endian
  if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
-    SYSCHECK(write(fd, data, sizeof(real) * self->size));
+    char *bytes = (char *) data;
+    uint64_t remaining = sizeof(real) * self->size;
+    while (remaining > 0) {
+      ssize_t result = write(fd, bytes, remaining);
+      if (result < 0)
+        throw std::system_error(result, std::system_category());
+      bytes += result;
+      remaining -= result;
+    }
  } else {
-    long buffer_size = std::min(self->size, (long)5000);
+    int64_t buffer_size = std::min(self->size, (long)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
-    for (long i = 0; i < self->size; i += buffer_size) {
+    for (int64_t i = 0; i < self->size; i += buffer_size) {
      size_t to_convert = std::min(self->size - i, buffer_size);
      if (sizeof(real) == 2) {
        THP_encodeInt16Buffer((uint8_t*)le_buffer.get(),
@@ -61,7 +69,7 @@ void THPStorage_(writeFileRaw)(THStorage *self, int fd)
            THPByteOrder::THP_LITTLE_ENDIAN,
            to_convert);
      }
-      SYSCHECK(write(fd, data, to_convert * sizeof(real)));
+      SYSCHECK(write(fd, le_buffer.get(), to_convert * sizeof(real)));
    }
  }
}
@@ -82,11 +90,19 @@ THStorage * THPStorage_(readFileRaw)(int fd)

  // fast track for bytes and little endian
  if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
-    SYSCHECK(read(fd, data, sizeof(real) * storage->size));
+    char *bytes = (char *) data;
+    uint64_t remaining = sizeof(real) * storage->size;
+    while (remaining > 0) {
+      ssize_t result = read(fd, bytes, remaining);
+      if (result < 0)
+        throw std::system_error(result, std::system_category());
+      bytes += result;
+      remaining -= result;
+    }
  } else {
-    long buffer_size = std::min(size, (long)5000);
+    int64_t buffer_size = std::min(size, (long)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
-    for (long i = 0; i < size; i += buffer_size) {
+    for (int64_t i = 0; i < size; i += buffer_size) {
      size_t to_convert = std::min(size - i, buffer_size);
      SYSCHECK(read(fd, le_buffer.get(), sizeof(real) * to_convert));
      if (sizeof(real) == 2) {
@@ -577,11 +577,4 @@ void THPPointer<THPGenerator>::free() {
    Py_DECREF(ptr);
}

-template<>
-void THPPointer<PyObject>::free() {
-  if (ptr)
-    Py_DECREF(ptr);
-}
-
template class THPPointer<THPGenerator>;
-template class THPPointer<PyObject>;
@@ -53,5 +53,5 @@ class Add(Module):
        if input.is_same_size(self.bias):
            self.gradBias.add_(scale, gradOutput)
        else:
-            gradOutput = gradOutput.view(input.size(0), -1)
+            gradOutput = gradOutput.contiguous().view(input.size(0), -1)
            self.gradBias.view(-1).addmv_(scale, gradOutput.t(), self._ones)
@@ -50,6 +50,7 @@ class BatchNormalization(Module):

        self.save_mean = None
        self.save_std = None
+        self._gradOutput = None

        if self.affine:
            self.weight = torch.Tensor(nOutput)
@@ -90,7 +90,7 @@ class CosineDistance(Module):
        gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
        gw2.mul_(self.w.expand_as(v1))

-        go = gradOutput.view(-1, 1).expand_as(v1)
+        go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
        gw1.mul_(go)
        gw2.mul_(go)

@@ -38,7 +38,7 @@ class DotProduct(Module):
        gw1.resize_as_(v1).copy_(v2)
        gw2.resize_as_(v2).copy_(v1)

-        go = gradOutput.view(-1, 1).expand_as(v1)
+        go = gradOutput.contiguous().view(-1, 1).expand_as(v1)
        gw1.mul_(go)
        gw2.mul_(go)

@@ -36,6 +36,7 @@ class MV(Module):
        M, v = input
        self.gradInput[0].resize_as_(M)
        self.gradInput[1].resize_as_(v)
+        gradOutput = gradOutput.contiguous()

        assert gradOutput.ndimension() == 1 or gradOutput.ndimension() == 2

@@ -126,7 +126,7 @@ class MixtureTable(Module):
            self.backwardSetup = True

        # gater updateGradInput
-        self._expertView = gradOutput.view(torch.Size(self.size2))
+        self._expertView = gradOutput.contiguous().view(torch.Size(self.size2))
        gradOutput = self._expertView.expand_as(expertInputs)
        torch.mul(gradOutput, expertInputs, out=self._expert)
        expert = self._expert.transpose(self.dim, self.dimG)
@@ -49,13 +49,19 @@ class Module(object):
        pass

    def accUpdateGradParameters(self, input, gradOutput, lr):
-        gradWeight = self.gradWeight
-        gradBias = self.gradBias
-        self.gradWeight = self.weight
-        self.gradBias = self.bias
+        has_weight = hasattr(self, 'weight') and self.weight is not None
+        has_bias = hasattr(self, 'bias') and self.bias is not None
+        if has_weight:
+            gradWeight = self.gradWeight
+            self.gradWeight = self.weight
+        if has_bias:
+            gradBias = self.gradBias
+            self.gradBias = self.bias
        self.accGradParameters(input, gradOutput, -lr)
-        self.gradWeight = gradWeight
-        self.gradBias = gradBias
+        if has_weight:
+            self.gradWeight = gradWeight
+        if has_bias:
+            self.gradBias = gradBias

    def sharedAccUpdateGradParameters(self, input, gradOutput, lr):
        if self.parameters():
@@ -32,6 +32,7 @@ class SpatialConvolutionLocal(Module):
        self.reset()
        self.finput = None
        self.fgradInput = None
+        self._gradOutput = None

    def reset(self, stdv=None):
        if stdv is not None:
@@ -32,6 +32,7 @@ class SpatialFullConvolution(Module):
        self.finput = None
        self.fgradInput = None
        self.zeroScalar = None
+        self._gradOutput = None

        self.reset()

@@ -31,7 +31,7 @@ class SpatialMaxPooling(Module):
        return self

    def updateOutput(self, input):
-        if self.indices is None:
+        if not hasattr(self, 'indices') or self.indices is None:
            self.indices = input.new()
        self.indices = self.indices.long()

@@ -15,5 +15,5 @@ class Squeeze(Module):

    def updateGradInput(self, input, gradOutput):
        assert input.nelement() == gradOutput.nelement()
-        self.gradInput.set_(gradOutput.view_as(input))
+        self.gradInput.set_(gradOutput.contiguous().view_as(input))
        return self.gradInput
@@ -15,7 +15,7 @@ class Unsqueeze(Module):

    def updateGradInput(self, input, gradOutput):
        assert input.nelement() == gradOutput.nelement()
-        self.gradInput = gradOutput.view(input.size())
+        self.gradInput = gradOutput.contiguous().view(input.size())
        return self.gradInput

    def __repr__(self):
@@ -36,7 +36,7 @@ class View(Module):
    def updateGradInput(self, input, gradOutput):
        if self.gradInput is None:
            self.gradInput = gradOutput.new()
-        self.gradInput = gradOutput.view(input.size())
+        self.gradInput = gradOutput.contiguous().view(input.size())
        return self.gradInput

    def __repr__(self):
@@ -29,6 +29,7 @@ class VolumetricConvolution(Module):

        self.finput = None
        self.fgradInput = None
+        self._gradOutput = None

    def reset(self, stdv=None):
        if stdv is not None:
@@ -39,6 +39,7 @@ class VolumetricFullConvolution(Module):
        self.ones = torch.Tensor()
        self.finput = torch.Tensor()
        self.fgradInput = torch.Tensor()
+        self._gradOutput = None

        self.reset()
@@ -8,7 +8,7 @@
#define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val)
#define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val)
#define Real Half
-#define THInf TH_HALF_MAX
+#define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF)
#define TH_REAL_IS_HALF
#line 1 TH_GENERIC_FILE
#include TH_GENERIC_FILE
@@ -2,13 +2,28 @@

/* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. */

+THHalf TH_float2half(float f)
+{
+  THHalf h;
+  TH_float2halfbits(&f, &h.x);
+  return h;
+}
+
+TH_API float TH_half2float(THHalf h)
+{
+  float f;
+  TH_halfbits2float(&h.x, &f);
+  return f;
+}
+
// Host functions for converting between FP32 and FP16 formats

-float TH_half2float(THHalf h)
+void TH_halfbits2float(unsigned short* src, float* res)
{
-  unsigned sign = ((h.x >> 15) & 1);
-  unsigned exponent = ((h.x >> 10) & 0x1f);
-  unsigned mantissa = ((h.x & 0x3ff) << 13);
+  unsigned h = *src;
+  unsigned sign = ((h >> 15) & 1);
+  unsigned exponent = ((h >> 10) & 0x1f);
+  unsigned mantissa = ((h & 0x3ff) << 13);

  if (exponent == 0x1f) {  /* NaN or Inf */
    mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
@@ -28,37 +43,31 @@ float TH_half2float(THHalf h)
    exponent += 0x70;
  }

-  int temp = ((sign << 31) | (exponent << 23) | mantissa);
-  float x;
-  memcpy(&x,&temp,sizeof(float));
-  return x;
+  *(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa);
}

-THHalf TH_float2half(float f)
+void TH_float2halfbits(float* src, unsigned short* dest)
{
-  THHalf ret;
-
-  unsigned x;
-  memcpy(&x,&f,sizeof(f));
+  unsigned x = *(unsigned*)src;
  unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
  unsigned sign, exponent, mantissa;

  // Get rid of +NaN/-NaN case first.
  if (u > 0x7f800000) {
-    ret.x = 0x7fffU;
-    return ret;
+    *dest = 0x7fffU;
+    return ;
  }

  sign = ((x >> 16) & 0x8000);

  // Get rid of +Inf/-Inf, +0/-0.
  if (u > 0x477fefff) {
-    ret.x = sign | 0x7c00U;
-    return ret;
+    *dest = sign | 0x7c00U;
+    return;
  }
  if (u < 0x33000001) {
-    ret.x = (sign | 0x0000);
-    return ret;
+    *dest = (sign | 0x0000);
+    return;
  }

  exponent = ((u >> 23) & 0xff);
@@ -87,6 +96,5 @@ THHalf TH_float2half(float f)
    }
  }

-  ret.x = (sign | (exponent << 10) | mantissa);
-  return ret;
+  *dest = (sign | (exponent << 10) | mantissa);
}
@@ -18,23 +18,24 @@ typedef struct __thalign__(2){
} __THHalf;

typedef struct __thalign__(4) {
-    unsigned int x;
+  unsigned int x;
} __THHalf2;

typedef __THHalf THHalf;
typedef __THHalf2 THHalf2;

/* numeric limits */
+TH_API void TH_float2halfbits(float*, unsigned short*);
+TH_API void TH_halfbits2float(unsigned short*, float*);

-TH_API THHalf TH_float2half(float a);
-TH_API float TH_half2float(THHalf a);
+TH_API THHalf TH_float2half(float);
+TH_API float TH_half2float(THHalf);

+#ifndef TH_HALF_BITS_TO_LITERAL
+# define TH_HALF_BITS_TO_LITERAL(n) { n }
+#endif
+
#define TH_HALF_MAX TH_HALF_BITS_TO_LITERAL(0x7BFF)
+#define TH_HALF_ZERO 0x0U
+#define TH_HALF_INF 0x7C00U

#undef __thalign__
#endif
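With the bit-level entry points split out above, TH_float2half and TH_half2float become thin wrappers over TH_float2halfbits and TH_halfbits2float. A hedged round-trip check, assuming the TH headers are on the include path:

    #include <cassert>
    #include "THHalf.h"   // path assumed; the declarations are in the hunk above

    int main() {
      THHalf h = TH_float2half(0.5f);   // 0.5 is exactly representable in fp16
      float back = TH_half2float(h);
      assert(back == 0.5f);
    }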
@@ -12,3 +12,56 @@

#include "generic/THStorageCopy.c"
#include "THGenerateHalfType.h"
+
+
+THDescBuff THLongStorage_sizeDesc(const THLongStorage *size) {
+  const int L = TH_DESC_BUFF_LEN;
+  THDescBuff buf;
+  char *str = buf.str;
+  int n = 0;
+  n += snprintf(str, L-n, "[");
+  int i;
+  for(i = 0; i < size->size; i++) {
+    if(n >= L) break;
+    n += snprintf(str+n, L-n, "%ld", size->data[i]);
+    if(i < size->size-1) {
+      n += snprintf(str+n, L-n, " x ");
+    }
+  }
+  if(n < L - 2) {
+    snprintf(str+n, L-n, "]");
+  } else {
+    snprintf(str+L-5, 5, "...]");
+  }
+  return buf;
+}
+
+TH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement)
+{
+  ptrdiff_t total_size = (size->size > 0 ? 1 : 0);
+  ptrdiff_t dim_infer = -1;
+  ptrdiff_t i;
+  for (i = 0; i < size->size; i++) {
+    if (size->data[i] == -1) {
+      THArgCheck(dim_infer == -1, 1, "only one dimension can be inferred");
+      dim_infer = i;
+    } else {
+      total_size *= size->data[i];
+    }
+  }
+  if (dim_infer != -1) {
+    THDescBuff buf = THLongStorage_sizeDesc(size);
+    THArgCheck(total_size > 0 && nElement % total_size == 0, 2,
+        "size '%s' is invalid for input of with %td elements", buf.str, nElement);
+  } else {
+    THDescBuff buf = THLongStorage_sizeDesc(size);
+    THArgCheck(nElement == total_size, 2,
+        "size '%s' is invalid for input of with %td elements", buf.str, nElement);
+  }
+  THLongStorage* copy = THLongStorage_newWithSize(size->size);
+  THLongStorage_copy(copy, size);
+  if (dim_infer != -1) {
+    copy->data[dim_infer] = nElement / total_size;
+  }
+  return copy;
+}
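THLongStorage_newInferSize above is the engine behind view(-1, ...): exactly one -1 entry may appear, and it is replaced by nElement divided by the product of the known sizes. A usage sketch (assumes the TH library is linked; THLongStorage_newWithSize2 is the stock two-element constructor):

    #include "THStorage.h"   // path assumed

    void infer_example(void) {
      THLongStorage *size = THLongStorage_newWithSize2(-1, 4);
      // 12 elements laid out as (-1, 4) infer to (3, 4).
      THLongStorage *inferred = THLongStorage_newInferSize(size, 12);
      // inferred->data is now {3, 4}
      THLongStorage_free(inferred);
      THLongStorage_free(size);
    }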
@@ -7,6 +7,11 @@
#define THStorage TH_CONCAT_3(TH,Real,Storage)
#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)

+#define TH_DESC_BUFF_LEN 64
+typedef struct {
+    char str[TH_DESC_BUFF_LEN];
+} THDescBuff;
+
/* fast access methods */
#define TH_STORAGE_GET(storage, idx) ((storage)->data[(idx)])
#define TH_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value))
@@ -23,4 +28,7 @@
#include "generic/THStorageCopy.h"
#include "THGenerateHalfType.h"

+TH_API THDescBuff THLongStorage_sizeDesc(const THLongStorage *size);
+TH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement);
+
#endif
@@ -7,11 +7,6 @@
#define THTensor TH_CONCAT_3(TH,Real,Tensor)
#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)

-#define TH_DESC_BUFF_LEN 64
-typedef struct {
-    char str[TH_DESC_BUFF_LEN];
-} THDescBuff;
-
/* basics */
#include "generic/THTensor.h"
#include "THGenerateAllTypes.h"
@@ -67,8 +67,6 @@ void THTensor_(clearFlag)(THTensor *self, const char flag)
/**** creation methods ****/

static void THTensor_(rawInit)(THTensor *self);
-static void THTensor_(rawSet)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride);
-static void THTensor_(rawResize)(THTensor *self, int nDimension, long *size, long *stride);


/* Empty init */
@@ -84,12 +82,12 @@ THTensor *THTensor_(newWithTensor)(THTensor *tensor)
{
  THTensor *self = THAlloc(sizeof(THTensor));
  THTensor_(rawInit)(self);
-  THTensor_(rawSet)(self,
-                    tensor->storage,
-                    tensor->storageOffset,
-                    tensor->nDimension,
-                    tensor->size,
-                    tensor->stride);
+  THTensor_(setStorageNd)(self,
+                          tensor->storage,
+                          tensor->storageOffset,
+                          tensor->nDimension,
+                          tensor->size,
+                          tensor->stride);
  return self;
}

@@ -104,12 +102,12 @@ THTensor *THTensor_(newWithStorage)(THStorage *storage, ptrdiff_t storageOffset,
#ifdef DEBUG
  THAssert((size ? size->size : (stride ? stride->size : 0)) <= INT_MAX);
#endif
-  THTensor_(rawSet)(self,
-                    storage,
-                    storageOffset,
-                    (size ? size->size : (stride ? stride->size : 0)),
-                    (size ? size->data : NULL),
-                    (stride ? stride->data : NULL));
+  THTensor_(setStorageNd)(self,
+                          storage,
+                          storageOffset,
+                          (size ? size->size : (stride ? stride->size : 0)),
+                          (size ? size->data : NULL),
+                          (stride ? stride->data : NULL));

  return self;
}
@@ -145,7 +143,7 @@ THTensor *THTensor_(newWithStorage4d)(THStorage *storage, ptrdiff_t storageOffse

  THTensor *self = THAlloc(sizeof(THTensor));
  THTensor_(rawInit)(self);
-  THTensor_(rawSet)(self, storage, storageOffset, 4, size, stride);
+  THTensor_(setStorageNd)(self, storage, storageOffset, 4, size, stride);

  return self;
}
@@ -176,7 +174,7 @@ THTensor *THTensor_(newWithSize4d)(long size0, long size1, long size2, long size

  THTensor *self = THAlloc(sizeof(THTensor));
  THTensor_(rawInit)(self);
-  THTensor_(rawResize)(self, 4, size, NULL);
+  THTensor_(resizeNd)(self, 4, size, NULL);

  return self;
}
@@ -228,6 +226,17 @@ THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, lon
  return self;
}

+THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size)
+{
+  THArgCheck(THTensor_(isContiguous)(tensor), 1, "input is not contiguous");
+  ptrdiff_t numel = THTensor_(nElement)(tensor);
+  THTensor *self = THTensor_(new)();
+  THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);
+  THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL);
+  THLongStorage_free(inferred_size);
+  return self;
+}
+
/* Resize */
void THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *stride)
{
@@ -238,13 +247,13 @@ void THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *strid
#ifdef DEBUG
  THAssert(size->size <= INT_MAX);
#endif
-  THTensor_(rawResize)(self, size->size, size->data, (stride ? stride->data : NULL));
+  THTensor_(resizeNd)(self, size->size, size->data, (stride ? stride->data : NULL));
}

void THTensor_(resizeAs)(THTensor *self, THTensor *src)
{
  if(!THTensor_(isSameSizeAs)(self, src))
-    THTensor_(rawResize)(self, src->nDimension, src->size, NULL);
+    THTensor_(resizeNd)(self, src->nDimension, src->size, NULL);
}

void THTensor_(resize1d)(THTensor *tensor, long size0)
@@ -266,25 +275,25 @@ void THTensor_(resize4d)(THTensor *self, long size0, long size1, long size2, lon
{
  long size[4] = {size0, size1, size2, size3};

-  THTensor_(rawResize)(self, 4, size, NULL);
+  THTensor_(resizeNd)(self, 4, size, NULL);
}

void THTensor_(resize5d)(THTensor *self, long size0, long size1, long size2, long size3, long size4)
{
  long size[5] = {size0, size1, size2, size3, size4};

-  THTensor_(rawResize)(self, 5, size, NULL);
+  THTensor_(resizeNd)(self, 5, size, NULL);
}

void THTensor_(set)(THTensor *self, THTensor *src)
{
  if(self != src)
-    THTensor_(rawSet)(self,
-                      src->storage,
-                      src->storageOffset,
-                      src->nDimension,
-                      src->size,
-                      src->stride);
+    THTensor_(setStorageNd)(self,
+                            src->storage,
+                            src->storageOffset,
+                            src->nDimension,
+                            src->size,
+                            src->stride);
}

void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_)
@@ -295,12 +304,12 @@ void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storag
#ifdef DEBUG
  THAssert((size_ ? size_->size : (stride_ ? stride_->size : 0)) <= INT_MAX);
#endif
-  THTensor_(rawSet)(self,
-                    storage_,
-                    storageOffset_,
-                    (size_ ? size_->size : (stride_ ? stride_->size : 0)),
-                    (size_ ? size_->data : NULL),
-                    (stride_ ? stride_->data : NULL));
+  THTensor_(setStorageNd)(self,
+                          storage_,
+                          storageOffset_,
+                          (size_ ? size_->size : (stride_ ? stride_->size : 0)),
+                          (size_ ? size_->data : NULL),
+                          (stride_ ? stride_->data : NULL));
}

void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
@@ -346,7 +355,7 @@ void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t stor
  long size[4] = {size0_, size1_, size2_, size3_};
  long stride[4] = {stride0_, stride1_, stride2_, stride3_};

-  THTensor_(rawSet)(self, storage_, storageOffset_, 4, size, stride);
+  THTensor_(setStorageNd)(self, storage_, storageOffset_, 4, size, stride);
}


@@ -510,6 +519,33 @@ void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension)
  }
}

+void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension)
+{
+  int d;
+
+  if(!src)
+    src = self;
+
+  THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 2, "dimension out of range");
+  THArgCheck(src->nDimension > 0, 2, "cannot unsqueeze empty tensor");
+
+  THTensor_(set)(self, src);
+
+  self->size = (long*)THRealloc(self->size, sizeof(long)*(self->nDimension+1));
+  self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1));
+  self->nDimension++;
+  for (d = self->nDimension-1; d > dimension; d--) {
+    self->size[d] = self->size[d-1];
+    self->stride[d] = self->stride[d-1];
+  }
+  if (dimension+1 < self->nDimension) {
+    self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1];
+  } else {
+    self->stride[dimension] = 1;
+  }
+  self->size[dimension] = 1;
+}
+
int THTensor_(isContiguous)(const THTensor *self)
{
  long z = 1;
@@ -632,7 +668,7 @@ static void THTensor_(rawInit)(THTensor *self)
  self->flag = TH_TENSOR_REFCOUNTED;
}

-static void THTensor_(rawSet)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
+void THTensor_(setStorageNd)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
{
  /* storage */
  if(self->storage != storage)
@@ -655,10 +691,10 @@ static void THTensor_(rawSet)(THTensor *self, THStorage *storage, ptrdiff_t stor
  self->storageOffset = storageOffset;

  /* size and stride */
-  THTensor_(rawResize)(self, nDimension, size, stride);
+  THTensor_(resizeNd)(self, nDimension, size, stride);
}

-static void THTensor_(rawResize)(THTensor *self, int nDimension, long *size, long *stride)
+void THTensor_(resizeNd)(THTensor *self, int nDimension, long *size, long *stride)
{
  int d;
  int nDimension_;
@@ -804,24 +840,9 @@ THDescBuff THTensor_(desc)(const THTensor *tensor) {
}

THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) {
-  const int L = TH_DESC_BUFF_LEN;
-  THDescBuff buf;
-  char *str = buf.str;
-  int n = 0;
-  n += snprintf(str, L-n, "[");
-  int i;
-  for(i = 0; i < tensor->nDimension; i++) {
-    if(n >= L) break;
-    n += snprintf(str+n, L-n, "%ld", tensor->size[i]);
-    if(i < tensor->nDimension-1) {
-      n += snprintf(str+n, L-n, " x ");
-    }
-  }
-  if(n < L - 2) {
-    snprintf(str+n, L-n, "]");
-  } else {
-    snprintf(str+L-5, 5, "...]");
-  }
+  THLongStorage *size = THTensor_(newSizeOf)((THTensor*)tensor);
+  THDescBuff buf = THLongStorage_sizeDesc(size);
+  THLongStorage_free(size);
  return buf;
}
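The stride rule in THTensor_(unsqueeze1d) above is what lets the new size-1 dimension be inserted without touching the data: it gets stride size[d+1] * stride[d+1] (or 1 at the end). A standalone sketch verifying the rule on a contiguous 3x4 tensor:

    #include <cassert>
    #include <vector>

    // Insert a size-1 dimension at d, computing its stride as in the diff.
    static void unsqueeze(std::vector<long>& size, std::vector<long>& stride, int d) {
      size.insert(size.begin() + d, 1);
      stride.insert(stride.begin() + d, 0);
      if (d + 1 < (int)size.size())
        stride[d] = size[d + 1] * stride[d + 1];
      else
        stride[d] = 1;
    }

    int main() {
      std::vector<long> size{3, 4}, stride{4, 1};   // contiguous 3x4
      unsqueeze(size, stride, 1);                   // -> 3x1x4
      assert(size[1] == 1 && stride[1] == 4);       // stride = 4 * 1
      // element (i, 0, j) still maps to offset i*4 + j: layout unchanged
    }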
@@ -11,7 +11,7 @@ typedef struct THTensor
  long *size;
  long *stride;
  int nDimension;

  THStorage *storage;
  ptrdiff_t storageOffset;
  int refcount;
@@ -68,9 +68,11 @@ TH_API THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sli
TH_API THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_);
TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_);
TH_API THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_);

+TH_API THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size);
+
TH_API void THTensor_(resize)(THTensor *tensor, THLongStorage *size, THLongStorage *stride);
TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src);
+TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, long *size, long *stride);
TH_API void THTensor_(resize1d)(THTensor *tensor, long size0_);
TH_API void THTensor_(resize2d)(THTensor *tensor, long size0_, long size1_);
TH_API void THTensor_(resize3d)(THTensor *tensor, long size0_, long size1_, long size2_);
@@ -79,6 +81,7 @@ TH_API void THTensor_(resize5d)(THTensor *tensor, long size0_, long size1_, long

TH_API void THTensor_(set)(THTensor *self, THTensor *src);
TH_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+TH_API void THTensor_(setStorageNd)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, int nDimension, long *size, long *stride);
TH_API void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
                                    long size0_, long stride0_);
TH_API void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
@@ -101,6 +104,7 @@ TH_API void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension_, lon

TH_API void THTensor_(squeeze)(THTensor *self, THTensor *src);
TH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_);
+TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_);

TH_API int THTensor_(isContiguous)(const THTensor *self);
TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src);
@@ -99,7 +99,7 @@ void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor)
   long dim;
   long div = 1;
 #ifdef TH_REAL_IS_HALF
-#define IS_NONZERO(val) (TH_half2float(val)!=0)
+#define IS_NONZERO(val) ((val.x & 0x7fff) != 0)
 #else
 #define IS_NONZERO(val) ((val)!=0)
 #endif

@@ -2178,25 +2178,25 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb)
 #define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \
   void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, real value) \
   { \
-    THByteTensor_rawResize(r_, t->nDimension, t->size, NULL); \
+    THByteTensor_resizeNd(r_, t->nDimension, t->size, NULL); \
     TH_TENSOR_APPLY2(unsigned char, r_, real, t, \
                      *r__data = (*t_data OP value) ? 1 : 0;); \
   } \
   void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, real value) \
   { \
-    THTensor_(rawResize)(r_, t->nDimension, t->size, NULL); \
+    THTensor_(resizeNd)(r_, t->nDimension, t->size, NULL); \
     TH_TENSOR_APPLY2(real, r_, real, t, \
                      *r__data = (*t_data OP value) ? 1 : 0;); \
   } \
   void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \
   { \
-    THByteTensor_rawResize(r_, ta->nDimension, ta->size, NULL); \
+    THByteTensor_resizeNd(r_, ta->nDimension, ta->size, NULL); \
     TH_TENSOR_APPLY3(unsigned char, r_, real, ta, real, tb, \
                      *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
   } \
   void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \
   { \
-    THTensor_(rawResize)(r_, ta->nDimension, ta->size, NULL); \
+    THTensor_(resizeNd)(r_, ta->nDimension, ta->size, NULL); \
     TH_TENSOR_APPLY3(real, r_, real, ta, real, tb, \
                      *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
   } \
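For readers unfamiliar with the TH macro style: TENSOR_IMPLEMENT_LOGICAL stamps out four comparison kernels per operator, each of which first resizes the result via the renamed resizeNd and then fills it elementwise. A standalone C model of what the ltValue variant computes (the real expansion is generated by the preprocessor and iterates with TH_TENSOR_APPLY2; names here are illustrative):

#include <stdio.h>

/* r[i] = (t[i] < value) ? 1 : 0 over a flat buffer of n elements */
static void lt_value(unsigned char *r, const float *t, long n, float value) {
  long i;
  for (i = 0; i < n; i++)
    r[i] = (t[i] < value) ? 1 : 0;
}

int main(void) {
  float t[5] = {-1.f, 0.f, 0.5f, 2.f, 3.f};
  unsigned char r[5];
  int i;
  lt_value(r, t, 5, 1.0f);
  for (i = 0; i < 5; i++) printf("%d", r[i]);  /* prints 11100 */
  printf("\n");
  return 0;
}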
@@ -3,6 +3,7 @@ CMAKE_POLICY(VERSION 2.8)
 
 SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
 
+SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
 OPTION(NDEBUG "disable asserts (WARNING: this may result in invalid memory accesses)")
 IF(NOT NDEBUG)
   MESSAGE(STATUS "Removing -DNDEBUG from compile flags")

@@ -59,6 +60,10 @@ ENDIF()
 INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
 INCLUDE_DIRECTORIES("${CUDA_SDK_ROOT_DIR}/common/inc")
 
+IF ("$ENV{STATIC_TH}" STREQUAL "YES")
+  LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+ENDIF()
+
 IF(MAGMA_FOUND)
   INCLUDE_DIRECTORIES(${MAGMA_INCLUDE_DIR})
   SET(CMAKE_REQUIRED_INCLUDES "${MAGMA_INCLUDE_DIR};${CUDA_INCLUDE_DIRS}")

@@ -130,9 +135,9 @@ IF(NOT THC_INSTALL_BIN_SUBDIR
   SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR})
 ELSE(Torch_INSTALL_BIN_SUBDIR)
   # not installing in a Torch context, so Torch_INSTALL_BIN_SUBDIR is not available
   SET(THC_INSTALL_BIN_SUBDIR "bin" CACHE PATH "THC install binary subdirectory")
   SET(THC_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THC install library subdirectory")
   SET(THC_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "THC install include subdirectory")
   SET(THC_INSTALL_CMAKE_SUBDIR "share/cmake/THC" CACHE PATH "THC install cmake subdirectory")
 ENDIF(Torch_INSTALL_BIN_SUBDIR)

@@ -208,28 +213,33 @@ ELSE(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
 ENDIF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
 
 MESSAGE(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
+IF ("$ENV{STATIC_TH}" STREQUAL "YES")
+  CUDA_ADD_LIBRARY(THC STATIC ${src} ${src-cuda})
+  SET_TARGET_PROPERTIES(THC PROPERTIES COMPILE_FLAGS "-fPIC")
+ELSE()
-CUDA_ADD_LIBRARY(THC SHARED ${src} ${src-cuda})
-CUDA_ADD_CUBLAS_TO_TARGET(THC)
-TARGET_LINK_LIBRARIES(THC ${TH_LIBRARIES} ${CUDA_curand_LIBRARY})
-
+  CUDA_ADD_LIBRARY(THC SHARED ${src} ${src-cuda})
+  CUDA_ADD_CUBLAS_TO_TARGET(THC)
+  TARGET_LINK_LIBRARIES(THC ${TH_LIBRARIES} ${CUDA_curand_LIBRARY})
-IF(USE_MAGMA)
-  TARGET_LINK_LIBRARIES(THC ${MAGMA_LIBRARIES} ${CUDA_cusparse_LIBRARY})
-ENDIF(USE_MAGMA)
-
+  IF(USE_MAGMA)
+    TARGET_LINK_LIBRARIES(THC ${MAGMA_LIBRARIES} ${CUDA_cusparse_LIBRARY})
+  ENDIF(USE_MAGMA)
+
-IF(NOT THC_SO_VERSION)
-  SET(THC_SO_VERSION 0)
-ENDIF(NOT THC_SO_VERSION)
-MESSAGE(STATUS "THC_SO_VERSION: ${THC_SO_VERSION}")
-SET_TARGET_PROPERTIES(THC PROPERTIES
-  VERSION   ${THC_SO_VERSION}
-  SOVERSION ${THC_SO_VERSION})
+  IF(NOT THC_SO_VERSION)
+    SET(THC_SO_VERSION 0)
+  ENDIF(NOT THC_SO_VERSION)
+  MESSAGE(STATUS "THC_SO_VERSION: ${THC_SO_VERSION}")
+  SET_TARGET_PROPERTIES(THC PROPERTIES
+    VERSION   ${THC_SO_VERSION}
+    SOVERSION ${THC_SO_VERSION})
 
-INSTALL(TARGETS THC
-  RUNTIME DESTINATION "${THC_INSTALL_BIN_SUBDIR}"
-  LIBRARY DESTINATION "${THC_INSTALL_LIB_SUBDIR}"
-  ARCHIVE DESTINATION "${THC_INSTALL_LIB_SUBDIR}")
+  INSTALL(TARGETS THC
+    RUNTIME DESTINATION "${THC_INSTALL_BIN_SUBDIR}"
+    LIBRARY DESTINATION "${THC_INSTALL_LIB_SUBDIR}"
+    ARCHIVE DESTINATION "${THC_INSTALL_LIB_SUBDIR}")
+ENDIF()
 
 
 INSTALL(FILES
   THC.h
@@ -6,6 +6,7 @@
 #include <set>
+#include <stdint.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
 

@@ -23,11 +24,25 @@ struct Block : public BlockSize
 {
   bool allocated;    // true if the block is currently allocated
   int  event_count;  // number of outstanding cuda events
+  std::unordered_set<THCStream *> streams;
 
   Block(size_t size, void* ptr, bool allocated) :
       BlockSize(size, ptr), allocated(allocated), event_count(0) { }
 };
 
+struct BlockStreamCleaner {
+  std::unordered_set<THCStream *> &streams;
+
+  BlockStreamCleaner(std::unordered_set<THCStream *> &streams) : streams(streams) {}
+  ~BlockStreamCleaner() {
+    for(auto it = streams.begin(); it != streams.end(); ++it) {
+      if (*it != NULL) {
+        THCStream_free(*it);
+      }
+    }
+    streams.clear();
+  }
+};
 static bool BlockComparator(const BlockSize& a, const BlockSize& b)
 {
   // sort by size, break ties with pointer

@@ -98,13 +113,41 @@ struct HostAllocator
       return cudaSuccess;
     }
 
     // process outstanding cuda events which may have occurred
     cudaError_t err = processEvents();
     if (err != cudaSuccess) {
       return err;
     }
 
     auto it = blocks.find(ptr);
     THAssert(it != blocks.end());
 
     Block& block = it->second;
     THAssert(block.allocated);
 
+    // free (on valid memory) shouldn't fail, so mark unallocated before
+    // we process the streams.
+    block.allocated = false;
+
+    // since the block has been deallocated, no point in keeping around the
+    // streams, even in case of error.
+    BlockStreamCleaner sc(block.streams);
+    for (auto it = block.streams.begin(); it != block.streams.end(); ++it) {
+      cudaEvent_t event;
+      err = cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
+      if (err != cudaSuccess) {
+        return err;
+      }
+
+      err = cudaEventRecord(event, (*it) == NULL ? NULL : (*it)->stream);
+      if (err != cudaSuccess) {
+        return err;
+      }
+
+      // the block will not be re-used until all associated events have occurred
+      block.event_count++;
+      cuda_events.emplace_back(event, ptr);
+    }
     if (block.event_count == 0) {
       // the block can be re-used if there are no outstanding cuda events
       available.insert(block);

@@ -112,7 +155,7 @@ struct HostAllocator
     return cudaSuccess;
   }
 
-  cudaError_t recordEvent(void* ptr, cudaStream_t stream)
+  cudaError_t recordEvent(void* ptr, THCStream *stream)
   {
     std::lock_guard<std::mutex> lock(mutex);
     cudaError_t err;

@@ -125,27 +168,11 @@ struct HostAllocator
 
     Block& block = it->second;
     THAssert(block.allocated);
 
-    // process outstanding cuda events which may have occurred
-    err = processEvents();
-    if (err != cudaSuccess) {
-      return err;
+    auto res = block.streams.emplace(stream);
+    if (res.second == true && stream != NULL) {
+      THCStream_retain(stream);
     }
 
-    // create and record an event in the given stream
-    cudaEvent_t event;
-    err = cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
-    if (err != cudaSuccess) {
-      return err;
-    }
-    err = cudaEventRecord(event, stream);
-    if (err != cudaSuccess) {
-      return err;
-    }
-
-    // the block will not be re-used until all associated events have occurred
-    block.event_count++;
-    cuda_events.emplace_back(event, ptr);
     return cudaSuccess;
   }

@@ -186,18 +213,17 @@ struct HostAllocator
     std::lock_guard<std::mutex> lock(mutex);
 
-    // remove events for freed blocks
-    std::deque<std::pair<cudaEvent_t, void*>> new_events;
     for (auto it = cuda_events.begin(); it != cuda_events.end(); ++it) {
       cudaEvent_t event = it->first;
       Block& block = blocks.at(it->second);
-      if (!block.allocated) {
-        THCudaCheckWarn(cudaEventDestroy(event));
-        block.event_count--;
-      } else {
-        new_events.push_back(*it);
-      }
+      THCudaCheckWarn(cudaEventDestroy(event));
+      block.event_count--;
     }
-    cuda_events.swap(new_events);
+
+    // all cuda_events have been processed
+    cuda_events.clear();
 
     // clear list of available blocks
     available.clear();

@@ -232,7 +258,7 @@ static void THCCachingHostAllocator_free(void* ctx, void* ptr)
   allocator.free(ptr);
 }
 
-cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, cudaStream_t stream)
+cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, THCStream *stream)
 {
   return allocator.recordEvent(ptr, stream);
 }

@@ -2,6 +2,7 @@
 #define THC_CACHING_HOST_ALLOCATOR_INC
 
 #include "THCGeneral.h"
+#include "THCStream.h"
 
 //
 // A caching allocator for CUDA host allocations (pinned memory).

@@ -22,7 +23,7 @@ THC_API THAllocator THCCachingHostAllocator;
 
 // Records an event in the specified stream. The allocation 'ptr' will not be
 // re-used until the event has occurred.
-THC_API cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, cudaStream_t stream);
+THC_API cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, THCStream *stream);
 
 // Releases cached pinned memory allocations via cudaHostFree
 THC_API void THCCachingHostAllocator_emptyCache(void);
|
||||
#include "THCTensor.h"
|
||||
#include "THCGeneral.h"
|
||||
#include "THCHalf.h"
|
||||
#include "THCStream.h"
|
||||
|
||||
#include "generic/THCTensorCopy.h"
|
||||
#include "THCGenerateAllTypes.h"
|
||||
|
@@ -65,7 +65,6 @@ void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag)
 /**** creation methods ****/
 
 static void THCTensor_(rawInit)(THCState *state, THCTensor *self);
-static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride);
 
 
 /* Empty init */

@@ -81,13 +80,13 @@ THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor)
 {
   THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
   THCTensor_(rawInit)(state, self);
-  THCTensor_(rawSet)(state,
-                     self,
-                     tensor->storage,
-                     tensor->storageOffset,
-                     tensor->nDimension,
-                     tensor->size,
-                     tensor->stride);
+  THCTensor_(setStorageNd)(state,
+                           self,
+                           tensor->storage,
+                           tensor->storageOffset,
+                           tensor->nDimension,
+                           tensor->size,
+                           tensor->stride);
   return self;
 }

@@ -99,13 +98,13 @@ THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, ptrd
   THArgCheck(size->size == stride->size, 4, "inconsistent size");
 
   THCTensor_(rawInit)(state, self);
-  THCTensor_(rawSet)(state,
-                     self,
-                     storage,
-                     storageOffset,
-                     (size ? size->size : (stride ? stride->size : 0)),
-                     (size ? size->data : NULL),
-                     (stride ? stride->data : NULL));
+  THCTensor_(setStorageNd)(state,
+                           self,
+                           storage,
+                           storageOffset,
+                           (size ? size->size : (stride ? stride->size : 0)),
+                           (size ? size->data : NULL),
+                           (stride ? stride->data : NULL));
 
   return self;
 }

@@ -141,7 +140,7 @@ THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, pt
 
   THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
   THCTensor_(rawInit)(state, self);
-  THCTensor_(rawSet)(state, self, storage, storageOffset, 4, size, stride);
+  THCTensor_(setStorageNd)(state, self, storage, storageOffset, 4, size, stride);
 
   return self;
 }

@@ -172,7 +171,7 @@ THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0, long size1, lo
 
   THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
   THCTensor_(rawInit)(state, self);
-  THCTensor_(rawResize)(state, self, 4, size, NULL);
+  THCTensor_(resizeNd)(state, self, 4, size, NULL);
 
   return self;
 }

@@ -224,6 +223,17 @@ THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimensi
   return self;
 }
 
+THCTensor *THCTensor_(newView)(THCState *state, THCTensor *tensor, THLongStorage *size)
+{
+  THArgCheck(THCTensor_(isContiguous)(state, tensor), 2, "input is not contiguous");
+  ptrdiff_t numel = THCTensor_(nElement)(state, tensor);
+  THCTensor *self = THCTensor_(new)(state);
+  THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);
+  THCTensor_(setStorage)(state, self, tensor->storage, tensor->storageOffset, inferred_size, NULL);
+  THLongStorage_free(inferred_size);
+  return self;
+}
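newView above relies on THLongStorage_newInferSize to resolve an unspecified dimension against the tensor's element count. A minimal standalone C sketch of that inference rule (infer_size is a hypothetical helper written for illustration, not the TH implementation):

#include <stdio.h>

/* Resolve a single -1 entry in `size` so the product equals numel.
 * Returns 0 on success, -1 if the requested sizes cannot cover numel. */
static int infer_size(long *size, int ndim, long numel) {
  long known = 1;
  int infer_dim = -1, d;
  for (d = 0; d < ndim; d++) {
    if (size[d] == -1) infer_dim = d;
    else known *= size[d];
  }
  if (infer_dim >= 0) {
    if (known == 0 || numel % known != 0) return -1;
    size[infer_dim] = numel / known;
    return 0;
  }
  return (known == numel) ? 0 : -1;
}

int main(void) {
  long size[2] = {4, -1};
  if (infer_size(size, 2, 12) == 0)
    printf("view size: %ld x %ld\n", size[0], size[1]);  /* 4 x 3 */
  return 0;
}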
 /* Resize */
 void THCTensor_(resize)(THCState *state, THCTensor *self, THLongStorage *size, THLongStorage *stride)
 {

@@ -231,7 +241,7 @@ void THCTensor_(resize)(THCState *state, THCTensor *self, THLongStorage *size, T
   if(stride)
     THArgCheck(stride->size == size->size, 3, "invalid stride");
 
-  THCTensor_(rawResize)(state, self, size->size, size->data, (stride ? stride->data : NULL));
+  THCTensor_(resizeNd)(state, self, size->size, size->data, (stride ? stride->data : NULL));
 }
 
 void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src)

@@ -252,7 +262,7 @@ void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src)
   }
 
   if(!isSame)
-    THCTensor_(rawResize)(state, self, src->nDimension, src->size, NULL);
+    THCTensor_(resizeNd)(state, self, src->nDimension, src->size, NULL);
 }
 
 void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0)

@@ -274,26 +284,26 @@ void THCTensor_(resize4d)(THCState *state, THCTensor *self, long size0, long siz
 {
   long size[4] = {size0, size1, size2, size3};
 
-  THCTensor_(rawResize)(state, self, 4, size, NULL);
+  THCTensor_(resizeNd)(state, self, 4, size, NULL);
 }
 
 void THCTensor_(resize5d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3, long size4)
 {
   long size[5] = {size0, size1, size2, size3, size4};
 
-  THCTensor_(rawResize)(state, self, 5, size, NULL);
+  THCTensor_(resizeNd)(state, self, 5, size, NULL);
 }
 
 void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src)
 {
   if(self != src)
-    THCTensor_(rawSet)(state,
-                       self,
-                       src->storage,
-                       src->storageOffset,
-                       src->nDimension,
-                       src->size,
-                       src->stride);
+    THCTensor_(setStorageNd)(state,
+                             self,
+                             src->storage,
+                             src->storageOffset,
+                             src->nDimension,
+                             src->size,
+                             src->stride);
 }
 
 void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_)

@@ -301,13 +311,13 @@ void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storag
   if(size_ && stride_)
     THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes");
 
-  THCTensor_(rawSet)(state,
-                     self,
-                     storage_,
-                     storageOffset_,
-                     (size_ ? size_->size : (stride_ ? stride_->size : 0)),
-                     (size_ ? size_->data : NULL),
-                     (stride_ ? stride_->data : NULL));
+  THCTensor_(setStorageNd)(state,
+                           self,
+                           storage_,
+                           storageOffset_,
+                           (size_ ? size_->size : (stride_ ? stride_->size : 0)),
+                           (size_ ? size_->data : NULL),
+                           (stride_ ? stride_->data : NULL));
 }
 
 void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,

@@ -353,7 +363,7 @@ void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *stor
   long size[4]   = {size0_, size1_, size2_, size3_};
   long stride[4] = {stride0_, stride1_, stride2_, stride3_};
 
-  THCTensor_(rawSet)(state, self, storage_, storageOffset_, 4, size, stride);
+  THCTensor_(setStorageNd)(state, self, storage_, storageOffset_, 4, size, stride);
 }
 

@@ -517,6 +527,33 @@ void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int
   }
 }
 
+void THCTensor_(unsqueeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension)
+{
+  int d;
+
+  if(!src)
+    src = self;
+
+  THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 3, "dimension out of range");
+  THArgCheck(src->nDimension > 0, 3, "cannot unsqueeze empty tensor");
+
+  THCTensor_(set)(state, self, src);
+
+  self->size = (long*)THRealloc(self->size, sizeof(long)*(self->nDimension+1));
+  self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1));
+  self->nDimension++;
+  for (d = self->nDimension-1; d > dimension; d--) {
+    self->size[d] = self->size[d-1];
+    self->stride[d] = self->stride[d-1];
+  }
+  if (dimension+1 < self->nDimension) {
+    self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1];
+  } else {
+    self->stride[dimension] = 1;
+  }
+  self->size[dimension] = 1;
+}
 
 int THCTensor_(isContiguous)(THCState *state, const THCTensor *self)
 {
   long z = 1;

@@ -637,7 +674,7 @@ static void THCTensor_(rawInit)(THCState *state, THCTensor *self)
   self->flag = TH_TENSOR_REFCOUNTED;
 }
 
-static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
+void THCTensor_(setStorageNd)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
 {
   /* storage */
   if(self->storage != storage)

@@ -660,10 +697,10 @@ static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *sto
   self->storageOffset = storageOffset;
 
   /* size and stride */
-  THCTensor_(rawResize)(state, self, nDimension, size, stride);
+  THCTensor_(resizeNd)(state, self, nDimension, size, stride);
 }
 
-void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride)
+void THCTensor_(resizeNd)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride)
 {
   int d;
   int nDimension_;
@@ -66,6 +66,8 @@ THC_API THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int
 THC_API THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_);
 THC_API THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_);
 THC_API THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_);
+THC_API THCTensor *THCTensor_(newView)(THCState *state, THCTensor *tensor, THLongStorage *size);
+
 
 THC_API void THCTensor_(resize)(THCState *state, THCTensor *tensor, THLongStorage *size, THLongStorage *stride);
 THC_API void THCTensor_(resizeAs)(THCState *state, THCTensor *tensor, THCTensor *src);

@@ -74,10 +76,11 @@ THC_API void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0
 THC_API void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_);
 THC_API void THCTensor_(resize4d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_);
 THC_API void THCTensor_(resize5d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);
-THC_API void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride);
+THC_API void THCTensor_(resizeNd)(THCState *state, THCTensor *tensor, int nDimension, long *size, long *stride);
 
 THC_API void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src);
 THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+THC_API void THCTensor_(setStorageNd)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride);
 THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
                                       long size0_, long stride0_);
 THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,

@@ -100,6 +103,7 @@ THC_API void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src
 
 THC_API void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src);
 THC_API void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension_);
+THC_API void THCTensor_(unsqueeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension_);
 
 THC_API int THCTensor_(isContiguous)(THCState *state, const THCTensor *self);
 THC_API int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor *src);
@@ -118,12 +118,12 @@ void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor
     THCudaCheck(cudaSetDevice(tensorDevice));
   }
 
-  cudaStream_t stream = THCState_getCurrentStream(state);
+  THCStream *stream = THCState_getStream(state);
   THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, self),
                               THTensor_(data)(src),
                               THTensor_(nElement)(src) * sizeof(real),
                               cudaMemcpyHostToDevice,
-                              stream));
+                              stream == NULL ? NULL : stream->stream));
 
   THCudaCheck(THCCachingHostAllocator_recordEvent(src->storage->data, stream));

@@ -149,12 +149,12 @@ void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor
     THCudaCheck(cudaSetDevice(tensorDevice));
   }
 
-  cudaStream_t stream = THCState_getCurrentStream(state);
+  THCStream *stream = THCState_getStream(state);
   THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self),
                               THCTensor_(data)(state, src),
                               THCTensor_(nElement)(state, src) * sizeof(real),
                               cudaMemcpyDeviceToHost,
-                              stream));
+                              stream == NULL ? NULL : stream->stream));
 
   THCudaCheck(THCCachingHostAllocator_recordEvent(src->storage->data, stream));
@@ -424,7 +424,7 @@ __global__ void createBatchGemmBuffer(const real** buffer, real* data,
   const long idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num_batches) {
     buffer[idx] = data + idx * stride;
   }
 }
 
 THC_API void
@@ -10,7 +10,7 @@ static void THCTensor_(copyArray1d)(THCState *state, THCTensor *self, real *src,
 {
   long size[1] = { k };
   long stride[1] = { 1 };
-  THCTensor_(rawResize)(state, self, 1, size, stride);
+  THCTensor_(resizeNd)(state, self, 1, size, stride);
   size_t len = k * sizeof(real);
   THCudaCheck(cudaMemcpy(self->storage->data + self->storageOffset, src, len, cudaMemcpyHostToDevice));
 }

@@ -19,7 +19,7 @@ static void THCTensor_(copyArray2d)(THCState *state, THCTensor *self, real *src,
 {
   long size[2] = { m, n };
   long stride[2] = { 1, m };
-  THCTensor_(rawResize)(state, self, 2, size, stride);
+  THCTensor_(resizeNd)(state, self, 2, size, stride);
   size_t len = m * n * sizeof(real);
   THCudaCheck(cudaMemcpy(self->storage->data + self->storageOffset, src, len, cudaMemcpyHostToDevice));
 }

@@ -54,7 +54,7 @@ static THCTensor* THCTensor_(newColumnMajor)(THCState *state, THCTensor *self, T
   long size[2] = { src->size[0], src->size[1] };
   long stride[2] = { 1, src->size[0] };
 
-  THCTensor_(rawResize)(state, self, 2, size, stride);
+  THCTensor_(resizeNd)(state, self, 2, size, stride);
   THCTensor_(copy)(state, self, src);
   return self;
 }
torch/lib/THCUNN/GatedLinearUnit.cu (new file, 30 lines)
@@ -0,0 +1,30 @@
+#include "THCUNN.h"
+#include "THCHalf.h"
+#include "THCHalfAutoNumerics.cuh"
+#include <THC/THCApply.cuh>
+#include "common.h"
+
+template <typename Dtype, typename Acctype>
+struct gatedLinearCSigMul_functor
+{
+  __device__ void operator()(Dtype *target, const Dtype *sigTensor, const Dtype *mulTensor) const
+  {
+    const Acctype sigNum = Acctype(1)/(Acctype(1)+ exp(ScalarConvert<Dtype, Acctype>::to(-*sigTensor)));
+    const Dtype mulNum = *mulTensor;
+    *target = ScalarConvert<Acctype, Dtype>::to(sigNum * mulNum);
+  }
+};
+
+template <typename Dtype, typename Acctype>
+struct gatedLinearDerivativeSecondHalf_functor
+{
+  __device__ void operator()(Dtype *target, const Dtype *sigTensor, const Dtype *mulTensor) const
+  {
+    const Acctype sigNum = Acctype(1)/(Acctype(1)+ exp(ScalarConvert<Dtype, Acctype>::to(-*sigTensor)));
+    const Dtype mulNum = *mulTensor;
+    *target *= ScalarConvert<Acctype, Dtype>::to((Acctype(1) - sigNum) * sigNum * mulNum);
+  }
+};
+
+#include "generic/GatedLinearUnit.cu"
+#include "THCGenerateFloatTypes.h"
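The two functors above implement the gated linear unit arithmetic: the forward pass computes out = x1 * sigmoid(x2), and the second functor supplies the factor sigmoid(x2) * (1 - sigmoid(x2)) * x1 used for the gradient with respect to the gate half. A host-side C check of the same scalar math, with a finite-difference sanity test (values are illustrative):

#include <math.h>
#include <stdio.h>

static double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }

int main(void) {
  double x1 = 0.7, x2 = -0.3, grad_out = 1.0;
  double s = sigmoid(x2);

  double out = x1 * s;                        /* gatedLinearCSigMul_functor */
  double dx1 = grad_out * s;                  /* gradient w.r.t. the first half */
  double dx2 = grad_out * (1.0 - s) * s * x1; /* gatedLinearDerivativeSecondHalf_functor */

  /* finite-difference check on dx2 */
  double eps = 1e-6;
  double num = (x1 * sigmoid(x2 + eps) - x1 * sigmoid(x2 - eps)) / (2 * eps);
  printf("out=%f dx1=%f dx2=%f (numeric %f)\n", out, dx1, dx2, num);
  return 0;
}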
torch/lib/THCUNN/generic/GatedLinearUnit.cu (new file, 64 lines)
@@ -0,0 +1,64 @@
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/GatedLinearUnit.cu"
+#else
+
+void THNN_(GatedLinear_updateOutput)(
+          THCState *state,
+          THCTensor *input,
+          THCTensor *output,
+          int dim)
+{
+  THCUNN_assertSameGPU(state, 2, input, output);
+
+  // size output to half of input
+  dim = dim - 1;
+  const long nIn = THCTensor_(size)(state, input, dim);
+  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+  const long inputSize = THCTensor_(size)(state, input, dim) / 2;
+  THLongStorage *newSizes = THCTensor_(newSizeOf)(state, input);
+  THLongStorage_set(newSizes, dim, inputSize);
+  THCTensor_(resize)(state, output, newSizes, NULL);
+
+  // halve tensor
+  THCTensor *firstHalf = THCTensor_(newNarrow)(state, input, dim, 0, inputSize);
+  THCTensor *secondHalf = THCTensor_(newNarrow)(state, input, dim, inputSize, inputSize);
+
+  // x = x1:cmul( sigmoid(x2) )
+  THC_pointwiseApply3(state, output, secondHalf, firstHalf, gatedLinearCSigMul_functor<real, accreal>());
+
+  THLongStorage_free(newSizes);
+  THCTensor_(free)(state, firstHalf);
+  THCTensor_(free)(state, secondHalf);
+}
+
+void THNN_(GatedLinear_updateGradInput)(
+          THCState *state,
+          THCTensor *input,
+          THCTensor *gradOutput,
+          THCTensor *gradInput,
+          int dim)
+{
+  THCUNN_assertSameGPU(state, 2, gradOutput, gradInput);
+  dim = dim - 1;
+  const long nIn = THCTensor_(size)(state, input, dim);
+  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+
+  THCTensor_(resizeAs)(state, gradInput, input);
+  const long inputSize = THCTensor_(size)(state, input, dim) / 2;
+  THCTensor *firstHalf = THCTensor_(newNarrow)(state, input, dim, 0, inputSize);
+  THCTensor *secondHalf = THCTensor_(newNarrow)(state, input, dim, inputSize, inputSize);
+  THCTensor *gradInputfirstHalf = THCTensor_(newNarrow)(state, gradInput, dim, 0, inputSize);
+  THCTensor *gradInputsecondHalf = THCTensor_(newNarrow)(state, gradInput, dim, inputSize, inputSize);
+  // first half of derivative
+  THC_pointwiseApply3(state, gradInputfirstHalf, secondHalf, gradOutput, gatedLinearCSigMul_functor<real, accreal>());
+  // second half of derivative
+  THCTensor_(copy)(state, gradInputsecondHalf, firstHalf);
+  THC_pointwiseApply3(state, gradInputsecondHalf, secondHalf, gradOutput, gatedLinearDerivativeSecondHalf_functor<real, accreal>());
+
+  THCTensor_(free)(state, firstHalf);
+  THCTensor_(free)(state, secondHalf);
+  THCTensor_(free)(state, gradInputfirstHalf);
+  THCTensor_(free)(state, gradInputsecondHalf);
+}
+
+#endif
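As a concrete shape check for the kernels above: GatedLinear_updateOutput narrows the input into two halves along the (1-indexed) dim and emits an output of half that extent. A plain C reference for a contiguous 2-D input gated along its second dimension (a sketch under those assumptions, not the THCUNN API):

#include <math.h>
#include <stdio.h>

static float sigmoidf_(float x) { return 1.0f / (1.0f + expf(-x)); }

/* input: rows x (2*half) contiguous; output: rows x half */
static void glu_forward(float *out, const float *in, int rows, int half) {
  int r, c;
  for (r = 0; r < rows; r++)
    for (c = 0; c < half; c++)
      out[r * half + c] = in[r * 2 * half + c]
                        * sigmoidf_(in[r * 2 * half + half + c]);
}

int main(void) {
  float in[2 * 4] = {1, 2, 3, 4,   5, 6, 7, 8};  /* 2 rows, halving dim of size 4 */
  float out[2 * 2];
  glu_forward(out, in, 2, 2);
  printf("%f %f\n%f %f\n", out[0], out[1], out[2], out[3]);
  return 0;
}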
@@ -182,7 +182,6 @@ void THNN_(SpatialFullConvolution_updateOutput)(
         THCTensor_(data)(state, output_n), n_
       );
     }
-
   }
 
   // Free
@@ -138,6 +138,19 @@ TH_API void THNN_(HardTanh_updateGradInput)(
                   real max_val,
                   bool inplace);
 
+TH_API void THNN_(GatedLinear_updateOutput)(
+                  THCState *state,
+                  THCTensor *input,
+                  THCTensor *output,
+                  int dim);
+
+TH_API void THNN_(GatedLinear_updateGradInput)(
+                  THCState *state,
+                  THCTensor *input,
+                  THCTensor *gradOutput,
+                  THCTensor *gradInput,
+                  int dim);
+
 TH_API void THNN_(LeakyReLU_updateOutput)(
                   THCState *state,
                   THCTensor *input,

@@ -1010,7 +1023,7 @@ TH_API void THNN_(VolumetricConvolution_updateOutput)(
                   THCTensor *input,
                   THCTensor *output,
                   THCTensor *weight,
-                  THCTensor *bias,
+                  THCTensor *bias,          // [OPTIONAL]
                   THCTensor *finput,
                   THCTensor *fgradInput,
                   int dT, int dW, int dH,

@@ -1031,7 +1044,7 @@ TH_API void THNN_(VolumetricConvolution_accGradParameters)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradWeight,
-                  THCTensor *gradBias,
+                  THCTensor *gradBias,      // [OPTIONAL]
                   THCTensor *finput,
                   THCTensor *fgradInput,
                   int dT, int dW, int dH,

@@ -1043,7 +1056,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)(
                   THCTensor *input,
                   THCTensor *output,
                   THCTensor *weight,
-                  THCTensor *bias,
+                  THCTensor *bias,          // [OPTIONAL]
                   THCTensor *columns,
                   THCTensor *ones,
                   int kT, int kW, int kH,

@@ -1068,7 +1081,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradWeight,
-                  THCTensor *gradBias,
+                  THCTensor *gradBias,      // [OPTIONAL]
                   THCTensor *columns,
                   THCTensor *ones,
                   int kT, int kW, int kH,

@@ -1105,7 +1118,7 @@ TH_API void THNN_(VolumetricFullConvolution_updateOutput)(
                   THCTensor *input,
                   THCTensor *output,
                   THCTensor *weight,
-                  THCTensor *bias,
+                  THCTensor *bias,          // [OPTIONAL]
                   THCTensor *finput,
                   THCTensor *fgradInput,
                   int dT, int dW, int dH,

@@ -1129,7 +1142,7 @@ TH_API void THNN_(VolumetricFullConvolution_accGradParameters)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradWeight,
-                  THCTensor *gradBias,
+                  THCTensor *gradBias,      // [OPTIONAL]
                   THCTensor *finput,
                   THCTensor *fgradInput,
                   int dT, int dW, int dH,
@@ -178,22 +178,26 @@ void THNN_(VolumetricConvolution_updateOutput)(
       long k_ = 1;
 
       // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
-#ifdef THC_REAL_IS_FLOAT
-      THCudaBlas_Sgemm(
-#elif defined(THC_REAL_IS_HALF)
-      THCudaBlas_Hgemm(
-#elif defined(THC_REAL_IS_DOUBLE)
-      THCudaBlas_Dgemm(
-#endif
-          state,
-          't', 'n',
-          n_, m_, k_,
-          ScalarConvert<int, real>::to(1),
-          THCTensor_(data)(state, ones), k_,
-          THCTensor_(data)(state, bias), k_,
-          ScalarConvert<int, real>::to(0),
-          THCTensor_(data)(state, output_n), n_
-      );
+      if (bias) {
+#ifdef THC_REAL_IS_FLOAT
+        THCudaBlas_Sgemm(
+#elif defined(THC_REAL_IS_HALF)
+        THCudaBlas_Hgemm(
+#elif defined(THC_REAL_IS_DOUBLE)
+        THCudaBlas_Dgemm(
+#endif
+            state,
+            't', 'n',
+            n_, m_, k_,
+            ScalarConvert<int, real>::to(1),
+            THCTensor_(data)(state, ones), k_,
+            THCTensor_(data)(state, bias), k_,
+            ScalarConvert<int, real>::to(0),
+            THCTensor_(data)(state, output_n), n_
+        );
+      } else {
+        THCTensor_(zero)(state, output_n);
+      }
 
       // Extract columns:
       im3d2col(

@@ -460,36 +464,38 @@ void THNN_(VolumetricConvolution_accGradParameters)(
       long k_ = outputDepth * outputHeight * outputWidth;
 
       // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
-#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)
-#ifdef THC_REAL_IS_FLOAT
-      THCudaBlas_Sgemv(
-#elif defined(THC_REAL_IS_DOUBLE)
-      THCudaBlas_Dgemv(
-#endif
-          state,
-          't',
-          k_, m_,
-          scale,
-          THCTensor_(data)(state, gradOutput_n), k_,
-          THCTensor_(data)(state, ones), 1,
-          ScalarConvert<int, real>::to(1),
-          THCTensor_(data)(state, gradBias), 1
-      );
-#endif
-#ifdef THC_REAL_IS_HALF
-      THCudaBlas_Hgemm(
-          state,
-          't', 'n',
-          m_, 1, k_,
-          scale,
-          THCTensor_(data)(state, gradOutput_n), k_,
-          THCTensor_(data)(state, ones), k_,
-          ScalarConvert<int, real>::to(1),
-          THCTensor_(data)(state, gradBias), m_
-      );
-#endif
+      if (gradBias) {
+#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)
+#ifdef THC_REAL_IS_FLOAT
+        THCudaBlas_Sgemv(
+#elif defined(THC_REAL_IS_DOUBLE)
+        THCudaBlas_Dgemv(
+#endif
+            state,
+            't',
+            k_, m_,
+            scale,
+            THCTensor_(data)(state, gradOutput_n), k_,
+            THCTensor_(data)(state, ones), 1,
+            ScalarConvert<int, real>::to(1),
+            THCTensor_(data)(state, gradBias), 1
+        );
+#endif
+#ifdef THC_REAL_IS_HALF
+        THCudaBlas_Hgemm(
+            state,
+            't', 'n',
+            m_, 1, k_,
+            scale,
+            THCTensor_(data)(state, gradOutput_n), k_,
+            THCTensor_(data)(state, ones), k_,
+            ScalarConvert<int, real>::to(1),
+            THCTensor_(data)(state, gradBias), m_
+        );
+#endif
+      }
     }
 
   // Free
   THCTensor_(free)(state, input_n);
   THCTensor_(free)(state, gradOutput_n);
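The bias path above leans on a standard BLAS trick: with k_ = 1, the GEMM of the ones vector against the bias vector is a rank-1 product that broadcasts the per-plane bias across every output location, and since its beta is 0 it doubled as the zero-initialization of output_n; that is why the new else-branch must zero the tensor explicitly when bias is NULL. A small C illustration of the broadcast (shapes only; cuBLAS does the real work):

#include <stdio.h>

/* output[m][n] = bias[m] * ones[n]: the k = 1 GEMM that spreads the
 * per-plane bias over all n spatial positions. */
static void broadcast_bias(float *output, const float *bias, const float *ones,
                           int m, int n) {
  int i, j;
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      output[i * n + j] = bias[i] * ones[j];
}

int main(void) {
  float bias[2] = {0.5f, -1.0f}, ones[3] = {1, 1, 1}, out[6];
  int i;
  broadcast_bias(out, bias, ones, 2, 3);
  for (i = 0; i < 6; i++) printf("%g ", out[i]);  /* 0.5 0.5 0.5 -1 -1 -1 */
  printf("\n");
  return 0;
}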
@@ -3,37 +3,37 @@
 #else
 
 static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
                          THCState *state,
                          THCTensor *input,
                          THCTensor *gradOutput,
                          THCTensor *weight,
                          THCTensor *bias,
                          int dT, int dW, int dH,
                          int padT, int padW, int padH,
                          int adjT, int adjW, int adjH) {
   THCUNN_argCheck(state, input->nDimension == 4 || input->nDimension == 5, 2, input,
                   "4D or 5D (batch mode) tensor expected for input, but got: %s");
   // number of input & output planes and kernel size is indirectly defined by the weight tensor
   THCUNN_argCheck(state, weight->nDimension == 5, 4, weight,
                   "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
                   "expected for weight, but got: %s");
   THArgCheck(THCTensor_(isContiguous)(state, weight), 4,
              "weight tensor has to be contiguous");
   THArgCheck(!bias || THCTensor_(isContiguous)(state, bias), 5,
              "bias tensor has to be contiguous");
   THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
              "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
   THArgCheck(adjT < dT && adjW < dW && adjH < dH, 14,
              "output adjustment must be smaller than stride, but got "
              "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d",
              adjT, adjH, adjW, dT, dH, dW);
 
   int ndim = input->nDimension;
   int nInputPlane = THCTensor_(size)(state, weight, 0);
   int nOutputPlane = THCTensor_(size)(state, weight, 1);
   const int kT = (int)weight->size[2];
   const int kH = (int)weight->size[3];
   const int kW = (int)weight->size[4];
 
   if (bias != NULL) {
     THCUNN_check_dim_size(state, bias, 1, 0, weight->size[1]);

@@ -60,7 +60,7 @@ static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
 
   if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
     THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
             nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
 
   THCUNN_check_dim_size(state, input, ndim, dimf, nInputPlane);
   if (gradOutput != NULL) {

@@ -72,16 +72,16 @@ static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
 }
 
 void THNN_(VolumetricFullConvolution_updateOutput)(
          THCState *state,
          THCTensor *input,
          THCTensor *output,
          THCTensor *weight,
          THCTensor *bias,
          THCTensor *finput,
          THCTensor *fgradInput,
          int dT, int dW, int dH,
          int padT, int padW, int padH,
          int adjT, int adjW, int adjH)
 {
 
   THCTensor *columns = finput;

@@ -89,16 +89,16 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
 
   int nInputPlane = THCTensor_(size)(state, weight, 0);
   int nOutputPlane = THCTensor_(size)(state, weight, 1);
   const int kT = (int)weight->size[2];
   const int kH = (int)weight->size[3];
   const int kW = (int)weight->size[4];
 
   THCUNN_assertSameGPU(state, 6, input, output, weight,
                        bias, columns, ones);
   THNN_(VolumetricFullConvolution_shapeCheck)(
       state, input, NULL, weight, bias,
       dT, dW, dH, padT, padW, padH,
       adjT, adjW, adjH);
 
   input = THCTensor_(newContiguous)(state, input);

@@ -158,14 +158,14 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
 #elif defined(THC_REAL_IS_DOUBLE)
     THCudaBlas_Dgemm(
 #endif
         state,
         'n', 't',
         n, m, k,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, input_n), n,
         THCTensor_(data)(state, weight), m,
         ScalarConvert<int, real>::to(0),
         THCTensor_(data)(state, columns), n
     );
 
     // Unpack columns back into input:

@@ -185,13 +185,14 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
     long k_ = 1;
 
     // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
-#ifdef THC_REAL_IS_FLOAT
-    THCudaBlas_Sgemm(
-#elif defined(THC_REAL_IS_HALF)
-    THCudaBlas_Hgemm(
-#elif defined(THC_REAL_IS_DOUBLE)
-    THCudaBlas_Dgemm(
-#endif
+    if (bias) {
+#ifdef THC_REAL_IS_FLOAT
+      THCudaBlas_Sgemm(
+#elif defined(THC_REAL_IS_HALF)
+      THCudaBlas_Hgemm(
+#elif defined(THC_REAL_IS_DOUBLE)
+      THCudaBlas_Dgemm(
+#endif
         state,
         't', 'n',
         n_, m_, k_,

@@ -200,8 +201,8 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
         THCTensor_(data)(state, bias), k_,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, output_n), n_
-    );
-
+      );
+    }
   }
 
   // Free

@@ -218,31 +219,31 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
 }
 
 void THNN_(VolumetricFullConvolution_updateGradInput)(
          THCState *state,
          THCTensor *input,
          THCTensor *gradOutput,
          THCTensor *gradInput,
          THCTensor *weight,
          THCTensor *finput,
          THCTensor *fgradInput,
          int dT, int dW, int dH,
          int padT, int padW, int padH,
          int adjT, int adjW, int adjH)
 {
   THCTensor *gradColumns = finput;
 
   int nInputPlane = THCTensor_(size)(state, weight, 0);
   int nOutputPlane = THCTensor_(size)(state, weight, 1);
   const int kT = (int)weight->size[2];
   const int kH = (int)weight->size[3];
   const int kW = (int)weight->size[4];
 
   THCUNN_assertSameGPU(state, 5, input, gradOutput, weight,
                        gradColumns, gradInput);
   THNN_(VolumetricFullConvolution_shapeCheck)(
       state, input, gradOutput, weight, NULL,
       dT, dW, dH, padT, padW, padH,
       adjT, adjW, adjH);
 
   input = THCTensor_(newContiguous)(state, input);
   gradOutput = THCTensor_(newContiguous)(state, gradOutput);

@@ -305,14 +306,14 @@ void THNN_(VolumetricFullConvolution_updateGradInput)(
 #elif defined(THC_REAL_IS_DOUBLE)
     THCudaBlas_Dgemm(
 #endif
         state,
         'n', 'n',
         n, m, k,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, gradColumns), n,
         THCTensor_(data)(state, weight), k,
         ScalarConvert<int, real>::to(0),
         THCTensor_(data)(state, gradInput_n), n
     );
   }
 

@@ -334,33 +335,33 @@ void THNN_(VolumetricFullConvolution_updateGradInput)(
 
 
 void THNN_(VolumetricFullConvolution_accGradParameters)(
          THCState *state,
          THCTensor *input,
          THCTensor *gradOutput,
          THCTensor *gradWeight,
          THCTensor *gradBias,
          THCTensor *finput,
          THCTensor *fgradInput,
          int dT, int dW, int dH,
          int padT, int padW, int padH,
          int adjT, int adjW, int adjH,
          real scale)
 {
   THCTensor *columns = finput;
   THCTensor *ones = fgradInput;
 
   int nInputPlane = THCTensor_(size)(state, gradWeight, 0);
   int nOutputPlane = THCTensor_(size)(state, gradWeight, 1);
   const int kT = (int)gradWeight->size[2];
   const int kH = (int)gradWeight->size[3];
   const int kW = (int)gradWeight->size[4];
 
   THCUNN_assertSameGPU(state, 6, input, gradOutput, gradWeight,
                        gradBias, columns, ones);
   THNN_(VolumetricFullConvolution_shapeCheck)(
       state, input, gradOutput, gradWeight,
       gradBias, dT, dW, dH, padT, padW, padH,
       adjT, adjW, adjH);
 
   input = THCTensor_(newContiguous)(state, input);
   gradOutput = THCTensor_(newContiguous)(state, gradOutput);

@@ -426,14 +427,14 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
 #elif defined(THC_REAL_IS_DOUBLE)
     THCudaBlas_Dgemm(
 #endif
         state,
         't', 'n',
         n, m, k,
         scale,
         THCTensor_(data)(state, columns), k,
         THCTensor_(data)(state, input_n), k,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, gradWeight), n
     );
 
     // Do Bias:

@@ -443,12 +444,13 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
     long k_ = outputDepth * outputHeight * outputWidth;
 
     // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
-#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)
-#ifdef THC_REAL_IS_FLOAT
-    THCudaBlas_Sgemv(
-#elif defined(THC_REAL_IS_DOUBLE)
-    THCudaBlas_Dgemv(
-#endif
+    if (gradBias) {
+#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)
+#ifdef THC_REAL_IS_FLOAT
+      THCudaBlas_Sgemv(
+#elif defined(THC_REAL_IS_DOUBLE)
+      THCudaBlas_Dgemv(
+#endif
         state,
         't',
         k_, m_,

@@ -457,10 +459,10 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
         THCTensor_(data)(state, ones), 1,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, gradBias), 1
-    );
-#endif
-#ifdef THC_REAL_IS_HALF
-    THCudaBlas_Hgemm(
+      );
+#endif
+#ifdef THC_REAL_IS_HALF
+      THCudaBlas_Hgemm(
         state,
         't', 'n',
         m_, 1, k_,

@@ -469,8 +471,9 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
         THCTensor_(data)(state, ones), k_,
         ScalarConvert<int, real>::to(1),
         THCTensor_(data)(state, gradBias), m_
-    );
-#endif
+      );
+#endif
+    }
   }
 
   // Free
@@ -1072,7 +1072,7 @@ TH_API void THNN_(VolumetricConvolution_updateOutput)(
           THTensor *input,
           THTensor *output,
           THTensor *weight,
-          THTensor *bias,
+          THTensor *bias,           // [OPTIONAL]
           THTensor *finput,
           THTensor *fgradInput,
           int dT, int dW, int dH,

@@ -1091,7 +1091,7 @@ TH_API void THNN_(VolumetricConvolution_accGradParameters)(
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
-         THTensor *gradBias,
+         THTensor *gradBias,        // [OPTIONAL]
          THTensor *finput,
          THTensor *fgradInput,
          int dT, int dW, int dH,

@@ -1103,7 +1103,7 @@ TH_API void THNN_(VolumetricConvolutionMM_updateOutput)(
          THTensor *input,
          THTensor *output,
          THTensor *weight,
-         THTensor *bias,
+         THTensor *bias,            // [OPTIONAL]
          THTensor *finput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,

@@ -1124,7 +1124,7 @@ TH_API void THNN_(VolumetricConvolutionMM_accGradParameters)(
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
-         THTensor *gradBias,
+         THTensor *gradBias,        // [OPTIONAL]
          THTensor *finput,
          int kT, int kW, int kH,
          int dT, int dW, int dH,

@@ -1136,7 +1136,7 @@ TH_API void THNN_(VolumetricFullConvolution_updateOutput)(
          THTensor *input,           // 4D or 5D (batch) tensor
          THTensor *output,          // [OUT] volumetric convolution output
          THTensor *weight,          // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
-         THTensor *bias,            // gradBias tensor (nOutputPlane)
+         THTensor *bias,            // [OPTIONAL] gradBias tensor (nOutputPlane)
          THTensor *finput,          // [OUT] internal columns buffer
          THTensor *fgradInput,      // [OUT] internal ones buffer
          int dT, int dW, int dH,    // stride of the convolution

@@ -1158,7 +1158,7 @@ TH_API void THNN_(VolumetricFullConvolution_accGradParameters)(
          THTensor *input,           // 4D or 5D (batch) tensor
          THTensor *gradOutput,      // gradient w.r.t. output
          THTensor *gradWeight,      // gradWeight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
-         THTensor *gradBias,        // gradBias tensor (nOutputPlane)
+         THTensor *gradBias,        // [OPTIONAL] gradBias tensor (nOutputPlane)
          THTensor *finput,          // internal columns buffer
          THTensor *fgradInput,      // internal ones buffer
          int dT, int dW, int dH,    // stride

@@ -1171,7 +1171,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)(
          THTensor *input,
          THTensor *output,
          THTensor *weight,
-         THTensor *bias,
+         THTensor *bias,            // [OPTIONAL]
          THTensor *columns,
          THTensor *ones,
          int kT, int kW, int kH,

@@ -1196,7 +1196,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)(
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
-         THTensor *gradBias,
+         THTensor *gradBias,        // [OPTIONAL]
          THTensor *columns,
          THTensor *ones,
          int kT, int kW, int kH,
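The [OPTIONAL] annotations above mean a caller may now legally pass NULL for bias/gradBias, and the kernels branch accordingly. A standalone C model of that convention (illustrative only; the real functions live behind the THNN_ macro):

#include <stdio.h>

/* bias may be NULL, in which case the output starts zeroed, mirroring the
 * else-branches added to the CUDA kernels. */
static void add_bias_or_zero(float *out, const float *bias, int planes, int n) {
  int p, j;
  for (p = 0; p < planes; p++)
    for (j = 0; j < n; j++)
      out[p * n + j] = bias ? bias[p] : 0.0f;
}

int main(void) {
  float out[4];
  add_bias_or_zero(out, NULL, 2, 2);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* all zeros */
  return 0;
}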
@@ -4,9 +4,13 @@
 
 static inline void THNN_(TemporalRowConvolution_shapeCheck)(
         THNNState *state,
-        THTensor *input, THTensor *gradOutput,
-        THTensor *weight, THTensor *bias,
-        int kW, int dW, int padW) {
+        THTensor *input,
+        THTensor *gradOutput,
+        THTensor *weight,
+        THTensor *bias,
+        int kW,
+        int dW,
+        int padW) {
 
   THArgCheck(kW > 0, 5,
              "kernel size should be greater than zero, but got kW: %d", kW);

@@ -64,12 +68,12 @@ static void THNN_(unfolded_acc_row)(
   real *input_data = THTensor_(data)(input);
   real *finput_data = THTensor_(data)(finput);
 
-#pragma omp parallel for private(c)
-  for (c = 0; c < inputFrameSize; ++c) {
+  // #pragma omp parallel for private(c)
+  for (c = 0; c < inputFrameSize; c++) {
     size_t kw, x;
     long long ix = 0;
 
-    for (kw = 0; kw < kW; ++kw) {
+    for (kw = 0; kw < kW; kw++) {
       real *src = finput_data
                   + c * (kW * nOutputFrame)
                   + kw * (nOutputFrame);

@@ -79,7 +83,7 @@ static void THNN_(unfolded_acc_row)(
       if (dW == 1) {
         THVector_(add)(dst + (size_t)(ix), src, 1, nOutputFrame);
       } else {
-        for (x = 0; x < nOutputFrame; ++x) {
+        for (x = 0; x < nOutputFrame; x++) {
           THVector_(add)(dst + (size_t)(ix + x * dW),
                          src + (size_t)(x), 1, 1);
         }

@@ -102,8 +106,8 @@ static void THNN_(unfolded_copy_row)(
   real *input_data = THTensor_(data)(input);
   real *finput_data = THTensor_(data)(finput);
 
-#pragma omp parallel for private(k)
-  for (k = 0; k < inputFrameSize * kW; ++k) {
+  // #pragma omp parallel for private(k)
+  for (k = 0; k < inputFrameSize * kW; k++) {
     size_t c = k / kW;
     size_t rest = k % kW;
     size_t kw = rest % kW;

@@ -116,7 +120,7 @@ static void THNN_(unfolded_copy_row)(
       if (dW == 1) {
         memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame));
       } else {
-        for (x = 0; x < nOutputFrame; ++x) {
+        for (x = 0; x < nOutputFrame; x++) {
           memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW),
                  sizeof(real) * 1);
         }

@@ -138,34 +142,31 @@ static void THNN_(TemporalRowConvolution_updateOutput_frame)(
     long nOutputFrame) {
 
   long i;
-  THTensor *output3d;
-
-  THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
-                           inputFrameSize, nInputFrame, nOutputFrame);
-
-  output3d = THTensor_(newWithStorage3d)(
+  THTensor *output3d = THTensor_(newWithStorage3d)(
       output->storage, output->storageOffset,
       inputFrameSize, -1,
       1, -1,
       nOutputFrame, -1);
 
+  THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
+                           inputFrameSize, nInputFrame, nOutputFrame);
+
-  THTensor_(zero)(output);
-
   if (bias != NULL) {
-    for (i = 0; i < inputFrameSize; ++i)
+    for (i = 0; i < inputFrameSize; i++)
       THVector_(fill)
         (output->storage->data + output->storageOffset
          + output->stride[0] * i,
          THTensor_(get1d)(bias, i), nOutputFrame);
+  } else {
+    THTensor_(zero)(output);
   }
 
-
   THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);
 
   THTensor_(free)(output3d);
 }
void THNN_(TemporalRowConvolution_updateOutput)(
|
||||
THNNState *state,
|
||||
THTensor *input,
|
||||
@ -173,7 +174,7 @@ void THNN_(TemporalRowConvolution_updateOutput)(
|
||||
THTensor *weight,
|
||||
THTensor *bias,
|
||||
THTensor *finput,
|
||||
THTensor *fgradInput, // unused here but needed for Cuda
|
||||
THTensor *fgradInput, // unused here but needed for Cuda
|
||||
int kW,
|
||||
int dW,
|
||||
int padW,
|
||||
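The `baddbmm` in `updateOutput_frame` views `output` as inputFrameSize x 1 x nOutputFrame, `weight` as inputFrameSize x 1 x kW, and `finput` as inputFrameSize x kW x nOutputFrame, so it performs one independent (1 x kW) * (kW x nOutputFrame) product per input row — the defining trick of a row (depthwise) convolution. What it computes, in plain C (sketch only):

/* output[c][x] += sum_k weight[c][k] * finput[c][k][x] */
static void row_conv_gemm_sketch(float *output, const float *weight,
                                 const float *finput, int inputFrameSize,
                                 int kW, int nOutputFrame)
{
  for (int c = 0; c < inputFrameSize; c++)
    for (int k = 0; k < kW; k++)
      for (int x = 0; x < nOutputFrame; x++)
        output[c * nOutputFrame + x] +=
            weight[c * kW + k] * finput[(c * kW + k) * nOutputFrame + x];
}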
@@ -198,11 +199,11 @@ void THNN_(TemporalRowConvolution_updateOutput)(
     if (ndim == 2) { /* non-batch mode */
 
-        THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);
-        THTensor_(zero)(output);
-
         THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
+        THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);
 
         THTensor_(zero)(finput);
+        THTensor_(zero)(output);
 
         THNN_(TemporalRowConvolution_updateOutput_frame)
             (input, output, weight, bias, finput,
@@ -213,14 +214,14 @@ void THNN_(TemporalRowConvolution_updateOutput)(
         long T = input->size[0];
         long t;
 
-        THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
         THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);
-
-        THTensor_(zero)(finput);
         THTensor_(zero)(output);
 
+        THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
+        THTensor_(zero)(finput);
+
 #pragma omp parallel for private(t)
-        for (t = 0; t < T; ++t) {
+        for (t = 0; t < T; t++) {
             THTensor *input_t = THTensor_(newSelect)(input, 0, t);
             THTensor *output_t = THTensor_(newSelect)(output, 0, t);
             THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
@@ -244,10 +245,16 @@ void THNN_(TemporalRowConvolution_updateOutput)(
 }
 
 static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
-    THTensor *gradInput, THTensor *gradOutput,
-    THTensor *weight, THTensor *fgradInput,
-    int kW, int dW, int padW,
-    long inputFrameSize, long nInputFrame, long nOutputFrame) {
+    THTensor *gradInput,
+    THTensor *gradOutput,
+    THTensor *weight,
+    THTensor *fgradInput,
+    int kW,
+    int dW,
+    int padW,
+    long inputFrameSize,
+    long nInputFrame,
+    long nOutputFrame) {
 
     THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
         gradOutput->storage, gradOutput->storageOffset,
@@ -259,7 +266,6 @@ static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
     // gradOutput3d: inputFrameSize x 1 x nOutputFrame
     THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
     // fgradInput: inputFrameSize x kW x nOutputFrame
-
     THTensor_(free)(gradOutput3d);
 
     THTensor_(zero)(gradInput);
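In the backward frame, `weight` arrives already transposed (see the `THTensor_(transpose)` call in `updateGradInput` below), and beta = 0 in the `baddbmm`, so the call is a per-row outer product — fgradInput[c][k][x] = weight[c][k] * gradOutput[c][x] — which `unfolded_acc_row` then scatters back into `gradInput`. A plain-C sketch:

static void grad_input_frame_sketch(float *fgradInput, const float *weight,
                                    const float *gradOutput,
                                    int inputFrameSize, int kW, int nOutputFrame)
{
  for (int c = 0; c < inputFrameSize; c++)
    for (int k = 0; k < kW; k++)
      for (int x = 0; x < nOutputFrame; x++)
        fgradInput[(c * kW + k) * nOutputFrame + x] =
            weight[c * kW + k] * gradOutput[c * nOutputFrame + x];  /* beta = 0: overwrite */
}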
@@ -269,7 +275,6 @@ static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
         inputFrameSize, nInputFrame, nOutputFrame);
 }
 
-
 void THNN_(TemporalRowConvolution_updateGradInput)(
     THNNState *state,
     THTensor *input,
@@ -283,8 +288,6 @@ void THNN_(TemporalRowConvolution_updateGradInput)(
     int padW,
     bool featFirst) {
 
-
-
     int ndim = input->nDimension;
 
     THTensor *tinput, *tgradOutput;
@@ -308,11 +311,11 @@ void THNN_(TemporalRowConvolution_updateGradInput)(
     long nInputFrame = input->size[ndim - 1];
     long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
 
-    THTensor_(resizeAs)(gradInput, input);
-    THTensor_(zero)(gradInput);
-
     THTensor_(resizeAs)(fgradInput, finput);
+    THTensor_(resizeAs)(gradInput, input);
 
     THTensor_(zero)(fgradInput);
+    THTensor_(zero)(gradInput);
 
     THTensor_(transpose)(weight, weight, 1, 2);
 
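The output length used by these resizes is the standard convolution arithmetic, integer division included:

/* nOutputFrame = (nInputFrame + 2*padW - kW) / dW + 1
 * e.g. nInputFrame = 100, kW = 5, dW = 2, padW = 2:
 *      (100 + 4 - 5) / 2 + 1 = 49 + 1 = 50 output frames. */
static inline long conv_out_len(long n, long k, long d, long p)
{
  return (n + 2 * p - k) / d + 1;
}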
@@ -326,7 +329,7 @@ void THNN_(TemporalRowConvolution_updateGradInput)(
         long t;
 
 #pragma omp parallel for private(t)
-        for (t = 0; t < T; ++t) {
+        for (t = 0; t < T; t++) {
 
             THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
             THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
@@ -370,22 +373,20 @@ static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
         gradOutput->size[1], -1);
 
     THTensor_(transpose)(finput, finput, 1, 2);
-
     // gradOutput3d: inputFrameSize x 1 x nOutputFrame
     // finput: inputFrameSize x nOutputFrame x kW
     THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, finput);
     // gradWeight: inputFrameSize x 1 x kW
-
     THTensor_(transpose)(finput, finput, 1, 2);
 
     if (gradBias != NULL) {
-        for (i = 0; i < gradBias->size[0]; ++i) {
+        for (i = 0; i < gradBias->size[0]; i++) {
             long k;
             real sum = 0;
             real *data = gradOutput3d->storage->data
                          + gradOutput3d->storageOffset
                          + i * gradOutput3d->stride[0];
-            for (k = 0; k < gradOutput3d->size[2]; ++k) {
+            for (k = 0; k < gradOutput3d->size[2]; k++) {
                 sum += data[k];
             }
             (gradBias->storage->data + gradBias->storageOffset)[i]
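`accGradParameters_frame` accumulates both parameter gradients from the same unfolded buffer: gradWeight[c][k] += scale * sum_x gradOutput[c][x] * finput[c][k][x] (the `baddbmm` against the transposed `finput`), and, when a bias is present, gradBias[c] += scale * sum_x gradOutput[c][x] (the explicit summation loop above). A plain-C equivalent of both updates (sketch only):

static void acc_grad_params_sketch(float *gradWeight, float *gradBias /* may be NULL */,
                                   const float *gradOutput, const float *finput,
                                   int inputFrameSize, int kW,
                                   int nOutputFrame, float scale)
{
  for (int c = 0; c < inputFrameSize; c++) {
    for (int k = 0; k < kW; k++) {
      float s = 0.f;
      for (int x = 0; x < nOutputFrame; x++)
        s += gradOutput[c * nOutputFrame + x]
           * finput[(c * kW + k) * nOutputFrame + x];
      gradWeight[c * kW + k] += scale * s;
    }
    if (gradBias) {                  /* [OPTIONAL]: skipped when NULL */
      float s = 0.f;
      for (int x = 0; x < nOutputFrame; x++)
        s += gradOutput[c * nOutputFrame + x];
      gradBias[c] += scale * s;
    }
  }
}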
@@ -411,8 +412,6 @@ void THNN_(TemporalRowConvolution_accGradParameters)(
     bool featFirst,
     real scale) {
 
-
-
     int ndim = input->nDimension;
 
     THTensor *tinput, *tgradOutput;
@@ -442,7 +441,7 @@ void THNN_(TemporalRowConvolution_accGradParameters)(
         long T = input->size[0];
         long t;
 
-        for (t = 0; t < T; ++t) {
+        for (t = 0; t < T; t++) {
             THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
             THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
 
torch/lib/THNN/generic/VolumetricConvolution.c
@@ -50,10 +50,14 @@ void THNN_(VolumetricConvolution_updateOutput)(
     THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
 
     /* add bias */
-    for (i = 0; i < bias->size[0]; i++)
-    {
-      THTensor_(select)(outn, output, 0, i);
-      THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+    if (bias) {
+      for (i = 0; i < bias->size[0]; i++)
+      {
+        THTensor_(select)(outn, output, 0, i);
+        THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+      }
+    } else {
+      THTensor_(zero)(output);
     }
 
     /* do convolutions */
@@ -73,10 +77,14 @@ void THNN_(VolumetricConvolution_updateOutput)(
       THTensor_(select)(outb, output, 0, j);
 
       /* add bias */
-      for (i = 0; i < bias->size[0]; i++)
-      {
-        THTensor_(select)(outn, outb, 0, i);
-        THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+      if (bias) {
+        for (i = 0; i < bias->size[0]; i++)
+        {
+          THTensor_(select)(outn, outb, 0, i);
+          THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+        }
+      } else {
+        THTensor_(zero)(outb);
       }
 
       /* do convolutions */
@@ -179,10 +187,11 @@ void THNN_(VolumetricConvolution_accGradParameters)(
             "expected for gradWeight, but got: %s");
 
   int nOutputPlane = (int)gradWeight->size[0];
 
-  THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
-    "gradBias tensor has wrong size"
-  );
+  if (gradBias) {
+    THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
+      "gradBias tensor has wrong size"
+    );
+  }
 
   long k;
   real *gradBias_data;
@@ -200,14 +209,16 @@ void THNN_(VolumetricConvolution_accGradParameters)(
   if (gradOutput->nDimension == 4) /* non-batch mode */
   {
     /* gradient to bias */
-    gradBias_data = THTensor_(data)(gradBias);
-    gradOutSlice = THTensor_(new)();
-    for (k = 0; k < nOutputPlane; k++)
-    {
-      THTensor_(select)(gradOutSlice, gradOutput, 0, k);
-      gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+    if (gradBias) {
+      gradBias_data = THTensor_(data)(gradBias);
+      gradOutSlice = THTensor_(new)();
+      for (k = 0; k < nOutputPlane; k++)
+      {
+        THTensor_(select)(gradOutSlice, gradOutput, 0, k);
+        gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+      }
+      THTensor_(free)(gradOutSlice);
     }
-    THTensor_(free)(gradOutSlice);
 
     /* gradient to kernels */
     THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW);
@@ -226,14 +237,16 @@ void THNN_(VolumetricConvolution_accGradParameters)(
       THTensor_(select)(goutb, gradOutput, 0, j);
 
       /* gradient to bias */
-      gradBias_data = THTensor_(data)(gradBias);
-      gradOutSlice = THTensor_(new)();
-      for (k = 0; k < nOutputPlane; k++)
-      {
-        THTensor_(select)(gradOutSlice, goutb, 0, k);
-        gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+      if (gradBias) {
+        gradBias_data = THTensor_(data)(gradBias);
+        gradOutSlice = THTensor_(new)();
+        for (k = 0; k < nOutputPlane; k++)
+        {
+          THTensor_(select)(gradOutSlice, goutb, 0, k);
+          gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+        }
+        THTensor_(free)(gradOutSlice);
       }
-      THTensor_(free)(gradOutSlice);
 
       /* gradient to kernels */
       THTensor_(conv3DRevger)(gradWeight, 1.0, scale, inpb, goutb, dT, dH, dW);
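The volumetric version of the same guard: each gradBias[k] accumulates `scale` times the sum over every element of gradOutput's k-th plane (what `THTensor_(sumall)` computes on the selected slice), and the whole block is now skipped for bias-free layers. A plain-C model (sketch only):

static void acc_grad_bias_sketch(float *gradBias /* may be NULL */,
                                 const float *gradOutput,
                                 int nOutputPlane, long planeSize, float scale)
{
  if (gradBias == NULL)
    return;                          /* no bias: nothing to accumulate */
  for (int k = 0; k < nOutputPlane; k++) {
    float sum = 0.f;
    for (long i = 0; i < planeSize; i++)
      sum += gradOutput[k * planeSize + i];
    gradBias[k] += scale * sum;
  }
}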
Some files were not shown because too many files have changed in this diff.