mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-24 15:44:58 +08:00
Compare commits
81 Commits
profiler-e
...
v0.3.0
Author | SHA1 | Date | |
---|---|---|---|
af3964a872 | |||
1645546aa9 | |||
350fad8a22 | |||
565d183042 | |||
2ebda372f6 | |||
28b846c486 | |||
9622eaa6fa | |||
db8154df32 | |||
b6eeea343d | |||
1fe9991554 | |||
00118024f3 | |||
87edf5a349 | |||
20972878cc | |||
0d1128d25c | |||
81dc60493d | |||
b18df1cedf | |||
3976d77509 | |||
09c83673bf | |||
5b9a8f918e | |||
f20fb2c1a1 | |||
4e00120117 | |||
2b3f35daea | |||
c580437342 | |||
455e788fe6 | |||
c980fb359b | |||
bae45bb106 | |||
34557d80f4 | |||
1e77879b2a | |||
ff52d424b2 | |||
4b7aa13b30 | |||
e1f2d0916e | |||
4b5b7e53f6 | |||
db66fa9436 | |||
392c89ab6a | |||
cddf501fc5 | |||
d0907d2c34 | |||
448a85a8e0 | |||
ea3138fd09 | |||
b89c96fe58 | |||
088f47bb89 | |||
ddb3804f87 | |||
a896311d06 | |||
937b634b5d | |||
004dfdc7cc | |||
f8aa5e2ed7 | |||
8a49309f81 | |||
14de24d89c | |||
c7cccc250e | |||
1f694e9a6e | |||
1108bced80 | |||
c36d452224 | |||
11955b86d2 | |||
9a6788202b | |||
d58bad4073 | |||
f95e252984 | |||
b49f0f8154 | |||
269c25267b | |||
fde471ee2a | |||
eb24d2ff6e | |||
f768068c3b | |||
c456451915 | |||
f282d1dc7c | |||
2a3cae0f3e | |||
3d9630abc2 | |||
da7a5147db | |||
5df8e582cd | |||
5dff261598 | |||
aa0c8920af | |||
a3b658bf3b | |||
94e89f3911 | |||
f0956ad9ec | |||
452ea78f43 | |||
3d5d66868e | |||
cf373e25e2 | |||
91d764c781 | |||
524235bb71 | |||
e035fa028b | |||
58a928c3b9 | |||
4f1eefa8ad | |||
4251c151e3 | |||
c0931a3a4d |
@ -202,9 +202,9 @@ MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
|
||||
Dockerfile is supplied to build images with cuda support and cudnn v6. Build as usual
|
||||
```
|
||||
docker build -t pytorch .
|
||||
|
||||
```
|
||||
A Dockerfile to build with CUDA 9 and cuDNN v7 (with Volta support) is in tools/docker; the build command is
|
||||
|
||||
```
|
||||
docker build -t pytorch_cuda9 -f tools/docker/Dockerfile9 .
|
||||
```
|
||||
Alternatively, if you want to use a runtime image, you can use the pre-built one from Docker Hub and run with nvidia-docker:
|
||||
|
@ -56,6 +56,12 @@ gradients are correct.
|
||||
Profiler
|
||||
--------
|
||||
|
||||
Autograd includes a profiler that lets you inspect the cost of different
|
||||
operators inside your model - both on the CPU and GPU. There are two modes
|
||||
implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile`
|
||||
and nvprof based (registers both CPU and GPU activity) using
|
||||
:class:`~torch.autograd.profiler.emit_nvtx`.
|
||||
|
||||
.. autoclass:: torch.autograd.profiler.profile
|
||||
:members:
|
||||
|
||||
|
@ -37,6 +37,10 @@ Streams and events
|
||||
.. autoclass:: Event
|
||||
:members:
|
||||
|
||||
Memory management
|
||||
-----------------
|
||||
.. autofunction:: empty_cache
|
||||
|
||||
NVIDIA Tools Extension (NVTX)
|
||||
-----------------------------
|
||||
|
||||
|
@ -19,10 +19,10 @@ Probability distributions - torch.distributions
|
||||
.. autoclass:: Bernoulli
|
||||
:members:
|
||||
|
||||
:hidden:`Multinomial`
|
||||
:hidden:`Categorical`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: Multinomial
|
||||
.. autoclass:: Categorical
|
||||
:members:
|
||||
|
||||
:hidden:`Normal`
|
||||
|
@ -3,18 +3,19 @@
|
||||
CUDA semantics
|
||||
==============
|
||||
|
||||
:mod:`torch.cuda` keeps track of currently selected GPU, and all CUDA tensors
|
||||
you allocate will be created on it. The selected device can be changed with a
|
||||
:mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of
|
||||
the currently selected GPU, and all CUDA tensors you allocate will by default be
|
||||
created on that device. The selected device can be changed with a
|
||||
:any:`torch.cuda.device` context manager.
|
||||
|
||||
However, once a tensor is allocated, you can do operations on it irrespectively
|
||||
of your selected device, and the results will be always placed in on the same
|
||||
However, once a tensor is allocated, you can do operations on it irrespective
|
||||
of the selected device, and the results will always be placed on the same
|
||||
device as the tensor.
|
||||
|
||||
Cross-GPU operations are not allowed by default, with the only exception of
|
||||
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory accesses,
|
||||
any attempts to launch ops on tensors spread across different devices will
|
||||
raise an error.
|
||||
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access, any
|
||||
attempts to launch ops on tensors spread across different devices will raise an
|
||||
error.
|
||||
|
||||
Below you can find a small example showcasing this::
|
||||
|
||||
@ -41,6 +42,16 @@ Below you can find a small example showcasing this::
|
||||
d = torch.randn(2).cuda(2)
|
||||
# d.get_device() == 2
|
||||
|
||||
Memory management
|
||||
-----------------
|
||||
|
||||
PyTorch uses a caching memory allocator to speed up memory allocations. This
|
||||
allows fast memory deallocation without device synchronizations. However, the
|
||||
unused memory managed by the allocator will still show as if used in
|
||||
``nvidia-smi``. Calling :meth:`~torch.cuda.empty_cache` can release all unused
|
||||
cached memory from PyTorch so that it can be used by other GPU applications.
|
||||
|
||||
|
||||
Best practices
|
||||
--------------
|
||||
|
||||
@ -49,13 +60,13 @@ Device-agnostic code
|
||||
|
||||
Due to the structure of PyTorch, you may need to explicitly write
|
||||
device-agnostic (CPU or GPU) code; an example may be creating a new tensor as
|
||||
the initial hidden state of a recurrent neural network.
|
||||
the initial hidden state of a recurrent neural network.
|
||||
|
||||
The first step is to determine whether the GPU should be used or not. A common
|
||||
pattern is to use Python's `argparse` module to read in user arguments, and
|
||||
pattern is to use Python's ``argparse`` module to read in user arguments, and
|
||||
have a flag that can be used to disable CUDA, in combination with
|
||||
`torch.cuda.is_available()`. In the following, `args.cuda` results in a flag
|
||||
that can be used to cast tensors and modules to CUDA if desired::
|
||||
:meth:`~torch.cuda.is_available`. In the following, ``args.cuda`` results in a
|
||||
flag that can be used to cast tensors and modules to CUDA if desired::
|
||||
|
||||
import argparse
|
||||
import torch
|
||||
@ -66,7 +77,7 @@ that can be used to cast tensors and modules to CUDA if desired::
|
||||
args = parser.parse_args()
|
||||
args.cuda = not args.disable_cuda and torch.cuda.is_available()
|
||||
|
||||
If modules or tensors need to be sent to the GPU, `args.cuda` can be used as
|
||||
If modules or tensors need to be sent to the GPU, ``args.cuda`` can be used as
|
||||
follows::
|
||||
|
||||
x = torch.Tensor(8, 42)
|
||||
@ -84,9 +95,9 @@ dataloader would be as follows::
|
||||
x = Variable(x.type(dtype))
|
||||
|
||||
When working with multiple GPUs on a system, you can use the
|
||||
`CUDA_VISIBLE_DEVICES` environment flag to manage which GPUs are available to
|
||||
PyTorch. To manually control which GPU a tensor is created on, the best practice
|
||||
is to use the `torch.cuda.device()` context manager::
|
||||
``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to
|
||||
PyTorch. As mentioned above, to manually control which GPU a tensor is created
|
||||
on, the best practice is to use a :any:`torch.cuda.device` context manager::
|
||||
|
||||
print("Outside device is 0") # On device 0 (default in most scenarios)
|
||||
with torch.cuda.device(1):
|
||||
@ -94,9 +105,10 @@ is to use the `torch.cuda.device()` context manager::
|
||||
print("Outside device is still 0") # On device 0
|
||||
|
||||
If you have a tensor and would like to create a new tensor of the same type on
|
||||
the same device, then you can use the `.new()` function, which acts the same as
|
||||
a normal tensor constructor. Whilst the previously mentioned methods depend on
|
||||
the current GPU context, `new()` preserves the device of the original tensor.
|
||||
the same device, then you can use the :meth:`~torch.Tensor.new` method, which
|
||||
acts the same as a normal tensor constructor. Whilst the previously mentioned
|
||||
methods depend on the current GPU context, :meth:`~torch.Tensor.new` preserves
|
||||
the device of the original tensor.
|
||||
|
||||
This is the recommended practice when creating modules in which new
|
||||
tensors/variables need to be created internally during the forward pass::
|
||||
@ -110,8 +122,9 @@ tensors/variables need to be created internally during the forward pass::
|
||||
y_cpu_long = x_cpu_long.new([[1, 2, 3]])
|
||||
|
||||
If you want to create a tensor of the same type and size of another tensor, and
|
||||
fill it with either ones or zeros, `torch.ones_like()` or `torch.zeros_like()`
|
||||
are provided as more convenient functions (which also preserve device)::
|
||||
fill it with either ones or zeros, :meth:`~torch.ones_like` or
|
||||
:meth:`~torch.zeros_like` are provided as convenient helper functions (which
|
||||
also preserve device)::
|
||||
|
||||
x_cpu = torch.FloatTensor(1)
|
||||
x_gpu = torch.cuda.FloatTensor(1)
|
||||
@ -145,9 +158,9 @@ pinned memory by passing ``pin_memory=True`` to its constructor.
|
||||
Use nn.DataParallel instead of multiprocessing
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Most use cases involving batched input and multiple GPUs should default to using
|
||||
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
|
||||
a single python process can saturate multiple GPUs.
|
||||
Most use cases involving batched inputs and multiple GPUs should default to
|
||||
using :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with
|
||||
the GIL, a single Python process can saturate multiple GPUs.
|
||||
|
||||
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
|
||||
However, this is a known issue that is under active development. As always,
|
||||
|
@ -53,7 +53,7 @@ exporter to print out a human-readable representation of the network::
|
||||
You can also verify the protobuf using the `onnx <https://github.com/onnx/onnx/>`_ library.
|
||||
You can install ``onnx`` with conda::
|
||||
|
||||
conda install -c ezyang onnx
|
||||
conda install -c conda-forge onnx
|
||||
|
||||
Then, you can run::
|
||||
|
||||
@ -75,10 +75,8 @@ To run the exported script with `caffe2 <https://caffe2.ai/>`_, you will need th
|
||||
|
||||
2. You'll need `onnx-caffe2 <https://github.com/onnx/onnx-caffe2>`_, a
|
||||
pure-Python library which provides a Caffe2 backend for ONNX. You can install ``onnx-caffe2``
|
||||
with conda or pip::
|
||||
with pip::
|
||||
|
||||
conda install -c ezyang onnx-caffe2
|
||||
# OR
|
||||
pip install onnx-caffe2
|
||||
|
||||
Once these are installed, you can use the backend for Caffe2::
|
||||
@ -122,34 +120,48 @@ Limitations
|
||||
Supported operators
|
||||
-------------------
|
||||
|
||||
In this tech preview, only the following operators are supported:
|
||||
The following operators are supported:
|
||||
|
||||
* Add (inplace is discarded)
|
||||
* Sub (inplace is discarded)
|
||||
* Mul (inplace is discarded)
|
||||
* Negate (inplace is discarded)
|
||||
* Addmm (inplace is discarded, alpha and beta must be 1)
|
||||
* Tanh (inplace is discarded)
|
||||
* Sigmoid (inplace is discarded)
|
||||
* Transpose
|
||||
* View
|
||||
* Permute
|
||||
* Concat
|
||||
* Squeeze (inplace is discarded)
|
||||
* add (nonzero alpha not supported)
|
||||
* sub (nonzero alpha not supported)
|
||||
* mul
|
||||
* div
|
||||
* cat
|
||||
* mm
|
||||
* addmm
|
||||
* neg
|
||||
* tanh
|
||||
* sigmoid
|
||||
* mean
|
||||
* t
|
||||
* expand (only when used before a broadcasting ONNX operator; e.g., add)
|
||||
* transpose
|
||||
* view
|
||||
* split
|
||||
* squeeze
|
||||
* prelu (single weight shared among input channels not supported)
|
||||
* threshold (non-zero threshold/non-zero value not supported)
|
||||
* leaky_relu
|
||||
* glu
|
||||
* softmax
|
||||
* avg_pool2d (ceil_mode not supported)
|
||||
* log_softmax
|
||||
* unfold (experimental support with ATen-Caffe2 integration)
|
||||
* elu
|
||||
* Conv
|
||||
* BatchNorm
|
||||
* Convolution
|
||||
* Embedding (only optional argument that is supported is ``padding_idx``)
|
||||
* Slice (only integer indexing is supported)
|
||||
* Dropout (inplace is discarded)
|
||||
* Relu (inplace is discarded)
|
||||
* PReLU (inplace is discarded, sharing a single weight among all channels is not supported)
|
||||
* LeakyRelu (inplace is discarded)
|
||||
* MaxPool1d (ceil_mode must be False)
|
||||
* MaxPool2d (ceil_mode must be False)
|
||||
* AvgPool2d (ceil_mode must be False)
|
||||
* MaxPool1d (ceil_mode not supported)
|
||||
* MaxPool2d (ceil_mode not supported)
|
||||
* MaxPool3d (ceil_mode not supported)
|
||||
* Embedding (no optional arguments supported)
|
||||
* RNN
|
||||
* ConstantPadNd
|
||||
* Dropout
|
||||
* FeatureDropout (training mode not supported)
|
||||
* Index (constant integer and tuple indices supported)
|
||||
* Negate
|
||||
|
||||
We plan on expanding support to more operators; RNNs are high on our priority
|
||||
list. The operator set above is sufficient to export the following models:
|
||||
The operator set above is sufficient to export the following models:
|
||||
|
||||
* AlexNet
|
||||
* DCGAN
|
||||
|
@ -18,11 +18,11 @@ you can specify optimizer-specific options such as the learning rate, weight dec
|
||||
|
||||
.. note::
|
||||
|
||||
If you need to move a model to GPU via `.cuda()`, please do so before
|
||||
If you need to move a model to GPU via `.cuda()`, please do so before
|
||||
constructing optimizers for it. Parameters of a model after `.cuda()` will
|
||||
be different objects with those before the call.
|
||||
be different objects from those before the call.
|
||||
|
||||
In general, you should make sure that optimized parameters live in
|
||||
In general, you should make sure that optimized parameters live in
|
||||
consistent locations when optimizers are constructed and used.
|
||||
|
||||
Example::
|
||||
@ -111,6 +111,8 @@ Algorithms
|
||||
:members:
|
||||
.. autoclass:: Adam
|
||||
:members:
|
||||
.. autoclass:: SparseAdam
|
||||
:members:
|
||||
.. autoclass:: Adamax
|
||||
:members:
|
||||
.. autoclass:: ASGD
|
||||
|
16
setup.py
16
setup.py
@ -542,7 +542,7 @@ if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
|
||||
STDCPP_LIB = STDCPP_LIB[:-1]
|
||||
if type(STDCPP_LIB) != str: # python 3
|
||||
STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding)
|
||||
main_link_args += [STDCPP_LIB]
|
||||
extra_link_args += [STDCPP_LIB]
|
||||
version_script = os.path.abspath("tools/pytorch.version")
|
||||
extra_link_args += ['-Wl,--version-script=' + version_script]
|
||||
|
||||
@ -593,9 +593,11 @@ extensions.append(THNN)
|
||||
if WITH_CUDA:
|
||||
thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')]
|
||||
if platform.system() == 'Linux':
|
||||
thnvrtc_link_flags = ['-Wl,--no-as-needed'] + thnvrtc_link_flags
|
||||
thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed']
|
||||
# these have to be specified as -lcuda in link_flags because they
|
||||
# have to come right after the `no-as-needed` option
|
||||
thnvrtc_link_flags += ['-lcuda', '-lnvrtc']
|
||||
THNVRTC = Extension("torch._nvrtc",
|
||||
libraries=['nvrtc', 'cuda'],
|
||||
sources=['torch/csrc/nvrtc.cpp'],
|
||||
language='c++',
|
||||
include_dirs=include_dirs,
|
||||
@ -618,11 +620,13 @@ if WITH_CUDA:
|
||||
)
|
||||
extensions.append(THCUNN)
|
||||
|
||||
version = '0.2.0'
|
||||
version = '0.3.0b0'
|
||||
if os.getenv('PYTORCH_BUILD_VERSION'):
|
||||
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
|
||||
version = os.getenv('PYTORCH_BUILD_VERSION') \
|
||||
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
|
||||
build_number = int(os.getenv('PYTORCH_BUILD_NUMBER'))
|
||||
version = os.getenv('PYTORCH_BUILD_VERSION')
|
||||
if build_number > 1:
|
||||
version += '.post' + str(build_number)
|
||||
else:
|
||||
try:
|
||||
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
|
||||
|
@ -170,6 +170,9 @@ class TestCase(unittest.TestCase):
|
||||
return x, y
|
||||
|
||||
def assertEqual(self, x, y, prec=None, message=''):
|
||||
if isinstance(prec, str) and message == '':
|
||||
message = prec
|
||||
prec = None
|
||||
if prec is None:
|
||||
prec = self.precision
|
||||
|
||||
|
@ -246,6 +246,17 @@ module_tests = [
|
||||
]
|
||||
|
||||
|
||||
def kldivloss_reference(input, target, size_average=True, reduce=True):
|
||||
safe_target = target * (target > 0).type_as(target)
|
||||
safe_target_log = (safe_target + (target <= 0).type_as(target)).log()
|
||||
result = safe_target * (safe_target_log - input)
|
||||
if reduce and size_average:
|
||||
return result.mean()
|
||||
elif reduce:
|
||||
return result.sum()
|
||||
return result
|
||||
|
||||
|
||||
def nllloss2d_reference(input, target, weight=None, ignore_index=-100,
|
||||
size_average=True, reduce=True):
|
||||
N, C, H, W = input.size()
|
||||
@ -309,6 +320,7 @@ def smoothl1loss_reference(input, target, size_average=True, reduce=True):
|
||||
|
||||
|
||||
loss_reference_fns = {
|
||||
'KLDivLoss': kldivloss_reference,
|
||||
'NLLLoss': nllloss_reference,
|
||||
'NLLLoss2d': nllloss2d_reference,
|
||||
'SmoothL1Loss': smoothl1loss_reference,
|
||||
@ -370,6 +382,8 @@ criterion_tests = [
|
||||
module_name='KLDivLoss',
|
||||
input_fn=lambda: torch.rand(10, 10).log(),
|
||||
target_fn=lambda: torch.rand(10, 10),
|
||||
reference_fn=lambda i, t, m:
|
||||
kldivloss_reference(i, t, get_size_average(m), reduce=True),
|
||||
check_no_size_average=True,
|
||||
),
|
||||
dict(
|
||||
|
@ -995,6 +995,16 @@ class TestAutograd(TestCase):
|
||||
self._test_setitem_tensor((5, 5), Variable(mask))
|
||||
self._test_setitem_tensor((5,), Variable(mask[0]))
|
||||
|
||||
def test_select_sum(self):
|
||||
# both select and sum return Scalars in ATen; ensure they work together.
|
||||
x = Variable(torch.randn(10), requires_grad=True)
|
||||
|
||||
def func(x):
|
||||
return x.select(0, 1).sum()
|
||||
|
||||
gradcheck(func, [x])
|
||||
gradgradcheck(func, [x])
|
||||
|
||||
def test_stack(self):
|
||||
x = Variable(torch.randn(10, 10), requires_grad=True)
|
||||
y = Variable(torch.randn(10, 10), requires_grad=True)
|
||||
@ -1006,6 +1016,43 @@ class TestAutograd(TestCase):
|
||||
self.assertEqual(y.grad.data, grad[1])
|
||||
self.assertEqual(z.grad.data, grad[2])
|
||||
|
||||
def test_put(self):
|
||||
root = Variable(torch.randn(4, 5), requires_grad=True)
|
||||
values = Variable(torch.randn(6), requires_grad=True)
|
||||
idx = Variable(torch.LongTensor([1, 2, 3, -1, -2, -3]))
|
||||
|
||||
def func(root, values):
|
||||
x = root.clone()
|
||||
x.put_(idx, values)
|
||||
return x
|
||||
|
||||
gradcheck(func, [root, values])
|
||||
gradgradcheck(func, [root, values])
|
||||
|
||||
def test_put_accumulate(self):
|
||||
root = Variable(torch.randn(4, 5), requires_grad=True)
|
||||
values = Variable(torch.randn(6), requires_grad=True)
|
||||
idx = Variable(torch.LongTensor([1, 2, 3, 1, 2, 3]))
|
||||
|
||||
def func(root, values):
|
||||
x = root.clone()
|
||||
x.put_(idx, values, accumulate=True)
|
||||
return x
|
||||
|
||||
gradcheck(func, [root, values])
|
||||
gradgradcheck(func, [root, values])
|
||||
|
||||
def test_fill(self):
|
||||
root = Variable(torch.randn(4, 5), requires_grad=True)
|
||||
|
||||
def func(root):
|
||||
x = root.clone()
|
||||
x.fill_(2)
|
||||
return x
|
||||
|
||||
gradcheck(func, [root])
|
||||
gradgradcheck(func, [root])
|
||||
|
||||
def test_unused_output(self):
|
||||
x = Variable(torch.randn(10, 10), requires_grad=True)
|
||||
outputs = x.chunk(5)
|
||||
@ -1461,13 +1508,14 @@ class TestAutograd(TestCase):
|
||||
def test_norm_subgradient(self):
|
||||
def run_test(input_size, norm_deg):
|
||||
input = Variable(torch.zeros(*input_size), requires_grad=True)
|
||||
out = input.norm(norm_deg)
|
||||
out.backward()
|
||||
input.norm(norm_deg).backward()
|
||||
self.assertEqual(input.grad.data.abs().sum(), 0)
|
||||
|
||||
run_test((10,), 2)
|
||||
run_test((10, 10), 2)
|
||||
run_test((10,), 3)
|
||||
run_test((10,), 1)
|
||||
run_test((10,), 1.5)
|
||||
|
||||
def test_profiler(self):
|
||||
x = Variable(torch.randn(10, 10))
|
||||
@ -1764,8 +1812,14 @@ method_tests = [
|
||||
('addcdiv', (S, S), (0.5, (S, 1), (1, S)), 'scale_broadcast_rhs'),
|
||||
('addcdiv', (1,), (0.5, (S, S, 1), (1, S)), 'scale_broadcast_all'),
|
||||
('zero_', (S, S, S), ()),
|
||||
('norm', (S, S, S), (2,)),
|
||||
('norm', (S, S, S), (3,), '3'),
|
||||
('norm', (S, S), (2,)),
|
||||
('norm', (S, S), (0,), '0'),
|
||||
('norm', (S, S), (0.5,), '0_5'),
|
||||
('norm', (S, S), (1,), '1'),
|
||||
('norm', (S, S), (3,), '3'),
|
||||
('norm', (S, S), (-1,), 'neg_1'),
|
||||
('norm', (S, S), (-0.5,), 'neg_0_5'),
|
||||
('norm', (S, S), (-1.5,), 'neg_1_5'),
|
||||
('norm', torch.rand(S, S, S) + 5e-2, (1.5,), '1_5'),
|
||||
('norm', (S, S, S), (2, 1), '2_dim', [1]),
|
||||
('norm', (S, S, S), (3, 1), '3_dim', [1]),
|
||||
@ -1842,6 +1896,7 @@ method_tests = [
|
||||
('squeeze', (S, 1, S, 1), ()),
|
||||
('squeeze', (S, 1, S, 1), (1,), '1_dim', [0]),
|
||||
('squeeze', (S, 1, S, 1), (2,), 'not_1_dim', [0]),
|
||||
('squeeze', (1,), (0,), '1d_dim0', [0]),
|
||||
('unsqueeze', (S, S, S), (0,), 'first', [0]),
|
||||
('unsqueeze', (S, S, S), (1,), 'middle', [0]),
|
||||
('unsqueeze', (S, S, S), (3,), 'last', [0]),
|
||||
@ -1875,6 +1930,7 @@ method_tests = [
|
||||
('topk', (S, M, S), (3, 1), 'dim'),
|
||||
('topk', (S, M, S), (3, 1, True), 'dim_desc'),
|
||||
('topk', (S, M, S), (3, 1, True, True), 'dim_desc_sort'),
|
||||
('take', (S, S, S), (Variable(torch.LongTensor([[-3, 2], [20, 2]])),)),
|
||||
('__getitem__', torch.randn(S, S, S), (dont_convert([1, 2]),)),
|
||||
('__getitem__', torch.randn(S, S, S), (slice(0, 3),), 'slice'),
|
||||
('__getitem__', torch.randn(S, S, S), (dont_convert([slice(0, 3), 1]),), 'slice_index'),
|
||||
|
@ -1,5 +1,6 @@
|
||||
import math
|
||||
import tempfile
|
||||
import re
|
||||
import unittest
|
||||
from itertools import repeat
|
||||
|
||||
@ -16,6 +17,11 @@ if not torch.cuda.is_available():
|
||||
TestCase = object # noqa: F811
|
||||
HAS_CUDA = False
|
||||
|
||||
HAS_MAGMA = HAS_CUDA
|
||||
if HAS_CUDA:
|
||||
torch.ones(1).cuda() # has_magma shows up after cuda is initialized
|
||||
HAS_MAGMA = torch.cuda.has_magma
|
||||
|
||||
|
||||
def is_floating(t):
|
||||
return type(t) in [torch.FloatTensor, torch.DoubleTensor,
|
||||
@ -968,6 +974,69 @@ class TestCuda(TestCase):
|
||||
def test_tensor_scatterFill(self):
|
||||
TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)
|
||||
|
||||
def test_var(self):
|
||||
cpu_tensor = torch.randn(2, 3, 3)
|
||||
gpu_tensor = cpu_tensor.cuda()
|
||||
self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
|
||||
self.assertEqual(gpu_tensor.var(1), cpu_tensor.var(1))
|
||||
self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
|
||||
self.assertEqual(gpu_tensor.std(), cpu_tensor.std())
|
||||
self.assertEqual(gpu_tensor.std(1), cpu_tensor.std(1))
|
||||
self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
|
||||
|
||||
cpu_tensor = torch.randn(100)
|
||||
gpu_tensor = cpu_tensor.cuda()
|
||||
self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
|
||||
|
||||
def test_var_unbiased(self):
|
||||
tensor = torch.randn(100).cuda()
|
||||
self.assertEqual(tensor.var(0), tensor.var(0, unbiased=True))
|
||||
self.assertEqual(tensor.var(), tensor.var(unbiased=True))
|
||||
self.assertEqual(tensor.var(unbiased=False), tensor.var(0, unbiased=False)[0])
|
||||
|
||||
tensor = torch.FloatTensor([1.0, 2.0]).cuda()
|
||||
self.assertEqual(tensor.var(unbiased=True), 0.5)
|
||||
self.assertEqual(tensor.var(unbiased=False), 0.25)
|
||||
|
||||
tensor = torch.randn(100).cuda()
|
||||
self.assertEqual(tensor.std(0), tensor.std(0, unbiased=True))
|
||||
self.assertEqual(tensor.std(), tensor.std(unbiased=True))
|
||||
self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0])
|
||||
|
||||
def test_var_large_input(self):
|
||||
# Large, not-nice input
|
||||
tensor_cpu = torch.randn(2 * 32 * 1024 + 1, 2, 67)
|
||||
tensor_cuda = tensor_cpu.cuda()
|
||||
|
||||
self.assertEqual(tensor_cpu.var(2), tensor_cuda.var(2).cpu())
|
||||
|
||||
def test_var_stability(self):
|
||||
tensor = torch.FloatTensor([2281.5, 2281.25]).cuda()
|
||||
|
||||
# Stability for inner dim
|
||||
self.assertEqual(tensor.var(0)[0], 0.03125)
|
||||
|
||||
# General stability
|
||||
self.assertEqual(tensor.var(), 0.03125)
|
||||
|
||||
# Stability for outer dimensions
|
||||
tensor = tensor.unsqueeze(1)
|
||||
self.assertEqual(tensor.var(0)[0], 0.03125)
|
||||
|
||||
@unittest.skipIf(not HAS_MAGMA, "no MAGMA library detected")
|
||||
def test_symeig(self):
|
||||
# Small case
|
||||
tensor = torch.randn(3, 3).cuda()
|
||||
tensor = torch.mm(tensor, tensor.t())
|
||||
eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
|
||||
self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))
|
||||
|
||||
# Large case
|
||||
tensor = torch.randn(257, 257).cuda()
|
||||
tensor = torch.mm(tensor, tensor.t())
|
||||
eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
|
||||
self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))
|
||||
|
||||
def test_arange(self):
|
||||
for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']:
|
||||
a = torch.cuda.__dict__[t]()
|
||||
|
@ -4,6 +4,7 @@ import torch
|
||||
import traceback
|
||||
import unittest
|
||||
from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset
|
||||
from torch.utils.data.dataloader import default_collate
|
||||
from common import TestCase, run_tests, TEST_NUMPY
|
||||
from common_nn import TEST_CUDA
|
||||
|
||||
@ -276,6 +277,23 @@ class TestDataLoader(TestCase):
|
||||
batch = next(iter(loader))
|
||||
self.assertIsInstance(batch, tt)
|
||||
|
||||
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
|
||||
def test_default_colate_bad_numpy_types(self):
|
||||
import numpy as np
|
||||
|
||||
# Should be a no-op
|
||||
arr = np.array(['a', 'b', 'c'])
|
||||
default_collate(arr)
|
||||
|
||||
arr = np.array([[['a', 'b', 'c']]])
|
||||
self.assertRaises(TypeError, lambda: default_collate(arr))
|
||||
|
||||
arr = np.array([object(), object(), object()])
|
||||
self.assertRaises(TypeError, lambda: default_collate(arr))
|
||||
|
||||
arr = np.array([[[object(), object(), object()]]])
|
||||
self.assertRaises(TypeError, lambda: default_collate(arr))
|
||||
|
||||
|
||||
class StringDataset(Dataset):
|
||||
def __init__(self):
|
||||
|
@ -2,7 +2,7 @@ from common import TestCase, run_tests
|
||||
import math
|
||||
import torch
|
||||
from torch.autograd import Variable, gradcheck
|
||||
from torch.distributions import Bernoulli, Multinomial, Normal
|
||||
from torch.distributions import Bernoulli, Categorical, Normal
|
||||
|
||||
|
||||
class TestDistributions(TestCase):
|
||||
@ -47,22 +47,22 @@ class TestDistributions(TestCase):
|
||||
def test_multinomial_1d(self):
|
||||
p = Variable(torch.Tensor([0.1, 0.2, 0.3]), requires_grad=True)
|
||||
# TODO: this should return a 0-dim tensor once we have Scalar support
|
||||
self.assertEqual(Multinomial(p).sample().size(), (1,))
|
||||
self.assertEqual(Multinomial(p).sample_n(1).size(), (1, 1))
|
||||
self._gradcheck_log_prob(Multinomial, (p,))
|
||||
self.assertEqual(Categorical(p).sample().size(), (1,))
|
||||
self.assertEqual(Categorical(p).sample_n(1).size(), (1, 1))
|
||||
self._gradcheck_log_prob(Categorical, (p,))
|
||||
|
||||
def test_multinomial_2d(self):
|
||||
probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]]
|
||||
p = Variable(torch.Tensor(probabilities), requires_grad=True)
|
||||
self.assertEqual(Multinomial(p).sample().size(), (2,))
|
||||
self.assertEqual(Multinomial(p).sample_n(6).size(), (6, 2))
|
||||
self._gradcheck_log_prob(Multinomial, (p,))
|
||||
self.assertEqual(Categorical(p).sample().size(), (2,))
|
||||
self.assertEqual(Categorical(p).sample_n(6).size(), (6, 2))
|
||||
self._gradcheck_log_prob(Categorical, (p,))
|
||||
|
||||
def ref_log_prob(idx, val, log_prob):
|
||||
sample_prob = p.data[idx][val] / p.data[idx].sum()
|
||||
self.assertEqual(log_prob, math.log(sample_prob))
|
||||
|
||||
self._check_log_prob(Multinomial(p), ref_log_prob)
|
||||
self._check_log_prob(Categorical(p), ref_log_prob)
|
||||
|
||||
def test_normal(self):
|
||||
mean = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
|
@ -15,6 +15,15 @@ try:
|
||||
except ImportError:
|
||||
HAS_TORCHVISION = False
|
||||
|
||||
RUN_CUDA = torch.cuda.is_available()
|
||||
if torch.cuda.is_available():
|
||||
CUDA_VERSION = torch._C._cuda_getCompiledVersion()
|
||||
for d in range(torch.cuda.device_count()):
|
||||
major = torch.cuda.get_device_capability(d)[0]
|
||||
if (CUDA_VERSION < 8000 and major >= 6) or (CUDA_VERSION < 9000 and major >= 7):
|
||||
RUN_CUDA = False
|
||||
|
||||
|
||||
skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")
|
||||
|
||||
|
||||
@ -52,7 +61,7 @@ class TestJit(TestCase):
|
||||
torch._C._jit_pass_lint(trace)
|
||||
self.assertExpected(str(trace))
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_lstm_fusion(self):
|
||||
input = Variable(torch.randn(3, 10).cuda())
|
||||
hx = Variable(torch.randn(3, 20).cuda())
|
||||
@ -65,7 +74,7 @@ class TestJit(TestCase):
|
||||
torch._C._jit_pass_lint(trace)
|
||||
self.assertExpected(str(trace))
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_run_lstm_fusion(self):
|
||||
input = Variable(torch.randn(3, 10).cuda())
|
||||
hx = Variable(torch.randn(3, 20).cuda())
|
||||
@ -78,7 +87,7 @@ class TestJit(TestCase):
|
||||
z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
|
||||
self.assertEqual(z, z2)
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_run_lstm_fusion_concat(self):
|
||||
input = Variable(torch.randn(3, 10).cuda())
|
||||
hx = Variable(torch.randn(3, 20).cuda())
|
||||
@ -91,7 +100,7 @@ class TestJit(TestCase):
|
||||
z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
|
||||
self.assertEqual(z, z2)
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_concat_fusion(self):
|
||||
hx = Variable(torch.randn(3, 20).cuda())
|
||||
cx = Variable(torch.randn(3, 20).cuda())
|
||||
@ -105,7 +114,7 @@ class TestJit(TestCase):
|
||||
torch._C._jit_pass_lint(trace)
|
||||
self.assertExpected(str(trace))
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_fusion_distribute(self):
|
||||
def f(x, y):
|
||||
z1, z2 = (x + y).chunk(2, dim=1)
|
||||
@ -146,7 +155,7 @@ class TestJit(TestCase):
|
||||
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
|
||||
self.assertEqual(z, z2)
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
|
||||
def test_compile_addc(self):
|
||||
x = Variable(torch.Tensor([0.4]), requires_grad=True).cuda()
|
||||
y = Variable(torch.Tensor([0.7]), requires_grad=True).cuda()
|
||||
@ -613,7 +622,7 @@ class TestJit(TestCase):
|
||||
assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
|
||||
self.assertExpected(str(g2))
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "cpp tests require CUDA")
|
||||
@unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA")
|
||||
def test_cpp(self):
|
||||
torch._C._jit_run_cpp_tests()
|
||||
|
||||
|
153
test/test_nn.py
153
test/test_nn.py
@ -2249,6 +2249,38 @@ class TestNN(NNTestCase):
|
||||
weight_data[:] = 4
|
||||
self.assertEqual(weight_data, all_vars[4].data)
|
||||
|
||||
@unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
|
||||
def test_cudnn_weight_tying(self):
|
||||
rnns = [
|
||||
nn.LSTM(10, 20, batch_first=True, bidirectional=True),
|
||||
nn.GRU(10, 20, batch_first=True, bidirectional=True),
|
||||
nn.RNN(10, 20, batch_first=True, bidirectional=True)
|
||||
]
|
||||
for rnn in rnns:
|
||||
rnn.bias_ih_l0_reverse = rnn.bias_ih_l0
|
||||
rnn.cuda()
|
||||
input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
|
||||
hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
|
||||
all_vars = [input, hx] + list(rnn.parameters())
|
||||
opt = torch.optim.SGD(rnn.parameters(), lr=0.1)
|
||||
opt.zero_grad()
|
||||
if isinstance(rnn, nn.LSTM):
|
||||
cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
|
||||
all_vars[2:2] = [cx]
|
||||
hx = (hx, cx)
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
output = rnn(input, hx)
|
||||
output[0].sum().backward()
|
||||
|
||||
opt.step()
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
output_cuda = rnn(input, hx)
|
||||
rnn.cpu()
|
||||
hx = (hx[0].cpu(), hx[1].cpu()) if isinstance(rnn, nn.LSTM) else hx.cpu()
|
||||
output_cpu = rnn(input.cpu(), hx)
|
||||
self.assertEqual(output_cuda, output_cpu)
|
||||
|
||||
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
|
||||
def test_cuda_rnn_fused(self):
|
||||
def copy_rnn(rnn1, rnn2):
|
||||
@ -2759,6 +2791,26 @@ class TestNN(NNTestCase):
|
||||
|
||||
self.assertEqual(out1, out2)
|
||||
|
||||
def test_elu_inplace_gradgrad(self):
|
||||
v = Variable(torch.randn(8), requires_grad=True)
|
||||
|
||||
def func(root):
|
||||
x = root.clone()
|
||||
return F.elu(x, inplace=True)
|
||||
|
||||
gradcheck(func, [v])
|
||||
gradgradcheck(func, [v])
|
||||
|
||||
def test_hardtanh_inplace_gradgrad(self):
|
||||
v = Variable(torch.randn(8), requires_grad=True)
|
||||
|
||||
def func(root):
|
||||
x = root.clone()
|
||||
return F.hardtanh(x, inplace=True)
|
||||
|
||||
gradcheck(func, [v])
|
||||
gradgradcheck(func, [v])
|
||||
|
||||
def test_batchnorm_raises_error_if_running_mean_is_not_same_size_as_input(self):
|
||||
input = Variable(torch.rand(2, 10))
|
||||
running_var = torch.rand(10)
|
||||
@ -2845,38 +2897,17 @@ class TestNN(NNTestCase):
|
||||
self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))
|
||||
|
||||
def test_grid_sample(self):
|
||||
# test known input on CPU
|
||||
input = Variable(torch.arange(1, 11).view(1, 1, 2, 5))
|
||||
grid = Variable(torch.Tensor(
|
||||
[[-1, -0.5, 0, 0.2, 1],
|
||||
[-1, -0.333, 0, 0.5, 1],
|
||||
[-1, -0.5, 0, 0.3333, 1],
|
||||
[-1, -0.2, 0, 0.2, 1]]).view(1, 2, 5, 2))
|
||||
output = F.grid_sample(input, grid)
|
||||
groundtruth = torch.Tensor(
|
||||
[[2.2500, 6.0000000000, 5.0000, 4.8340, 9.0000],
|
||||
[2.2500, 6.333250045, 5.0000, 5.1000, 8.4000]]).view(1, 1, 2, 5)
|
||||
self.assertEqual(output.data, groundtruth)
|
||||
def test_cpu_against_cuda(N, C, H, W, padding_mode):
|
||||
def test_shape(N, C, IH, IW, H, W, padding_mode):
|
||||
|
||||
# do gradcheck
|
||||
N = random.randint(1, 8)
|
||||
C = random.randint(1, 8)
|
||||
H = random.randint(1, 8)
|
||||
W = random.randint(1, 8)
|
||||
input = Variable(torch.randn(N, C, H, W), requires_grad=True)
|
||||
grid = Variable(torch.randn(N, H, W, 2), requires_grad=True)
|
||||
self.assertTrue(gradcheck(lambda inp, grid: F.grid_sample(inp, grid), (input, grid)))
|
||||
|
||||
def test_cpu_against_cuda(N, C, H, W):
|
||||
def test_shape(N, C, IH, IW, H, W):
|
||||
input_cpu = Variable(torch.randn(C, N, IH, IW).transpose(0, 1), requires_grad=True)
|
||||
grid_cpu = Variable(torch.randn(H, N, W, 2).transpose(0, 1), requires_grad=True)
|
||||
out_cpu = F.grid_sample(input_cpu, grid_cpu)
|
||||
out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode)
|
||||
self.assertTrue(out_cpu.size() == torch.Size([N, C, H, W]))
|
||||
|
||||
input_cuda = Variable(input_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True)
|
||||
grid_cuda = Variable(grid_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True)
|
||||
out_cuda = F.grid_sample(input_cuda, grid_cuda)
|
||||
out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode)
|
||||
self.assertEqual(out_cpu, out_cuda)
|
||||
|
||||
gradients = out_cpu.data.new(out_cpu.size()).normal_()
|
||||
@ -2889,15 +2920,15 @@ class TestNN(NNTestCase):
|
||||
base_input = torch.randn(C, IH, IW)
|
||||
input_cpu = Variable(base_input.expand(input_cuda.size()), requires_grad=True)
|
||||
grid_cpu = Variable(torch.randn(N, H, W, 2), requires_grad=True)
|
||||
out_cpu = F.grid_sample(input_cpu, grid_cpu)
|
||||
out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode)
|
||||
|
||||
input_cuda = Variable(base_input.cuda().expand(input_cuda.size()), requires_grad=True)
|
||||
grid_cuda = Variable(grid_cpu.data.cuda(), requires_grad=True)
|
||||
out_cuda = F.grid_sample(input_cuda, grid_cuda)
|
||||
out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode)
|
||||
self.assertEqual(out_cpu, out_cuda)
|
||||
|
||||
# test same size output
|
||||
test_shape(N, C, H, W, H, W)
|
||||
test_shape(N, C, H, W, H, W, padding_mode)
|
||||
|
||||
# test larger output
|
||||
N = random.randint(1, 8)
|
||||
@ -2906,7 +2937,7 @@ class TestNN(NNTestCase):
|
||||
IW = random.randint(1, 8)
|
||||
H = random.randint(IH + 1, 12)
|
||||
W = random.randint(IH + 1, 12)
|
||||
test_shape(N, C, IH, IW, H, W)
|
||||
test_shape(N, C, IH, IW, H, W, padding_mode)
|
||||
|
||||
# test smaller output
|
||||
N = random.randint(1, 8)
|
||||
@ -2915,21 +2946,44 @@ class TestNN(NNTestCase):
|
||||
IW = random.randint(1, 8)
|
||||
H = random.randint(1, IH)
|
||||
W = random.randint(1, IW)
|
||||
test_shape(N, C, IH, IW, H, W)
|
||||
test_shape(N, C, IH, IW, H, W, padding_mode)
|
||||
|
||||
# test CUDNN against CPU
|
||||
if TEST_CUDNN:
|
||||
test_cpu_against_cuda(N, C, H, W)
|
||||
# test known input on CPU
|
||||
for padding_mode in ['zeros', 'border']:
|
||||
|
||||
# test CUDA (without CUDNN) against CPU
|
||||
if TEST_CUDA:
|
||||
input = Variable(torch.arange(1, 11).view(1, 1, 2, 5))
|
||||
grid = Variable(torch.Tensor(
|
||||
[[-0.9, -1.4, 0, 0.2, 1],
|
||||
[-1, -0.333, 0, 0.5, 1],
|
||||
[-1, -0.5, 0, 0.3333, 1],
|
||||
[-1, -0.2, 0, 1.1, 0.5]]).view(1, 2, 5, 2))
|
||||
output = F.grid_sample(input, grid, padding_mode=padding_mode)
|
||||
|
||||
# GridSampler will automatically use CUDNN if it is available
|
||||
# so we disable CUDNN temporarily
|
||||
original_cudnn_enabled = cudnn.enabled
|
||||
cudnn.enabled = False
|
||||
test_cpu_against_cuda(N, C, H, W)
|
||||
cudnn.enabled = original_cudnn_enabled
|
||||
if padding_mode == 'zeros':
|
||||
groundtruth = torch.Tensor(
|
||||
[[0.9600, 6.0000000000, 5.0000, 4.8340, 9.0000],
|
||||
[2.2500, 6.333250045, 5.0000, 5.1000, 7.0000]]).view(1, 1, 2, 5)
|
||||
else:
|
||||
groundtruth = torch.Tensor(
|
||||
[[1.2000, 6.0000000000, 5.0000, 4.8340, 9.0000],
|
||||
[2.2500, 6.333250045, 5.0000, 5.1000, 8.7500]]).view(1, 1, 2, 5)
|
||||
|
||||
self.assertEqual(output.data, groundtruth)
|
||||
|
||||
# do gradcheck
|
||||
N = random.randint(1, 8)
|
||||
C = random.randint(1, 8)
|
||||
H = random.randint(1, 8)
|
||||
W = random.randint(1, 8)
|
||||
input = Variable(torch.randn(N, C, H, W), requires_grad=True)
|
||||
grid = Variable(torch.randn(N, H, W, 2), requires_grad=True)
|
||||
self.assertTrue(gradcheck(
|
||||
lambda inp, grid: F.grid_sample(inp, grid, padding_mode=padding_mode),
|
||||
(input, grid)))
|
||||
|
||||
# test CUDA against CPU
|
||||
if TEST_CUDA:
|
||||
test_cpu_against_cuda(N, C, H, W, padding_mode)
|
||||
|
||||
def test_affine_grid(self):
|
||||
# test known input on CPU
|
||||
@ -3653,6 +3707,18 @@ new_criterion_tests = [
|
||||
]
|
||||
|
||||
|
||||
def kldivloss_no_reduce_test():
|
||||
t = Variable(torch.randn(10, 10))
|
||||
return dict(
|
||||
fullname='KLDivLoss_no_reduce',
|
||||
constructor=wrap_functional(
|
||||
lambda i: F.kl_div(i, t.type_as(i), reduce=False)),
|
||||
input_fn=lambda: torch.rand(10, 10).log(),
|
||||
reference_fn=lambda i, _:
|
||||
loss_reference_fns['KLDivLoss'](i, t.data.type_as(i), reduce=False),
|
||||
pickle=False)
|
||||
|
||||
|
||||
def l1loss_no_reduce_test():
|
||||
t = Variable(torch.randn(2, 3, 4))
|
||||
return dict(
|
||||
@ -3811,6 +3877,7 @@ def smoothl1loss_no_reduce_test():
|
||||
|
||||
|
||||
new_module_tests = [
|
||||
kldivloss_no_reduce_test(),
|
||||
l1loss_no_reduce_test(),
|
||||
mseloss_no_reduce_test(),
|
||||
nllloss_no_reduce_test(),
|
||||
@ -4553,7 +4620,7 @@ new_module_tests = [
|
||||
desc='dim'
|
||||
),
|
||||
dict(
|
||||
constructor=wrap_functional(F.softmax, dim=1),
|
||||
constructor=wrap_functional(F.softmax, dim=-1),
|
||||
input_size=(2, 128), # trigger the last-dim algo in CUDA
|
||||
fullname='softmax_lastdim',
|
||||
pickle=False,
|
||||
@ -4585,7 +4652,7 @@ new_module_tests = [
|
||||
pickle=False,
|
||||
),
|
||||
dict(
|
||||
constructor=wrap_functional(F.log_softmax, dim=1),
|
||||
constructor=wrap_functional(F.log_softmax, dim=-1),
|
||||
input_size=(2, 128), # trigger the last-dim algo in CUDA
|
||||
fullname='log_softmax_lastdim',
|
||||
pickle=False,
|
||||
|
@ -61,13 +61,14 @@ class TestOptim(TestCase):
|
||||
|
||||
self.assertLessEqual(params.data.dist(solution), initial_dist)
|
||||
|
||||
def _test_rosenbrock_sparse(self, constructor):
|
||||
def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
|
||||
params_t = torch.Tensor([1.5, 1.5])
|
||||
|
||||
params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
|
||||
params_c = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
|
||||
params = Variable(params_t, requires_grad=True)
|
||||
optimizer = constructor([params])
|
||||
optimizer_c = constructor([params_c])
|
||||
if not sparse_only:
|
||||
params_c = Variable(params_t.clone(), requires_grad=True)
|
||||
optimizer_c = constructor([params_c])
|
||||
|
||||
solution = torch.Tensor([1, 1])
|
||||
initial_dist = params.data.dist(solution)
|
||||
@ -99,8 +100,9 @@ class TestOptim(TestCase):
|
||||
# Do cyclic coordinate descent
|
||||
w = i % 2
|
||||
optimizer.step(functools.partial(eval, params, True, w))
|
||||
optimizer_c.step(functools.partial(eval, params_c, False, w))
|
||||
self.assertEqual(params.data, params_c.data)
|
||||
if not sparse_only:
|
||||
optimizer_c.step(functools.partial(eval, params_c, False, w))
|
||||
self.assertEqual(params.data, params_c.data)
|
||||
|
||||
self.assertLessEqual(params.data.dist(solution), initial_dist)
|
||||
|
||||
@ -229,6 +231,11 @@ class TestOptim(TestCase):
|
||||
lr=1e-3)
|
||||
)
|
||||
|
||||
def test_sgd_sparse(self):
|
||||
self._test_rosenbrock_sparse(
|
||||
lambda params: optim.SGD(params, lr=5e-3)
|
||||
)
|
||||
|
||||
def test_adam(self):
|
||||
self._test_rosenbrock(
|
||||
lambda params: optim.Adam(params, lr=1e-2),
|
||||
@ -247,6 +254,12 @@ class TestOptim(TestCase):
|
||||
lr=1e-3)
|
||||
)
|
||||
|
||||
def test_sparse_adam(self):
|
||||
self._test_rosenbrock_sparse(
|
||||
lambda params: optim.SparseAdam(params, lr=4e-2),
|
||||
True
|
||||
)
|
||||
|
||||
def test_adadelta(self):
|
||||
self._test_rosenbrock(
|
||||
lambda params: optim.Adadelta(params),
|
||||
|
@ -71,6 +71,34 @@ class TestTorch(TestCase):
|
||||
res2[i, j] = v1[i] * v2[j]
|
||||
self.assertEqual(res1, res2)
|
||||
|
||||
def test_addr(self):
|
||||
types = {
|
||||
'torch.DoubleTensor': 1e-8,
|
||||
'torch.FloatTensor': 1e-4,
|
||||
}
|
||||
|
||||
def run_test(m, v1, v2, m_transform=lambda x: x):
|
||||
m = m_transform(m.clone())
|
||||
ref = m.clone()
|
||||
torch.addr(m, v1, v2, out=m)
|
||||
for i in range(m.size(0)):
|
||||
for j in range(m.size(1)):
|
||||
ref[i, j] += v1[i] * v2[j]
|
||||
self.assertEqual(m, ref)
|
||||
|
||||
for tname, _prec in types.items():
|
||||
for h, w in [(100, 110), (1, 20), (200, 2)]:
|
||||
m = torch.randn(h, w).type(tname)
|
||||
v1 = torch.randn(h).type(tname)
|
||||
v2 = torch.randn(w).type(tname)
|
||||
run_test(m, v1, v2)
|
||||
# test transpose
|
||||
run_test(m, v2, v1, lambda x: x.transpose(0, 1))
|
||||
# test 0 strided
|
||||
v1 = torch.randn(1).type(tname).expand(h)
|
||||
run_test(m, v1, v2)
|
||||
run_test(m, v2, v1, lambda x: x.transpose(0, 1))
|
||||
|
||||
def test_addmv(self):
|
||||
types = {
|
||||
'torch.DoubleTensor': 1e-8,
|
||||
@ -408,6 +436,17 @@ class TestTorch(TestCase):
|
||||
test((10,))
|
||||
test((5, 5))
|
||||
|
||||
def test_all_any_empty(self):
|
||||
x = torch.ByteTensor()
|
||||
self.assertTrue(x.all())
|
||||
self.assertFalse(x.any())
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
|
||||
def test_all_any_empty_cuda(self):
|
||||
x = torch.cuda.ByteTensor()
|
||||
self.assertTrue(x.all())
|
||||
self.assertFalse(x.any())
|
||||
|
||||
def test_mv(self):
|
||||
m1 = torch.randn(100, 100)
|
||||
v1 = torch.randn(100)
|
||||
@ -1111,6 +1150,11 @@ class TestTorch(TestCase):
|
||||
torch.arange(0, 1, out=res2)
|
||||
self.assertEqual(res1, res2, 0)
|
||||
|
||||
# Check arange with only one argument
|
||||
res1 = torch.arange(10)
|
||||
res2 = torch.arange(0, 10)
|
||||
self.assertEqual(res1, res2, 0)
|
||||
|
||||
# Check arange for non-contiguous tensors.
|
||||
x = torch.zeros(2, 3)
|
||||
torch.arange(0, 4, out=x.narrow(1, 1, 2))
|
||||
@ -3643,6 +3687,11 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(tensor.std(), tensor.std(unbiased=True))
|
||||
self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0])
|
||||
|
||||
def test_var_stability(self):
|
||||
tensor = torch.FloatTensor([2281.5, 2281.25])
|
||||
self.assertEqual(tensor.var(0)[0], 0.03125)
|
||||
self.assertEqual(tensor.var(), 0.03125)
|
||||
|
||||
def test_view(self):
|
||||
tensor = torch.rand(15)
|
||||
template = torch.rand(3, 5)
|
||||
@ -3709,7 +3758,7 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(result.size(), target, 'Error in repeat using result')
|
||||
result = tensor.repeat(torchSize)
|
||||
self.assertEqual(result.size(), target, 'Error in repeat using result and LongStorage')
|
||||
self.assertEqual((result.mean(0).view(8, 4) - tensor).abs().max(), 0, 'Error in repeat (not equal)')
|
||||
self.assertEqual(result.mean(0).view(8, 4), tensor, 'Error in repeat (not equal)')
|
||||
|
||||
def test_is_same_size(self):
|
||||
t1 = torch.Tensor(3, 4, 9, 10)
|
||||
@ -4511,6 +4560,19 @@ class TestTorch(TestCase):
|
||||
for i in range(len(x)):
|
||||
self.assertEqual(geq2_x[i], geq2_array[i])
|
||||
|
||||
def test_error_msg_type_translation(self):
|
||||
with self.assertRaisesRegex(
|
||||
RuntimeError,
|
||||
# message includes both torch.DoubleTensor and torch.LongTensor
|
||||
'(?=.*torch\.DoubleTensor)(?=.*torch\.LongTensor)'):
|
||||
|
||||
# Calls model with a DoubleTensor input but LongTensor weights
|
||||
input = torch.autograd.Variable(torch.randn(1, 1, 1, 6).double())
|
||||
weight = torch.zeros(1, 1, 1, 3).long()
|
||||
model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False)
|
||||
model.weight.data = weight
|
||||
out = model(input)
|
||||
|
||||
def test_comparison_ops(self):
|
||||
x = torch.randn(5, 5)
|
||||
y = torch.randn(5, 5)
|
||||
|
@ -386,7 +386,7 @@ class TestONNXUtils(TestCase):
|
||||
sizes = [2, 3, 4]
|
||||
pad = [1, 2, 3, 4]
|
||||
paddings = prepare_onnx_paddings(len(sizes), pad)
|
||||
self.assertEqual(paddings, [0, 0, 3, 4, 1, 2])
|
||||
self.assertEqual(paddings, [0, 3, 1, 0, 4, 2])
|
||||
|
||||
def test_check_onnx_broadcast(self):
|
||||
|
||||
|
@ -13,10 +13,10 @@
|
||||
|
||||
- name: add(Tensor self, Tensor other, *, Scalar alpha=1)
|
||||
self: grad
|
||||
other: grad * alpha
|
||||
other: maybe_multiply(grad, alpha)
|
||||
|
||||
- name: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
|
||||
self: grad * beta
|
||||
self: maybe_multiply(grad, beta)
|
||||
batch1: grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) }).bmm(batch2.transpose(1, 2)) * alpha
|
||||
batch2: batch1.transpose(1, 2).bmm(grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) })) * alpha
|
||||
|
||||
@ -36,12 +36,12 @@
|
||||
mat2: mm_mat2_backward(grad, mat1, mat2.sizes(), mat2.strides(), alpha)
|
||||
|
||||
- name: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1)
|
||||
self: grad * beta
|
||||
self: maybe_multiply(grad, beta)
|
||||
mat: grad.ger(vec) * alpha
|
||||
vec: mat.t().mv(grad) * alpha
|
||||
|
||||
- name: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1)
|
||||
self: grad * beta
|
||||
self: maybe_multiply(grad, beta)
|
||||
vec1: grad.mv(vec2) * alpha
|
||||
vec2: grad.t().mv(vec1) * alpha
|
||||
|
||||
@ -62,7 +62,7 @@
|
||||
other: grad * -self * ((self * self + other * other).reciprocal())
|
||||
|
||||
- name: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
|
||||
self: grad * beta
|
||||
self: maybe_multiply(grad, beta)
|
||||
batch1: grad.bmm(batch2.transpose(1, 2)) * alpha
|
||||
batch2: batch1.transpose(1, 2).bmm(grad) * alpha
|
||||
|
||||
@ -108,8 +108,8 @@
|
||||
self: grad.diag(diagonal)
|
||||
|
||||
- name: dist(Tensor self, Tensor other, Scalar p=2)
|
||||
self: norm_backward(grad, self - other, p)
|
||||
other: -norm_backward(grad, self - other, p)
|
||||
self: norm_backward(grad, self - other, p, result)
|
||||
other: -norm_backward(grad, self - other, p, result)
|
||||
|
||||
- name: div(Tensor self, Scalar other)
|
||||
self: grad / other
|
||||
@ -149,7 +149,8 @@
|
||||
|
||||
- name: eye # fallthrough
|
||||
|
||||
- name: fill(Tensor self, Scalar value) # FIXME
|
||||
- name: fill(Tensor self, Scalar value)
|
||||
self: zeros_like(grad)
|
||||
|
||||
- name: floor(Tensor self)
|
||||
self: zeros_like(grad)
|
||||
@ -217,7 +218,6 @@
|
||||
|
||||
- name: index_select(Tensor self, int64_t dim, Tensor index)
|
||||
self: grad.type().zeros(self.sizes()).index_add_(dim, index, grad)
|
||||
__view__: True
|
||||
|
||||
- name: inverse(Tensor self)
|
||||
self: -at::mm(output.t(), at::mm(grad, output.t()))
|
||||
@ -348,10 +348,10 @@
|
||||
self: zeros_like(grad)
|
||||
|
||||
- name: norm(Tensor self, Scalar p=2)
|
||||
self: norm_backward(grad, self, p)
|
||||
self: norm_backward(grad, self, p, result)
|
||||
|
||||
- name: norm(Tensor self, Scalar p, int64_t dim, bool keepdim=False)
|
||||
self: norm_backward(grad, self, p, dim, keepdim)
|
||||
self: norm_backward(grad, self, p, destination, dim, keepdim)
|
||||
|
||||
- name: numel # fallthrough
|
||||
- name: ones # fallthrough
|
||||
@ -395,7 +395,7 @@
|
||||
self: not_implemented("pstrf")
|
||||
|
||||
- name: put(Tensor self, Tensor index, Tensor source, bool accumulate)
|
||||
self: zeros_like(self).put_(index, source, accumulate)
|
||||
self: grad.clone().put_(index, zeros_like(source), accumulate)
|
||||
source: grad.take(index)
|
||||
|
||||
- name: qr(Tensor self)
|
||||
@ -468,7 +468,7 @@
|
||||
__view__: True
|
||||
|
||||
- name: squeeze(Tensor self, int64_t dim)
|
||||
self: maybe_unsqueeze(grad, dim, self.size(dim) == 1)
|
||||
self: maybe_unsqueeze(grad, dim, self.size(dim) == 1 && self.sizes().size() != 1)
|
||||
__view__: True
|
||||
|
||||
- name: std
|
||||
@ -563,9 +563,9 @@
|
||||
grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad)
|
||||
input: zeros_like(input)
|
||||
|
||||
- name: elu_backward(Tensor grad_output, Tensor input, Scalar alpha, bool inplace, Tensor output)
|
||||
grad_output: elu_backward(grad, input, alpha, inplace, output)
|
||||
input: grad * grad_input * (input < 0).toType(grad.type())
|
||||
- name: elu_backward(Tensor grad_output, Scalar alpha, Tensor output)
|
||||
grad_output: elu_backward(grad, alpha, output)
|
||||
output: grad * grad_output * (output < 0).toType(grad.type())
|
||||
|
||||
- name: glu_backward(Tensor grad_output, Tensor input, int64_t dim)
|
||||
grad_output: glu_double_backward_grad_output(grad, input, dim)
|
||||
@ -575,11 +575,12 @@
|
||||
grad_output: hardshrink_backward(grad, input, lambd)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val, bool inplace)
|
||||
grad_output: hardtanh_backward(grad, input, min_val, max_val, false)
|
||||
- name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val)
|
||||
grad_output: hardtanh_backward(grad, input, min_val, max_val)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: kl_div_backward(Tensor input, Tensor target, bool size_average)
|
||||
- name: kl_div_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
|
||||
grad_output: kl_div_double_backward_grad_output(grad, input, target, size_average, reduce)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
|
||||
@ -594,8 +595,8 @@
|
||||
grad_output: grad - (grad * output.exp()).sum(dim, true)
|
||||
input: log_softmax_double_backward(grad, grad_output, dim, output)
|
||||
|
||||
- name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope, bool inplace)
|
||||
grad_output: leaky_relu_backward(grad, input, negative_slope, false)
|
||||
- name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope)
|
||||
grad_output: leaky_relu_backward(grad, input, negative_slope)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: max_pool2d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
|
||||
@ -623,8 +624,8 @@
|
||||
input: zeros_like(input)
|
||||
weight: zeros_like(weight)
|
||||
|
||||
- name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, bool inplace, Tensor noise)
|
||||
grad_output: rrelu_backward(grad, input, lower, upper, training, false, noise)
|
||||
- name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, Tensor noise)
|
||||
grad_output: rrelu_backward(grad, input, lower, upper, training, noise)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: smooth_l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
|
||||
@ -646,8 +647,8 @@
|
||||
grad_output: softshrink_backward(grad, input, lambd)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value, bool inplace)
|
||||
grad_output: threshold_backward(grad, input, threshold, value, false)
|
||||
- name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value)
|
||||
grad_output: threshold_backward(grad, input, threshold, value)
|
||||
input: zeros_like(grad)
|
||||
|
||||
- name: _sigmoid_backward(Tensor grad_output, Tensor output)
|
||||
|
@ -49,6 +49,16 @@ PY_VARIABLE_METHOD_DEF = CodeTemplate("""\
|
||||
UNPACK_SELF = "auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata;"
|
||||
|
||||
|
||||
# XXX: if you got here because of an assertion failure, it doesn't mean
|
||||
# it's enough to just extend the list here. Before you do this, make sure
|
||||
# to add an appropriate wrap() overload in torch/csrc/autograd/utils/wrap_outputs.h.
|
||||
SUPPORTED_RETURN_TYPES = {
|
||||
'Tensor', 'std::tuple<Tensor,Tensor>',
|
||||
'std::tuple<Tensor,Tensor,Tensor>', 'std::vector<Tensor>',
|
||||
'Scalar', 'bool', 'int64_t', 'void*'
|
||||
}
|
||||
|
||||
|
||||
def create_python_bindings(
|
||||
python_functions, py_methods, py_method_defs, py_method_dispatch,
|
||||
is_class):
|
||||
@ -80,6 +90,9 @@ def create_python_bindings(
|
||||
|
||||
def emit_dispatch(i, function):
|
||||
env = {}
|
||||
simple_return_type = function['return_type'].replace(' &', '')
|
||||
assert simple_return_type in SUPPORTED_RETURN_TYPES, \
|
||||
function['name'] + ' returns unsupported type: ' + simple_return_type
|
||||
|
||||
actuals = []
|
||||
formal_args = []
|
||||
|
@ -39,7 +39,11 @@ return baseType->${method_prefix}${api_name}(${unpacked_args});""")
|
||||
|
||||
METHOD_DEFINITION_FALLTHROUGH_VARIABLE = CodeTemplate("""\
|
||||
${unpack_args}
|
||||
return as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));""")
|
||||
auto flags = compute_flags({ ${args_with_derivatives} });
|
||||
auto var = as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));
|
||||
var.is_volatile() = flags.is_volatile;
|
||||
return var;
|
||||
""")
|
||||
|
||||
METHOD_DEFINITION_FALLTHROUGH_INPLACE = CodeTemplate("""\
|
||||
${unpack_args}
|
||||
@ -67,6 +71,7 @@ FUNCTION_DEFINITION = CodeTemplate("""\
|
||||
variable_list ${op}::apply(const variable_list& grads) {
|
||||
variable_list grad_inputs{${num_inputs}};
|
||||
${body}
|
||||
ensure_no_aten_scalars(grad_inputs);
|
||||
return grad_inputs;
|
||||
}
|
||||
""")
|
||||
@ -682,11 +687,6 @@ def create_variable_type(top_env, aten_declarations):
|
||||
if declaration['return_type'] in FALLTHROUGH_RETURN_TYPES:
|
||||
body.extend(METHOD_DEFINITION_FALLTHROUGH.substitute(combined).split('\n'))
|
||||
return body
|
||||
elif declaration['name'] in FALLTHROUGH_FUNCTIONS:
|
||||
tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace']
|
||||
else METHOD_DEFINITION_FALLTHROUGH_VARIABLE)
|
||||
body.extend(tmpl.substitute(combined).split('\n'))
|
||||
return body
|
||||
|
||||
arguments = declaration['arguments']
|
||||
tensor_args = [arg for arg in arguments if arg['simple_type'] in {'Tensor', 'TensorList'}]
|
||||
@ -752,6 +752,12 @@ def create_variable_type(top_env, aten_declarations):
|
||||
elif is_view:
|
||||
env['version_counter'] = 'take_version_counter(ret, self);'
|
||||
|
||||
if declaration['name'] in FALLTHROUGH_FUNCTIONS:
|
||||
tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace']
|
||||
else METHOD_DEFINITION_FALLTHROUGH_VARIABLE)
|
||||
body.extend(tmpl.substitute(combined).split('\n'))
|
||||
return body
|
||||
|
||||
base_call = BASE_CALL.substitute(combined)
|
||||
if not declaration['inplace']:
|
||||
base_call = 'auto ret = as_variable({})'.format(base_call)
|
||||
|
@ -34,41 +34,44 @@ Tensor maybe_multiply(const Tensor & t, const Scalar & s) {
|
||||
}
|
||||
}
|
||||
|
||||
Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_) {
|
||||
auto p = p_.toDouble();
|
||||
auto norm = self.norm(p_);
|
||||
|
||||
if (norm.toDouble() == 0.0) {
|
||||
// handle case at 0 where we return a subgradient containing 0
|
||||
return zeros_like(self);
|
||||
}
|
||||
|
||||
if (p == 2.0) {
|
||||
return self * (grad / norm);
|
||||
} else {
|
||||
auto pow_ = self.abs().pow(p - 2);
|
||||
auto scale_v = grad / norm.toTensor().pow(p - 1);
|
||||
return self * pow_ * scale_v;
|
||||
// Don't expose ATen scalars to Variable API, because they are not supported yet.
|
||||
void ensure_no_aten_scalars(variable_list &vars) {
|
||||
for (auto& v : vars) {
|
||||
if (v.defined() && v.dim() == 0) {
|
||||
v.data().as_strided_({1}, {1});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, int64_t dim, bool keepdim) {
|
||||
if (!keepdim && self.dim() > 1) {
|
||||
grad = grad.unsqueeze(dim);
|
||||
}
|
||||
auto p = p_.toDouble();
|
||||
auto norm = self.norm(p, dim, true);
|
||||
Tensor grad_input;
|
||||
if (p == 2.0) {
|
||||
grad_input = self * (grad / norm);
|
||||
Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_, const Tensor & norm) {
|
||||
double p = p_.toDouble();
|
||||
Tensor self_scaled;
|
||||
Tensor scale_v;
|
||||
if (p == 0.0) {
|
||||
return zeros_like(self);
|
||||
} else if (p == 1.0) {
|
||||
return self.sign() * grad;
|
||||
} else if (p < 2.0) {
|
||||
self_scaled = self.sign() * self.abs().pow(p - 1);
|
||||
scale_v = grad / norm.pow(p - 1);
|
||||
} else if (p == 2.0) {
|
||||
self_scaled = self;
|
||||
scale_v = grad / norm;
|
||||
} else {
|
||||
auto pow_ = self.abs().pow(p - 2);
|
||||
auto scale_v = grad / norm.pow(p - 1);
|
||||
grad_input = self * pow_ * scale_v;
|
||||
self_scaled = self * self.abs().pow(p - 2);
|
||||
scale_v = grad / norm.pow(p - 1);
|
||||
}
|
||||
// handle case at 0 where we return a subgradient containing 0
|
||||
grad_input.masked_fill_(norm == 0, 0);
|
||||
return grad_input;
|
||||
scale_v.masked_fill_(norm == 0, 0);
|
||||
return self_scaled * scale_v;
|
||||
}
|
||||
|
||||
Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, Tensor norm, int64_t dim, bool keepdim) {
|
||||
if (!keepdim && self.dim() > 1) {
|
||||
grad = grad.unsqueeze(dim);
|
||||
norm = norm.unsqueeze(dim);
|
||||
}
|
||||
return norm_backward(grad, self, p_, norm);
|
||||
}
|
||||
|
||||
Tensor reduce_to(const Tensor & grad, IntList sizes) {
|
||||
@ -300,6 +303,16 @@ Tensor glu_double_backward_grad_output(const Tensor & grad, const Tensor & input
|
||||
return tmp.narrow(dim, 0, sizes[dim]) + tmp.narrow(dim, sizes[dim], sizes[dim]);
|
||||
}
|
||||
|
||||
Tensor kl_div_double_backward_grad_output(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
|
||||
auto result = kl_div_backward(grad, input, target, size_average, false);
|
||||
if (reduce && size_average) {
|
||||
return result.mean().toTensor();
|
||||
} else if (reduce) {
|
||||
return result.sum().toTensor();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Tensor log_sigmoid_double_backward(const Tensor & grad, const Tensor & input) {
|
||||
auto z = input.sigmoid();
|
||||
return grad * (z - 1) * z;
|
||||
|
@ -25,7 +25,7 @@ RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-la
|
||||
/opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl&& \
|
||||
/opt/conda/bin/conda clean -ya
|
||||
ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH
|
||||
#RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda80
|
||||
RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda90
|
||||
# This must be done before pip so that requirements.txt is available
|
||||
WORKDIR /opt/pytorch
|
||||
COPY . .
|
||||
|
31
tools/pytorch.version
Normal file
31
tools/pytorch.version
Normal file
@ -0,0 +1,31 @@
|
||||
{
|
||||
global:
|
||||
_TH*;
|
||||
__TH*;
|
||||
TH*;
|
||||
*THP*;
|
||||
*THCP*;
|
||||
PyInit*;
|
||||
init*;
|
||||
state;
|
||||
_ZGVZN2at*;
|
||||
_ZN2at*;
|
||||
_ZNK2at*Type*;
|
||||
_ZNK2at*Tensor*;
|
||||
_ZNK2at*Storage*;
|
||||
_ZNK2at*Scalar*;
|
||||
_ZNK2at*CUDA*;
|
||||
*2at7Context*;
|
||||
_ZTIN2at*;
|
||||
_ZTIZN2at*;
|
||||
_ZTSN2at*;
|
||||
_ZTSPN2at*;
|
||||
_ZTSZN2at*;
|
||||
_ZTVN2at*;
|
||||
_ZZN2at*;
|
||||
_Z*torch*;
|
||||
_Z*Tensor*;
|
||||
_Z*tensor*;
|
||||
local:
|
||||
*;
|
||||
};
|
@ -322,10 +322,10 @@ It may be of a different data type or reside on a different device.
|
||||
|
||||
Args:
|
||||
src (Tensor): Source tensor to copy
|
||||
async (bool): If True and this copy is between CPU and GPU, then the copy
|
||||
async (bool): If ``True`` and this copy is between CPU and GPU, then the copy
|
||||
may occur asynchronously with respect to the host. For other
|
||||
copies, this argument has no effect.
|
||||
broadcast (bool): If True, :attr:`src` will be broadcast to the shape of
|
||||
broadcast (bool): If ``True``, :attr:`src` will be broadcast to the shape of
|
||||
the underlying tensor.
|
||||
""")
|
||||
|
||||
|
@ -1244,7 +1244,7 @@ Computes the eigenvalues and eigenvectors of a real square matrix.
|
||||
Args:
|
||||
a (Tensor): A square matrix for which the eigenvalues and eigenvectors will
|
||||
be computed
|
||||
eigenvectors (bool): `True` to compute both eigenvalues and eigenvectors.
|
||||
eigenvectors (bool): ``True`` to compute both eigenvalues and eigenvectors.
|
||||
Otherwise, only eigenvalues will be computed.
|
||||
out (tuple, optional): Output tensors
|
||||
|
||||
@ -1287,7 +1287,7 @@ add_docstr(torch._C.equal,
|
||||
"""
|
||||
equal(tensor1, tensor2) -> bool
|
||||
|
||||
True if two tensors have the same size and elements, False otherwise.
|
||||
``True`` if two tensors have the same size and elements, ``False`` otherwise.
|
||||
|
||||
Example::
|
||||
|
||||
@ -1843,7 +1843,7 @@ If :attr:`dim` is not given, the last dimension of the `input` is chosen.
|
||||
A tuple of `(values, indices)` is returned, where the `indices` is the indices
|
||||
of the kth-smallest element in the original `input` Tensor in dimension `dim`.
|
||||
|
||||
If :attr:`keepdim` is true, both the :attr:`values` and :attr:`indices` Tensors
|
||||
If :attr:`keepdim` is ``True``, both the :attr:`values` and :attr:`indices` Tensors
|
||||
are the same size as :attr:`input`, except in the dimension :attr:`dim` where
|
||||
they are of size 1. Otherwise, :attr:`dim` is squeezed
|
||||
(see :func:`torch.squeeze`), resulting in both the :attr:`values` and
|
||||
@ -2230,7 +2230,7 @@ Returns the maximum value of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`. The second return value is the index location of each
|
||||
maximum value found (argmax).
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensors are of the same size
|
||||
If :attr:`keepdim` is ``True``, the output Tensors are of the same size
|
||||
as :attr:`input` except in the dimension :attr:`dim` where they are of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
|
||||
in the output Tensors having 1 fewer dimension than :attr:`input`.
|
||||
@ -2341,7 +2341,7 @@ Example::
|
||||
Returns the mean value of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensor is of the same size
|
||||
If :attr:`keepdim` is ``True``, the output Tensor is of the same size
|
||||
as :attr:`input` except in the dimension :attr:`dim` where it is of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in the
|
||||
output Tensor having 1 fewer dimension.
|
||||
@ -2411,7 +2411,7 @@ as a `LongTensor`.
|
||||
|
||||
By default, :attr:`dim` is the last dimension of the :attr:`input` Tensor.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensors are of the same size
|
||||
If :attr:`keepdim` is ``True``, the output Tensors are of the same size
|
||||
as :attr:`input` except in the dimension :attr:`dim` where they are of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
|
||||
the output Tensors having 1 fewer dimension than :attr:`input`.
|
||||
@ -2486,7 +2486,7 @@ Returns the minimum value of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`. The second return value is the index location of each
|
||||
minimum value found (argmin).
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensors are of the same size as
|
||||
If :attr:`keepdim` is ``True``, the output Tensors are of the same size as
|
||||
:attr:`input` except in the dimension :attr:`dim` where they are of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
|
||||
the output Tensors having 1 fewer dimension than :attr:`input`.
|
||||
@ -2608,7 +2608,7 @@ as a `LongTensor`.
|
||||
|
||||
By default, :attr:`dim` is the last dimension of the :attr:`input` Tensor.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensors are of the same size as
|
||||
If :attr:`keepdim` is ``True``, the output Tensors are of the same size as
|
||||
:attr:`input` except in the dimension :attr:`dim` where they are of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
|
||||
in the output Tensors having 1 fewer dimension than :attr:`input`.
|
||||
@ -2756,7 +2756,7 @@ If :attr:`input` is a vector, :attr:`out` is a vector of size `num_samples`.
|
||||
If :attr:`input` is a matrix with `m` rows, :attr:`out` is a matrix of shape
|
||||
`m \u00D7 n`.
|
||||
|
||||
If replacement is `True`, samples are drawn with replacement.
|
||||
If replacement is ``True``, samples are drawn with replacement.
|
||||
|
||||
If not, they are drawn without replacement, which means that when a
|
||||
sample index is drawn for a row, it cannot be drawn again for that row.
|
||||
@ -2945,7 +2945,7 @@ Example::
|
||||
Returns the p-norm of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensor is of the same size as
|
||||
If :attr:`keepdim` is ``True``, the output Tensor is of the same size as
|
||||
:attr:`input` except in the dimension :attr:`dim` where it is of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
|
||||
in the output Tensor having 1 fewer dimension than :attr:`input`.
|
||||
@ -3156,9 +3156,9 @@ potrf(a, upper, out=None)
|
||||
|
||||
Computes the Cholesky decomposition of a positive semidefinite
|
||||
matrix :attr:`a`: returns matrix `u`
|
||||
If `upper` is True or not provided, `u` is upper triangular
|
||||
If `upper` is ``True`` or not provided, `u` is upper triangular
|
||||
such that :math:`a = u^T u`.
|
||||
If `upper` is False, `u` is lower triangular
|
||||
If `upper` is ``False``, `u` is lower triangular
|
||||
such that :math:`a = u u^T`.
|
||||
|
||||
Args:
|
||||
@ -3201,9 +3201,9 @@ potri(u, upper, out=None)
|
||||
|
||||
Computes the inverse of a positive semidefinite matrix given its
|
||||
Cholesky factor :attr:`u`: returns matrix `inv`
|
||||
If `upper` is True or not provided, `u` is upper triangular
|
||||
If `upper` is ``True`` or not provided, `u` is upper triangular
|
||||
such that :math:`inv = (u^T u)^{-1}`.
|
||||
If `upper` is False, `u` is lower triangular
|
||||
If `upper` is ``False``, `u` is lower triangular
|
||||
such that :math:`inv = (u u^T)^{-1}`.
|
||||
|
||||
Args:
|
||||
@ -3248,9 +3248,9 @@ potrs(b, u, upper, out=None)
|
||||
Solves a linear system of equations with a positive semidefinite
|
||||
matrix to be inverted given its Cholesky factor
|
||||
matrix :attr:`u`: returns matrix `c`
|
||||
If `upper` is True or not provided, `u` is and upper triangular
|
||||
If `upper` is ``True`` or not provided, `u` is upper triangular
|
||||
such that :math:`c = (u^T u)^{-1} b`.
|
||||
If `upper` is False, `u` is and lower triangular
|
||||
If `upper` is ``False``, `u` is lower triangular
|
||||
such that :math:`c = (u u^T)^{-1} b`.
|
||||
|
||||
.. note:: `b` is always a 2D `Tensor`, use `b.unsqueeze(1)` to convert a vector.
|
||||
@ -3424,7 +3424,7 @@ Example::
|
||||
Returns the product of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensor is of the same size as
|
||||
If :attr:`keepdim` is ``True``, the output Tensor is of the same size as
|
||||
:attr:`input` except in the dimension :attr:`dim` where it is of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
|
||||
in the output Tensor having 1 fewer dimension than :attr:`input`.
|
||||
@ -3463,9 +3463,9 @@ pstrf(a, upper, out=None)
|
||||
|
||||
Computes the pivoted Cholesky decomposition of a positive semidefinite
|
||||
matrix :attr:`a`: returns matrices `u` and `piv`.
|
||||
If `upper` is True or not provided, `u` is and upper triangular
|
||||
If `upper` is ``True`` or not provided, `u` is upper triangular
|
||||
such that :math:`a = p^T u^T u p`, with `p` the permutation given by `piv`.
|
||||
If `upper` is False, `u` is and lower triangular
|
||||
If `upper` is ``False``, `u` is lower triangular
|
||||
such that :math:`a = p^T u u^T p`.
|
||||
|
||||
Args:
|
||||
@ -3691,7 +3691,7 @@ Example::
|
||||
|
||||
add_docstr(torch._C.arange,
|
||||
"""
|
||||
arange(start, end, step=1, out=None) -> Tensor
|
||||
arange(start=0, end, step=1, out=None) -> Tensor
|
||||
|
||||
Returns a 1D Tensor of size :math:`floor((end - start) / step)` with values
|
||||
from the interval ``[start, end)`` taken with step :attr:`step` starting
|
||||
@ -3705,6 +3705,15 @@ Args:
|
||||
|
||||
Example::
|
||||
|
||||
>>> torch.arange(5)
|
||||
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
[torch.FloatTensor of size 5]
|
||||
|
||||
>>> torch.arange(1, 4)
|
||||
|
||||
1
|
||||
@ -3989,7 +3998,7 @@ in ascending order by value.
|
||||
|
||||
If :attr:`dim` is not given, the last dimension of the `input` is chosen.
|
||||
|
||||
If :attr:`descending` is `True` then the elements are sorted in descending
|
||||
If :attr:`descending` is ``True`` then the elements are sorted in descending
|
||||
order by value.
|
||||
|
||||
A tuple of (sorted_tensor, sorted_indices) is returned, where the
|
||||
@ -4117,7 +4126,7 @@ add_docstr(torch._C.std,
|
||||
|
||||
Returns the standard-deviation of all elements in the :attr:`input` Tensor.
|
||||
|
||||
If :attr:`unbiased` is false, then the standard-deviation will be calculated via
|
||||
If :attr:`unbiased` is ``False``, then the standard-deviation will be calculated via
|
||||
the biased estimator. Otherwise, Bessel's correction will be used.
|
||||
|
||||
Args:
|
||||
@ -4141,12 +4150,12 @@ Example::
|
||||
Returns the standard-deviation of each row of the :attr:`input` Tensor in the
|
||||
given dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensor is of the same size as
|
||||
If :attr:`keepdim` is ``True``, the output Tensor is of the same size as
|
||||
:attr:`input` except in the dimension :attr:`dim` where it is of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting
|
||||
in the output Tensor having 1 fewer dimension than :attr:`input`.
|
||||
|
||||
If :attr:`unbiased` is false, then the standard-deviation will be calculated via
|
||||
If :attr:`unbiased` is ``False``, then the standard-deviation will be calculated via
|
||||
the biased estimator. Otherwise, Bessel's correction will be used.
|
||||
|
||||
Args:
|
||||
@ -4203,7 +4212,7 @@ Example::
|
||||
Returns the sum of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensor is of the same size
|
||||
If :attr:`keepdim` is ``True``, the output Tensor is of the same size
|
||||
as :attr:`input` except in the dimension :attr:`dim` where it is of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
|
||||
the output Tensor having 1 fewer dimension than :attr:`input`.
|
||||
@ -4325,13 +4334,13 @@ such that `input = V diag(e) V'`
|
||||
The boolean argument :attr:`eigenvectors` defines computation of
|
||||
eigenvectors or eigenvalues only.
|
||||
|
||||
If it is `False`, only eigenvalues are computed. If it is `True`,
|
||||
If it is ``False``, only eigenvalues are computed. If it is ``True``,
|
||||
both eigenvalues and eigenvectors are computed.
|
||||
|
||||
Since the input matrix `input` is supposed to be symmetric,
|
||||
only the upper triangular portion is used by default.
|
||||
|
||||
If :attr:`upper` is `False`, then lower triangular portion is used.
|
||||
If :attr:`upper` is ``False``, then lower triangular portion is used.
|
||||
|
||||
Note: Irrespective of the original strides, the returned matrix `V` will
|
||||
be transposed, i.e. with strides `(1, m)` instead of `(m, 1)`.
|
||||
@ -4493,12 +4502,12 @@ a given dimension.
|
||||
|
||||
If :attr:`dim` is not given, the last dimension of the `input` is chosen.
|
||||
|
||||
If :attr:`largest` is `False` then the `k` smallest elements are returned.
|
||||
If :attr:`largest` is ``False`` then the `k` smallest elements are returned.
|
||||
|
||||
A tuple of `(values, indices)` is returned, where the `indices` are the indices
|
||||
of the elements in the original `input` Tensor.
|
||||
|
||||
The boolean option :attr:`sorted` if `True`, will make sure that the returned
|
||||
The boolean option :attr:`sorted` if ``True``, will make sure that the returned
|
||||
`k` elements are themselves sorted
|
||||
|
||||
Args:
|
||||
@ -4787,7 +4796,7 @@ add_docstr(torch._C.var,
|
||||
|
||||
Returns the variance of all elements in the :attr:`input` Tensor.
|
||||
|
||||
If :attr:`unbiased` is false, then the variance will be calculated via the
|
||||
If :attr:`unbiased` is ``False``, then the variance will be calculated via the
|
||||
biased estimator. Otherwise, Bessel's correction will be used.
|
||||
|
||||
Args:
|
||||
@ -4811,12 +4820,12 @@ Example::
|
||||
Returns the variance of each row of the :attr:`input` Tensor in the given
|
||||
dimension :attr:`dim`.
|
||||
|
||||
If :attr:`keepdim` is true, the output Tensors are of the same size
|
||||
If :attr:`keepdim` is ``True``, the output Tensors are of the same size
|
||||
as :attr:`input` except in the dimension :attr:`dim` where they are of size 1.
|
||||
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in
|
||||
the output Tensors having 1 fewer dimension than :attr:`input`.
|
||||
|
||||
If :attr:`unbiased` is false, then the variance will be calculated via the
|
||||
If :attr:`unbiased` is ``False``, then the variance will be calculated via the
|
||||
biased estimator. Otherwise, Bessel's correction will be used.
|
||||
|
||||
Args:
|
||||
|
@ -12,7 +12,7 @@ def _type(self, new_type=None, async=False):
|
||||
|
||||
Args:
|
||||
new_type (type or string): The desired type
|
||||
async (bool): If True, and the source is in pinned memory and
|
||||
async (bool): If ``True``, and the source is in pinned memory and
|
||||
destination is on the GPU or vice versa, the copy is
|
||||
performed asynchronously with respect to the host.
|
||||
Otherwise, the argument has no effect.
|
||||
@ -46,7 +46,7 @@ def _cuda(self, device=None, async=False):
|
||||
|
||||
Args:
|
||||
device (int): The destination GPU id. Defaults to the current device.
|
||||
async (bool): If True and the source is in pinned memory, the copy will
|
||||
async (bool): If ``True`` and the source is in pinned memory, the copy will
|
||||
be asynchronous with respect to the host. Otherwise, the
|
||||
argument has no effect.
|
||||
"""
|
||||
|
@ -63,16 +63,16 @@ def backward(variables, grad_variables=None, retain_graph=None, create_graph=Non
|
||||
grad_variables (sequence of (Tensor, Variable or None)): Gradients w.r.t.
|
||||
each element of corresponding variables. Any tensors will be
|
||||
automatically converted to Variables that are volatile unless
|
||||
``create_graph`` is True. None values can be specified for scalar
|
||||
``create_graph`` is ``True``. None values can be specified for scalar
|
||||
Variables or ones that don't require grad. If a None value would
|
||||
be acceptable for all grad_variables, then this argument is optional.
|
||||
retain_graph (bool, optional): If False, the graph used to compute the grad
|
||||
will be freed. Note that in nearly all cases setting this option to True
|
||||
retain_graph (bool, optional): If ``False``, the graph used to compute the grad
|
||||
will be freed. Note that in nearly all cases setting this option to ``True``
|
||||
is not needed and often can be worked around in a much more efficient
|
||||
way. Defaults to the value of ``create_graph``.
|
||||
create_graph (bool, optional): If true, graph of the derivative will
|
||||
create_graph (bool, optional): If ``True``, graph of the derivative will
|
||||
be constructed, allowing to compute higher order derivative products.
|
||||
Defaults to False, unless ``grad_variables`` contains at least one
|
||||
Defaults to ``False``, unless ``grad_variables`` contains at least one
|
||||
non-volatile Variable.
|
||||
"""
|
||||
variables = (variables,) if isinstance(variables, Variable) else tuple(variables)
|
||||
@ -109,8 +109,8 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non
|
||||
Gradients can be given as Tensors when one doesn't need the graph of the
|
||||
derivative, or as Variables, in which case the graph will be created.
|
||||
|
||||
If ``only_inputs`` is True, the function will only return a list of gradients
|
||||
w.r.t the specified inputs. If it's False, then gradient w.r.t. all remaining
|
||||
If ``only_inputs`` is ``True``, the function will only return a list of gradients
|
||||
w.r.t the specified inputs. If it's ``False``, then gradient w.r.t. all remaining
|
||||
leaves will still be computed, and will be accumulated into their ``.grad``
|
||||
attribute.
|
||||
|
||||
@ -120,24 +120,24 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non
|
||||
returned (and not accumulated into ``.grad``).
|
||||
grad_outputs (sequence of Tensor or Variable): Gradients w.r.t. each output.
|
||||
Any tensors will be automatically converted to Variables that are
|
||||
volatile unless ``create_graph`` is True. None values can be
|
||||
volatile unless ``create_graph`` is ``True``. None values can be
|
||||
specified for scalar Variables or ones that don't require grad.
|
||||
If a None value would be acceptable for all grad_variables, then
|
||||
this argument is optional.
|
||||
retain_graph (bool, optional): If False, the graph used to compute the grad
|
||||
will be freed. Note that in nearly all cases setting this option to True
|
||||
retain_graph (bool, optional): If ``False``, the graph used to compute the grad
|
||||
will be freed. Note that in nearly all cases setting this option to ``True``
|
||||
is not needed and often can be worked around in a much more efficient
|
||||
way. Defaults to the value of ``create_graph``.
|
||||
create_graph (bool, optional): If True, graph of the derivative will
|
||||
create_graph (bool, optional): If ``True``, graph of the derivative will
|
||||
be constructed, allowing to compute higher order derivative products.
|
||||
Defaults to False, unless ``grad_variables`` contains at least one
|
||||
Defaults to ``False``, unless ``grad_variables`` contains at least one
|
||||
non-volatile Variable.
|
||||
only_inputs (bool, optional): If True, gradient w.r.t. leaves that are
|
||||
only_inputs (bool, optional): If ``True``, gradient w.r.t. leaves that are
|
||||
part of the graph, but don't appear in ``inputs`` won't be computed
|
||||
and accumulated. Defaults to True.
|
||||
allow_unused (bool, optional): If False, specifying inputs that were not
|
||||
and accumulated. Defaults to ``True``.
|
||||
allow_unused (bool, optional): If ``False``, specifying inputs that were not
|
||||
used when computing outputs (and therefore their grad is always zero)
|
||||
is an error. Default: False.
|
||||
is an error. Defaults to ``False``.
|
||||
"""
|
||||
|
||||
outputs = (outputs,) if isinstance(outputs, Variable) else tuple(outputs)
|
||||
|
@ -2,7 +2,7 @@ import torch
|
||||
from ..function import Function
|
||||
|
||||
|
||||
class Multinomial(Function):
|
||||
class Categorical(Function):
|
||||
@staticmethod
|
||||
def forward(ctx, probs, num_samples, with_replacement):
|
||||
samples = probs.multinomial(num_samples, with_replacement)
|
||||
|
@ -57,15 +57,14 @@ def maybe_unexpand_or_view(variable, old_size):
|
||||
# The order is dim_n_begin, dim_n_end, dim_n-1_begin, dim_n-1_end, ...
|
||||
def prepare_onnx_paddings(dim, pad):
|
||||
assert isinstance(dim, int)
|
||||
# The order of paddings is dim_0_begin, dim_0_end, dim_1_begin, ... , dim_n_end.
|
||||
# The desired order of paddings is
|
||||
# dim_0_begin, dim_1_begin, ... , dim_0_end, ..., dim_n_end.
|
||||
# n is the dimension of input.
|
||||
assert len(pad) <= dim * 2
|
||||
paddings = []
|
||||
# pad is guaranteed to have even elements.
|
||||
for i, j in zip(pad[0::2], pad[1::2]):
|
||||
paddings = [i, j] + paddings
|
||||
while len(paddings) < 2 * dim:
|
||||
paddings = [0, 0] + paddings
|
||||
# assume zero-dimensions in the beginning
|
||||
paddings = list(pad[:]) + [0] * (dim * 2 - len(pad))
|
||||
# reverse order and collate first beginnings and then ends
|
||||
paddings = paddings[-2::-2] + paddings[-1::-2]
|
||||
assert len(paddings) == dim * 2
|
||||
return paddings
|
||||
|
||||
|
@ -203,7 +203,7 @@ def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True
|
||||
return True
|
||||
|
||||
|
||||
def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3):
|
||||
def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3):
|
||||
"""Check gradients of gradients computed via small finite differences
|
||||
against analytical gradients
|
||||
This function checks that backpropagating through the gradients computed
|
||||
@ -216,17 +216,27 @@ def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3):
|
||||
is true for all elements of analytical gradient a and numerical gradient n.
|
||||
|
||||
Args:
|
||||
func: Python function that takes Variable inputs and returns
|
||||
func (function): Python function that takes Variable inputs and returns
|
||||
a tuple of Variables
|
||||
inputs: tuple of Variables
|
||||
grad_outputs: tuple of Variables
|
||||
eps: perturbation for finite differences
|
||||
atol: absolute tolerance
|
||||
rtol: relative tolerance
|
||||
inputs (tuple of Variable): inputs to the function
|
||||
grad_outputs (tuple of Variable, optional): The gradients with respect to
|
||||
the function's outputs.
|
||||
eps (float, optional): perturbation for finite differences
|
||||
atol (float, optional): absolute tolerance
|
||||
rtol (float, optional): relative tolerance
|
||||
|
||||
Returns:
|
||||
True if all differences satisfy allclose condition
|
||||
True if all differences satisfy allclose condition. Raises an exception
|
||||
otherwise.
|
||||
"""
|
||||
if grad_outputs is None:
|
||||
# If grad_outputs is not specified, create random variables of the same
|
||||
# shape, type, and device as the outputs
|
||||
def randn_like(x):
|
||||
return Variable(x.data.new(x.size()).normal_(), requires_grad=True)
|
||||
outputs = _as_tuple(func(*inputs))
|
||||
grad_outputs = [randn_like(x) for x in outputs]
|
||||
|
||||
def new_func(*input_args):
|
||||
input_args = input_args[:-len(grad_outputs)]
|
||||
outputs = _differentiable_outputs(func(*input_args))
|
||||
|
@ -17,6 +17,17 @@ class EventList(list):
|
||||
return self.table()
|
||||
|
||||
def table(self, sort_by=None):
|
||||
"""Prints an EventList as a nicely formatted table.
|
||||
|
||||
Arguments:
|
||||
sort_by (str, optional): Attribute used to sort entries. By default
|
||||
they are printed in the same order as they were registered.
|
||||
Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
|
||||
``cuda_time_total``, ``count``.
|
||||
|
||||
Returns:
|
||||
A string containing the table.
|
||||
"""
|
||||
return build_table(self, sort_by)
|
||||
|
||||
def export_chrome_trace(self, path):
|
||||
@ -72,7 +83,7 @@ class profile(object):
|
||||
|
||||
Arguments:
|
||||
enabled (bool, optional): Setting this to False makes this context manager a no-op.
|
||||
Default: True.
|
||||
Default: ``True``.
|
||||
|
||||
.. warning::
|
||||
This context manager should not be called recursively, i.e. at most one
|
||||
@ -131,6 +142,12 @@ class profile(object):
|
||||
return '<unfinished torch.autograd.profile>'
|
||||
return str(self.function_events)
|
||||
|
||||
def table(self, sort_by=None):
    # No docstring here on purpose: the line following this method assigns
    # table.__doc__ from EventList.table.
    # function_events being None is treated as "the profile has not finished",
    # the same check export_chrome_trace performs.
    if self.function_events is None:
        # The message names the operation actually requested (building a
        # table) instead of repeating export_chrome_trace's wording.
        raise RuntimeError("can't build a table for a profile that didn't finish running")
    # Formatting and sorting are delegated to the collected event list.
    return self.function_events.table(sort_by)
|
||||
table.__doc__ = EventList.table.__doc__
|
||||
|
||||
def export_chrome_trace(self, path):
|
||||
if self.function_events is None:
|
||||
raise RuntimeError("can't export a trace that didn't finish running")
|
||||
@ -153,18 +170,24 @@ class profile(object):
|
||||
class emit_nvtx(object):
|
||||
"""Context manager that makes every autograd operation emit an NVTX range.
|
||||
|
||||
It is useful when running the program under nvprof. Unfortunately, there's no
|
||||
way to force nvprof to flush the data it collected to disk, so for CUDA profiling
|
||||
one has to use this context manager to annotate nvprof traces, and then use
|
||||
:func:`torch.autograd.profiler.open_nvtx` to analyze the checkpoint.
|
||||
It is useful when running the program under nvprof::
|
||||
|
||||
nvprof --profile-from-start off -o trace_name.prof -- <regular command here>
|
||||
|
||||
Unfortunately, there's no way to force nvprof to flush the data it collected
|
||||
to disk, so for CUDA profiling one has to use this context manager to annotate
|
||||
nvprof traces and wait for the process to exit before inspecting them.
|
||||
Then, either NVIDIA Visual Profiler (nvvp) can be used to visualize the timeline, or
|
||||
:func:`torch.autograd.profiler.load_nvprof` can load the results for inspection
|
||||
e.g. in Python REPL.
|
||||
|
||||
.. warning::
|
||||
This context managers should not be called recursively, i.e. at most one
|
||||
This context manager should not be called recursively, i.e. at most one
|
||||
instance should be enabled at any given time.
|
||||
|
||||
Arguments:
|
||||
enabled (bool, optional): Setting this to False makes this context manager a no-op.
|
||||
Default: True.
|
||||
Default: ``True``.
|
||||
|
||||
Example:
|
||||
>>> with torch.cuda.profiler.profile():
|
||||
@ -291,7 +314,7 @@ def demangle(name):
|
||||
try:
|
||||
with open(os.devnull, 'w') as devnull:
|
||||
return subprocess.check_output(['c++filt', '-n', name], stderr=devnull).rstrip().decode("ascii")
|
||||
except subprocess.CalledProcessError:
|
||||
except (subprocess.CalledProcessError, OSError, FileNotFoundError) as e:
|
||||
return name
|
||||
|
||||
|
||||
|
@ -154,14 +154,14 @@ class Variable(_C._VariableBase):
|
||||
None values can be specified for scalar Variables or ones that
|
||||
don't require grad. If a None value would be acceptable then
|
||||
this argument is optional.
|
||||
retain_graph (bool, optional): If False, the graph used to compute
|
||||
retain_graph (bool, optional): If ``False``, the graph used to compute
|
||||
the grads will be freed. Note that in nearly all cases setting
|
||||
this option to True is not needed and often can be worked around
|
||||
in a much more efficient way. Defaults to the value of
|
||||
``create_graph``.
|
||||
create_graph (bool, optional): If true, graph of the derivative will
|
||||
create_graph (bool, optional): If ``True``, graph of the derivative will
|
||||
be constructed, allowing to compute higher order derivative
|
||||
products. Defaults to False, unless ``gradient`` is a volatile
|
||||
products. Defaults to ``False``, unless ``gradient`` is a volatile
|
||||
Variable.
|
||||
"""
|
||||
torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
|
||||
@ -205,20 +205,31 @@ class Variable(_C._VariableBase):
|
||||
return handle
|
||||
|
||||
def reinforce(self, reward):
|
||||
"""Registers a reward obtained as a result of a stochastic process.
|
||||
def trim(str):
|
||||
return '\n'.join([line.strip() for line in str.split('\n')])
|
||||
|
||||
Differentiating stochastic nodes requires providing them with reward
|
||||
value. If your graph contains any stochastic operations, you should
|
||||
call this function on their outputs. Otherwise an error will be raised.
|
||||
raise RuntimeError(trim(r"""reinforce() was removed.
|
||||
Use torch.distributions instead.
|
||||
See http://pytorch.org/docs/master/distributions.html
|
||||
|
||||
Parameters:
|
||||
reward(Tensor): Tensor with per-element rewards. It has to match
|
||||
the device location and shape of Variable's data.
|
||||
"""
|
||||
if not isinstance(self.grad_fn, StochasticFunction):
|
||||
raise RuntimeError("reinforce() can be only called on outputs "
|
||||
"of stochastic functions")
|
||||
self.grad_fn._reinforce(reward)
|
||||
Instead of:
|
||||
|
||||
probs = policy_network(state)
|
||||
action = probs.multinomial()
|
||||
next_state, reward = env.step(action)
|
||||
action.reinforce(reward)
|
||||
action.backward()
|
||||
|
||||
Use:
|
||||
|
||||
probs = policy_network(state)
|
||||
# NOTE: categorical is equivalent to what used to be called multinomial
|
||||
m = torch.distributions.Categorical(probs)
|
||||
action = m.sample()
|
||||
next_state, reward = env.step(action)
|
||||
loss = -m.log_prob(action) * reward
|
||||
loss.backward()
|
||||
"""))
|
||||
|
||||
def detach(self):
|
||||
"""Returns a new Variable, detached from the current graph.
|
||||
@ -422,7 +433,7 @@ class Variable(_C._VariableBase):
|
||||
return self.expand(tensor.size())
|
||||
|
||||
def multinomial(self, num_samples=1, replacement=False):
|
||||
return Multinomial.apply(self, num_samples, replacement)
|
||||
return Categorical.apply(self, num_samples, replacement)
|
||||
|
||||
def bernoulli(self):
|
||||
return Bernoulli.apply(self)
|
||||
|
@ -257,10 +257,11 @@ class RNNDescriptor(object):
|
||||
CUDNN_RNN_ALGO_STANDARD,
|
||||
datatype
|
||||
))
|
||||
if version() >= 7000 and int(cuda[0]) >= 9:
|
||||
lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH)
|
||||
if datatype == CUDNN_DATA_HALF:
|
||||
lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH)
|
||||
if version() >= 7000 and int(cuda[0]) >= 9 and (
|
||||
torch.cuda.get_device_capability(torch.cuda.current_device())[0] >= 7):
|
||||
lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH)
|
||||
if datatype == CUDNN_DATA_HALF:
|
||||
lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH)
|
||||
else:
|
||||
check_error(lib.cudnnSetRNNDescriptor(
|
||||
self,
|
||||
|
@ -1,5 +1,8 @@
|
||||
#include <Python.h>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "THP.h"
|
||||
|
||||
PyObject *THPException_FatalError;
|
||||
@ -11,3 +14,61 @@ bool THPException_init(PyObject *module)
|
||||
ASSERT_TRUE(PyModule_AddObject(module, "FatalError", THPException_FatalError) == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace torch {
|
||||
|
||||
// Replaces every occurrence of `old_str` in `str` with `new_str`, in place.
//
// str:     string that is modified.
// old_str: substring to search for; an empty pattern is a no-op.
// new_str: text substituted for each match.
//
// The search resumes just past the text that was inserted, so a replacement
// that itself contains the pattern (e.g. "a" -> "aa") is not rescanned and the
// loop always terminates.
void replaceAll(std::string & str,
    const std::string & old_str,
    const std::string & new_str) {
  // find("") matches at every position, so an empty pattern would never make
  // progress; treat it as "nothing to replace".
  if (old_str.empty()) {
    return;
  }
  std::string::size_type pos = 0u;
  while ((pos = str.find(old_str, pos)) != std::string::npos) {
    str.replace(pos, old_str.length(), new_str);
    // Skip over the inserted text so it is never matched again.
    pos += new_str.length();
  }
}
|
||||
|
||||
std::string processErrorMsg(std::string str) {
|
||||
|
||||
// Translate Aten types to their respective pytorch ones
|
||||
std::vector<std::pair<std::string, std::string>> changes {
|
||||
{"SparseCUDAByteType", "torch.cuda.sparse.ByteTensor"},
|
||||
{"SparseCUDACharType", "torch.cuda.sparse.CharTensor"},
|
||||
{"SparseCUDADoubleType", "torch.cuda.sparse.DoubleTensor"},
|
||||
{"SparseCUDAFloatType", "torch.cuda.sparse.FloatTensor"},
|
||||
{"SparseCUDAIntType", "torch.cuda.sparse.IntTensor"},
|
||||
{"SparseCUDALongType", "torch.cuda.sparse.LongTensor"},
|
||||
{"SparseCUDAShortType", "torch.cuda.sparse.ShortTensor"},
|
||||
{"SparseCUDAHalfType", "torch.cuda.sparse.HalfTensor"},
|
||||
{"SparseCPUByteType", "torch.sparse.ByteTensor"},
|
||||
{"SparseCPUCharType", "torch.sparse.CharTensor"},
|
||||
{"SparseCPUDoubleType", "torch.sparse.DoubleTensor"},
|
||||
{"SparseCPUFloatType", "torch.sparse.FloatTensor"},
|
||||
{"SparseCPUIntType", "torch.sparse.IntTensor"},
|
||||
{"SparseCPULongType", "torch.sparse.LongTensor"},
|
||||
{"SparseCPUShortType", "torch.sparse.ShortTensor"},
|
||||
{"SparseCPUHalfType", "torch.sparse.HalfTensor"},
|
||||
{"CUDAByteType", "torch.cuda.ByteTensor"},
|
||||
{"CUDACharType", "torch.cuda.CharTensor"},
|
||||
{"CUDADoubleType", "torch.cuda.DoubleTensor"},
|
||||
{"CUDAFloatType", "torch.cuda.FloatTensor"},
|
||||
{"CUDAIntType", "torch.cuda.IntTensor"},
|
||||
{"CUDALongType", "torch.cuda.LongTensor"},
|
||||
{"CUDAShortType", "torch.cuda.ShortTensor"},
|
||||
{"CUDAHalfType", "torch.cuda.HalfTensor"},
|
||||
{"CPUByteType", "torch.ByteTensor"},
|
||||
{"CPUCharType", "torch.CharTensor"},
|
||||
{"CPUDoubleType", "torch.DoubleTensor"},
|
||||
{"CPUFloatType", "torch.FloatTensor"},
|
||||
{"CPUIntType", "torch.IntTensor"},
|
||||
{"CPULongType", "torch.LongTensor"},
|
||||
{"CPUShortType", "torch.ShortTensor"},
|
||||
{"CPUHalfType", "torch.HalfTensor"},
|
||||
};
|
||||
|
||||
for (const auto & it : changes) {
|
||||
replaceAll(str, it.first, it.second);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,8 @@
|
||||
} catch (python_error &e) { \
|
||||
return retval; \
|
||||
} catch (std::exception &e) { \
|
||||
PyErr_SetString(PyExc_RuntimeError, e.what()); \
|
||||
auto msg = torch::processErrorMsg(e.what()); \
|
||||
PyErr_SetString(PyExc_RuntimeError, msg.c_str()); \
|
||||
return retval; \
|
||||
}
|
||||
|
||||
@ -68,4 +69,8 @@ struct python_error : public std::exception {
|
||||
bool THPException_init(PyObject *module);
|
||||
#endif
|
||||
|
||||
namespace torch {
|
||||
std::string processErrorMsg(std::string str);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,5 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
|
@ -164,7 +164,7 @@ auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_lis
|
||||
// Add saved variables used out of the pure autograd to inputs
|
||||
variable_list all_inputs(grad_outputs);
|
||||
all_inputs.push_back(input_var);
|
||||
if (weight.get()) {
|
||||
if (weight.defined()) {
|
||||
all_inputs.push_back(weight_var);
|
||||
}
|
||||
auto outputs = as_tensor_list(std::move(grad_input),
|
||||
|
@ -365,7 +365,7 @@ auto ConvForward::apply(const variable_list& inputs) -> variable_list {
|
||||
// For Convolution strategies that don't implicitly handle grad_bias, we add a helper
|
||||
// function here to perform it using simple Tensor operators
|
||||
static at::Tensor compute_grad_bias(const at::Tensor& grad_output) {
|
||||
// grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches
|
||||
// grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches
|
||||
return grad_output.contiguous().view({grad_output.size(0), grad_output.size(1), -1}).sum(0).sum(1);
|
||||
}
|
||||
|
||||
@ -727,7 +727,18 @@ auto ConvBackwardBackward::apply(const variable_list& grad_grad_inputs) -> varia
|
||||
gI = apply_fn<Transpose>(0, 1)(gIt);
|
||||
}
|
||||
}
|
||||
return {ggO, gI, gW};
|
||||
|
||||
if (should_compute_output(0) && !ggO.defined()) ggO = at::zeros_like(gO);
|
||||
if (should_compute_output(1) && !gI.defined()) gI = at::zeros_like(input);
|
||||
if (should_compute_output(2) && !gW.defined()) gW = at::zeros_like(weight);
|
||||
bool is_volatile = std::any_of(grad_grad_inputs.begin(), grad_grad_inputs.end(), [](const Variable& v){
|
||||
return v.defined() && v.is_volatile();
|
||||
});
|
||||
auto results = variable_list({ggO, gI, gW});
|
||||
for (auto& result : results) {
|
||||
result.is_volatile() |= is_volatile;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
auto ConvBackwardBackward::releaseVariables() -> void {
|
||||
|
@ -9,7 +9,7 @@ namespace autograd {
|
||||
jit::node_list BatchNormForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) {
|
||||
auto & g = ctx->graph;
|
||||
// X, Scale, Bias
|
||||
auto bn = g->appendNode(g->create(jit::kSpatialBN,{inputs.at(0),inputs.at(1),inputs.at(2)}));
|
||||
auto bn = g->appendNode(g->create(jit::kBatchNormalization, {inputs.at(0),inputs.at(1),inputs.at(2)}));
|
||||
bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_mean));
|
||||
bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_var));
|
||||
bn->i_(jit::kis_test, !this->training);
|
||||
|
@ -18,7 +18,7 @@ namespace torch { namespace autograd {
|
||||
jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) {
|
||||
auto & g = ctx->graph;
|
||||
// See Note [Caffe2ConvTranspose]
|
||||
auto n = g->create(!transposed ? jit::kConv : jit::kCaffe2ConvTranspose,
|
||||
auto n = g->create(!transposed ? jit::kConv : jit::kConvTranspose,
|
||||
{inputs.at(0), inputs.at(1)});
|
||||
|
||||
// Irritatingly, Caffe2 requires us to specify kernels,
|
||||
@ -55,6 +55,8 @@ jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs
|
||||
n->i_(jit::kgroup,groups);
|
||||
|
||||
// Not in ONNX?
|
||||
// TODO: implement it once ConvTranspose in ONNX gets `adj` argument instead
|
||||
// of providing `output_shape`
|
||||
for (int p : output_padding) {
|
||||
JIT_EXPECTM(p == 0, "output padding is not supported.");
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "torch/csrc/autograd/input_buffer.h"
|
||||
|
||||
#include "torch/csrc/assertions.h"
|
||||
#include "torch/csrc/autograd/functions/basic_ops.h"
|
||||
#include "torch/csrc/utils/auto_gpu.h"
|
||||
|
||||
@ -10,6 +11,7 @@ InputBuffer::InputBuffer(size_t size)
|
||||
{}
|
||||
|
||||
void InputBuffer::add(size_t pos, Variable var) {
|
||||
TORCH_ASSERT(pos >= 0 && pos < buffer.size());
|
||||
if (!var.defined()) {
|
||||
return;
|
||||
}
|
||||
|
@ -43,6 +43,10 @@ PyObject * THPVariable_Wrap(Variable var)
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
if (var.dim() == 0) {
|
||||
throw std::runtime_error("Variable API does not support Scalars");
|
||||
}
|
||||
|
||||
if (auto obj = var.get()->pyobj) {
|
||||
Py_INCREF(obj);
|
||||
return obj;
|
||||
|
@ -13,6 +13,10 @@
|
||||
namespace torch { namespace autograd { namespace utils {
|
||||
|
||||
inline PyObject* wrap(at::Tensor tensor) {
|
||||
if (tensor.defined() && tensor.dim() == 0) {
|
||||
// don't expose 0-dim tensors to Variable API.
|
||||
Variable(tensor).data().as_strided_({1}, {1});
|
||||
}
|
||||
return THPVariable_Wrap(Variable(std::move(tensor)));
|
||||
}
|
||||
|
||||
@ -54,6 +58,10 @@ inline PyObject* wrap(int64_t value) {
|
||||
return THPUtils_packInt64(value);
|
||||
}
|
||||
|
||||
inline PyObject* wrap(void* value) {
|
||||
return THPUtils_packInt64(reinterpret_cast<intptr_t>(value));
|
||||
}
|
||||
|
||||
inline PyObject* wrap(at::Scalar scalar) {
|
||||
return wrap(scalar.toTensor());
|
||||
}
|
||||
|
@ -133,6 +133,18 @@ PyObject * THCPModule_getDeviceName_wrap(PyObject *self, PyObject *arg)
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
// Python binding: takes a device index and returns the tuple
// (major, minor) read from that device's cudaDeviceProp.
// The argument must satisfy THPUtils_checkLong; CUDA failures are routed
// through THCudaCheck.
PyObject * THCPModule_getDeviceCapability_wrap(PyObject *self, PyObject *arg)
{
  HANDLE_TH_ERRORS
  THPUtils_assert(THPUtils_checkLong(arg), "invalid argument to getDeviceCapability");
  const long device_index = THPUtils_unpackLong(arg);

  cudaDeviceProp device_props;
  THCudaCheck(cudaGetDeviceProperties(&device_props, device_index));
  return Py_BuildValue("(ii)", device_props.major, device_props.minor);
  END_HANDLE_TH_ERRORS
}
|
||||
|
||||
PyObject * THCPModule_getCurrentStream_wrap(PyObject *self)
|
||||
{
|
||||
HANDLE_TH_ERRORS
|
||||
@ -174,6 +186,11 @@ PyObject * THCPModule_getDriverVersion(PyObject *self)
|
||||
return PyLong_FromLong((long) driverVersion);
|
||||
}
|
||||
|
||||
// Python binding: returns the CUDA_VERSION macro this extension was
// compiled against, as a Python integer.
PyObject * THCPModule_getCompiledVersion(PyObject *self)
{
  const long compiled_version = static_cast<long>(CUDA_VERSION);
  return PyLong_FromLong(compiled_version);
}
|
||||
|
||||
PyObject * THCPModule_getRNGState(PyObject *_unused)
|
||||
{
|
||||
HANDLE_TH_ERRORS
|
||||
@ -297,6 +314,15 @@ PyObject * THCPModule_cudaUnlockMutex(PyObject *module)
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
// Python binding: asks the device allocator obtained from
// THCState_getDeviceAllocator to empty its cache and returns None.
// NOTE(review): `state` is not declared in this function; presumably the
// module-wide THCState handle — confirm.
PyObject * THCPModule_emptyCache(PyObject *_unused)
{
  HANDLE_TH_ERRORS
  auto allocator = THCState_getDeviceAllocator(state);
  THCudaCheck(allocator->emptyCache(allocator->state));
  END_HANDLE_TH_ERRORS
  // Reached only when the guarded section above completed normally.
  Py_RETURN_NONE;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Cuda module initialization
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -369,13 +395,16 @@ static struct PyMethodDef _THCPModule_methods[] = {
|
||||
{"_cuda_getDevice", (PyCFunction)THCPModule_getDevice_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getDeviceCount", (PyCFunction)THCPModule_getDeviceCount_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getDeviceName", (PyCFunction)THCPModule_getDeviceName_wrap, METH_O, NULL},
|
||||
{"_cuda_getDeviceCapability", (PyCFunction)THCPModule_getDeviceCapability_wrap, METH_O, NULL},
|
||||
{"_cuda_getCurrentStream", (PyCFunction)THCPModule_getCurrentStream_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getCurrentBlasHandle", (PyCFunction)THCPModule_getCurrentBlasHandle_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_setStream", (PyCFunction)THCPModule_setStream_wrap, METH_O, NULL},
|
||||
{"_cuda_isDriverSufficient", (PyCFunction)THCPModule_isDriverSufficient, METH_NOARGS, NULL},
|
||||
{"_cuda_getDriverVersion", (PyCFunction)THCPModule_getDriverVersion, METH_NOARGS, NULL},
|
||||
{"_cuda_getCompiledVersion", (PyCFunction)THCPModule_getCompiledVersion, METH_NOARGS, NULL},
|
||||
{"_cuda_getRNGState", (PyCFunction)THCPModule_getRNGState, METH_NOARGS, NULL},
|
||||
{"_cuda_setRNGState", (PyCFunction)THCPModule_setRNGState, METH_O, NULL},
|
||||
{"_cuda_emptyCache", (PyCFunction) THCPModule_emptyCache, METH_NOARGS, NULL},
|
||||
{"_cuda_manualSeed", (PyCFunction)THCPModule_manualSeed, METH_O, NULL},
|
||||
{"_cuda_manualSeedAll", (PyCFunction)THCPModule_manualSeedAll, METH_O, NULL},
|
||||
{"_cuda_seed", (PyCFunction)THCPModule_seed, METH_NOARGS, NULL},
|
||||
|
@ -1,3 +1,5 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
|
@ -228,12 +228,12 @@ struct algorithm_search<cudnnConvolutionFwdAlgo_t> {
|
||||
conv.cdesc.desc,
|
||||
conv.odesc.desc,
|
||||
out,
|
||||
1,
|
||||
n_algo,
|
||||
&algoCount,
|
||||
perfResults,
|
||||
ws.data,
|
||||
ws.size));
|
||||
return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, n_algo);
|
||||
return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, algoCount);
|
||||
}
|
||||
|
||||
static void getAlgorithm(
|
||||
@ -302,12 +302,12 @@ struct algorithm_search<cudnnConvolutionBwdDataAlgo_t> {
|
||||
conv.cdesc.desc,
|
||||
conv.idesc.desc,
|
||||
in,
|
||||
1,
|
||||
n_algo,
|
||||
&algoCount,
|
||||
perfResults,
|
||||
ws.data,
|
||||
ws.size));
|
||||
return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, n_algo);
|
||||
return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, algoCount);
|
||||
}
|
||||
|
||||
static void getAlgorithm(cudnnHandle_t handle, const Convolution& conv, cudnnConvolutionBwdDataAlgo_t* algo) {
|
||||
@ -376,12 +376,12 @@ struct algorithm_search<cudnnConvolutionBwdFilterAlgo_t> {
|
||||
conv.cdesc.desc,
|
||||
conv.wdesc.desc,
|
||||
wght,
|
||||
1,
|
||||
n_algo,
|
||||
&algoCount,
|
||||
perfResults,
|
||||
ws.data,
|
||||
ws.size));
|
||||
return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, n_algo);
|
||||
return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, algoCount);
|
||||
}
|
||||
|
||||
static void getAlgorithm(
|
||||
|
@ -761,6 +761,12 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
- accreal start
|
||||
- accreal end
|
||||
- CONSTANT 1
|
||||
- arguments:
|
||||
- arg: THTensor* result
|
||||
output: True
|
||||
- CONSTANT 0
|
||||
- accreal end
|
||||
- CONSTANT 1
|
||||
]]
|
||||
|
||||
[[
|
||||
|
@ -78,7 +78,7 @@ using GraphsAttr = VectorAttributeValue<std::shared_ptr<Graph>,AttributeKind::gs
|
||||
|
||||
// CRTP so that Node which inherits Attributes can be returned for
|
||||
// method chaining e.g:
|
||||
// Node * n = g->create(kSelect)->set_i(kOffset,3)->set_f(kValue,3.5);
|
||||
// Node * n = g->create(kSelect)->i_(kOffset,3)->f_(kValue,3.5);
|
||||
// we return Derived* pointers because Nodes are normally held as pointers.
|
||||
template<typename Derived>
|
||||
struct Attributes {
|
||||
|
@ -69,7 +69,8 @@ void encodeTensor(onnx::TensorProto * p, const at::Tensor & tensor) {
|
||||
break;
|
||||
}
|
||||
p->set_data_type(onnx_type);
|
||||
at::Tensor cont = tensor.toType(at::CPU(at_type)).contiguous();
|
||||
// CPU's HalfTensor doesn't have contiguous(), so first calling contiguous()
|
||||
at::Tensor cont = tensor.contiguous().toType(at::CPU(at_type));
|
||||
p->set_raw_data(cont);
|
||||
}
|
||||
|
||||
@ -79,40 +80,50 @@ void addAttribute(onnx::NodeProto * n_p, jit::Node * n, jit::Symbol name) {
|
||||
switch(n->kindOf(name)) {
|
||||
case AttributeKind::f:
|
||||
attr->set_f(n->f(name));
|
||||
attr->set_type(onnx::aFLOAT);
|
||||
break;
|
||||
case AttributeKind::fs:
|
||||
attr->set_type(onnx::aFLOATS);
|
||||
for(auto & v : n->fs(name))
|
||||
attr->add_floats(v);
|
||||
break;
|
||||
case AttributeKind::i:
|
||||
attr->set_type(onnx::aINT);
|
||||
attr->set_i(n->i(name));
|
||||
break;
|
||||
case AttributeKind::is:
|
||||
attr->set_type(onnx::aINTS);
|
||||
for(auto & v : n->is(name))
|
||||
attr->add_ints(v);
|
||||
break;
|
||||
case AttributeKind::s:
|
||||
attr->set_type(onnx::aSTRING);
|
||||
attr->set_s(n->s(name));
|
||||
break;
|
||||
case AttributeKind::ss:
|
||||
attr->set_type(onnx::aSTRINGS);
|
||||
for(auto & v : n->ss(name))
|
||||
attr->add_strings(v);
|
||||
break;
|
||||
case AttributeKind::t: {
|
||||
attr->set_type(onnx::aTENSOR);
|
||||
auto t = attr->mutable_t();
|
||||
encodeTensor(t, n->t(name));
|
||||
} break;
|
||||
case AttributeKind::ts:
|
||||
attr->set_type(onnx::aTENSORS);
|
||||
for(auto & v : n->ts(name)) {
|
||||
auto t = attr->add_tensors();
|
||||
encodeTensor(t, v);
|
||||
}
|
||||
break;
|
||||
case AttributeKind::g: {
|
||||
attr->set_type(onnx::aGRAPH);
|
||||
auto g = attr->mutable_g();
|
||||
encodeGraph(g, n->g(name), {});
|
||||
} break;
|
||||
case AttributeKind::gs:
|
||||
attr->set_type(onnx::aGRAPHS);
|
||||
for(auto & v : n->gs(name)) {
|
||||
auto g = attr->add_graphs();
|
||||
encodeGraph(g, v, {});
|
||||
@ -191,6 +202,9 @@ void encodeGraph(onnx::GraphProto * p_g, const std::shared_ptr<Graph> & g, const
|
||||
continue;
|
||||
}
|
||||
auto p_n = p_g->add_node();
|
||||
if (node->getSourceLocation()) {
|
||||
p_n->set_doc_string(node->getSourceLocation()->python_traceback);
|
||||
}
|
||||
for(auto input : node->inputs()) {
|
||||
p_n->add_input(node_name(input));
|
||||
}
|
||||
@ -256,11 +270,18 @@ void validateGraph(const std::shared_ptr<Graph>& graph) {
|
||||
}
|
||||
|
||||
std::string ExportGraph(const std::shared_ptr<Graph>& graph,
|
||||
const std::vector<at::Tensor> & initializers) {
|
||||
const std::vector<at::Tensor> & initializers,
|
||||
int64_t onnx_opset_version) {
|
||||
|
||||
validateGraph(graph);
|
||||
|
||||
onnx::ModelProto model_proto;
|
||||
model_proto.set_producer_name("pytorch");
|
||||
model_proto.set_producer_version("0.3");
|
||||
auto* imp = model_proto.add_opset_import();
|
||||
// This is the version of ONNX operator set we are targeting
|
||||
imp->set_version(onnx_opset_version);
|
||||
|
||||
// Set up nanopb callbacks and compute the amount of space needed to store
|
||||
// the resulting protobuf
|
||||
encodeModel(&model_proto, graph, initializers);
|
||||
|
@ -5,6 +5,7 @@
|
||||
namespace torch { namespace jit {
|
||||
|
||||
std::string ExportGraph(const std::shared_ptr<Graph>& graph,
|
||||
const std::vector<at::Tensor> & initializers);
|
||||
const std::vector<at::Tensor> & initializers,
|
||||
int64_t onnx_opset_version);
|
||||
|
||||
}}
|
||||
|
@ -261,6 +261,14 @@ CompiledFusionFunction::CompiledFusionFunction(const std::string & name, Annotat
|
||||
, output_desc(agraph.output_desc) {
|
||||
JIT_CUDA_CHECK(cudaGetDevice(&device));
|
||||
JIT_CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
|
||||
if ((prop.major >= 6 && CUDA_VERSION < 8000) ||
|
||||
(prop.major >= 7 && CUDA_VERSION < 9000)) {
|
||||
std::stringstream err_string;
|
||||
err_string << "PyTorch compiled with insufficient CUDA version: "
|
||||
<< CUDA_VERSION << " for the current GPU device " << prop.name
|
||||
<< " with device capability " << prop.major << "." << prop.minor;
|
||||
throw std::runtime_error(err_string.str());
|
||||
}
|
||||
|
||||
std::stringstream cu;
|
||||
concat_desc = codegen::emitCompilationUnit(cu, name, agraph);
|
||||
|
@ -43,9 +43,8 @@ _(split) \
|
||||
_(Offset) \
|
||||
_(value) \
|
||||
_(Subgraph) \
|
||||
_(SpatialBN) \
|
||||
_(BatchNormalization) \
|
||||
_(Conv) \
|
||||
_(Caffe2ConvTranspose) \
|
||||
_(ConvTranspose) \
|
||||
_(is_test) \
|
||||
_(epsilon) \
|
||||
@ -75,6 +74,8 @@ _(shape) \
|
||||
_(axes) \
|
||||
_(group) \
|
||||
_(inplace) \
|
||||
_(transA) \
|
||||
_(transB) \
|
||||
_(other)
|
||||
|
||||
enum BuiltinSymbol {
|
||||
|
@ -41,6 +41,7 @@ void printNodeRef(std::ostream & out, const Node * n) {
|
||||
template <typename T>
|
||||
std::ostream& operator<<(std::ostream & out, const std::vector<T> & nodes) {
|
||||
out << at::ArrayRef<T>{nodes};
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -60,6 +60,15 @@ static inline bool operator==(const Use & a, const Use & b) {
|
||||
// Graph holds a list of parameters.
|
||||
struct Param;
|
||||
|
||||
// SourceLocation represents source code-level debug information for a node.
|
||||
// It contains a Python stack trace that represents the provenance of a given
|
||||
// node in the trace.
|
||||
struct SourceLocation {
|
||||
SourceLocation(std::string python_traceback)
|
||||
: python_traceback(std::move(python_traceback)) {}
|
||||
std::string python_traceback;
|
||||
};
|
||||
|
||||
// the list types are intentionally simple, but we type-def
|
||||
// them here so if we need to change them, refactoring will be easier
|
||||
using node_list = std::vector<Node*>;
|
||||
@ -113,6 +122,7 @@ private:
|
||||
size_t unique_ = 0; // unique id
|
||||
size_t stage_ = 0; // 0-forward, 1-backward, 2-double-backward,...
|
||||
std::string debug_name_;
|
||||
std::shared_ptr<SourceLocation> source_location_;
|
||||
protected:
|
||||
TypePtr type_;
|
||||
Node(Graph * graph_, NodeKind kind_); //defined after graph
|
||||
@ -150,6 +160,13 @@ public:
|
||||
const std::string & debugName() const {
|
||||
return debug_name_;
|
||||
}
|
||||
// Stores the source-location record for this node and returns `this` so the
// call can be chained. `sl` is taken by value as a sink parameter and moved
// into the member, which avoids an extra shared_ptr refcount round-trip.
Node* setSourceLocation(std::shared_ptr<SourceLocation> sl) {
  source_location_ = std::move(sl);
  return this;
}
|
||||
std::shared_ptr<SourceLocation> getSourceLocation() const {
|
||||
return source_location_;
|
||||
}
|
||||
Graph * owningGraph() {
|
||||
return graph_;
|
||||
}
|
||||
@ -514,6 +531,7 @@ protected:
|
||||
virtual void cloneFrom(Node * s) {
|
||||
if (s->hasType()) setType(s->type());
|
||||
setDebugName(s->debugName());
|
||||
setSourceLocation(s->getSourceLocation());
|
||||
copyAttributes(*s);
|
||||
}
|
||||
};
|
||||
|
@ -86,6 +86,9 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
|
||||
if (!outputs[i]->hasType()) {
|
||||
outputs[i]->setType(old->typeOption());
|
||||
}
|
||||
// Copy over source location information to all nodes created by
|
||||
// the symbolic
|
||||
outputs[i]->setSourceLocation(node->getSourceLocation());
|
||||
env[old] = outputs[i];
|
||||
} else {
|
||||
// Null output means that the ONNX op doesn't have outputs corresponding
|
||||
@ -121,6 +124,31 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
|
||||
}
|
||||
};
|
||||
|
||||
// Cast output of symbolic() python implementation
|
||||
auto processSymbolicOutput = [&](const std::string& op_name, Node* n, const py::object& raw_output) {
|
||||
if (raw_output.ptr() == Py_None) {
|
||||
cloneNode(n);
|
||||
return;
|
||||
}
|
||||
// Cast the outputs back to C++ and put them in the new graph
|
||||
std::vector<Node*> outputs;
|
||||
try {
|
||||
if (py::isinstance<Node>(raw_output)) {
|
||||
outputs = node_list{py::cast<Node*>(raw_output)};
|
||||
} else {
|
||||
outputs = py::cast<std::vector<Node*>>(raw_output);
|
||||
}
|
||||
} catch (const std::exception& ex) {
|
||||
std::ostringstream ss;
|
||||
ss << "Error casting results of symbolic for " << op_name
|
||||
<< ": expected to return list of op nodes, instead received type ''"
|
||||
<< py::str(raw_output.get_type()) << "': " << py::str(raw_output);
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
setOutputs(op_name, n, outputs);
|
||||
};
|
||||
|
||||
auto callPySymbolicFunction = [&](Node* n) {
|
||||
// The idea is delegate as much of the actual argument massaging to
|
||||
// Python as possible
|
||||
@ -133,19 +161,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
|
||||
|
||||
py::object raw_output = onnx.attr("_run_symbolic_function")(ctx.graph, n, py_inputs);
|
||||
|
||||
if (raw_output.ptr() == Py_None) {
|
||||
cloneNode(n);
|
||||
} else {
|
||||
// Cast the outputs back to C++ and put them in the new graph
|
||||
node_list outputs;
|
||||
if (py::isinstance<Node>(raw_output)) {
|
||||
outputs = node_list{py::cast<Node*>(raw_output)};
|
||||
} else {
|
||||
outputs = py::cast<std::vector<Node*>>(raw_output);
|
||||
}
|
||||
|
||||
setOutputs(symbolToString(n->kind()), n, outputs);
|
||||
}
|
||||
processSymbolicOutput(symbolToString(n->kind()), n, raw_output);
|
||||
};
|
||||
|
||||
auto callPySymbolicMethod = [&](PythonOp* op) {
|
||||
@ -184,20 +200,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
|
||||
// upon argument mismatch
|
||||
py::object raw_output = onnx.attr("_run_symbolic_method")(op->name(), pyobj.attr("symbolic"), py_symbolic_args);
|
||||
|
||||
if (raw_output.ptr() == Py_None) {
|
||||
cloneNode(op);
|
||||
return;
|
||||
}
|
||||
|
||||
// Cast the outputs back to C++ and put them in the new graph
|
||||
std::vector<Node*> outputs;
|
||||
if (py::isinstance<Node>(raw_output)) {
|
||||
outputs = node_list{py::cast<Node*>(raw_output)};
|
||||
} else {
|
||||
outputs = py::cast<std::vector<Node*>>(raw_output);
|
||||
}
|
||||
|
||||
setOutputs(op->name(), op, outputs);
|
||||
processSymbolicOutput(op->name(), op, raw_output);
|
||||
};
|
||||
|
||||
// Finally, visit all nodes in the graph
|
||||
|
@ -15,24 +15,62 @@ std::unordered_set<NodeKind> broadcasting = {
|
||||
kGemm,
|
||||
};
|
||||
|
||||
// Returns true when `perm` is the identity permutation, i.e. perm[i] == i
// for every index (an empty permutation counts as identity), so that a
// transpose by `perm` changes nothing.
bool isNopTranspose(const std::vector<int64_t> & perm) {
  // Compare in the signed domain to avoid mixing int64_t with size_t.
  int64_t expected_axis = 0;
  for (const int64_t axis : perm) {
    if (axis != expected_axis) {
      return false;
    }
    ++expected_axis;
  }
  return true;
}
|
||||
|
||||
// returns a vector `ret` such that transposing by `ret` is equivalent
|
||||
// to transposing by `t1` and then by `t2`
|
||||
std::vector<int64_t> composeTransposes(const std::vector<int64_t> & t1,
|
||||
const std::vector<int64_t> & t2) {
|
||||
JIT_ASSERT(t1.size() == t2.size());
|
||||
std::vector<int64_t> ret;
|
||||
for (size_t i = 0; i < t1.size(); i++) {
|
||||
JIT_ASSERT( t1[i] < t2.size());
|
||||
JIT_ASSERT(t2[t1[i]] < t2.size());
|
||||
ret.push_back(t2[t1[i]]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool isBroadcasting(Node *node) {
|
||||
return broadcasting.count(node->kind());
|
||||
}
|
||||
|
||||
// When iterating over the dimension sizes, starting at the trailing dimension,
|
||||
// the dimension sizes must either be equal, or one of them does not exist.
|
||||
// First iterate over the 'from' tensor sizes. Ignore all leading and trailing
|
||||
// dimensions that are simply one, since they can be trivially broadcasted.
|
||||
// When iterating over the dimension sizes (with reduced 'from' tensor),
|
||||
// starting at the trailing dimension, the dimension sizes must either be equal,
|
||||
// or one of them does not exist.
|
||||
//
|
||||
// equivalently:
|
||||
//
|
||||
// Test that 'from' is a suffix of 'to'.
|
||||
// Note that this is NOT equivalent to numpy broadcasting semantics, and do
|
||||
// not represent that generalized broadcasting that Pytorch implements in
|
||||
// general. Rather, this is Caffe2-style broadcasting.
|
||||
bool fusibleExpandTo(at::IntList from, at::IntList to) {
|
||||
auto f = from.rbegin();
|
||||
auto t = to.rbegin();
|
||||
for (; f != from.rend() && t != to.rend(); f++, t++) {
|
||||
// TODO: if 1->n expansion is supported, adjust this conditional.
|
||||
if (*f != *t) return false;
|
||||
if (from.size() > to.size()) {
|
||||
return false;
|
||||
}
|
||||
return f == from.rend();
|
||||
ssize_t from_dim_start = 0, from_dim_end = from.size() - 1;
|
||||
while (from_dim_start < from.size() && from[from_dim_start] == 1) {
|
||||
from_dim_start++;
|
||||
}
|
||||
while (from_dim_end > from_dim_start && from[from_dim_end] == 1) {
|
||||
from_dim_end--;
|
||||
}
|
||||
|
||||
ssize_t f = from_dim_end;
|
||||
ssize_t t = to.size() - 1;
|
||||
for (; f >= from_dim_start && t >= 0; --f, --t) {
|
||||
if (from[f] != to[t]) return false;
|
||||
}
|
||||
|
||||
// In the case that the 'to' tensor has leading ones in the same place that
|
||||
// the 'from' tensor does, f will be less than from_dim_start rather than
|
||||
// strictly equal. E.g.: to := [5, 1, 768] and from := [1, 1, 768]
|
||||
return f <= from_dim_start;
|
||||
}
|
||||
|
||||
void fuseBroadcast(std::shared_ptr<Graph>& graph) {
|
||||
@ -76,6 +114,58 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) {
|
||||
}
|
||||
}
|
||||
|
||||
void fuseConsecutiveTransposes(std::shared_ptr<Graph>& graph) {
|
||||
for (auto it = graph->begin(); it != graph->end(); ++it) {
|
||||
auto* n = *it;
|
||||
|
||||
if (n->kind() == kTranspose && n->input()->kind() == kTranspose) {
|
||||
auto origInput = n->input();
|
||||
n->is_(kperm, composeTransposes(origInput->is(kperm), n->is(kperm)));
|
||||
n->replaceInput(0, origInput->input());
|
||||
if (origInput->uses().size() == 0) {
|
||||
origInput->destroy();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void eliminateNopTranspose(std::shared_ptr<Graph>& graph) {
|
||||
for (auto it = graph->begin(); it != graph->end(); ++it) {
|
||||
auto* n = *it;
|
||||
|
||||
if (n->kind() == kTranspose) {
|
||||
if (isNopTranspose(n->is(kperm))) {
|
||||
n->replaceAllUsesWith(n->input());
|
||||
it.destroyCurrent();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fuseTransposeIntoGemm(std::shared_ptr<Graph>& graph) {
|
||||
static const std::vector<int64_t> simpleTransPerm({1,0});
|
||||
|
||||
for (auto it = graph->begin(); it != graph->end(); ++it) {
|
||||
auto* n = *it;
|
||||
|
||||
if (n->kind() == kGemm) {
|
||||
for (size_t i : {0,1}) {
|
||||
auto inp = n->inputs()[i];
|
||||
auto trans = i == 0 ? ktransA : ktransB;
|
||||
if (inp->kind() == kTranspose && inp->is(kperm) == simpleTransPerm) {
|
||||
n->replaceInput(i, inp->input());
|
||||
n->i_(trans, n->hasAttribute(trans) ? !n->i(trans) : 1);
|
||||
if (inp->uses().size() == 0) {
|
||||
inp->destroy();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This optimization does ONNX-specific peephole optimizations.
|
||||
//
|
||||
// At the moment, here are the optimizations it does:
|
||||
@ -83,6 +173,9 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) {
|
||||
// easier for non-strided backends to more efficiently do broadcasts if this is
|
||||
// local information. This optimization is not useful for PyTorch as 'expand'
|
||||
// is free.
|
||||
// - Fusing of consecutive transposes
|
||||
// - Elimination of NOP transposes
|
||||
// - Fusing of transposes into Gemm
|
||||
//
|
||||
// Before you write an optimization here, ask yourself, "Could I do this
|
||||
// optimization on ATen operators"? If so, you should seriously consider
|
||||
@ -94,6 +187,9 @@ void PeepholeOptimizeONNX(std::shared_ptr<Graph>& graph) {
|
||||
// TODO: make it easier not to do O(k) iterations over the graph, where
|
||||
// k is the number of distinct peephole optimizations
|
||||
fuseBroadcast(graph);
|
||||
fuseConsecutiveTransposes(graph);
|
||||
eliminateNopTranspose(graph);
|
||||
fuseTransposeIntoGemm(graph);
|
||||
}
|
||||
|
||||
}}
|
||||
|
@ -13,6 +13,7 @@ void PeepholeOptimize(std::shared_ptr<Graph>& graph) {
|
||||
for (auto it = graph->begin(); it != graph->end(); ++it) {
|
||||
auto* n = *it;
|
||||
|
||||
// eliminate redundant expand
|
||||
if (n->kind() == kexpand) {
|
||||
if (n->is(ksize) == n->input()->type()->expect<TensorType>()->sizes()) {
|
||||
n->replaceAllUsesWith(n->input());
|
||||
|
@ -32,13 +32,9 @@ void initPythonTracerBindings(PyObject* module_) {
|
||||
ss << *s.graph;
|
||||
return ss.str();
|
||||
})
|
||||
.def("export", [](TracingState& s) {
|
||||
.def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers, int64_t onnx_opset_version) {
|
||||
ASSERT_UNEXPIRED("export");
|
||||
return py::bytes(ExportGraph(s.graph, {}));
|
||||
})
|
||||
.def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers) {
|
||||
ASSERT_UNEXPIRED("export");
|
||||
return py::bytes(ExportGraph(s.graph, initializers));
|
||||
return py::bytes(ExportGraph(s.graph, initializers, onnx_opset_version));
|
||||
})
|
||||
.def("graph", [](TracingState& s) {
|
||||
return s.graph;
|
||||
|
@ -4,6 +4,11 @@
|
||||
#include "torch/csrc/autograd/function.h"
|
||||
#include "torch/csrc/autograd/python_engine.h"
|
||||
#include "torch/csrc/autograd/functions/special.h"
|
||||
#include "torch/csrc/utils/auto_gil.h"
|
||||
#include "torch/csrc/utils/python_strings.h"
|
||||
|
||||
#include <frameobject.h>
|
||||
#include <patchlevel.h>
|
||||
|
||||
namespace torch { namespace jit { namespace tracer {
|
||||
|
||||
@ -89,6 +94,28 @@ void nontraceableBackwardSubgraph(const variable_list& inputs, const variable_li
|
||||
std::make_shared<autograd::Eval>()->replaceSubgraph(inputs, outputs);
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Python interpreter retrieval routine adapted from
|
||||
// https://stackoverflow.com/a/8706144
|
||||
std::string getPythonInterpreterStackTrace() {
|
||||
std::stringstream stack_trace;
|
||||
AutoGIL gil;
|
||||
PyThreadState *tstate = PyThreadState_GET();
|
||||
if (NULL != tstate && NULL != tstate->frame) {
|
||||
PyFrameObject *frame = tstate->frame;
|
||||
|
||||
while (NULL != frame) {
|
||||
int line = PyCode_Addr2Line(frame->f_code, frame->f_lasti);
|
||||
std::string filename = THPUtils_unpackString(frame->f_code->co_filename);
|
||||
std::string funcname = THPUtils_unpackString(frame->f_code->co_name);
|
||||
stack_trace << filename << "(" << line << "): " << funcname << "\n";
|
||||
frame = frame->f_back;
|
||||
}
|
||||
}
|
||||
return stack_trace.str();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Node* recordTrace(std::string op, // TODO: make this a Symbol
|
||||
at::ArrayRef<Variable> inputs,
|
||||
at::ArrayRef<Variable> outputs) {
|
||||
@ -99,6 +126,9 @@ Node* recordTrace(std::string op, // TODO: make this a Symbol
|
||||
auto state_lock = state->lock();
|
||||
|
||||
Node *n = graph->create(stringToSymbol(op));
|
||||
auto sl = std::make_shared<SourceLocation>(getPythonInterpreterStackTrace());
|
||||
n->setSourceLocation(sl);
|
||||
|
||||
for (Variable input : inputs) {
|
||||
n->addInput(getValueTrace(state, input));
|
||||
}
|
||||
|
@ -168,6 +168,21 @@ DEFINE_CONST(UINT64)
|
||||
DEFINE_CONST(COMPLEX64)
|
||||
DEFINE_CONST(COMPLEX128)
|
||||
#undef DEFINE_CONST
|
||||
|
||||
// Shorthand constants for the attribute type enumerators: DEFINE_CONST(C)
// declares a constant named `aC` whose value is
// `onnx_AttributeProto_AttributeType_C` (e.g. aFLOAT, aINTS, aGRAPHS).
// The helper macro is undefined again right after the last constant.
#define DEFINE_CONST(C) \
|
||||
const auto a##C = onnx_AttributeProto_AttributeType_##C;
|
||||
DEFINE_CONST(FLOAT)
|
||||
DEFINE_CONST(INT)
|
||||
DEFINE_CONST(STRING)
|
||||
DEFINE_CONST(TENSOR)
|
||||
DEFINE_CONST(GRAPH)
|
||||
DEFINE_CONST(FLOATS)
|
||||
DEFINE_CONST(INTS)
|
||||
DEFINE_CONST(STRINGS)
|
||||
DEFINE_CONST(TENSORS)
|
||||
DEFINE_CONST(GRAPHS)
|
||||
#undef DEFINE_CONST
|
||||
|
||||
// C++ wrappers which simulate the Google C++ Protobuf API
|
||||
//
|
||||
// These are NOT COMPLETE wrappers. If you find something is missing, add it!
|
||||
@ -270,6 +285,7 @@ public:
|
||||
proto.graphs = list<GraphProto, onnx_GraphProto_fields>(&graphs);
|
||||
}
|
||||
void set_name(const std::string& s) { proto.name = string(&name, s); }
|
||||
void set_type(onnx_AttributeProto_AttributeType t) { proto.has_type = true; proto.type = t; }
|
||||
void set_f(float f) { proto.has_f = true; proto.f = f; }
|
||||
void set_i(int64_t i) { proto.has_i = true; proto.i = i; }
|
||||
void set_s(std::string s_) { proto.s = string(&s, s_); }
|
||||
@ -290,6 +306,7 @@ public:
|
||||
class NodeProto : public MicroProto<onnx_NodeProto> {
|
||||
private:
|
||||
std::string op_type;
|
||||
std::string doc_string;
|
||||
unique_vector<std::string> inputs;
|
||||
unique_vector<std::string> outputs;
|
||||
unique_vector<AttributeProto> attributes;
|
||||
@ -309,6 +326,7 @@ public:
|
||||
return ptr;
|
||||
}
|
||||
void set_op_type(const std::string& s) { proto.op_type= string(&op_type, s); }
|
||||
void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); }
|
||||
};
|
||||
|
||||
class GraphProto : public MicroProto<onnx_GraphProto> {
|
||||
@ -349,6 +367,15 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// C++ wrapper for the onnx_OperatorSetIdProto message, exposing setters for
// its `domain` and `version` fields.
class OperatorSetIdProto : public MicroProto<onnx_OperatorSetIdProto> {
  // Passed by address to string() together with the new value whenever the
  // domain is set.
  std::string domain;

public:
  // Starts from the generated default initializer for the message.
  OperatorSetIdProto()
    : MicroProto(onnx_OperatorSetIdProto_init_default) {}

  // Sets the `domain` field from `s`.
  void set_domain(const std::string& s) {
    proto.domain = string(&domain, s);
  }

  // Stores `v` as the version and flags the field as present.
  void set_version(int64_t v) {
    proto.version = v;
    proto.has_version = true;
  }
};
|
||||
|
||||
class ModelProto : public MicroProto<onnx_ModelProto> {
|
||||
private:
|
||||
std::string producer_name;
|
||||
@ -356,21 +383,26 @@ private:
|
||||
std::string domain;
|
||||
std::string doc_string;
|
||||
std::unique_ptr<GraphProto> graph;
|
||||
unique_vector<OperatorSetIdProto> opset_import;
|
||||
public:
|
||||
ModelProto() : MicroProto(onnx_ModelProto_init_default) {
|
||||
proto.has_ir_version = true;
|
||||
proto.ir_version = onnx_Version_IR_VERSION;
|
||||
proto.producer_name = string(&producer_name, "pytorch");
|
||||
// TODO: stop hard-coding this
|
||||
proto.producer_version = string(&producer_version, "0.2");
|
||||
proto.domain = string(&domain, "com.facebook");
|
||||
proto.opset_import = list<OperatorSetIdProto, onnx_OperatorSetIdProto_fields>(&opset_import);
|
||||
}
|
||||
void set_model_version(int64_t i) { proto.has_model_version = true; proto.model_version = i; }
|
||||
void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); }
|
||||
void set_producer_name(const std::string& s) { proto.producer_name = string(&producer_name, s); }
|
||||
void set_producer_version(const std::string& s) { proto.producer_version = string(&producer_version, s); }
|
||||
GraphProto* mutable_graph() {
|
||||
proto.graph = msg<GraphProto, onnx_GraphProto_fields>(&graph);
|
||||
return graph.get();
|
||||
}
|
||||
OperatorSetIdProto* add_opset_import() {
|
||||
auto ptr = new OperatorSetIdProto();
|
||||
opset_import.emplace_back(ptr);
|
||||
return ptr;
|
||||
}
|
||||
};
|
||||
|
||||
}} // namespace torch::onnx
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
|
||||
|
||||
const pb_field_t onnx_AttributeProto_fields[12] = {
|
||||
const pb_field_t onnx_AttributeProto_fields[13] = {
|
||||
PB_FIELD( 1, STRING , OPTIONAL, CALLBACK, FIRST, onnx_AttributeProto, name, name, 0),
|
||||
PB_FIELD( 2, FLOAT , OPTIONAL, STATIC , OTHER, onnx_AttributeProto, f, name, 0),
|
||||
PB_FIELD( 3, INT64 , OPTIONAL, STATIC , OTHER, onnx_AttributeProto, i, f, 0),
|
||||
@ -22,6 +22,7 @@ const pb_field_t onnx_AttributeProto_fields[12] = {
|
||||
PB_FIELD( 9, BYTES , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, strings, ints, 0),
|
||||
PB_FIELD( 10, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, tensors, strings, &onnx_TensorProto_fields),
|
||||
PB_FIELD( 11, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, graphs, tensors, &onnx_GraphProto_fields),
|
||||
PB_FIELD( 20, UENUM , OPTIONAL, STATIC , OTHER, onnx_AttributeProto, type, graphs, 0),
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
@ -31,17 +32,18 @@ const pb_field_t onnx_ValueInfoProto_fields[3] = {
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
const pb_field_t onnx_NodeProto_fields[7] = {
|
||||
const pb_field_t onnx_NodeProto_fields[8] = {
|
||||
PB_FIELD( 1, STRING , REPEATED, CALLBACK, FIRST, onnx_NodeProto, input, input, 0),
|
||||
PB_FIELD( 2, STRING , REPEATED, CALLBACK, OTHER, onnx_NodeProto, output, input, 0),
|
||||
PB_FIELD( 3, STRING , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, name, output, 0),
|
||||
PB_FIELD( 4, STRING , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, op_type, name, 0),
|
||||
PB_FIELD( 5, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_NodeProto, attribute, op_type, &onnx_AttributeProto_fields),
|
||||
PB_FIELD( 6, STRING , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, doc_string, attribute, 0),
|
||||
PB_FIELD( 7, STRING , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, domain, doc_string, 0),
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
const pb_field_t onnx_ModelProto_fields[8] = {
|
||||
const pb_field_t onnx_ModelProto_fields[9] = {
|
||||
PB_FIELD( 1, INT64 , OPTIONAL, STATIC , FIRST, onnx_ModelProto, ir_version, ir_version, 0),
|
||||
PB_FIELD( 2, STRING , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_name, ir_version, 0),
|
||||
PB_FIELD( 3, STRING , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_version, producer_name, 0),
|
||||
@ -49,6 +51,7 @@ const pb_field_t onnx_ModelProto_fields[8] = {
|
||||
PB_FIELD( 5, INT64 , OPTIONAL, STATIC , OTHER, onnx_ModelProto, model_version, domain, 0),
|
||||
PB_FIELD( 6, STRING , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, doc_string, model_version, 0),
|
||||
PB_FIELD( 7, MESSAGE , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, graph, doc_string, &onnx_GraphProto_fields),
|
||||
PB_FIELD( 8, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_ModelProto, opset_import, graph, &onnx_OperatorSetIdProto_fields),
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
@ -120,6 +123,13 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = {
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
const pb_field_t onnx_OperatorSetIdProto_fields[3] = {
|
||||
PB_FIELD( 1, STRING , OPTIONAL, CALLBACK, FIRST, onnx_OperatorSetIdProto, domain, domain, 0),
|
||||
PB_FIELD( 2, INT64 , OPTIONAL, STATIC , OTHER, onnx_OperatorSetIdProto, version, domain, 0),
|
||||
PB_LAST_FIELD
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -132,7 +142,7 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = {
|
||||
* numbers or field sizes that are larger than what can fit in 8 or 16 bit
|
||||
* field descriptors.
|
||||
*/
|
||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto)
|
||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto)
|
||||
#endif
|
||||
|
||||
#if !defined(PB_FIELD_16BIT) && !defined(PB_FIELD_32BIT)
|
||||
@ -143,7 +153,7 @@ PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_members
|
||||
* numbers or field sizes that are larger than what can fit in the default
|
||||
* 8 bit descriptors.
|
||||
*/
|
||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto)
|
||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto)
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -16,12 +16,31 @@ extern "C" {
|
||||
|
||||
/* Enum definitions */
|
||||
typedef enum _onnx_Version {
|
||||
onnx_Version_IR_VERSION = 1
|
||||
onnx_Version__START_VERSION = 0,
|
||||
onnx_Version_IR_VERSION_2017_10_10 = 1,
|
||||
onnx_Version_IR_VERSION = 2
|
||||
} onnx_Version;
|
||||
#define _onnx_Version_MIN onnx_Version_IR_VERSION
|
||||
#define _onnx_Version_MIN onnx_Version__START_VERSION
|
||||
#define _onnx_Version_MAX onnx_Version_IR_VERSION
|
||||
#define _onnx_Version_ARRAYSIZE ((onnx_Version)(onnx_Version_IR_VERSION+1))
|
||||
|
||||
typedef enum _onnx_AttributeProto_AttributeType {
|
||||
onnx_AttributeProto_AttributeType_UNDEFINED = 0,
|
||||
onnx_AttributeProto_AttributeType_FLOAT = 1,
|
||||
onnx_AttributeProto_AttributeType_INT = 2,
|
||||
onnx_AttributeProto_AttributeType_STRING = 3,
|
||||
onnx_AttributeProto_AttributeType_TENSOR = 4,
|
||||
onnx_AttributeProto_AttributeType_GRAPH = 5,
|
||||
onnx_AttributeProto_AttributeType_FLOATS = 6,
|
||||
onnx_AttributeProto_AttributeType_INTS = 7,
|
||||
onnx_AttributeProto_AttributeType_STRINGS = 8,
|
||||
onnx_AttributeProto_AttributeType_TENSORS = 9,
|
||||
onnx_AttributeProto_AttributeType_GRAPHS = 10
|
||||
} onnx_AttributeProto_AttributeType;
|
||||
#define _onnx_AttributeProto_AttributeType_MIN onnx_AttributeProto_AttributeType_UNDEFINED
|
||||
#define _onnx_AttributeProto_AttributeType_MAX onnx_AttributeProto_AttributeType_GRAPHS
|
||||
#define _onnx_AttributeProto_AttributeType_ARRAYSIZE ((onnx_AttributeProto_AttributeType)(onnx_AttributeProto_AttributeType_GRAPHS+1))
|
||||
|
||||
typedef enum _onnx_TensorProto_DataType {
|
||||
onnx_TensorProto_DataType_UNDEFINED = 0,
|
||||
onnx_TensorProto_DataType_FLOAT = 1,
|
||||
@ -63,6 +82,7 @@ typedef struct _onnx_NodeProto {
|
||||
pb_callback_t op_type;
|
||||
pb_callback_t attribute;
|
||||
pb_callback_t doc_string;
|
||||
pb_callback_t domain;
|
||||
/* @@protoc_insertion_point(struct:onnx_NodeProto) */
|
||||
} onnx_NodeProto;
|
||||
|
||||
@ -91,6 +111,8 @@ typedef struct _onnx_AttributeProto {
|
||||
pb_callback_t strings;
|
||||
pb_callback_t tensors;
|
||||
pb_callback_t graphs;
|
||||
bool has_type;
|
||||
onnx_AttributeProto_AttributeType type;
|
||||
/* @@protoc_insertion_point(struct:onnx_AttributeProto) */
|
||||
} onnx_AttributeProto;
|
||||
|
||||
@ -104,9 +126,17 @@ typedef struct _onnx_ModelProto {
|
||||
int64_t model_version;
|
||||
pb_callback_t doc_string;
|
||||
pb_callback_t graph;
|
||||
pb_callback_t opset_import;
|
||||
/* @@protoc_insertion_point(struct:onnx_ModelProto) */
|
||||
} onnx_ModelProto;
|
||||
|
||||
typedef struct _onnx_OperatorSetIdProto {
|
||||
pb_callback_t domain;
|
||||
bool has_version;
|
||||
int64_t version;
|
||||
/* @@protoc_insertion_point(struct:onnx_OperatorSetIdProto) */
|
||||
} onnx_OperatorSetIdProto;
|
||||
|
||||
typedef struct _onnx_TensorProto_Segment {
|
||||
bool has_begin;
|
||||
int64_t begin;
|
||||
@ -173,10 +203,10 @@ typedef struct _onnx_SparseTensorProto {
|
||||
/* Default values for struct fields */
|
||||
|
||||
/* Initializer values for message structs */
|
||||
#define onnx_AttributeProto_init_default {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_AttributeProto_init_default {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0}
|
||||
#define onnx_ValueInfoProto_init_default {{{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_NodeProto_init_default {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_ModelProto_init_default {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_NodeProto_init_default {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_ModelProto_init_default {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_GraphProto_init_default {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_TensorProto_init_default {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_default, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_TensorProto_Segment_init_default {false, 0, false, 0}
|
||||
@ -186,10 +216,11 @@ typedef struct _onnx_SparseTensorProto {
|
||||
#define onnx_TypeProto_TensorShapeProto_Dimension_init_default {false, 0, {{NULL}, NULL}}
|
||||
#define onnx_TypeProto_TensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}}
|
||||
#define onnx_TypeProto_SparseTensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_default}
|
||||
#define onnx_AttributeProto_init_zero {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_OperatorSetIdProto_init_default {{{NULL}, NULL}, false, 0}
|
||||
#define onnx_AttributeProto_init_zero {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0}
|
||||
#define onnx_ValueInfoProto_init_zero {{{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_NodeProto_init_zero {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_ModelProto_init_zero {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_NodeProto_init_zero {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_ModelProto_init_zero {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_GraphProto_init_zero {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_TensorProto_init_zero {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_zero, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
|
||||
#define onnx_TensorProto_Segment_init_zero {false, 0, false, 0}
|
||||
@ -199,6 +230,7 @@ typedef struct _onnx_SparseTensorProto {
|
||||
#define onnx_TypeProto_TensorShapeProto_Dimension_init_zero {false, 0, {{NULL}, NULL}}
|
||||
#define onnx_TypeProto_TensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}}
|
||||
#define onnx_TypeProto_SparseTensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_zero}
|
||||
#define onnx_OperatorSetIdProto_init_zero {{{NULL}, NULL}, false, 0}
|
||||
|
||||
/* Field tags (for use in manual encoding/decoding) */
|
||||
#define onnx_GraphProto_node_tag 1
|
||||
@ -212,12 +244,14 @@ typedef struct _onnx_SparseTensorProto {
|
||||
#define onnx_NodeProto_output_tag 2
|
||||
#define onnx_NodeProto_name_tag 3
|
||||
#define onnx_NodeProto_op_type_tag 4
|
||||
#define onnx_NodeProto_domain_tag 7
|
||||
#define onnx_NodeProto_attribute_tag 5
|
||||
#define onnx_NodeProto_doc_string_tag 6
|
||||
#define onnx_TypeProto_TensorShapeProto_dim_tag 1
|
||||
#define onnx_ValueInfoProto_name_tag 1
|
||||
#define onnx_ValueInfoProto_type_tag 2
|
||||
#define onnx_AttributeProto_name_tag 1
|
||||
#define onnx_AttributeProto_type_tag 20
|
||||
#define onnx_AttributeProto_f_tag 2
|
||||
#define onnx_AttributeProto_i_tag 3
|
||||
#define onnx_AttributeProto_s_tag 4
|
||||
@ -229,12 +263,15 @@ typedef struct _onnx_SparseTensorProto {
|
||||
#define onnx_AttributeProto_tensors_tag 10
|
||||
#define onnx_AttributeProto_graphs_tag 11
|
||||
#define onnx_ModelProto_ir_version_tag 1
|
||||
#define onnx_ModelProto_opset_import_tag 8
|
||||
#define onnx_ModelProto_producer_name_tag 2
|
||||
#define onnx_ModelProto_producer_version_tag 3
|
||||
#define onnx_ModelProto_domain_tag 4
|
||||
#define onnx_ModelProto_model_version_tag 5
|
||||
#define onnx_ModelProto_doc_string_tag 6
|
||||
#define onnx_ModelProto_graph_tag 7
|
||||
#define onnx_OperatorSetIdProto_domain_tag 1
|
||||
#define onnx_OperatorSetIdProto_version_tag 2
|
||||
#define onnx_TensorProto_Segment_begin_tag 1
|
||||
#define onnx_TensorProto_Segment_end_tag 2
|
||||
#define onnx_TypeProto_SparseTensorTypeProto_elem_type_tag 1
|
||||
@ -261,10 +298,10 @@ typedef struct _onnx_SparseTensorProto {
|
||||
#define onnx_SparseTensorProto_values_tag 3
|
||||
|
||||
/* Struct field encoding specification for nanopb */
|
||||
extern const pb_field_t onnx_AttributeProto_fields[12];
|
||||
extern const pb_field_t onnx_AttributeProto_fields[13];
|
||||
extern const pb_field_t onnx_ValueInfoProto_fields[3];
|
||||
extern const pb_field_t onnx_NodeProto_fields[7];
|
||||
extern const pb_field_t onnx_ModelProto_fields[8];
|
||||
extern const pb_field_t onnx_NodeProto_fields[8];
|
||||
extern const pb_field_t onnx_ModelProto_fields[9];
|
||||
extern const pb_field_t onnx_GraphProto_fields[8];
|
||||
extern const pb_field_t onnx_TensorProto_fields[12];
|
||||
extern const pb_field_t onnx_TensorProto_Segment_fields[3];
|
||||
@ -274,6 +311,7 @@ extern const pb_field_t onnx_TypeProto_TensorShapeProto_fields[2];
|
||||
extern const pb_field_t onnx_TypeProto_TensorShapeProto_Dimension_fields[3];
|
||||
extern const pb_field_t onnx_TypeProto_TensorTypeProto_fields[3];
|
||||
extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3];
|
||||
extern const pb_field_t onnx_OperatorSetIdProto_fields[3];
|
||||
|
||||
/* Maximum encoded size of messages (where known) */
|
||||
/* onnx_AttributeProto_size depends on runtime parameters */
|
||||
@ -289,6 +327,7 @@ extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3];
|
||||
/* onnx_TypeProto_TensorShapeProto_Dimension_size depends on runtime parameters */
|
||||
/* onnx_TypeProto_TensorTypeProto_size depends on runtime parameters */
|
||||
#define onnx_TypeProto_SparseTensorTypeProto_size (8 + onnx_TypeProto_TensorShapeProto_size)
|
||||
/* onnx_OperatorSetIdProto_size depends on runtime parameters */
|
||||
|
||||
/* Message IDs (where set with "msgid" option) */
|
||||
#ifdef PB_MSGID
|
||||
|
@ -14,6 +14,7 @@ import ctypes
|
||||
import os
|
||||
import torch
|
||||
import traceback
|
||||
import warnings
|
||||
from torch._six import raise_from
|
||||
from multiprocessing.util import register_after_fork as _register_after_fork
|
||||
|
||||
@ -65,11 +66,29 @@ http://www.nvidia.com/Download/index.aspx""")
|
||||
The NVIDIA driver on your system is too old (found version {}).
|
||||
Please update your GPU driver by downloading and installing a new
|
||||
version from the URL: http://www.nvidia.com/Download/index.aspx
|
||||
Alternatively, go to: https://pytorch.org/binaries to install
|
||||
Alternatively, go to: http://pytorch.org to install
|
||||
a PyTorch version that has been compiled with your version
|
||||
of the CUDA driver.""".format(str(torch._C._cuda_getDriverVersion())))
|
||||
|
||||
|
||||
def _check_capability():
    """Warn about GPUs that need a newer CUDA than this build was compiled with.

    For each device index in ``range(device_count())`` the major compute
    capability is looked up and mapped to the minimum CUDA version it needs:
    major >= 7 needs 9000, major >= 6 needs 8000. If the value returned by
    ``torch._C._cuda_getCompiledVersion()`` is lower than that minimum, a
    warning naming the device and both versions is issued via
    ``warnings.warn``. Nothing is returned.
    """
    error_str = """
Found GPU%d %s which requires CUDA_VERSION >= %d for
optimal performance and fast startup time, but your PyTorch was compiled
with CUDA_VERSION %d. Please install the correct PyTorch binary
using instructions from http://pytorch.org
"""

    CUDA_VERSION = torch._C._cuda_getCompiledVersion()
    for d in range(device_count()):
        major = get_device_capability(d)[0]
        name = get_device_name(d)
        # Check the newest architecture first so that a capability-7 device on
        # an old build is told about 9000 rather than the weaker 8000 bound.
        if major >= 7:
            required = 9000
        elif major >= 6:
            required = 8000
        else:
            # Older architectures have no minimum enforced here.
            continue
        if CUDA_VERSION < required:
            warnings.warn(error_str % (d, name, required, CUDA_VERSION))
|
||||
|
||||
|
||||
def _lazy_call(callable):
|
||||
if _initialized:
|
||||
callable()
|
||||
@ -77,6 +96,8 @@ def _lazy_call(callable):
|
||||
# Don't store the actual traceback to avoid memory cycle
|
||||
_queued_calls.append((callable, traceback.format_stack()))
|
||||
|
||||
_lazy_call(_check_capability)
|
||||
|
||||
|
||||
class DeferredCudaCallError(Exception):
|
||||
pass
|
||||
@ -213,6 +234,19 @@ def get_device_name(device):
|
||||
return torch._C._cuda_getDeviceName(device)
|
||||
|
||||
|
||||
def get_device_capability(device):
    """Gets the cuda capability of a device.

    Arguments:
        device (int): device for which to return the capability. This
            function is a no-op if this argument is negative.
    Returns:
        tuple(int, int): the major and minor cuda capability of the device,
        or ``None`` if ``device`` is negative
    """
    if device >= 0:
        return torch._C._cuda_getDeviceCapability(device)
    # Negative index: nothing is queried and no capability is reported.
    return None
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def stream(stream):
|
||||
"""Context-manager that selects a given stream.
|
||||
@ -267,6 +301,13 @@ def current_blas_handle():
|
||||
return torch._C._cuda_getCurrentBlasHandle()
|
||||
|
||||
|
||||
def empty_cache():
    """Release the unoccupied cached memory held by the caching allocator.

    The released memory becomes usable by other GPU applications and is
    visible in `nvidia-smi`. The result of the underlying
    ``torch._C._cuda_emptyCache()`` call is passed back to the caller.
    """
    released = torch._C._cuda_emptyCache()
    return released
|
||||
|
||||
|
||||
def _host_allocator():
|
||||
_lazy_init()
|
||||
return torch._C._cuda_cudaHostAllocator()
|
||||
|
@ -107,10 +107,10 @@ class Event(object):
|
||||
|
||||
Arguments:
|
||||
enable_timing (bool): indicates if the event should measure time
|
||||
(default: False)
|
||||
blocking (bool): if true, :meth:`wait` will be blocking (default: False)
|
||||
interprocess (bool): if true, the event can be shared between processes
|
||||
(default: False)
|
||||
(default: ``False``)
|
||||
blocking (bool): if ``True``, :meth:`wait` will be blocking (default: ``False``)
|
||||
interprocess (bool): if ``True``, the event can be shared between processes
|
||||
(default: ``False``)
|
||||
"""
|
||||
|
||||
DEFAULT = 0x0
|
||||
|
@ -1,17 +1,32 @@
|
||||
"""
|
||||
r"""
|
||||
The ``distributions`` package contains parameterizable probability distributions
|
||||
and sampling functions.
|
||||
|
||||
The :meth:`log_prob` method is useful for policy gradient based methods. If the
|
||||
parameters of the distribution are differentiable, then the result of ``log_prob``
|
||||
is also differentiable.
|
||||
Policy gradient methods can be implemented using the
|
||||
:meth:`~torch.distributions.Distribution.log_prob` method, when the probability
|
||||
density function is differentiable with respect to its parameters. A basic
|
||||
method is the REINFORCE rule:
|
||||
|
||||
Example::
|
||||
.. math::
|
||||
|
||||
probs = network(input)
|
||||
m = Multinomial(probs)
|
||||
\Delta\theta = \alpha r \frac{\partial\log p(a|\pi^\theta(s))}{\partial\theta}
|
||||
|
||||
where :math:`\theta` are the parameters, :math:`\alpha` is the learning rate,
|
||||
:math:`r` is the reward and :math:`p(a|\pi^\theta(s))` is the probability of
|
||||
taking action :math:`a` in state :math:`s` given policy :math:`\pi^\theta`.
|
||||
|
||||
In practice we would sample an action from the output of a network, apply this
|
||||
action in an environment, and then use ``log_prob`` to construct an equivalent
|
||||
loss function. Note that we use a negative because optimisers use gradient
|
||||
descent, whilst the rule above assumes gradient ascent. With a categorical
|
||||
policy, the code for implementing REINFORCE would be as follows::
|
||||
|
||||
probs = policy_network(state)
|
||||
# NOTE: this is equivalent to what used to be called multinomial
|
||||
m = Categorical(probs)
|
||||
action = m.sample()
|
||||
loss = -m.log_prob(action) * get_reward(env, action)
|
||||
next_state, reward = env.step(action)
|
||||
loss = -m.log_prob(action) * reward
|
||||
loss.backward()
|
||||
"""
|
||||
import math
|
||||
@ -19,7 +34,7 @@ from numbers import Number
|
||||
import torch
|
||||
|
||||
|
||||
__all__ = ['Distribution', 'Bernoulli', 'Multinomial', 'Normal']
|
||||
__all__ = ['Distribution', 'Bernoulli', 'Categorical', 'Normal']
|
||||
|
||||
|
||||
class Distribution(object):
|
||||
@ -87,9 +102,12 @@ class Bernoulli(Distribution):
|
||||
return log_pmf.gather(0, value.unsqueeze(0).long()).squeeze(0)
|
||||
|
||||
|
||||
class Multinomial(Distribution):
|
||||
class Categorical(Distribution):
|
||||
r"""
|
||||
Creates a multinomial distribution parameterized by `probs`.
|
||||
Creates a categorical distribution parameterized by `probs`.
|
||||
|
||||
.. note::
|
||||
It is equivalent to the distribution that ``multinomial()`` samples from.
|
||||
|
||||
Samples are integers from `0 ... K-1` where `K` is probs.size(-1).
|
||||
|
||||
@ -102,7 +120,7 @@ class Multinomial(Distribution):
|
||||
|
||||
Example::
|
||||
|
||||
>>> m = Multinomial(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ]))
|
||||
>>> m = Categorical(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ]))
|
||||
>>> m.sample() # equal probability of 0, 1, 2, 3
|
||||
3
|
||||
[torch.LongTensor of size 1]
|
||||
|
@ -69,10 +69,10 @@ def compile(arg=None, **kwargs):
|
||||
(as we always wait to see all derivatives before compiling.)
|
||||
Default: 1 (i.e., we will compile forwards and backwards, but not
|
||||
double-backwards).
|
||||
optimize (bool, optional): whether or not to apply optimizations. Default: True.
|
||||
optimize (bool, optional): whether or not to apply optimizations. Default: ``True``.
|
||||
|
||||
Debug arguments:
|
||||
time (bool, optional): if True, whenever we execute the model in question, we
|
||||
time (bool, optional): if ``True``, whenever we execute the model in question, we
|
||||
will also print out some timing information for how long the model
|
||||
took to execute. At the moment, there are three types of timings we
|
||||
emit:
|
||||
@ -87,10 +87,10 @@ def compile(arg=None, **kwargs):
|
||||
- optimized: the time it took to execute the optimized model.
|
||||
|
||||
At the moment, all of these timings are for the forward pass only.
|
||||
Default: False.
|
||||
enabled (bool, optional): if False, compilation is disabled and you
|
||||
Default: ``False``.
|
||||
enabled (bool, optional): if ``False``, compilation is disabled and you
|
||||
will get back your original model. This is a convenient way to
|
||||
disable tracing without having to delete the annotation. Default: True.
|
||||
disable tracing without having to delete the annotation. Default: ``True``.
|
||||
|
||||
Example: Compile as class decorator.
|
||||
|
||||
@ -396,6 +396,10 @@ class _CompiledMixin(object):
|
||||
# TODO: Figure out how to call parent destructor, if there is one.
|
||||
# Apparently, this is buggy:
|
||||
# https://stackoverflow.com/questions/22972720/python-cant-invoke-parent-class-destructor-with-super
|
||||
# NB: Have to mangle this by hand!
|
||||
if not (hasattr(self, '_CompiledMixin__misses') and hasattr(self, '_CompiledMixin___hits')):
|
||||
# Probably died during construction
|
||||
return
|
||||
if self.__misses != 0 and self.__hits == 0:
|
||||
warnings.warn("{} was marked with JIT and invoked {} times, "
|
||||
"but we never successfully used compiled code."
|
||||
|
@ -18,18 +18,22 @@ class DistKLDivCriterion(Criterion):
|
||||
input,
|
||||
target,
|
||||
self.output_tensor,
|
||||
self.sizeAverage
|
||||
self.sizeAverage,
|
||||
True, # reduce
|
||||
)
|
||||
self.output = self.output_tensor[0]
|
||||
return self.output
|
||||
|
||||
def updateGradInput(self, input, target):
|
||||
assert input.is_same_size(target)
|
||||
implicit_gradOutput = torch.ones(1).type_as(input)
|
||||
self._backend.DistKLDivCriterion_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
target,
|
||||
implicit_gradOutput,
|
||||
self.gradInput,
|
||||
self.sizeAverage
|
||||
self.sizeAverage,
|
||||
True, # reduce
|
||||
)
|
||||
return self.gradInput
|
||||
|
@ -29,7 +29,6 @@ class ELU(Module):
|
||||
def updateGradInput(self, input, gradOutput):
|
||||
self._backend.ELU_updateGradInput(
|
||||
self._backend.library_state,
|
||||
input,
|
||||
gradOutput,
|
||||
self.gradInput,
|
||||
self.output,
|
||||
|
@ -66,6 +66,7 @@ IF ($ENV{TH_BINARY_BUILD})
|
||||
IF (UNIX AND NOT APPLE)
|
||||
# hiding statically linked library symbols, this flag is not available for the linker under MACOSX
|
||||
SET(CMAKE_CXX_FLAGS "-Wl,--exclude-libs,libstdc++.a ${CMAKE_CXX_FLAGS}")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../../tools/pytorch.version")
|
||||
ENDIF(UNIX AND NOT APPLE)
|
||||
ENDIF()
|
||||
|
||||
|
@ -17,8 +17,15 @@ public:
|
||||
Type & getType(Backend p, ScalarType s) {
|
||||
initCUDAIfNeeded(p);
|
||||
auto & type = type_registry[static_cast<int>(p)][static_cast<int>(s)];
|
||||
if(!type)
|
||||
|
||||
if(!type) {
|
||||
// there is only a single Undefined Type.
|
||||
if (p == Backend::Undefined || s == ScalarType::Undefined) {
|
||||
auto & undef = type_registry[static_cast<int>(Backend::Undefined)][static_cast<int>(ScalarType::Undefined)];
|
||||
if (undef) return *undef;
|
||||
}
|
||||
runtime_error("%s%sType is not enabled.",toString(p),toString(s));
|
||||
}
|
||||
return *type;
|
||||
}
|
||||
Generator & defaultGenerator(Backend p) {
|
||||
|
@ -36,6 +36,8 @@ static DLDataType getDLDataType(const Type& type) {
|
||||
case ScalarType::Half:
|
||||
dtype.code = DLDataTypeCode::kFloat;
|
||||
break;
|
||||
case ScalarType::Undefined:
|
||||
throw std::logic_error("Undefined is not a valid ScalarType");
|
||||
case ScalarType::NumOptions:
|
||||
throw std::logic_error("NumOptions is not a valid ScalarType");
|
||||
}
|
||||
|
@ -579,13 +579,22 @@
|
||||
- CPU
|
||||
- CUDA
|
||||
return: argument 0
|
||||
arguments:
|
||||
- arg: THTensor* result
|
||||
output: True
|
||||
- accreal start
|
||||
- accreal end
|
||||
- arg: accreal step
|
||||
default: 1
|
||||
options:
|
||||
- cname: arange
|
||||
arguments:
|
||||
- arg: THTensor* result
|
||||
output: True
|
||||
- accreal start
|
||||
- accreal end
|
||||
- arg: accreal step
|
||||
default: 1
|
||||
- cname: arange
|
||||
arguments:
|
||||
- arg: THTensor* result
|
||||
output: True
|
||||
- CONSTANT 0
|
||||
- accreal end
|
||||
- CONSTANT 1
|
||||
]]
|
||||
[[
|
||||
name: scatter_
|
||||
|
@ -1,10 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/Tensor.h"
|
||||
#include <functional>
|
||||
#include <sstream>
|
||||
|
||||
namespace at {
|
||||
|
||||
// Checks that every tensor in `tensors` is defined before an operation runs.
// The list holds std::reference_wrapper<const Tensor> so that building the
// initializer_list avoids copy-construction of Tensor.
// For each element whose defined() is false, runtime_error() is called with a
// message that names `api_name`.
|
||||
inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) {
|
||||
for (auto& t : tensors) {
|
||||
// t is a reference_wrapper; get() yields the wrapped const Tensor&.
if (!t.get().defined()) {
|
||||
runtime_error("%s(...) called with an undefined Tensor", api_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand) {
|
||||
if (tensor.sizes().equals(to_expand.sizes())) {
|
||||
return std::make_tuple(to_expand);
|
||||
@ -13,6 +23,11 @@ inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_
|
||||
return std::make_tuple(to_expand.expand(tensor.sizes()));
|
||||
}
|
||||
|
||||
// Overload of expand_inplace that first runs check_defined() on `tensor` and
// `to_expand` (reporting under `api_name`), then forwards to the two-argument
// expand_inplace(tensor, to_expand) and returns its result unchanged.
inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand, const char *api_name) {
|
||||
check_defined({tensor, to_expand}, api_name);
|
||||
return expand_inplace(tensor, to_expand);
|
||||
}
|
||||
|
||||
inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) {
|
||||
if (tensor.sizes().equals(to_expand1.sizes()) && tensor.sizes().equals((to_expand2.sizes()))) {
|
||||
return std::make_tuple(to_expand1, to_expand2);
|
||||
@ -21,6 +36,12 @@ inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Ten
|
||||
return std::make_tuple(to_expand1.expand(tensor.sizes()), to_expand2.expand(tensor.sizes()));
|
||||
}
|
||||
|
||||
// Overload of expand_inplace for two tensors to expand. Runs check_defined()
// on `tensor`, `to_expand1` and `to_expand2` (reporting under `api_name`), then
// forwards to the three-argument expand_inplace and returns its result.
inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2,
|
||||
const char *api_name) {
|
||||
check_defined({tensor, to_expand1, to_expand2}, api_name);
|
||||
return expand_inplace(tensor, to_expand1, to_expand2);
|
||||
}
|
||||
|
||||
inline std::vector<int64_t> infer_size2(IntList a, IntList b) {
|
||||
auto dimsA = a.size();
|
||||
auto dimsB = b.size();
|
||||
@ -55,9 +76,14 @@ inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, cons
|
||||
return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size));
|
||||
}
|
||||
|
||||
std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
|
||||
const Tensor &to_expand2,
|
||||
const Tensor &to_expand3) {
|
||||
// Overload of expand_outplace that first runs check_defined() on both inputs
// (reporting under `api_name`), then forwards to the two-argument
// expand_outplace(to_expand1, to_expand2) and returns its result.
inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) {
|
||||
check_defined({to_expand1, to_expand2}, api_name);
|
||||
return expand_outplace(to_expand1, to_expand2);
|
||||
}
|
||||
|
||||
inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
|
||||
const Tensor &to_expand2,
|
||||
const Tensor &to_expand3) {
|
||||
if (to_expand1.sizes().equals(to_expand2.sizes()) && to_expand1.sizes().equals(to_expand3.sizes())) {
|
||||
return std::make_tuple(to_expand1, to_expand2, to_expand3);
|
||||
}
|
||||
@ -67,6 +93,14 @@ std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
|
||||
return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size), to_expand3.expand(expanded_size));
|
||||
}
|
||||
|
||||
// Overload of expand_outplace for three tensors. Runs check_defined() on all
// three inputs (reporting under `api_name`), then forwards to the
// three-argument expand_outplace and returns its result.
inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
|
||||
const Tensor &to_expand2,
|
||||
const Tensor &to_expand3,
|
||||
const char *api_name) {
|
||||
check_defined({to_expand1, to_expand2, to_expand3}, api_name);
|
||||
return expand_outplace(to_expand1, to_expand2, to_expand3);
|
||||
}
|
||||
|
||||
inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) {
|
||||
if(to_expand.sizes().equals(sizes)) {
|
||||
return std::make_tuple(to_expand);
|
||||
@ -75,4 +109,9 @@ inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) {
|
||||
return std::make_tuple(to_expand.expand(sizes));
|
||||
}
|
||||
|
||||
// Overload of expand_size that first runs check_defined() on `to_expand`
// (reporting under `api_name`), then forwards to expand_size(to_expand, sizes)
// and returns its result.
inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes, const char *api_name) {
|
||||
check_defined({to_expand}, api_name);
|
||||
return expand_size(to_expand, sizes);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -128,6 +128,24 @@
|
||||
${THTensor}_setStorage(${state,}result_->tensor, self_->tensor->storage, self_->tensor->storageOffset, size_, stride_);
|
||||
]]
|
||||
|
||||
[[
|
||||
name: as_strided_
|
||||
variants: [method,function]
|
||||
return: argument 0
|
||||
arguments:
|
||||
- THTensor* self
|
||||
- THSize* size
|
||||
- THStride* stride
|
||||
- arg: int64_t storage_offset
|
||||
default: -1
|
||||
aten_custom_call: |
|
||||
if (storage_offset == -1) {
|
||||
storage_offset = self_->tensor->storageOffset;
|
||||
}
|
||||
${THTensor}_setStorage(${state,}self_->tensor, self_->tensor->storage, storage_offset, size_, stride_);
|
||||
self_->maybeScalar(size.size() == 0);
|
||||
]]
|
||||
|
||||
[[
|
||||
name: cat
|
||||
cname: catArray
|
||||
|
@ -23,7 +23,7 @@ public:
|
||||
|
||||
explicit Scalar(const detail::TensorBase & t)
|
||||
: tag(Tag::HAS_t), t(t) {
|
||||
AT_ASSERT(t.pImpl, "Attempting to create a Scalar from an undefined tensor");
|
||||
AT_ASSERT(t.defined(), "Attempting to create a Scalar from an undefined tensor");
|
||||
AT_ASSERT(t.dim() == 0, "Attempting to create a Scalar from a %d dim tensor", t.dim());
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ enum class ScalarType {
|
||||
n,
|
||||
AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
|
||||
#undef DEFINE_ENUM
|
||||
Undefined,
|
||||
NumOptions
|
||||
};
|
||||
|
||||
@ -31,6 +32,7 @@ enum class Backend {
|
||||
CUDA,
|
||||
SparseCPU,
|
||||
SparseCUDA,
|
||||
Undefined,
|
||||
NumOptions
|
||||
};
|
||||
|
||||
@ -62,7 +64,7 @@ static inline const char * toString(ScalarType t) {
|
||||
switch(t) {
|
||||
AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
|
||||
default:
|
||||
return "UNKNOWN_SCALAR_TYPE";
|
||||
return "UNKNOWN_SCALAR";
|
||||
}
|
||||
#undef DEFINE_CASE
|
||||
}
|
||||
|
@ -1,29 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/TensorImpl.h"
|
||||
#include "ATen/UndefinedTensor.h"
|
||||
|
||||
namespace at { namespace detail {
|
||||
|
||||
// TensorBase is the base class for Tensor which handles the reference counting
|
||||
struct TensorBase {
|
||||
TensorBase()
|
||||
: pImpl(nullptr) {}
|
||||
TensorBase(): TensorBase(UndefinedTensor::singleton(), false) {}
|
||||
TensorBase(TensorImpl * self, bool retain)
|
||||
: pImpl(self) {
|
||||
if(pImpl != nullptr && retain)
|
||||
if (pImpl == nullptr) {
|
||||
throw std::runtime_error("TensorBase with nullptr not supported");
|
||||
}
|
||||
if(retain && pImpl != UndefinedTensor::singleton())
|
||||
pImpl->retain();
|
||||
}
|
||||
TensorBase(const TensorBase & rhs)
|
||||
: pImpl(rhs.pImpl) {
|
||||
if(pImpl != nullptr)
|
||||
if (pImpl != UndefinedTensor::singleton())
|
||||
pImpl->retain();
|
||||
}
|
||||
TensorBase(TensorBase && rhs) noexcept
|
||||
: pImpl(rhs.pImpl) {
|
||||
rhs.pImpl = nullptr;
|
||||
rhs.pImpl = UndefinedTensor::singleton();
|
||||
}
|
||||
~TensorBase() {
|
||||
if(pImpl != nullptr)
|
||||
if (pImpl != UndefinedTensor::singleton())
|
||||
pImpl->release();
|
||||
}
|
||||
TensorBase & operator=(TensorBase && rhs) & {
|
||||
@ -48,6 +51,9 @@ struct TensorBase {
|
||||
// Returns the raw TensorImpl pointer held in pImpl; no reference-count call is
// made here.
TensorImpl * get() const {
|
||||
return pImpl;
|
||||
}
|
||||
// Returns true when pImpl points at anything other than
// UndefinedTensor::singleton(), i.e. the singleton is the "undefined" marker.
bool defined() const {
|
||||
return pImpl != UndefinedTensor::singleton();
|
||||
}
|
||||
|
||||
friend struct Type;
|
||||
|
||||
|
@ -11,6 +11,7 @@ inline Tensor & Tensor::operator=(Scalar v) && {
|
||||
return assign_(v);
|
||||
}
|
||||
inline Tensor & Tensor::assign_(Scalar v) {
|
||||
AT_ASSERT(defined(), "attempting to assign a scalar to an undefined tensor");
|
||||
AT_ASSERT(dim() == 0, "attempting to assign a scalar to %d dim tensor", dim());
|
||||
pImpl->assign_(v);
|
||||
return *this;
|
||||
|
42
torch/lib/ATen/UndefinedTensor.cpp
Normal file
42
torch/lib/ATen/UndefinedTensor.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include "ATen/UndefinedTensor.h"
|
||||
#include "ATen/Context.h"
|
||||
|
||||
// Out-of-line definitions for UndefinedTensor. Apart from the two
// string-returning functions, every member below only calls runtime_error()
// with a message naming the operation.
// NOTE(review): sizes(), dim(), unsafeGetTH(), strides() and localScalar() have
// a non-void return type but no return statement after runtime_error();
// presumably runtime_error() never returns normally - confirm.
namespace at {
|
||||
|
||||
// should this use the globalContext? Can it get a context passed in somehow?
|
||||
// Constructs the TensorImpl base with the address of the Type that
// globalContext().getType() returns for (Backend::Undefined, ScalarType::Undefined).
UndefinedTensor::UndefinedTensor()
|
||||
: TensorImpl(&(globalContext().getType(Backend::Undefined,ScalarType::Undefined))) {
|
||||
}
|
||||
|
||||
// Returns the fixed string "UndefinedTensor".
const char * UndefinedTensor::toString() const {
|
||||
return "UndefinedTensor";
|
||||
}
|
||||
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
IntList UndefinedTensor::sizes() const {
|
||||
runtime_error("sizes() called on undefined Tensor");
|
||||
}
|
||||
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
int64_t UndefinedTensor::dim() const {
|
||||
runtime_error("dim() called on undefined Tensor");
|
||||
}
|
||||
|
||||
// Returns the fixed string "UndefinedType" (note: differs from toString()).
const char * UndefinedTensor::typeString() {
|
||||
return "UndefinedType";
|
||||
}
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
// `retain` is unused.
void * UndefinedTensor::unsafeGetTH(bool retain) {
|
||||
runtime_error("unsafeGetTH(bool retain) called on undefined Tensor");
|
||||
}
|
||||
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
IntList UndefinedTensor::strides() const {
|
||||
runtime_error("strides() called on undefined Tensor");
|
||||
}
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
Scalar UndefinedTensor::localScalar() {
|
||||
runtime_error("localScalar() called on undefined Tensor");
|
||||
}
|
||||
// Not available on an undefined tensor: reports an error via runtime_error().
// `s` is unused.
void UndefinedTensor::assign_(Scalar s) {
|
||||
runtime_error("assign_() called on undefined Tensor");
|
||||
}
|
||||
|
||||
// Definition of the static singleton instance; it is default-constructed, so
// the constructor above (and its globalContext() call) runs during static
// initialization.
// NOTE(review): this depends on globalContext() being usable at that point -
// confirm initialization order.
UndefinedTensor UndefinedTensor::_singleton;
|
||||
|
||||
}
|
28
torch/lib/ATen/UndefinedTensor.h
Normal file
28
torch/lib/ATen/UndefinedTensor.h
Normal file
@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/TensorImpl.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
// TensorImpl subclass with a single static instance, reachable through
// singleton(). The constructor is private, so instances can only be created
// by this struct and by UndefinedType, which is declared a friend below.
struct UndefinedTensor final : public TensorImpl {
|
||||
public:
|
||||
// Returns the address of the single static instance.
static inline UndefinedTensor * singleton() {
|
||||
return &_singleton;
|
||||
}
|
||||
virtual ~UndefinedTensor() {}
|
||||
// Overrides of the TensorImpl interface; bodies are defined out of line.
virtual const char * toString() const override;
|
||||
virtual IntList sizes() const override;
|
||||
virtual IntList strides() const override;
|
||||
virtual int64_t dim() const override;
|
||||
virtual Scalar localScalar() override;
|
||||
virtual void assign_(Scalar s) override;
|
||||
virtual void * unsafeGetTH(bool retain) override;
|
||||
// Static (non-virtual) type-name accessor; defined out of line.
static const char * typeString();
|
||||
private:
|
||||
// Private constructor; defined out of line.
UndefinedTensor();
|
||||
// Storage for the singleton; must be defined in exactly one translation unit.
static UndefinedTensor _singleton;
|
||||
public:
|
||||
// Grants UndefinedType access to the private members above.
friend struct UndefinedType;
|
||||
};
|
||||
|
||||
} // namespace at
|
65
torch/lib/ATen/UndefinedType.cpp
Normal file
65
torch/lib/ATen/UndefinedType.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
#include "ATen/UndefinedType.h"
|
||||
|
||||
// Out-of-line definitions for UndefinedType. Identity queries return fixed
// values; storage, generator, element-size and copy operations only call
// runtime_error() with a message naming the operation.
// NOTE(review): several value-returning functions below have no return
// statement after runtime_error(); presumably runtime_error() never returns
// normally - confirm.
namespace at {
|
||||
|
||||
// Forwards `context` to the Type base-class constructor.
UndefinedType::UndefinedType(Context* context)
|
||||
: Type(context) {}
|
||||
// Always ScalarType::Undefined.
ScalarType UndefinedType::scalarType() const {
|
||||
return ScalarType::Undefined;
|
||||
}
|
||||
// Always Backend::Undefined.
Backend UndefinedType::backend() const {
|
||||
return Backend::Undefined;
|
||||
}
|
||||
// The three capability predicates below all return false.
bool UndefinedType::isCuda() const { return false; }
|
||||
bool UndefinedType::isSparse() const { return false; }
|
||||
bool UndefinedType::isDistributed() const { return false; }
|
||||
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
std::unique_ptr<Storage> UndefinedType::storage() const {
|
||||
runtime_error("storage not defined for UndefinedType");
|
||||
}
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
// `size` is unused.
std::unique_ptr<Storage> UndefinedType::storage(size_t size) const {
|
||||
runtime_error("storage(size_t) not defined for UndefinedType");
|
||||
}
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
// `data`, `size` and `deleter` are unused.
std::unique_ptr<Storage> UndefinedType::storageFromBlob(void * data, int64_t size, const std::function<void(void*)> & deleter) const {
|
||||
runtime_error("storageFromBlob not defined for UndefinedType");
|
||||
}
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
// `th_pointer` and `retain` are unused.
Tensor UndefinedType::unsafeTensorFromTH(void * th_pointer, bool retain) const {
|
||||
runtime_error("unsafeTensorFromTH not defined for UndefinedType");
|
||||
}
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
std::unique_ptr<Generator> UndefinedType::generator() const {
|
||||
runtime_error("generator not defined for UndefinedType");
|
||||
}
|
||||
|
||||
// Returns the same string as the static typeString().
const char * UndefinedType::toString() const {
|
||||
return UndefinedType::typeString();
|
||||
}
|
||||
// Always TypeID::Undefined.
TypeID UndefinedType::ID() const {
|
||||
return TypeID::Undefined;
|
||||
}
|
||||
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
std::size_t UndefinedType::elementSizeInBytes() const {
|
||||
runtime_error("elementSizeInBytes not defined for UndefinedType");
|
||||
}
|
||||
|
||||
// Only the Undefined -> Undefined conversion is delegated to the base-class
// Type::toBackend(); any other target backend is reported via runtime_error().
Type & UndefinedType::toBackend(Backend b) const {
|
||||
if (b == Backend::Undefined) {
|
||||
return Type::toBackend(b);
|
||||
}
|
||||
runtime_error("toBackend not implemented for UndefinedType to non-UndefinedType");
|
||||
}
|
||||
// Only the Undefined -> Undefined conversion is delegated to the base-class
// Type::toScalarType(); any other target scalar type is reported via
// runtime_error().
Type & UndefinedType::toScalarType(ScalarType s) const {
|
||||
if (s == ScalarType::Undefined) {
|
||||
return Type::toScalarType(s);
|
||||
}
|
||||
runtime_error("toScalarType not implemented for UndefinedType to non-UndefinedType");
|
||||
}
|
||||
|
||||
// Returns the fixed string "UndefinedType".
const char * UndefinedType::typeString() {
|
||||
return "UndefinedType";
|
||||
}
|
||||
|
||||
// Not available for UndefinedType: reports an error via runtime_error().
// `src` and `dst` are unused.
void UndefinedType::s_copy(const Tensor & src, Tensor & dst) const {
|
||||
runtime_error("s_copy not defined for UndefinedType");
|
||||
}
|
||||
|
||||
}
|
37
torch/lib/ATen/UndefinedType.h
Normal file
37
torch/lib/ATen/UndefinedType.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/Type.h"
|
||||
#include "ATen/Context.h"
|
||||
#include "ATen/CheckGenerator.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef Type
|
||||
#undef Type
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
|
||||
// Type subclass whose scalarType(), backend() and ID() report the Undefined
// enumerators. All member functions are defined out of line.
struct UndefinedType final : public Type {
|
||||
// Explicit: a Context* must not convert implicitly to an UndefinedType.
explicit UndefinedType(Context* context);
|
||||
virtual ScalarType scalarType() const override;
|
||||
virtual Backend backend() const override;
|
||||
virtual bool isCuda() const override;
|
||||
virtual bool isSparse() const override;
|
||||
virtual bool isDistributed() const override;
|
||||
virtual std::unique_ptr<Storage> storage() const override;
|
||||
virtual std::unique_ptr<Storage> storage(size_t size) const override;
|
||||
virtual std::unique_ptr<Storage> storageFromBlob(void * data, int64_t size, const std::function<void(void*)> & deleter) const override;
|
||||
virtual std::unique_ptr<Generator> generator() const override;
|
||||
virtual const char * toString() const override;
|
||||
virtual std::size_t elementSizeInBytes() const override;
|
||||
// NOTE(review): the next two declarations are the only virtuals here without
// `override`; presumably they override virtuals in Type - confirm, and add
// `override` if so, for consistency with the siblings.
virtual Type & toBackend(Backend b) const;
|
||||
virtual Type & toScalarType(ScalarType s) const;
|
||||
virtual TypeID ID() const override;
|
||||
// Static (non-virtual) type-name accessor.
static const char * typeString();
|
||||
Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override;
|
||||
|
||||
virtual void s_copy(const Tensor & src, Tensor & dst) const override;
|
||||
};
|
||||
|
||||
} // namespace at
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "ArrayRef.h"
|
||||
#include "ATenGeneral.h"
|
||||
#include "UndefinedTensor.h"
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <typeinfo>
|
||||
@ -14,13 +15,17 @@ namespace at {
|
||||
AT_API void runtime_error(const char *format, ...);
|
||||
|
||||
template <typename T, typename Base>
|
||||
static inline T* checked_cast(Base* expr, const char * name, int pos, bool allowNull) {
|
||||
if(!expr) {
|
||||
if (allowNull) {
|
||||
return (T*) expr;
|
||||
}
|
||||
runtime_error("Expected a Tensor of type %s but found an undefined Tensor for argument #%d '%s'",
|
||||
T::typeString(),pos,name);
|
||||
static inline T* checked_cast_storage(Base* expr, const char * name, int pos) {
|
||||
if (typeid(*expr) != typeid(T))
|
||||
runtime_error("Expected object of type %s but found type %s for argument #%d '%s'",
|
||||
T::typeString(),expr->type().toString(),pos,name);
|
||||
return static_cast<T*>(expr);
|
||||
}
|
||||
|
||||
template <typename T, typename Base>
|
||||
inline T* checked_cast_tensor(Base* expr, const char * name, int pos, bool allowNull) {
|
||||
if(allowNull && expr == UndefinedTensor::singleton()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (typeid(*expr) != typeid(T))
|
||||
runtime_error("Expected object of type %s but found type %s for argument #%d '%s'",
|
||||
@ -34,11 +39,6 @@ static inline std::vector<TH*> tensor_list_checked_cast(ArrayRef<TBase> tensors,
|
||||
std::vector<TH*> casted(tensors.size());
|
||||
for (unsigned int i = 0; i < tensors.size(); ++i) {
|
||||
auto *expr = tensors[i].pImpl;
|
||||
if (!expr) {
|
||||
runtime_error("Expected a Tensor of type %s but found an undefined Tensor for sequence element %u "
|
||||
" in sequence argument at position #%d '%s'",
|
||||
T::typeString(),i,pos,name);
|
||||
}
|
||||
auto result = dynamic_cast<T*>(expr);
|
||||
if (result) {
|
||||
casted[i] = result->tensor;
|
||||
|
@ -25,7 +25,7 @@ case ${src_id}:
|
||||
FUNCTION = CodeTemplate("""\
|
||||
void ${Type}::s_copy(const Tensor & src, Tensor & dst) const {
|
||||
// code generated by function_wrapper
|
||||
auto dst_ = checked_cast<${Tensor}>(dst.pImpl,"dst",0,false);
|
||||
auto dst_ = checked_cast_tensor<${Tensor}>(dst.pImpl,"dst",0,false);
|
||||
(void) dst_; //silence unused warning
|
||||
switch(src.type().ID()) {
|
||||
${copy_body}
|
||||
|
@ -19,7 +19,7 @@ ${return_type} ${method_prefix}${api_name}(${formals_with_defaults}) const;
|
||||
TYPE_METHOD_DEFINITION_BROADCAST = CodeTemplate("""\
|
||||
${return_type} Type::${method_prefix}${api_name}(${formals}) const {
|
||||
Tensor ${broadcast_returns};
|
||||
std::tie(${broadcast_returns}) = ${broadcast_function}(${broadcast_actuals});
|
||||
std::tie(${broadcast_returns}) = ${broadcast_function}(${broadcast_actuals}, "${api_name}");
|
||||
return ${method_prefix_derived}${api_name}(${broadcast_modified_actuals});
|
||||
}
|
||||
""")
|
||||
@ -142,20 +142,22 @@ TYPE_RETURN = {
|
||||
}
|
||||
|
||||
CHECKED_CAST = {
|
||||
'THTensor*': CodeTemplate('checked_cast<${Tensor}>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THTensor*':
|
||||
CodeTemplate(
|
||||
'checked_cast_tensor<${Tensor}>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THSTensor*':
|
||||
CodeTemplate(
|
||||
'checked_cast<Sparse${Tensor}>(${arg_name}.tref.pImpl,"${arg_name}",${arg_pos},false)'),
|
||||
'checked_cast_tensor<Sparse${Tensor}>(${arg_name}.tref.pImpl,"${arg_name}",${arg_pos},false)'),
|
||||
'THBoolTensor*':
|
||||
CodeTemplate(
|
||||
'checked_cast<${Backend}ByteTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'checked_cast_tensor<${Backend}ByteTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THIndexTensor*':
|
||||
CodeTemplate(
|
||||
'checked_cast<${Backend}LongTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'checked_cast_tensor<${Backend}LongTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THIntegerTensor*':
|
||||
CodeTemplate(
|
||||
'checked_cast<${Backend}IntTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THStorage*': CodeTemplate('checked_cast<${Storage}>(&${arg_name},"${arg_name}",${arg_pos}, false)'),
|
||||
'checked_cast_tensor<${Backend}IntTensor>(${arg_name}.pImpl,"${arg_name}",${arg_pos}, ${null_okay})'),
|
||||
'THStorage*': CodeTemplate('checked_cast_storage<${Storage}>(&${arg_name},"${arg_name}",${arg_pos})'),
|
||||
'THGenerator*':
|
||||
CodeTemplate(
|
||||
'check_generator<${Backend}Generator>(${arg_name}, &context->defaultGenerator(backend()))'),
|
||||
@ -720,11 +722,14 @@ def create_derived(backend_type_env, declarations):
|
||||
def allocate_arg(env, arg, output_count):
|
||||
name = arg['name']
|
||||
allocation = CodeTemplate(ALLOC_WRAP[arg['type']]).substitute(env)
|
||||
tensor_arg = '{}_'.format(name)
|
||||
if arg.get('mask', False):
|
||||
allocation = 'output_mask[{}] ? {} : nullptr'.format(output_count, allocation)
|
||||
tensor_arg = ('{}_ == nullptr ? (TensorImpl*)UndefinedTensor::singleton() : (TensorImpl*){}_'
|
||||
.format(name, name))
|
||||
return [
|
||||
'auto {}_ = {};'.format(name, allocation),
|
||||
'auto {} = Tensor({}_,false);'.format(name, name),
|
||||
'auto {} = Tensor({}, false);'.format(name, tensor_arg),
|
||||
]
|
||||
|
||||
def resize_arg(arg):
|
||||
|
@ -3,7 +3,7 @@
|
||||
- name: binary_cross_entropy(Tensor input, Tensor target, Tensor weight={}, bool size_average=true)
|
||||
cname: BCECriterion
|
||||
|
||||
- name: kl_div(Tensor input, Tensor target, bool size_average=true)
|
||||
- name: kl_div(Tensor input, Tensor target, bool size_average=true, bool reduce=true)
|
||||
cname: DistKLDivCriterion
|
||||
|
||||
- name: l1_loss(Tensor input, Tensor target, bool size_average=true, bool reduce=True)
|
||||
@ -58,6 +58,8 @@
|
||||
|
||||
- name: log_softmax(Tensor input, int64_t dim)
|
||||
cname: LogSoftMax
|
||||
wrap_dim:
|
||||
dim: input
|
||||
|
||||
- name: prelu(Tensor input, Tensor weight)
|
||||
cname: PReLU
|
||||
@ -68,6 +70,8 @@
|
||||
|
||||
- name: softmax(Tensor input, int64_t dim)
|
||||
cname: SoftMax
|
||||
wrap_dim:
|
||||
dim: input
|
||||
|
||||
- name: softplus(Tensor input, Scalar beta=1, Scalar threshold=20)
|
||||
cname: SoftPlus
|
||||
|
@ -171,6 +171,8 @@ def get_thnn_args(thnn_function, params):
|
||||
thnn_args.append(arg_expr(name[0], name[1:]))
|
||||
elif name == 'scale':
|
||||
thnn_args.append({'type': 'EXPRESSION', 'name': '1'})
|
||||
elif name == 'inplace':
|
||||
thnn_args.append({'type': 'EXPRESSION', 'name': 'false'})
|
||||
else:
|
||||
raise RuntimeError("{}: can't find binding for '{}'"
|
||||
.format(thnn_function.name, name))
|
||||
@ -261,7 +263,8 @@ def backward_declaration(base, thnn_functions):
|
||||
|
||||
arguments = []
|
||||
arguments.append({'type': 'THTensor*', 'name': 'grad_output'})
|
||||
arguments += [copy.deepcopy(arg) for arg in base['arguments']]
|
||||
arguments += [copy.deepcopy(arg) for arg in base['arguments']
|
||||
if arg['name'] != 'inplace']
|
||||
arguments += base['buffers']
|
||||
|
||||
for arg in arguments:
|
||||
|
@ -70,9 +70,6 @@ struct Tensor : public detail::TensorBase {
|
||||
pImpl = nullptr;
|
||||
return ret;
|
||||
}
|
||||
bool defined() const {
|
||||
return pImpl != nullptr;
|
||||
}
|
||||
void swap(Tensor & rhs) {
|
||||
TensorImpl * tmp = pImpl;
|
||||
pImpl = rhs.pImpl;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "ATen/SparseTensorRef.h"
|
||||
#include "ATen/ExpandUtils.h"
|
||||
#include "ATen/NativeFunctions.h"
|
||||
#include "ATen/UndefinedType.h"
|
||||
|
||||
#include <iostream>
|
||||
${type_headers}
|
||||
@ -13,15 +14,17 @@ namespace at {
|
||||
|
||||
// Populates `context` with Type instances. ${type_registrations} is a template
// placeholder (presumably expanded by the code generator - confirm). After it,
// a new UndefinedType is installed in context->type_registry at the
// [Backend::Undefined][ScalarType::Undefined] slot via reset().
void Type::registerAll(Context * context) {
|
||||
${type_registrations}
|
||||
context->type_registry[static_cast<int>(Backend::Undefined)][static_cast<int>(ScalarType::Undefined)].reset(new UndefinedType(context));
|
||||
}
|
||||
|
||||
void Type::copy(const Tensor & src, Tensor & dst) const {
|
||||
Tensor b_src;
|
||||
std::tie(b_src) = expand_inplace(dst, src);
|
||||
std::tie(b_src) = expand_inplace(dst, src, "copy");
|
||||
s_copy(b_src, dst);
|
||||
}
|
||||
|
||||
Tensor Type::copy(const Tensor & src) const {
|
||||
AT_ASSERT(src.defined(), "attempt to copy an undefined tensor");
|
||||
Tensor r = this->tensor(src.sizes());
|
||||
r.copy_(src);
|
||||
return r;
|
||||
|
@ -56,6 +56,7 @@ static inline void noop_deleter(void*) {}
|
||||
|
||||
// Identifier enum for Type classes. ${type_ids} is a template placeholder
// (presumably expanded to one enumerator per generated type - confirm).
// Undefined follows the placeholder, and NumOptions is the final enumerator.
enum class TypeID {
|
||||
${type_ids}
|
||||
Undefined,
|
||||
NumOptions
|
||||
};
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "ATen/Utils.h"
|
||||
#include "ATen/WrapDimUtils.h"
|
||||
#include "ATen/THLongStorageView.h"
|
||||
#include "ATen/UndefinedTensor.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
|
@ -18,3 +18,6 @@ target_link_libraries(dlconvertor_test ATen)
|
||||
|
||||
add_executable(native_test native_test.cpp)
|
||||
target_link_libraries(native_test ATen)
|
||||
|
||||
add_executable(undefined_tensor_test undefined_tensor_test.cpp)
|
||||
target_link_libraries(undefined_tensor_test ATen)
|
||||
|
60
torch/lib/ATen/test/undefined_tensor_test.cpp
Normal file
60
torch/lib/ATen/test/undefined_tensor_test.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include "ATen/ATen.h"
|
||||
#include "ATen/UndefinedTensor.h"
|
||||
#include <string>
|
||||
#include "test_assert.h"
|
||||
|
||||
|
||||
using namespace at;
|
||||
|
||||
// ASSERT_THROWS(fn, message): evaluates `fn` inside a try block. If `fn`
// completes without throwing, ASSERT(false) runs. If a std::runtime_error is
// caught, asserts that its what() text contains `message` as a substring.
// Exceptions of other types are not caught here.
// NOTE(review): ASSERT(false) sits inside the try block; if ASSERT itself
// throws std::runtime_error, that failure would be caught by the handler
// below instead of propagating - confirm against test_assert.h.
#define ASSERT_THROWS(fn, message) \
|
||||
try { \
|
||||
fn; \
|
||||
ASSERT(false); \
|
||||
} catch(std::runtime_error &e) { \
|
||||
ASSERT(std::string(e.what()).find(message) != std::string::npos); \
|
||||
}
|
||||
|
||||
|
||||
// Test driver for undefined tensors. `und` is a default-constructed Tensor and
// `ft` is a one-element CPU float tensor of ones. Returns 0 when every check
// passes.
int main() {
|
||||
// mainly test that ops on undefined tensors don't segfault and give a reasonable error message.
|
||||
Tensor und;
|
||||
Tensor ft = CPU(kFloat).ones({1});
|
||||
|
||||
// Streaming an undefined tensor is exercised here without any check on the output.
std::cout << und << std::endl;
|
||||
ASSERT(!und.defined());
|
||||
ASSERT(std::string("UndefinedTensor") == und.toString());
|
||||
|
||||
// Accessors and arithmetic on or with an undefined tensor must raise an error
// whose message mentions the operation name.
ASSERT_THROWS(und.strides(), "strides");
|
||||
ASSERT_THROWS(und.dim(), "dim");
|
||||
ASSERT_THROWS(und.assign_(Scalar(5)), "assign");
|
||||
ASSERT_THROWS(und.unsafeGetTH(true), "unsafeGetTH");
|
||||
ASSERT_THROWS(und.add(und), "add");
|
||||
ASSERT_THROWS(und.add(ft), "add");
|
||||
ASSERT_THROWS(ft.add(und), "add");
|
||||
ASSERT_THROWS(und.add(5), "add");
|
||||
ASSERT_THROWS(und.mm(und), "mm");
|
||||
|
||||
// toType: undefined -> undefined is expected to complete without throwing;
// every conversion between a defined and an undefined type must raise an error.
und.toType(und.type());
|
||||
ASSERT_THROWS(und.toType(ft.type()), "attempt to copy an undefined tensor");
|
||||
ASSERT_THROWS(ft.toType(und.type()), "UndefinedType");
|
||||
und.toType(ScalarType::Undefined);
|
||||
ASSERT_THROWS(und.toType(ScalarType::Float), "toScalarType");
|
||||
ASSERT_THROWS(ft.toType(ScalarType::Undefined), "UndefinedType");
|
||||
|
||||
// copy_
|
||||
ASSERT_THROWS(und.copy_(und), "copy");
|
||||
ASSERT_THROWS(und.copy_(ft), "copy");
|
||||
ASSERT_THROWS(ft.copy_(und), "copy");
|
||||
|
||||
// toBackend: same pattern as toType above.
und.toBackend(Backend::Undefined);
|
||||
ASSERT_THROWS(und.toBackend(Backend::CPU), "toBackend");
|
||||
ASSERT_THROWS(ft.toBackend(Backend::Undefined), "UndefinedType");
|
||||
|
||||
// After move-construction, the moved-from tensor must report undefined and
// hold the UndefinedTensor singleton pointer.
Tensor to_move = CPU(kFloat).ones({1});
|
||||
Tensor m(std::move(to_move));
|
||||
ASSERT(!to_move.defined());
|
||||
ASSERT(to_move.get() == UndefinedTensor::singleton());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -306,6 +306,9 @@ IF(BLAS_FOUND)
|
||||
IF ($ENV{TH_BINARY_BUILD})
|
||||
MESSAGE(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")
|
||||
TARGET_LINK_LIBRARIES(TH "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
|
||||
IF (UNIX AND NOT APPLE)
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../../tools/pytorch.version")
|
||||
ENDIF(UNIX AND NOT APPLE)
|
||||
ELSE ($ENV{TH_BINARY_BUILD})
|
||||
TARGET_LINK_LIBRARIES(TH ${BLAS_LIBRARIES})
|
||||
ENDIF ($ENV{TH_BINARY_BUILD})
|
||||
|
@ -2,6 +2,10 @@
|
||||
#include "THDiskFile.h"
|
||||
#include "THFilePrivate.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#ifndef LLONG_MAX
|
||||
#define LLONG_MAX 9223372036854775807LL
|
||||
|
@ -2,6 +2,10 @@
|
||||
#include "THFilePrivate.h"
|
||||
#include "stdint.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
typedef struct THMemoryFile__
|
||||
{
|
||||
THFile file;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user