mirror of https://github.com/pytorch/pytorch.git
synced 2025-11-04 16:04:58 +08:00

Compare commits

177 Commits

mlazos/hc5 ... v0.3.1
@ -202,9 +202,9 @@ MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
Dockerfile is supplied to build images with cuda support and cudnn v6. Build as usual
```
docker build -t pytorch .

```
Dockerfile to build with cuda 9 and cudnn v7 (with Volta support) is in tools/docker, the build command is

```
docker build -t pytorch_cuda9 -f tools/docker/Dockerfile9 .
```
Alternatively, if you want to use a runtime image, you can use the pre-built one from Docker Hub and run with nvidia-docker:

@ -56,6 +56,12 @@ gradients are correct.
Profiler
--------

Autograd includes a profiler that lets you inspect the cost of different
operators inside your model - both on the CPU and GPU. There are two modes
implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile`,
and nvprof based (registers both CPU and GPU activity) using
:class:`~torch.autograd.profiler.emit_nvtx`.

.. autoclass:: torch.autograd.profiler.profile
    :members:

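A minimal usage sketch of the profiler described above (not part of the diff; the tensor shapes and variable names are illustrative, and it assumes the 0.3-era Variable API):

```python
# Sketch: timing operators with the autograd profiler.
import torch
from torch.autograd import Variable, profiler

x = Variable(torch.randn(16, 32), requires_grad=True)
w = Variable(torch.randn(32, 8), requires_grad=True)

with profiler.profile() as prof:      # CPU-only mode
    y = x.mm(w).tanh().sum()
    y.backward()
print(prof)                           # per-operator timing summary

# nvprof-based mode: wrap the region of interest and run the script under nvprof
# with torch.autograd.profiler.emit_nvtx():
#     y = x.mm(w).tanh().sum()
```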
@ -37,6 +37,10 @@ Streams and events
.. autoclass:: Event
   :members:

Memory management
-----------------
.. autofunction:: empty_cache

NVIDIA Tools Extension (NVTX)
-----------------------------

@ -19,10 +19,10 @@ Probability distributions - torch.distributions
.. autoclass:: Bernoulli
    :members:

:hidden:`Multinomial`
:hidden:`Categorical`
~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: Multinomial
.. autoclass:: Categorical
    :members:

:hidden:`Normal`

@ -3,18 +3,19 @@
CUDA semantics
==============

:mod:`torch.cuda` keeps track of currently selected GPU, and all CUDA tensors
you allocate will be created on it. The selected device can be changed with a
:mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of
the currently selected GPU, and all CUDA tensors you allocate will by default be
created on that device. The selected device can be changed with a
:any:`torch.cuda.device` context manager.

However, once a tensor is allocated, you can do operations on it irrespectively
of your selected device, and the results will be always placed in on the same
However, once a tensor is allocated, you can do operations on it irrespective
of the selected device, and the results will always be placed on the same
device as the tensor.

Cross-GPU operations are not allowed by default, with the only exception of
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory accesses,
any attempts to launch ops on tensors spread across different devices will
raise an error.
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access, any
attempts to launch ops on tensors spread across different devices will raise an
error.

Below you can find a small example showcasing this::

@ -41,6 +42,66 @@ Below you can find a small example showcasing this::
        d = torch.randn(2).cuda(2)
        # d.get_device() == 2

Asynchronous execution
----------------------

By default, GPU operations are asynchronous.  When you call a function that
uses the GPU, the operations are *enqueued* to the particular device, but not
necessarily executed until later.  This allows us to execute more computations
in parallel, including operations on CPU or other GPUs.

In general, the effect of asynchronous computation is invisible to the caller,
because (1) each device executes operations in the order they are queued, and
(2) PyTorch automatically performs necessary synchronization when copying data
between CPU and GPU or between two GPUs.  Hence, computation will proceed as if
every operation was executed synchronously.

You can force synchronous computation by setting environment variable
`CUDA_LAUNCH_BLOCKING=1`.  This can be handy when an error occurs on the GPU.
(With asynchronous execution, such an error isn't reported until after the
operation is actually executed, so the stack trace does not show where it was
requested.)

As an exception, several functions such as :meth:`~torch.Tensor.copy_` admit
an explicit :attr:`async` argument, which lets the caller bypass synchronization
when it is unnecessary.  Another exception is CUDA streams, explained below.

CUDA streams
^^^^^^^^^^^^

A `CUDA stream`_ is a linear sequence of execution that belongs to a specific
device.  You normally do not need to create one explicitly: by default, each
device uses its own "default" stream.

Operations inside each stream are serialized in the order they are created,
but operations from different streams can execute concurrently in any
relative order, unless explicit synchronization functions (such as
:meth:`~torch.cuda.synchronize` or :meth:`~torch.cuda.Stream.wait_stream`) are
used.  For example, the following code is incorrect::

    s = torch.cuda.Stream()  # Create a new stream.
    A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0)
    with torch.cuda.stream(s):
        # sum() may start execution before normal_() finishes!
        B = torch.sum(A)

When the "current stream" is the default stream, PyTorch automatically performs
necessary synchronization when data is moved around, as explained above.
However, when using non-default streams, it is the user's responsibility to
ensure proper synchronization.

.. _CUDA stream: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams

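A hedged sketch (not from the diff) of the synchronization the passage above asks for: before launching dependent work on a side stream, make it wait on what is already queued on the default stream. Shapes and names are illustrative, and a GPU is assumed to be available:

```python
# Sketch: corrected version of the "incorrect" example above.
import torch

s = torch.cuda.Stream()
A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0)

# Make `s` wait for normal_(), which was queued on the default stream,
# before any op submitted to `s` consumes A.
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    B = torch.sum(A)

# Alternatively, a heavier device-wide barrier:
# torch.cuda.synchronize()
```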
Memory management
-----------------

PyTorch uses a caching memory allocator to speed up memory allocations. This
allows fast memory deallocation without device synchronizations. However, the
unused memory managed by the allocator will still show as if used in
`nvidia-smi`. Calling :meth:`~torch.cuda.empty_cache` can release all unused
cached memory from PyTorch so that it can be used by other GPU applications.

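A small illustrative sketch of the behaviour described above (not part of the diff; the allocation size is arbitrary and a GPU is assumed):

```python
# Sketch: returning cached blocks to the device so other processes see them as free.
import torch

x = torch.cuda.FloatTensor(1024, 1024)   # allocates (and caches) GPU memory
del x                                    # freed into PyTorch's cache, still "used" in nvidia-smi
torch.cuda.empty_cache()                 # release unused cached memory back to the device
```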
Best practices
--------------

@ -49,13 +110,13 @@ Device-agnostic code

Due to the structure of PyTorch, you may need to explicitly write
device-agnostic (CPU or GPU) code; an example may be creating a new tensor as
the initial hidden state of a recurrent neural network.
the initial hidden state of a recurrent neural network.

The first step is to determine whether the GPU should be used or not. A common
pattern is to use Python's `argparse` module to read in user arguments, and
pattern is to use Python's ``argparse`` module to read in user arguments, and
have a flag that can be used to disable CUDA, in combination with
`torch.cuda.is_available()`. In the following, `args.cuda` results in a flag
that can be used to cast tensors and modules to CUDA if desired::
:meth:`~torch.cuda.is_available`. In the following, ``args.cuda`` results in a
flag that can be used to cast tensors and modules to CUDA if desired::

    import argparse
    import torch
@ -66,7 +127,7 @@ that can be used to cast tensors and modules to CUDA if desired::
    args = parser.parse_args()
    args.cuda = not args.disable_cuda and torch.cuda.is_available()

If modules or tensors need to be sent to the GPU, `args.cuda` can be used as
If modules or tensors need to be sent to the GPU, ``args.cuda`` can be used as
follows::

    x = torch.Tensor(8, 42)
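The hunk above shows only fragments of the ``argparse`` pattern; a self-contained sketch of the same idea follows. The flag name, description string, and tensor shape are illustrative, not taken from the diff:

```python
# Sketch: device-agnostic setup via an argparse flag plus torch.cuda.is_available().
import argparse
import torch

parser = argparse.ArgumentParser(description='Device-agnostic example')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
args = parser.parse_args()
args.cuda = not args.disable_cuda and torch.cuda.is_available()

x = torch.Tensor(8, 42)
if args.cuda:
    x = x.cuda()   # move the tensor to the current GPU only when requested
```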
@ -84,9 +145,9 @@ dataloader would be as follows::
        x = Variable(x.type(dtype))

When working with multiple GPUs on a system, you can use the
`CUDA_VISIBLE_DEVICES` environment flag to manage which GPUs are available to
PyTorch. To manually control which GPU a tensor is created on, the best practice
is to use the `torch.cuda.device()` context manager::
``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to
PyTorch. As mentioned above, to manually control which GPU a tensor is created
on, the best practice is to use a :any:`torch.cuda.device` context manager::

    print("Outside device is 0")  # On device 0 (default in most scenarios)
    with torch.cuda.device(1):
@ -94,9 +155,10 @@ is to use the `torch.cuda.device()` context manager::
    print("Outside device is still 0")  # On device 0

If you have a tensor and would like to create a new tensor of the same type on
the same device, then you can use the `.new()` function, which acts the same as
a normal tensor constructor. Whilst the previously mentioned methods depend on
the current GPU context, `new()` preserves the device of the original tensor.
the same device, then you can use the :meth:`~torch.Tensor.new` method, which
acts the same as a normal tensor constructor. Whilst the previously mentioned
methods depend on the current GPU context, :meth:`~torch.Tensor.new` preserves
the device of the original tensor.

This is the recommended practice when creating modules in which new
tensors/variables need to be created internally during the forward pass::
@ -110,8 +172,9 @@ tensors/variables need to be created internally during the forward pass::
    y_cpu_long = x_cpu_long.new([[1, 2, 3]])

If you want to create a tensor of the same type and size of another tensor, and
fill it with either ones or zeros, `torch.ones_like()` or `torch.zeros_like()`
are provided as more convenient functions (which also preserve device)::
fill it with either ones or zeros, :meth:`~torch.ones_like` or
:meth:`~torch.zeros_like` are provided as convenient helper functions (which
also preserve device)::

    x_cpu = torch.FloatTensor(1)
    x_gpu = torch.cuda.FloatTensor(1)
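A short sketch of the device-preserving constructors the hunk above describes (illustrative values, assuming a CUDA device is available):

```python
# Sketch: .new() and the *_like() helpers keep the source tensor's type and device.
import torch

x_gpu = torch.cuda.FloatTensor(2, 3).fill_(1)

h = x_gpu.new(2, 3).zero_()      # same type and device as x_gpu (cuda.FloatTensor)
ones = torch.ones_like(x_gpu)    # filled with ones, still on the GPU
zeros = torch.zeros_like(x_gpu)  # filled with zeros, still on the GPU
```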
@ -145,9 +208,9 @@ pinned memory by passing ``pin_memory=True`` to its constructor.
Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.
Most use cases involving batched inputs and multiple GPUs should default to
using :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with
the GIL, a single Python process can saturate multiple GPUs.

As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,
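A hedged sketch of the single-process, multi-GPU pattern recommended above; the module, feature sizes, and batch size are placeholders:

```python
# Sketch: one Python process driving several GPUs via nn.DataParallel.
import torch
import torch.nn as nn
from torch.autograd import Variable

model = nn.Linear(128, 10)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)   # splits each input batch across visible GPUs
if torch.cuda.is_available():
    model = model.cuda()

inputs = Variable(torch.randn(64, 128))
if torch.cuda.is_available():
    inputs = inputs.cuda()
out = model(inputs)                  # outputs gathered back onto the default device
```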
@ -53,7 +53,7 @@ exporter to print out a human-readable representation of the network::
You can also verify the protobuf using the `onnx <https://github.com/onnx/onnx/>`_ library.
You can install ``onnx`` with conda::

    conda install -c ezyang onnx
    conda install -c conda-forge onnx

Then, you can run::

@ -75,10 +75,8 @@ To run the exported script with `caffe2 <https://caffe2.ai/>`_, you will need th

2. You'll need `onnx-caffe2 <https://github.com/onnx/onnx-caffe2>`_, a
   pure-Python library which provides a Caffe2 backend for ONNX.  You can install ``onnx-caffe2``
   with conda or pip::
   with pip::

      conda install -c ezyang onnx-caffe2
      # OR
      pip install onnx-caffe2

Once these are installed, you can use the backend for Caffe2::
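Since the hunk stops at the ``::`` marker, here is a hedged sketch of the export flow the page refers to. The model choice, file name, and input shape are illustrative, and the backend call is shown only in outline because its input-feeding convention varies across onnx-caffe2 versions:

```python
# Sketch: export a model to ONNX, then hand it to the Caffe2 backend.
import torch
import torchvision
from torch.autograd import Variable

model = torchvision.models.alexnet(pretrained=False)
dummy_input = Variable(torch.randn(1, 3, 224, 224))
torch.onnx.export(model, dummy_input, "alexnet.onnx", verbose=True)

import onnx
import onnx_caffe2.backend
graph = onnx.load("alexnet.onnx")
backend = onnx_caffe2.backend.prepare(graph)
# backend.run(...) then executes the graph given the input data (typically a
# map from input names to numpy arrays); see the onnx-caffe2 README for the
# exact convention for your version.
```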
@ -122,34 +120,48 @@ Limitations
Supported operators
-------------------

In this tech preview, only the following operators are supported:
The following operators are supported:

* Add (inplace is discarded)
* Sub (inplace is discarded)
* Mul (inplace is discarded)
* Negate (inplace is discarded)
* Addmm (inplace is discarded, alpha and beta must be 1)
* Tanh (inplace is discarded)
* Sigmoid (inplace is discarded)
* Transpose
* View
* Permute
* Concat
* Squeeze (inplace is discarded)
* add (nonzero alpha not supported)
* sub (nonzero alpha not supported)
* mul
* div
* cat
* mm
* addmm
* neg
* tanh
* sigmoid
* mean
* t
* expand (only when used before a broadcasting ONNX operator; e.g., add)
* transpose
* view
* split
* squeeze
* prelu (single weight shared among input channels not supported)
* threshold (non-zero threshold/non-zero value not supported)
* leaky_relu
* glu
* softmax
* avg_pool2d (ceil_mode not supported)
* log_softmax
* unfold (experimental support with ATen-Caffe2 integration)
* elu
* Conv
* BatchNorm
* Convolution
* Embedding (only optional argument that is supported is ``padding_idx``)
* Slice (only integer indexing is supported)
* Dropout (inplace is discarded)
* Relu (inplace is discarded)
* PReLU (inplace is discarded, sharing a single weight among all channels is not supported)
* LeakyRelu (inplace is discarded)
* MaxPool1d (ceil_mode must be False)
* MaxPool2d (ceil_mode must be False)
* AvgPool2d (ceil_mode must be False)
* MaxPool1d (ceil_mode not supported)
* MaxPool2d (ceil_mode not supported)
* MaxPool3d (ceil_mode not supported)
* Embedding (no optional arguments supported)
* RNN
* ConstantPadNd
* Dropout
* FeatureDropout (training mode not supported)
* Index (constant integer and tuple indices supported)
* Negate

We plan on expanding support to more operators; RNNs are high on our priority
list.  The operator set above is sufficient to export the following models:
The operator set above is sufficient to export the following models:

* AlexNet
* DCGAN

@ -18,11 +18,11 @@ you can specify optimizer-specific options such as the learning rate, weight dec

.. note::

    If you need to move a model to GPU via `.cuda()`, please do so before
    If you need to move a model to GPU via `.cuda()`, please do so before
    constructing optimizers for it. Parameters of a model after `.cuda()` will
    be different objects with those before the call.
    be different objects with those before the call.

    In general, you should make sure that optimized parameters live in
    In general, you should make sure that optimized parameters live in
    consistent locations when optimizers are constructed and used.

Example::
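A hedged sketch of the ordering the note above prescribes (the model and hyperparameters are placeholders):

```python
# Sketch: move the model to the GPU *before* building the optimizer, so the
# optimizer holds references to the CUDA parameters it will actually update.
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)
if torch.cuda.is_available():
    model = model.cuda()          # do this first

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # then this
```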
@ -111,6 +111,8 @@ Algorithms
    :members:
.. autoclass:: Adam
    :members:
.. autoclass:: SparseAdam
    :members:
.. autoclass:: Adamax
    :members:
.. autoclass:: ASGD
@ -128,7 +130,7 @@ How to adjust Learning Rate
---------------------------

:mod:`torch.optim.lr_scheduler` provides several methods to adjust the learning
rate based on the number of epoches. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
rate based on the number of epochs. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
allows dynamic learning rate reducing based on some validation measurements.

.. autoclass:: torch.optim.lr_scheduler.LambdaLR
@ -139,5 +141,7 @@ allows dynamic learning rate reducing based on some validation measurements.
    :members:
.. autoclass:: torch.optim.lr_scheduler.ExponentialLR
    :members:
.. autoclass:: torch.optim.lr_scheduler.CosineAnnealingLR
    :members:
.. autoclass:: torch.optim.lr_scheduler.ReduceLROnPlateau
    :members:

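A hedged sketch of the plateau-based schedule mentioned above (optimizer, model, and the stand-in loss values are placeholders):

```python
# Sketch: decay the learning rate when a validation metric stops improving.
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

for epoch in range(10):
    val_loss = 1.0 / (epoch + 1)   # stand-in for a real validation loss
    scheduler.step(val_loss)       # reduces lr after `patience` epochs without improvement
```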
@ -1,5 +1,7 @@
.. currentmodule:: torch

.. _tensor-doc:

torch.Tensor
===================================


setup.py (16 changed lines)
@ -542,7 +542,7 @@ if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
    STDCPP_LIB = STDCPP_LIB[:-1]
    if type(STDCPP_LIB) != str:  # python 3
        STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding)
    main_link_args += [STDCPP_LIB]
    extra_link_args += [STDCPP_LIB]
    version_script = os.path.abspath("tools/pytorch.version")
    extra_link_args += ['-Wl,--version-script=' + version_script]

@ -593,9 +593,11 @@ extensions.append(THNN)
if WITH_CUDA:
    thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')]
    if platform.system() == 'Linux':
        thnvrtc_link_flags = ['-Wl,--no-as-needed'] + thnvrtc_link_flags
        thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed']
    # these have to be specified as -lcuda in link_flags because they
    # have to come right after the `no-as-needed` option
    thnvrtc_link_flags += ['-lcuda', '-lnvrtc']
    THNVRTC = Extension("torch._nvrtc",
                        libraries=['nvrtc', 'cuda'],
                        sources=['torch/csrc/nvrtc.cpp'],
                        language='c++',
                        include_dirs=include_dirs,
@ -618,11 +620,13 @@ if WITH_CUDA:
                       )
    extensions.append(THCUNN)

version = '0.2.0'
version = '0.3.1b0'
if os.getenv('PYTORCH_BUILD_VERSION'):
    assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
    version = os.getenv('PYTORCH_BUILD_VERSION') \
        + '_' + os.getenv('PYTORCH_BUILD_NUMBER')
    build_number = int(os.getenv('PYTORCH_BUILD_NUMBER'))
    version = os.getenv('PYTORCH_BUILD_VERSION')
    if build_number > 1:
        version += '.post' + str(build_number)
else:
    try:
        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()

@ -31,6 +31,7 @@ UNITTEST_ARGS = [sys.argv[0]] + remaining
def run_tests():
    unittest.main(argv=UNITTEST_ARGS)

IS_WINDOWS = sys.platform == "win32"

TEST_NUMPY = True
try:
@ -170,6 +171,9 @@ class TestCase(unittest.TestCase):
        return x, y

    def assertEqual(self, x, y, prec=None, message=''):
        if isinstance(prec, str) and message == '':
            message = prec
            prec = None
        if prec is None:
            prec = self.precision

@ -329,6 +333,8 @@ class TestCase(unittest.TestCase):
                self.assertEqual(s, expected)

    if sys.version_info < (3, 2):
        # assertRegexpMatches renamed assertRegex in 3.2
        assertRegex = unittest.TestCase.assertRegexpMatches
        # assertRaisesRegexp renamed assertRaisesRegex in 3.2
        assertRaisesRegex = unittest.TestCase.assertRaisesRegexp

@ -246,10 +246,24 @@ module_tests = [
]


def nllloss2d_reference(input, target, weight=None, ignore_index=-100,
def kldivloss_reference(input, target, size_average=True, reduce=True):
    safe_target = target * (target > 0).type_as(target)
    safe_target_log = (safe_target + (target <= 0).type_as(target)).log()
    result = safe_target * (safe_target_log - input)
    if reduce and size_average:
        return result.mean()
    elif reduce:
        return result.sum()
    return result
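For reference (not part of the diff): with ``input`` holding log-probabilities x_n and ``target`` holding probabilities y_n, the helper above reproduces the elementwise KLDivLoss term; the masking of non-positive targets only guards against taking log(0).

```latex
% Elementwise term computed by kldivloss_reference (entries with y_n <= 0 contribute 0):
l_n = y_n \, (\log y_n - x_n)
% Reduction:
%   mean_n l_n   if reduce and size_average
%   sum_n  l_n   if reduce only
%   (l_n)_n      otherwise (unreduced)
```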

def nlllossNd_reference(input, target, weight=None, ignore_index=-100,
                        size_average=True, reduce=True):
    N, C, H, W = input.size()
    output = torch.zeros(N, H, W).type_as(input)
    assert input.dim() >= 3
    N = input.size(0)
    C = input.size(1)
    out_size = (N,) + input.size()[2:]
    output = torch.zeros(out_size).type_as(input)
    if isinstance(target, Variable):
        target = target.data

@ -257,13 +271,13 @@ def nllloss2d_reference(input, target, weight=None, ignore_index=-100,
        weight = torch.ones(C).type_as(input)

    total_weight_data = 0
    for n in range(0, N):
        for h in range(0, H):
            for w in range(0, W):
                t_nhw = target[n][h][w]
                norm = 0. if ignore_index == t_nhw else weight[t_nhw]
                output[n][h][w] = -input[n][t_nhw][h][w] * norm
                total_weight_data += norm
    for tup in product(*[range(size) for size in out_size]):
        t_nx = target[tup]
        norm = 0. if ignore_index == t_nx else weight[t_nx]
        input_index = list(tup)
        input_index.insert(1, t_nx)
        output[tup] = -input[tuple(input_index)] * norm
        total_weight_data += norm

    if reduce and size_average:
        return output.sum() / total_weight_data
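A tiny illustrative aside (not from the diff) on the ``itertools.product`` pattern the rewritten loop uses to replace the fixed N/H/W nesting with arbitrary trailing dimensions:

```python
# Sketch: product(*[range(s) for s in out_size]) enumerates every index tuple
# of an (N, d1, d2, ...) output, generalizing the hard-coded n/h/w loops.
from itertools import product

out_size = (2, 3)   # e.g. (N, H) for a 1-d spatial case
print(list(product(*[range(s) for s in out_size])))
# [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
```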
@ -309,8 +323,9 @@ def smoothl1loss_reference(input, target, size_average=True, reduce=True):


loss_reference_fns = {
    'KLDivLoss': kldivloss_reference,
    'NLLLoss': nllloss_reference,
    'NLLLoss2d': nllloss2d_reference,
    'NLLLossNd': nlllossNd_reference,
    'SmoothL1Loss': smoothl1loss_reference,
}

@ -370,6 +385,8 @@ criterion_tests = [
        module_name='KLDivLoss',
        input_fn=lambda: torch.rand(10, 10).log(),
        target_fn=lambda: torch.rand(10, 10),
        reference_fn=lambda i, t, m:
            kldivloss_reference(i, t, get_size_average(m), reduce=True),
        check_no_size_average=True,
    ),
    dict(
@ -410,7 +427,7 @@ criterion_tests = [
        input_size=(2, 3, 5, 5),
        target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
        reference_fn=lambda i, t, m:
            nllloss2d_reference(i, t, size_average=get_size_average(m)),
            nlllossNd_reference(i, t, size_average=get_size_average(m)),
        check_no_size_average=True,
    ),
    dict(
@ -419,7 +436,7 @@ criterion_tests = [
        input_size=(2, 3, 5, 5),
        target=torch.rand(2, 5, 5).mul(3).floor().long(),
        reference_fn=lambda i, t, m:
            nllloss2d_reference(i, t, weight=get_weight(m)),
            nlllossNd_reference(i, t, weight=get_weight(m)),
        desc='weights',
    ),
    dict(
@ -428,7 +445,7 @@ criterion_tests = [
        input_size=(2, 3, 5, 5),
        target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
        reference_fn=lambda i, t, m:
            nllloss2d_reference(i, t, ignore_index=1),
            nlllossNd_reference(i, t, ignore_index=1),
        desc='ignore_index',
    ),
    dict(

@ -3,6 +3,6 @@ graph(%1 : Double(2, 2)
      %3 : Double(2)
      %4 : Double(2)
      %5 : Double(2)) {
  %7 : Double(2, 2), %8 : Handle = CppOp[N5torch8autograd16BatchNormForwardE](%1, %2, %3), uses = [[%0.i0], []];
  %7 : Double(2, 2), %8 : Handle = CppOp[N5torch8autograd16BatchNormForwardE](%1, %2, %3), uses = [[%0.i0], []], scope: BatchNorm2d;
  return (%7);
}

@ -1,6 +1,6 @@
graph(%1 : Double(20, 16, 50, 40)
      %2 : Double(13, 16, 3, 3)) {
  %4 : UNKNOWN_TYPE = Undefined(), uses = [%3.i2];
  %5 : Double(20, 13, 48, 38), %6 : Handle = CppOp[ConvForward](%1, %2, %4), uses = [[%0.i0], []];
  %4 : UNKNOWN_TYPE = Undefined(), uses = [%3.i2], scope: Conv2d;
  %5 : Double(20, 13, 48, 38), %6 : Handle = CppOp[ConvForward](%1, %2, %4), uses = [[%0.i0], []], scope: Conv2d;
  return (%5);
}

@ -1,4 +1,4 @@
graph(%1 : Double(2, 2)) {
  %3 : Double(2, 2), %4 : Handle = ^Dropout(0.6, True, False)(%1), uses = [[%0.i0], []];
  %3 : Double(2, 2), %4 : Handle = ^Dropout(0.6, True, False)(%1), uses = [[%0.i0], []], scope: Dropout;
  return (%3);
}

test/expect/TestJit.test_scopes.expect (8 lines, new file)
@ -0,0 +1,8 @@
graph(%1 : Double(1)
      %2 : Double(1)) {
  %3 : Double(1) = add[alpha={1}](%1, %2), uses = [%4.i1];
  %4 : Double(1) = mul(%1, %3), uses = [%5.i0], scope: Foo;
  %5 : Double(1) = tanh(%4), uses = [%6.i0], scope: Foo/Bar;
  %6 : Double(1) = sigmoid(%5), uses = [%0.i0], scope: Foo;
  return (%6);
}

test/expect/TestJit.test_scopes_identity_node.expect (9 lines, new file)
@ -0,0 +1,9 @@
graph(%1 : Double(1, 3, 227, 227)
      %2 : Double(64, 3, 11, 11)
      %3 : Double(64)) {
  %5 : UNKNOWN_TYPE = Conv[kernel_shape=[11, 11], strides=[4, 4], pads=[2, 2, 2, 2], dilations=[1, 1], group=1](%1, %2), uses = [[%6.i0]], scope: Net/Sequential[features]/Conv2d[0];
  %6 : Double(1, 64, 56, 56) = Add[broadcast=1, axis=1](%5, %3), uses = [%7.i0], scope: Net/Sequential[features]/Conv2d[0];
  %7 : Double(1, 64, 56, 56) = Relu(%6), uses = [%8.i0], scope: Net/Sequential[features]/ReLU[1];
  %8 : Double(1, 64, 27, 27) = MaxPool[kernel_shape=[3, 3], pads=[0, 0], strides=[2, 2]](%7), uses = [%0.i0], scope: Net/Sequential[features]/MaxPool2d[2];
  return (%8);
}

test/expect/TestJit.test_scopes_intermediate_node.expect (5 lines, new file)
@ -0,0 +1,5 @@
graph(%1 : Double(2)) {
  %2 : Double(2) = Softmax[axis=0](%1), uses = [%3.i0], scope: Net;
  %3 : Double(2) = Log(%2), uses = [%0.i0], scope: Net;
  return (%3);
}

@ -345,32 +345,6 @@ class TestAutograd(TestCase):
        self.assertEqual(counter[0], 1, 'bw_hook not called')
        self.assertEqual(x.grad.data, torch.ones(5, 5) * 2)

    @unittest.skipIf(sys.version_info[0] == 2, "Python 2 doesn't collect cycles involving __del__")
    def test_hooks_cycle(self):
        import gc
        counter = [0]

        class GradHook(object):
            def __init__(self, var):
                self.var = var

            def __del__(self):
                counter[0] += 1

            def __call__(self, *args):
                pass

        def run_test():
            x = Variable(torch.ones(5, 5), requires_grad=True)
            y = x * 2
            x.register_hook(GradHook(x))
            y.register_hook(GradHook(y))
            y._backward_hooks[1] = GradHook(y)

        run_test()
        gc.collect()
        self.assertEqual(counter[0], 3)

    def test_hook_none(self):
        # WARNING: this is a test for autograd internals.
        # You should never have to use such things in your code.
@ -995,6 +969,16 @@ class TestAutograd(TestCase):
        self._test_setitem_tensor((5, 5), Variable(mask))
        self._test_setitem_tensor((5,), Variable(mask[0]))

    def test_select_sum(self):
        # both select and sum return Scalars in ATen; ensure they work together.
        x = Variable(torch.randn(10), requires_grad=True)

        def func(x):
            return x.select(0, 1).sum()

        gradcheck(func, [x])
        gradgradcheck(func, [x])

    def test_stack(self):
        x = Variable(torch.randn(10, 10), requires_grad=True)
        y = Variable(torch.randn(10, 10), requires_grad=True)
@ -1006,6 +990,43 @@ class TestAutograd(TestCase):
        self.assertEqual(y.grad.data, grad[1])
        self.assertEqual(z.grad.data, grad[2])

    def test_put(self):
        root = Variable(torch.randn(4, 5), requires_grad=True)
        values = Variable(torch.randn(6), requires_grad=True)
        idx = Variable(torch.LongTensor([1, 2, 3, -1, -2, -3]))

        def func(root, values):
            x = root.clone()
            x.put_(idx, values)
            return x

        gradcheck(func, [root, values])
        gradgradcheck(func, [root, values])

    def test_put_accumulate(self):
        root = Variable(torch.randn(4, 5), requires_grad=True)
        values = Variable(torch.randn(6), requires_grad=True)
        idx = Variable(torch.LongTensor([1, 2, 3, 1, 2, 3]))

        def func(root, values):
            x = root.clone()
            x.put_(idx, values, accumulate=True)
            return x

        gradcheck(func, [root, values])
        gradgradcheck(func, [root, values])

    def test_fill(self):
        root = Variable(torch.randn(4, 5), requires_grad=True)

        def func(root):
            x = root.clone()
            x.fill_(2)
            return x

        gradcheck(func, [root])
        gradgradcheck(func, [root])

    def test_unused_output(self):
        x = Variable(torch.randn(10, 10), requires_grad=True)
        outputs = x.chunk(5)
@ -1461,13 +1482,14 @@ class TestAutograd(TestCase):
    def test_norm_subgradient(self):
        def run_test(input_size, norm_deg):
            input = Variable(torch.zeros(*input_size), requires_grad=True)
            out = input.norm(norm_deg)
            out.backward()
            input.norm(norm_deg).backward()
            self.assertEqual(input.grad.data.abs().sum(), 0)

        run_test((10,), 2)
        run_test((10, 10), 2)
        run_test((10,), 3)
        run_test((10,), 1)
        run_test((10,), 1.5)

    def test_profiler(self):
        x = Variable(torch.randn(10, 10))
@ -1764,8 +1786,14 @@ method_tests = [
    ('addcdiv', (S, S), (0.5, (S, 1), (1, S)), 'scale_broadcast_rhs'),
    ('addcdiv', (1,), (0.5, (S, S, 1), (1, S)), 'scale_broadcast_all'),
    ('zero_', (S, S, S), ()),
    ('norm', (S, S, S), (2,)),
    ('norm', (S, S, S), (3,), '3'),
    ('norm', (S, S), (2,)),
    ('norm', (S, S), (0,), '0'),
    ('norm', (S, S), (0.5,), '0_5'),
    ('norm', (S, S), (1,), '1'),
    ('norm', (S, S), (3,), '3'),
    ('norm', (S, S), (-1,), 'neg_1'),
    ('norm', (S, S), (-0.5,), 'neg_0_5'),
    ('norm', (S, S), (-1.5,), 'neg_1_5'),
    ('norm', torch.rand(S, S, S) + 5e-2, (1.5,), '1_5'),
    ('norm', (S, S, S), (2, 1), '2_dim', [1]),
    ('norm', (S, S, S), (3, 1), '3_dim', [1]),
@ -1842,6 +1870,7 @@ method_tests = [
    ('squeeze', (S, 1, S, 1), ()),
    ('squeeze', (S, 1, S, 1), (1,), '1_dim', [0]),
    ('squeeze', (S, 1, S, 1), (2,), 'not_1_dim', [0]),
    ('squeeze', (1,), (0,), '1d_dim0', [0]),
    ('unsqueeze', (S, S, S), (0,), 'first', [0]),
    ('unsqueeze', (S, S, S), (1,), 'middle', [0]),
    ('unsqueeze', (S, S, S), (3,), 'last', [0]),
@ -1875,6 +1904,7 @@ method_tests = [
    ('topk', (S, M, S), (3, 1), 'dim'),
    ('topk', (S, M, S), (3, 1, True), 'dim_desc'),
    ('topk', (S, M, S), (3, 1, True, True), 'dim_desc_sort'),
    ('take', (S, S, S), (Variable(torch.LongTensor([[-3, 2], [20, 2]])),)),
    ('__getitem__', torch.randn(S, S, S), (dont_convert([1, 2]),)),
    ('__getitem__', torch.randn(S, S, S), (slice(0, 3),), 'slice'),
    ('__getitem__', torch.randn(S, S, S), (dont_convert([slice(0, 3), 1]),), 'slice_index'),

@ -1,5 +1,6 @@
import math
import tempfile
import re
import unittest
from itertools import repeat

@ -16,6 +17,11 @@ if not torch.cuda.is_available():
    TestCase = object  # noqa: F811
    HAS_CUDA = False

HAS_MAGMA = HAS_CUDA
if HAS_CUDA:
    torch.ones(1).cuda()  # has_magma shows up after cuda is initialized
    HAS_MAGMA = torch.cuda.has_magma


def is_floating(t):
    return type(t) in [torch.FloatTensor, torch.DoubleTensor,
@ -91,6 +97,10 @@ def medium_2d(t):
    return make_tensor(t, M, M)


def medium_2d_expanded(t):
    return t(1).expand(M, M)


def medium_2d_scaled(t, scale=10):
    return make_tensor(t, M, M).mul(scale)

@ -137,6 +147,13 @@ def new_t(*sizes):
        return t(*sizes).copy_(torch.randn(*sizes))
    return tmp

# Content of each tuple:
# - function name
# - constructor for the tensor,    signature: fn(tensor_type) -> tensor
# - constructor for the arguments, signature: fn(tensor_type) -> list
# - postfix name for the test (must be unique for a given function) (default='')
# - tensor types to use (default=types)
# - disable inplace test, if set to True, no inplace test will be done (default=False)
tests = [
    ('add', small_3d, lambda t: [number(3.14, 3, t)]),
    ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
@ -289,9 +306,11 @@ tests = [
    ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
    ('trace', medium_2d, lambda t: [],),
    ('tril', medium_2d, lambda t: [],),
    ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
    ('tril', medium_2d, lambda t: [2], 'positive'),
    ('tril', medium_2d, lambda t: [-2], 'negative'),
    ('triu', medium_2d, lambda t: [],),
    ('triu', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
    ('triu', medium_2d, lambda t: [2], 'positive'),
    ('triu', medium_2d, lambda t: [-2], 'negative'),
    ('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
@ -378,18 +397,24 @@ def get_cycles_per_ms():
    return _cycles_per_ms


def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5, force_gpu_half=False):
    def tmp(self):
        cpu_tensor = tensor_constructor(t)
        gpu_tensor = to_gpu(cpu_tensor)
        type_map = {}
        if force_gpu_half:
            type_map = {
                'torch.FloatTensor': 'torch.cuda.HalfTensor',
                'torch.DoubleTensor': 'torch.cuda.HalfTensor',
            }
        gpu_tensor = to_gpu(cpu_tensor, type_map)
        cpu_args = arg_constructor(t)
        gpu_args = [to_gpu(arg) for arg in cpu_args]
        gpu_args = [to_gpu(arg, type_map) for arg in cpu_args]
        cpu_result = getattr(cpu_tensor, fn)(*cpu_args)
        try:
            gpu_result = getattr(gpu_tensor, fn)(*gpu_args)
        except RuntimeError as e:
            reason = e.args[0]
            if 'unimplemented data type' in reason:
            if 'only supports floating-point types' in reason or 'unimplemented data type' in reason:
                raise unittest.SkipTest('unimplemented data type')
            raise
        except AttributeError as e:
@ -707,6 +732,38 @@ class TestCuda(TestCase):
        z = torch.cat([x, y], 0)
        self.assertEqual(z.get_device(), x.get_device())

    def test_cat(self):
        SIZE = 10
        for dim in range(-3, 3):
            pos_dim = dim if dim >= 0 else 3 + dim
            x = torch.rand(13, SIZE, SIZE).transpose(0, pos_dim).cuda()
            y = torch.rand(17, SIZE, SIZE).transpose(0, pos_dim).cuda()
            z = torch.rand(19, SIZE, SIZE).transpose(0, pos_dim).cuda()

            res1 = torch.cat((x, y, z), dim)
            self.assertEqual(res1.narrow(pos_dim, 0, 13), x, 0)
            self.assertEqual(res1.narrow(pos_dim, 13, 17), y, 0)
            self.assertEqual(res1.narrow(pos_dim, 30, 19), z, 0)

        x = torch.randn(20, SIZE, SIZE).cuda()
        self.assertEqual(torch.cat(torch.split(x, 7)), x)
        self.assertEqual(torch.cat(torch.chunk(x, 7)), x)

        y = torch.randn(1, SIZE, SIZE).cuda()
        z = torch.cat([x, y])
        self.assertEqual(z.size(), (21, SIZE, SIZE))

    def test_cat_bad_input_sizes(self):
        x = torch.randn(2, 1).cuda()
        y = torch.randn(2, 1, 1).cuda()
        z = torch.randn(2, 1, 1).cuda()
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z]))

        x = torch.randn(2, 1, 2).cuda()
        y = torch.randn(2, 1, 1).cuda()
        z = torch.randn(2, 2, 1).cuda()
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1))

    def test_serialization(self):
        x = torch.randn(4, 4).cuda()
        with tempfile.NamedTemporaryFile() as f:
@ -968,6 +1025,69 @@ class TestCuda(TestCase):
    def test_tensor_scatterFill(self):
        TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)

    def test_var(self):
        cpu_tensor = torch.randn(2, 3, 3)
        gpu_tensor = cpu_tensor.cuda()
        self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
        self.assertEqual(gpu_tensor.var(1), cpu_tensor.var(1))
        self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
        self.assertEqual(gpu_tensor.std(), cpu_tensor.std())
        self.assertEqual(gpu_tensor.std(1), cpu_tensor.std(1))
        self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))

        cpu_tensor = torch.randn(100)
        gpu_tensor = cpu_tensor.cuda()
        self.assertEqual(gpu_tensor.var(), cpu_tensor.var())

    def test_var_unbiased(self):
        tensor = torch.randn(100).cuda()
        self.assertEqual(tensor.var(0), tensor.var(0, unbiased=True))
        self.assertEqual(tensor.var(), tensor.var(unbiased=True))
        self.assertEqual(tensor.var(unbiased=False), tensor.var(0, unbiased=False)[0])

        tensor = torch.FloatTensor([1.0, 2.0]).cuda()
        self.assertEqual(tensor.var(unbiased=True), 0.5)
        self.assertEqual(tensor.var(unbiased=False), 0.25)

        tensor = torch.randn(100).cuda()
        self.assertEqual(tensor.std(0), tensor.std(0, unbiased=True))
        self.assertEqual(tensor.std(), tensor.std(unbiased=True))
        self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0])

    def test_var_large_input(self):
        # Large, not-nice input
        tensor_cpu = torch.randn(2 * 32 * 1024 + 1, 2, 67)
        tensor_cuda = tensor_cpu.cuda()

        self.assertEqual(tensor_cpu.var(2), tensor_cuda.var(2).cpu())

    def test_var_stability(self):
        tensor = torch.FloatTensor([2281.5, 2281.25]).cuda()

        # Stability for inner dim
        self.assertEqual(tensor.var(0)[0], 0.03125)

        # General stability
        self.assertEqual(tensor.var(), 0.03125)

        # Stability for outer dimensions
        tensor = tensor.unsqueeze(1)
        self.assertEqual(tensor.var(0)[0], 0.03125)

    @unittest.skipIf(not HAS_MAGMA, "no MAGMA library detected")
    def test_symeig(self):
        # Small case
        tensor = torch.randn(3, 3).cuda()
        tensor = torch.mm(tensor, tensor.t())
        eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
        self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))

        # Large case
        tensor = torch.randn(257, 257).cuda()
        tensor = torch.mm(tensor, tensor.t())
        eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
        self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))

    def test_arange(self):
        for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']:
            a = torch.cuda.__dict__[t]()
@ -999,18 +1119,27 @@ if HAS_CUDA:
        for t in types:
            tensor = t()
            gpu_tensor = get_gpu_type(t)()

            # Default values
            desc = ''
            type_subset = types
            no_inplace = False
            if len(decl) == 3:
                name, constr, arg_constr = decl
                desc = ''
            elif len(decl) == 4:
                name, constr, arg_constr, desc = decl
            elif len(decl) == 5:
                name, constr, arg_constr, desc, type_subset = decl
                if t not in type_subset:
                    continue
            elif len(decl) == 6:
                name, constr, arg_constr, desc, type_subset, no_inplace = decl

            if t not in type_subset:
                continue

            precision = custom_precision.get(name, TestCuda.precision)
            for inplace in (True, False):
                if inplace and no_inplace:
                    continue
                if inplace:
                    name_inner = name + '_'
                else:
@ -1027,7 +1156,15 @@ if HAS_CUDA:
                    test_name += '_' + desc

                assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
                setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
                setattr(TestCuda,
                        test_name,
                        compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
                if t == torch.FloatTensor:
                    assert not hasattr(TestCuda, test_name + '_gpu_half'), "Duplicated test name: " + test_name
                    setattr(TestCuda,
                            test_name + '_gpu_half',
                            compare_cpu_gpu(constr, arg_constr, name_inner, t,
                                            precision, force_gpu_half=True))


if __name__ == '__main__':

@ -1,13 +1,46 @@
 | 
			
		||||
import math
 | 
			
		||||
import sys
 | 
			
		||||
import errno
 | 
			
		||||
import os
 | 
			
		||||
import ctypes
 | 
			
		||||
import signal
 | 
			
		||||
import torch
 | 
			
		||||
import time
 | 
			
		||||
import traceback
 | 
			
		||||
import unittest
 | 
			
		||||
from torch import multiprocessing
 | 
			
		||||
from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset
 | 
			
		||||
from common import TestCase, run_tests, TEST_NUMPY
 | 
			
		||||
from torch.utils.data.dataset import random_split
 | 
			
		||||
from torch.utils.data.dataloader import default_collate, ExceptionWrapper
 | 
			
		||||
from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS
 | 
			
		||||
from common_nn import TEST_CUDA
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
JOIN_TIMEOUT = 17.0 if IS_WINDOWS else 4.5
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestDatasetRandomSplit(TestCase):
 | 
			
		||||
    def test_lengths_must_equal_dataset_size(self):
 | 
			
		||||
        with self.assertRaises(ValueError):
 | 
			
		||||
            random_split([1, 2, 3, 4], [1, 2])
 | 
			
		||||
 | 
			
		||||
    def test_splits_have_correct_size(self):
 | 
			
		||||
        splits = random_split([1, 2, 3, 4, 5, 6], [2, 4])
 | 
			
		||||
        self.assertEqual(len(splits), 2)
 | 
			
		||||
        self.assertEqual(len(splits[0]), 2)
 | 
			
		||||
        self.assertEqual(len(splits[1]), 4)
 | 
			
		||||
 | 
			
		||||
    def test_splits_are_mutually_exclusive(self):
 | 
			
		||||
        data = [5, 2, 3, 4, 1, 6]
 | 
			
		||||
        splits = random_split(data, [2, 4])
 | 
			
		||||
        all_values = []
 | 
			
		||||
        all_values.extend(list(splits[0]))
 | 
			
		||||
        all_values.extend(list(splits[1]))
 | 
			
		||||
        data.sort()
 | 
			
		||||
        all_values.sort()
 | 
			
		||||
        self.assertListEqual(data, all_values)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
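random_split partitions a dataset into non-overlapping subsets with the requested lengths, which is exactly what the three tests above check. A minimal usage sketch (lengths must sum to the dataset size, otherwise a ValueError is raised):

from torch.utils.data.dataset import random_split

data = list(range(10))
train, val = random_split(data, [8, 2])
print(len(train), len(val))                              # 8 2
print(sorted(list(train) + list(val)) == data)           # True: splits are disjoint and cover `data`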
class TestTensorDataset(TestCase):
 | 
			
		||||
 | 
			
		||||
    def test_len(self):
 | 
			
		||||
@ -73,6 +106,46 @@ class TestConcatDataset(TestCase):
 | 
			
		||||
        self.assertEqual(0, (d3[0][0] - result[14][0]).abs().sum())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Stores the first encountered exception in .exception.
 | 
			
		||||
# Inspired by https://stackoverflow.com/a/33599967
 | 
			
		||||
class ErrorTrackingProcess(multiprocessing.Process):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        super(ErrorTrackingProcess, self).__init__(*args, **kwargs)
 | 
			
		||||
        self._pconn, self._cconn = multiprocessing.Pipe()
 | 
			
		||||
        self._exception = None
 | 
			
		||||
 | 
			
		||||
    def run(self):
 | 
			
		||||
        # Disable stderr printing at the OS level so that the workers do not
        # print to stderr.
        # We can't use sys.stderr.close(); otherwise a later `raise` in Python
        # errors with ValueError: I/O operation on closed file.
 | 
			
		||||
        os.close(sys.stderr.fileno())
 | 
			
		||||
        try:
 | 
			
		||||
            super(ErrorTrackingProcess, self).run()
 | 
			
		||||
            self._cconn.send(None)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            self._cconn.send(ExceptionWrapper(sys.exc_info()))
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def exception(self):
 | 
			
		||||
        if self._pconn.poll():
 | 
			
		||||
            self._exception = self._pconn.recv()
 | 
			
		||||
        if self._exception is None:
 | 
			
		||||
            return None
 | 
			
		||||
        else:
 | 
			
		||||
            return self._exception.exc_type(self._exception.exc_msg)
 | 
			
		||||
 | 
			
		||||
    # ESRCH means that os.kill could not find a live process with that pid
 | 
			
		||||
    def send_signal(self, signum, ignore_ESRCH=False):
 | 
			
		||||
        try:
 | 
			
		||||
            os.kill(self.pid, signum)
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            if not ignore_ESRCH or e.errno != errno.ESRCH:
 | 
			
		||||
                raise
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
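ErrorTrackingProcess runs its target in a child process and, through the Pipe, lets the parent inspect whatever exception killed the child. A hypothetical usage sketch (the `_boom` target is made up for illustration, and a Unix fork start method is assumed):

def _boom():
    raise RuntimeError('worker failed')

p = ErrorTrackingProcess(target=_boom)
p.start()
p.join()
print(type(p.exception))                                 # <class 'RuntimeError'>, message carries the child's traceback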
class ErrorDataset(Dataset):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, size):
 | 
			
		||||
@ -82,6 +155,84 @@ class ErrorDataset(Dataset):
 | 
			
		||||
        return self.size
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SegfaultDataset(Dataset):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, size):
 | 
			
		||||
        self.size = size
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, idx):
 | 
			
		||||
        return ctypes.string_at(0)
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return self.size
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SleepDataset(Dataset):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, size, sleep_sec):
 | 
			
		||||
        self.size = size
 | 
			
		||||
        self.sleep_sec = sleep_sec
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, idx):
 | 
			
		||||
        time.sleep(self.sleep_sec)
 | 
			
		||||
        return idx
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return self.size
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SeedDataset(Dataset):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, size):
 | 
			
		||||
        self.size = size
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, idx):
 | 
			
		||||
        return torch.initial_seed()
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return self.size
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Inspired by https://stackoverflow.com/a/26703365
 | 
			
		||||
# This ensures that each worker processes at least one sample
 | 
			
		||||
class SynchronizedSeedDataset(Dataset):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, size, num_workers):
 | 
			
		||||
        assert size >= num_workers
 | 
			
		||||
        self.count = multiprocessing.Value('i', 0)
 | 
			
		||||
        self.barrier = multiprocessing.Semaphore(0)
 | 
			
		||||
        self.num_workers = num_workers
 | 
			
		||||
        self.size = size
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, idx):
 | 
			
		||||
        self.count.value += 1
 | 
			
		||||
        if self.count.value == self.num_workers:
 | 
			
		||||
            self.barrier.release()
 | 
			
		||||
        self.barrier.acquire()
 | 
			
		||||
        self.barrier.release()
 | 
			
		||||
        return torch.initial_seed()
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return self.size
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
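SynchronizedSeedDataset uses the Semaphore as a turnstile: the last worker to arrive releases it, and every waiter re-releases it right after acquiring, so no worker returns an item before all of them have fetched one. The same idiom in isolation (a sketch; the participant count `n` and the explicit lock are assumptions added for clarity):

import multiprocessing

n = 3
count = multiprocessing.Value('i', 0)
gate = multiprocessing.Semaphore(0)

def wait_for_all():
    with count.get_lock():
        count.value += 1
        if count.value == n:
            gate.release()                               # last arrival opens the gate
    gate.acquire()                                       # wait until the gate is open...
    gate.release()                                       # ...then hand the token to the next waiter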
def _test_timeout():
 | 
			
		||||
    dataset = SleepDataset(10, 10)
 | 
			
		||||
    dataloader = DataLoader(dataset, batch_size=2, num_workers=2, timeout=1)
 | 
			
		||||
    _ = next(iter(dataloader))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
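_test_timeout drives the new `timeout` argument: if a worker fails to produce a batch within that many seconds, the iterator raises a RuntimeError instead of hanging. Roughly what a caller sees (a sketch reusing SleepDataset from above):

slow_loader = DataLoader(SleepDataset(10, 10), batch_size=2, num_workers=2, timeout=1)
try:
    next(iter(slow_loader))
except RuntimeError as e:
    print(e)                                             # 'DataLoader timed out after 1 seconds'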
def _test_segfault():
 | 
			
		||||
    dataset = SegfaultDataset(10)
 | 
			
		||||
    dataloader = DataLoader(dataset, batch_size=2, num_workers=2)
 | 
			
		||||
    _ = next(iter(dataloader))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Custom worker init function used by test_worker_init_fn below
 | 
			
		||||
def init_fn(worker_id):
 | 
			
		||||
    torch.manual_seed(12345)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestDataLoader(TestCase):
 | 
			
		||||
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
@ -148,6 +299,62 @@ class TestDataLoader(TestCase):
 | 
			
		||||
            self.assertTrue(input.is_pinned())
 | 
			
		||||
            self.assertTrue(target.is_pinned())
 | 
			
		||||
 | 
			
		||||
    def test_multiple_dataloaders(self):
 | 
			
		||||
        loader1_it = iter(DataLoader(self.dataset, num_workers=1))
 | 
			
		||||
        loader2_it = iter(DataLoader(self.dataset, num_workers=2))
 | 
			
		||||
        next(loader1_it)
 | 
			
		||||
        next(loader1_it)
 | 
			
		||||
        next(loader2_it)
 | 
			
		||||
        next(loader2_it)
 | 
			
		||||
        next(loader1_it)
 | 
			
		||||
        next(loader2_it)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(True, "flaky test")
 | 
			
		||||
    def test_segfault(self):
 | 
			
		||||
        p = ErrorTrackingProcess(target=_test_segfault)
 | 
			
		||||
        p.start()
 | 
			
		||||
        p.join(JOIN_TIMEOUT)
 | 
			
		||||
        try:
 | 
			
		||||
            self.assertFalse(p.is_alive())
 | 
			
		||||
            self.assertNotEqual(p.exitcode, 0)
 | 
			
		||||
            if IS_WINDOWS:
 | 
			
		||||
                self.assertIsInstance(p.exception, OSError)
 | 
			
		||||
                self.assertRegex(str(p.exception), r'access violation reading ')
 | 
			
		||||
            else:
 | 
			
		||||
                self.assertIsInstance(p.exception, RuntimeError)
 | 
			
		||||
                self.assertRegex(str(p.exception), r'DataLoader worker \(pid \d+\) is killed by signal: ')
 | 
			
		||||
        finally:
 | 
			
		||||
            p.terminate()
 | 
			
		||||
 | 
			
		||||
    def test_timeout(self):
 | 
			
		||||
        p = ErrorTrackingProcess(target=_test_timeout)
 | 
			
		||||
        p.start()
 | 
			
		||||
        p.join(JOIN_TIMEOUT)
 | 
			
		||||
        try:
 | 
			
		||||
            self.assertFalse(p.is_alive())
 | 
			
		||||
            self.assertNotEqual(p.exitcode, 0)
 | 
			
		||||
            self.assertIsInstance(p.exception, RuntimeError)
 | 
			
		||||
            self.assertRegex(str(p.exception), r'DataLoader timed out after \d+ seconds')
 | 
			
		||||
        finally:
 | 
			
		||||
            p.terminate()
 | 
			
		||||
 | 
			
		||||
    def test_worker_seed(self):
 | 
			
		||||
        num_workers = 6
 | 
			
		||||
        dataset = SynchronizedSeedDataset(num_workers, num_workers)
 | 
			
		||||
        dataloader = DataLoader(dataset, batch_size=1, num_workers=num_workers)
 | 
			
		||||
        seeds = set()
 | 
			
		||||
        for batch in dataloader:
 | 
			
		||||
            seeds.add(batch[0])
 | 
			
		||||
        self.assertEqual(len(seeds), num_workers)
 | 
			
		||||
 | 
			
		||||
    def test_worker_init_fn(self):
 | 
			
		||||
        dataset = SeedDataset(4)
 | 
			
		||||
        dataloader = DataLoader(dataset, batch_size=2, num_workers=2,
 | 
			
		||||
                                worker_init_fn=init_fn)
 | 
			
		||||
        for batch in dataloader:
 | 
			
		||||
            self.assertEqual(12345, batch[0])
 | 
			
		||||
            self.assertEqual(12345, batch[1])
 | 
			
		||||
 | 
			
		||||
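test_worker_init_fn above relies on `worker_init_fn` being called once in each worker process before any data is fetched. A sketch of the more common use, giving every worker a distinct seed (the offset is an arbitrary choice for illustration):

def seed_worker(worker_id):
    torch.manual_seed(1000 + worker_id)                  # worker_id runs from 0 to num_workers - 1

loader = DataLoader(SeedDataset(4), batch_size=2, num_workers=2,
                    worker_init_fn=seed_worker)
for batch in loader:
    print(batch)                                         # each worker reports its own seed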
    def test_shuffle(self):
 | 
			
		||||
        self._test_shuffle(DataLoader(self.dataset, shuffle=True))
 | 
			
		||||
 | 
			
		||||
@ -223,17 +430,17 @@ class TestDataLoader(TestCase):
 | 
			
		||||
        "check that workers exit even if the iterator is not exhausted"
 | 
			
		||||
        loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True))
 | 
			
		||||
        workers = loader.workers
 | 
			
		||||
        pin_thread = loader.pin_thread
 | 
			
		||||
        worker_manager_thread = loader.worker_manager_thread
 | 
			
		||||
        for i, sample in enumerate(loader):
 | 
			
		||||
            if i == 3:
 | 
			
		||||
                break
 | 
			
		||||
        del loader
 | 
			
		||||
        for w in workers:
 | 
			
		||||
            w.join(1.0)  # timeout of one second
 | 
			
		||||
            w.join(JOIN_TIMEOUT)
 | 
			
		||||
            self.assertFalse(w.is_alive(), 'subprocess not terminated')
 | 
			
		||||
            self.assertEqual(w.exitcode, 0)
 | 
			
		||||
        pin_thread.join(1.0)
 | 
			
		||||
        self.assertFalse(pin_thread.is_alive())
 | 
			
		||||
        worker_manager_thread.join(JOIN_TIMEOUT)
 | 
			
		||||
        self.assertFalse(worker_manager_thread.is_alive())
 | 
			
		||||
 | 
			
		||||
    def test_len(self):
 | 
			
		||||
        def check_len(dl, expected):
 | 
			
		||||
@ -276,6 +483,23 @@ class TestDataLoader(TestCase):
 | 
			
		||||
            batch = next(iter(loader))
 | 
			
		||||
            self.assertIsInstance(batch, tt)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
 | 
			
		||||
    def test_default_colate_bad_numpy_types(self):
 | 
			
		||||
        import numpy as np
 | 
			
		||||
 | 
			
		||||
        # Should be a no-op
 | 
			
		||||
        arr = np.array(['a', 'b', 'c'])
 | 
			
		||||
        default_collate(arr)
 | 
			
		||||
 | 
			
		||||
        arr = np.array([[['a', 'b', 'c']]])
 | 
			
		||||
        self.assertRaises(TypeError, lambda: default_collate(arr))
 | 
			
		||||
 | 
			
		||||
        arr = np.array([object(), object(), object()])
 | 
			
		||||
        self.assertRaises(TypeError, lambda: default_collate(arr))
 | 
			
		||||
 | 
			
		||||
        arr = np.array([[[object(), object(), object()]]])
 | 
			
		||||
        self.assertRaises(TypeError, lambda: default_collate(arr))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class StringDataset(Dataset):
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@ from common import TestCase, run_tests
 | 
			
		||||
import math
 | 
			
		||||
import torch
 | 
			
		||||
from torch.autograd import Variable, gradcheck
 | 
			
		||||
from torch.distributions import Bernoulli, Multinomial, Normal
 | 
			
		||||
from torch.distributions import Bernoulli, Categorical, Normal
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestDistributions(TestCase):
 | 
			
		||||
@ -47,22 +47,22 @@ class TestDistributions(TestCase):
 | 
			
		||||
    def test_multinomial_1d(self):
 | 
			
		||||
        p = Variable(torch.Tensor([0.1, 0.2, 0.3]), requires_grad=True)
 | 
			
		||||
        # TODO: this should return a 0-dim tensor once we have Scalar support
 | 
			
		||||
        self.assertEqual(Multinomial(p).sample().size(), (1,))
 | 
			
		||||
        self.assertEqual(Multinomial(p).sample_n(1).size(), (1, 1))
 | 
			
		||||
        self._gradcheck_log_prob(Multinomial, (p,))
 | 
			
		||||
        self.assertEqual(Categorical(p).sample().size(), (1,))
 | 
			
		||||
        self.assertEqual(Categorical(p).sample_n(1).size(), (1, 1))
 | 
			
		||||
        self._gradcheck_log_prob(Categorical, (p,))
 | 
			
		||||
 | 
			
		||||
    def test_multinomial_2d(self):
 | 
			
		||||
        probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]]
 | 
			
		||||
        p = Variable(torch.Tensor(probabilities), requires_grad=True)
 | 
			
		||||
        self.assertEqual(Multinomial(p).sample().size(), (2,))
 | 
			
		||||
        self.assertEqual(Multinomial(p).sample_n(6).size(), (6, 2))
 | 
			
		||||
        self._gradcheck_log_prob(Multinomial, (p,))
 | 
			
		||||
        self.assertEqual(Categorical(p).sample().size(), (2,))
 | 
			
		||||
        self.assertEqual(Categorical(p).sample_n(6).size(), (6, 2))
 | 
			
		||||
        self._gradcheck_log_prob(Categorical, (p,))
 | 
			
		||||
 | 
			
		||||
        def ref_log_prob(idx, val, log_prob):
 | 
			
		||||
            sample_prob = p.data[idx][val] / p.data[idx].sum()
 | 
			
		||||
            self.assertEqual(log_prob, math.log(sample_prob))
 | 
			
		||||
 | 
			
		||||
        self._check_log_prob(Multinomial(p), ref_log_prob)
 | 
			
		||||
        self._check_log_prob(Categorical(p), ref_log_prob)
 | 
			
		||||
 | 
			
		||||
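These two tests reflect the rename of the old Multinomial distribution to Categorical in this release: it samples class indices from an (optionally unnormalized) probability vector. A small sketch of the API the tests exercise:

import torch
from torch.autograd import Variable
from torch.distributions import Categorical

p = Variable(torch.Tensor([0.1, 0.2, 0.7]), requires_grad=True)
dist = Categorical(p)
idx = dist.sample()                                      # a sampled class index (shape (1,) pending scalar support)
print(dist.log_prob(idx))                                # log of the normalized probability of that index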
    def test_normal(self):
 | 
			
		||||
        mean = Variable(torch.randn(5, 5), requires_grad=True)
 | 
			
		||||
 | 
			
		||||
@ -15,6 +15,15 @@ try:
 | 
			
		||||
except ImportError:
 | 
			
		||||
    HAS_TORCHVISION = False
 | 
			
		||||
 | 
			
		||||
RUN_CUDA = torch.cuda.is_available()
 | 
			
		||||
if torch.cuda.is_available():
 | 
			
		||||
    CUDA_VERSION = torch._C._cuda_getCompiledVersion()
 | 
			
		||||
    for d in range(torch.cuda.device_count()):
 | 
			
		||||
        major = torch.cuda.get_device_capability(d)[0]
 | 
			
		||||
        if (CUDA_VERSION < 8000 and major >= 6) or (CUDA_VERSION < 9000 and major >= 7):
 | 
			
		||||
            RUN_CUDA = False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -41,6 +50,12 @@ def LSTMCellC(*args, **kwargs):
 | 
			
		||||
class TestJit(TestCase):
 | 
			
		||||
    maxDiff = None
 | 
			
		||||
 | 
			
		||||
    def assertExpectedTrace(self, trace, *args, **kwargs):
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        torch._C._jit_pass_dce(trace)
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        self.assertExpected(str(trace), *args, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def test_simple(self):
 | 
			
		||||
        x = Variable(torch.Tensor([0.4]), requires_grad=True)
 | 
			
		||||
        y = Variable(torch.Tensor([0.7]), requires_grad=True)
 | 
			
		||||
@ -52,7 +67,64 @@ class TestJit(TestCase):
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        self.assertExpected(str(trace))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    def test_scopes(self):
 | 
			
		||||
        x = Variable(torch.Tensor([0.4]), requires_grad=True)
 | 
			
		||||
        y = Variable(torch.Tensor([0.7]), requires_grad=True)
 | 
			
		||||
 | 
			
		||||
        def f(x, y):
 | 
			
		||||
            out = x + y
 | 
			
		||||
            with torch.jit.scope('Foo', out):
 | 
			
		||||
                out = x * out
 | 
			
		||||
                with torch.jit.scope('Bar', out):
 | 
			
		||||
                    out = torch.tanh(out)
 | 
			
		||||
                out = torch.sigmoid(out)
 | 
			
		||||
            return out
 | 
			
		||||
 | 
			
		||||
        trace, z = torch.jit.trace(f, (x, y), nderivs=0)
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        self.assertExpected(str(trace))
 | 
			
		||||
 | 
			
		||||
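torch.jit.trace records the operators a function actually executes into a graph, and torch.jit.scope (used in test_scopes above) tags the recorded nodes with a scope name so the IR mirrors the module hierarchy. A minimal tracing sketch in the style of these tests:

import torch
import torch.jit
from torch.autograd import Variable

def f(x, y):
    return torch.sigmoid(torch.tanh(x * (x + y)))

x = Variable(torch.randn(3), requires_grad=True)
y = Variable(torch.randn(3), requires_grad=True)
trace, out = torch.jit.trace(f, (x, y), nderivs=0)       # record the ops executed by f
torch._C._jit_pass_lint(trace)                           # the same lint pass the tests run
print(trace)                                             # human-readable trace IR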
    def test_scopes_intermediate_node(self):
 | 
			
		||||
 | 
			
		||||
        class Net(nn.Module):
 | 
			
		||||
            def forward(self, x):
 | 
			
		||||
                return F.log_softmax(x, dim=0)
 | 
			
		||||
 | 
			
		||||
        net = Net()
 | 
			
		||||
        t = Variable(torch.ones(2), requires_grad=True)
 | 
			
		||||
        trace, _ = torch.jit.trace(net, (t, ))
 | 
			
		||||
        torch.onnx._optimize_trace(trace)
 | 
			
		||||
 | 
			
		||||
        self.assertExpectedTrace(trace)
 | 
			
		||||
 | 
			
		||||
    def test_scopes_identity_node(self):
 | 
			
		||||
 | 
			
		||||
        class Net(nn.Module):
 | 
			
		||||
 | 
			
		||||
            def __init__(self):
 | 
			
		||||
                super(Net, self).__init__()
 | 
			
		||||
                self.features = nn.Sequential(
 | 
			
		||||
                    nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
 | 
			
		||||
                    nn.ReLU(inplace=True),
 | 
			
		||||
                    nn.MaxPool2d(kernel_size=3, stride=2),
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
            def forward(self, x):
 | 
			
		||||
                x = self.features(x)
 | 
			
		||||
                return x
 | 
			
		||||
 | 
			
		||||
        model = Net()
 | 
			
		||||
 | 
			
		||||
        t = Variable(torch.ones(1, 3, 227, 227), requires_grad=True)
 | 
			
		||||
 | 
			
		||||
        with torch.onnx.set_training(model, False):
 | 
			
		||||
            trace, _ = torch.jit.trace(model, (t, ))
 | 
			
		||||
 | 
			
		||||
        torch.onnx._optimize_trace(trace)
 | 
			
		||||
 | 
			
		||||
        self.assertExpectedTrace(trace)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_lstm_fusion(self):
 | 
			
		||||
        input = Variable(torch.randn(3, 10).cuda())
 | 
			
		||||
        hx = Variable(torch.randn(3, 20).cuda())
 | 
			
		||||
@ -65,7 +137,7 @@ class TestJit(TestCase):
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        self.assertExpected(str(trace))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_run_lstm_fusion(self):
 | 
			
		||||
        input = Variable(torch.randn(3, 10).cuda())
 | 
			
		||||
        hx = Variable(torch.randn(3, 20).cuda())
 | 
			
		||||
@ -78,7 +150,7 @@ class TestJit(TestCase):
 | 
			
		||||
        z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
 | 
			
		||||
        self.assertEqual(z, z2)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_run_lstm_fusion_concat(self):
 | 
			
		||||
        input = Variable(torch.randn(3, 10).cuda())
 | 
			
		||||
        hx = Variable(torch.randn(3, 20).cuda())
 | 
			
		||||
@ -91,7 +163,7 @@ class TestJit(TestCase):
 | 
			
		||||
        z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
 | 
			
		||||
        self.assertEqual(z, z2)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_concat_fusion(self):
 | 
			
		||||
        hx = Variable(torch.randn(3, 20).cuda())
 | 
			
		||||
        cx = Variable(torch.randn(3, 20).cuda())
 | 
			
		||||
@ -105,7 +177,7 @@ class TestJit(TestCase):
 | 
			
		||||
        torch._C._jit_pass_lint(trace)
 | 
			
		||||
        self.assertExpected(str(trace))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_fusion_distribute(self):
 | 
			
		||||
        def f(x, y):
 | 
			
		||||
            z1, z2 = (x + y).chunk(2, dim=1)
 | 
			
		||||
@ -146,7 +218,7 @@ class TestJit(TestCase):
 | 
			
		||||
        self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
 | 
			
		||||
        self.assertEqual(z, z2)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
 | 
			
		||||
    def test_compile_addc(self):
 | 
			
		||||
        x = Variable(torch.Tensor([0.4]), requires_grad=True).cuda()
 | 
			
		||||
        y = Variable(torch.Tensor([0.7]), requires_grad=True).cuda()
 | 
			
		||||
@ -613,7 +685,7 @@ class TestJit(TestCase):
 | 
			
		||||
        assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
 | 
			
		||||
        self.assertExpected(str(g2))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), "cpp tests require CUDA")
 | 
			
		||||
    @unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA")
 | 
			
		||||
    def test_cpp(self):
 | 
			
		||||
        torch._C._jit_run_cpp_tests()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -11,14 +11,15 @@ import torch.cuda
 | 
			
		||||
import torch.multiprocessing as mp
 | 
			
		||||
from torch.autograd import Variable
 | 
			
		||||
from torch.nn import Parameter
 | 
			
		||||
from common import TestCase, run_tests
 | 
			
		||||
from common import TestCase, run_tests, IS_WINDOWS
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
TEST_REPEATS = 30
 | 
			
		||||
HAS_SHM_FILES = os.path.isdir('/dev/shm')
 | 
			
		||||
TEST_CUDA_IPC = torch.cuda.is_available() and \
 | 
			
		||||
    sys.version_info[0] == 3 and \
 | 
			
		||||
    sys.platform != 'darwin'
 | 
			
		||||
    sys.platform != 'darwin' and \
 | 
			
		||||
    sys.platform != 'win32'
 | 
			
		||||
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -318,6 +319,7 @@ class TestMultiprocessing(TestCase):
 | 
			
		||||
            self.assertEqual(tensor_size, 5)
 | 
			
		||||
            self.assertEqual(storage_size, 5)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(IS_WINDOWS, 'not applicable to Windows (only fails with fork)')
 | 
			
		||||
    @unittest.skipIf(not torch.cuda.is_available(), 'CUDA not available')
 | 
			
		||||
    def test_cuda_bad_call(self):
 | 
			
		||||
        # Initialize CUDA
 | 
			
		||||

test/test_nn.py
@ -27,7 +27,7 @@ from torch.nn import Parameter
 | 
			
		||||
from torch.nn.parallel._functions import Broadcast
 | 
			
		||||
from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
 | 
			
		||||
    module_tests, criterion_tests, TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, \
 | 
			
		||||
    TEST_CUDNN_VERSION, loss_reference_fns
 | 
			
		||||
    TEST_CUDNN_VERSION, loss_reference_fns, get_size_average
 | 
			
		||||
from common import freeze_rng_state, run_tests, TestCase, skipIfNoLapack, \
 | 
			
		||||
    TEST_SCIPY, download_file
 | 
			
		||||
 | 
			
		||||
@ -934,6 +934,12 @@ class TestNN(NNTestCase):
 | 
			
		||||
        self.assertEqual(output[0][0].sum().data[0], 0)
 | 
			
		||||
        self.assertEqual(output[1][2].sum().data[0], 0)
 | 
			
		||||
 | 
			
		||||
        embedding = nn.Embedding(10, 20, padding_idx=0, sparse=True)
 | 
			
		||||
        input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
 | 
			
		||||
        output = embedding(input)
 | 
			
		||||
        self.assertEqual(output[0][0].sum().data[0], 0)
 | 
			
		||||
        self.assertEqual(output[1][2].sum().data[0], 0)
 | 
			
		||||
 | 
			
		||||
    def test_embedding_max_norm(self):
 | 
			
		||||
        embedding = nn.Embedding(22, 5, max_norm=1.0)
 | 
			
		||||
        input = Variable(torch.LongTensor([2, 8, 8, 6]))
 | 
			
		||||
@ -1060,6 +1066,26 @@ class TestNN(NNTestCase):
 | 
			
		||||
        offset[-1] = 100
 | 
			
		||||
        self.assertRaises(ValueError, lambda: es(input.view(-1), offset))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
 | 
			
		||||
    def test_pool3d_size_one_feature_dim(self):
 | 
			
		||||
        # Tests unusual strides when the feature dim has size 1
 | 
			
		||||
        x = torch.randn(7, 1, 5, 3, 2).cuda()
 | 
			
		||||
        strange_strides = (30, 1234, 6, 2, 1)
 | 
			
		||||
        y = x.new().set_(x.storage(), x.storage_offset(), x.size(), strange_strides)
 | 
			
		||||
        x = x.cpu().set_(x.cpu().storage(), x.storage_offset(), x.size(), strange_strides)
 | 
			
		||||
        x, y = Variable(x), Variable(y)
 | 
			
		||||
 | 
			
		||||
        to_test = {
 | 
			
		||||
            'max_pool3d': lambda t: F.max_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
 | 
			
		||||
            'avg_pool3d': lambda t: F.avg_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for test, fn in to_test.items():
 | 
			
		||||
            # Should not crash
 | 
			
		||||
            out_y = fn(y)
 | 
			
		||||
            out_x = fn(x)
 | 
			
		||||
            self.assertEqual(out_y, out_x.cuda(), test)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
 | 
			
		||||
    def test_AvgPool3d_backward_after_cat_dim1_cuda(self):
 | 
			
		||||
        # x has to have batch_size 1 to test contiguous checks
 | 
			
		||||
@ -1609,6 +1635,60 @@ class TestNN(NNTestCase):
 | 
			
		||||
        self.assertEqual(out.get_device(), 0)
 | 
			
		||||
        self.assertEqual(out.data, expected_out)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
 | 
			
		||||
    def test_data_parallel_module_kwargs_only_empty_list(self):
 | 
			
		||||
        class Net(nn.Module):
 | 
			
		||||
            def __init__(self):
 | 
			
		||||
                super(Net, self).__init__()
 | 
			
		||||
                self.l = l
 | 
			
		||||
 | 
			
		||||
            def forward(self, input):
 | 
			
		||||
                return self.l(input['data'])
 | 
			
		||||
 | 
			
		||||
        l = nn.Linear(10, 5).float().cuda()
 | 
			
		||||
        i = Variable(torch.randn(20, 10).float().cuda())
 | 
			
		||||
        expected_out = l(i).data
 | 
			
		||||
        n = nn.DataParallel(Net())
 | 
			
		||||
        out = n(input={'data': i, 'unused': []})
 | 
			
		||||
        self.assertEqual(out.get_device(), 0)
 | 
			
		||||
        self.assertEqual(out.data, expected_out)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
 | 
			
		||||
    def test_data_parallel_module_kwargs_only_empty_dict(self):
 | 
			
		||||
        class Net(nn.Module):
 | 
			
		||||
            def __init__(self):
 | 
			
		||||
                super(Net, self).__init__()
 | 
			
		||||
                self.l = l
 | 
			
		||||
 | 
			
		||||
            def forward(self, input):
 | 
			
		||||
                return self.l(input['data'])
 | 
			
		||||
 | 
			
		||||
        l = nn.Linear(10, 5).float().cuda()
 | 
			
		||||
        i = Variable(torch.randn(20, 10).float().cuda())
 | 
			
		||||
        expected_out = l(i).data
 | 
			
		||||
        n = nn.DataParallel(Net())
 | 
			
		||||
        out = n(input={'data': i, 'unused': {}})
 | 
			
		||||
        self.assertEqual(out.get_device(), 0)
 | 
			
		||||
        self.assertEqual(out.data, expected_out)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
 | 
			
		||||
    def test_data_parallel_module_kwargs_only_empty_tuple(self):
 | 
			
		||||
        class Net(nn.Module):
 | 
			
		||||
            def __init__(self):
 | 
			
		||||
                super(Net, self).__init__()
 | 
			
		||||
                self.l = l
 | 
			
		||||
 | 
			
		||||
            def forward(self, input):
 | 
			
		||||
                return self.l(input['data'])
 | 
			
		||||
 | 
			
		||||
        l = nn.Linear(10, 5).float().cuda()
 | 
			
		||||
        i = Variable(torch.randn(20, 10).float().cuda())
 | 
			
		||||
        expected_out = l(i).data
 | 
			
		||||
        n = nn.DataParallel(Net())
 | 
			
		||||
        out = n(input={'data': i, 'unused': ()})
 | 
			
		||||
        self.assertEqual(out.get_device(), 0)
 | 
			
		||||
        self.assertEqual(out.data, expected_out)
 | 
			
		||||
 | 
			
		||||
    def test_state_dict(self):
 | 
			
		||||
        l = nn.Linear(5, 5)
 | 
			
		||||
        block = nn.Module()
 | 
			
		||||
@ -1909,6 +1989,32 @@ class TestNN(NNTestCase):
 | 
			
		||||
                input = Variable(torch.Tensor(torch.Size((3, ) * dims)))
 | 
			
		||||
                self.assertRaises(ValueError, lambda: module(input))
 | 
			
		||||
 | 
			
		||||
    def test_conv_shapecheck(self):
 | 
			
		||||
        def test(should_raise, module, input_size):
 | 
			
		||||
            input = Variable(torch.Tensor(3, *input_size))
 | 
			
		||||
            if should_raise:
 | 
			
		||||
                self.assertRaises(RuntimeError, lambda: module(input))
 | 
			
		||||
            else:
 | 
			
		||||
                # Just run it to ensure no exception is raised.
 | 
			
		||||
                module(input)
 | 
			
		||||
 | 
			
		||||
        # Conv1d
 | 
			
		||||
        test(True, nn.Conv1d(1, 1, 3), (1, 2))
 | 
			
		||||
        test(True, nn.Conv1d(1, 1, 3, stride=2), (1, 2))
 | 
			
		||||
        test(False, nn.Conv1d(1, 1, 2), (1, 2))
 | 
			
		||||
        test(False, nn.Conv1d(1, 1, 2, stride=2), (1, 2))
 | 
			
		||||
        test(False, nn.Conv1d(1, 1, 3, stride=2, padding=1), (1, 2))
 | 
			
		||||
 | 
			
		||||
        # Conv2d
 | 
			
		||||
        test(True, nn.Conv2d(1, 1, (3, 3)), (1, 2, 2))
 | 
			
		||||
        test(False, nn.Conv2d(1, 1, (3, 3)), (1, 3, 3))
 | 
			
		||||
        test(False, nn.Conv2d(1, 1, (3, 3), padding=1), (1, 2, 2))
 | 
			
		||||
 | 
			
		||||
        # Conv3D
 | 
			
		||||
        test(True, nn.Conv3d(1, 1, (3, 3, 3)), (1, 2, 2, 2))
 | 
			
		||||
        test(False, nn.Conv3d(1, 1, (3, 3, 3)), (1, 3, 3, 3))
 | 
			
		||||
        test(False, nn.Conv3d(1, 1, (3, 3, 3), padding=1), (1, 2, 2, 2))
 | 
			
		||||
 | 
			
		||||
    def test_ConvTranspose2d_output_size(self):
 | 
			
		||||
        m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2)
 | 
			
		||||
        i = Variable(torch.randn(2, 3, 6, 6))
 | 
			
		||||
@ -2249,6 +2355,38 @@ class TestNN(NNTestCase):
 | 
			
		||||
            weight_data[:] = 4
 | 
			
		||||
            self.assertEqual(weight_data, all_vars[4].data)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
 | 
			
		||||
    def test_cudnn_weight_tying(self):
 | 
			
		||||
        rnns = [
 | 
			
		||||
            nn.LSTM(10, 20, batch_first=True, bidirectional=True),
 | 
			
		||||
            nn.GRU(10, 20, batch_first=True, bidirectional=True),
 | 
			
		||||
            nn.RNN(10, 20, batch_first=True, bidirectional=True)
 | 
			
		||||
        ]
 | 
			
		||||
        for rnn in rnns:
 | 
			
		||||
            rnn.bias_ih_l0_reverse = rnn.bias_ih_l0
 | 
			
		||||
            rnn.cuda()
 | 
			
		||||
            input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
 | 
			
		||||
            hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
 | 
			
		||||
            all_vars = [input, hx] + list(rnn.parameters())
 | 
			
		||||
            opt = torch.optim.SGD(rnn.parameters(), lr=0.1)
 | 
			
		||||
            opt.zero_grad()
 | 
			
		||||
            if isinstance(rnn, nn.LSTM):
 | 
			
		||||
                cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
 | 
			
		||||
                all_vars[2:2] = [cx]
 | 
			
		||||
                hx = (hx, cx)
 | 
			
		||||
 | 
			
		||||
            with warnings.catch_warnings(record=True) as w:
 | 
			
		||||
                output = rnn(input, hx)
 | 
			
		||||
            output[0].sum().backward()
 | 
			
		||||
 | 
			
		||||
            opt.step()
 | 
			
		||||
            with warnings.catch_warnings(record=True) as w:
 | 
			
		||||
                output_cuda = rnn(input, hx)
 | 
			
		||||
            rnn.cpu()
 | 
			
		||||
            hx = (hx[0].cpu(), hx[1].cpu()) if isinstance(rnn, nn.LSTM) else hx.cpu()
 | 
			
		||||
            output_cpu = rnn(input.cpu(), hx)
 | 
			
		||||
            self.assertEqual(output_cuda, output_cpu)
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
 | 
			
		||||
    def test_cuda_rnn_fused(self):
 | 
			
		||||
        def copy_rnn(rnn1, rnn2):
 | 
			
		||||
@ -2318,6 +2456,69 @@ class TestNN(NNTestCase):
 | 
			
		||||
        finally:
 | 
			
		||||
            torch.backends.cudnn.enabled = prev
 | 
			
		||||
 | 
			
		||||
    def test_rnn_args_check(self):
 | 
			
		||||
        input_size = 3
 | 
			
		||||
        hidden_size = 5
 | 
			
		||||
        num_layers = 2
 | 
			
		||||
        batch_size = 4
 | 
			
		||||
        seq_len = 6
 | 
			
		||||
        num_directions = 1
 | 
			
		||||
 | 
			
		||||
        def test(input_shape, hidden_shape, mode):
 | 
			
		||||
            for input, hidden in get_inputs(input_shape, hidden_shape, mode):
 | 
			
		||||
                model = getattr(nn, mode)(input_size, hidden_size, num_layers)
 | 
			
		||||
                self.assertRaises(RuntimeError, lambda: model(input, hidden))
 | 
			
		||||
 | 
			
		||||
        correct_input_shape = (seq_len, batch_size, input_size)
 | 
			
		||||
        correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size)
 | 
			
		||||
 | 
			
		||||
        def update_tuple(tup, dim, delta):
 | 
			
		||||
            new_tup = list(tup)
 | 
			
		||||
            new_tup[dim] = new_tup[dim] + delta
 | 
			
		||||
            return tuple(new_tup)
 | 
			
		||||
 | 
			
		||||
        def get_inputs(input_shape, hidden_shape, mode):
 | 
			
		||||
            '''Returns a list of (input, hidden) tuples, where each pair is a
            set of inputs to feed to the model.'''
 | 
			
		||||
            input = Variable(torch.randn(input_shape))
 | 
			
		||||
            hidden = Variable(torch.randn(hidden_shape))
 | 
			
		||||
            if mode != 'LSTM':
 | 
			
		||||
                return [(input, hidden)]
 | 
			
		||||
            if hidden_shape == correct_hidden_shape:
 | 
			
		||||
                return [(input, (hidden, hidden))]
 | 
			
		||||
            good_hidden = Variable(torch.randn(correct_hidden_shape))
 | 
			
		||||
            return [
 | 
			
		||||
                (input, (hidden, good_hidden)),
 | 
			
		||||
                (input, (good_hidden, hidden)),
 | 
			
		||||
            ]
 | 
			
		||||
 | 
			
		||||
        rnn_modes = ['RNN', 'GRU', 'LSTM']
 | 
			
		||||
        for mode in rnn_modes:
 | 
			
		||||
            # Incorrect input batch size
 | 
			
		||||
            input_shape = update_tuple(correct_input_shape, 1, -1)
 | 
			
		||||
            hidden_shape = correct_hidden_shape
 | 
			
		||||
            test(input_shape, hidden_shape, mode)
 | 
			
		||||
 | 
			
		||||
            # Incorrect hidden batch size
 | 
			
		||||
            input_shape = correct_input_shape
 | 
			
		||||
            hidden_shape = update_tuple(correct_hidden_shape, 1, -1)
 | 
			
		||||
            test(input_shape, hidden_shape, mode)
 | 
			
		||||
 | 
			
		||||
            # Incorrect input size
 | 
			
		||||
            input_shape = update_tuple(correct_input_shape, 2, -1)
 | 
			
		||||
            hidden_shape = correct_hidden_shape
 | 
			
		||||
            test(input_shape, hidden_shape, mode)
 | 
			
		||||
 | 
			
		||||
            # Incorrect hidden size
 | 
			
		||||
            input_shape = correct_input_shape
 | 
			
		||||
            hidden_shape = update_tuple(correct_hidden_shape, 2, -1)
 | 
			
		||||
            test(input_shape, hidden_shape, mode)
 | 
			
		||||
 | 
			
		||||
            # Incorrect hidden[0]
 | 
			
		||||
            input_shape = correct_input_shape
 | 
			
		||||
            hidden_shape = update_tuple(correct_hidden_shape, 0, -1)
 | 
			
		||||
            test(input_shape, hidden_shape, mode)
 | 
			
		||||
 | 
			
		||||
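The shape conventions being enforced are: input of size (seq_len, batch, input_size) and hidden state(s) of size (num_layers * num_directions, batch, hidden_size); any mismatch should now raise a RuntimeError. A sketch using the correct shapes from the constants above:

import torch
import torch.nn as nn
from torch.autograd import Variable

lstm = nn.LSTM(input_size=3, hidden_size=5, num_layers=2)
x = Variable(torch.randn(6, 4, 3))                       # (seq_len, batch, input_size)
h0 = Variable(torch.randn(2, 4, 5))                      # (num_layers * num_directions, batch, hidden_size)
c0 = Variable(torch.randn(2, 4, 5))
out, (hn, cn) = lstm(x, (h0, c0))
print(out.size())                                        # torch.Size([6, 4, 5])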
    def test_rnn_initial_hidden_state(self):
 | 
			
		||||
        rnn_modes = ['RNN', 'GRU', 'LSTM']
 | 
			
		||||
        for mode in rnn_modes:
 | 
			
		||||
@ -2759,6 +2960,26 @@ class TestNN(NNTestCase):
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(out1, out2)
 | 
			
		||||
 | 
			
		||||
    def test_elu_inplace_gradgrad(self):
 | 
			
		||||
        v = Variable(torch.randn(8), requires_grad=True)
 | 
			
		||||
 | 
			
		||||
        def func(root):
 | 
			
		||||
            x = root.clone()
 | 
			
		||||
            return F.elu(x, inplace=True)
 | 
			
		||||
 | 
			
		||||
        gradcheck(func, [v])
 | 
			
		||||
        gradgradcheck(func, [v])
 | 
			
		||||
 | 
			
		||||
    def test_hardtanh_inplace_gradgrad(self):
 | 
			
		||||
        v = Variable(torch.randn(8), requires_grad=True)
 | 
			
		||||
 | 
			
		||||
        def func(root):
 | 
			
		||||
            x = root.clone()
 | 
			
		||||
            return F.hardtanh(x, inplace=True)
 | 
			
		||||
 | 
			
		||||
        gradcheck(func, [v])
 | 
			
		||||
        gradgradcheck(func, [v])
 | 
			
		||||
 | 
			
		||||
    def test_batchnorm_raises_error_if_running_mean_is_not_same_size_as_input(self):
 | 
			
		||||
        input = Variable(torch.rand(2, 10))
 | 
			
		||||
        running_var = torch.rand(10)
 | 
			
		||||
@ -2844,39 +3065,25 @@ class TestNN(NNTestCase):
 | 
			
		||||
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2)))
 | 
			
		||||
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))
 | 
			
		||||
 | 
			
		||||
        # Check cosine_similarity input/output shapes
 | 
			
		||||
        input_size = (1, 3, 2, 1)
 | 
			
		||||
        expected_size = (1, 2, 1)
 | 
			
		||||
        input1 = Variable(torch.randn(input_size), requires_grad=True)
 | 
			
		||||
        input2 = Variable(torch.randn(input_size), requires_grad=True)
 | 
			
		||||
        self.assertEqual(F.cosine_similarity(input1, input2, dim=1).size(), expected_size)
 | 
			
		||||
 | 
			
		||||
    def test_grid_sample(self):
 | 
			
		||||
        # test known input on CPU
 | 
			
		||||
        input = Variable(torch.arange(1, 11).view(1, 1, 2, 5))
 | 
			
		||||
        grid = Variable(torch.Tensor(
 | 
			
		||||
            [[-1, -0.5, 0, 0.2, 1],
 | 
			
		||||
             [-1, -0.333, 0, 0.5, 1],
 | 
			
		||||
             [-1, -0.5, 0, 0.3333, 1],
 | 
			
		||||
             [-1, -0.2, 0, 0.2, 1]]).view(1, 2, 5, 2))
 | 
			
		||||
        output = F.grid_sample(input, grid)
 | 
			
		||||
        groundtruth = torch.Tensor(
 | 
			
		||||
            [[2.2500, 6.0000000000, 5.0000, 4.8340, 9.0000],
 | 
			
		||||
             [2.2500, 6.333250045, 5.0000, 5.1000, 8.4000]]).view(1, 1, 2, 5)
 | 
			
		||||
        self.assertEqual(output.data, groundtruth)
 | 
			
		||||
        def test_cpu_against_cuda(N, C, H, W, padding_mode):
 | 
			
		||||
            def test_shape(N, C, IH, IW, H, W, padding_mode):
 | 
			
		||||
 | 
			
		||||
        # do gradcheck
 | 
			
		||||
        N = random.randint(1, 8)
 | 
			
		||||
        C = random.randint(1, 8)
 | 
			
		||||
        H = random.randint(1, 8)
 | 
			
		||||
        W = random.randint(1, 8)
 | 
			
		||||
        input = Variable(torch.randn(N, C, H, W), requires_grad=True)
 | 
			
		||||
        grid = Variable(torch.randn(N, H, W, 2), requires_grad=True)
 | 
			
		||||
        self.assertTrue(gradcheck(lambda inp, grid: F.grid_sample(inp, grid), (input, grid)))
 | 
			
		||||
 | 
			
		||||
        def test_cpu_against_cuda(N, C, H, W):
 | 
			
		||||
            def test_shape(N, C, IH, IW, H, W):
 | 
			
		||||
                input_cpu = Variable(torch.randn(C, N, IH, IW).transpose(0, 1), requires_grad=True)
 | 
			
		||||
                grid_cpu = Variable(torch.randn(H, N, W, 2).transpose(0, 1), requires_grad=True)
 | 
			
		||||
                out_cpu = F.grid_sample(input_cpu, grid_cpu)
 | 
			
		||||
                out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode)
 | 
			
		||||
                self.assertTrue(out_cpu.size() == torch.Size([N, C, H, W]))
 | 
			
		||||
 | 
			
		||||
                input_cuda = Variable(input_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True)
 | 
			
		||||
                grid_cuda = Variable(grid_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True)
 | 
			
		||||
                out_cuda = F.grid_sample(input_cuda, grid_cuda)
 | 
			
		||||
                out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode)
 | 
			
		||||
                self.assertEqual(out_cpu, out_cuda)
 | 
			
		||||
 | 
			
		||||
                gradients = out_cpu.data.new(out_cpu.size()).normal_()
 | 
			
		||||
@ -2889,15 +3096,15 @@ class TestNN(NNTestCase):
 | 
			
		||||
                base_input = torch.randn(C, IH, IW)
 | 
			
		||||
                input_cpu = Variable(base_input.expand(input_cuda.size()), requires_grad=True)
 | 
			
		||||
                grid_cpu = Variable(torch.randn(N, H, W, 2), requires_grad=True)
 | 
			
		||||
                out_cpu = F.grid_sample(input_cpu, grid_cpu)
 | 
			
		||||
                out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode)
 | 
			
		||||
 | 
			
		||||
                input_cuda = Variable(base_input.cuda().expand(input_cuda.size()), requires_grad=True)
 | 
			
		||||
                grid_cuda = Variable(grid_cpu.data.cuda(), requires_grad=True)
 | 
			
		||||
                out_cuda = F.grid_sample(input_cuda, grid_cuda)
 | 
			
		||||
                out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode)
 | 
			
		||||
                self.assertEqual(out_cpu, out_cuda)
 | 
			
		||||
 | 
			
		||||
            # test same size output
 | 
			
		||||
            test_shape(N, C, H, W, H, W)
 | 
			
		||||
            test_shape(N, C, H, W, H, W, padding_mode)
 | 
			
		||||
 | 
			
		||||
            # test larger output
 | 
			
		||||
            N = random.randint(1, 8)
 | 
			
		||||
@ -2906,7 +3113,7 @@ class TestNN(NNTestCase):
 | 
			
		||||
            IW = random.randint(1, 8)
 | 
			
		||||
            H = random.randint(IH + 1, 12)
 | 
			
		||||
            W = random.randint(IH + 1, 12)
 | 
			
		||||
            test_shape(N, C, IH, IW, H, W)
 | 
			
		||||
            test_shape(N, C, IH, IW, H, W, padding_mode)
 | 
			
		||||
 | 
			
		||||
            # test smaller output
 | 
			
		||||
            N = random.randint(1, 8)
 | 
			
		||||
@ -2915,21 +3122,44 @@ class TestNN(NNTestCase):
 | 
			
		||||
            IW = random.randint(1, 8)
 | 
			
		||||
            H = random.randint(1, IH)
 | 
			
		||||
            W = random.randint(1, IW)
 | 
			
		||||
            test_shape(N, C, IH, IW, H, W)
 | 
			
		||||
            test_shape(N, C, IH, IW, H, W, padding_mode)
 | 
			
		||||
 | 
			
		||||
        # test CUDNN against CPU
 | 
			
		||||
        if TEST_CUDNN:
 | 
			
		||||
            test_cpu_against_cuda(N, C, H, W)
 | 
			
		||||
        # test known input on CPU
 | 
			
		||||
        for padding_mode in ['zeros', 'border']:
 | 
			
		||||
 | 
			
		||||
        # test CUDA (without CUDNN) against CPU
 | 
			
		||||
        if TEST_CUDA:
 | 
			
		||||
            input = Variable(torch.arange(1, 11).view(1, 1, 2, 5))
 | 
			
		||||
            grid = Variable(torch.Tensor(
 | 
			
		||||
                [[-0.9, -1.4, 0, 0.2, 1],
 | 
			
		||||
                 [-1, -0.333, 0, 0.5, 1],
 | 
			
		||||
                 [-1, -0.5, 0, 0.3333, 1],
 | 
			
		||||
                 [-1, -0.2, 0, 1.1, 0.5]]).view(1, 2, 5, 2))
 | 
			
		||||
            output = F.grid_sample(input, grid, padding_mode=padding_mode)
 | 
			
		||||
 | 
			
		||||
            # GridSampler will automatically use CUDNN if it is available
 | 
			
		||||
            # so we disable CUDNN temporarily
 | 
			
		||||
            original_cudnn_enabled = cudnn.enabled
 | 
			
		||||
            cudnn.enabled = False
 | 
			
		||||
            test_cpu_against_cuda(N, C, H, W)
 | 
			
		||||
            cudnn.enabled = original_cudnn_enabled
 | 
			
		||||
            if padding_mode == 'zeros':
 | 
			
		||||
                groundtruth = torch.Tensor(
 | 
			
		||||
                    [[0.9600, 6.0000000000, 5.0000, 4.8340, 9.0000],
 | 
			
		||||
                     [2.2500, 6.333250045, 5.0000, 5.1000, 7.0000]]).view(1, 1, 2, 5)
 | 
			
		||||
            else:
 | 
			
		||||
                groundtruth = torch.Tensor(
 | 
			
		||||
                    [[1.2000, 6.0000000000, 5.0000, 4.8340, 9.0000],
 | 
			
		||||
                     [2.2500, 6.333250045, 5.0000, 5.1000, 8.7500]]).view(1, 1, 2, 5)
 | 
			
		||||
 | 
			
		||||
            self.assertEqual(output.data, groundtruth)
 | 
			
		||||
 | 
			
		||||
            # do gradcheck
 | 
			
		||||
            N = random.randint(1, 8)
 | 
			
		||||
            C = random.randint(1, 8)
 | 
			
		||||
            H = random.randint(1, 8)
 | 
			
		||||
            W = random.randint(1, 8)
 | 
			
		||||
            input = Variable(torch.randn(N, C, H, W), requires_grad=True)
 | 
			
		||||
            grid = Variable(torch.randn(N, H, W, 2), requires_grad=True)
 | 
			
		||||
            self.assertTrue(gradcheck(
 | 
			
		||||
                lambda inp, grid: F.grid_sample(inp, grid, padding_mode=padding_mode),
 | 
			
		||||
                (input, grid)))
 | 
			
		||||
 | 
			
		||||
            # test CUDA against CPU
 | 
			
		||||
            if TEST_CUDA:
 | 
			
		||||
                test_cpu_against_cuda(N, C, H, W, padding_mode)
 | 
			
		||||
 | 
			
		||||
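The reworked test loops over the new `padding_mode` argument of F.grid_sample: grid locations outside [-1, 1] read zeros under 'zeros' and the nearest edge value under 'border'. A tiny sketch of the difference (the values are chosen only for illustration):

import torch
import torch.nn.functional as F
from torch.autograd import Variable

inp = Variable(torch.arange(1, 5).view(1, 1, 2, 2))      # 2x2 input with values 1..4
grid = Variable(torch.Tensor([[[[-2.0, -2.0]], [[2.0, 2.0]]]]))  # both sample points out of range
print(F.grid_sample(inp, grid, padding_mode='zeros'))    # zeros outside the input
print(F.grid_sample(inp, grid, padding_mode='border'))   # clamped to the nearest border pixel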
    def test_affine_grid(self):
 | 
			
		||||
        # test known input on CPU
 | 
			
		||||
@ -3637,22 +3867,62 @@ new_criterion_tests = [
 | 
			
		||||
        target_fn=lambda: torch.randn(15, 10).gt(0).double(),
 | 
			
		||||
        desc='weights'
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        module_name='NLLLoss',
 | 
			
		||||
        input_size=(2, 3, 5, 5, 2, 2),
 | 
			
		||||
        target_fn=lambda: torch.rand(2, 5, 5, 2, 2).mul(3).floor().long(),
 | 
			
		||||
        reference_fn=lambda i, t, m:
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t, size_average=get_size_average(m)),
 | 
			
		||||
        check_no_size_average=True,
 | 
			
		||||
        desc='higher_dim'
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        module_name='NLLLoss',
 | 
			
		||||
        input_size=(2, 3, 5),
 | 
			
		||||
        target_fn=lambda: torch.rand(2, 5).mul(3).floor().long(),
 | 
			
		||||
        reference_fn=lambda i, t, m:
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t, size_average=get_size_average(m)),
 | 
			
		||||
        check_no_size_average=True,
 | 
			
		||||
        desc='dim_is_3'
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        module_name='PoissonNLLLoss',
 | 
			
		||||
        input_size=(2, 3, 4, 5),
 | 
			
		||||
        target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
 | 
			
		||||
        desc='reduced_loss',
 | 
			
		||||
        desc='no_full_loss',  # without Stirling approximation
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        module_name='PoissonNLLLoss',
 | 
			
		||||
        constructor_args=(False, True, True),
 | 
			
		||||
        input_fn=lambda: torch.randn(2, 3, 4, 5).abs_().add_(0.001),
 | 
			
		||||
        target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
 | 
			
		||||
        desc='full_loss',
 | 
			
		||||
        desc='full_loss',  # with Stirling approximation
 | 
			
		||||
    ),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def poissonnllloss_no_reduce_test():
 | 
			
		||||
    t = Variable(torch.randn(10, 10))
 | 
			
		||||
    return dict(
 | 
			
		||||
        fullname='PoissonNLLLLoss_no_reduce',
 | 
			
		||||
        constructor=wrap_functional(
 | 
			
		||||
            lambda i: F.poisson_nll_loss(i, t.type_as(i), reduce=False)),
 | 
			
		||||
        input_fn=lambda: torch.rand(10, 10),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def kldivloss_no_reduce_test():
 | 
			
		||||
    t = Variable(torch.randn(10, 10))
 | 
			
		||||
    return dict(
 | 
			
		||||
        fullname='KLDivLoss_no_reduce',
 | 
			
		||||
        constructor=wrap_functional(
 | 
			
		||||
            lambda i: F.kl_div(i, t.type_as(i), reduce=False)),
 | 
			
		||||
        input_fn=lambda: torch.rand(10, 10).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['KLDivLoss'](i, t.data.type_as(i), reduce=False),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def l1loss_no_reduce_test():
 | 
			
		||||
    t = Variable(torch.randn(2, 3, 4))
 | 
			
		||||
    return dict(
 | 
			
		||||
@ -3764,7 +4034,7 @@ def nllloss2d_no_reduce_test():
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -3777,7 +4047,7 @@ def nllloss2d_no_reduce_ignore_index_test():
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -3794,7 +4064,50 @@ def nllloss2d_no_reduce_weights_test():
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs(i)),
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def nlllossNd_no_reduce_test():
 | 
			
		||||
    t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
 | 
			
		||||
    kwargs = {'reduce': False}
 | 
			
		||||
    return dict(
 | 
			
		||||
        fullname='NLLLossNd_no_reduce',
 | 
			
		||||
        constructor=wrap_functional(
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def nlllossNd_no_reduce_ignore_index_test():
 | 
			
		||||
    t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
 | 
			
		||||
    kwargs = {'ignore_index': 1, 'reduce': False}
 | 
			
		||||
    return dict(
 | 
			
		||||
        fullname='NLLLossNd_no_reduce_ignore_index',
 | 
			
		||||
        constructor=wrap_functional(
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def nlllossNd_no_reduce_weights_test():
 | 
			
		||||
    t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())
 | 
			
		||||
    weight = torch.rand(3)
 | 
			
		||||
 | 
			
		||||
    def kwargs(i):
 | 
			
		||||
        return {'weight': weight.type_as(i), 'reduce': False}
 | 
			
		||||
 | 
			
		||||
    return dict(
 | 
			
		||||
        fullname='NLLLossNd_no_reduce_weights',
 | 
			
		||||
        constructor=wrap_functional(
 | 
			
		||||
            lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))),
 | 
			
		||||
        input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(),
 | 
			
		||||
        reference_fn=lambda i, _:
 | 
			
		||||
            loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)),
 | 
			
		||||
        pickle=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
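The three nlllossNd_* entries cover nll_loss on inputs with arbitrary trailing dimensions, with `reduce=False` returning one loss value per element instead of a single average. A sketch of the shapes involved (matching the sizes used above):

import torch
import torch.nn.functional as F
from torch.autograd import Variable

log_probs = Variable(torch.rand(2, 3, 5, 5, 2, 2).log())            # (N, C, d1, d2, ...)
target = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long())  # (N, d1, d2, ...) class indices
per_element = F.nll_loss(log_probs, target, reduce=False)
print(per_element.size())                                           # torch.Size([2, 5, 5, 2, 2])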
@ -3811,6 +4124,8 @@ def smoothl1loss_no_reduce_test():
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
new_module_tests = [
 | 
			
		||||
    poissonnllloss_no_reduce_test(),
 | 
			
		||||
    kldivloss_no_reduce_test(),
 | 
			
		||||
    l1loss_no_reduce_test(),
 | 
			
		||||
    mseloss_no_reduce_test(),
 | 
			
		||||
    nllloss_no_reduce_test(),
 | 
			
		||||
@ -3821,6 +4136,9 @@ new_module_tests = [
 | 
			
		||||
    nllloss2d_no_reduce_test(),
 | 
			
		||||
    nllloss2d_no_reduce_weights_test(),
 | 
			
		||||
    nllloss2d_no_reduce_ignore_index_test(),
 | 
			
		||||
    nlllossNd_no_reduce_test(),
 | 
			
		||||
    nlllossNd_no_reduce_weights_test(),
 | 
			
		||||
    nlllossNd_no_reduce_ignore_index_test(),
 | 
			
		||||
    smoothl1loss_no_reduce_test(),
 | 
			
		||||
    dict(
 | 
			
		||||
        module_name='BatchNorm1d',
 | 
			
		||||
@ -4553,7 +4871,7 @@ new_module_tests = [
 | 
			
		||||
        desc='dim'
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        constructor=wrap_functional(F.softmax, dim=1),
 | 
			
		||||
        constructor=wrap_functional(F.softmax, dim=-1),
 | 
			
		||||
        input_size=(2, 128),  # trigger the last-dim algo in CUDA
 | 
			
		||||
        fullname='softmax_lastdim',
 | 
			
		||||
        pickle=False,
 | 
			
		||||
@ -4585,7 +4903,7 @@ new_module_tests = [
 | 
			
		||||
        pickle=False,
 | 
			
		||||
    ),
 | 
			
		||||
    dict(
 | 
			
		||||
        constructor=wrap_functional(F.log_softmax, dim=1),
 | 
			
		||||
        constructor=wrap_functional(F.log_softmax, dim=-1),
 | 
			
		||||
        input_size=(2, 128),  # trigger the last-dim algo in CUDA
 | 
			
		||||
        fullname='log_softmax_lastdim',
 | 
			
		||||
        pickle=False,
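Passing dim=-1 instead of dim=1 pins these fixtures to the last dimension whatever the input rank, which is what the last-dim CUDA kernel mentioned in the comment is keyed on. A small illustration (a sketch, not part of the suite):

    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable

    x = Variable(torch.randn(2, 128))
    y = F.softmax(x, dim=-1)      # normalizes over the last dimension
    row_sums = y.sum(1)           # each of the 2 rows sums to ~1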

@@ -1,3 +1,4 @@
import math
import unittest
import functools
from copy import deepcopy
@@ -8,7 +9,7 @@ import torch.nn.functional as F
from torch.optim import SGD
from torch.autograd import Variable
from torch import sparse
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from common import TestCase, run_tests


@@ -61,13 +62,14 @@ class TestOptim(TestCase):

        self.assertLessEqual(params.data.dist(solution), initial_dist)

    def _test_rosenbrock_sparse(self, constructor):
    def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
        params_t = torch.Tensor([1.5, 1.5])

        params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
        params_c = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
        params = Variable(params_t, requires_grad=True)
        optimizer = constructor([params])
        optimizer_c = constructor([params_c])
        if not sparse_only:
            params_c = Variable(params_t.clone(), requires_grad=True)
            optimizer_c = constructor([params_c])

        solution = torch.Tensor([1, 1])
        initial_dist = params.data.dist(solution)
@@ -99,8 +101,9 @@ class TestOptim(TestCase):
            # Do cyclic coordinate descent
            w = i % 2
            optimizer.step(functools.partial(eval, params, True, w))
            optimizer_c.step(functools.partial(eval, params_c, False, w))
            self.assertEqual(params.data, params_c.data)
            if not sparse_only:
                optimizer_c.step(functools.partial(eval, params_c, False, w))
                self.assertEqual(params.data, params_c.data)

        self.assertLessEqual(params.data.dist(solution), initial_dist)

@@ -229,6 +232,11 @@ class TestOptim(TestCase):
                lr=1e-3)
        )

    def test_sgd_sparse(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SGD(params, lr=5e-3)
        )

    def test_adam(self):
        self._test_rosenbrock(
            lambda params: optim.Adam(params, lr=1e-2),
@@ -247,6 +255,12 @@ class TestOptim(TestCase):
                lr=1e-3)
        )

    def test_sparse_adam(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SparseAdam(params, lr=4e-2),
            True
        )
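test_sparse_adam passes sparse_only=True because optim.SparseAdam only accepts sparse gradients, so there is no dense twin to step in lockstep with. A hedged usage sketch (the embedding sizes are made up):

    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.autograd import Variable

    embedding = nn.Embedding(1000, 16, sparse=True)    # sparse=True makes the weight gradient a sparse tensor
    optimizer = optim.SparseAdam(embedding.parameters(), lr=4e-2)

    loss = embedding(Variable(torch.LongTensor([1, 2, 3]))).sum()
    loss.backward()
    optimizer.step()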

    def test_adadelta(self):
        self._test_rosenbrock(
            lambda params: optim.Adadelta(params),
@@ -423,10 +437,10 @@ class TestLRScheduler(TestCase):
        # lr = 0.05     if epoch < 3
        # lr = 0.005    if 30 <= epoch < 6
        # lr = 0.0005   if epoch >= 9
        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        epochs = 10
        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test(scheduler, targets, epochs)
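With gamma=0.1 and step_size=3 the expected values follow the usual step schedule,

    \mathrm{lr}_t = \mathrm{lr}_0 \cdot \gamma^{\lfloor t / \text{step\_size} \rfloor}

so the initial 0.05 is divided by 10 every three epochs, which is exactly the single_targets list above.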

    def test_multi_step_lr(self):
@@ -434,106 +448,116 @@ class TestLRScheduler(TestCase):
        # lr = 0.005    if 2 <= epoch < 5
        # lr = 0.0005   if epoch < 9
        # lr = 0.00005   if epoch >= 9
        single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        epochs = 10
        single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test(scheduler, targets, epochs)

    def test_exp_lr(self):
        single_targets = [0.05 * (0.9 ** x) for x in range(10)]
        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        epochs = 10
        single_targets = [0.05 * (0.9 ** x) for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        self._test(scheduler, targets, epochs)

    def test_cos_anneal_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        self._test(scheduler, targets, epochs)
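The single_targets above are the closed form of cosine annealing,

    \eta_t = \eta_{\min} + (\eta_{\max} - \eta_{\min}) \cdot \frac{1 + \cos(\pi t / T_{\max})}{2}

with \eta_{\max} = 0.05, \eta_{\min} = 10^{-10} and T_{\max} = epochs, evaluated at t = 0, ..., epochs - 1.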

    def test_reduce_lr_on_plateau1(self):
        epochs = 10
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [10 - i * 0.0167 for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                      threshold=0.01, patience=5, cooldown=5)
        epochs = 10
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau2(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2]
        metrics = [10 - i * 0.0165 for i in range(22)]
        scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                      mode='min', threshold=0.1)
        epochs = 22
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau3(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [-0.8] * 2 + [-0.234] * 20
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                      threshold_mode='abs')
        epochs = 22
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau4(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (1.025 ** i) for i in range(20)]  # 1.025 > 1.1**0.25
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                      threshold_mode='rel', threshold=0.1)
        epochs = 20
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau5(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        epochs = 20
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau6(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (0.85 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1)
        epochs = 20
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau7(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1] * 7 + [0.6] + [0.5] * 12
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        epochs = 20
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau8(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3],
                                      threshold=0.1, patience=5, cooldown=5)
        epochs = 20
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
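Unlike the step-based schedulers, ReduceLROnPlateau is driven by a monitored metric rather than by the epoch counter. A minimal usage sketch, assuming the same imports as this test file (model, train_one_epoch and validate are placeholders, not names from the suite):

    optimizer = optim.SGD(model.parameters(), lr=0.5)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', threshold_mode='rel',
                                  threshold=0.1, patience=5, cooldown=5)

    for epoch in range(20):
        train_one_epoch(model, optimizer)   # placeholder
        val_loss = validate(model)          # the metric being monitored
        scheduler.step(val_loss)            # lr is multiplied by `factor` (default 0.1) after `patience` bad epochs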

    def test_lambda_lr(self):
        epochs = 10
        self.opt.param_groups[0]['lr'] = 0.05
        self.opt.param_groups[1]['lr'] = 0.4
        targets = [[0.05 * (0.9 ** x) for x in range(10)], [0.4 * (0.8 ** x) for x in range(10)]]
        targets = [[0.05 * (0.9 ** x) for x in range(epochs)], [0.4 * (0.8 ** x) for x in range(epochs)]]
        scheduler = LambdaLR(self.opt,
                             lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2])
        epochs = 10
        self._test(scheduler, targets, epochs)

    def _test(self, scheduler, targets, epochs=10):

@@ -8,6 +8,7 @@ import torch.cuda
import tempfile
import unittest
import warnings
import pickle
from torch.utils.dlpack import from_dlpack, to_dlpack
from itertools import product, combinations
from common import TestCase, iter_indices, TEST_NUMPY, run_tests, download_file, skipIfNoLapack, \
@@ -71,6 +72,34 @@ class TestTorch(TestCase):
                    res2[i, j] = v1[i] * v2[j]
            self.assertEqual(res1, res2)

    def test_addr(self):
        types = {
            'torch.DoubleTensor': 1e-8,
            'torch.FloatTensor': 1e-4,
        }

        def run_test(m, v1, v2, m_transform=lambda x: x):
            m = m_transform(m.clone())
            ref = m.clone()
            torch.addr(m, v1, v2, out=m)
            for i in range(m.size(0)):
                for j in range(m.size(1)):
                    ref[i, j] += v1[i] * v2[j]
            self.assertEqual(m, ref)

        for tname, _prec in types.items():
            for h, w in [(100, 110), (1, 20), (200, 2)]:
                m = torch.randn(h, w).type(tname)
                v1 = torch.randn(h).type(tname)
                v2 = torch.randn(w).type(tname)
                run_test(m, v1, v2)
                # test transpose
                run_test(m, v2, v1, lambda x: x.transpose(0, 1))
                # test 0 strided
                v1 = torch.randn(1).type(tname).expand(h)
                run_test(m, v1, v2)
                run_test(m, v2, v1, lambda x: x.transpose(0, 1))
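The reference loop above spells out the defining update of torch.addr with the default beta = alpha = 1:

    \mathrm{out} = \beta M + \alpha \,(v_1 \otimes v_2), \qquad \mathrm{out}_{ij} = \beta M_{ij} + \alpha\, v_{1,i}\, v_{2,j}

Passing out=m makes the update land in m itself, which is why ref is cloned from m before the call.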

    def test_addmv(self):
        types = {
            'torch.DoubleTensor': 1e-8,
@@ -320,17 +349,20 @@ class TestTorch(TestCase):
            "mean", "median", "mode", "norm", "prod",
            "std", "sum", "var", "max", "min"]

        def normfn_attr(t, dim, keepdim=False):
        def normfn_attr(t, dim, keepdim=False, out=None):
            attr = getattr(torch, "norm")
            return attr(t, 2, dim, keepdim)
            return attr(t, 2, dim, keepdim, out=out)

        for fn_name in dim_red_fns:
            fn_attr = getattr(torch, fn_name) if fn_name != "norm" else normfn_attr

            def fn(x, dim, keepdim=False):
                ans = fn_attr(x, dim, keepdim=keepdim)
            def fn(x, dim, keepdim=False, out=None):
                ans = fn_attr(x, dim, keepdim=keepdim, out=out)
                return ans if not isinstance(ans, tuple) else ans[0]

            def fn_tuple(x, dim, keepdim=False, out=None):
                return fn_attr(x, dim, keepdim=keepdim, out=out)

            def test_multidim(x, dim):
                self.assertEqual(fn(x, dim).unsqueeze(dim), fn(x, dim, keepdim=True))
                self.assertEqual(x.ndimension() - 1, fn(x, dim).ndimension())
@@ -355,6 +387,25 @@ class TestTorch(TestCase):
            x = cast(torch.randn(dims))
            test_multidim(x, singleton_dim)

            # check reducing with output kwargs
            if fn_name in ['median', 'mode', 'max', 'min']:
                y = cast(torch.randn(5, 3))
                values = cast(torch.randn(5, 3))
                indices = cast(torch.zeros(5, 3).long() - 1)
                fn_tuple(y, 1, keepdim=False, out=(values[:, 1], indices[:, 1]))
                values_expected, indices_expected = fn_tuple(y, 1, keepdim=False)
                self.assertEqual(values[:, 1], values_expected,
                                 '{} values with out= kwarg'.format(fn_name))
                self.assertEqual(indices[:, 1], indices_expected,
                                 '{} indices with out= kwarg'.format(fn_name))
                continue

            x = cast(torch.randn(5, 3))
            y = cast(torch.randn(5, 3))
            fn(y, 1, keepdim=False, out=x[:, 1])
            expected = fn(y, 1, keepdim=False)
            self.assertEqual(x[:, 1], expected, '{} with out= kwarg'.format(fn_name))

    def test_dim_reduction(self):
        self._test_dim_reduction(self, lambda t: t)

@@ -408,6 +459,17 @@ class TestTorch(TestCase):
        test((10,))
        test((5, 5))

    def test_all_any_empty(self):
        x = torch.ByteTensor()
        self.assertTrue(x.all())
        self.assertFalse(x.any())

    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
    def test_all_any_empty_cuda(self):
        x = torch.cuda.ByteTensor()
        self.assertTrue(x.all())
        self.assertFalse(x.any())

    def test_mv(self):
        m1 = torch.randn(100, 100)
        v1 = torch.randn(100)
@@ -1111,6 +1173,11 @@ class TestTorch(TestCase):
        torch.arange(0, 1, out=res2)
        self.assertEqual(res1, res2, 0)

        # Check arange with only one argument
        res1 = torch.arange(10)
        res2 = torch.arange(0, 10)
        self.assertEqual(res1, res2, 0)

        # Check arange for non-contiguous tensors.
        x = torch.zeros(2, 3)
        torch.arange(0, 4, out=x.narrow(1, 1, 2))
@@ -1873,6 +1940,17 @@ class TestTorch(TestCase):

        self.assertRaises(RuntimeError, lambda: torch.cat([]))

    def test_cat_bad_input_sizes(self):
        x = torch.randn(2, 1)
        y = torch.randn(2, 1, 1)
        z = torch.randn(2, 1, 1)
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z]))

        x = torch.randn(2, 1, 2)
        y = torch.randn(2, 1, 1)
        z = torch.randn(2, 2, 1)
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1))

    def test_stack(self):
        x = torch.rand(2, 3, 4)
        y = torch.rand(2, 3, 4)
@@ -3429,6 +3507,24 @@ class TestTorch(TestCase):
            dest2[idx[i]] = dest2[idx[i]] + src[i]
        self.assertEqual(dest, dest2)

    def test_index_select(self):
        src = torch.randn(3, 4, 5)
        # Index can be duplicated.
        idx = torch.LongTensor([2, 1, 0, 1, 2])
        dest = torch.index_select(src, 0, idx)
        self.assertEqual(dest.shape, (5, 4, 5))
        for i in range(idx.size(0)):
            self.assertEqual(dest[i], src[idx[i]])

        # Check that 'out' is used correctly.
        out = torch.randn(5 * 4 * 5)
        dest = torch.index_select(src, 0, idx, out=out.view(5, 4, 5))
        self.assertEqual(dest.shape, (5, 4, 5))
        for i in range(idx.size(0)):
            self.assertEqual(dest[i], src[idx[i]])
        out.fill_(0.123)
        self.assertEqual(out, dest.view(-1))  # Must point to the same storage.

    def test_take(self):
        def check(src, idx):
            expected = src.contiguous().view(-1).index_select(
@@ -3643,6 +3739,11 @@ class TestTorch(TestCase):
        self.assertEqual(tensor.std(), tensor.std(unbiased=True))
        self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0])

    def test_var_stability(self):
        tensor = torch.FloatTensor([2281.5, 2281.25])
        self.assertEqual(tensor.var(0)[0], 0.03125)
        self.assertEqual(tensor.var(), 0.03125)

    def test_view(self):
        tensor = torch.rand(15)
        template = torch.rand(3, 5)
@@ -3698,18 +3799,47 @@ class TestTorch(TestCase):
        self.assertEqual(torch.randn(()).expand(()), torch.randn(()))

    def test_repeat(self):
        result = torch.Tensor()
        tensor = torch.rand(8, 4)

        initial_shape = (8, 4)
        tensor = torch.rand(*initial_shape)

        size = (3, 1, 1)
        torchSize = torch.Size(size)
        target = [3, 8, 4]
        self.assertEqual(tensor.repeat(*size).size(), target, 'Error in repeat')
        self.assertEqual(tensor.repeat(torchSize).size(), target, 'Error in repeat using LongStorage')
        self.assertEqual(tensor.repeat(torchSize).size(), target,
                         'Error in repeat using LongStorage')
        result = tensor.repeat(*size)
        self.assertEqual(result.size(), target, 'Error in repeat using result')
        result = tensor.repeat(torchSize)
        self.assertEqual(result.size(), target, 'Error in repeat using result and LongStorage')
        self.assertEqual((result.mean(0).view(8, 4) - tensor).abs().max(), 0, 'Error in repeat (not equal)')
        self.assertEqual(result.mean(0).view(8, 4), tensor, 'Error in repeat (not equal)')

    @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
    def test_repeat_tile(self):

        initial_shape = (8, 4)

        repeats = ((3, 1, 1),
                   (3, 3, 3),
                   (1, 2, 1),
                   (2, 2, 2, 2))

        def _generate_noncontiguous_input():

            out = np.broadcast_to(np.random.random((1, 4)),
                                  initial_shape)

            assert not (out.flags.c_contiguous or out.flags.f_contiguous)

            return out

        for repeat in repeats:
            for tensor in (torch.from_numpy(np.random.random(initial_shape)),
                           torch.from_numpy(_generate_noncontiguous_input()),):

                self.assertEqual(tensor.repeat(*repeat).numpy(),
                                 np.tile(tensor.numpy(), repeat))

    def test_is_same_size(self):
        t1 = torch.Tensor(3, 4, 9, 10)
@@ -4071,6 +4201,18 @@ class TestTorch(TestCase):
            rootview = c[8]
            self.assertEqual(rootview.data_ptr(), c[0].data_ptr())

    def test_serialization_offset(self):
        a = torch.randn(5, 5)
        i = 41
        with tempfile.TemporaryFile() as f:
            pickle.dump(i, f)
            torch.save(a, f)
            f.seek(0)
            j = pickle.load(f)
            b = torch.load(f)
            self.assertTrue(torch.equal(a, b))
            self.assertEqual(i, j)

    def test_half_tensor(self):
        x = torch.randn(5, 5).float()
        y = torch.randn(5, 5).float()
@@ -4186,6 +4328,10 @@ class TestTorch(TestCase):
        self.assertEqual(type(tensor), torch.FloatTensor)
        self.assertEqual(tensor, torch.FloatTensor([[1.0, 2.0], [3.0, 4.0]]))

        tensor = torch.load(test_file_path, map_location='cpu')
        self.assertEqual(type(tensor), torch.FloatTensor)
        self.assertEqual(tensor, torch.FloatTensor([[1.0, 2.0], [3.0, 4.0]]))

    def test_from_buffer(self):
        a = bytearray([1, 2, 3, 4])
        self.assertEqual(torch.ByteStorage.from_buffer(a).tolist(), [1, 2, 3, 4])
@@ -4247,6 +4393,19 @@ class TestTorch(TestCase):
        x.__repr__()
        str(x),

    def test_sizeof(self):
        sizeof_empty = torch.randn(0).storage().__sizeof__()
        sizeof_10 = torch.randn(10).storage().__sizeof__()
        sizeof_100 = torch.randn(100).storage().__sizeof__()
        self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10)
        self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0)

        sizeof_empty = torch.randn(0).type(torch.ByteTensor).storage().__sizeof__()
        sizeof_10 = torch.randn(10).type(torch.ByteTensor).storage().__sizeof__()
        sizeof_100 = torch.randn(100).type(torch.ByteTensor).storage().__sizeof__()
        self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10)
        self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0)

    def test_unsqueeze(self):
        x = torch.randn(2, 3, 4)
        y = x.unsqueeze(1)
@@ -4511,6 +4670,19 @@ class TestTorch(TestCase):
            for i in range(len(x)):
                self.assertEqual(geq2_x[i], geq2_array[i])

    def test_error_msg_type_translation(self):
        with self.assertRaisesRegex(
                RuntimeError,
                # message includes both torch.DoubleTensor and torch.LongTensor
                '(?=.*torch\.DoubleTensor)(?=.*torch\.LongTensor)'):

            # Calls model with a DoubleTensor input but LongTensor weights
            input = torch.autograd.Variable(torch.randn(1, 1, 1, 6).double())
            weight = torch.zeros(1, 1, 1, 3).long()
            model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False)
            model.weight.data = weight
            out = model(input)

    def test_comparison_ops(self):
        x = torch.randn(5, 5)
        y = torch.randn(5, 5)

@@ -386,7 +386,7 @@ class TestONNXUtils(TestCase):
        sizes = [2, 3, 4]
        pad = [1, 2, 3, 4]
        paddings = prepare_onnx_paddings(len(sizes), pad)
        self.assertEqual(paddings, [0, 0, 3, 4, 1, 2])
        self.assertEqual(paddings, [0, 3, 1, 0, 4, 2])

    def test_check_onnx_broadcast(self):


@@ -13,10 +13,10 @@

- name: add(Tensor self, Tensor other, *, Scalar alpha=1)
  self: grad
  other: grad * alpha
  other: maybe_multiply(grad, alpha)

- name: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
  self: grad * beta
  self: maybe_multiply(grad, beta)
  batch1: grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) }).bmm(batch2.transpose(1, 2)) * alpha
  batch2: batch1.transpose(1, 2).bmm(grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) })) * alpha

@@ -36,12 +36,12 @@
  mat2: mm_mat2_backward(grad, mat1, mat2.sizes(), mat2.strides(), alpha)

- name: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1)
  self: grad * beta
  self: maybe_multiply(grad, beta)
  mat: grad.ger(vec) * alpha
  vec: mat.t().mv(grad) * alpha

- name: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1)
  self: grad * beta
  self: maybe_multiply(grad, beta)
  vec1: grad.mv(vec2) * alpha
  vec2: grad.t().mv(vec1) * alpha

@@ -62,7 +62,7 @@
  other: grad * -self * ((self * self + other * other).reciprocal())

- name: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
  self: grad * beta
  self: maybe_multiply(grad, beta)
  batch1: grad.bmm(batch2.transpose(1, 2)) * alpha
  batch2: batch1.transpose(1, 2).bmm(grad) * alpha

@@ -108,8 +108,8 @@
  self: grad.diag(diagonal)

- name: dist(Tensor self, Tensor other, Scalar p=2)
  self: norm_backward(grad, self - other, p)
  other: -norm_backward(grad, self - other, p)
  self: norm_backward(grad, self - other, p, result)
  other: -norm_backward(grad, self - other, p, result)

- name: div(Tensor self, Scalar other)
  self: grad / other
@@ -149,7 +149,8 @@

- name: eye  # fallthrough

- name: fill(Tensor self, Scalar value)  # FIXME
- name: fill(Tensor self, Scalar value)
  self: zeros_like(grad)

- name: floor(Tensor self)
  self: zeros_like(grad)
@@ -217,7 +218,6 @@

- name: index_select(Tensor self, int64_t dim, Tensor index)
  self: grad.type().zeros(self.sizes()).index_add_(dim, index, grad)
  __view__: True

- name: inverse(Tensor self)
  self: -at::mm(output.t(), at::mm(grad, output.t()))
@@ -348,10 +348,10 @@
  self: zeros_like(grad)

- name: norm(Tensor self, Scalar p=2)
  self: norm_backward(grad, self, p)
  self: norm_backward(grad, self, p, result)

- name: norm(Tensor self, Scalar p, int64_t dim, bool keepdim=False)
  self: norm_backward(grad, self, p, dim, keepdim)
  self: norm_backward(grad, self, p, destination, dim, keepdim)

- name: numel  # fallthrough
- name: ones  # fallthrough
@@ -395,7 +395,7 @@
  self: not_implemented("pstrf")

- name: put(Tensor self, Tensor index, Tensor source, bool accumulate)
  self: zeros_like(self).put_(index, source, accumulate)
  self: grad.clone().put_(index, zeros_like(source), accumulate)
  source: grad.take(index)

- name: qr(Tensor self)
@@ -468,7 +468,7 @@
  __view__: True

- name: squeeze(Tensor self, int64_t dim)
  self: maybe_unsqueeze(grad, dim, self.size(dim) == 1)
  self: maybe_unsqueeze(grad, dim, self.size(dim) == 1 && self.sizes().size() != 1)
  __view__: True

- name: std
@@ -563,9 +563,9 @@
  grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad)
  input: zeros_like(input)

- name: elu_backward(Tensor grad_output, Tensor input, Scalar alpha, bool inplace, Tensor output)
  grad_output: elu_backward(grad, input, alpha, inplace, output)
  input: grad * grad_input * (input < 0).toType(grad.type())
- name: elu_backward(Tensor grad_output, Scalar alpha, Tensor output)
  grad_output: elu_backward(grad, alpha, output)
  output: grad * grad_output * (output < 0).toType(grad.type())

- name: glu_backward(Tensor grad_output, Tensor input, int64_t dim)
  grad_output: glu_double_backward_grad_output(grad, input, dim)
@@ -575,11 +575,12 @@
  grad_output: hardshrink_backward(grad, input, lambd)
  input: zeros_like(grad)

- name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val, bool inplace)
  grad_output: hardtanh_backward(grad, input, min_val, max_val, false)
- name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val)
  grad_output: hardtanh_backward(grad, input, min_val, max_val)
  input: zeros_like(grad)

- name: kl_div_backward(Tensor input, Tensor target, bool size_average)
- name: kl_div_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
  grad_output: kl_div_double_backward_grad_output(grad, input, target, size_average, reduce)
  input: zeros_like(grad)

- name: l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
@@ -594,8 +595,8 @@
  grad_output: grad - (grad * output.exp()).sum(dim, true)
  input: log_softmax_double_backward(grad, grad_output, dim, output)

- name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope, bool inplace)
  grad_output: leaky_relu_backward(grad, input, negative_slope, false)
- name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope)
  grad_output: leaky_relu_backward(grad, input, negative_slope)
  input: zeros_like(grad)

- name: max_pool2d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
@@ -623,8 +624,8 @@
  input: zeros_like(input)
  weight: zeros_like(weight)

- name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, bool inplace, Tensor noise)
  grad_output: rrelu_backward(grad, input, lower, upper, training, false, noise)
- name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, Tensor noise)
  grad_output: rrelu_backward(grad, input, lower, upper, training, noise)
  input: zeros_like(grad)

- name: smooth_l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
@@ -646,8 +647,8 @@
  grad_output: softshrink_backward(grad, input, lambd)
  input: zeros_like(grad)

- name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value, bool inplace)
  grad_output: threshold_backward(grad, input, threshold, value, false)
- name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value)
  grad_output: threshold_backward(grad, input, threshold, value)
  input: zeros_like(grad)

- name: _sigmoid_backward(Tensor grad_output, Tensor output)

@@ -49,6 +49,16 @@ PY_VARIABLE_METHOD_DEF = CodeTemplate("""\
UNPACK_SELF = "auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata;"


# XXX: if you got here because of an assertion failure, it doesn't mean
# it's enough to just extend the list here. Before you do this, make sure
# to add an appropriate wrap() overload in torch/csrc/autograd/utils/wrap_outputs.h.
SUPPORTED_RETURN_TYPES = {
    'Tensor', 'std::tuple<Tensor,Tensor>',
    'std::tuple<Tensor,Tensor,Tensor>', 'std::vector<Tensor>',
    'Scalar', 'bool', 'int64_t', 'void*'
}


def create_python_bindings(
        python_functions, py_methods, py_method_defs, py_method_dispatch,
        is_class):
@@ -80,6 +90,9 @@ def create_python_bindings(

    def emit_dispatch(i, function):
        env = {}
        simple_return_type = function['return_type'].replace(' &', '')
        assert simple_return_type in SUPPORTED_RETURN_TYPES, \
            function['name'] + ' returns unsupported type: ' + simple_return_type

        actuals = []
        formal_args = []

@@ -39,7 +39,11 @@ return baseType->${method_prefix}${api_name}(${unpacked_args});""")

METHOD_DEFINITION_FALLTHROUGH_VARIABLE = CodeTemplate("""\
${unpack_args}
return as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));""")
auto flags = compute_flags({ ${args_with_derivatives} });
auto var = as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));
var.is_volatile() = flags.is_volatile;
return var;
""")

METHOD_DEFINITION_FALLTHROUGH_INPLACE = CodeTemplate("""\
${unpack_args}
@@ -67,6 +71,7 @@ FUNCTION_DEFINITION = CodeTemplate("""\
variable_list ${op}::apply(const variable_list& grads) {
  variable_list grad_inputs{${num_inputs}};
  ${body}
  ensure_no_aten_scalars(grad_inputs);
  return grad_inputs;
}
""")
@@ -682,11 +687,6 @@ def create_variable_type(top_env, aten_declarations):
        if declaration['return_type'] in FALLTHROUGH_RETURN_TYPES:
            body.extend(METHOD_DEFINITION_FALLTHROUGH.substitute(combined).split('\n'))
            return body
        elif declaration['name'] in FALLTHROUGH_FUNCTIONS:
            tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace']
                    else METHOD_DEFINITION_FALLTHROUGH_VARIABLE)
            body.extend(tmpl.substitute(combined).split('\n'))
            return body

        arguments = declaration['arguments']
        tensor_args = [arg for arg in arguments if arg['simple_type'] in {'Tensor', 'TensorList'}]
@@ -752,6 +752,12 @@ def create_variable_type(top_env, aten_declarations):
        elif is_view:
            env['version_counter'] = 'take_version_counter(ret, self);'

        if declaration['name'] in FALLTHROUGH_FUNCTIONS:
            tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace']
                    else METHOD_DEFINITION_FALLTHROUGH_VARIABLE)
            body.extend(tmpl.substitute(combined).split('\n'))
            return body

        base_call = BASE_CALL.substitute(combined)
        if not declaration['inplace']:
            base_call = 'auto ret = as_variable({})'.format(base_call)

@@ -34,41 +34,44 @@ Tensor maybe_multiply(const Tensor & t, const Scalar & s) {
  }
}

Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_) {
  auto p = p_.toDouble();
  auto norm = self.norm(p_);

  if (norm.toDouble() == 0.0) {
    // handle case at 0 where we return a subgradient containing 0
    return zeros_like(self);
  }

  if (p == 2.0) {
    return self * (grad / norm);
  } else {
    auto pow_ = self.abs().pow(p - 2);
    auto scale_v = grad / norm.toTensor().pow(p - 1);
    return self * pow_ * scale_v;
// Don't expose ATen scalars to Variable API, because they are not supported yet.
void ensure_no_aten_scalars(variable_list &vars) {
  for (auto& v : vars) {
    if (v.defined() && v.dim() == 0) {
      v.data().as_strided_({1}, {1});
    }
  }
}

Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, int64_t dim, bool keepdim) {
  if (!keepdim && self.dim() > 1) {
    grad = grad.unsqueeze(dim);
  }
  auto p = p_.toDouble();
  auto norm = self.norm(p, dim, true);
  Tensor grad_input;
  if (p == 2.0) {
    grad_input = self * (grad / norm);
Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_, const Tensor & norm) {
  double p = p_.toDouble();
  Tensor self_scaled;
  Tensor scale_v;
  if (p == 0.0) {
    return zeros_like(self);
  } else if (p == 1.0) {
    return self.sign() * grad;
  } else if (p < 2.0) {
    self_scaled = self.sign() * self.abs().pow(p - 1);
    scale_v = grad / norm.pow(p - 1);
  } else if (p == 2.0) {
    self_scaled = self;
    scale_v = grad / norm;
  } else {
    auto pow_ = self.abs().pow(p - 2);
    auto scale_v = grad / norm.pow(p - 1);
    grad_input = self * pow_ * scale_v;
    self_scaled = self * self.abs().pow(p - 2);
    scale_v = grad / norm.pow(p - 1);
  }
  // handle case at 0 where we return a subgradient containing 0
  grad_input.masked_fill_(norm == 0, 0);
  return grad_input;
  scale_v.masked_fill_(norm == 0, 0);
  return self_scaled * scale_v;
}

Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, Tensor norm, int64_t dim, bool keepdim) {
  if (!keepdim && self.dim() > 1) {
    grad = grad.unsqueeze(dim);
    norm = norm.unsqueeze(dim);
  }
  return norm_backward(grad, self, p_, norm);
}
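Both overloads now reuse the forward result norm instead of recomputing it. The closed form they implement is the subgradient of the p-norm,

    \frac{\partial \lVert x \rVert_p}{\partial x_i} = \frac{\operatorname{sign}(x_i)\, \lvert x_i \rvert^{\,p-1}}{\lVert x \rVert_p^{\,p-1}}

scaled by the incoming grad, with the special cases p = 0 (zero gradient), p = 1 (sign(x)), p = 2 (x / ||x||_2), and positions where the norm is exactly 0 masked to a 0 subgradient.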

Tensor reduce_to(const Tensor & grad, IntList sizes) {
@@ -300,6 +303,16 @@ Tensor glu_double_backward_grad_output(const Tensor & grad, const Tensor & input
  return tmp.narrow(dim, 0, sizes[dim]) + tmp.narrow(dim, sizes[dim], sizes[dim]);
}

Tensor kl_div_double_backward_grad_output(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
  auto result = kl_div_backward(grad, input, target, size_average, false);
  if (reduce && size_average) {
    return result.mean().toTensor();
  } else if (reduce) {
    return result.sum().toTensor();
  }
  return result;
}

Tensor log_sigmoid_double_backward(const Tensor & grad, const Tensor & input) {
  auto z = input.sigmoid();
  return grad * (z - 1) * z;

@@ -25,7 +25,7 @@ RUN curl -o ~/miniconda.sh -O  https://repo.continuum.io/miniconda/Miniconda3-la
     /opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl&& \
     /opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH
#RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda80
RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda90
# This must be done before pip so that requirements.txt is available
WORKDIR /opt/pytorch
COPY . .

tools/pytorch.version (new file, 31 lines)
@@ -0,0 +1,31 @@
{
     global:
         _TH*;
         __TH*;
         TH*;
         *THP*;
         *THCP*;
         PyInit*;
         init*;
         state;
         _ZGVZN2at*;
         _ZN2at*;
         _ZNK2at*Type*;
         _ZNK2at*Tensor*;
         _ZNK2at*Storage*;
         _ZNK2at*Scalar*;
         _ZNK2at*CUDA*;
         *2at7Context*;
         _ZTIN2at*;
         _ZTIZN2at*;
         _ZTSN2at*;
         _ZTSPN2at*;
         _ZTSZN2at*;
         _ZTVN2at*;
         _ZZN2at*;
         _Z*torch*;
         _Z*Tensor*;
         _Z*tensor*;
     local:
         *;
 };
@@ -108,6 +108,11 @@ def set_default_tensor_type(t):
    global Storage
    Tensor = _import_dotted_name(t)
    Storage = _import_dotted_name(t.replace('Tensor', 'Storage'))

    if 'cuda' in t:
        import torch.cuda
        torch.cuda.init()

    _C._set_default_tensor_type(Tensor)
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										1653
									
								
								torch/_torch_docs.py
									
									
									
									
									
								
							
							
						
						
									
										1653
									
								
								torch/_torch_docs.py
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -12,7 +12,7 @@ def _type(self, new_type=None, async=False):
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        new_type (type or string): The desired type
 | 
			
		||||
        async (bool): If True, and the source is in pinned memory and
 | 
			
		||||
        async (bool): If ``True``, and the source is in pinned memory and
 | 
			
		||||
                      destination is on the GPU or vice versa, the copy is
 | 
			
		||||
                      performed asynchronously with respect to the host.
 | 
			
		||||
                      Otherwise, the argument has no effect.
 | 
			
		||||
@ -46,7 +46,7 @@ def _cuda(self, device=None, async=False):
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        device (int): The destination GPU id. Defaults to the current device.
 | 
			
		||||
        async (bool): If True and the source is in pinned memory, the copy will
 | 
			
		||||
        async (bool): If ``True`` and the source is in pinned memory, the copy will
 | 
			
		||||
                      be asynchronous with respect to the host. Otherwise, the
 | 
			
		||||
                      argument has no effect.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
@ -63,16 +63,16 @@ def backward(variables, grad_variables=None, retain_graph=None, create_graph=Non
 | 
			
		||||
        grad_variables (sequence of (Tensor, Variable or None)): Gradients w.r.t.
 | 
			
		||||
            each element of corresponding variables.  Any tensors will be
 | 
			
		||||
            automatically converted to Variables that are volatile unless
 | 
			
		||||
            ``create_graph`` is True.  None values can be specified for scalar
 | 
			
		||||
            ``create_graph`` is ``True``.  None values can be specified for scalar
 | 
			
		||||
            Variables or ones that don't require grad. If a None value would
 | 
			
		||||
            be acceptable for all grad_variables, then this argument is optional.
 | 
			
		||||
        retain_graph (bool, optional): If False, the graph used to compute the grad
 | 
			
		||||
            will be freed. Note that in nearly all cases setting this option to True
 | 
			
		||||
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
 | 
			
		||||
            will be freed. Note that in nearly all cases setting this option to ``True``
 | 
			
		||||
            is not needed and often can be worked around in a much more efficient
 | 
			
		||||
            way. Defaults to the value of ``create_graph``.
 | 
			
		||||
        create_graph (bool, optional): If true, graph of the derivative will
 | 
			
		||||
        create_graph (bool, optional): If ``True``, graph of the derivative will
 | 
			
		||||
            be constructed, allowing to compute higher order derivative products.
 | 
			
		||||
            Defaults to False, unless ``grad_variables`` contains at least one
 | 
			
		||||
            Defaults to ``False``, unless ``grad_variables`` contains at least one
 | 
			
		||||
            non-volatile Variable.
 | 
			
		||||
    """
 | 
			
		||||
    variables = (variables,) if isinstance(variables, Variable) else tuple(variables)
 | 
			
		||||
@@ -109,8 +109,8 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non
    Gradients can be given as Tensors when one doesn't need the graph of the
    derivative, or as Variables, in which case the graph will be created.

    If ``only_inputs`` is True, the function will only return a list of gradients
    w.r.t the specified inputs. If it's False, then gradient w.r.t. all remaining
    If ``only_inputs`` is ``True``, the function will only return a list of gradients
    w.r.t the specified inputs. If it's ``False``, then gradient w.r.t. all remaining
    leaves will still be computed, and will be accumulated into their ``.grad``
    attribute.

@@ -120,24 +120,24 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non
            returned (and not accumulated into ``.grad``).
        grad_outputs (sequence of Tensor or Variable): Gradients w.r.t. each output.
            Any tensors will be automatically converted to Variables that are
            volatile unless ``create_graph`` is True.  None values can be
            volatile unless ``create_graph`` is ``True``. None values can be
            specified for scalar Variables or ones that don't require grad.
            If a None value would be acceptable for all grad_variables, then
            this argument is optional.
        retain_graph (bool, optional): If False, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to True
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If True, graph of the derivative will
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Defaults to False, unless ``grad_variables`` contains at least one
            Defaults to ``False``, unless ``grad_variables`` contains at least one
            non-volatile Variable.
        only_inputs (bool, optional): If True, gradient w.r.t. leaves that are
        only_inputs (bool, optional): If ``True``, gradient w.r.t. leaves that are
            part of the graph, but don't appear in ``inputs`` won't be computed
            and accumulated. Defaults to True.
        allow_unused (bool, optional): If False, specifying inputs that were not
            and accumulated. Defaults to ``True``.
        allow_unused (bool, optional): If ``False``, specifying inputs that were not
            used when computing outputs (and therefore their grad is always zero)
            is an error. Default: False.
            is an error. Defaults to ``False``.
    """

    outputs = (outputs,) if isinstance(outputs, Variable) else tuple(outputs)

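Since this hunk documents ``create_graph`` and ``retain_graph`` for ``torch.autograd.grad``, a minimal sketch of the higher-order-derivative use case may help. It assumes the 0.3-era Variable API and a scalar output; it is illustrative, not part of the diff.

    import torch
    from torch.autograd import Variable, grad

    x = Variable(torch.randn(3), requires_grad=True)
    y = (x ** 2).sum()

    # create_graph=True builds a graph of the derivative itself,
    # so it can be differentiated again.
    dx, = grad(y, x, create_graph=True)
    d2x, = grad(dx.sum(), x)   # second derivative: 2 everywhere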
@@ -2,7 +2,7 @@ import torch
from ..function import Function


class Multinomial(Function):
class Categorical(Function):
    @staticmethod
    def forward(ctx, probs, num_samples, with_replacement):
        samples = probs.multinomial(num_samples, with_replacement)

@@ -57,15 +57,14 @@ def maybe_unexpand_or_view(variable, old_size):
#          The order is dim_n_begin, dim_n_end, dim_n-1_begin, dim_n-1_end, ...
def prepare_onnx_paddings(dim, pad):
    assert isinstance(dim, int)
    # The order of paddings is dim_0_begin, dim_0_end, dim_1_begin, ... , dim_n_end.
    # The desired order of paddings is
    # dim_0_begin, dim_1_begin, ... , dim_0_end, ..., dim_n_end.
    # n is the dimension of input.
    assert len(pad) <= dim * 2
    paddings = []
    # pad is guaranteed to have even elements.
    for i, j in zip(pad[0::2], pad[1::2]):
        paddings = [i, j] + paddings
    while len(paddings) < 2 * dim:
        paddings = [0, 0] + paddings
    # assume zero-dimensions in the beginning
    paddings = list(pad[:]) + [0] * (dim * 2 - len(pad))
    # reverse order and collate first beginnings and then ends
    paddings = paddings[-2::-2] + paddings[-1::-2]
    assert len(paddings) == dim * 2
    return paddings

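To make the reordering above concrete, here is a standalone copy of the new logic with a tiny check; the sample values are made up.

    # For a 2-D input padded as (left, right, top, bottom) = (1, 2, 3, 4),
    # ONNX wants all begins first, then all ends, per dimension.
    def prepare_onnx_paddings(dim, pad):
        assert isinstance(dim, int)
        assert len(pad) <= dim * 2
        paddings = list(pad[:]) + [0] * (dim * 2 - len(pad))
        paddings = paddings[-2::-2] + paddings[-1::-2]
        assert len(paddings) == dim * 2
        return paddings

    print(prepare_onnx_paddings(2, [1, 2, 3, 4]))  # [3, 1, 4, 2]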
@@ -203,7 +203,7 @@ def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True
    return True


def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3):
def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3):
    """Check gradients of gradients computed via small finite differences
       against analytical gradients
    This function checks that backpropagating through the gradients computed
@@ -216,17 +216,27 @@ def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3):
    is true for all elements of analytical gradient a and numerical gradient n.

    Args:
        func: Python function that takes Variable inputs and returns
        func (function): Python function that takes Variable inputs and returns
            a tuple of Variables
        inputs: tuple of Variables
        grad_outputs: tuple of Variables
        eps: perturbation for finite differences
        atol: absolute tolerance
        rtol: relative tolerance
        inputs (tuple of Variable): inputs to the function
        grad_outputs (tuple of Variable, optional): The gradients with respect to
            the function's outputs.
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance

    Returns:
        True if all differences satisfy allclose condition
        True if all differences satisfy allclose condition. Raises an exception
        otherwise.
    """
    if grad_outputs is None:
        # If grad_outputs is not specified, create random variables of the same
        # shape, type, and device as the outputs
        def randn_like(x):
            return Variable(x.data.new(x.size()).normal_(), requires_grad=True)
        outputs = _as_tuple(func(*inputs))
        grad_outputs = [randn_like(x) for x in outputs]

    def new_func(*input_args):
        input_args = input_args[:-len(grad_outputs)]
        outputs = _differentiable_outputs(func(*input_args))

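A short usage sketch of the relaxed signature: grad_outputs can now be omitted and random ones are generated as shown above. The import path matches the file this hunk edits (torch/autograd/gradcheck.py); exact locations may vary by version.

    import torch
    from torch.autograd import Variable
    from torch.autograd.gradcheck import gradgradcheck

    inputs = (Variable(torch.randn(4, 3).double(), requires_grad=True),)
    # Returns True on success, raises otherwise.
    assert gradgradcheck(lambda x: (x * x,), inputs)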
@@ -1,12 +1,18 @@
import torch
import subprocess
import os
import sys
import copy
import tempfile
import re
import itertools
from collections import defaultdict, namedtuple

import torch

try:
    FileNotFoundError
except NameError:
    # py2.7
    FileNotFoundError = IOError


class EventList(list):
    """A list of Events (for pretty printing)"""
@@ -17,6 +23,17 @@ class EventList(list):
        return self.table()

    def table(self, sort_by=None):
        """Prints an EventList as a nicely formatted table.

        Arguments:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
                ``cuda_time_total``, ``count``.

        Returns:
            A string containing the table.
        """
        return build_table(self, sort_by)

    def export_chrome_trace(self, path):
@@ -72,7 +89,7 @@ class profile(object):

    Arguments:
        enabled (bool, optional): Setting this to False makes this context manager a no-op.
            Default: True.
            Default: ``True``.

    .. warning:
        This context managers should not be called recursively, i.e. at most one
@@ -131,21 +148,27 @@ class profile(object):
            return '<unfinished torch.autograd.profile>'
        return str(self.function_events)

    def export_chrome_trace(self, path):
    def _check_finish(self):
        if self.function_events is None:
            raise RuntimeError("can't export a trace that didn't finish running")

    def table(self, sort_by=None):
        self._check_finish()
        return self.function_events.table(sort_by)
    table.__doc__ = EventList.table.__doc__

    def export_chrome_trace(self, path):
        self._check_finish()
        return self.function_events.export_chrome_trace(path)
    export_chrome_trace.__doc__ = EventList.export_chrome_trace.__doc__

    def key_averages(self):
        if self.function_events is None:
            raise RuntimeError("can't average a trace that didn't finish running")
        self._check_finish()
        return self.function_events.key_averages()
    key_averages.__doc__ = EventList.key_averages.__doc__

    def total_average(self):
        if self.function_events is None:
            raise RuntimeError("can't average a trace that didn't finish running")
        self._check_finish()
        return self.function_events.total_average()
    total_average.__doc__ = EventList.total_average.__doc__

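Putting the new _check_finish / table plumbing together, a small, hedged usage sketch (shapes and the sort key are arbitrary):

    import torch
    from torch.autograd import Variable, profiler

    x = Variable(torch.randn(64, 64))
    with profiler.profile() as prof:
        for _ in range(10):
            x = x.mm(x)

    print(prof.table(sort_by='cpu_time_total'))   # keys as documented above
    prof.export_chrome_trace('trace.json')        # open in chrome://tracing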
@@ -153,18 +176,24 @@ class profile(object):
class emit_nvtx(object):
    """Context manager that makes every autograd operation emit an NVTX range.

    It is useful when running the program under nvprof. Unfortunately, there's no
    way to force nvprof to flush the data it collected to disk, so for CUDA profiling
    one has to use this context manager to annotate nvprof traces, and then use
    :func:`torch.autograd.profiler.open_nvtx` to analyze the checkpoint.
    It is useful when running the program under nvprof::

        nvprof --profile-from-start off -o trace_name.prof -- <regular command here>

    Unfortunately, there's no way to force nvprof to flush the data it collected
    to disk, so for CUDA profiling one has to use this context manager to annotate
    nvprof traces and wait for the process to exit before inspecting them.
    Then, either NVIDIA Visual Profiler (nvvp) can be used to visualize the timeline, or
    :func:`torch.autograd.profiler.load_nvprof` can load the results for inspection
    e.g. in Python REPL.

    .. warning:
        This context managers should not be called recursively, i.e. at most one
        This context manager should not be called recursively, i.e. at most one
        instance should be enabled at any given time.

    Arguments:
        enabled (bool, optional): Setting this to False makes this context manager a no-op.
            Default: True.
            Default: ``True``.

    Example:
        >>> with torch.cuda.profiler.profile():
@@ -173,7 +202,7 @@ class emit_nvtx(object):
        ...         model(x)
    """
    def __init__(self, enabled=True):
        self.enabled = True
        self.enabled = enabled
        self.entered = False

    def __enter__(self):
@@ -291,7 +320,7 @@ def demangle(name):
    try:
        with open(os.devnull, 'w') as devnull:
            return subprocess.check_output(['c++filt', '-n', name], stderr=devnull).rstrip().decode("ascii")
    except subprocess.CalledProcessError:
    except (subprocess.CalledProcessError, OSError, FileNotFoundError) as e:
        return name


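After a run under nvprof as shown in the emit_nvtx docstring above, the saved trace can be loaded back from Python; the file name here is just the one used in that example.

    from torch.autograd import profiler

    events = profiler.load_nvprof('trace_name.prof')
    print(events.table(sort_by='cuda_time_total'))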
@@ -154,14 +154,14 @@ class Variable(_C._VariableBase):
                None values can be specified for scalar Variables or ones that
                don't require grad. If a None value would be acceptable then
                this argument is optional.
            retain_graph (bool, optional): If False, the graph used to compute
            retain_graph (bool, optional): If ``False``, the graph used to compute
                the grads will be freed. Note that in nearly all cases setting
                this option to True is not needed and often can be worked around
                in a much more efficient way. Defaults to the value of
                ``create_graph``.
            create_graph (bool, optional): If true, graph of the derivative will
            create_graph (bool, optional): If ``True``, graph of the derivative will
                be constructed, allowing to compute higher order derivative
                products. Defaults to False, unless ``gradient`` is a volatile
                products. Defaults to ``False``, unless ``gradient`` is a volatile
                Variable.
        """
        torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
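A brief, hedged illustration of why one rarely needs ``retain_graph=True``: only a second backward pass through the same graph requires it.

    import torch
    from torch.autograd import Variable

    x = Variable(torch.ones(2), requires_grad=True)
    y = (x * 3).sum()
    y.backward(retain_graph=True)   # keep buffers for a second pass
    y.backward()                    # would raise if the graph had been freed
    print(x.grad)                   # gradients accumulate across both calls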
@@ -205,20 +205,31 @@ class Variable(_C._VariableBase):
        return handle

    def reinforce(self, reward):
        """Registers a reward obtained as a result of a stochastic process.
        def trim(str):
            return '\n'.join([line.strip() for line in str.split('\n')])

        Differentiating stochastic nodes requires providing them with reward
        value. If your graph contains any stochastic operations, you should
        call this function on their outputs. Otherwise an error will be raised.
        raise RuntimeError(trim(r"""reinforce() was removed.
            Use torch.distributions instead.
            See http://pytorch.org/docs/master/distributions.html

        Parameters:
            reward(Tensor): Tensor with per-element rewards. It has to match
                the device location and shape of Variable's data.
        """
        if not isinstance(self.grad_fn, StochasticFunction):
            raise RuntimeError("reinforce() can be only called on outputs "
                               "of stochastic functions")
        self.grad_fn._reinforce(reward)
            Instead of:

            probs = policy_network(state)
            action = probs.multinomial()
            next_state, reward = env.step(action)
            action.reinforce(reward)
            action.backward()

            Use:

            probs = policy_network(state)
            # NOTE: categorical is equivalent to what used to be called multinomial
            m = torch.distributions.Categorical(probs)
            action = m.sample()
            next_state, reward = env.step(action)
            loss = -m.log_prob(action) * reward
            loss.backward()
        """))

    def detach(self):
        """Returns a new Variable, detached from the current graph.
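The error message above sketches the replacement pattern; a self-contained, hedged version (policy_network and env replaced by stand-ins) looks like this:

    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from torch.distributions import Categorical

    logits = Variable(torch.randn(1, 4), requires_grad=True)   # stand-in policy output
    probs = F.softmax(logits, dim=-1)

    m = Categorical(probs)
    action = m.sample()
    reward = 1.0                                   # would come from env.step(action)
    loss = (-m.log_prob(action) * reward).sum()
    loss.backward()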
@@ -422,7 +433,7 @@ class Variable(_C._VariableBase):
        return self.expand(tensor.size())

    def multinomial(self, num_samples=1, replacement=False):
        return Multinomial.apply(self, num_samples, replacement)
        return Categorical.apply(self, num_samples, replacement)

    def bernoulli(self):
        return Bernoulli.apply(self)

@@ -257,10 +257,11 @@ class RNNDescriptor(object):
                CUDNN_RNN_ALGO_STANDARD,
                datatype
            ))
        if version() >= 7000 and int(cuda[0]) >= 9:
            lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH)
            if datatype == CUDNN_DATA_HALF:
                lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH)
            if version() >= 7000 and int(cuda[0]) >= 9 and (
                    torch.cuda.get_device_capability(torch.cuda.current_device())[0] >= 7):
                lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH)
                if datatype == CUDNN_DATA_HALF:
                    lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH)
        else:
            check_error(lib.cudnnSetRNNDescriptor(
                self,

@@ -203,13 +203,6 @@ def forward(fn, input, hx, weight, output, hy):
        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)

        if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
            raise RuntimeError(
                'input must have 3 dimensions, got {}'.format(input.dim()))
        if fn.input_size != input.size(-1):
            raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
                fn.input_size, input.size(-1)
            ))
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v5.1 and above')

@@ -261,9 +254,6 @@ def forward(fn, input, hx, weight, output, hy):
            fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
            w = fn.weight_buf

        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, tuple(hx.size())))
        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, tuple(cx.size())))

torch/csrc/DataLoader.cpp (new file, 203 lines)
@@ -0,0 +1,203 @@
 | 
			
		||||
#include <sys/wait.h>
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <set>
 | 
			
		||||
#include <atomic>
 | 
			
		||||
#include <signal.h>
 | 
			
		||||
#include "THP.h"
 | 
			
		||||
 | 
			
		||||
// In cases like DataLoader, if a worker process dies due to a bus error/segfault
// or just hangs, the main process, if implemented with
// multiprocessing.queue.SimpleQueue, will hang waiting for data. This is
// difficult to avoid on the PyTorch side as it can be caused by limited shm, or
// other libraries users call in the workers. The following methods are an effort
// to do our best to provide some error message to users when such unfortunate
// events happen.
 | 
			
		||||
 | 
			
		||||
// TODO: The following doesn't work on Windows. Specifically, the sigaction and
// waitid calls, and the SIGCHLD handler. Currently, dummy implementations are
// provided for Windows.
 | 
			
		||||
 | 
			
		||||
#ifndef _WIN32
 | 
			
		||||
 | 
			
		||||
// Critical signal handlers should be registered on worker processes before
// doing work.
// The handler re-raises the signal with the default handler so that the kill
// information can be retrieved from the main process.
// The Python entry point is _set_worker_signal_handlers().
 | 
			
		||||
#define SIGNAL_HANDLER(SIGNAL, HANDLER_NAME, ERROR_MSG)                       \
 | 
			
		||||
static void HANDLER_NAME(int sig, siginfo_t *info, void *ctx)                 \
 | 
			
		||||
{                                                                             \
 | 
			
		||||
  write(STDERR_FILENO, ERROR_MSG, sizeof(ERROR_MSG) / sizeof(char));          \
 | 
			
		||||
  struct sigaction sa;                                                        \
 | 
			
		||||
  sa.sa_handler = SIG_DFL;                                                    \
 | 
			
		||||
  sa.sa_flags = 0;                                                            \
 | 
			
		||||
  if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGNAL, &sa, NULL) != 0) {   \
 | 
			
		||||
    _exit(EXIT_FAILURE);                                                      \
 | 
			
		||||
  } else {                                                                    \
 | 
			
		||||
    raise(SIGNAL);                                                            \
 | 
			
		||||
  }                                                                           \
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// signal(2) is really not portable. So use sigaction.
 | 
			
		||||
// http://man7.org/linux/man-pages/man2/signal.2.html
 | 
			
		||||
static inline void setSignalHandler(int signal, void(*handler)(int, siginfo_t *, void *), struct sigaction *old_sa_ptr)
 | 
			
		||||
{
 | 
			
		||||
  struct sigaction sa;
 | 
			
		||||
  sa.sa_sigaction = handler;
 | 
			
		||||
  sa.sa_flags = SA_RESTART|SA_SIGINFO|SA_NOCLDSTOP|SA_NODEFER;
 | 
			
		||||
  if (sigemptyset(&sa.sa_mask) != 0 || sigaction(signal, &sa, old_sa_ptr) != 0) {
 | 
			
		||||
    std::ostringstream oss;
 | 
			
		||||
    oss << "An error occurred while setting handler for " << strsignal(signal) << ".";
 | 
			
		||||
    throw std::runtime_error(oss.str());
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SIGNAL_HANDLER(SIGBUS, handler_SIGBUS, "ERROR: Unexpected bus error encountered in worker. "
 | 
			
		||||
  "This might be caused by insufficient shared memory (shm).\n");
 | 
			
		||||
SIGNAL_HANDLER(SIGSEGV, handler_SIGSEGV, "ERROR: Unexpected segmentation fault encountered in worker.\n");
 | 
			
		||||
 | 
			
		||||
// When an error happens in DataLoader methods and Python starts to exit, the
// error trace will keep the loader alive, and Python may kill the child
// processes first before deleting the loader object. Then the cleanup
// methods in DataLoader.__del__ are not yet called, and the SIGCHLD handler will
// print an error saying a worker was killed by SIGTERM. So we suppress SIGTERM
// from the main loader process here by calling _exit(EXIT_SUCCESS). Note that if
// we exit with a nonzero code, the loader's SIGCHLD handler may report a
// RuntimeError again, which defeats the whole purpose.
 | 
			
		||||
static void handler_SIGTERM(int sig, siginfo_t *info, void *ctx)
 | 
			
		||||
{
 | 
			
		||||
  if (info->si_pid == getppid()) {
 | 
			
		||||
    _exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
  struct sigaction sa;
 | 
			
		||||
  sa.sa_handler = SIG_DFL;
 | 
			
		||||
  sa.sa_flags = 0;
 | 
			
		||||
  if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGTERM, &sa, NULL) != 0) {
 | 
			
		||||
    _exit(EXIT_FAILURE);
 | 
			
		||||
  } else {
 | 
			
		||||
    raise(SIGTERM);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_setWorkerSignalHandlers(PyObject *module, PyObject *arg) {
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
  setSignalHandler(SIGBUS, &handler_SIGBUS, NULL);
 | 
			
		||||
  setSignalHandler(SIGSEGV, &handler_SIGSEGV, NULL);
 | 
			
		||||
  setSignalHandler(SIGTERM, &handler_SIGTERM, NULL);
 | 
			
		||||
  Py_RETURN_TRUE;
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static std::map<int64_t, std::set<pid_t>> worker_pids = {};
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_errorIfAnyWorkerFails(PyObject *module) {
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
  int error;
 | 
			
		||||
  std::set<pid_t> *pid_set;
 | 
			
		||||
  pid_t worker_pid;
 | 
			
		||||
  siginfo_t infop;
 | 
			
		||||
 | 
			
		||||
  // Only check the pids we care about
 | 
			
		||||
  for (auto it = worker_pids.begin(); it != worker_pids.end(); ++it) {
 | 
			
		||||
    pid_set = &(it->second);
 | 
			
		||||
    for (auto pid_it = pid_set->begin(); pid_it != pid_set->end(); ++pid_it) {
 | 
			
		||||
      worker_pid = *pid_it;
 | 
			
		||||
      // Use waitid rather than waitpid so that we can set NOWAIT, and so that Python
      // and other handlers can get whatever info they want about the child.
 | 
			
		||||
      infop.si_pid = 0;
 | 
			
		||||
      error = waitid(P_PID, worker_pid, &infop, WEXITED|WNOHANG|WNOWAIT);
 | 
			
		||||
      // ignore errors and case with no waitable child
 | 
			
		||||
      if (error < 0 || infop.si_pid == 0)
 | 
			
		||||
        continue;
 | 
			
		||||
      if (infop.si_code == CLD_EXITED && infop.si_status != EXIT_SUCCESS) {  // exit with error
 | 
			
		||||
        std::ostringstream oss;
 | 
			
		||||
        oss << "DataLoader worker (pid " << worker_pid << ") exited "
 | 
			
		||||
            << "unexpectedly with exit code " << infop.si_status << ".";
 | 
			
		||||
        // This is necessary. Otherwise, the runtime error will kill the other
 | 
			
		||||
        // workers, and trigger this again.
 | 
			
		||||
        pid_set->clear();
 | 
			
		||||
        throw std::runtime_error(oss.str());
 | 
			
		||||
      }  else if (infop.si_code == CLD_KILLED || infop.si_code == CLD_DUMPED) {  // killed by signal
 | 
			
		||||
        std::ostringstream oss;
 | 
			
		||||
        oss << "DataLoader worker (pid " << worker_pid << ") is killed "
 | 
			
		||||
            << "by signal: " << strsignal(infop.si_status) << ".";
 | 
			
		||||
        // This is necessary. Otherwise, the runtime error will kill the other
 | 
			
		||||
        // workers, and trigger this again.
 | 
			
		||||
        pid_set->clear();
 | 
			
		||||
        throw std::runtime_error(oss.str());
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  Py_RETURN_NONE;
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// We don't want to exit on any SIGCHLD from any child. child_pids is a tuple
 | 
			
		||||
// of pids we are interested in.
 | 
			
		||||
PyObject *THPModule_updateWorkerPIDs(PyObject *module, PyObject *args) {
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
  Py_ssize_t num_args = args ? (Py_ssize_t) PyTuple_Size(args) : 0;
 | 
			
		||||
  THPUtils_assert(num_args == 2, "_update_worker_pids expects exactly 2 arguments.");
 | 
			
		||||
  int64_t key = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 0));
 | 
			
		||||
  THPUtils_assert(worker_pids.find(key) == worker_pids.end(), "_update_worker_pids "
 | 
			
		||||
        "should be called only once for each DataLoader.");
 | 
			
		||||
  PyObject *child_pids = PyTuple_GET_ITEM(args, 1);
 | 
			
		||||
  THPUtils_assert(PyTuple_Check(child_pids), "_update_worker_pids "
 | 
			
		||||
        "expects a tuple for child_pids, but got %s.", THPUtils_typename(child_pids));
 | 
			
		||||
 | 
			
		||||
  std::set<pid_t> pids_set = {};
 | 
			
		||||
  auto size = PyTuple_GET_SIZE(child_pids);
 | 
			
		||||
  for (int idx = 0; idx < size; idx++) {
 | 
			
		||||
    PyObject* obj = PyTuple_GET_ITEM(child_pids, idx);
 | 
			
		||||
    pids_set.insert((pid_t) THPUtils_unpackLong(obj));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  worker_pids[key] = pids_set;
 | 
			
		||||
 | 
			
		||||
  Py_RETURN_NONE;
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_removeWorkerPIDs(PyObject *module, PyObject *loader_id) {
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
 | 
			
		||||
  int64_t key = THPUtils_unpackLong(loader_id);
 | 
			
		||||
  THPUtils_assert(worker_pids.find(key) != worker_pids.end(), "Cannot find worker "
 | 
			
		||||
        "information for DataLoader with id %ld.", key);
 | 
			
		||||
 | 
			
		||||
  worker_pids.erase(key);
 | 
			
		||||
 | 
			
		||||
  Py_RETURN_NONE;
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef SIGNAL_HANDLER
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
// dummy implementations for windows
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_setWorkerSignalHandlers(PyObject *module, PyObject *_ignored) {
 | 
			
		||||
    Py_RETURN_TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_updateWorkerPIDs(PyObject *module, PyObject *_ignored) {
 | 
			
		||||
    Py_RETURN_TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_removeWorkerPIDs(PyObject *module, PyObject *_ignored) {
 | 
			
		||||
    Py_RETURN_NONE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject *THPModule_exitIfAnyWorkerFails(PyObject *module, PyObject *_ignored) {
 | 
			
		||||
    Py_RETURN_NONE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
PyMethodDef DataLoaderMethods[] = {
 | 
			
		||||
  {"_set_worker_signal_handlers",  (PyCFunction)THPModule_setWorkerSignalHandlers,  METH_NOARGS,   NULL},
 | 
			
		||||
  {"_update_worker_pids",          (PyCFunction)THPModule_updateWorkerPIDs,         METH_VARARGS,  NULL},
 | 
			
		||||
  {"_remove_worker_pids",          (PyCFunction)THPModule_removeWorkerPIDs,         METH_O,        NULL},
 | 
			
		||||
  {"_error_if_any_worker_fails",   (PyCFunction)THPModule_errorIfAnyWorkerFails,    METH_NOARGS,   NULL},
 | 
			
		||||
  {NULL, NULL, 0, NULL}
 | 
			
		||||
};
 | 
			
		||||
@ -1,5 +1,8 @@
 | 
			
		||||
#include <Python.h>
 | 
			
		||||
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "THP.h"
 | 
			
		||||
 | 
			
		||||
PyObject *THPException_FatalError;
 | 
			
		||||
@ -11,3 +14,61 @@ bool THPException_init(PyObject *module)
 | 
			
		||||
  ASSERT_TRUE(PyModule_AddObject(module, "FatalError", THPException_FatalError) == 0);
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace torch {
 | 
			
		||||
 | 
			
		||||
void replaceAll(std::string & str,
 | 
			
		||||
    const std::string & old_str,
 | 
			
		||||
    const std::string & new_str) {
 | 
			
		||||
  std::string::size_type pos = 0u;
 | 
			
		||||
  while ((pos = str.find(old_str, pos)) != std::string::npos){
 | 
			
		||||
     str.replace(pos, old_str.length(), new_str);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string processErrorMsg(std::string str) {
 | 
			
		||||
 | 
			
		||||
  // Translate ATen type names to their respective PyTorch ones
 | 
			
		||||
  std::vector<std::pair<std::string, std::string>> changes {
 | 
			
		||||
    {"SparseCUDAByteType", "torch.cuda.sparse.ByteTensor"},
 | 
			
		||||
    {"SparseCUDACharType", "torch.cuda.sparse.CharTensor"},
 | 
			
		||||
    {"SparseCUDADoubleType", "torch.cuda.sparse.DoubleTensor"},
 | 
			
		||||
    {"SparseCUDAFloatType", "torch.cuda.sparse.FloatTensor"},
 | 
			
		||||
    {"SparseCUDAIntType", "torch.cuda.sparse.IntTensor"},
 | 
			
		||||
    {"SparseCUDALongType", "torch.cuda.sparse.LongTensor"},
 | 
			
		||||
    {"SparseCUDAShortType", "torch.cuda.sparse.ShortTensor"},
 | 
			
		||||
    {"SparseCUDAHalfType", "torch.cuda.sparse.HalfTensor"},
 | 
			
		||||
    {"SparseCPUByteType", "torch.sparse.ByteTensor"},
 | 
			
		||||
    {"SparseCPUCharType", "torch.sparse.CharTensor"},
 | 
			
		||||
    {"SparseCPUDoubleType", "torch.sparse.DoubleTensor"},
 | 
			
		||||
    {"SparseCPUFloatType", "torch.sparse.FloatTensor"},
 | 
			
		||||
    {"SparseCPUIntType", "torch.sparse.IntTensor"},
 | 
			
		||||
    {"SparseCPULongType", "torch.sparse.LongTensor"},
 | 
			
		||||
    {"SparseCPUShortType", "torch.sparse.ShortTensor"},
 | 
			
		||||
    {"SparseCPUHalfType", "torch.sparse.HalfTensor"},
 | 
			
		||||
    {"CUDAByteType", "torch.cuda.ByteTensor"},
 | 
			
		||||
    {"CUDACharType", "torch.cuda.CharTensor"},
 | 
			
		||||
    {"CUDADoubleType", "torch.cuda.DoubleTensor"},
 | 
			
		||||
    {"CUDAFloatType", "torch.cuda.FloatTensor"},
 | 
			
		||||
    {"CUDAIntType", "torch.cuda.IntTensor"},
 | 
			
		||||
    {"CUDALongType", "torch.cuda.LongTensor"},
 | 
			
		||||
    {"CUDAShortType", "torch.cuda.ShortTensor"},
 | 
			
		||||
    {"CUDAHalfType", "torch.cuda.HalfTensor"},
 | 
			
		||||
    {"CPUByteType", "torch.ByteTensor"},
 | 
			
		||||
    {"CPUCharType", "torch.CharTensor"},
 | 
			
		||||
    {"CPUDoubleType", "torch.DoubleTensor"},
 | 
			
		||||
    {"CPUFloatType", "torch.FloatTensor"},
 | 
			
		||||
    {"CPUIntType", "torch.IntTensor"},
 | 
			
		||||
    {"CPULongType", "torch.LongTensor"},
 | 
			
		||||
    {"CPUShortType", "torch.ShortTensor"},
 | 
			
		||||
    {"CPUHalfType", "torch.HalfTensor"},
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  for (const auto & it : changes) {
 | 
			
		||||
    replaceAll(str, it.first, it.second);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return str;
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
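An illustrative Python analogue of processErrorMsg above, showing only the idea (a small subset of the mapping, not the actual binding):

    CHANGES = {
        'CUDAFloatType': 'torch.cuda.FloatTensor',
        'CPUFloatType': 'torch.FloatTensor',
    }

    def process_error_msg(msg):
        # Rewrite backend type names to the user-facing tensor names.
        for old, new in CHANGES.items():
            msg = msg.replace(old, new)
        return msg

    print(process_error_msg('expected CPUFloatType but got CUDAFloatType'))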
@ -14,7 +14,8 @@
 | 
			
		||||
  } catch (python_error &e) {                                                  \
 | 
			
		||||
    return retval;                                                             \
 | 
			
		||||
  } catch (std::exception &e) {                                                \
 | 
			
		||||
    PyErr_SetString(PyExc_RuntimeError, e.what());                             \
 | 
			
		||||
    auto msg = torch::processErrorMsg(e.what());                               \
 | 
			
		||||
    PyErr_SetString(PyExc_RuntimeError, msg.c_str());                          \
 | 
			
		||||
    return retval;                                                             \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -68,4 +69,8 @@ struct python_error : public std::exception {
 | 
			
		||||
bool THPException_init(PyObject *module);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace torch {
 | 
			
		||||
std::string processErrorMsg(std::string str);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -25,6 +25,7 @@
 | 
			
		||||
#include "THP.h"
 | 
			
		||||
 | 
			
		||||
#include "ModuleSparse.cpp"
 | 
			
		||||
#include "DataLoader.cpp"
 | 
			
		||||
 | 
			
		||||
PyObject* module;
 | 
			
		||||
PyObject* tensor_classes;
 | 
			
		||||
@ -792,6 +793,7 @@ static PyObject* initModule() {
 | 
			
		||||
#define ASSERT_TRUE(cmd) if (!(cmd)) return NULL
 | 
			
		||||
 | 
			
		||||
  THPUtils_addPyMethodDefs(methods, TorchMethods);
 | 
			
		||||
  THPUtils_addPyMethodDefs(methods, DataLoaderMethods);
 | 
			
		||||
#ifdef WITH_CUDA
 | 
			
		||||
  THPUtils_addPyMethodDefs(methods, THCPModule_methods());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,5 @@
 | 
			
		||||
#define __STDC_FORMAT_MACROS
 | 
			
		||||
 | 
			
		||||
#include <Python.h>
 | 
			
		||||
#include <structmember.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,5 @@
 | 
			
		||||
#define __STDC_FORMAT_MACROS
 | 
			
		||||
 | 
			
		||||
#include <Python.h>
 | 
			
		||||
#include <structmember.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -55,7 +55,8 @@ auto BatchNormForward::apply(const variable_list& inputs) -> variable_list {
 | 
			
		||||
  bool use_cudnn = false;
 | 
			
		||||
#ifdef WITH_CUDNN
 | 
			
		||||
  use_cudnn = (input.type().isCuda()
 | 
			
		||||
               && input.type().scalarType() != at::kHalf
 | 
			
		||||
               && (input.type().scalarType() != at::kHalf
 | 
			
		||||
               || weight.type().scalarType() == at::kFloat)
 | 
			
		||||
               && weight.defined() && bias.defined()
 | 
			
		||||
               && input.size(0) <= 131070
 | 
			
		||||
               && cudnn_enabled && CUDNN_VERSION >= 5110L);
 | 
			
		||||
@ -115,7 +116,8 @@ auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_lis
 | 
			
		||||
  bool use_cudnn = false;
 | 
			
		||||
#ifdef WITH_CUDNN
 | 
			
		||||
  use_cudnn = (input.type().backend() == at::kCUDA
 | 
			
		||||
               && input.type().scalarType() != at::kHalf
 | 
			
		||||
               && (input.type().scalarType() != at::kHalf
 | 
			
		||||
               || weight.type().scalarType() == at::kFloat)
 | 
			
		||||
               && weight.defined() && bias.defined() && training
 | 
			
		||||
               && input.size(0) <= 131070
 | 
			
		||||
               && cudnn_enabled && CUDNN_VERSION >= 5110L);
 | 
			
		||||
@ -164,7 +166,7 @@ auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_lis
 | 
			
		||||
  // Add saved variables used out of the pure autograd to inputs
 | 
			
		||||
  variable_list all_inputs(grad_outputs);
 | 
			
		||||
  all_inputs.push_back(input_var);
 | 
			
		||||
  if (weight.get()) {
 | 
			
		||||
  if (weight.defined()) {
 | 
			
		||||
    all_inputs.push_back(weight_var);
 | 
			
		||||
  }
 | 
			
		||||
  auto outputs =  as_tensor_list(std::move(grad_input),
 | 
			
		||||
 | 
			
		||||
@ -365,7 +365,7 @@ auto ConvForward::apply(const variable_list& inputs) -> variable_list {
 | 
			
		||||
// For Convolution strategies that don't implicitly handle grad_bias, we add a helper
 | 
			
		||||
// function here to perform it using simple Tensor operators
 | 
			
		||||
static at::Tensor compute_grad_bias(const at::Tensor& grad_output) {
 | 
			
		||||
  // grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches 
 | 
			
		||||
  // grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches
 | 
			
		||||
  return grad_output.contiguous().view({grad_output.size(0), grad_output.size(1), -1}).sum(0).sum(1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -727,7 +727,18 @@ auto ConvBackwardBackward::apply(const variable_list& grad_grad_inputs) -> varia
 | 
			
		||||
      gI = apply_fn<Transpose>(0, 1)(gIt);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return {ggO, gI, gW};
 | 
			
		||||
 | 
			
		||||
  if (should_compute_output(0) && !ggO.defined()) ggO = at::zeros_like(gO);
 | 
			
		||||
  if (should_compute_output(1) && !gI.defined()) gI = at::zeros_like(input);
 | 
			
		||||
  if (should_compute_output(2) && !gW.defined()) gW = at::zeros_like(weight);
 | 
			
		||||
  bool is_volatile = std::any_of(grad_grad_inputs.begin(), grad_grad_inputs.end(), [](const Variable& v){
 | 
			
		||||
    return v.defined() && v.is_volatile();
 | 
			
		||||
  });
 | 
			
		||||
  auto results = variable_list({ggO, gI, gW});
 | 
			
		||||
  for (auto& result : results) {
 | 
			
		||||
    result.is_volatile() |= is_volatile;
 | 
			
		||||
  }
 | 
			
		||||
  return results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
auto ConvBackwardBackward::releaseVariables() -> void {
 | 
			
		||||
 | 
			
		||||
@ -9,7 +9,7 @@ namespace autograd {
 | 
			
		||||
jit::node_list BatchNormForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) {
 | 
			
		||||
  auto & g = ctx->graph;
 | 
			
		||||
  // X, Scale, Bias
 | 
			
		||||
  auto bn = g->appendNode(g->create(jit::kSpatialBN,{inputs.at(0),inputs.at(1),inputs.at(2)}));
 | 
			
		||||
  auto bn = g->appendNode(g->create(jit::kBatchNormalization, {inputs.at(0),inputs.at(1),inputs.at(2)}));
 | 
			
		||||
  bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_mean));
 | 
			
		||||
  bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_var));
 | 
			
		||||
  bn->i_(jit::kis_test, !this->training);
 | 
			
		||||
 | 
			
		||||
@ -18,7 +18,7 @@ namespace torch { namespace autograd {
 | 
			
		||||
jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) {
 | 
			
		||||
  auto & g = ctx->graph;
 | 
			
		||||
  // See Note [Caffe2ConvTranspose]
 | 
			
		||||
  auto n = g->create(!transposed ? jit::kConv : jit::kCaffe2ConvTranspose,
 | 
			
		||||
  auto n = g->create(!transposed ? jit::kConv : jit::kConvTranspose,
 | 
			
		||||
                                   {inputs.at(0), inputs.at(1)});
 | 
			
		||||
 | 
			
		||||
  // Irritatingly, Caffe2 requires us to specify kernels,
 | 
			
		||||
@ -55,6 +55,8 @@ jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs
 | 
			
		||||
  n->i_(jit::kgroup,groups);
 | 
			
		||||
 | 
			
		||||
  // Not in ONNX?
 | 
			
		||||
  // TODO: implement it once ConvTranspose in ONNX gets `adj` argument instead
 | 
			
		||||
  // of providing `output_shape`
 | 
			
		||||
  for (int p : output_padding) {
 | 
			
		||||
    JIT_EXPECTM(p == 0, "output padding is not supported.");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
#include "torch/csrc/autograd/input_buffer.h"
 | 
			
		||||
 | 
			
		||||
#include "torch/csrc/assertions.h"
 | 
			
		||||
#include "torch/csrc/autograd/functions/basic_ops.h"
 | 
			
		||||
#include "torch/csrc/utils/auto_gpu.h"
 | 
			
		||||
 | 
			
		||||
@ -10,6 +11,7 @@ InputBuffer::InputBuffer(size_t size)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
void InputBuffer::add(size_t pos, Variable var) {
 | 
			
		||||
  TORCH_ASSERT(pos >= 0 && pos < buffer.size());
 | 
			
		||||
  if (!var.defined()) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -43,6 +43,10 @@ PyObject * THPVariable_Wrap(Variable var)
 | 
			
		||||
    Py_RETURN_NONE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (var.dim() == 0) {
 | 
			
		||||
    throw std::runtime_error("Variable API does not support Scalars");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (auto obj = var.get()->pyobj) {
 | 
			
		||||
    Py_INCREF(obj);
 | 
			
		||||
    return obj;
 | 
			
		||||
@ -96,26 +100,21 @@ static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
 | 
			
		||||
{
 | 
			
		||||
  Py_VISIT(self->data);
 | 
			
		||||
  Py_VISIT(self->backward_hooks);
 | 
			
		||||
  // We don't want to traverse the grad_fn, even if the Variable owns it and the
 | 
			
		||||
  // shared pointer's use count is 1. This is because we would need to treat
 | 
			
		||||
  // the grad_fn as part of the Python state and hold the GIL sometimes when
 | 
			
		||||
  // grad_fn's shared_ptr is copied, otherwise a race condition with the Python
 | 
			
		||||
  // GC could occur. Holding the GIL when the shared_ptr is copied adds
 | 
			
		||||
  // undesirable complexity/overhead.
 | 
			
		||||
  //
 | 
			
		||||
  // When hooks, a Variable, and its grad_fn are involved in a Python reference
 | 
			
		||||
  // cycle, because we're not traversing the grad_fn, the reference cycle will
 | 
			
		||||
  // in fact leak.
 | 
			
		||||
  //
 | 
			
		||||
  // See https://gist.github.com/zou3519/7ac92b84dd7d206dcc6eae55fee8372c
 | 
			
		||||
  // for more details about the race condition involving traversing the grad_fn
 | 
			
		||||
  // and the python GC.
 | 
			
		||||
  if (self->cdata.defined()) {
 | 
			
		||||
    // Only visit this if we actually own it (no one else uses the shared pointer)
 | 
			
		||||
    auto& grad_fn = self->cdata.grad_fn();
 | 
			
		||||
    if (grad_fn.use_count() == 1) {
 | 
			
		||||
      if (auto fn = dynamic_cast<PyFunction*>(grad_fn.get())) {
 | 
			
		||||
        Py_VISIT(fn->obj);
 | 
			
		||||
      } else {
 | 
			
		||||
        // visit hooks in C++ implemented autograd functions
 | 
			
		||||
        for (auto& hook : grad_fn->pre_hooks) {
 | 
			
		||||
          if (auto pyhook = dynamic_cast<PyFunctionPreHook*>(hook.get())) {
 | 
			
		||||
            Py_VISIT(pyhook->dict);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        for (auto& hook : grad_fn->post_hooks) {
 | 
			
		||||
          if (auto pyhook = dynamic_cast<PyFunctionPostHook*>(hook.get())) {
 | 
			
		||||
            Py_VISIT(pyhook->dict);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    for (auto& hook : self->cdata.hooks()) {
 | 
			
		||||
      if (auto pyhook = dynamic_cast<PyFunctionPreHook*>(hook.get())) {
 | 
			
		||||
        Py_VISIT(pyhook->dict);
 | 
			
		||||
 | 
			
		||||
@ -13,6 +13,10 @@
 | 
			
		||||
namespace torch { namespace autograd { namespace utils {
 | 
			
		||||
 | 
			
		||||
inline PyObject* wrap(at::Tensor tensor) {
 | 
			
		||||
  if (tensor.defined() && tensor.dim() == 0) {
 | 
			
		||||
    // don't expose 0-dim tensors to Variable API.
 | 
			
		||||
    Variable(tensor).data().as_strided_({1}, {1});
 | 
			
		||||
  }
 | 
			
		||||
  return THPVariable_Wrap(Variable(std::move(tensor)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -54,6 +58,10 @@ inline PyObject* wrap(int64_t value) {
 | 
			
		||||
  return THPUtils_packInt64(value);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline PyObject* wrap(void* value) {
 | 
			
		||||
  return THPUtils_packInt64(reinterpret_cast<intptr_t>(value));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline PyObject* wrap(at::Scalar scalar) {
 | 
			
		||||
  return wrap(scalar.toTensor());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -133,6 +133,18 @@ PyObject * THCPModule_getDeviceName_wrap(PyObject *self, PyObject *arg)
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject * THCPModule_getDeviceCapability_wrap(PyObject *self, PyObject *arg)
 | 
			
		||||
{
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
  THPUtils_assert(THPUtils_checkLong(arg), "invalid argument to getDeviceCapability");
 | 
			
		||||
  long device = THPUtils_unpackLong(arg);
 | 
			
		||||
 | 
			
		||||
  cudaDeviceProp prop;
 | 
			
		||||
  THCudaCheck(cudaGetDeviceProperties(&prop, device));
 | 
			
		||||
  return Py_BuildValue("(ii)", prop.major, prop.minor);
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject * THCPModule_getCurrentStream_wrap(PyObject *self)
 | 
			
		||||
{
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
@ -174,6 +186,11 @@ PyObject * THCPModule_getDriverVersion(PyObject *self)
 | 
			
		||||
  return PyLong_FromLong((long) driverVersion);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject * THCPModule_getCompiledVersion(PyObject *self)
 | 
			
		||||
{
 | 
			
		||||
  return PyLong_FromLong((long) CUDA_VERSION);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject * THCPModule_getRNGState(PyObject *_unused)
 | 
			
		||||
{
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
@ -297,6 +314,15 @@ PyObject * THCPModule_cudaUnlockMutex(PyObject *module)
 | 
			
		||||
  Py_RETURN_NONE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PyObject * THCPModule_emptyCache(PyObject *_unused)
 | 
			
		||||
{
 | 
			
		||||
  HANDLE_TH_ERRORS
 | 
			
		||||
  auto device_allocator = THCState_getDeviceAllocator(state);
 | 
			
		||||
  THCudaCheck(device_allocator->emptyCache(device_allocator->state));
 | 
			
		||||
  END_HANDLE_TH_ERRORS
 | 
			
		||||
  Py_RETURN_NONE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
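These two bindings surface in Python as torch.cuda.get_device_capability and torch.cuda.empty_cache; a hedged usage sketch (wrapper names per current torch.cuda, and they may differ slightly around this release):

    import torch

    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability(torch.cuda.current_device())
        print('compute capability %d.%d' % (major, minor))
        torch.cuda.empty_cache()   # release cached blocks held by the caching allocator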
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Cuda module initialization
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@ -369,13 +395,16 @@ static struct PyMethodDef _THCPModule_methods[] = {
 | 
			
		||||
  {"_cuda_getDevice",   (PyCFunction)THCPModule_getDevice_wrap,   METH_NOARGS,  NULL},
 | 
			
		||||
  {"_cuda_getDeviceCount", (PyCFunction)THCPModule_getDeviceCount_wrap, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_getDeviceName", (PyCFunction)THCPModule_getDeviceName_wrap, METH_O,   NULL},
 | 
			
		||||
  {"_cuda_getDeviceCapability", (PyCFunction)THCPModule_getDeviceCapability_wrap, METH_O,   NULL},
 | 
			
		||||
  {"_cuda_getCurrentStream", (PyCFunction)THCPModule_getCurrentStream_wrap, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_getCurrentBlasHandle", (PyCFunction)THCPModule_getCurrentBlasHandle_wrap, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_setStream",    (PyCFunction)THCPModule_setStream_wrap,  METH_O, NULL},
 | 
			
		||||
  {"_cuda_isDriverSufficient", (PyCFunction)THCPModule_isDriverSufficient, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_getDriverVersion", (PyCFunction)THCPModule_getDriverVersion, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_getCompiledVersion", (PyCFunction)THCPModule_getCompiledVersion, METH_NOARGS, NULL},
 | 
			
		||||
  {"_cuda_getRNGState", (PyCFunction)THCPModule_getRNGState,      METH_NOARGS,  NULL},
 | 
			
		||||
  {"_cuda_setRNGState", (PyCFunction)THCPModule_setRNGState,      METH_O,       NULL},
 | 
			
		||||
  {"_cuda_emptyCache", (PyCFunction) THCPModule_emptyCache,       METH_NOARGS,  NULL},
 | 
			
		||||
  {"_cuda_manualSeed",  (PyCFunction)THCPModule_manualSeed,       METH_O,       NULL},
 | 
			
		||||
  {"_cuda_manualSeedAll", (PyCFunction)THCPModule_manualSeedAll,  METH_O,       NULL},
 | 
			
		||||
  {"_cuda_seed",        (PyCFunction)THCPModule_seed,             METH_NOARGS,  NULL},
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,5 @@
 | 
			
		||||
#define __STDC_FORMAT_MACROS
 | 
			
		||||
 | 
			
		||||
#include <Python.h>
 | 
			
		||||
#include <structmember.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,5 @@
 | 
			
		||||
#define __STDC_FORMAT_MACROS
 | 
			
		||||
 | 
			
		||||
#include <Python.h>
 | 
			
		||||
#include <structmember.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -228,12 +228,12 @@ struct algorithm_search<cudnnConvolutionFwdAlgo_t> {
 | 
			
		||||
        conv.cdesc.desc,
 | 
			
		||||
        conv.odesc.desc,
 | 
			
		||||
        out,
 | 
			
		||||
        1,
 | 
			
		||||
        n_algo,
 | 
			
		||||
        &algoCount,
 | 
			
		||||
        perfResults,
 | 
			
		||||
        ws.data,
 | 
			
		||||
        ws.size));
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, n_algo);
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, algoCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void getAlgorithm(
 | 
			
		||||
@ -302,12 +302,12 @@ struct algorithm_search<cudnnConvolutionBwdDataAlgo_t> {
 | 
			
		||||
        conv.cdesc.desc,
 | 
			
		||||
        conv.idesc.desc,
 | 
			
		||||
        in,
 | 
			
		||||
        1,
 | 
			
		||||
        n_algo,
 | 
			
		||||
        &algoCount,
 | 
			
		||||
        perfResults,
 | 
			
		||||
        ws.data,
 | 
			
		||||
        ws.size));
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, n_algo);
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, algoCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void getAlgorithm(cudnnHandle_t handle, const Convolution& conv, cudnnConvolutionBwdDataAlgo_t* algo) {
 | 
			
		||||
@ -376,12 +376,12 @@ struct algorithm_search<cudnnConvolutionBwdFilterAlgo_t> {
 | 
			
		||||
        conv.cdesc.desc,
 | 
			
		||||
        conv.wdesc.desc,
 | 
			
		||||
        wght,
 | 
			
		||||
        1,
 | 
			
		||||
        n_algo,
 | 
			
		||||
        &algoCount,
 | 
			
		||||
        perfResults,
 | 
			
		||||
        ws.data,
 | 
			
		||||
        ws.size));
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, n_algo);
 | 
			
		||||
    return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, algoCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void getAlgorithm(
 | 
			
		||||
 | 
			
		||||
@ -36,7 +36,7 @@
 | 
			
		||||
#define COPY_FROM_ARRAY_CUDA(ELTYPE, ARRAY, STORAGE, SIZE) \
 | 
			
		||||
{ \
 | 
			
		||||
  ELTYPE *arrdata = (ELTYPE*)PyArray_DATA(ARRAY);              \
 | 
			
		||||
  std::unique_ptr<load_real> data_guard(new load_real[SIZE]);  \
 | 
			
		||||
  std::unique_ptr<load_real[]> data_guard(new load_real[SIZE]);  \
 | 
			
		||||
  load_real *data = data_guard.get();                          \
 | 
			
		||||
  for (size_t i=0; i<SIZE; i++) {                              \
 | 
			
		||||
    data[i] = arrdata[i];                                      \
 | 
			
		||||
@ -51,7 +51,7 @@
 | 
			
		||||
#define COPY_FROM_ARRAY_CUDA_HALF(ELTYPE, ARRAY, STORAGE, SIZE) \
 | 
			
		||||
{ \
 | 
			
		||||
  ELTYPE *arrdata = (ELTYPE*)PyArray_DATA(ARRAY);                  \
 | 
			
		||||
  std::unique_ptr<load_real> data_guard(new load_real[SIZE]);      \
 | 
			
		||||
  std::unique_ptr<load_real[]> data_guard(new load_real[SIZE]);      \
 | 
			
		||||
  load_real *data = data_guard.get();                              \
 | 
			
		||||
  for (size_t i=0; i<SIZE; i++) {                                  \
 | 
			
		||||
    data[i] = arrdata[i];                                          \
 | 
			
		||||
@ -379,7 +379,7 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject
 | 
			
		||||
    real *data = tensor->storage->data;
 | 
			
		||||
#else
 | 
			
		||||
    size_t numel = THTensor_(numel)(LIBRARY_STATE tensor);
 | 
			
		||||
    std::unique_ptr<load_real> data_guard(new load_real[numel]);
 | 
			
		||||
    std::unique_ptr<load_real[]> data_guard(new load_real[numel]);
 | 
			
		||||
    load_real *data = data_guard.get();
 | 
			
		||||
#endif
 | 
			
		||||
    THPObjectPtr final_sequence;
 | 
			
		||||
@ -778,7 +778,7 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
  // store THPTensors rather than THTensors.
 | 
			
		||||
 | 
			
		||||
  std::vector<Py_ssize_t> indexingDims;
 | 
			
		||||
  std::vector<THPIndexTensor*>indexers;
 | 
			
		||||
  std::vector<THPPointer<THPIndexTensor>> indexers;
 | 
			
		||||
 | 
			
		||||
  if (THPTensor_(_checkSingleSequenceTriggersAdvancedIndexing)(index)) {
 | 
			
		||||
    // Handle the special case where we only have a single indexer
 | 
			
		||||
@ -791,7 +791,7 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    indexingDims.push_back(0);
 | 
			
		||||
    indexers.push_back(indexer);
 | 
			
		||||
    indexers.push_back(THPPointer<THPIndexTensor>(indexer));
 | 
			
		||||
  } else {
 | 
			
		||||
    // The top-level indexer should be a sequence, per the check above
 | 
			
		||||
    THPObjectPtr fast(PySequence_Fast(index, NULL));
 | 
			
		||||
@ -827,15 +827,10 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
              "convertible to LongTensors. The indexing object at position %zd is of type %s "
 | 
			
		||||
              "and cannot be converted", i, THPUtils_typename(obj));
 | 
			
		||||
 | 
			
		||||
          // Clean up Indexers
 | 
			
		||||
          for (auto& idx : indexers) {
 | 
			
		||||
            THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
 | 
			
		||||
            Py_DECREF(idx);
 | 
			
		||||
          }
 | 
			
		||||
          return false;
 | 
			
		||||
        }
 | 
			
		||||
        indexingDims.push_back(i + ellipsisOffset);
 | 
			
		||||
        indexers.push_back(indexer);
 | 
			
		||||
        indexers.push_back(THPPointer<THPIndexTensor>(indexer));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@ -849,7 +844,7 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
  for (const auto& indexer : indexers) {
 | 
			
		||||
    maybeBroadcasted.emplace_back(THIndexTensor_(new)(LIBRARY_STATE_NOARGS));
 | 
			
		||||
    // borrow the underlying Tensor from the indexer map
 | 
			
		||||
    candidates.emplace_back(indexer->cdata);
 | 
			
		||||
    candidates.emplace_back(indexer.get()->cdata);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Broadcast/Expand indexing Tensors as necessary
 | 
			
		||||
@ -888,11 +883,6 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
              "for dimension %lld (of size %lld)",
 | 
			
		||||
              (long long)indexAtDim, (long long)dim, (long long)sizeAtDim);
 | 
			
		||||
 | 
			
		||||
          // Clean up Indexers
 | 
			
		||||
          for (auto& idx : indexers) {
 | 
			
		||||
            THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
 | 
			
		||||
            Py_DECREF(idx);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          return false;
 | 
			
		||||
        }
 | 
			
		||||
@ -907,19 +897,9 @@ static bool THPTensor_(_convertToTensorIndexers)(
 | 
			
		||||
    }
 | 
			
		||||
    PyErr_Format(PyExc_IndexError, "The advanced indexing objects could not be broadcast");
 | 
			
		||||
 | 
			
		||||
    // Clean up Indexers
 | 
			
		||||
    for (auto& idx : indexers) {
 | 
			
		||||
      THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
 | 
			
		||||
      Py_DECREF(idx);
 | 
			
		||||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Clean up Indexers
 | 
			
		||||
  for (auto& idx : indexers) {
 | 
			
		||||
    THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
 | 
			
		||||
    Py_DECREF(idx);
 | 
			
		||||
  }
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -761,6 +761,12 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
 | 
			
		||||
        - accreal start
 | 
			
		||||
        - accreal end
 | 
			
		||||
        - CONSTANT 1
 | 
			
		||||
      - arguments:
 | 
			
		||||
        - arg: THTensor* result
 | 
			
		||||
          output: True
 | 
			
		||||
        - CONSTANT 0
 | 
			
		||||
        - accreal end
 | 
			
		||||
        - CONSTANT 1
 | 
			
		||||
]]
 | 
			
		||||
 | 
			
		||||
[[
 | 
			
		||||
 | 
			
		||||
@@ -78,7 +78,7 @@ using GraphsAttr = VectorAttributeValue<std::shared_ptr<Graph>,AttributeKind::gs

// CRTP so that Node which inherits Attributes can be return for
// method chaining e.g:
// Node * n = g->create(kSelect)->set_i(kOffset,3)->set_f(kValue,3.5);
// Node * n = g->create(kSelect)->i_(kOffset,3)->f_(kValue,3.5);
// we return Derived* pointers because Nodes are normally held as pointers.
template<typename Derived>
struct Attributes {

@@ -69,7 +69,8 @@ void encodeTensor(onnx::TensorProto * p, const at::Tensor & tensor) {
      break;
  }
  p->set_data_type(onnx_type);
  at::Tensor cont = tensor.toType(at::CPU(at_type)).contiguous();
  // CPU's HalfTensor doesn't have contiguous(), so first calling contiguous()
  at::Tensor cont = tensor.contiguous().toType(at::CPU(at_type));
  p->set_raw_data(cont);
}

@ -79,40 +80,50 @@ void addAttribute(onnx::NodeProto * n_p, jit::Node * n, jit::Symbol name) {
 | 
			
		||||
  switch(n->kindOf(name)) {
 | 
			
		||||
    case AttributeKind::f:
 | 
			
		||||
      attr->set_f(n->f(name));
 | 
			
		||||
      attr->set_type(onnx::aFLOAT);
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::fs:
 | 
			
		||||
      attr->set_type(onnx::aFLOATS);
 | 
			
		||||
      for(auto & v : n->fs(name))
 | 
			
		||||
        attr->add_floats(v);
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::i:
 | 
			
		||||
      attr->set_type(onnx::aINT);
 | 
			
		||||
      attr->set_i(n->i(name));
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::is:
 | 
			
		||||
      attr->set_type(onnx::aINTS);
 | 
			
		||||
      for(auto & v : n->is(name))
 | 
			
		||||
        attr->add_ints(v);
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::s:
 | 
			
		||||
      attr->set_type(onnx::aSTRING);
 | 
			
		||||
      attr->set_s(n->s(name));
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::ss:
 | 
			
		||||
      attr->set_type(onnx::aSTRINGS);
 | 
			
		||||
      for(auto & v : n->ss(name))
 | 
			
		||||
        attr->add_strings(v);
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::t: {
 | 
			
		||||
      attr->set_type(onnx::aTENSOR);
 | 
			
		||||
      auto t = attr->mutable_t();
 | 
			
		||||
      encodeTensor(t, n->t(name));
 | 
			
		||||
    } break;
 | 
			
		||||
    case AttributeKind::ts:
 | 
			
		||||
      attr->set_type(onnx::aTENSORS);
 | 
			
		||||
      for(auto & v : n->ts(name)) {
 | 
			
		||||
        auto t = attr->add_tensors();
 | 
			
		||||
        encodeTensor(t, v);
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
    case AttributeKind::g: {
 | 
			
		||||
      attr->set_type(onnx::aGRAPH);
 | 
			
		||||
      auto g = attr->mutable_g();
 | 
			
		||||
      encodeGraph(g, n->g(name), {});
 | 
			
		||||
    } break;
 | 
			
		||||
    case AttributeKind::gs:
 | 
			
		||||
      attr->set_type(onnx::aGRAPHS);
 | 
			
		||||
      for(auto & v : n->gs(name)) {
 | 
			
		||||
        auto g = attr->add_graphs();
 | 
			
		||||
        encodeGraph(g, v, {});
 | 
			
		||||
@ -191,6 +202,9 @@ void encodeGraph(onnx::GraphProto * p_g, const std::shared_ptr<Graph> & g, const
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    auto p_n = p_g->add_node();
 | 
			
		||||
    if (node->getSourceLocation()) {
 | 
			
		||||
      p_n->set_doc_string(node->getSourceLocation()->python_traceback);
 | 
			
		||||
    }
 | 
			
		||||
    for(auto input : node->inputs()) {
 | 
			
		||||
      p_n->add_input(node_name(input));
 | 
			
		||||
    }
 | 
			
		||||
@@ -256,11 +270,18 @@ void validateGraph(const std::shared_ptr<Graph>& graph) {
}

std::string ExportGraph(const std::shared_ptr<Graph>& graph,
                        const std::vector<at::Tensor> & initializers) {
                        const std::vector<at::Tensor> & initializers,
                        int64_t onnx_opset_version) {

  validateGraph(graph);

  onnx::ModelProto model_proto;
  model_proto.set_producer_name("pytorch");
  model_proto.set_producer_version("0.3");
  auto* imp = model_proto.add_opset_import();
  // This is the version of ONNX operator set we are targeting
  imp->set_version(onnx_opset_version);

  // Set up nanopb callbacks and compute the amount of space needed to store
  // the resulting protobuf
  encodeModel(&model_proto, graph, initializers);

@@ -5,6 +5,7 @@
namespace torch { namespace jit {

std::string ExportGraph(const std::shared_ptr<Graph>& graph,
                        const std::vector<at::Tensor> & initializers);
                        const std::vector<at::Tensor> & initializers,
                        int64_t onnx_opset_version);

}}
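// Illustrative only (not part of the diff): with the extra parameter, a caller
// now passes the target ONNX operator-set version explicitly, e.g. (assuming a
// graph and its initializers are already in hand; the version number below is
// a placeholder and in practice comes from the Python export call shown later):
//
//   std::string serialized = ExportGraph(graph, initializers, /*onnx_opset_version=*/2);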
@ -261,6 +261,14 @@ CompiledFusionFunction::CompiledFusionFunction(const std::string & name, Annotat
 | 
			
		||||
  , output_desc(agraph.output_desc) {
 | 
			
		||||
  JIT_CUDA_CHECK(cudaGetDevice(&device));
 | 
			
		||||
  JIT_CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
 | 
			
		||||
  if ((prop.major >= 6 && CUDA_VERSION < 8000) ||
 | 
			
		||||
      (prop.major >= 7 && CUDA_VERSION < 9000)) {
 | 
			
		||||
    std::stringstream err_string;
 | 
			
		||||
    err_string << "PyTorch compiled with insufficient CUDA version: " 
 | 
			
		||||
	       << CUDA_VERSION << " for the current GPU device " << prop.name 
 | 
			
		||||
	       << " with device capability " << prop.major << "." << prop.minor;
 | 
			
		||||
    throw std::runtime_error(err_string.str());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::stringstream cu;
 | 
			
		||||
  concat_desc = codegen::emitCompilationUnit(cu, name, agraph);
 | 
			
		||||
 | 
			
		||||
@ -43,9 +43,8 @@ _(split) \
 | 
			
		||||
_(Offset) \
 | 
			
		||||
_(value) \
 | 
			
		||||
_(Subgraph) \
 | 
			
		||||
_(SpatialBN) \
 | 
			
		||||
_(BatchNormalization) \
 | 
			
		||||
_(Conv) \
 | 
			
		||||
_(Caffe2ConvTranspose) \
 | 
			
		||||
_(ConvTranspose) \
 | 
			
		||||
_(is_test) \
 | 
			
		||||
_(epsilon) \
 | 
			
		||||
@ -75,6 +74,8 @@ _(shape) \
 | 
			
		||||
_(axes) \
 | 
			
		||||
_(group) \
 | 
			
		||||
_(inplace) \
 | 
			
		||||
_(transA) \
 | 
			
		||||
_(transB) \
 | 
			
		||||
_(other)
 | 
			
		||||
 | 
			
		||||
enum BuiltinSymbol {
 | 
			
		||||
 | 
			
		||||
@ -41,6 +41,7 @@ void printNodeRef(std::ostream & out, const Node * n) {
 | 
			
		||||
template <typename T>
 | 
			
		||||
std::ostream& operator<<(std::ostream & out, const std::vector<T> & nodes) {
 | 
			
		||||
  out << at::ArrayRef<T>{nodes};
 | 
			
		||||
  return out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
@ -262,7 +263,15 @@ std::ostream& printNode(std::ostream & out, const Node * n, std::vector<const No
 | 
			
		||||
  } else {
 | 
			
		||||
    emitUses(out,n);
 | 
			
		||||
  }
 | 
			
		||||
  out << "];\n";
 | 
			
		||||
  out << "]";
 | 
			
		||||
  std::string scopeName = n->scopeName();
 | 
			
		||||
  if (scopeName.empty()) {
 | 
			
		||||
    out << ";\n";
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    out << ", ";
 | 
			
		||||
    out << "scope: " << scopeName << ";\n";
 | 
			
		||||
  }
 | 
			
		||||
  return out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -60,6 +60,73 @@ static inline bool operator==(const Use & a, const Use & b) {
// Graph holds a list of parameters.
struct Param;

// SourceLocation represents source code-level debug information for a node.
// It contains a Python stack trace that represents the provenance of a given
// node in the trace.
struct SourceLocation {
  SourceLocation(std::string python_traceback)
  : python_traceback(std::move(python_traceback)) {}
  std::string python_traceback;
};

// Scope is a node of a trie that represents the tree of nested scopes.
// Individual scopes are pushed and popped from Graph, which holds a
// pointer to the current scope. Each Node in Graph holds a pointer
// to the scope that was current when the node was created.
// The trie never needs to shrink, it only grows until it is disposed
// of when Graph is deallocated. Hence, pointers to scopes held by nodes
// will always be valid as long as Graph is alive.
struct Scope {
private:
  Scope* parent_;
  Symbol name_;
  std::vector<std::unique_ptr<Scope> > children_;
public:
  Scope() {
    name_ = stringToSymbol("");
    parent_ = NULL;
  }
  Scope(Scope* parent, Symbol name) {
    name_ = name;
    parent_ = parent;
  }
  Scope* push(Symbol name) {
    children_.push_back(std::unique_ptr<Scope>(new Scope(this, name)));
    return children_.back().get();
  }
  Scope* parent() {
    if (parent_ == NULL) {
      throw std::runtime_error("Cannot get parent from Scope with no parent");
    }
    return parent_;
  }
  bool isRoot() {
    return parent_ == NULL;
  }
  Scope* getRoot() {
    Scope* current = this;
    while (current->parent_) {
      current = current->parent_;
    }
    return current;
  }
  Symbol name() {
    return name_;
  }
  std::string namesFromRoot(const std::string& separator="/") {
    std::string out = std::string(symbolToString(this->name_));
    if (this->isRoot()) {
      return out;
    }
    Scope* parent = this->parent_;
    while (!parent->isRoot()) {
      out = std::string(symbolToString(parent->name_)) + separator + out;
      parent = parent->parent_;
    }
    return out;
  }
};
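// Illustrative sketch (not part of the diff): building a small scope trie and
// reading a node's qualified name. The scope names here are made up for the example.
//
//   Scope root;                                    // unnamed root
//   Scope* conv = root.push(stringToSymbol("conv1"));
//   Scope* bn   = conv->push(stringToSymbol("bn"));
//   bn->namesFromRoot();                           // "conv1/bn"
//   bn->getRoot() == &root;                        // true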
// the list types are intentionally simple, but we type-def
// them here so if we need to change them, refactoring will be easier
using node_list = std::vector<Node*>;
@@ -113,6 +180,8 @@ private:
  size_t unique_ = 0;          // unique id
  size_t stage_ = 0;           // 0-forward, 1-backward, 2-double-backward,...
  std::string debug_name_;
  std::shared_ptr<SourceLocation> source_location_;
  Scope* scope_;
protected:
  TypePtr type_;
  Node(Graph * graph_, NodeKind kind_); //defined after graph
@@ -150,6 +219,13 @@ public:
  const std::string & debugName() const {
    return debug_name_;
  }
  Node* setSourceLocation(std::shared_ptr<SourceLocation> sl) {
    source_location_ = sl;
    return this;
  }
  std::shared_ptr<SourceLocation> getSourceLocation() const {
    return source_location_;
  }
  Graph * owningGraph() {
    return graph_;
  }
@@ -171,6 +247,18 @@ public:
  size_t stage() const {
    return stage_;
  }
  Scope* scope() {
    return scope_;
  }
  void setScope(Scope* scope) {
    scope_ = scope;
  }
  std::string scopeName() const {
    if (scope_ == NULL) {
      return "";
    }
    return scope_->namesFromRoot();
  }
  // NB: This returns an ArrayRef; that means that it will
  // get invalidated if you resize inputs (e.g., using addInput)
  // We can't return a std::vector<Node*>& because there's no
@@ -511,11 +599,7 @@ protected:
  //
  // NB: This does NOT clone stages.  You're expected to set the stage correctly
  // if you are going to preserve it.
  virtual void cloneFrom(Node * s) {
    if (s->hasType()) setType(s->type());
    setDebugName(s->debugName());
    copyAttributes(*s);
  }
  virtual void cloneFrom(Node * s);
};

struct Graph {
@@ -533,6 +617,9 @@ private:

  size_t new_node_stage_;

  std::shared_ptr<Scope> scope_root_;
  Scope * current_scope_;

  // holds outputs in a way that can be reflected
  // as a Use object
  // also used as the beginning/end of the circular node list to avoid
@@ -540,11 +627,17 @@ private:
  Node * const output_;

public:
  Graph()

  Graph(std::shared_ptr<Scope> scope_root)
  : next_unique_(0)
  , new_node_stage_(0)
  , scope_root_(scope_root)
  , current_scope_(scope_root_.get())
  , output_(initOutput(create(kReturn))) {}

  Graph()
  : Graph( std::make_shared<Scope>()) {}

  at::ArrayRef<Node*> inputs() {
    return inputs_;
  }
@@ -600,6 +693,29 @@ public:
  Node * addInput() {
    return addInput(create(kParam));
  }
  void push_scope(const std::string& scope_name) {
    current_scope_ = current_scope_->push(stringToSymbol(scope_name));
  }
  void pop_scope() {
    current_scope_ = current_scope_->parent();
  }
  Scope * current_scope() {
    return current_scope_;
  }
  void set_current_scope(Scope* scope) {
    if (scope->getRoot() != scope_root_.get()) {
      throw std::runtime_error("trying to set a scope as current that does not belong to the Graph's scope trie");
    }
    current_scope_ = scope;
  }
  ResourceGuard set_current_scope_temporary(Scope* scope) {
    auto prev_scope = current_scope_;
    this->set_current_scope(scope);
    return ResourceGuard([prev_scope, this]() { this->current_scope_ = prev_scope; });
  }
  std::shared_ptr<Scope> scope_root() {
    return scope_root_;
  }
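  // Illustrative sketch (not part of the diff): how the scope API above is
  // meant to be used while building a graph. The scope names are made up.
  //
  //   g.push_scope("conv1");           // nodes created now get scope "conv1"
  //   g.push_scope("bn");              // nested scope "conv1/bn"
  //   g.pop_scope();                   // back to "conv1"
  //   {
  //     auto guard = g.set_current_scope_temporary(g.scope_root().get());
  //     // nodes created here are unscoped; the previous scope is restored on exit
  //   }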
  Node * addInput(Node* n) {
    JIT_ASSERT(n->kind() == kParam);
@@ -676,7 +792,8 @@ public:
  }
  Node * createFusionGroup() {
    auto n = create(kFusionGroup);
    n->g_(kSubgraph,std::make_shared<Graph>());
    auto subgraph = std::make_shared<Graph>(scope_root_);
    n->g_(kSubgraph, subgraph);
    return n;
  }
  Node * createPythonOp(THPObjectPtr&& pyobj, const std::string & cconv, bool is_legacy, pyobj_list&& scalar_args);
@@ -746,9 +863,10 @@ inline Node::Node(Graph * graph_, NodeKind kind_) :
  graph_(graph_),
  unique_(graph_->next_unique_++),
  stage_(graph_->new_node_stage_),
  scope_(graph_->current_scope_) ,
  type_(getInitialType(kind_)) {
  graph_->all_nodes.emplace(this);
}
    graph_->all_nodes.emplace(this);
  }

inline void Node::destroy() {
  JIT_ASSERT(inGraphList());
@@ -770,6 +888,16 @@ inline Node* Node::makeMultireturn() {
  return select;
}

inline void Node::cloneFrom(Node * s) {
  if (s->hasType()) setType(s->type());
  setDebugName(s->debugName());
  setSourceLocation(s->getSourceLocation());
  if (s->owningGraph()->scope_root_ == owningGraph()->scope_root_) {
    scope_ = s->scope_;
  }
  copyAttributes(*s);
}

// Helper macros for constructing switch statements over Node types
 | 
			
		||||
// instead of heavy-weight visitors
 | 
			
		||||
// read 'between' these defines to see how they turn into a big switch
 | 
			
		||||
 | 
			
		||||
@ -33,7 +33,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
    throw std::logic_error("ToONNX: tracing state is expired");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  auto new_graph = std::make_shared<Graph>();
 | 
			
		||||
  auto new_graph = std::make_shared<Graph>(state->graph->scope_root());
 | 
			
		||||
  std::unordered_map<void*, Node*> new_buffer_map;
 | 
			
		||||
 | 
			
		||||
  torch::autograd::SymbolicContext ctx;
 | 
			
		||||
@ -86,6 +86,9 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
        if (!outputs[i]->hasType()) {
 | 
			
		||||
          outputs[i]->setType(old->typeOption());
 | 
			
		||||
        }
 | 
			
		||||
        // Copy over source location information to all nodes created by
 | 
			
		||||
        // the symbolic
 | 
			
		||||
        outputs[i]->setSourceLocation(node->getSourceLocation());
 | 
			
		||||
        env[old] = outputs[i];
 | 
			
		||||
      } else {
 | 
			
		||||
        // Null output means that the ONNX op doesn't have outputs corresponding
 | 
			
		||||
@ -121,6 +124,31 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Cast output of symbolic() python implementation
 | 
			
		||||
  auto processSymbolicOutput = [&](const std::string& op_name, Node* n, const py::object& raw_output) {
 | 
			
		||||
    if (raw_output.ptr() == Py_None) {
 | 
			
		||||
      cloneNode(n);
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
    // Cast the outputs back to C++ and put them in the new graph
 | 
			
		||||
    std::vector<Node*> outputs;
 | 
			
		||||
    try {
 | 
			
		||||
      if (py::isinstance<Node>(raw_output)) {
 | 
			
		||||
        outputs = node_list{py::cast<Node*>(raw_output)};
 | 
			
		||||
      } else {
 | 
			
		||||
        outputs = py::cast<std::vector<Node*>>(raw_output);
 | 
			
		||||
      }
 | 
			
		||||
    } catch (const std::exception& ex) {
 | 
			
		||||
      std::ostringstream ss;
 | 
			
		||||
      ss << "Error casting results of symbolic for " << op_name
 | 
			
		||||
         << ": expected to return list of op nodes, instead received type ''"
 | 
			
		||||
         << py::str(raw_output.get_type()) << "': " << py::str(raw_output);
 | 
			
		||||
      throw std::runtime_error(ss.str());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    setOutputs(op_name, n, outputs);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  auto callPySymbolicFunction = [&](Node* n) {
 | 
			
		||||
    // The idea is delegate as much of the actual argument massaging to
 | 
			
		||||
    // Python as possible
 | 
			
		||||
@ -131,21 +159,11 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
        py_inputs[input_nr++] = py::cast(envFn(input));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    auto scope_guard = ctx.graph->set_current_scope_temporary(n->scope());
 | 
			
		||||
 | 
			
		||||
    py::object raw_output = onnx.attr("_run_symbolic_function")(ctx.graph, n, py_inputs);
 | 
			
		||||
 | 
			
		||||
    if (raw_output.ptr() == Py_None) {
 | 
			
		||||
      cloneNode(n);
 | 
			
		||||
    } else {
 | 
			
		||||
      // Cast the outputs back to C++ and put them in the new graph
 | 
			
		||||
      node_list outputs;
 | 
			
		||||
      if (py::isinstance<Node>(raw_output)) {
 | 
			
		||||
        outputs = node_list{py::cast<Node*>(raw_output)};
 | 
			
		||||
      } else {
 | 
			
		||||
        outputs = py::cast<std::vector<Node*>>(raw_output);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      setOutputs(symbolToString(n->kind()), n, outputs);
 | 
			
		||||
    }
 | 
			
		||||
    processSymbolicOutput(symbolToString(n->kind()), n, raw_output);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  auto callPySymbolicMethod = [&](PythonOp* op) {
 | 
			
		||||
@ -179,25 +197,14 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
      py_symbolic_args[input_nr++] = obj;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    auto scope_guard = ctx.graph->set_current_scope_temporary(op->scope());
 | 
			
		||||
 | 
			
		||||
    // Call the symbolic function
 | 
			
		||||
    // Use a little trampoline function so we can give good error messages
 | 
			
		||||
    // upon argument mismatch
 | 
			
		||||
    py::object raw_output = onnx.attr("_run_symbolic_method")(op->name(), pyobj.attr("symbolic"), py_symbolic_args);
 | 
			
		||||
 | 
			
		||||
    if (raw_output.ptr() == Py_None) {
 | 
			
		||||
      cloneNode(op);
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Cast the outputs back to C++ and put them in the new graph
 | 
			
		||||
    std::vector<Node*> outputs;
 | 
			
		||||
    if (py::isinstance<Node>(raw_output)) {
 | 
			
		||||
      outputs = node_list{py::cast<Node*>(raw_output)};
 | 
			
		||||
    } else {
 | 
			
		||||
      outputs = py::cast<std::vector<Node*>>(raw_output);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    setOutputs(op->name(), op, outputs);
 | 
			
		||||
    processSymbolicOutput(op->name(), op, raw_output);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Finally, visit all nodes in the graph
 | 
			
		||||
@ -215,6 +222,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) {
 | 
			
		||||
      // Selects are translated by multi-return nodes.
 | 
			
		||||
      JIT_ASSERT(env.count(value) > 0);
 | 
			
		||||
    IR_ELSEIFM(CppOp)
 | 
			
		||||
      auto scope_guard = new_graph->set_current_scope_temporary(node->scope());
 | 
			
		||||
      if (auto fn = std::dynamic_pointer_cast<autograd::HasSymbolic>(value->fn)) {
 | 
			
		||||
        auto outputs = fn->symbolic(&ctx, fmap(node->inputs(), envFn));
 | 
			
		||||
        setOutputs(value->name(), node, outputs);
 | 
			
		||||
 | 
			
		||||
@@ -15,24 +15,62 @@ std::unordered_set<NodeKind> broadcasting = {
  kGemm,
};

bool isNopTranspose(const std::vector<int64_t> & perm) {
  for (size_t i = 0; i < perm.size(); i++)
    if (perm[i] != i)
      return false;
  return true;
}

// returns a vector `ret` such that transposing by `ret` is equivalent
// to transposing by `t1` and then by `t2`
std::vector<int64_t> composeTransposes(const std::vector<int64_t> & t1,
                                       const std::vector<int64_t> & t2) {
  JIT_ASSERT(t1.size() == t2.size());
  std::vector<int64_t> ret;
  for (size_t i = 0; i < t1.size(); i++) {
    JIT_ASSERT(   t1[i]  < t2.size());
    JIT_ASSERT(t2[t1[i]] < t2.size());
    ret.push_back(t2[t1[i]]);
  }
  return ret;
}
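// Illustrative only (not part of the diff): composing two permutations that are
// each a simple swap gives back the identity permutation, e.g.
//
//   composeTransposes({1, 0}, {1, 0});   // -> {0, 1}
//
// i.e. transposing twice by the same 2-d perm is a no-op per isNopTranspose above.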
bool isBroadcasting(Node *node) {
  return broadcasting.count(node->kind());
}

// When iterating over the dimension sizes, starting at the trailing dimension,
// the dimension sizes must either be equal, or one of them does not exist.
// First iterate over the 'from' tensor sizes. Ignore all leading and trailing
// dimensions that are simply one, since they can be trivially broadcasted.
// When iterating over the dimension sizes (with reduced 'from' tensor),
// starting at the trailing dimension, the dimension sizes must either be equal,
// or one of them does not exist.
//
//  equivalently:
//
// Test that 'from' is a suffix of 'to'.
// Note that this is NOT equivalent to numpy broadcasting semantics, and do
// not represent that generalized broadcasting that Pytorch implements in
// general. Rather, this is Caffe2-style broadcasting.
bool fusibleExpandTo(at::IntList from, at::IntList to) {
  auto f = from.rbegin();
  auto t = to.rbegin();
  for (; f != from.rend() && t != to.rend(); f++, t++) {
    // TODO: if 1->n expansion is supported, adjust this conditional.
    if (*f != *t) return false;
  if (from.size() > to.size()) {
    return false;
  }
  return f == from.rend();
  ssize_t from_dim_start = 0, from_dim_end = from.size() - 1;
  while (from_dim_start < from.size() && from[from_dim_start] == 1) {
    from_dim_start++;
  }
  while (from_dim_end > from_dim_start && from[from_dim_end] == 1) {
    from_dim_end--;
  }

  ssize_t f = from_dim_end;
  ssize_t t = to.size() - 1;
  for (; f >= from_dim_start && t >= 0; --f, --t) {
    if (from[f] != to[t]) return false;
  }

  // In the case that the 'to' tensor has leading ones in the same place that
  // the 'from' tensor does, f will be less than from_dim_start rather than
  // strictly equal. E.x.: to := [5, 1, 768] and from := [1, 1, 768]
  return f <= from_dim_start;
}
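// Illustrative only (not part of the diff): expected behaviour of the rewritten
// check, assuming the shapes below.
//
//   fusibleExpandTo({768},       {5, 768});     // true  - 'from' is a suffix of 'to'
//   fusibleExpandTo({1, 1, 768}, {5, 1, 768});  // true  - leading 1s of 'from' are ignored
//   fusibleExpandTo({2, 3},      {3, 2});       // false - trailing sizes differ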
void fuseBroadcast(std::shared_ptr<Graph>& graph) {
@@ -76,6 +114,58 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) {
  }
}

void fuseConsecutiveTransposes(std::shared_ptr<Graph>& graph) {
  for (auto it = graph->begin(); it != graph->end(); ++it) {
    auto* n = *it;

    if (n->kind() == kTranspose && n->input()->kind() == kTranspose) {
      auto origInput = n->input();
      n->is_(kperm, composeTransposes(origInput->is(kperm), n->is(kperm)));
      n->replaceInput(0, origInput->input());
      if (origInput->uses().size() == 0) {
        origInput->destroy();
      }
      continue;
    }
  }
}

void eliminateNopTranspose(std::shared_ptr<Graph>& graph) {
  for (auto it = graph->begin(); it != graph->end(); ++it) {
    auto* n = *it;

    if (n->kind() == kTranspose) {
      if (isNopTranspose(n->is(kperm))) {
        n->replaceAllUsesWith(n->input());
        it.destroyCurrent();
        continue;
      }
    }
  }
}

void fuseTransposeIntoGemm(std::shared_ptr<Graph>& graph) {
  static const std::vector<int64_t> simpleTransPerm({1,0});

  for (auto it = graph->begin(); it != graph->end(); ++it) {
    auto* n = *it;

    if (n->kind() == kGemm) {
      for (size_t i : {0,1}) {
        auto inp = n->inputs()[i];
        auto trans = i == 0 ? ktransA : ktransB;
        if (inp->kind() == kTranspose && inp->is(kperm) == simpleTransPerm) {
          n->replaceInput(i, inp->input());
          n->i_(trans, n->hasAttribute(trans) ? !n->i(trans) : 1);
          if (inp->uses().size() == 0) {
            inp->destroy();
          }
        }
      }
    }
  }
}
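// Illustrative only (not part of the diff): the effect of fuseTransposeIntoGemm
// on a toy graph, written as pseudo-IR.
//
//   before:  %t = Transpose[perm=[1,0]](%A)
//            %y = Gemm(%t, %B, %C)
//   after:   %y = Gemm[transA=1](%A, %B, %C)
//
// The dangling Transpose is destroyed once it has no remaining uses.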
// This optimization does ONNX-specific peephole optimizations.
//
// At the moment, here are the optimizations it does:
@@ -83,6 +173,9 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) {
//    easier for non-strided backends to more efficiently do broadcasts if this is
//    local information.  This optimization is not useful for PyTorch as 'expand'
//    is free.
//  - Fusing of consecutive transposes
//  - Elimiation of NOP transposes
//  - Fusing of transposes into Gemm
//
// Before you write an optimization here, ask yourself, "Could I do this
// optimization on ATen operators"?  If so, you should seriously consider
@@ -94,6 +187,9 @@ void PeepholeOptimizeONNX(std::shared_ptr<Graph>& graph) {
  // TODO: make it easier not to do O(k) iterations over the graph, where
  // k is the number of distinct peephole optimizations
  fuseBroadcast(graph);
  fuseConsecutiveTransposes(graph);
  eliminateNopTranspose(graph);
  fuseTransposeIntoGemm(graph);
}

}}

@ -13,6 +13,7 @@ void PeepholeOptimize(std::shared_ptr<Graph>& graph) {
 | 
			
		||||
  for (auto it = graph->begin(); it != graph->end(); ++it) {
 | 
			
		||||
    auto* n = *it;
 | 
			
		||||
 | 
			
		||||
    // eliminate redundant expand
 | 
			
		||||
    if (n->kind() == kexpand) {
 | 
			
		||||
      if (n->is(ksize) == n->input()->type()->expect<TensorType>()->sizes()) {
 | 
			
		||||
        n->replaceAllUsesWith(n->input());
 | 
			
		||||
 | 
			
		||||
@ -105,6 +105,7 @@ void initPythonIRBindings(PyObject * module_) {
 | 
			
		||||
      node->setType(other->typeOption());
 | 
			
		||||
      return node;
 | 
			
		||||
    })
 | 
			
		||||
    .NS(scopeName)
 | 
			
		||||
#define AS(name) def(#name,&Attributes<Node> :: name)
 | 
			
		||||
    // methods from Attributes
 | 
			
		||||
    .AS(copyAttributes)
 | 
			
		||||
 | 
			
		||||
@@ -19,7 +19,7 @@ namespace torch { namespace jit {

void initPythonTracerBindings(PyObject* module_) {
  auto m = py::handle(module_).cast<py::module>();
  py::class_<TracingState,std::shared_ptr<TracingState>>(m, "TracingState")
  py::class_<TracingState,std::shared_ptr<TracingState>>(m, "TracingState", py::dynamic_attr())
    // NB: no constructor; you have to get it from C++ code
    .def("__repr__", [](const TracingState& s) {
      std::ostringstream ss;
@@ -32,13 +32,17 @@ void initPythonTracerBindings(PyObject* module_) {
      ss << *s.graph;
      return ss.str();
    })
    .def("export", [](TracingState& s) {
      ASSERT_UNEXPIRED("export");
      return py::bytes(ExportGraph(s.graph, {}));
    .def("push_scope", [](TracingState& s, const std::string& scope_name) {
      ASSERT_UNEXPIRED("push_scope");
      s.push_scope(scope_name);
    })
    .def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers) {
    .def("pop_scope", [](TracingState& s) {
      ASSERT_UNEXPIRED("pop_scope");
      s.pop_scope();
    })
    .def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers, int64_t onnx_opset_version) {
      ASSERT_UNEXPIRED("export");
      return py::bytes(ExportGraph(s.graph, initializers));
      return py::bytes(ExportGraph(s.graph, initializers, onnx_opset_version));
    })
    .def("graph", [](TracingState& s) {
      return s.graph;
@@ -56,6 +60,12 @@ void initPythonTracerBindings(PyObject* module_) {
  m.def("_tracer_exit", [](variable_list var_outputs) {
    tracer::exit(var_outputs);
  });
  m.def("_get_tracing_state", [](const variable_list& vars) {
    return getTracingState(vars);
  });
  m.def("_is_tracing", [](const variable_list& vars) {
    return isTracing(vars);
  });
}

}} // namespace torch::jit

@@ -4,6 +4,11 @@
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/python_engine.h"
#include "torch/csrc/autograd/functions/special.h"
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/utils/python_strings.h"

#include <frameobject.h>
#include <patchlevel.h>

namespace torch { namespace jit { namespace tracer {

@@ -89,6 +94,28 @@ void nontraceableBackwardSubgraph(const variable_list& inputs, const variable_li
  std::make_shared<autograd::Eval>()->replaceSubgraph(inputs, outputs);
}

namespace {
// Python interpreter retrieval routine adapted from
// https://stackoverflow.com/a/8706144
std::string getPythonInterpreterStackTrace() {
  std::stringstream stack_trace;
  AutoGIL gil;
  PyThreadState *tstate = PyThreadState_GET();
  if (NULL != tstate && NULL != tstate->frame) {
    PyFrameObject *frame = tstate->frame;

    while (NULL != frame) {
      int line = PyCode_Addr2Line(frame->f_code, frame->f_lasti);
      std::string filename = THPUtils_unpackString(frame->f_code->co_filename);
      std::string funcname = THPUtils_unpackString(frame->f_code->co_name);
      stack_trace << filename << "(" << line << "): " << funcname << "\n";
      frame = frame->f_back;
    }
  }
  return stack_trace.str();
}
}  // namespace
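// Illustrative only (not part of the diff): the traceback produced by
// getPythonInterpreterStackTrace() is one "<file>(<line>): <function>" entry per
// Python frame, innermost first, e.g.
//
//   model.py(42): forward
//   trainer.py(17): run_step
//
// The file names and line numbers above are made up for the example.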
Node* recordTrace(std::string op, // TODO: make this a Symbol
 | 
			
		||||
                  at::ArrayRef<Variable> inputs,
 | 
			
		||||
                  at::ArrayRef<Variable> outputs) {
 | 
			
		||||
@ -99,6 +126,9 @@ Node* recordTrace(std::string op, // TODO: make this a Symbol
 | 
			
		||||
  auto state_lock = state->lock();
 | 
			
		||||
 | 
			
		||||
  Node *n = graph->create(stringToSymbol(op));
 | 
			
		||||
  auto sl = std::make_shared<SourceLocation>(getPythonInterpreterStackTrace());
 | 
			
		||||
  n->setSourceLocation(sl);
 | 
			
		||||
 | 
			
		||||
  for (Variable input : inputs) {
 | 
			
		||||
    n->addInput(getValueTrace(state, input));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -80,6 +80,14 @@ struct TracingState : public std::enable_shared_from_this<TracingState> {
 | 
			
		||||
  bool is_complete() const {
 | 
			
		||||
    return !is_expired() && graph->stage() == num_stages - 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void push_scope(const std::string& scope_name) {
 | 
			
		||||
    graph->push_scope(scope_name);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void pop_scope() {
 | 
			
		||||
    graph->pop_scope();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct ValueTracingStateElem {
 | 
			
		||||
 | 
			
		||||
@ -168,6 +168,21 @@ DEFINE_CONST(UINT64)
 | 
			
		||||
DEFINE_CONST(COMPLEX64)
 | 
			
		||||
DEFINE_CONST(COMPLEX128)
 | 
			
		||||
#undef DEFINE_CONST
 | 
			
		||||
 | 
			
		||||
#define DEFINE_CONST(C) \
 | 
			
		||||
const auto a##C = onnx_AttributeProto_AttributeType_##C;
 | 
			
		||||
DEFINE_CONST(FLOAT)
 | 
			
		||||
DEFINE_CONST(INT)
 | 
			
		||||
DEFINE_CONST(STRING)
 | 
			
		||||
DEFINE_CONST(TENSOR)
 | 
			
		||||
DEFINE_CONST(GRAPH)
 | 
			
		||||
DEFINE_CONST(FLOATS)
 | 
			
		||||
DEFINE_CONST(INTS)
 | 
			
		||||
DEFINE_CONST(STRINGS)
 | 
			
		||||
DEFINE_CONST(TENSORS)
 | 
			
		||||
DEFINE_CONST(GRAPHS)
 | 
			
		||||
#undef DEFINE_CONST
 | 
			
		||||
 | 
			
		||||
// C++ wrappers which simulate the Google C++ Protobuf API
 | 
			
		||||
//
 | 
			
		||||
// These are NOT COMPLETE wrappers. If you find something is missing, add it!
 | 
			
		||||
@ -270,6 +285,7 @@ public:
 | 
			
		||||
    proto.graphs  = list<GraphProto, onnx_GraphProto_fields>(&graphs);
 | 
			
		||||
  }
 | 
			
		||||
  void set_name(const std::string& s) { proto.name = string(&name, s); }
 | 
			
		||||
  void set_type(onnx_AttributeProto_AttributeType t) { proto.has_type = true; proto.type = t; }
 | 
			
		||||
  void set_f(float f) { proto.has_f = true; proto.f = f; }
 | 
			
		||||
  void set_i(int64_t i) { proto.has_i = true; proto.i = i; }
 | 
			
		||||
  void set_s(std::string s_) { proto.s = string(&s, s_); }
 | 
			
		||||
@ -290,6 +306,7 @@ public:
 | 
			
		||||
class NodeProto : public MicroProto<onnx_NodeProto> {
 | 
			
		||||
private:
 | 
			
		||||
  std::string op_type;
 | 
			
		||||
  std::string doc_string;
 | 
			
		||||
  unique_vector<std::string> inputs;
 | 
			
		||||
  unique_vector<std::string> outputs;
 | 
			
		||||
  unique_vector<AttributeProto> attributes;
 | 
			
		||||
@ -309,6 +326,7 @@ public:
 | 
			
		||||
    return ptr;
 | 
			
		||||
  }
 | 
			
		||||
  void set_op_type(const std::string& s) { proto.op_type= string(&op_type, s); }
 | 
			
		||||
  void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GraphProto : public MicroProto<onnx_GraphProto> {
 | 
			
		||||
@ -349,6 +367,15 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class OperatorSetIdProto : public MicroProto<onnx_OperatorSetIdProto> {
 | 
			
		||||
private:
 | 
			
		||||
  std::string domain;
 | 
			
		||||
public:
 | 
			
		||||
  OperatorSetIdProto() : MicroProto(onnx_OperatorSetIdProto_init_default) {}
 | 
			
		||||
  void set_domain(const std::string& s) { proto.domain = string(&domain, s); }
 | 
			
		||||
  void set_version(int64_t v) { proto.has_version = true; proto.version = v; }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ModelProto : public MicroProto<onnx_ModelProto> {
 | 
			
		||||
private:
 | 
			
		||||
  std::string producer_name;
 | 
			
		||||
@ -356,21 +383,26 @@ private:
 | 
			
		||||
  std::string domain;
 | 
			
		||||
  std::string doc_string;
 | 
			
		||||
  std::unique_ptr<GraphProto> graph;
 | 
			
		||||
  unique_vector<OperatorSetIdProto> opset_import;
 | 
			
		||||
public:
 | 
			
		||||
  ModelProto() : MicroProto(onnx_ModelProto_init_default) {
 | 
			
		||||
    proto.has_ir_version = true;
 | 
			
		||||
    proto.ir_version = onnx_Version_IR_VERSION;
 | 
			
		||||
    proto.producer_name = string(&producer_name, "pytorch");
 | 
			
		||||
    // TODO: stop hard-coding this
 | 
			
		||||
    proto.producer_version = string(&producer_version, "0.2");
 | 
			
		||||
    proto.domain = string(&domain, "com.facebook");
 | 
			
		||||
    proto.opset_import = list<OperatorSetIdProto, onnx_OperatorSetIdProto_fields>(&opset_import);
 | 
			
		||||
  }
 | 
			
		||||
  void set_model_version(int64_t i) { proto.has_model_version = true; proto.model_version = i; }
 | 
			
		||||
  void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); }
 | 
			
		||||
  void set_producer_name(const std::string& s) { proto.producer_name = string(&producer_name, s); }
 | 
			
		||||
  void set_producer_version(const std::string& s) { proto.producer_version = string(&producer_version, s); }
 | 
			
		||||
  GraphProto* mutable_graph() {
 | 
			
		||||
    proto.graph = msg<GraphProto, onnx_GraphProto_fields>(&graph);
 | 
			
		||||
    return graph.get();
 | 
			
		||||
  }
 | 
			
		||||
  OperatorSetIdProto* add_opset_import() {
 | 
			
		||||
    auto ptr = new OperatorSetIdProto();
 | 
			
		||||
    opset_import.emplace_back(ptr);
 | 
			
		||||
    return ptr;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}} // namespace torch::onnx
 | 
			
		||||
 | 
			
		||||
@ -10,7 +10,7 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
const pb_field_t onnx_AttributeProto_fields[12] = {
 | 
			
		||||
const pb_field_t onnx_AttributeProto_fields[13] = {
 | 
			
		||||
    PB_FIELD(  1, STRING  , OPTIONAL, CALLBACK, FIRST, onnx_AttributeProto, name, name, 0),
 | 
			
		||||
    PB_FIELD(  2, FLOAT   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, f, name, 0),
 | 
			
		||||
    PB_FIELD(  3, INT64   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, i, f, 0),
 | 
			
		||||
@ -22,6 +22,7 @@ const pb_field_t onnx_AttributeProto_fields[12] = {
 | 
			
		||||
    PB_FIELD(  9, BYTES   , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, strings, ints, 0),
 | 
			
		||||
    PB_FIELD( 10, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, tensors, strings, &onnx_TensorProto_fields),
 | 
			
		||||
    PB_FIELD( 11, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, graphs, tensors, &onnx_GraphProto_fields),
 | 
			
		||||
    PB_FIELD( 20, UENUM   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, type, graphs, 0),
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -31,17 +32,18 @@ const pb_field_t onnx_ValueInfoProto_fields[3] = {
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const pb_field_t onnx_NodeProto_fields[7] = {
 | 
			
		||||
const pb_field_t onnx_NodeProto_fields[8] = {
 | 
			
		||||
    PB_FIELD(  1, STRING  , REPEATED, CALLBACK, FIRST, onnx_NodeProto, input, input, 0),
 | 
			
		||||
    PB_FIELD(  2, STRING  , REPEATED, CALLBACK, OTHER, onnx_NodeProto, output, input, 0),
 | 
			
		||||
    PB_FIELD(  3, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, name, output, 0),
 | 
			
		||||
    PB_FIELD(  4, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, op_type, name, 0),
 | 
			
		||||
    PB_FIELD(  5, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_NodeProto, attribute, op_type, &onnx_AttributeProto_fields),
 | 
			
		||||
    PB_FIELD(  6, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, doc_string, attribute, 0),
 | 
			
		||||
    PB_FIELD(  7, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, domain, doc_string, 0),
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const pb_field_t onnx_ModelProto_fields[8] = {
 | 
			
		||||
const pb_field_t onnx_ModelProto_fields[9] = {
 | 
			
		||||
    PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, onnx_ModelProto, ir_version, ir_version, 0),
 | 
			
		||||
    PB_FIELD(  2, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_name, ir_version, 0),
 | 
			
		||||
    PB_FIELD(  3, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_version, producer_name, 0),
 | 
			
		||||
@ -49,6 +51,7 @@ const pb_field_t onnx_ModelProto_fields[8] = {
 | 
			
		||||
    PB_FIELD(  5, INT64   , OPTIONAL, STATIC  , OTHER, onnx_ModelProto, model_version, domain, 0),
 | 
			
		||||
    PB_FIELD(  6, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, doc_string, model_version, 0),
 | 
			
		||||
    PB_FIELD(  7, MESSAGE , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, graph, doc_string, &onnx_GraphProto_fields),
 | 
			
		||||
    PB_FIELD(  8, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_ModelProto, opset_import, graph, &onnx_OperatorSetIdProto_fields),
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -120,6 +123,13 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = {
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const pb_field_t onnx_OperatorSetIdProto_fields[3] = {
 | 
			
		||||
    PB_FIELD(  1, STRING  , OPTIONAL, CALLBACK, FIRST, onnx_OperatorSetIdProto, domain, domain, 0),
 | 
			
		||||
    PB_FIELD(  2, INT64   , OPTIONAL, STATIC  , OTHER, onnx_OperatorSetIdProto, version, domain, 0),
 | 
			
		||||
    PB_LAST_FIELD
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -132,7 +142,7 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = {
 | 
			
		||||
 * numbers or field sizes that are larger than what can fit in 8 or 16 bit
 | 
			
		||||
 * field descriptors.
 | 
			
		||||
 */
 | 
			
		||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto)
 | 
			
		||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(PB_FIELD_16BIT) && !defined(PB_FIELD_32BIT)
 | 
			
		||||
@ -143,7 +153,7 @@ PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_members
 | 
			
		||||
 * numbers or field sizes that are larger than what can fit in the default
 | 
			
		||||
 * 8 bit descriptors.
 | 
			
		||||
 */
 | 
			
		||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto)
 | 
			
		||||
PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -16,12 +16,31 @@ extern "C" {
 | 
			
		||||
 | 
			
		||||
/* Enum definitions */
 | 
			
		||||
typedef enum _onnx_Version {
 | 
			
		||||
    onnx_Version_IR_VERSION = 1
 | 
			
		||||
    onnx_Version__START_VERSION = 0,
 | 
			
		||||
    onnx_Version_IR_VERSION_2017_10_10 = 1,
 | 
			
		||||
    onnx_Version_IR_VERSION = 2
 | 
			
		||||
} onnx_Version;
 | 
			
		||||
#define _onnx_Version_MIN onnx_Version_IR_VERSION
 | 
			
		||||
#define _onnx_Version_MIN onnx_Version__START_VERSION
#define _onnx_Version_MAX onnx_Version_IR_VERSION
#define _onnx_Version_ARRAYSIZE ((onnx_Version)(onnx_Version_IR_VERSION+1))

typedef enum _onnx_AttributeProto_AttributeType {
    onnx_AttributeProto_AttributeType_UNDEFINED = 0,
    onnx_AttributeProto_AttributeType_FLOAT = 1,
    onnx_AttributeProto_AttributeType_INT = 2,
    onnx_AttributeProto_AttributeType_STRING = 3,
    onnx_AttributeProto_AttributeType_TENSOR = 4,
    onnx_AttributeProto_AttributeType_GRAPH = 5,
    onnx_AttributeProto_AttributeType_FLOATS = 6,
    onnx_AttributeProto_AttributeType_INTS = 7,
    onnx_AttributeProto_AttributeType_STRINGS = 8,
    onnx_AttributeProto_AttributeType_TENSORS = 9,
    onnx_AttributeProto_AttributeType_GRAPHS = 10
} onnx_AttributeProto_AttributeType;
#define _onnx_AttributeProto_AttributeType_MIN onnx_AttributeProto_AttributeType_UNDEFINED
#define _onnx_AttributeProto_AttributeType_MAX onnx_AttributeProto_AttributeType_GRAPHS
#define _onnx_AttributeProto_AttributeType_ARRAYSIZE ((onnx_AttributeProto_AttributeType)(onnx_AttributeProto_AttributeType_GRAPHS+1))

typedef enum _onnx_TensorProto_DataType {
    onnx_TensorProto_DataType_UNDEFINED = 0,
    onnx_TensorProto_DataType_FLOAT = 1,
@@ -63,6 +82,7 @@ typedef struct _onnx_NodeProto {
    pb_callback_t op_type;
    pb_callback_t attribute;
    pb_callback_t doc_string;
    pb_callback_t domain;
/* @@protoc_insertion_point(struct:onnx_NodeProto) */
} onnx_NodeProto;

@@ -91,6 +111,8 @@ typedef struct _onnx_AttributeProto {
    pb_callback_t strings;
    pb_callback_t tensors;
    pb_callback_t graphs;
    bool has_type;
    onnx_AttributeProto_AttributeType type;
/* @@protoc_insertion_point(struct:onnx_AttributeProto) */
} onnx_AttributeProto;

@@ -104,9 +126,17 @@ typedef struct _onnx_ModelProto {
    int64_t model_version;
    pb_callback_t doc_string;
    pb_callback_t graph;
    pb_callback_t opset_import;
/* @@protoc_insertion_point(struct:onnx_ModelProto) */
} onnx_ModelProto;

typedef struct _onnx_OperatorSetIdProto {
    pb_callback_t domain;
    bool has_version;
    int64_t version;
/* @@protoc_insertion_point(struct:onnx_OperatorSetIdProto) */
} onnx_OperatorSetIdProto;

typedef struct _onnx_TensorProto_Segment {
    bool has_begin;
    int64_t begin;
@@ -173,10 +203,10 @@ typedef struct _onnx_SparseTensorProto {
/* Default values for struct fields */

/* Initializer values for message structs */
#define onnx_AttributeProto_init_default         {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_AttributeProto_init_default         {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0}
#define onnx_ValueInfoProto_init_default         {{{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_NodeProto_init_default              {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_ModelProto_init_default             {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_NodeProto_init_default              {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_ModelProto_init_default             {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_GraphProto_init_default             {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_TensorProto_init_default            {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_default, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_TensorProto_Segment_init_default    {false, 0, false, 0}
@@ -186,10 +216,11 @@ typedef struct _onnx_SparseTensorProto {
#define onnx_TypeProto_TensorShapeProto_Dimension_init_default {false, 0, {{NULL}, NULL}}
#define onnx_TypeProto_TensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}}
#define onnx_TypeProto_SparseTensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_default}
#define onnx_AttributeProto_init_zero            {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_OperatorSetIdProto_init_default     {{{NULL}, NULL}, false, 0}
#define onnx_AttributeProto_init_zero            {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0}
#define onnx_ValueInfoProto_init_zero            {{{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_NodeProto_init_zero                 {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_ModelProto_init_zero                {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_NodeProto_init_zero                 {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_ModelProto_init_zero                {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_GraphProto_init_zero                {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_TensorProto_init_zero               {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_zero, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
#define onnx_TensorProto_Segment_init_zero       {false, 0, false, 0}
@@ -199,6 +230,7 @@ typedef struct _onnx_SparseTensorProto {
#define onnx_TypeProto_TensorShapeProto_Dimension_init_zero {false, 0, {{NULL}, NULL}}
#define onnx_TypeProto_TensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}}
#define onnx_TypeProto_SparseTensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_zero}
#define onnx_OperatorSetIdProto_init_zero        {{{NULL}, NULL}, false, 0}

/* Field tags (for use in manual encoding/decoding) */
#define onnx_GraphProto_node_tag                 1
@@ -212,12 +244,14 @@ typedef struct _onnx_SparseTensorProto {
#define onnx_NodeProto_output_tag                2
#define onnx_NodeProto_name_tag                  3
#define onnx_NodeProto_op_type_tag               4
#define onnx_NodeProto_domain_tag                7
#define onnx_NodeProto_attribute_tag             5
#define onnx_NodeProto_doc_string_tag            6
#define onnx_TypeProto_TensorShapeProto_dim_tag  1
#define onnx_ValueInfoProto_name_tag             1
#define onnx_ValueInfoProto_type_tag             2
#define onnx_AttributeProto_name_tag             1
#define onnx_AttributeProto_type_tag             20
#define onnx_AttributeProto_f_tag                2
#define onnx_AttributeProto_i_tag                3
#define onnx_AttributeProto_s_tag                4
@@ -229,12 +263,15 @@ typedef struct _onnx_SparseTensorProto {
#define onnx_AttributeProto_tensors_tag          10
#define onnx_AttributeProto_graphs_tag           11
#define onnx_ModelProto_ir_version_tag           1
#define onnx_ModelProto_opset_import_tag         8
#define onnx_ModelProto_producer_name_tag        2
#define onnx_ModelProto_producer_version_tag     3
#define onnx_ModelProto_domain_tag               4
#define onnx_ModelProto_model_version_tag        5
#define onnx_ModelProto_doc_string_tag           6
#define onnx_ModelProto_graph_tag                7
#define onnx_OperatorSetIdProto_domain_tag       1
#define onnx_OperatorSetIdProto_version_tag      2
#define onnx_TensorProto_Segment_begin_tag       1
#define onnx_TensorProto_Segment_end_tag         2
#define onnx_TypeProto_SparseTensorTypeProto_elem_type_tag 1
@@ -261,10 +298,10 @@ typedef struct _onnx_SparseTensorProto {
#define onnx_SparseTensorProto_values_tag        3

/* Struct field encoding specification for nanopb */
extern const pb_field_t onnx_AttributeProto_fields[12];
extern const pb_field_t onnx_AttributeProto_fields[13];
extern const pb_field_t onnx_ValueInfoProto_fields[3];
extern const pb_field_t onnx_NodeProto_fields[7];
extern const pb_field_t onnx_ModelProto_fields[8];
extern const pb_field_t onnx_NodeProto_fields[8];
extern const pb_field_t onnx_ModelProto_fields[9];
extern const pb_field_t onnx_GraphProto_fields[8];
extern const pb_field_t onnx_TensorProto_fields[12];
extern const pb_field_t onnx_TensorProto_Segment_fields[3];
@@ -274,6 +311,7 @@ extern const pb_field_t onnx_TypeProto_TensorShapeProto_fields[2];
extern const pb_field_t onnx_TypeProto_TensorShapeProto_Dimension_fields[3];
extern const pb_field_t onnx_TypeProto_TensorTypeProto_fields[3];
extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3];
extern const pb_field_t onnx_OperatorSetIdProto_fields[3];

/* Maximum encoded size of messages (where known) */
/* onnx_AttributeProto_size depends on runtime parameters */
@@ -289,6 +327,7 @@ extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3];
/* onnx_TypeProto_TensorShapeProto_Dimension_size depends on runtime parameters */
/* onnx_TypeProto_TensorTypeProto_size depends on runtime parameters */
#define onnx_TypeProto_SparseTensorTypeProto_size (8 + onnx_TypeProto_TensorShapeProto_size)
/* onnx_OperatorSetIdProto_size depends on runtime parameters */

/* Message IDs (where set with "msgid" option) */
#ifdef PB_MSGID
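The hunks above extend the generated nanopb header with the ONNX fields PyTorch's exporter now emits: `NodeProto.domain`, `AttributeProto.type`, and `ModelProto.opset_import` (plus the new `OperatorSetIdProto` message). For orientation, a minimal sketch with the standalone `onnx` Python package (an assumption; that package and its helper names are not part of this diff) that populates exactly those fields::

    # Sketch: build a tiny ONNX model that exercises the fields added above
    # (NodeProto.domain, ModelProto.opset_import).  Assumes the standalone
    # `onnx` package is installed; it is not part of this diff.
    import onnx
    from onnx import helper, TensorProto

    node = helper.make_node('Relu', inputs=['x'], outputs=['y'],
                            name='relu0', domain='')         # NodeProto.domain (tag 7)
    graph = helper.make_graph(
        [node], 'tiny',
        [helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3])],
        [helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3])])
    model = helper.make_model(
        graph,
        opset_imports=[helper.make_opsetid('', 6)])           # ModelProto.opset_import (tag 8)
    print(model.opset_import[0].version)                      # 6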
@@ -14,6 +14,7 @@ import ctypes
import os
import torch
import traceback
import warnings
from torch._six import raise_from
from multiprocessing.util import register_after_fork as _register_after_fork

@@ -65,11 +66,37 @@ http://www.nvidia.com/Download/index.aspx""")
The NVIDIA driver on your system is too old (found version {}).
Please update your GPU driver by downloading and installing a new
version from the URL: http://www.nvidia.com/Download/index.aspx
Alternatively, go to: https://pytorch.org/binaries to install
Alternatively, go to: http://pytorch.org to install
a PyTorch version that has been compiled with your version
of the CUDA driver.""".format(str(torch._C._cuda_getDriverVersion())))


def _check_capability():
    incorrect_binary_warn = """
    Found GPU%d %s which requires CUDA_VERSION >= %d for
     optimal performance and fast startup time, but your PyTorch was compiled
     with CUDA_VERSION %d. Please install the correct PyTorch binary
     using instructions from http://pytorch.org
    """

    old_gpu_warn = """
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    """

    CUDA_VERSION = torch._C._cuda_getCompiledVersion()
    for d in range(device_count()):
        capability = get_device_capability(d)
        major = capability[0]
        name = get_device_name(d)
        if CUDA_VERSION < 8000 and major >= 6:
            warnings.warn(incorrect_binary_warn % (d, name, 8000, CUDA_VERSION))
        elif CUDA_VERSION < 9000 and major >= 7:
            warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION))
        elif capability == (3, 0) or capability == (5, 0) or major < 3:
            warnings.warn(old_gpu_warn % (d, name, major, capability[1]))


def _lazy_call(callable):
    if _initialized:
        callable()
@@ -77,11 +104,26 @@ def _lazy_call(callable):
        # Don't store the actual traceback to avoid memory cycle
        _queued_calls.append((callable, traceback.format_stack()))

_lazy_call(_check_capability)


class DeferredCudaCallError(Exception):
    pass


def init():
    """Initialize PyTorch's CUDA state.  You may need to call
    this explicitly if you are interacting with PyTorch via
    its C API, as Python bindings for CUDA functionality will not
    be available until this initialization takes place.  Ordinary users
    should not need this, as all of PyTorch's CUDA methods
    automatically initialize CUDA state on-demand.

    Does nothing if the CUDA state is already initialized.
    """
    _lazy_init()


def _lazy_init():
    global _initialized, _cudart, _original_pid, _queued_calls
    if _initialized:
@@ -162,10 +204,10 @@ class device(object):
    def __enter__(self):
        if self.idx is -1:
            return
        _lazy_init()
        self.prev_idx = torch._C._cuda_getDevice()
        if self.prev_idx != self.idx:
            torch._C._cuda_setDevice(self.idx)
        _lazy_init()

    def __exit__(self, *args):
        if self.prev_idx != self.idx:
@@ -213,6 +255,19 @@ def get_device_name(device):
        return torch._C._cuda_getDeviceName(device)


def get_device_capability(device):
    """Gets the cuda capability of a device.

    Arguments:
        device (int): device for which to return the capability. This function is a
            no-op if this argument is negative.
    Returns:
        tuple(int, int): the major and minor cuda capability of the device
    """
    if device >= 0:
        return torch._C._cuda_getDeviceCapability(device)

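A short usage sketch of the capability query added above, mirroring how `_check_capability` consumes it (assumes at least one visible CUDA device)::

    # Sketch: query the compute capability of each visible GPU, using the
    # same calls _check_capability() above relies on.
    import torch

    if torch.cuda.is_available():
        for d in range(torch.cuda.device_count()):
            major, minor = torch.cuda.get_device_capability(d)
            print('GPU%d (%s): compute capability %d.%d'
                  % (d, torch.cuda.get_device_name(d), major, minor))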
@contextlib.contextmanager
def stream(stream):
    """Context-manager that selects a given stream.
@@ -223,6 +278,10 @@ def stream(stream):
    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.

    .. note:: Streams are per-device, and this function changes the "current
       stream" only for the currently selected device.  It is illegal to select
       a stream that belongs to a different device.
    """
    if stream is None:
        yield
@@ -238,7 +297,6 @@ def stream(stream):
def device_count():
    """Returns the number of GPUs available."""
    if is_available():
        _lazy_init()
        return torch._C._cuda_getDeviceCount()
    else:
        return 0
@@ -264,9 +322,18 @@ def current_stream():

def current_blas_handle():
    """Returns cublasHandle_t pointer to current cuBLAS handle"""
    _lazy_init()
    return torch._C._cuda_getCurrentBlasHandle()


def empty_cache():
    """Releases all unoccupied cached memory currently held by the caching
    allocator so that it can be used by other GPU applications and is visible in
    `nvidia-smi`."""
    if _initialized:
        return torch._C._cuda_emptyCache()


def _host_allocator():
    _lazy_init()
    return torch._C._cuda_cudaHostAllocator()
@@ -6,6 +6,10 @@ from . import cudart, check_error, cudaStatus
class Stream(torch._C._CudaStreamBase):
    """Wrapper around a CUDA stream.

    A CUDA stream is a linear sequence of execution that belongs to a specific
    device, independent from other streams.  See :ref:`cuda-semantics` for
    details.

    Arguments:
        device(int, optional): a device on which to allocate the Stream.
        priority(int, optional): priority of the stream. Lower numbers
@@ -21,6 +25,15 @@ class Stream(torch._C._CudaStreamBase):

        Arguments:
            event (Event): an event to wait for.

        .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see `CUDA
           documentation`_ for more info.

           This function returns without waiting for :attr:`event`: only future
           operations are affected.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        check_error(cudart().cudaStreamWaitEvent(self, event, ctypes.c_int(0)))

@@ -32,6 +45,9 @@ class Stream(torch._C._CudaStreamBase):

        Arguments:
            stream (Stream): a stream to synchronize.

        .. note:: This function returns without waiting for currently enqueued
           kernels in :attr:`stream`: only future operations are affected.
        """
        self.wait_event(stream.record_event())

@@ -63,7 +79,14 @@ class Stream(torch._C._CudaStreamBase):
        return True

    def synchronize(self):
        """Wait for all the kernels in this stream to complete."""
        """Wait for all the kernels in this stream to complete.

        .. note:: This is a wrapper around ``cudaStreamSynchronize()``: see
           `CUDA documentation`_ for more info.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        check_error(cudart().cudaStreamSynchronize(self))

    @staticmethod
@@ -107,10 +130,10 @@ class Event(object):

    Arguments:
        enable_timing (bool): indicates if the event should measure time
            (default: False)
        blocking (bool): if true, :meth:`wait` will be blocking (default: False)
        interprocess (bool): if true, the event can be shared between processes
            (default: False)
            (default: ``False``)
        blocking (bool): if ``True``, :meth:`wait` will be blocking (default: ``False``)
        interprocess (bool): if ``True``, the event can be shared between processes
            (default: ``False``)
    """

    DEFAULT = 0x0
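A usage sketch tying the stream and event documentation above together (assumes a CUDA device is present; `wait_stream` and `synchronize` are the methods documented in the hunks)::

    # Sketch: run work on a side stream and order it against the current
    # stream with wait_stream/synchronize, as documented above.
    import torch

    x = torch.cuda.FloatTensor(1024, 1024).normal_()
    side = torch.cuda.Stream()

    with torch.cuda.stream(side):                  # "current stream" is now `side`
        y = x.mm(x)                                # kernel enqueued on `side`

    torch.cuda.current_stream().wait_stream(side)  # future work waits for `side`
    side.synchronize()                             # block the host until `side` drains
    print(y.sum())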
@@ -1,17 +1,32 @@
"""
r"""
The ``distributions`` package contains parameterizable probability distributions
and sampling functions.

The :meth:`log_prob` method is useful for policy gradient based methods. If the
parameters of the distribution are differentiable, then the result of ``log_prob``
is also differentiable.
Policy gradient methods can be implemented using the
:meth:`~torch.distributions.Distribution.log_prob` method, when the probability
density function is differentiable with respect to its parameters. A basic
method is the REINFORCE rule:

Example::
.. math::

    probs = network(input)
    m = Multinomial(probs)
    \Delta\theta  = \alpha r \frac{\partial\log p(a|\pi^\theta(s))}{\partial\theta}

where :math:`\theta` are the parameters, :math:`\alpha` is the learning rate,
:math:`r` is the reward and :math:`p(a|\pi^\theta(s))` is the probability of
taking action :math:`a` in state :math:`s` given policy :math:`\pi^\theta`.

In practice we would sample an action from the output of a network, apply this
action in an environment, and then use ``log_prob`` to construct an equivalent
loss function. Note that we use a negative because optimisers use gradient
descent, whilst the rule above assumes gradient ascent. With a categorical
policy, the code for implementing REINFORCE would be as follows::

    probs = policy_network(state)
    # NOTE: this is equivalent to what used to be called multinomial
    m = Categorical(probs)
    action = m.sample()
    loss = -m.log_prob(action) * get_reward(env, action)
    next_state, reward = env.step(action)
    loss = -m.log_prob(action) * reward
    loss.backward()
"""
import math
@@ -19,7 +34,7 @@ from numbers import Number
import torch


__all__ = ['Distribution', 'Bernoulli', 'Multinomial', 'Normal']
__all__ = ['Distribution', 'Bernoulli', 'Categorical', 'Normal']


class Distribution(object):
@@ -87,9 +102,12 @@ class Bernoulli(Distribution):
        return log_pmf.gather(0, value.unsqueeze(0).long()).squeeze(0)


class Multinomial(Distribution):
class Categorical(Distribution):
    r"""
    Creates a multinomial distribution parameterized by `probs`.
    Creates a categorical distribution parameterized by `probs`.

    .. note::
        It is equivalent to the distribution that ``multinomial()`` samples from.

    Samples are integers from `0 ... K-1` where `K` is probs.size(-1).

@@ -102,7 +120,7 @@ class Multinomial(Distribution):

    Example::

        >>> m = Multinomial(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ]))
        >>> m = Categorical(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ]))
        >>> m.sample()  # equal probability of 0, 1, 2, 3
         3
        [torch.LongTensor of size 1]
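Building on the example above, one REINFORCE-style step with the renamed `Categorical`, written against the 0.3-era Variable API; the constant reward value is a stand-in for environment feedback, not part of the diff::

    # Sketch: one REINFORCE-style step with Categorical.
    import torch
    from torch.autograd import Variable
    from torch.distributions import Categorical

    probs = Variable(torch.Tensor([0.1, 0.2, 0.3, 0.4]), requires_grad=True)
    m = Categorical(probs)
    action = m.sample()                    # LongTensor of size 1
    reward = 1.0                           # stand-in for environment feedback
    loss = -m.log_prob(action) * reward    # negative sign: optimizers minimize
    loss.backward()                        # gradient flows back into probs
    print(probs.grad)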
@@ -9,15 +9,15 @@ __all__ = [


def split(tensor, split_size, dim=0):
    """Splits the tensor into equally sized chunks (if possible).
    """Splits the tensor into chunks all of size :attr:`split_size` (if possible).

    Last chunk will be smaller if the tensor size along a given dimension
    is not divisible by ``split_size``.
    is not divisible by :attr:`split_size`.

    Arguments:
        tensor (Tensor): tensor to split.
        split_size (int): size of a single chunk.
        dim (int): dimension along which to split the tensor.
        tensor (Tensor): the tensor to split
        split_size (int): size of a single chunk
        dim (int): dimension along which to split the tensor
    """
    if dim < 0:
        dim += tensor.dim()
@@ -32,12 +32,12 @@ def split(tensor, split_size, dim=0):


def chunk(tensor, chunks, dim=0):
    """Splits a tensor into a number of chunks along a given dimension.
    """Splits a tensor into a specific number of chunks.

    Arguments:
        tensor (Tensor): tensor to split.
        chunks (int): number of chunks to return.
        dim (int): dimension along which to split the tensor.
        tensor (Tensor): the tensor to split
        chunks (int): number of chunks to return
        dim (int): dimension along which to split the tensor
    """
    if dim < 0:
        dim += tensor.dim()
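A small sketch contrasting the two functions documented above: `split` takes the size of each chunk, `chunk` takes the number of chunks, and the last piece may be smaller in both cases::

    # Sketch: split() takes the chunk size, chunk() takes the chunk count.
    import torch

    x = torch.arange(0, 10)                          # 10 elements
    print([t.size(0) for t in torch.split(x, 4)])    # [4, 4, 2]
    print([t.size(0) for t in torch.chunk(x, 4)])    # [3, 3, 3, 1]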
@@ -51,9 +51,9 @@ def stack(sequence, dim=0, out=None):
    All tensors need to be of the same size.

    Arguments:
        sequence (Sequence): sequence of tensors to concatenate.
        sequence (Sequence): sequence of tensors to concatenate
        dim (int): dimension to insert. Has to be between 0 and the number
            of dimensions of concatenated tensors (inclusive).
            of dimensions of concatenated tensors (inclusive)
    """
    if len(sequence) == 0:
        raise ValueError("stack expects a non-empty sequence of tensors")
@@ -72,8 +72,8 @@ def unbind(tensor, dim=0):
    Returns a tuple of all slices along a given dimension, already without it.

    Arguments:
        tensor (Tensor): tensor to unbind.
        dim (int): dimension to remove.
        tensor (Tensor): the tensor to unbind
        dim (int): dimension to remove
    """
    return tuple(tensor.select(dim, i) for i in _range(tensor.size(dim)))

@@ -87,10 +87,10 @@ def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True):
      2: The U tensor.

    Arguments:
        LU_data (Tensor): The packed LU factorization data.
        LU_pivots (Tensor): The packed LU factorization pivots.
        unpack_data (bool): Flag indicating if the data should be unpacked.
        unpack_pivots (bool): Flag indicating if the pivots should be unpacked.
        LU_data (Tensor): the packed LU factorization data
        LU_pivots (Tensor): the packed LU factorization pivots
        unpack_data (bool): flag indicating if the data should be unpacked
        unpack_pivots (bool): flag indicating if the pivots should be unpacked
    """

    nBatch, sz, _ = LU_data.size()
@@ -122,7 +122,7 @@ def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True):


def matmul(tensor1, tensor2, out=None):
    """Matrix product of two tensors.
    r"""Matrix product of two tensors.

    The behavior depends on the dimensionality of the tensors as follows:

@@ -139,17 +139,18 @@ def matmul(tensor1, tensor2, out=None):
      batched matrix multiply and removed after.  If the second argument is 1-dimensional, a
      1 is appended to its dimension for the purpose of the batched matrix multiply and removed after.
      The non-matrix (i.e. batch) dimensions are :ref:`broadcasted <broadcasting-semantics>` (and thus
      must be broadcastable).  For example, if :attr:`tensor1` is a `j x 1 x n x m` Tensor
      and :attr:`tensor2` is a `k x m x p` Tensor, :attr:`out` will be an `j x k x n x p` Tensor.
      must be broadcastable).  For example, if :attr:`tensor1` is a
      :math:`(j \times 1 \times n \times m)` tensor and :attr:`tensor2` is a :math:`(k \times m \times p)`
      tensor, :attr:`out` will be an :math:`(j \times k \times n \times p)` tensor.

    .. note::

        The 1-dimensional dot product version of this function does not support an :attr:`out` parameter.

    Arguments:
        tensor1 (Tensor): First tensor to be multiplied
        tensor2 (Tensor): Second tensor to be multiplied
        out (Tensor, optional): Output tensor
        tensor1 (Tensor): the first tensor to be multiplied
        tensor2 (Tensor): the second tensor to be multiplied
        out (Tensor, optional): the output tensor
    """
    dim_tensor1 = tensor1.dim()
    dim_tensor2 = tensor2.dim()
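A shape-only sketch of the broadcasting rule spelled out in the `matmul` docstring above: batch dimensions broadcast, the trailing two act as matrix dimensions::

    # Sketch: a (j x 1 x n x m) tensor times a (k x m x p) tensor
    # gives a (j x k x n x p) result.
    import torch

    a = torch.randn(10, 1, 3, 4)     # j=10, n=3, m=4
    b = torch.randn(5, 4, 2)         # k=5,  m=4, p=2
    out = torch.matmul(a, b)
    print(out.size())                # torch.Size([10, 5, 3, 2])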
@@ -31,6 +31,30 @@ HOLE = Placeholder("HOLE")
VOLATILE = Placeholder("VOLATILE")


# This global variable is set when we are tracing a *forwards* computation.
# It is intended to be a cheap way to test if tracing has occurred, before
# doing the slower path using `get_tracing_state` (below.)
_tracing = False


def get_tracing_state(args):
    if not torch._C._is_tracing(args):
        return None
    return torch._C._get_tracing_state(args)


@contextlib.contextmanager
def scope(scope_name, *vars):
    tracing_state = get_tracing_state(vars)
    if tracing_state:
        tracing_state.push_scope(scope_name)
    try:
        yield
    finally:
        if tracing_state:
            tracing_state.pop_scope()


def compile(arg=None, **kwargs):
    """
    Decorator which marks a function or module class as eligible for
@@ -69,10 +93,10 @@ def compile(arg=None, **kwargs):
            (as we always wait to see all derivatives before compiling.)
            Default: 1 (i.e., we will compile forwards and backwards, but not
            double-backwards).
        optimize (bool, optional): whether or not to apply optimizations.  Default: True.
        optimize (bool, optional): whether or not to apply optimizations.  Default: ``True``.

    Debug arguments:
        time (bool, optional): if True, whenever we execute the model in question, we
        time (bool, optional): if ``True``, whenever we execute the model in question, we
            will also print out some timing information for how long the model
            took to execute.  At the moment, there are three types of timings we
            emit:
@@ -87,10 +111,10 @@ def compile(arg=None, **kwargs):
                - optimized: the time it took to execute the optimized model.

            At the moment, all of these timings are for the forward pass only.
            Default: False.
        enabled (bool, optional): if False, compilation is disabled and you
            Default: ``False``.
        enabled (bool, optional): if ``False``, compilation is disabled and you
            will get back your original model.  This is a convenient way to
            disable tracing without having to delete the annotation. Default: True.
            disable tracing without having to delete the annotation. Default: ``True``.

    Example: Compile as class decorator.

@@ -227,6 +251,8 @@ class TracedModule(Module):
        self.nderivs = nderivs

    def forward(self, *args, **kwargs):
        global _tracing

        # TODO: Possible optimization: use the unflattened
        # output so we don't unflatten it when we get out
        # NB: Not a method because _raw_trace can't deal
@@ -238,7 +264,9 @@ class TracedModule(Module):
        kw_items = list(kwargs.items())
        kw_items.sort()
        in_vars, in_struct = _flatten((args, tuple(kw_items)), self.state_dict(keep_vars=True).values())
        _tracing = True
        trace, (out_vars, out_struct) = traced_inner(in_vars, in_struct)
        _tracing = False
        out, unmatched = _unflatten(out_vars, out_struct)
        assert len(unmatched) == 0
        return trace, out
@@ -396,6 +424,10 @@ class _CompiledMixin(object):
        # TODO: Figure out how to call parent destructor, if there is one.
        # Apparently, this is buggy:
        #     https://stackoverflow.com/questions/22972720/python-cant-invoke-parent-class-destructor-with-super
        # NB: Have to mangle this by hand!
        if not (hasattr(self, '_CompiledMixin__misses') and hasattr(self, '_CompiledMixin___hits')):
            # Probably died during construction
            return
        if self.__misses != 0 and self.__hits == 0:
            warnings.warn("{} was marked with JIT and invoked {} times, "
                          "but we never successfully used compiled code."
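An illustrative sketch of the `compile` decorator documented above, used as a class decorator with the `nderivs`/`enabled` keywords from the docstring; treat the exact API of this snapshot as an assumption::

    # Illustrative sketch of compile() as a class decorator, per the docstring.
    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    @torch.jit.compile(nderivs=1, enabled=True)
    class MLP(nn.Module):
        def __init__(self):
            super(MLP, self).__init__()
            self.fc = nn.Linear(8, 8)

        def forward(self, x):
            return self.fc(x).tanh()

    model = MLP()
    out = model(Variable(torch.randn(2, 8)))   # first call records a trace
    out.sum().backward()                       # backward trace needed before compiling (nderivs=1)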
@@ -18,18 +18,22 @@ class DistKLDivCriterion(Criterion):
            input,
            target,
            self.output_tensor,
            self.sizeAverage
            self.sizeAverage,
            True,  # reduce
        )
        self.output = self.output_tensor[0]
        return self.output

    def updateGradInput(self, input, target):
        assert input.is_same_size(target)
        implicit_gradOutput = torch.ones(1).type_as(input)
        self._backend.DistKLDivCriterion_updateGradInput(
            self._backend.library_state,
            input,
            target,
            implicit_gradOutput,
            self.gradInput,
            self.sizeAverage
            self.sizeAverage,
            True,  # reduce
        )
        return self.gradInput

@@ -29,7 +29,6 @@ class ELU(Module):
    def updateGradInput(self, input, gradOutput):
        self._backend.ELU_updateGradInput(
            self._backend.library_state,
            input,
            gradOutput,
            self.gradInput,
            self.output,
@@ -20,14 +20,14 @@ class Padding(Module):
        super(Padding, self).__init__()

    def updateOutput(self, input):
        outputSize = list(input.size())
        outputSize[self.dim] += abs(self.pad)
        self.outputSize = torch.Size(outputSize)
        dim = self.dim

        if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim:
            dim = dim + 1

        outputSize = list(input.size())
        outputSize[dim] += abs(self.pad)
        self.outputSize = torch.Size(outputSize)

        self.output.resize_(self.outputSize)
        self.output.fill_(self.value)
        index = self.index

@@ -66,6 +66,7 @@ IF ($ENV{TH_BINARY_BUILD})
  IF (UNIX AND NOT APPLE)
    # hiding statically linked library symbols, this flag is not available for the linker under MACOSX
    SET(CMAKE_CXX_FLAGS "-Wl,--exclude-libs,libstdc++.a ${CMAKE_CXX_FLAGS}")
    set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../../tools/pytorch.version")
  ENDIF(UNIX AND NOT APPLE)
ENDIF()

@@ -17,8 +17,15 @@ public:
  Type & getType(Backend p, ScalarType s) {
    initCUDAIfNeeded(p);
    auto & type = type_registry[static_cast<int>(p)][static_cast<int>(s)];
    if(!type)

    if(!type) {
      // there is only a single Undefined Type.
      if (p == Backend::Undefined || s == ScalarType::Undefined) {
        auto & undef = type_registry[static_cast<int>(Backend::Undefined)][static_cast<int>(ScalarType::Undefined)];
        if (undef) return *undef;
      }
      runtime_error("%s%sType is not enabled.",toString(p),toString(s));
    }
    return *type;
  }
  Generator & defaultGenerator(Backend p) {
@@ -13,28 +13,28 @@ static DLDataType getDLDataType(const Type& type) {
  dtype.bits = type.elementSizeInBytes() * 8;
  switch (type.scalarType()) {
    case ScalarType::Byte:
      dtype.code = DLDataTypeCode::kUInt;
      dtype.code = DLDataTypeCode::kDLUInt;
      break;
    case ScalarType::Char:
      dtype.code = DLDataTypeCode::kInt;
      dtype.code = DLDataTypeCode::kDLInt;
      break;
    case ScalarType::Double:
      dtype.code = DLDataTypeCode::kFloat;
      dtype.code = DLDataTypeCode::kDLFloat;
      break;
    case ScalarType::Float:
      dtype.code = DLDataTypeCode::kFloat;
      dtype.code = DLDataTypeCode::kDLFloat;
      break;
    case ScalarType::Int:
      dtype.code = DLDataTypeCode::kInt;
      dtype.code = DLDataTypeCode::kDLInt;
      break;
    case ScalarType::Long:
      dtype.code = DLDataTypeCode::kInt;
      dtype.code = DLDataTypeCode::kDLInt;
      break;
    case ScalarType::Short:
      dtype.code = DLDataTypeCode::kInt;
      dtype.code = DLDataTypeCode::kDLInt;
      break;
    case ScalarType::Half:
      dtype.code = DLDataTypeCode::kFloat;
      dtype.code = DLDataTypeCode::kDLFloat;
      break;
    case ScalarType::NumOptions:
      throw std::logic_error("NumOptions is not a valid ScalarType");
@@ -47,9 +47,9 @@ static DLContext getDLContext(const Type& type, const int64_t& device_id) {
  DLContext ctx;
  ctx.device_id = device_id;
  if (type.isCuda()) {
    ctx.device_type = DLDeviceType::kGPU;
    ctx.device_type = DLDeviceType::kDLGPU;
  } else {
    ctx.device_type = DLDeviceType::kCPU;
    ctx.device_type = DLDeviceType::kDLCPU;
  }
  return ctx;
}
@@ -58,10 +58,10 @@ static DLContext getDLContext(const Type& type, const int64_t& device_id) {
static Backend getATenBackend(const DLContext& ctx) {
  Backend backend;
  switch (ctx.device_type) {
    case DLDeviceType::kCPU:
    case DLDeviceType::kDLCPU:
      backend = Backend::CPU;
      break;
    case DLDeviceType::kGPU:
    case DLDeviceType::kDLGPU:
      backend = Backend::CUDA;
      break;
    default:
@@ -75,7 +75,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
  ScalarType stype;
  if (dtype.lanes != 1) throw std::logic_error("ATen does not support lanes != 1");
  switch (dtype.code) {
    case DLDataTypeCode::kUInt:
    case DLDataTypeCode::kDLUInt:
      switch (dtype.bits) {
        case 8:
          stype = ScalarType::Byte;
@@ -84,7 +84,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
          throw std::logic_error("Unsupported kUInt bits " + std::to_string(dtype.bits));
      }
      break;
    case DLDataTypeCode::kInt:
    case DLDataTypeCode::kDLInt:
      switch (dtype.bits) {
        case 8:
          stype = ScalarType::Char;
@@ -102,7 +102,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
          throw std::logic_error("Unsupported kInt bits " + std::to_string(dtype.bits));
      }
      break;
    case DLDataTypeCode::kFloat:
    case DLDataTypeCode::kDLFloat:
      switch (dtype.bits) {
        case 16:
          stype = ScalarType::Half;
@@ -128,8 +128,8 @@ struct ATenDLMTensor {
  DLManagedTensor tensor;
};

void destructor(DLManagedTensor * arg) {
  delete static_cast<ATenDLMTensor*>(arg->ctx);
void deleter(DLManagedTensor * arg) {
  delete static_cast<ATenDLMTensor*>(arg->manager_ctx);
}


@@ -138,33 +138,33 @@ void destructor(DLManagedTensor * arg) {
DLManagedTensor* toDLPack(const Tensor& src) {
  ATenDLMTensor * atDLMTensor(new ATenDLMTensor);
  atDLMTensor->handle = src;
  atDLMTensor->tensor.ctx = atDLMTensor;
  atDLMTensor->tensor.destructor = &destructor;
  atDLMTensor->tensor.dlTensor.data = src.data_ptr();
  atDLMTensor->tensor.manager_ctx = atDLMTensor;
  atDLMTensor->tensor.deleter = &deleter;
  atDLMTensor->tensor.dl_tensor.data = src.data_ptr();
  int64_t device_id = 0;
  if (src.type().isCuda()) {
    device_id = src.get_device();
  }
  atDLMTensor->tensor.dlTensor.ctx = getDLContext(src.type(), device_id);
  atDLMTensor->tensor.dlTensor.ndim = src.dim();
  atDLMTensor->tensor.dlTensor.dtype = getDLDataType(src.type());
  atDLMTensor->tensor.dlTensor.shape = const_cast<int64_t*>(src.sizes().data());
  atDLMTensor->tensor.dlTensor.strides = const_cast<int64_t*>(src.strides().data());
  atDLMTensor->tensor.dlTensor.byte_offset = 0;
  atDLMTensor->tensor.dl_tensor.ctx = getDLContext(src.type(), device_id);
  atDLMTensor->tensor.dl_tensor.ndim = src.dim();
  atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src.type());
  atDLMTensor->tensor.dl_tensor.shape = const_cast<int64_t*>(src.sizes().data());
  atDLMTensor->tensor.dl_tensor.strides = const_cast<int64_t*>(src.strides().data());
  atDLMTensor->tensor.dl_tensor.byte_offset = 0;
  return &(atDLMTensor->tensor);
}


Tensor fromDLPack(const DLManagedTensor* src) {
  Backend backend = getATenBackend(src->dlTensor.ctx);
  ScalarType stype = toScalarType(src->dlTensor.dtype);
  Backend backend = getATenBackend(src->dl_tensor.ctx);
  ScalarType stype = toScalarType(src->dl_tensor.dtype);
  auto deleter = [src](void * self) {
    src->destructor(const_cast<DLManagedTensor*>(src));
    src->deleter(const_cast<DLManagedTensor*>(src));
  };
  return getType(backend, stype).tensorFromBlob(
      src->dlTensor.data,
      IntList(src->dlTensor.shape, src->dlTensor.ndim),
      IntList(src->dlTensor.strides, src->dlTensor.ndim),
      src->dl_tensor.data,
      IntList(src->dl_tensor.shape, src->dl_tensor.ndim),
      IntList(src->dl_tensor.strides, src->dl_tensor.ndim),
      deleter);
}
} //namespace at

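On the Python side these conversions are surfaced through `torch.utils.dlpack` (an assumption about the corresponding bindings; they are not shown in this hunk). A round-trip sketch::

    # Sketch: round-trip a tensor through the DLPack conversion implemented above.
    import torch
    from torch.utils.dlpack import to_dlpack, from_dlpack

    x = torch.randn(3, 4)
    capsule = to_dlpack(x)      # wraps x in a DLManagedTensor (dl_tensor fields above)
    y = from_dlpack(capsule)    # shares the same storage, no copy
    y[0, 0] = 42.0
    print(x[0, 0])              # 42.0 -- both views alias one buffer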
@@ -579,13 +579,22 @@
    - CPU
    - CUDA
  return: argument 0
  arguments:
    - arg: THTensor* result
      output: True
    - accreal start
    - accreal end
    - arg: accreal step
      default: 1
  options:
    - cname: arange
      arguments:
        - arg: THTensor* result
          output: True
        - accreal start
        - accreal end
        - arg: accreal step
          default: 1
    - cname: arange
      arguments:
        - arg: THTensor* result
          output: True
        - CONSTANT 0
        - accreal end
        - CONSTANT 1
]]
[[
  name: scatter_

@@ -1,10 +1,20 @@
#pragma once

#include "ATen/Tensor.h"
#include <functional>
#include <sstream>

namespace at {

// avoid copy-construction of Tensor by using a reference_wrapper.
inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) {
  for (auto& t : tensors) {
    if (!t.get().defined()) {
      runtime_error("%s(...) called with an undefined Tensor", api_name);
    }
  }
}

inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand) {
  if (tensor.sizes().equals(to_expand.sizes())) {
    return std::make_tuple(to_expand);
@@ -13,6 +23,11 @@ inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_
  return std::make_tuple(to_expand.expand(tensor.sizes()));
}

inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand, const char *api_name) {
  check_defined({tensor, to_expand}, api_name);
  return expand_inplace(tensor, to_expand);
}

inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) {
  if (tensor.sizes().equals(to_expand1.sizes()) && tensor.sizes().equals((to_expand2.sizes()))) {
    return std::make_tuple(to_expand1, to_expand2);
@@ -21,6 +36,12 @@ inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Ten
  return std::make_tuple(to_expand1.expand(tensor.sizes()), to_expand2.expand(tensor.sizes()));
}

inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2,
                                                 const char *api_name) {
  check_defined({tensor, to_expand1, to_expand2}, api_name);
  return expand_inplace(tensor, to_expand1, to_expand2);
}

inline std::vector<int64_t> infer_size2(IntList a, IntList b) {
  auto dimsA = a.size();
  auto dimsB = b.size();
@@ -55,9 +76,14 @@ inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, cons
  return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size));
}

std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
                                                   const Tensor &to_expand2,
                                                   const Tensor &to_expand3) {
inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) {
  check_defined({to_expand1, to_expand2}, api_name);
  return expand_outplace(to_expand1, to_expand2);
}

inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
                                                          const Tensor &to_expand2,
                                                          const Tensor &to_expand3) {
  if (to_expand1.sizes().equals(to_expand2.sizes()) && to_expand1.sizes().equals(to_expand3.sizes())) {
    return std::make_tuple(to_expand1, to_expand2, to_expand3);
  }
@@ -67,6 +93,14 @@ std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
  return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size), to_expand3.expand(expanded_size));
}

inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1,
                                                          const Tensor &to_expand2,
                                                          const Tensor &to_expand3,
                                                          const char *api_name) {
  check_defined({to_expand1, to_expand2, to_expand3}, api_name);
  return expand_outplace(to_expand1, to_expand2, to_expand3);
}

inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) {
  if(to_expand.sizes().equals(sizes)) {
    return std::make_tuple(to_expand);
@@ -75,4 +109,9 @@ inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) {
  return std::make_tuple(to_expand.expand(sizes));
}

inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes, const char *api_name) {
  check_defined({to_expand}, api_name);
  return expand_size(to_expand, sizes);
}

}

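The `expand_*` helpers above apply standard broadcasting before in-place and out-of-place ops. A Python sketch of the same rule via `Tensor.expand`, not the ATen code itself::

    # Sketch of the broadcasting rule the expand_* helpers implement:
    # sizes are right-aligned, dimensions of size 1 are stretched,
    # everything else must match.
    import torch

    a = torch.randn(4, 1, 3)
    b = torch.randn(5, 1)
    # the inferred common size is (4, 5, 3)
    expanded_a = a.expand(4, 5, 3)
    expanded_b = b.expand(4, 5, 3)
    print((expanded_a + expanded_b).size())   # torch.Size([4, 5, 3])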
@@ -128,6 +128,24 @@
    ${THTensor}_setStorage(${state,}result_->tensor, self_->tensor->storage, self_->tensor->storageOffset, size_, stride_);
]]

[[
  name: as_strided_
  variants: [method,function]
  return: argument 0
  arguments:
    - THTensor* self
    - THSize* size
    - THStride* stride
    - arg: int64_t storage_offset
      default: -1
  aten_custom_call: |
    if (storage_offset == -1) {
      storage_offset = self_->tensor->storageOffset;
    }
    ${THTensor}_setStorage(${state,}self_->tensor, self_->tensor->storage, storage_offset, size_, stride_);
    self_->maybeScalar(size.size() == 0);
]]

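A sketch of what the new `as_strided_` declaration exposes from Python: re-stride a tensor in place, with the default `storage_offset=-1` keeping the current offset (the generated binding and tuple arguments are assumptions here)::

    # Sketch: re-stride a contiguous tensor in place, per the cwrap entry above.
    import torch

    x = torch.arange(0, 9)            # 9 contiguous elements
    x.as_strided_((3, 3), (3, 1))     # view the same storage as a 3x3 matrix
    print(x.size())                   # torch.Size([3, 3])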
[[
  name: cat
  cname: catArray

@@ -23,7 +23,7 @@ public:

  explicit Scalar(const detail::TensorBase & t)
  : tag(Tag::HAS_t), t(t) {
    AT_ASSERT(t.pImpl, "Attempting to create a Scalar from an undefined tensor");
    AT_ASSERT(t.defined(), "Attempting to create a Scalar from an undefined tensor");
    AT_ASSERT(t.dim() == 0, "Attempting to create a Scalar from a %d dim tensor", t.dim());
  }

@@ -23,6 +23,7 @@ enum class ScalarType {
  n,
  AT_FORALL_SCALAR_TYPES(DEFINE_ENUM)
#undef DEFINE_ENUM
  Undefined,
  NumOptions
};

@@ -31,6 +32,7 @@ enum class Backend {
  CUDA,
  SparseCPU,
  SparseCUDA,
  Undefined,
  NumOptions
};

@@ -62,7 +64,7 @@ static inline const char * toString(ScalarType t) {
  switch(t) {
    AT_FORALL_SCALAR_TYPES(DEFINE_CASE)
    default:
      return "UNKNOWN_SCALAR_TYPE";
      return "UNKNOWN_SCALAR";
  }
#undef DEFINE_CASE
}

@@ -1,29 +1,32 @@
#pragma once

#include "ATen/TensorImpl.h"
#include "ATen/UndefinedTensor.h"

namespace at { namespace detail {

// TensorBase is the base class for Tensor which handles the reference counting
struct TensorBase {
  TensorBase()
  : pImpl(nullptr) {}
  TensorBase(): TensorBase(UndefinedTensor::singleton(), false) {}
  TensorBase(TensorImpl * self, bool retain)
  : pImpl(self) {
    if(pImpl != nullptr && retain)
    if (pImpl == nullptr) {
      throw std::runtime_error("TensorBase with nullptr not supported");
    }
    if(retain && pImpl != UndefinedTensor::singleton())
      pImpl->retain();
  }
  TensorBase(const TensorBase & rhs)
  : pImpl(rhs.pImpl) {
    if(pImpl != nullptr)
    if (pImpl != UndefinedTensor::singleton())
      pImpl->retain();
  }
  TensorBase(TensorBase && rhs) noexcept
  : pImpl(rhs.pImpl) {
    rhs.pImpl = nullptr;
    rhs.pImpl = UndefinedTensor::singleton();
  }
  ~TensorBase() {
    if(pImpl != nullptr)
    if (pImpl != UndefinedTensor::singleton())
      pImpl->release();
  }
  TensorBase & operator=(TensorBase && rhs) & {
@@ -48,6 +51,9 @@ struct TensorBase {
  TensorImpl * get() const {
    return pImpl;
  }
  bool defined() const {
    return pImpl != UndefinedTensor::singleton();
  }

  friend struct Type;


@@ -11,6 +11,7 @@ inline Tensor & Tensor::operator=(Scalar v) && {
  return assign_(v);
}
inline Tensor & Tensor::assign_(Scalar v) {
  AT_ASSERT(defined(), "attempting to assign a scalar to an undefined tensor");
  AT_ASSERT(dim() == 0, "attempting to assign a scalar to %d dim tensor", dim());
  pImpl->assign_(v);
  return *this;

torch/lib/ATen/UndefinedTensor.cpp (new file, 42 lines)
@@ -0,0 +1,42 @@
#include "ATen/UndefinedTensor.h"
 | 
			
		||||
#include "ATen/Context.h"
 | 
			
		||||
 | 
			
		||||
namespace at {
 | 
			
		||||
 | 
			
		||||
// should this use the globalContext?  Can it get a context passed in somehow?
 | 
			
		||||
UndefinedTensor::UndefinedTensor()
 | 
			
		||||
: TensorImpl(&(globalContext().getType(Backend::Undefined,ScalarType::Undefined))) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * UndefinedTensor::toString() const {
 | 
			
		||||
  return "UndefinedTensor";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
IntList UndefinedTensor::sizes() const {
 | 
			
		||||
  runtime_error("sizes() called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int64_t UndefinedTensor::dim() const {
 | 
			
		||||
  runtime_error("dim() called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * UndefinedTensor::typeString() {
 | 
			
		||||
  return "UndefinedType";
 | 
			
		||||
}
 | 
			
		||||
void * UndefinedTensor::unsafeGetTH(bool retain) {
 | 
			
		||||
  runtime_error("unsafeGetTH(bool retain) called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
IntList UndefinedTensor::strides() const {
 | 
			
		||||
  runtime_error("strides() called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
Scalar UndefinedTensor::localScalar() {
 | 
			
		||||
  runtime_error("localScalar() called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
void UndefinedTensor::assign_(Scalar s) {
 | 
			
		||||
  runtime_error("assign_() called on undefined Tensor");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
UndefinedTensor UndefinedTensor::_singleton;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
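The new UndefinedTensor is a classic null object: one shared singleton that satisfies the TensorImpl interface, is what `defined()` compares against, and raises on any real use. A purely illustrative Python sketch of the same pattern (not PyTorch API)::

    # Illustrative null-object singleton mirroring UndefinedTensor's design:
    # one shared instance, defined() is an identity check, and every real
    # operation raises.  Not PyTorch API.
    class _Undefined(object):
        _singleton = None

        def __new__(cls):
            if cls._singleton is None:
                cls._singleton = super(_Undefined, cls).__new__(cls)
            return cls._singleton

        def sizes(self):
            raise RuntimeError("sizes() called on undefined Tensor")

        def dim(self):
            raise RuntimeError("dim() called on undefined Tensor")

    def defined(t):
        return t is not _Undefined()

    print(defined(_Undefined()))      # False: the shared "undefined" sentinel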
Some files were not shown because too many files have changed in this diff.