Compare commits

..

28 Commits

Author SHA1 Message Date
0b92e5c9ed fix static linkage and make THD statically linked 2017-08-28 10:41:55 -04:00
df44c571c6 increase test subprocess timeout 2017-08-25 09:00:36 -07:00
ed03f74043 fix leaking symbols in THNN 2017-08-25 09:00:35 -07:00
c8d8803b90 Remove unnecessary moves in convolution autograd. 2017-08-25 09:00:35 -07:00
750245f990 Remove unnecessary moves, avoid IncRef/DecRef of PyBools. 2017-08-25 09:00:35 -07:00
fbf573a6b8 Properly pass saved_for in BatchNorm/Conv as the relevant Backward function.
Previously, these Functions passed themselves, i.e. the saved_for from
ConvForward would be ConvForward.
2017-08-25 09:00:35 -07:00
9bda6dee8f Add AutoGPU guard and properly reference Python args from BatchNormBackwardBackward. 2017-08-25 09:00:35 -07:00
e02f7bf8a3 Update autograd notes (#2295) 2017-08-04 20:28:04 -04:00
7ea48eaf7a cuda 7.5 fix for gloo 2017-08-04 02:25:16 -04:00
d278a14141 Fix ZeroPad2d backwards with negative pads. 2017-08-03 21:16:37 -04:00
6b1ca4b4b6 variable shape error of LSTMCell, GRUCell (#2289) 2017-08-03 21:16:06 -04:00
65ddaf13a9 Improve cuDNN weight layout test 2017-08-03 02:06:27 -04:00
96156013c3 Make sure deserialized RNN modules have _data_ptrs too 2017-08-03 02:06:21 -04:00
a997cdbb25 Fix BatchNorm double backwards when training=False.
Changes for v.0.2.0 around using shared_ptrs rather than at::Tensors.
2017-08-03 10:47:34 +05:30
8db9df94b6 Merge commit '74e5328b03634e163df65d6c6877c6f03387b536' 2017-08-02 22:51:17 -04:00
6c9e3334b1 Merge commit '70c95dbe52102d70facf7fc5d31cb8bd9ae860d9' 2017-08-02 22:50:52 -04:00
b33f232678 disable cudnn when output_padding >= stride or dilation 2017-08-02 22:48:03 -04:00
058f50aa50 fix shape and correctness bugs in autograd/convolution BackwardBackward 2017-08-02 22:48:03 -04:00
8b06efea7a remove dead code for python ConvNd (moved to C already) 2017-08-02 22:48:03 -04:00
52b7a49b37 enable cudnn transposed dilated 2017-08-02 22:48:03 -04:00
47f4d549e0 refactoring the THNN calls in autograd/convolution.cpp to be more compact 2017-08-02 22:48:03 -04:00
5b6d1837c7 enable dilated transpose and gradgrad tests 2017-08-02 22:48:02 -04:00
69642d4423 add THNN bindings for DilatedConvTranspose in autograd/convolution 2017-08-02 22:48:02 -04:00
daf5b20cd7 Add tests that gradcheck grad sizes match input size and fix advanced indexing
case that fails check.
2017-08-02 07:13:01 +05:30
515efdab5d add reentrancy checking for gradcheck. 2017-08-02 07:13:01 +05:30
f9f98daf11 Remove save_mean/save_var from BatchNorm double backwards, as it's not needed.
These could cause a problem with double backwards because they were std::move'd in
Backward.
2017-08-02 07:13:01 +05:30
2ac1003228 Implement LogSoftmax (v.0.2.0) (#2265) 2017-08-01 14:32:05 +05:30
141224ad7c Implement SoftMax and NLLLoss double backwards. (#2233)
* Implement SoftMax and NLLLoss double backwards.

* Update legacy ClassNLLCriterion to add ignore_index.

* Fix serialization of legacy ClassNLLCriterion with ignore_index.
2017-07-30 09:02:04 +05:30
1043 changed files with 43542 additions and 56733 deletions

View File

@ -1 +0,0 @@
.gitignore

.gitignore
View File

@ -12,8 +12,6 @@ torch/lib/build
torch/lib/tmp_install
torch/lib/include
torch/lib/torch_shm_manager
torch/csrc/jit/generated/*
torch/csrc/autograd/generated/*
torch/csrc/cudnn/cuDNN.cpp
torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp

.gitmodules
View File

@ -1,9 +0,0 @@
[submodule "torch/lib/gloo"]
path = torch/lib/gloo
url = https://github.com/facebookincubator/gloo
[submodule "torch/lib/pybind11"]
path = torch/lib/pybind11
url = https://github.com/pybind/pybind11
[submodule "torch/lib/nanopb"]
path = torch/lib/nanopb
url = https://github.com/nanopb/nanopb.git

View File

@ -1,8 +1,6 @@
# https://travis-ci.org/pytorch/pytorch
language: python
dist: trusty
git:
submodules: false
python:
- 2.7.9
- 2.7
@ -18,23 +16,23 @@ cache:
install:
- unset CCACHE_DISABLE
- export CCACHE_DIR=$HOME/.ccache
- export CC="ccache gcc-5"
- export CXX="ccache g++-5"
- export CC="ccache gcc-4.8"
- export CXX="ccache g++-4.8"
- ccache --show-stats
- travis_retry pip install --upgrade pip setuptools wheel
- travis_retry pip install -r requirements.txt --only-binary=scipy
- git submodule update --init --recursive
- MAX_JOBS=8 python setup.py install
- python setup.py install
script:
- OMP_NUM_THREADS=2 ./test/run_test.sh
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
script:
- OMP_NUM_THREADS=2 ./test/run_test.sh
- gcc-4.8
- g++-4.8
# This reportedly works around an issue downloading packages from pypi on
# travis. Consider removing this after the underlying issue is fixed.

View File

@ -181,15 +181,5 @@ export PATH=~/ccache/lib:$PATH
export CUDA_NVCC_EXECUTABLE=~/ccache/cuda/nvcc
```
## CUDA Development tips
If you are working on the CUDA code, here are some useful CUDA debugging tips:
1. `CUDA_DEBUG=1` will enable CUDA debugging symbols (-g -G). This is particularly
helpful in debugging device code. However, it will slow down the build process,
so use it wisely.
2. `cuda-gdb` and `cuda-memcheck` are your best CUDA debugging friends. Unlike `gdb`,
`cuda-gdb` can display actual values in a CUDA tensor (rather than all zeros).
Hope this helps, and thanks for considering contributing.

View File

@ -9,28 +9,23 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
vim \
ca-certificates \
libnccl2=2.0.5-2+cuda8.0 \
libnccl-dev=2.0.5-2+cuda8.0 \
libjpeg-dev \
libpng-dev &&\
rm -rf /var/lib/apt/lists/*
ENV PYTHON_VERSION=3.6
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
# /opt/conda/bin/conda install conda-build && \
/opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl&& \
/opt/conda/bin/conda install conda-build && \
/opt/conda/bin/conda create -y --name pytorch-py35 python=3.5.2 numpy pyyaml scipy ipython mkl&& \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH
RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda80
ENV PATH /opt/conda/envs/pytorch-py35/bin:$PATH
RUN conda install --name pytorch-py35 -c soumith magma-cuda80
# This must be done before pip so that requirements.txt is available
WORKDIR /opt/pytorch
COPY . .
RUN git submodule update --init
RUN TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
pip install -v .

View File

@ -89,7 +89,7 @@ Changing the way the network behaves means that one has to start from scratch.
With PyTorch, we use a technique called reverse-mode auto-differentiation, which allows you to
change the way your network behaves arbitrarily with zero lag or overhead. Our inspiration comes
from several research papers on this topic, as well as current and past work such as
[torch-autograd](https://github.com/twitter/torch-autograd),
[autograd](https://github.com/twitter/torch-autograd),
[autograd](https://github.com/HIPS/autograd),
[Chainer](http://chainer.org), etc.
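A small illustration of what this define-by-run behavior looks like in user code, using the `Variable` API from this release (the values are purely for demonstration):

```python
import torch
from torch.autograd import Variable

x = Variable(torch.ones(2, 2), requires_grad=True)
y = (x * 3).sum()   # the graph is built on the fly as operations execute
y.backward()        # reverse-mode differentiation through that graph
print(x.grad)       # dy/dx is 3 for every element of x
```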
@ -120,7 +120,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
PyTorch has minimal framework overhead. We integrate acceleration libraries
such as Intel MKL and NVIDIA (cuDNN, NCCL) to maximize speed.
At the core, its CPU and GPU Tensor and neural network backends
(TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API.
(TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API.
They are mature and have been tested for years.
Hence, PyTorch is quite fast whether you run small or large neural networks.
@ -159,7 +159,7 @@ Once you have [Anaconda](https://www.continuum.io/downloads) installed, here are
If you want to compile with CUDA support, install
- [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) 7.5 or above
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v6.x or above
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v5.x or above
If you want to disable CUDA support, export environment variable `NO_CUDA=1`.
@ -170,7 +170,7 @@ On Linux
export CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" # [anaconda root directory]
# Install basic dependencies
conda install numpy pyyaml mkl setuptools cmake cffi
conda install numpy pyyaml mkl setuptools cmake gcc cffi
# Add LAPACK support for the GPU
conda install -c soumith magma-cuda80 # or magma-cuda75 if CUDA 7.5
@ -181,10 +181,6 @@ On OSX
export CMAKE_PREFIX_PATH=[anaconda root directory]
conda install numpy pyyaml setuptools cmake cffi
```
#### Get the PyTorch source
```bash
git clone --recursive https://github.com/pytorch/pytorch
```
#### Install PyTorch
On Linux
@ -203,13 +199,15 @@ Dockerfile is supplied to build images with cuda support and cudnn v6. Build as
```
docker build -t pytorch .
```
Dockerfile to build with cuda 9 and cudnn v7 (with Volta support) is in tools/docker, the build command is
Alternatively, if you want a runtime image, build with
```
docker build -t pytorch_cuda9 -f tools/docker/Dockerfile9 .
docker build -t pytorch . -f tools/docker/Dockerfile_runtime
```
Alternatively, if you want to use a runtime image, you can use the pre-built one from Docker Hub and run with nvidia-docker:
and run with nvidia-docker:
```
nvidia-docker run --rm -ti --ipc=host pytorch/pytorch:latest
nvidia-docker run --rm -ti --ipc=host pytorch
```
Please note that PyTorch uses shared memory to share data between processes, so if torch multiprocessing is used (e.g.
for multithreaded data loaders) the default shared memory segment size that the container runs with is not enough, and you
@ -221,12 +219,12 @@ should increase shared memory size either with `--ipc=host` or `--shm-size` comm
Three pointers to get you started:
- [Tutorials: get you started with understanding and using PyTorch](http://pytorch.org/tutorials/)
- [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples)
- [The API Reference](http://pytorch.org/docs/)
- The API Reference: [http://pytorch.org/docs/](http://pytorch.org/docs/)
## Communication
* forums: discuss implementations, research, etc. http://discuss.pytorch.org
* GitHub issues: bug reports, feature requests, install issues, RFCs, thoughts, etc.
* Slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . Our slack channel is invite-only to promote a healthy balance between power-users and beginners. If you need a slack invite, ping us at soumith@pytorch.org
* Slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . If you need a slack invite, ping us at soumith@pytorch.org
* newsletter: no-noise, one-way email newsletter with important announcements about pytorch. You can sign-up here: http://eepurl.com/cbG0rv
## Releases and Contributing
@ -239,11 +237,18 @@ We appreciate all contributions. If you are planning to contribute back bug-fixe
If you plan to contribute new features, utility functions or extensions to the core, please first open an issue and discuss the feature with us.
Sending a PR without discussion might end up resulting in a rejected PR, because we might be taking the core in a different direction than you might be aware of.
**For the next release cycle, these are the 3 big features we are planning to add:**
1. [Distributed PyTorch](https://github.com/pytorch/pytorch/issues/241) (a draft implementation is present in this [branch](https://github.com/apaszke/pytorch-dist) )
2. Backward of Backward - Backpropagating through the optimization process itself. Some past and recent papers such as
[Double Backprop](http://yann.lecun.com/exdb/publis/pdf/drucker-lecun-91.pdf) and [Unrolled GANs](https://arxiv.org/abs/1611.02163) need this.
3. Lazy Execution Engine for autograd - This will enable us to optionally introduce caching and JIT compilers to optimize autograd code.
## The Team
PyTorch is a community driven project with several skillful engineers and researchers contributing to it.
PyTorch is currently maintained by [Adam Paszke](https://apaszke.github.io/), [Sam Gross](https://github.com/colesbury), [Soumith Chintala](http://soumith.ch) and [Gregory Chanan](https://github.com/gchanan) with major contributions coming from 10s of talented individuals in various forms and means.
A non-exhaustive but growing list needs to mention: Trevor Killeen, Sasank Chilamkurthy, Sergey Zagoruyko, Adam Lerer, Francisco Massa, Alykhan Tejani, Luca Antiga, Alban Desmaison, Andreas Kopf, James Bradbury, Zeming Lin, Yuandong Tian, Guillaume Lample, Marat Dukhan, Natalia Gimelshein, Christian Sarofeen, Martin Raison, Edward Yang, Zachary Devito.
PyTorch is currently maintained by [Adam Paszke](https://apaszke.github.io/), [Sam Gross](https://github.com/colesbury) and [Soumith Chintala](http://soumith.ch) with major contributions coming from 10s of talented individuals in various forms and means. A non-exhaustive but growing list needs to mention: Sergey Zagoruyko, Adam Lerer, Francisco Massa, Andreas Kopf, James Bradbury, Zeming Lin, Yuandong Tian, Guillaume Lample, Marat Dukhan, Natalia Gimelshein.
Note: this project is unrelated to [hughperkins/pytorch](https://github.com/hughperkins/pytorch) with the same name. Hugh is a valuable contributor in the Torch community and has helped with many things Torch and PyTorch.

View File

@ -1,15 +0,0 @@
{% extends "!layout.html" %}
{% block footer %}
{{ super() }}
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-90545585-1', 'auto');
ga('send', 'pageview');
</script>
{% endblock %}

View File

@ -53,19 +53,3 @@ gradients are correct.
.. autoclass:: Function
:members:
Profiler
--------
Autograd includes a profiler that lets you inspect the cost of different
operators inside your model - both on the CPU and GPU. There are two modes
implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile`,
and nvprof based (registers both CPU and GPU activity) using
:class:`~torch.autograd.profiler.emit_nvtx`.
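For illustration, a minimal usage sketch of the CPU-only mode (the input here is made up)::

    import torch
    from torch.autograd import Variable

    x = Variable(torch.randn(1, 1), requires_grad=True)
    with torch.autograd.profiler.profile() as prof:
        y = x ** 2
        y.backward()
    print(prof)   # prints a per-operator timing table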
.. autoclass:: torch.autograd.profiler.profile
:members:
.. autoclass:: torch.autograd.profiler.emit_nvtx
:members:
.. autofunction:: torch.autograd.profiler.load_nvprof

View File

@ -6,17 +6,6 @@ torch.cuda
.. automodule:: torch.cuda
:members:
Random Number Generator
-------------------------
.. autofunction:: get_rng_state
.. autofunction:: set_rng_state
.. autofunction:: manual_seed
.. autofunction:: manual_seed_all
.. autofunction:: seed
.. autofunction:: seed_all
.. autofunction:: initial_seed
Communication collectives
-------------------------
@ -37,10 +26,6 @@ Streams and events
.. autoclass:: Event
:members:
Memory management
-----------------
.. autofunction:: empty_cache
NVIDIA Tools Extension (NVTX)
-----------------------------

View File

@ -4,7 +4,6 @@ torch.utils.data
.. automodule:: torch.utils.data
.. autoclass:: Dataset
.. autoclass:: TensorDataset
.. autoclass:: ConcatDataset
.. autoclass:: DataLoader
.. autoclass:: torch.utils.data.sampler.Sampler
.. autoclass:: torch.utils.data.sampler.SequentialSampler

View File

@ -10,8 +10,7 @@ Distributed communication package - torch.distributed
Currently torch.distributed supports three backends, each with
different capabilities. The table below shows which functions are available
for use with CPU / CUDA tensors.
MPI supports cuda only if the implementation used to build PyTorch supports it.
MPI supports cuda only iff the implementation used to build PyTorch supports it.
+------------+-----------+-----------+-----------+
| Backend | ``tcp`` | ``gloo`` | ``mpi`` |
@ -37,41 +36,11 @@ MPI supports cuda only if the implementation used to build PyTorch supports it.
| barrier | ✓ | ✘ | ✓ | ✓ | ✓ | ? |
+------------+-----+-----+-----+-----+-----+-----+
.. _distributed-basics:
Basics
------
The `torch.distributed` package provides PyTorch support and communication primitives
for multiprocess parallelism across several computation nodes running on one or more
machines. The class :func:`torch.nn.parallel.DistributedDataParallel` builds on this
functionality to provide synchronous distributed training as a wrapper around any
PyTorch model. This differs from the kinds of parallelism provided by
:doc:`multiprocessing` and :func:`torch.nn.DataParallel` in that it supports
multiple network-connected machines and in that the user must explicitly launch a separate
copy of the main training script for each process.
In the single-machine synchronous case, `torch.distributed` or the
:func:`torch.nn.parallel.DistributedDataParallel` wrapper may still have advantages over other
approaches to data-parallelism, including :func:`torch.nn.DataParallel`:
* Each process maintains its own optimizer and performs a complete optimization step with each
iteration. While this may appear redundant, since the gradients have already been gathered
together and averaged across processes and are thus the same for every process, this means
that no parameter broadcast step is needed, reducing time spent transferring tensors between
nodes.
* Each process contains an independent Python interpreter, eliminating the extra interpreter
overhead and "GIL-thrashing" that comes from driving several execution threads, model
replicas, or GPUs from a single Python process. This is especially important for models that
make heavy use of the Python runtime, including models with recurrent layers or many small
components.
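For illustration, a sketch of that single-machine pattern (one process per GPU, each launched separately; the rendezvous address, world size and model below are illustrative, not prescribed)::

    import os
    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel

    rank = int(os.environ.get('RANK', 0))            # each launched copy passes its own rank
    dist.init_process_group(backend='gloo',
                            init_method='tcp://127.0.0.1:23456',
                            world_size=4, rank=rank)
    model = torch.nn.Linear(10, 10).cuda()
    model = DistributedDataParallel(model)           # gradients are averaged across processes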
Initialization
--------------
The package needs to be initialized using the :func:`torch.distributed.init_process_group`
function before calling any other methods. This blocks until all processes have
joined.
function before calling any other methods.
.. autofunction:: init_process_group
@ -86,15 +55,14 @@ Currently three initialization methods are supported:
TCP initialization
^^^^^^^^^^^^^^^^^^
There are two ways to initialize using TCP, both requiring a network address
reachable from all processes and a desired ``world_size``. The first way
requires specifying an address that belongs to the rank 0 process. This first way of
initialization requires that all processes have manually specified ranks.
Initialization will utilize a network address reachable from all processes.
If the address belongs to one of the machines, initialization requires that all processes
have manually specified ranks.
Alternatively, the address has to be a valid IP multicast address, in which case
Alternatively, the address has to be a valid IP multicast address, in which case,
ranks can be assigned automatically. Multicast initialization also supports
a ``group_name`` argument, which allows you to use the same address for multiple
jobs, as long as they use different group names.
a ``group_name`` argument, which allows you to use the same address for multiple jobs,
as long as they use different group names.
::
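    # A sketch with an illustrative address and port; the address belongs to the
    # rank 0 process, and each process passes its own rank with the same world_size.
    import torch.distributed as dist

    dist.init_process_group(backend='tcp',
                            init_method='tcp://10.1.1.20:23456',
                            rank=0, world_size=4)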
@ -110,12 +78,12 @@ jobs, as long as they use different group names.
Shared file-system initialization
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Another initialization method makes use of a file system that is shared and
visible from all machines in a group, along with a desired ``world_size``. The URL should start
with ``file://`` and contain a path to a non-existent file (in an existing
directory) on a shared file system. This initialization method also supports a
``group_name`` argument, which allows you to use the same shared file path for
multiple jobs, as long as they use different group names.
Another initialization method makes use of a file system shared and visible from
all machines in a group. The URL should start with ``file://`` and contain a path
to a non-existent file (in an existing directory) on a shared file system.
This initialization method also supports a ``group_name`` argument, which allows you to
use the same shared file path for multiple jobs, as long as they use different
group names.
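For example (a sketch; the path, world size and group name are illustrative)::

    import torch.distributed as dist

    # every process points at the same not-yet-existing file on a shared filesystem
    dist.init_process_group(backend='gloo',
                            init_method='file:///mnt/nfs/sharedfile',
                            world_size=4, group_name='job-1')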
.. warning::
This method assumes that the file system supports locking using ``fcntl`` - most
@ -173,10 +141,6 @@ as they should never be created manually, but they are guaranteed to support two
* ``is_completed()`` - returns True if the operation has finished
* ``wait()`` - will block the process until the operation is finished.
``is_completed()`` is guaranteed to return True once it returns.
When using the MPI backend, :func:`~torch.distributed.isend` and :func:`~torch.distributed.irecv`
support non-overtaking messages, which provides some guarantees about message ordering. For more detail, see
http://mpi-forum.org/docs/mpi-2.2/mpi22-report/node54.htm#Node54
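A sketch of the request-object pattern (rank 0 sends to rank 1; the process group is assumed to be initialized already)::

    import torch
    import torch.distributed as dist

    tensor = torch.zeros(5)
    if dist.get_rank() == 0:
        tensor += 1
        req = dist.isend(tensor=tensor, dst=1)   # returns immediately
    else:
        req = dist.irecv(tensor=tensor, src=0)
    req.wait()                                   # block until the transfer has finished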
.. autofunction:: isend

View File

@ -1,32 +0,0 @@
.. role:: hidden
:class: hidden-section
Probability distributions - torch.distributions
==================================================
.. automodule:: torch.distributions
.. currentmodule:: torch.distributions
:hidden:`Distribution`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Distribution
:members:
:hidden:`Bernoulli`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Bernoulli
:members:
:hidden:`Categorical`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Categorical
:members:
:hidden:`Normal`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Normal
:members:

View File

@ -29,7 +29,6 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
nn
optim
torch.autograd <autograd>
torch.distributions <distributions>
torch.multiprocessing <multiprocessing>
torch.distributed <distributed>
torch.legacy <legacy>
@ -37,14 +36,17 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
ffi
data
model_zoo
onnx
.. toctree::
:glob:
:maxdepth: 2
:maxdepth: 1
:caption: torchvision Reference
torchvision/index
torchvision/torchvision
torchvision/datasets
torchvision/models
torchvision/transforms
torchvision/utils
Indices and tables

View File

@ -62,7 +62,7 @@ view onto the storage data.
Note that if there will be a lot of tensors shared, this strategy will keep a
large number of file descriptors open most of the time. If your system has low
limits for the number of open file descriptors, and you can't raise them, you
limits for the number of open file descriptors, and you can't rise them, you
should use the ``file_system`` strategy.
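A short sketch of selecting that strategy::

    import torch.multiprocessing as mp
    mp.set_sharing_strategy('file_system')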
File system - ``file_system``

View File

@ -162,12 +162,6 @@ Pooling Layers
.. autoclass:: AdaptiveMaxPool2d
:members:
:hidden:`AdaptiveMaxPool3d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveMaxPool3d
:members:
:hidden:`AdaptiveAvgPool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -180,12 +174,6 @@ Pooling Layers
.. autoclass:: AdaptiveAvgPool2d
:members:
:hidden:`AdaptiveAvgPool3d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool3d
:members:
Padding Layers
--------------
@ -326,12 +314,6 @@ Non-linear Activations
.. autoclass:: Softmax
:members:
:hidden:`Softmax2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Softmax2d
:members:
:hidden:`LogSoftmax`
~~~~~~~~~~~~~~~~~~~~
@ -426,11 +408,6 @@ Linear layers
.. autoclass:: Linear
:members:
:hidden:`Bilinear`
~~~~~~~~~~~~~~~~~~
.. autoclass:: Bilinear
:members:
Dropout layers
----------------------------------
@ -643,7 +620,7 @@ DataParallel layers (multi-GPU, distributed)
:hidden:`DistributedDataParallel`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: torch.nn.parallel.DistributedDataParallel
.. autoclass:: torch.nn.parallel.DataParallel
:members:
@ -787,11 +764,6 @@ Pooling functions
.. autofunction:: adaptive_max_pool2d
:hidden:`adaptive_max_pool3d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_max_pool3d
:hidden:`adaptive_avg_pool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -802,11 +774,6 @@ Pooling functions
.. autofunction:: adaptive_avg_pool2d
:hidden:`adaptive_avg_pool3d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_avg_pool3d
Non-linear activation functions
-------------------------------
@ -857,11 +824,6 @@ Non-linear activation functions
.. autofunction:: rrelu
:hidden:`glu`
~~~~~~~~~~~~~~~
.. autofunction:: glu
:hidden:`logsigmoid`
~~~~~~~~~~~~~~~~~~~~

View File

@ -3,19 +3,18 @@
CUDA semantics
==============
:mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of
the currently selected GPU, and all CUDA tensors you allocate will by default be
created on that device. The selected device can be changed with a
:mod:`torch.cuda` keeps track of currently selected GPU, and all CUDA tensors
you allocate will be created on it. The selected device can be changed with a
:any:`torch.cuda.device` context manager.
However, once a tensor is allocated, you can do operations on it irrespective
of the selected device, and the results will always be placed on the same
However, once a tensor is allocated, you can do operations on it irrespectively
of your selected device, and the results will always be placed on the same
device as the tensor.
Cross-GPU operations are not allowed by default, with the only exception of
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access, any
attempts to launch ops on tensors spread across different devices will raise an
error.
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory accesses,
any attempts to launch ops on tensors spread across different devices will
raise an error.
Below you can find a small example showcasing this::
@ -42,147 +41,9 @@ Below you can find a small example showcasing this::
d = torch.randn(2).cuda(2)
# d.get_device() == 2
Asynchronous execution
----------------------
By default, GPU operations are asynchronous. When you call a function that
uses the GPU, the operations are *enqueued* to the particular device, but not
necessarily executed until later. This allows us to execute more computations
in parallel, including operations on CPU or other GPUs.
In general, the effect of asynchronous computation is invisible to the caller,
because (1) each device executes operations in the order they are queued, and
(2) PyTorch automatically performs necessary synchronization when copying data
between CPU and GPU or between two GPUs. Hence, computation will proceed as if
every operation was executed synchronously.
You can force synchronous computation by setting environment variable
`CUDA_LAUNCH_BLOCKING=1`. This can be handy when an error occurs on the GPU.
(With asynchronous execution, such an error isn't reported until after the
operation is actually executed, so the stack trace does not show where it was
requested.)
As an exception, several functions such as :meth:`~torch.Tensor.copy_` admit
an explicit :attr:`async` argument, which lets the caller bypass synchronization
when it is unnecessary. Another exception is CUDA streams, explained below.
CUDA streams
^^^^^^^^^^^^
A `CUDA stream`_ is a linear sequence of execution that belongs to a specific
device. You normally do not need to create one explicitly: by default, each
device uses its own "default" stream.
Operations inside each stream are serialized in the order they are created,
but operations from different streams can execute concurrently in any
relative order, unless explicit synchronization functions (such as
:meth:`~torch.cuda.synchronize` or :meth:`~torch.cuda.Stream.wait_stream`) are
used. For example, the following code is incorrect::
s = torch.cuda.stream() # Create a new stream.
A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0)
with torch.cuda.stream(s):
# sum() may start execution before normal_() finishes!
B = torch.sum(A)
When the "current stream" is the default stream, PyTorch automatically performs
necessary synchronization when data is moved around, as explained above.
However, when using non-default streams, it is the user's responsibility to
ensure proper synchronization.
.. _CUDA stream: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams
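One way to make the example above correct (a sketch using :class:`torch.cuda.Stream` and ``wait_stream``) is to make ``s`` wait for the work already queued on the default stream before using its results::

    s = torch.cuda.Stream()                        # a new, non-default stream
    A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0)
    s.wait_stream(torch.cuda.current_stream())     # order s after the default stream
    with torch.cuda.stream(s):
        B = torch.sum(A)                           # now runs after normal_() has finished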
Memory management
-----------------
PyTorch uses a caching memory allocator to speed up memory allocations. This
allows fast memory deallocation without device synchronizations. However, the
unused memory managed by the allocator will still show as if used in
`nvidia-smi`. Calling :meth:`~torch.cuda.empty_cache` can release all unused
cached memory from PyTorch so that it can be used by other GPU applications.
Best practices
--------------
Device-agnostic code
^^^^^^^^^^^^^^^^^^^^
Due to the structure of PyTorch, you may need to explicitly write
device-agnostic (CPU or GPU) code; an example may be creating a new tensor as
the initial hidden state of a recurrent neural network.
The first step is to determine whether the GPU should be used or not. A common
pattern is to use Python's ``argparse`` module to read in user arguments, and
have a flag that can be used to disable CUDA, in combination with
:meth:`~torch.cuda.is_available`. In the following, ``args.cuda`` results in a
flag that can be used to cast tensors and modules to CUDA if desired::
import argparse
import torch
parser = argparse.ArgumentParser(description='PyTorch Example')
parser.add_argument('--disable-cuda', action='store_true',
help='Disable CUDA')
args = parser.parse_args()
args.cuda = not args.disable_cuda and torch.cuda.is_available()
If modules or tensors need to be sent to the GPU, ``args.cuda`` can be used as
follows::
x = torch.Tensor(8, 42)
net = Network()
if args.cuda:
x = x.cuda()
net.cuda()
When creating tensors, an alternative to the if statement is to have a default
datatype defined, and cast all tensors using that. An example when using a
dataloader would be as follows::
dtype = torch.cuda.FloatTensor
for i, x in enumerate(train_loader):
x = Variable(x.type(dtype))
When working with multiple GPUs on a system, you can use the
``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to
PyTorch. As mentioned above, to manually control which GPU a tensor is created
on, the best practice is to use a :any:`torch.cuda.device` context manager::
print("Outside device is 0") # On device 0 (default in most scenarios)
with torch.cuda.device(1):
print("Inside device is 1") # On device 1
print("Outside device is still 0") # On device 0
If you have a tensor and would like to create a new tensor of the same type on
the same device, then you can use the :meth:`~torch.Tensor.new` method, which
acts the same as a normal tensor constructor. Whilst the previously mentioned
methods depend on the current GPU context, :meth:`~torch.Tensor.new` preserves
the device of the original tensor.
This is the recommended practice when creating modules in which new
tensors/variables need to be created internally during the forward pass::
x_cpu = torch.FloatTensor(1)
x_gpu = torch.cuda.FloatTensor(1)
x_cpu_long = torch.LongTensor(1)
y_cpu = x_cpu.new(8, 10, 10).fill_(0.3)
y_gpu = x_gpu.new(x_gpu.size()).fill_(-5)
y_cpu_long = x_cpu_long.new([[1, 2, 3]])
If you want to create a tensor of the same type and size of another tensor, and
fill it with either ones or zeros, :meth:`~torch.ones_like` or
:meth:`~torch.zeros_like` are provided as convenient helper functions (which
also preserve device)::
x_cpu = torch.FloatTensor(1)
x_gpu = torch.cuda.FloatTensor(1)
y_cpu = torch.ones_like(x_cpu)
y_gpu = torch.zeros_like(x_gpu)
Use pinned memory buffers
^^^^^^^^^^^^^^^^^^^^^^^^^
@ -208,9 +69,9 @@ pinned memory by passing ``pin_memory=True`` to its constructor.
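A sketch of that pattern (``dataset`` stands for any :class:`torch.utils.data.Dataset` yielding ``(input, target)`` pairs; it is assumed to be defined elsewhere)::

    import torch.utils.data

    loader = torch.utils.data.DataLoader(dataset, batch_size=64,
                                         shuffle=True, pin_memory=True)
    for input, target in loader:
        input, target = input.cuda(), target.cuda()   # copies originate from page-locked memory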
Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most use cases involving batched inputs and multiple GPUs should default to
using :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with
the GIL, a single Python process can saturate multiple GPUs.
Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,

View File

@ -40,7 +40,7 @@ Below you can find code for a ``Linear`` function from :mod:`torch.nn`, with
additional comments::
# Inherit from Function
class LinearFunction(Function):
class Linear(Function):
# Note that both forward and backward are @staticmethods
@staticmethod
@ -67,11 +67,11 @@ additional comments::
# improve efficiency. If you want to make your code simpler, you can
# skip them. Returning gradients for inputs that don't require it is
# not an error.
if ctx.needs_input_grad[0]:
if self.needs_input_grad[0]:
grad_input = grad_output.mm(weight)
if ctx.needs_input_grad[1]:
if self.needs_input_grad[1]:
grad_weight = grad_output.t().mm(input)
if bias is not None and ctx.needs_input_grad[2]:
if bias is not None and self.needs_input_grad[2]:
grad_bias = grad_output.sum(0).squeeze(0)
return grad_input, grad_weight, grad_bias
@ -79,7 +79,7 @@ additional comments::
Now, to make it easier to use these custom ops, we recommend aliasing their
``apply`` method::
linear = LinearFunction.apply
linear = Linear.aply
Here, we give an additional example of a function that is parametrized by
non-Variable arguments::
@ -88,7 +88,7 @@ non-Variable arguments::
@staticmethod
def forward(ctx, tensor, constant):
# ctx is a context object that can be used to stash information
# for backward computation
for backward computation
ctx.constant = constant
return tensor * constant
@ -145,7 +145,6 @@ This is how a ``Linear`` module can be implemented::
class Linear(nn.Module):
def __init__(self, input_features, output_features, bias=True):
super(Linear, self).__init__()
self.input_features = input_features
self.output_features = output_features
@ -157,7 +156,7 @@ This is how a ``Linear`` module can be implemented::
# .register_buffer() to register buffers.
# nn.Parameters can never be volatile and, different than Variables,
# they require gradients by default.
self.weight = nn.Parameter(torch.Tensor(output_features, input_features))
self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
if bias:
self.bias = nn.Parameter(torch.Tensor(output_features))
else:
@ -172,7 +171,7 @@ This is how a ``Linear`` module can be implemented::
def forward(self, input):
# See the autograd section for explanation of what happens here.
return LinearFunction.apply(input, self.weight, self.bias)
return Linear()(input, self.weight, self.bias)
Writing custom C extensions

View File

@ -1,182 +0,0 @@
torch.onnx
============
.. automodule:: torch.onnx
Example: End-to-end AlexNet from PyTorch to Caffe2
--------------------------------------------------
Here is a simple script which exports a pretrained AlexNet as defined in
torchvision into ONNX. It runs a single round of inference and then
saves the resulting traced model to ``alexnet.proto``::
from torch.autograd import Variable
import torch.onnx
import torchvision
dummy_input = Variable(torch.randn(10, 3, 224, 224)).cuda()
model = torchvision.models.alexnet(pretrained=True).cuda()
torch.onnx.export(model, dummy_input, "alexnet.proto", verbose=True)
The resulting ``alexnet.proto`` is a binary protobuf file which contains both
the network structure and parameters of the model you exported
(in this case, AlexNet). The keyword argument ``verbose=True`` causes the
exporter to print out a human-readable representation of the network::
# All parameters are encoded explicitly as inputs. By convention,
# learned parameters (ala nn.Module.state_dict) are first, and the
# actual inputs are last.
graph(%1 : Float(64, 3, 11, 11)
%2 : Float(64)
# The definition sites of all variables are annotated with type
# information, specifying the type and size of tensors.
# For example, %3 is a 192 x 64 x 5 x 5 tensor of floats.
%3 : Float(192, 64, 5, 5)
%4 : Float(192)
# ---- omitted for brevity ----
%15 : Float(1000, 4096)
%16 : Float(1000)
%17 : Float(10, 3, 224, 224)) { # the actual input!
# Every statement consists of some output tensors (and their types),
# the operator to be run (with its attributes, e.g., kernels, strides,
# etc.), its input tensors (%17, %1)
%19 : UNKNOWN_TYPE = Conv[kernels=[11, 11], strides=[4, 4], pads=[2, 2, 2, 2], dilations=[1, 1], group=1](%17, %1), uses = [[%20.i0]];
# UNKNOWN_TYPE: sometimes type information is not known. We hope to eliminate
# all such cases in a later release.
%20 : Float(10, 64, 55, 55) = Add[broadcast=1, axis=1](%19, %2), uses = [%21.i0];
%21 : Float(10, 64, 55, 55) = Relu(%20), uses = [%22.i0];
%22 : Float(10, 64, 27, 27) = MaxPool[kernels=[3, 3], pads=[0, 0, 0, 0], dilations=[1, 1], strides=[2, 2]](%21), uses = [%23.i0];
# ...
# Finally, a network returns some tensors
return (%58);
}
You can also verify the protobuf using the `onnx <https://github.com/onnx/onnx/>`_ library.
You can install ``onnx`` with conda::
conda install -c conda-forge onnx
Then, you can run::
import onnx
# Load the ONNX model
model = onnx.load("alexnet.proto")
# Check that the IR is well formed
onnx.checker.check_model(model)
# Print a human readable representation of the graph
onnx.helper.printable_graph(model.graph)
To run the exported script with `caffe2 <https://caffe2.ai/>`_, you will need three things:
1. You'll need an install of Caffe2. If you don't have one already, please
`follow the install instructions <https://caffe2.ai/docs/getting-started.html>`_.
2. You'll need `onnx-caffe2 <https://github.com/onnx/onnx-caffe2>`_, a
pure-Python library which provides a Caffe2 backend for ONNX. You can install ``onnx-caffe2``
with pip::
pip install onnx-caffe2
Once these are installed, you can use the backend for Caffe2::
# ...continuing from above
import onnx_caffe2.backend as backend
import numpy as np
rep = backend.prepare(model, device="CUDA:0") # or "CPU"
# For the Caffe2 backend:
# rep.predict_net is the Caffe2 protobuf for the network
# rep.workspace is the Caffe2 workspace for the network
# (see the class onnx_caffe2.backend.Workspace)
outputs = rep.run(np.random.randn(10, 3, 224, 224).astype(np.float32))
# To run networks with more than one input, pass a tuple
# rather than a single numpy ndarray.
print(outputs[0])
In the future, there will be backends for other frameworks as well.
Limitations
-----------
* The ONNX exporter is a *trace-based* exporter, which means that it
operates by executing your model once, and exporting the operators which
were actually run during this run. This means that if your model is
dynamic, e.g., changes behavior depending on input data, the export
won't be accurate. Similarly, a trace is likely to be valid only
for a specific input size (which is one reason why we require explicit inputs
on tracing.) We recommend examining the model trace and making sure
the traced operators look reasonable.
* PyTorch and Caffe2 often have implementations of operators with some
numeric differences. Depending on model structure, these differences
may be negligible, but they can also cause major divergences in behavior
(especially on untrained models.) In a future release, we plan to
allow Caffe2 to call directly to Torch implementations of operators, to
help you smooth over these differences when precision is important,
and to also document these differences.
Supported operators
-------------------
The following operators are supported:
* add (nonzero alpha not supported)
* sub (nonzero alpha not supported)
* mul
* div
* cat
* mm
* addmm
* neg
* tanh
* sigmoid
* mean
* t
* expand (only when used before a broadcasting ONNX operator; e.g., add)
* transpose
* view
* split
* squeeze
* prelu (single weight shared among input channels not supported)
* threshold (non-zero threshold/non-zero value not supported)
* leaky_relu
* glu
* softmax
* avg_pool2d (ceil_mode not supported)
* log_softmax
* unfold (experimental support with ATen-Caffe2 integration)
* elu
* Conv
* BatchNorm
* MaxPool1d (ceil_mode not supported)
* MaxPool2d (ceil_mode not supported)
* MaxPool3d (ceil_mode not supported)
* Embedding (no optional arguments supported)
* RNN
* ConstantPadNd
* Dropout
* FeatureDropout (training mode not supported)
* Index (constant integer and tuple indices supported)
* Negate
The operator set above is sufficient to export the following models:
* AlexNet
* DCGAN
* DenseNet
* Inception (warning: this model is highly sensitive to changes in operator
implementation)
* ResNet
* SuperResolution
* VGG
* `word_language_model <https://github.com/pytorch/examples/tree/master/word_language_model>`_
The interface for specifying operator definitions is highly experimental
and undocumented; adventurous users should note that the APIs will probably
change in a future release.
Functions
--------------------------
.. autofunction:: export

View File

@ -16,15 +16,6 @@ To construct an :class:`Optimizer` you have to give it an iterable containing th
parameters (all should be :class:`~torch.autograd.Variable` s) to optimize. Then,
you can specify optimizer-specific options such as the learning rate, weight decay, etc.
.. note::
If you need to move a model to GPU via `.cuda()`, please do so before
constructing optimizers for it. Parameters of a model after `.cuda()` will
be different objects from those before the call.
In general, you should make sure that optimized parameters live in
consistent locations when optimizers are constructed and used.
Example::
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)
@ -111,8 +102,6 @@ Algorithms
:members:
.. autoclass:: Adam
:members:
.. autoclass:: SparseAdam
:members:
.. autoclass:: Adamax
:members:
.. autoclass:: ASGD
@ -130,7 +119,7 @@ How to adjust Learning Rate
---------------------------
:mod:`torch.optim.lr_scheduler` provides several methods to adjust the learning
rate based on the number of epochs. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
rate based on the number of epoches. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
allows dynamic learning rate reducing based on some validation measurements.
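A usage sketch with one of the schedulers listed below (``model`` and the training helpers are assumed to exist elsewhere)::

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    for epoch in range(100):
        scheduler.step()        # decay the learning rate once per epoch
        train_one_epoch()       # hypothetical training / validation helpers
        validate()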
.. autoclass:: torch.optim.lr_scheduler.LambdaLR
@ -141,7 +130,5 @@ allows dynamic learning rate reducing based on some validation measurements.
:members:
.. autoclass:: torch.optim.lr_scheduler.ExponentialLR
:members:
.. autoclass:: torch.optim.lr_scheduler.CosineAnnealingLR
:members:
.. autoclass:: torch.optim.lr_scheduler.ReduceLROnPlateau
:members:

View File

@ -12,30 +12,16 @@ efficiently store and process tensors for which the majority of elements
are zeros.
A sparse tensor is represented as a pair of dense tensors: a tensor
of values and a 2D tensor of indices. A sparse tensor can be constructed
of values and a tensor of indices. A sparse tensor can be constructed
by providing these two tensors, as well as the size of the sparse tensor
(which cannot be inferred from these tensors!) Suppose we want to define
a sparse tensor with the entry 3 at location (0, 2), entry 4 at
location (1, 0), and entry 5 at location (1, 2). We would then write:
(which cannot be inferred from these tensors!)
>>> i = torch.LongTensor([[0, 1, 1],
[2, 0, 2]])
>>> v = torch.FloatTensor([3, 4, 5])
>>> i = torch.LongTensor([[0, 1], [2, 0]])
>>> v = torch.FloatTensor([3, 4])
>>> torch.sparse.FloatTensor(i, v, torch.Size([2,3])).to_dense()
0 0 3
4 0 5
[torch.FloatTensor of size 2x3]
Note that the input to LongTensor is NOT a list of index tuples. If you want
to write your indices this way, you should transpose before passing them to
the sparse constructor:
>>> i = torch.LongTensor([[0, 2], [1, 0], [1, 2]])
>>> v = torch.FloatTensor([3, 4, 5 ])
>>> torch.sparse.FloatTensor(i.t(), v, torch.Size([2,3])).to_dense()
0 0 3
4 0 5
[torch.FloatTensor of size 2x3]
4 0 0
[torch.FloatTensor of size 2x2]
You can also construct hybrid sparse tensors, where only the first n
dimensions are sparse, and the rest of the dimensions are dense.
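For example, a small sketch with one sparse dimension and one dense dimension (the size is passed explicitly, as above):

>>> i = torch.LongTensor([[2, 4]])
>>> v = torch.FloatTensor([[1, 3], [5, 7]])
>>> torch.sparse.FloatTensor(i, v, torch.Size([5, 2])).to_dense()
 0  0
 0  0
 1  3
 0  0
 5  7
[torch.FloatTensor of size 5x2]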

View File

@ -1,7 +1,5 @@
.. currentmodule:: torch
.. _tensor-doc:
torch.Tensor
===================================
@ -146,10 +144,6 @@ view of a storage and defines numeric operations on it.
.. automethod:: eq
.. automethod:: eq_
.. automethod:: equal
.. automethod:: erf
.. automethod:: erf_
.. automethod:: erfinv
.. automethod:: erfinv_
.. automethod:: exp
.. automethod:: exp_
.. automethod:: expand
@ -240,7 +234,6 @@ view of a storage and defines numeric operations on it.
.. automethod:: pow_
.. automethod:: prod
.. automethod:: pstrf
.. automethod:: put_
.. automethod:: qr
.. automethod:: random_
.. automethod:: reciprocal
@ -288,7 +281,6 @@ view of a storage and defines numeric operations on it.
.. automethod:: symeig
.. automethod:: t
.. automethod:: t_
.. automethod:: take
.. automethod:: tan
.. automethod:: tan_
.. automethod:: tanh
@ -315,10 +307,3 @@ view of a storage and defines numeric operations on it.
.. automethod:: view
.. automethod:: view_as
.. automethod:: zero_
.. class:: ByteTensor()
The following methods are unique to :class:`torch.ByteTensor`.
.. automethod:: all
.. automethod:: any

View File

@ -18,11 +18,13 @@ Creation Ops
.. autofunction:: linspace
.. autofunction:: logspace
.. autofunction:: ones
.. autofunction:: ones_like
.. autofunction:: rand
.. autofunction:: randn
.. autofunction:: randperm
.. autofunction:: arange
.. autofunction:: range
.. autofunction:: zeros
.. autofunction:: zeros_like
Indexing, Slicing, Joining, Mutating Ops
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -36,7 +38,6 @@ Indexing, Slicing, Joining, Mutating Ops
.. autofunction:: squeeze
.. autofunction:: stack
.. autofunction:: t
.. autofunction:: take
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze
@ -52,23 +53,6 @@ Random sampling
.. autofunction:: bernoulli
.. autofunction:: multinomial
.. autofunction:: normal
.. autofunction:: rand
.. autofunction:: randn
.. autofunction:: randperm
In-place random sampling
~~~~~~~~~~~~~~~~~~~~~~~~
There are a few more in-place random sampling functions defined on Tensors as well. Click through to refer to their documentation:
- :func:`torch.Tensor.bernoulli_` - in-place version of :func:`torch.bernoulli`
- :func:`torch.Tensor.cauchy_` - numbers drawn from the Cauchy distribution
- :func:`torch.Tensor.exponential_` - numbers drawn from the exponential distribution
- :func:`torch.Tensor.geometric_` - elements drawn from the geometric distribution
- :func:`torch.Tensor.log_normal_` - samples from the log-normal distribution
- :func:`torch.Tensor.normal_` - in-place version of :func:`torch.normal`
- :func:`torch.Tensor.random_` - numbers sampled from the discrete uniform distribution
- :func:`torch.Tensor.uniform_` - numbers sampled from the uniform distribution
Serialization
@ -102,8 +86,6 @@ Pointwise Ops
.. autofunction:: cos
.. autofunction:: cosh
.. autofunction:: div
.. autofunction:: erf
.. autofunction:: erfinv
.. autofunction:: exp
.. autofunction:: floor
.. autofunction:: fmod
@ -201,3 +183,4 @@ BLAS and LAPACK Operations
.. autofunction:: svd
.. autofunction:: symeig
.. autofunction:: trtrs

View File

@ -0,0 +1,112 @@
torchvision.datasets
====================
All datasets are subclasses of :class:`torch.utils.data.Dataset`
i.e., they have ``__getitem__`` and ``__len__`` methods implemented.
Hence, they can all be passed to a :class:`torch.utils.data.DataLoader`
which can load multiple samples in parallel using ``torch.multiprocessing`` workers.
For example: ::
imagenet_data = torchvision.datasets.ImageFolder('path/to/imagenet_root/')
data_loader = torch.utils.data.DataLoader(imagenet_data,
batch_size=4,
shuffle=True,
num_workers=args.nThreads)
The following datasets are available:
.. contents:: Datasets
:local:
All the datasets have very similar APIs. They all have two common arguments:
``transform`` and ``target_transform`` to transform the input and target respectively.
.. currentmodule:: torchvision.datasets
MNIST
~~~~~
.. autoclass:: MNIST
COCO
~~~~
.. note ::
These require the `COCO API to be installed`_
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI
Captions
^^^^^^^^
.. autoclass:: CocoCaptions
:members: __getitem__
:special-members:
Detection
^^^^^^^^^
.. autoclass:: CocoDetection
:members: __getitem__
:special-members:
LSUN
~~~~
.. autoclass:: LSUN
:members: __getitem__
:special-members:
ImageFolder
~~~~~~~~~~~
.. autoclass:: ImageFolder
:members: __getitem__
:special-members:
Imagenet-12
~~~~~~~~~~~
This should simply be implemented with an ``ImageFolder`` dataset.
The data is preprocessed `as described
here <https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset>`__
`Here is an
example <https://github.com/pytorch/examples/blob/27e2a46c1d1505324032b1d94fc6ce24d5b67e97/imagenet/main.py#L48-L62>`__.
CIFAR
~~~~~
.. autoclass:: CIFAR10
:members: __getitem__
:special-members:
STL10
~~~~~
.. autoclass:: STL10
:members: __getitem__
:special-members:
SVHN
~~~~~
.. autoclass:: SVHN
:members: __getitem__
:special-members:
PhotoTour
~~~~~~~~~
.. autoclass:: PhotoTour
:members: __getitem__
:special-members:

View File

@ -0,0 +1,12 @@
torchvision.models
===================
.. currentmodule:: torchvision.models
.. automodule:: torchvision.models
:members: alexnet, resnet18, resnet34, resnet50, resnet101, resnet152,
vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19,
vgg19_bn, inception_v3, squeezenet1_0, squeezenet1_1, densenet121,
densenet169, densenet201, densenet161
:undoc-members:

View File

@ -0,0 +1,8 @@
torchvision
===================
The :mod:`torchvision` package consists of popular datasets, model
architectures, and common image transformations for computer vision.
.. automodule:: torchvision
:members:

View File

@ -0,0 +1,48 @@
torchvision.transforms
======================
.. currentmodule:: torchvision.transforms
Transforms are common image transforms. They can be chained together using :class:`Compose`
.. autoclass:: Compose
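A sketch chaining several of the transforms documented below (the crop sizes and normalization statistics are illustrative)::

    import torchvision.transforms as transforms

    preprocess = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # tensor = preprocess(pil_image)   # pil_image is any PIL.Image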
Transforms on PIL.Image
-----------------------
.. autoclass:: Scale
.. autoclass:: CenterCrop
.. autoclass:: RandomCrop
.. autoclass:: RandomHorizontalFlip
.. autoclass:: RandomSizedCrop
.. autoclass:: Pad
Transforms on torch.\*Tensor
----------------------------
.. autoclass:: Normalize
:members: __call__
:special-members:
Conversion Transforms
---------------------
.. autoclass:: ToTensor
:members: __call__
:special-members:
.. autoclass:: ToPILImage
:members: __call__
:special-members:
Generic Transforms
------------------
.. autoclass:: Lambda

View File

@ -0,0 +1,9 @@
torchvision.utils
===================
.. currentmodule:: torchvision.utils
.. autofunction:: make_grid
.. autofunction:: save_image

setup.py
View File

@ -13,17 +13,14 @@ import sys
import os
from tools.setup_helpers.env import check_env_flag
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME, CUDA_VERSION
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
from tools.setup_helpers.nccl import WITH_NCCL, WITH_SYSTEM_NCCL, NCCL_LIB_DIR, \
NCCL_INCLUDE_DIR, NCCL_ROOT_DIR, NCCL_SYSTEM_LIB
from tools.setup_helpers.nnpack import WITH_NNPACK, NNPACK_LIB_PATHS, \
NNPACK_INCLUDE_DIRS
from tools.setup_helpers.split_types import split_types
DEBUG = check_env_flag('DEBUG')
WITH_DISTRIBUTED = not check_env_flag('NO_DISTRIBUTED')
WITH_DISTRIBUTED_MW = WITH_DISTRIBUTED and check_env_flag('WITH_DISTRIBUTED_MW')
WITH_NCCL = WITH_CUDA and platform.system() != 'Darwin'
SYSTEM_NCCL = False
################################################################################
@ -34,7 +31,7 @@ import distutils.sysconfig
cfg_vars = distutils.sysconfig.get_config_vars()
for key, value in cfg_vars.items():
if type(value) == str:
cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
################################################################################
# Monkey-patch setuptools to compile in parallel
@ -57,9 +54,6 @@ def parallelCCompile(self, sources, output_dir=None, macros=None,
src, ext = build[obj]
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
num_jobs = multiprocessing.cpu_count()
max_jobs = os.getenv("MAX_JOBS")
if max_jobs is not None:
num_jobs = min(num_jobs, int(max_jobs))
multiprocessing.pool.ThreadPool(num_jobs).map(_single_compile, objects)
return objects
@ -80,31 +74,6 @@ distutils.unixccompiler.UnixCCompiler.link = patched_link
# Custom build commands
################################################################################
dep_libs = [
'TH', 'THS', 'THNN', 'THC', 'THCS', 'THCUNN', 'nccl', 'libshm',
'ATen', 'gloo', 'THD', 'nanopb',
]
def build_libs(libs):
for lib in libs:
assert lib in dep_libs, 'invalid lib: {}'.format(lib)
build_libs_cmd = ['bash', 'torch/lib/build_libs.sh']
my_env = os.environ.copy()
my_env["PYTORCH_PYTHON"] = sys.executable
if WITH_SYSTEM_NCCL:
my_env["NCCL_ROOT_DIR"] = NCCL_ROOT_DIR
if WITH_CUDA:
my_env["CUDA_BIN_PATH"] = CUDA_HOME
build_libs_cmd += ['--with-cuda']
if subprocess.call(build_libs_cmd + libs, env=my_env) != 0:
sys.exit(1)
if 'THNN' in libs or 'THCUNN' in libs:
from tools.nnwrap import generate_wrappers as generate_nn_wrappers
generate_nn_wrappers()
class build_deps(Command):
user_options = []
@ -116,30 +85,17 @@ class build_deps(Command):
pass
def run(self):
libs = ['TH', 'THS', 'THNN']
from tools.nnwrap import generate_wrappers as generate_nn_wrappers
build_all_cmd = ['bash', 'torch/lib/build_all.sh']
if WITH_CUDA:
libs += ['THC', 'THCS', 'THCUNN']
if WITH_NCCL and not WITH_SYSTEM_NCCL:
libs += ['nccl']
libs += ['libshm', 'ATen', 'nanopb']
build_all_cmd += ['--with-cuda']
if WITH_NCCL and not SYSTEM_NCCL:
build_all_cmd += ['--with-nccl']
if WITH_DISTRIBUTED:
if sys.platform.startswith('linux'):
libs += ['gloo']
libs += ['THD']
build_libs(libs)
build_dep_cmds = {}
for lib in dep_libs:
# wrap in function to capture lib
class build_dep(build_deps):
description = 'Build {} external library'.format(lib)
def run(self):
build_libs([self.lib])
build_dep.lib = lib
build_dep_cmds['build_' + lib.lower()] = build_dep
build_all_cmd += ['--with-distributed']
if subprocess.call(build_all_cmd) != 0:
sys.exit(1)
generate_nn_wrappers()
class build_module(Command):
@ -169,11 +125,6 @@ class build_py(setuptools.command.build_py.build_py):
version_path = os.path.join(cwd, 'torch', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
# NB: This is not 100% accurate, because you could have built the
# library code with DEBUG, but csrc without DEBUG (in which case
# this would claim to be a release build when it's not.)
f.write("debug = {}\n".format(repr(DEBUG)))
f.write("cuda = {}\n".format(repr(CUDA_VERSION)))
class develop(setuptools.command.develop.develop):
@ -183,23 +134,6 @@ class develop(setuptools.command.develop.develop):
setuptools.command.develop.develop.run(self)
def monkey_patch_THD_link_flags():
'''
THD's dynamic link deps are not determined until after build_deps is run
So, we need to monkey-patch them in later
'''
# read tmp_install_path/THD_deps.txt for THD's dynamic linkage deps
with open(tmp_install_path + '/THD_deps.txt', 'r') as f:
thd_deps_ = f.read()
thd_deps = []
# remove empty lines
for l in thd_deps_.split(';'):
if l != '':
thd_deps.append(l)
C.extra_link_args += thd_deps
class build_ext(setuptools.command.build_ext.build_ext):
def run(self):
@ -216,26 +150,17 @@ class build_ext(setuptools.command.build_ext.build_ext):
print('-- Detected CUDA at ' + CUDA_HOME)
else:
print('-- Not using CUDA')
if WITH_NCCL and WITH_SYSTEM_NCCL:
print('-- Using system provided NCCL library at ' +
NCCL_SYSTEM_LIB + ', ' + NCCL_INCLUDE_DIR)
if WITH_NCCL and SYSTEM_NCCL:
print('-- Using system provided NCCL library')
elif WITH_NCCL:
print('-- Building NCCL library')
else:
print('-- Not using NCCL')
if WITH_DISTRIBUTED:
print('-- Building with distributed package ')
monkey_patch_THD_link_flags()
else:
print('-- Building without distributed package')
# Do we actually need this here?
if WITH_NNPACK:
nnpack_dir = NNPACK_LIB_PATHS[0]
print('-- Detected NNPACK at ' + nnpack_dir)
else:
print('-- Not using NNPACK')
# cwrap depends on pyyaml, so we can't import it earlier
from tools.cwrap import cwrap
from tools.cwrap.plugins.THPPlugin import THPPlugin
@ -249,8 +174,6 @@ class build_ext(setuptools.command.build_ext.build_ext):
from tools.cwrap.plugins.AssertNDim import AssertNDim
from tools.cwrap.plugins.Broadcast import Broadcast
from tools.cwrap.plugins.ProcessorSpecificPlugin import ProcessorSpecificPlugin
from tools.autograd.gen_variable_type import gen_variable_type
from tools.jit.gen_jit_dispatch import gen_jit_dispatch
thp_plugin = THPPlugin()
cwrap('torch/csrc/generic/TensorMethods.cwrap', plugins=[
ProcessorSpecificPlugin(), BoolOption(), thp_plugin,
@ -260,19 +183,6 @@ class build_ext(setuptools.command.build_ext.build_ext):
cwrap('torch/csrc/cudnn/cuDNN.cwrap', plugins=[
CuDNNPlugin(), NullableArguments()
])
# Build ATen based Variable classes
autograd_gen_dir = 'torch/csrc/autograd/generated'
jit_gen_dir = 'torch/csrc/jit/generated'
for d in (autograd_gen_dir, jit_gen_dir):
if not os.path.exists(d):
os.mkdir(d)
gen_variable_type(
'torch/lib/build/ATen/ATen/Declarations.yaml',
autograd_gen_dir)
gen_jit_dispatch(
'torch/lib/build/ATen/ATen/Declarations.yaml',
jit_gen_dir)
# It's an old-style class in Python 2.7...
setuptools.command.build_ext.build_ext.run(self)
@ -318,33 +228,18 @@ extra_link_args = []
extra_compile_args = ['-std=c++11', '-Wno-write-strings',
# Python 2.6 requires -fno-strict-aliasing, see
# http://legacy.python.org/dev/peps/pep-3123/
'-fno-strict-aliasing',
# Clang has an unfixed bug leading to spurious missing
# braces warnings, see
# https://bugs.llvm.org/show_bug.cgi?id=21629
'-Wno-missing-braces']
'-fno-strict-aliasing']
cwd = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(cwd, "torch", "lib")
# Check if you remembered to check out submodules
def check_file(f):
if not os.path.exists(f):
print("Could not find {}".format(f))
print("Did you run 'git submodule update --init'?")
sys.exit(1)
check_file(os.path.join(lib_path, "gloo", "CMakeLists.txt"))
check_file(os.path.join(lib_path, "nanopb", "CMakeLists.txt"))
check_file(os.path.join(lib_path, "pybind11", "CMakeLists.txt"))
tmp_install_path = lib_path + "/tmp_install"
include_dirs += [
cwd,
os.path.join(cwd, "torch", "csrc"),
lib_path + "/pybind11/include",
tmp_install_path + "/include",
tmp_install_path + "/include/TH",
tmp_install_path + "/include/THPP",
tmp_install_path + "/include/THNN",
tmp_install_path + "/include/ATen",
]
@ -358,7 +253,10 @@ THC_LIB = os.path.join(lib_path, 'libTHC.so.1')
THCS_LIB = os.path.join(lib_path, 'libTHCS.so.1')
THNN_LIB = os.path.join(lib_path, 'libTHNN.so.1')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.so.1')
THPP_LIB = os.path.join(lib_path, 'libTHPP.so.1')
ATEN_LIB = os.path.join(lib_path, 'libATen.so.1')
GLOO_LIB = os.path.join(lib_path, 'libgloo.a')
GLOO_CUDA_LIB = os.path.join(lib_path, 'libgloo_cuda.a')
THD_LIB = os.path.join(lib_path, 'libTHD.a')
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')
if platform.system() == 'Darwin':
@ -368,15 +266,16 @@ if platform.system() == 'Darwin':
THCS_LIB = os.path.join(lib_path, 'libTHCS.1.dylib')
THNN_LIB = os.path.join(lib_path, 'libTHNN.1.dylib')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.1.dylib')
THPP_LIB = os.path.join(lib_path, 'libTHPP.1.dylib')
ATEN_LIB = os.path.join(lib_path, 'libATen.1.dylib')
NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')
# static library only
NANOPB_STATIC_LIB = os.path.join(lib_path, 'libprotobuf-nanopb.a')
if WITH_NCCL and subprocess.call('ldconfig -p | grep libnccl >/dev/null', shell=True) == 0:
SYSTEM_NCCL = True
main_compile_args = ['-D_THP_CORE']
main_libraries = ['shm']
main_link_args = [TH_LIB, THS_LIB, THNN_LIB, ATEN_LIB, NANOPB_STATIC_LIB]
main_link_args = [TH_LIB, THS_LIB, THPP_LIB, THNN_LIB, ATEN_LIB]
main_sources = [
"torch/csrc/PtrWrapper.cpp",
"torch/csrc/Module.cpp",
@ -385,63 +284,31 @@ main_sources = [
"torch/csrc/Exceptions.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/DynamicTypes.cpp",
"torch/csrc/assertions.cpp",
"torch/csrc/byte_order.cpp",
"torch/csrc/utils.cpp",
"torch/csrc/expand_utils.cpp",
"torch/csrc/utils/invalid_arguments.cpp",
"torch/csrc/utils/object_ptr.cpp",
"torch/csrc/utils/python_arg_parser.cpp",
"torch/csrc/utils/tuple_parser.cpp",
"torch/csrc/allocators.cpp",
"torch/csrc/serialization.cpp",
"torch/csrc/jit/init.cpp",
"torch/csrc/jit/ir.cpp",
"torch/csrc/jit/python_ir.cpp",
"torch/csrc/jit/test_jit.cpp",
"torch/csrc/jit/tracer.cpp",
"torch/csrc/jit/python_tracer.cpp",
"torch/csrc/jit/interned_strings.cpp",
"torch/csrc/jit/type.cpp",
"torch/csrc/jit/export.cpp",
"torch/csrc/jit/passes/graph_fuser.cpp",
"torch/csrc/jit/passes/onnx.cpp",
"torch/csrc/jit/passes/dead_code_elimination.cpp",
"torch/csrc/jit/passes/common_subexpression_elimination.cpp",
"torch/csrc/jit/passes/peephole.cpp",
"torch/csrc/jit/passes/onnx/peephole.cpp",
"torch/csrc/jit/generated/aten_dispatch.cpp",
"torch/csrc/autograd/init.cpp",
"torch/csrc/autograd/engine.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/saved_variable.cpp",
"torch/csrc/autograd/input_buffer.cpp",
"torch/csrc/autograd/profiler.cpp",
"torch/csrc/autograd/python_function.cpp",
"torch/csrc/autograd/python_cpp_function.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_engine.cpp",
"torch/csrc/autograd/python_hook.cpp",
"torch/csrc/autograd/functions/jit_closure.cpp",
"torch/csrc/autograd/generated/VariableType.cpp",
"torch/csrc/autograd/generated/Functions.cpp",
"torch/csrc/autograd/generated/python_variable_methods.cpp",
"torch/csrc/autograd/generated/python_functions.cpp",
"torch/csrc/autograd/generated/python_nn_functions.cpp",
"torch/csrc/autograd/functions/batch_normalization.cpp",
"torch/csrc/autograd/functions/convolution.cpp",
"torch/csrc/autograd/functions/basic_ops.cpp",
"torch/csrc/autograd/functions/tensor.cpp",
"torch/csrc/autograd/functions/accumulate_grad.cpp",
"torch/csrc/autograd/functions/special.cpp",
"torch/csrc/autograd/functions/utils.cpp",
"torch/csrc/autograd/functions/init.cpp",
"torch/csrc/autograd/functions/onnx/convolution.cpp",
"torch/csrc/autograd/functions/onnx/batch_normalization.cpp",
"torch/csrc/autograd/functions/onnx/basic_ops.cpp",
"torch/csrc/onnx/onnx.pb.cpp",
"torch/csrc/onnx/onnx.cpp",
"torch/csrc/nn/THNN_generic.cpp",
]
main_sources += split_types("torch/csrc/Tensor.cpp")
@ -457,6 +324,7 @@ if WITH_DISTRIBUTED:
extra_compile_args += ['-DWITH_DISTRIBUTED']
main_sources += [
"torch/csrc/distributed/Module.cpp",
"torch/csrc/distributed/utils.cpp",
]
if WITH_DISTRIBUTED_MW:
main_sources += [
@ -466,6 +334,8 @@ if WITH_DISTRIBUTED:
extra_compile_args += ['-DWITH_DISTRIBUTED_MW']
include_dirs += [tmp_install_path + "/include/THD"]
main_link_args += [THD_LIB]
if platform.system() == 'Linux':
main_link_args += [GLOO_LIB]
if WITH_CUDA:
cuda_lib_dirs = ['lib64', 'lib']
@ -482,6 +352,8 @@ if WITH_CUDA:
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
main_libraries += ['cudart', 'nvToolsExt']
main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
if platform.system() == 'Linux':
main_link_args += [GLOO_CUDA_LIB]
main_sources += [
"torch/csrc/cuda/Module.cpp",
"torch/csrc/cuda/Storage.cpp",
@ -490,26 +362,20 @@ if WITH_CUDA:
"torch/csrc/cuda/utils.cpp",
"torch/csrc/cuda/expand_utils.cpp",
"torch/csrc/cuda/serialization.cpp",
"torch/csrc/jit/fusion_compiler.cpp",
]
main_sources += split_types("torch/csrc/cuda/Tensor.cpp")
if WITH_NCCL:
if WITH_SYSTEM_NCCL:
main_link_args += [NCCL_SYSTEM_LIB]
include_dirs.append(NCCL_INCLUDE_DIR)
if SYSTEM_NCCL:
main_libraries += ['nccl']
else:
main_link_args += [NCCL_LIB]
extra_compile_args += ['-DWITH_NCCL']
main_sources += [
"torch/csrc/cuda/nccl.cpp",
]
if WITH_CUDNN:
main_libraries += ['cudnn']
include_dirs.append(CUDNN_INCLUDE_DIR)
library_dirs.append(CUDNN_LIB_DIR)
# NOTE: these are at the front, in case there's another cuDNN in CUDA path
include_dirs.insert(0, CUDNN_INCLUDE_DIR)
extra_link_args.insert(0, '-Wl,-rpath,' + CUDNN_LIB_DIR)
main_sources += [
"torch/csrc/cudnn/BatchNorm.cpp",
"torch/csrc/cudnn/Conv.cpp",
@ -521,14 +387,6 @@ if WITH_CUDNN:
]
extra_compile_args += ['-DWITH_CUDNN']
if WITH_NNPACK:
include_dirs.extend(NNPACK_INCLUDE_DIRS)
main_link_args.extend(NNPACK_LIB_PATHS)
main_sources += [
"torch/csrc/nnpack/NNPACK.cpp",
]
extra_compile_args += ['-DWITH_NNPACK']
if DEBUG:
extra_compile_args += ['-O0', '-g']
extra_link_args += ['-O0', '-g']
@ -542,11 +400,10 @@ if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
STDCPP_LIB = STDCPP_LIB[:-1]
if type(STDCPP_LIB) != str: # python 3
STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding)
extra_link_args += [STDCPP_LIB]
main_link_args += [STDCPP_LIB]
version_script = os.path.abspath("tools/pytorch.version")
extra_link_args += ['-Wl,--version-script=' + version_script]
def make_relative_rpath(path):
if platform.system() == 'Darwin':
return '-Wl,-rpath,@loader_path/' + path
@ -591,21 +448,6 @@ THNN = Extension("torch._thnn._THNN",
extensions.append(THNN)
if WITH_CUDA:
thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')]
if platform.system() == 'Linux':
thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed']
# these have to be specified as -lcuda in link_flags because they
# have to come right after the `no-as-needed` option
thnvrtc_link_flags += ['-lcuda', '-lnvrtc']
THNVRTC = Extension("torch._nvrtc",
sources=['torch/csrc/nvrtc.cpp'],
language='c++',
include_dirs=include_dirs,
library_dirs=library_dirs + [cuda_lib_path + '/stubs'],
extra_link_args=thnvrtc_link_flags,
)
extensions.append(THNVRTC)
THCUNN = Extension("torch._thnn._THCUNN",
sources=['torch/csrc/nn/THCUNN.cpp'],
language='c++',
@ -620,44 +462,38 @@ if WITH_CUDA:
)
extensions.append(THCUNN)
version = '0.3.1b0'
version = '0.2.0'
if os.getenv('PYTORCH_BUILD_VERSION'):
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
build_number = int(os.getenv('PYTORCH_BUILD_NUMBER'))
version = os.getenv('PYTORCH_BUILD_VERSION')
if build_number > 1:
version += '.post' + str(build_number)
version = os.getenv('PYTORCH_BUILD_VERSION') \
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
else:
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except Exception:
except subprocess.CalledProcessError:
pass
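To make the two branches concrete (all values hypothetical): with PYTORCH_BUILD_VERSION=0.3.1 and PYTORCH_BUILD_NUMBER=2, the package version becomes '0.3.1.post2' under the newer scheme (the older scheme in this diff would produce '0.3.1_2'); without those variables, a local build appends the abbreviated git hash:

    sha = 'abc1234def5678...'        # hypothetical `git rev-parse HEAD`
    version += '+' + sha[:7]         # -> e.g. '0.3.1b0+abc1234'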
cmdclass = {
'build': build,
'build_py': build_py,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'develop': develop,
'install': install,
'clean': clean,
}
cmdclass.update(build_dep_cmds)
setup(name="torch", version=version,
description="Tensors and Dynamic neural networks in Python with strong GPU acceleration",
ext_modules=extensions,
cmdclass=cmdclass,
cmdclass={
'build': build,
'build_py': build_py,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'develop': develop,
'install': install,
'clean': clean,
},
packages=packages,
package_data={'torch': [
'lib/*.so*', 'lib/*.dylib*',
'lib/torch_shm_manager',
'lib/*.h',
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h',
'lib/include/ATen/*.h',
]},
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
install_requires=['pyyaml', 'numpy'],
)



@ -7,31 +7,38 @@ import contextlib
from functools import wraps
from itertools import product
from copy import deepcopy
import __main__
import errno
import torch
import torch.cuda
from torch.autograd import Variable
from torch._six import string_classes
torch.set_default_tensor_type('torch.DoubleTensor')
# set seed one time
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('--seed', type=int, default=123)
parser.add_argument('--accept', action='store_true')
args, remaining = parser.parse_known_args()
SEED = args.seed
ACCEPT = args.accept
UNITTEST_ARGS = [sys.argv[0]] + remaining
SEED = 0
SEED_SET = 0
def parse_set_seed_once():
global SEED
global SEED_SET
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('--seed', type=int, default=123)
args, remaining = parser.parse_known_args()
if SEED_SET == 0:
torch.manual_seed(args.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(args.seed)
SEED = args.seed
SEED_SET = 1
remaining = [sys.argv[0]] + remaining
return remaining
def run_tests():
unittest.main(argv=UNITTEST_ARGS)
remaining = parse_set_seed_once()
unittest.main(argv=remaining)
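A minimal sketch of how a test script consumes this helper (the module and test names are assumptions, not part of the diff):

    # test_example.py (hypothetical)
    import torch
    from common import TestCase, run_tests

    class TestExample(TestCase):
        def test_add(self):
            self.assertEqual(torch.ones(2) + torch.ones(2), torch.ones(2) * 2)

    if __name__ == '__main__':
        run_tests()   # `python test_example.py --seed 7` seeds torch (and CUDA, if
                      # available) and forwards the remaining argv to unittest.main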
IS_WINDOWS = sys.platform == "win32"
TEST_NUMPY = True
try:
@ -117,13 +124,12 @@ def is_iterable(obj):
try:
iter(obj)
return True
except TypeError:
except:
return False
class TestCase(unittest.TestCase):
precision = 1e-5
maxDiff = None
def setUp(self):
torch.manual_seed(SEED)
@ -171,9 +177,6 @@ class TestCase(unittest.TestCase):
return x, y
def assertEqual(self, x, y, prec=None, message=''):
if isinstance(prec, str) and message == '':
message = prec
prec = None
if prec is None:
prec = self.precision
@ -202,7 +205,7 @@ class TestCase(unittest.TestCase):
assertTensorsEqual(x._values(), y._values())
else:
assertTensorsEqual(x, y)
elif isinstance(x, string_classes) and isinstance(y, string_classes):
elif type(x) == str and type(y) == str:
super(TestCase, self).assertEqual(x, y)
elif type(x) == set and type(y) == set:
super(TestCase, self).assertEqual(x, y)
@ -214,7 +217,7 @@ class TestCase(unittest.TestCase):
try:
self.assertLessEqual(abs(x - y), prec, message)
return
except (TypeError, AssertionError):
except:
pass
super(TestCase, self).assertEqual(x, y, message)
@ -246,7 +249,7 @@ class TestCase(unittest.TestCase):
try:
self.assertGreaterEqual(abs(x - y), prec, message)
return
except (TypeError, AssertionError):
except:
pass
super(TestCase, self).assertNotEqual(x, y, message)
@ -256,85 +259,7 @@ class TestCase(unittest.TestCase):
return
raise AssertionError("object not found in iterable")
# TODO: Support context manager interface
# NB: The kwargs forwarding to callable robs the 'subname' parameter.
# If you need it, manually apply your callable in a lambda instead.
def assertExpectedRaises(self, exc_type, callable, *args, **kwargs):
subname = None
if 'subname' in kwargs:
subname = kwargs['subname']
del kwargs['subname']
try:
callable(*args, **kwargs)
except exc_type as e:
self.assertExpected(str(e), subname)
return
# Don't put this in the try block; the AssertionError will catch it
self.fail(msg="Did not raise when expected to")
def assertExpected(self, s, subname=None):
"""
Test that a string matches the recorded contents of a file
derived from the name of this test and subname. This file
is placed in the 'expect' directory in the same directory
as the test script. You can automatically update the recorded test
output using --accept.
If you call this multiple times in a single function, you must
give a unique subname each time.
"""
if not (isinstance(s, str) or (sys.version_info[0] == 2 and isinstance(s, unicode))):
raise TypeError("assertExpected is strings only")
def remove_prefix(text, prefix):
if text.startswith(prefix):
return text[len(prefix):]
return text
munged_id = remove_prefix(self.id(), "__main__.")
# NB: we take __file__ from __main__, so we place the expect directory
# where the test script lives, NOT where test/common.py lives. This
# doesn't matter in PyTorch where all test scripts are in the same
# directory as test/common.py, but it matters in onnx-pytorch
expected_file = os.path.join(os.path.dirname(os.path.realpath(__main__.__file__)),
"expect",
munged_id)
if subname:
expected_file += "-" + subname
expected_file += ".expect"
expected = None
def accept_output(update_type):
print("Accepting {} for {}:\n\n{}".format(update_type, munged_id, s))
with open(expected_file, 'w') as f:
f.write(s)
try:
with open(expected_file) as f:
expected = f.read()
except IOError as e:
if e.errno != errno.ENOENT:
raise
elif ACCEPT:
return accept_output("output")
else:
raise RuntimeError(
("I got this output for {}:\n\n{}\n\n"
"No expect file exists; to accept the current output, run:\n"
"python {} {} --accept").format(munged_id, s, __main__.__file__, munged_id))
if ACCEPT:
if expected != s:
return accept_output("updated output")
else:
if hasattr(self, "assertMultiLineEqual"):
# Python 2.7 only
# NB: Python considers lhs "old" and rhs "new".
self.assertMultiLineEqual(expected, s)
else:
self.assertEqual(s, expected)
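A sketch of the workflow this enables (the test module, class, and string below are hypothetical):

    # test_foo.py (hypothetical)
    from common import TestCase, run_tests

    class TestFoo(TestCase):
        def test_graph_repr(self):
            s = 'graph(%1 : Double(2, 2)) { ... }'    # any deterministic string under test
            self.assertExpected(s)                     # expect/TestFoo.test_graph_repr.expect
            self.assertExpected(s, subname='second')   # expect/TestFoo.test_graph_repr-second.expect

    if __name__ == '__main__':
        run_tests()
    # First run: `python test_foo.py --accept` records the expect files next to the script.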
if sys.version_info < (3, 2):
# assertRegexpMatches renamed assertRegex in 3.2
assertRegex = unittest.TestCase.assertRegexpMatches
# assertRaisesRegexp renamed assertRaisesRegex in 3.2
assertRaisesRegex = unittest.TestCase.assertRaisesRegexp


@ -23,17 +23,6 @@ TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.cuda.FloatTe
TEST_CUDNN_VERSION = TEST_CUDNN and torch.backends.cudnn.version()
PRECISION = 1e-5
def get_size_average(m):
return getattr(m, 'size_average', False) or getattr(m, 'sizeAverage', False)
def get_weight(m):
result = getattr(m, 'weight', None)
if result is not None:
return result
return getattr(m, 'weights', None)
module_tests = [
dict(
module_name='Linear',
@ -75,6 +64,7 @@ module_tests = [
module_name='RReLU',
input_size=(1, 2, 2),
test_cuda=False,
check_gradgrad=False,
),
dict(
module_name='RReLU',
@ -82,6 +72,7 @@ module_tests = [
input_size=(4, 4, 5),
desc='with_up_down',
test_cuda=False,
check_gradgrad=False,
),
dict(
module_name='Hardtanh',
@ -98,7 +89,6 @@ module_tests = [
),
dict(
module_name='Softmax',
constructor_args=(1,),
input_size=(10, 20),
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1, True).expand(10, 20)),
),
@ -109,13 +99,11 @@ module_tests = [
),
dict(
module_name='LogSoftmax',
constructor_args=(1,),
input_size=(10, 20),
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, True).expand(10, 20)).log_(),
),
dict(
module_name='LogSoftmax',
constructor_args=(1,),
input_size=(1, 3, 10, 20),
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, False)).log_(),
desc='multiparam',
@ -130,6 +118,7 @@ module_tests = [
module_name='Hardshrink',
constructor_args=(2.,),
input_size=(4, 3, 2, 4),
check_gradgrad=False,
),
dict(
module_name='LeakyReLU',
@ -147,11 +136,13 @@ module_tests = [
module_name='LogSigmoid',
input_size=(2, 3, 4),
reference_fn=lambda i, _: i.sigmoid().log(),
check_gradgrad=False,
),
dict(
module_name='Softplus',
input_size=(10, 20),
reference_fn=lambda i, _: torch.log(1 + torch.exp(i)),
check_gradgrad=False,
),
dict(
module_name='Softplus',
@ -159,24 +150,19 @@ module_tests = [
input_size=(10, 20),
reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
desc='beta',
),
dict(
module_name='Softplus',
constructor_args=(2, -100),
input_size=(10, 20),
reference_fn=(lambda i, _: ((i * 2) > -100).type_as(i) * i +
((i * 2) <= -100).type_as(i) * 1. / 2. * torch.log(1 + torch.exp(2 * i))),
desc='beta_threshold',
check_gradgrad=False,
),
dict(
module_name='Softshrink',
input_size=(3, 2, 5),
check_gradgrad=False,
),
dict(
module_name='Softshrink',
constructor_args=(1,),
input_size=(3, 2, 5),
desc='lambda',
check_gradgrad=False,
),
dict(
module_name='CrossMapLRN2d',
@ -230,14 +216,8 @@ module_tests = [
),
dict(
module_name='Softmin',
constructor_args=(1,),
input_size=(10, 20),
),
dict(
module_name='Softmin',
constructor_args=(1,),
input_size=(2, 3, 5, 10),
desc='multidim',
check_gradgrad=False,
),
dict(
module_name='Tanhshrink',
@ -245,290 +225,192 @@ module_tests = [
),
]
def kldivloss_reference(input, target, size_average=True, reduce=True):
safe_target = target * (target > 0).type_as(target)
safe_target_log = (safe_target + (target <= 0).type_as(target)).log()
result = safe_target * (safe_target_log - input)
if reduce and size_average:
return result.mean()
elif reduce:
return result.sum()
return result
def nlllossNd_reference(input, target, weight=None, ignore_index=-100,
size_average=True, reduce=True):
assert input.dim() >= 3
N = input.size(0)
C = input.size(1)
out_size = (N,) + input.size()[2:]
output = torch.zeros(out_size).type_as(input)
if isinstance(target, Variable):
target = target.data
if weight is None:
weight = torch.ones(C).type_as(input)
total_weight_data = 0
for tup in product(*[range(size) for size in out_size]):
t_nx = target[tup]
norm = 0. if ignore_index == t_nx else weight[t_nx]
input_index = list(tup)
input_index.insert(1, t_nx)
output[tup] = -input[tuple(input_index)] * norm
total_weight_data += norm
if reduce and size_average:
return output.sum() / total_weight_data
elif reduce:
return output.sum()
return output
def nllloss_reference(input, target, weight=None, ignore_index=-100,
size_average=True, reduce=True):
if isinstance(target, Variable):
target = target.data
def nll_loss_helper(input, target, weight, ignore_index):
if target is ignore_index:
return (0, 0)
norm = 1 if weight is None else weight[target]
result = -input[target] * norm
return (result, norm)
losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index)
for i, t in zip(input, target)]
losses, weights = zip(*losses_and_weights)
losses_tensor = torch.Tensor(losses).type_as(input)
if reduce and size_average:
return sum(losses_tensor) / sum(weights)
elif reduce:
return sum(losses_tensor)
else:
return losses_tensor
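Restated compactly (w is the class-weight vector, 1 if none is given; samples whose target equals ignore_index contribute zero to both sums):

    \ell_n = -\, w_{t_n}\, x_{n,\, t_n}, \qquad
    \mathrm{loss} =
    \begin{cases}
      \sum_n \ell_n \,/\, \sum_n w_{t_n} & \text{reduce and size\_average} \\
      \sum_n \ell_n                      & \text{reduce only} \\
      (\ell_1, \ldots, \ell_N)           & \text{no reduction}
    \end{cases}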
def smoothl1loss_reference(input, target, size_average=True, reduce=True):
abs_diff = (input - target).abs()
ge_one_mask = (abs_diff >= 1).type_as(abs_diff)
lt_one_mask = (abs_diff < 1).type_as(abs_diff)
output = ge_one_mask * (abs_diff - 0.5) + lt_one_mask * 0.5 * (abs_diff ** 2)
if reduce and size_average:
return output.mean()
elif reduce:
return output.sum()
return output
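Equivalently, the element-wise loss implemented above is the Huber-style

    z_i = \begin{cases}
      0.5\, (x_i - t_i)^2   & \text{if } |x_i - t_i| < 1 \\
      |x_i - t_i| - 0.5     & \text{otherwise}
    \end{cases}

followed by a mean over elements when size_average and reduce are set, a sum when only reduce is set, and no reduction otherwise.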
loss_reference_fns = {
'KLDivLoss': kldivloss_reference,
'NLLLoss': nllloss_reference,
'NLLLossNd': nlllossNd_reference,
'SmoothL1Loss': smoothl1loss_reference,
}
criterion_tests = [
dict(module_name='L1Loss',
input_size=(2, 3, 4),
target=torch.randn(2, 3, 4),
reference_fn=lambda i, t, _: 1. / i.numel() *
sum((a - b).abs().sum() for a, b in zip(i, t)),
),
dict(
module_name='L1Loss',
input_size=(2, 3, 4),
target_size=(2, 3, 4),
reference_fn=lambda i, t, _: 1. / i.numel() *
sum((a - b).abs().sum() for a, b in zip(i, t)),
module_name='NLLLoss',
input=torch.rand(15, 10).log(),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
),
dict(
module_name='NLLLoss',
input_fn=lambda: torch.rand(15, 10).log(),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
reference_fn=lambda i, t, m:
nllloss_reference(i, t, size_average=get_size_average(m)),
check_no_size_average=True
constructor_args=(None, False),
input=torch.rand(15, 10).log(),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
desc='no_size_average'
),
dict(
module_name='NLLLoss',
constructor_args=(None, True, 2),
input_fn=lambda: torch.rand(15, 10).log(),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
reference_fn=lambda i, t, _: nllloss_reference(i, t, ignore_index=2),
input=torch.rand(15, 10).log(),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
desc='ignore_index'
),
dict(
module_name='NLLLoss',
constructor_args_fn=lambda: (torch.rand(10),),
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
reference_fn=lambda i, t, m:
nllloss_reference(i, t, weight=get_weight(m)),
constructor_args=(torch.rand(10),),
input=torch.rand(15, 10).add(1e-2).log(),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
desc='weights',
),
dict(
module_name='NLLLoss',
constructor_args_fn=lambda: (torch.rand(10), True, 2),
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
reference_fn=lambda i, t, m:
nllloss_reference(i, t, weight=get_weight(m), ignore_index=2),
constructor_args=(torch.rand(10), True, 2),
input=torch.rand(15, 10).add(1e-2).log(),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
desc='weights_ignore_index'
),
dict(
module_name='NLLLoss',
constructor_args_fn=lambda: (torch.rand(10), True, -1),
input_fn=lambda: torch.rand(15, 10).add(1e-2).log(),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10 + 1).floor().long() - 1,
reference_fn=lambda i, t, m:
nllloss_reference(i, t, weight=get_weight(m), ignore_index=-1),
constructor_args=(torch.rand(10), True, -1),
input=torch.rand(15, 10).add(1e-2).log(),
target=torch.Tensor(15).uniform_().mul(10 + 1).floor().long() - 1,
desc='weights_ignore_index_neg'
),
dict(
module_name='KLDivLoss',
input_fn=lambda: torch.rand(10, 10).log(),
target_fn=lambda: torch.rand(10, 10),
reference_fn=lambda i, t, m:
kldivloss_reference(i, t, get_size_average(m), reduce=True),
check_no_size_average=True,
input=torch.rand(10, 10).log(),
target=torch.rand(10, 10),
check_gradgrad=False,
),
dict(
module_name='MSELoss',
input_size=(2, 3, 4, 5),
target_size=(2, 3, 4, 5),
reference_fn=lambda i, t, m: (i - t).abs().pow(2).sum() / (i.numel() if get_size_average(m) else 1),
check_no_size_average=True,
),
dict(
module_name='BCELoss',
input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
target_fn=lambda: torch.randn(15, 10).gt(0).double(),
input=torch.randn(2, 3, 4, 5),
target=torch.randn(2, 3, 4, 5),
reference_fn=lambda i, t, _: (i - t).abs().pow(2).sum() / i.numel(),
check_gradgrad=False,
),
dict(
module_name='BCELoss',
constructor_args_fn=lambda: (torch.rand(10),),
input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
target_fn=lambda: torch.randn(15, 10).gt(0).double(),
input=torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
target=torch.randn(15, 10).gt(0).double(),
check_gradgrad=False,
),
dict(
module_name='BCELoss',
constructor_args=(torch.rand(10),),
input=torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
target=torch.randn(15, 10).gt(0).double(),
desc='weights',
check_gradgrad=False,
),
dict(
module_name='CrossEntropyLoss',
input_size=(15, 10),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
input=torch.randn(15, 10),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
check_gradgrad=False,
),
dict(
module_name='CrossEntropyLoss',
constructor_args_fn=lambda: (torch.rand(10),),
input_size=(15, 10),
target_fn=lambda: torch.Tensor(15).uniform_().mul(10).floor().long(),
constructor_args=(torch.rand(10),),
input=torch.randn(15, 10),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
desc='weights',
check_gradgrad=False,
),
dict(
module_name='NLLLoss2d',
input_size=(2, 3, 5, 5),
target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
reference_fn=lambda i, t, m:
nlllossNd_reference(i, t, size_average=get_size_average(m)),
check_no_size_average=True,
),
dict(
module_name='NLLLoss2d',
constructor_args_fn=lambda: (torch.rand(3),),
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long(),
reference_fn=lambda i, t, m:
nlllossNd_reference(i, t, weight=get_weight(m)),
),
dict(
module_name='NLLLoss2d',
constructor_args=(torch.rand(3),),
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long(),
desc='weights',
),
dict(
module_name='NLLLoss2d',
constructor_args=(None, True, 1),
constructor_args=(None, True, 3),
input_size=(2, 3, 5, 5),
target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
reference_fn=lambda i, t, m:
nlllossNd_reference(i, t, ignore_index=1),
target=torch.rand(2, 5, 5).mul(4).floor().long(),
desc='ignore_index',
),
dict(
module_name='HingeEmbeddingLoss',
input_size=(10,),
target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1),
input=torch.rand(10),
target=torch.randn(10).gt(0).double().mul_(2).sub(1),
check_gradgrad=False,
),
dict(
module_name='HingeEmbeddingLoss',
constructor_args=(0.5,),
input_size=(10,),
target_fn=lambda: torch.randn(10).gt(0).double().mul_(2).sub(1),
input=torch.rand(10),
target=torch.randn(10).gt(0).double().mul_(2).sub(1),
desc='margin',
check_no_size_average=True,
check_gradgrad=False,
),
dict(
module_name='MultiLabelMarginLoss',
input_size=(5, 10),
target_fn=lambda: torch.rand(5, 10).mul(10).floor().long(),
check_no_size_average=True,
target=torch.rand(5, 10).mul(10).floor().long(),
check_gradgrad=False,
),
dict(
module_name='MultiLabelSoftMarginLoss',
input_size=(5, 10),
target_fn=lambda: torch.rand(5, 10).mul(2).floor(),
target=torch.rand(5, 10).mul(2).floor(),
check_gradgrad=False,
),
dict(
module_name='MultiLabelSoftMarginLoss',
constructor_args_fn=lambda: (torch.rand(10),),
constructor_args=(torch.rand(10),),
input_size=(5, 10),
target_fn=lambda: torch.rand(5, 10).mul(2).floor(),
target=torch.rand(5, 10).mul(2).floor(),
desc='weights',
check_gradgrad=False,
),
dict(
module_name='MultiMarginLoss',
input_size=(5, 10),
target_fn=lambda: torch.rand(5).mul(8).floor().long(),
target=torch.rand(5).mul(8).floor().long(),
check_gradgrad=False,
),
dict(
module_name='SmoothL1Loss',
input_size=(5, 10),
target_size=(5, 10),
check_no_size_average=True,
reference_fn=lambda i, t, m:
smoothl1loss_reference(i, t, size_average=get_size_average(m)),
target=torch.randn(5, 10),
check_gradgrad=False,
),
dict(
module_name='SoftMarginLoss',
input_size=(5, 5),
target_fn=lambda: torch.randn(5, 5).sign(),
check_no_size_average=True,
target=torch.randn(5, 5).sign(),
check_gradgrad=False,
),
dict(
module_name='CosineEmbeddingLoss',
input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)),
target_fn=lambda: torch.randn(15).sign(),
input=(torch.rand(15, 10), torch.rand(15, 10)),
target=torch.randn(15).sign(),
check_gradgrad=False,
),
dict(
module_name='CosineEmbeddingLoss',
constructor_args=(0.7,),
input_fn=lambda: (torch.rand(15, 10), torch.rand(15, 10)),
target_fn=lambda: torch.randn(15).sign(),
input=(torch.rand(15, 10), torch.rand(15, 10)),
target=torch.randn(15).sign(),
desc='margin',
check_gradgrad=False,
),
dict(
module_name='MarginRankingLoss',
input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)),
target_fn=lambda: torch.randn(50).sign(),
check_no_size_average=True,
input=(torch.randn(50).mul(10), torch.randn(50).mul(10)),
target=torch.randn(50).sign(),
check_gradgrad=False,
),
dict(
module_name='MarginRankingLoss',
constructor_args=(2,),
input_fn=lambda: (torch.randn(50).mul(10), torch.randn(50).mul(10)),
target_fn=lambda: torch.randn(50).sign(),
input=(torch.randn(50).mul(10), torch.randn(50).mul(10)),
target=torch.randn(50).sign(),
desc='margin',
check_no_size_average=True,
check_gradgrad=False,
),
]
@ -670,22 +552,17 @@ class NNTestCase(TestCase):
class TestBase(object):
_required_arg_names = {'constructor_args', 'input'}
def __init__(self, constructor, desc='', reference_fn=None, fullname=None, **kwargs):
def __init__(self, constructor, constructor_args=tuple(), input_size=None,
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
if input_size is None and input is None:
raise RuntimeError("Specify either an input tensor, or it's size!")
self.constructor = constructor
self.constructor_args = constructor_args
self.input = input
self.input_size = input_size
self.desc = desc
self.fullname = fullname
self.constructor = constructor
self.reference_fn = reference_fn
for name in self._required_arg_names:
if name not in kwargs and name + '_fn' not in kwargs and name + '_size' not in kwargs:
if name == 'constructor_args':
kwargs['constructor_args'] = tuple()
else:
raise ValueError("{}: Specify {} by a value, a function to generate it, or it's size!"
.format(self.get_name(), name))
self._extra_kwargs = kwargs
self._arg_cache = {}
def get_name(self):
if self.fullname is not None:
@ -696,45 +573,28 @@ class TestBase(object):
test_name += '_' + self.desc
return test_name
def _unpack(self, value):
if isinstance(value, Variable):
return value.data
elif torch.is_tensor(value):
return value
def _unpack_input(self, input):
if isinstance(input, Variable):
return input.data
elif torch.is_tensor(input):
return input
else:
return type(value)(self._unpack(v) for v in value)
@property
def constructor_args(self):
return self._get_arg('constructor_args')
def _get_arg(self, name):
assert name in self._required_arg_names
if name not in self._arg_cache:
fn_name = name + '_fn'
size_name = name + '_size'
if name in self._extra_kwargs:
self._arg_cache[name] = self._extra_kwargs[name]
elif fn_name in self._extra_kwargs:
self._arg_cache[name] = self._extra_kwargs[fn_name]()
else:
assert size_name in self._extra_kwargs
def map_tensor_sizes(sizes):
if isinstance(sizes, list):
return [map_tensor_sizes(s) for s in sizes]
elif torch.is_tensor(sizes):
return sizes.double()
else:
return torch.randn(*sizes)
self._arg_cache[name] = map_tensor_sizes(self._extra_kwargs[size_name])
return self._arg_cache[name]
return type(input)(self._unpack_input(i) for i in input)
def _get_input(self):
return self._get_arg('input')
if self.input is not None:
return self.input
def map_input_sizes(sizes):
if isinstance(sizes, list):
return [map_input_sizes(s) for s in sizes]
elif torch.is_tensor(sizes):
return sizes.double()
else:
return torch.randn(*sizes)
assert self.input_size is not None
return map_input_sizes(self.input_size)
def __call__(self, test_case):
raise NotImplementedError
@ -746,7 +606,6 @@ class ModuleTest(TestBase):
super(ModuleTest, self).__init__(*args, **kwargs)
self.jacobian_input = kwargs.get('jacobian_input', True)
self.should_test_cuda = kwargs.get('test_cuda', True)
self.should_test_pickle = kwargs.get('pickle', True)
def __call__(self, test_case):
module = self.constructor(*self.constructor_args)
@ -756,20 +615,19 @@ class ModuleTest(TestBase):
out = test_case._forward(module, input)
if isinstance(out, Variable):
out = out.data
ref_input = self._unpack(deepcopy(input))
ref_input = self._unpack_input(deepcopy(input))
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
test_case.assertEqual(out, expected_out)
self.test_noncontig(test_case, module, input)
if self.should_test_pickle:
# TODO: do this with in-memory files as soon as torch.save will support it
with TemporaryFile() as f:
test_case._forward(module, input)
torch.save(module, f)
f.seek(0)
module_copy = torch.load(f)
test_case.assertEqual(test_case._forward(module, input), test_case._forward(module_copy, input))
# TODO: do this with in-memory files as soon as torch.save will support it
with TemporaryFile() as f:
test_case._forward(module, input)
torch.save(module, f)
f.seek(0)
module_copy = torch.load(f)
test_case.assertEqual(test_case._forward(module, input), test_case._forward(module_copy, input))
self._do_test(test_case, module, input)
@ -868,14 +726,13 @@ class ModuleTest(TestBase):
class CriterionTest(TestBase):
_required_arg_names = TestBase._required_arg_names.union({'target'})
def __init__(self, *args, **kwargs):
super(CriterionTest, self).__init__(*args, **kwargs)
self.target = self._get_target(kwargs['target'])
self.should_test_cuda = kwargs.get('test_cuda', True)
def _get_target(self):
return self._get_arg('target')
def _get_target(self, target):
return target
def __call__(self, test_case):
module = self.constructor(*self.constructor_args)
@ -885,16 +742,17 @@ class CriterionTest(TestBase):
module.__repr__()
str(module)
target = self._get_target()
if self.reference_fn is not None:
out = test_case._forward_criterion(module, input, target)
expected_out = self.reference_fn(deepcopy(self._unpack(input)),
deepcopy(self._unpack(target)), module)
out = test_case._forward_criterion(module, input, self.target)
target = self.target
if isinstance(target, Variable):
target = target.data
expected_out = self.reference_fn(deepcopy(self._unpack_input(input)),
deepcopy(target), module)
test_case.assertEqual(out, expected_out)
test_case.check_criterion_jacobian(module, input, target)
self._do_extra_tests(test_case, module, input, target)
test_case.check_criterion_jacobian(module, input, self.target)
self._do_extra_tests(test_case, module, input, self.target)
def test_cuda(self, test_case):
if not TEST_CUDA or not self.should_test_cuda:
@ -906,8 +764,8 @@ class CriterionTest(TestBase):
}
gpu_input = to_gpu(cpu_input, type_map=type_map)
cpu_target = self._get_target()
gpu_target = to_gpu(cpu_target, type_map=type_map)
cpu_target = self.target
gpu_target = to_gpu(self.target, type_map=type_map)
cpu_module = self.constructor(*self.constructor_args)
gpu_module = self.constructor(*self.constructor_args).float().cuda()


@ -1,46 +0,0 @@
graph(%1 : Double(10, 3, 224, 224)
%2 : Double(64, 3, 11, 11)
%3 : Double(64)
%4 : Double(192, 64, 5, 5)
%5 : Double(192)
%6 : Double(384, 192, 3, 3)
%7 : Double(384)
%8 : Double(256, 384, 3, 3)
%9 : Double(256)
%10 : Double(256, 256, 3, 3)
%11 : Double(256)
%12 : Double(4096, 9216)
%13 : Double(4096)
%14 : Double(4096, 4096)
%15 : Double(4096)
%16 : Double(1000, 4096)
%17 : Double(1000)) {
%19 : Double(10, 64, 55, 55), %20 : Handle = CppOp[ConvForward](%1, %2, %3), uses = [[%21.i0], []];
%21 : Double(10, 64, 55, 55) = threshold[threshold={0}, value={0}, inplace=1](%19), uses = [%22.i0];
%23 : Double(10, 64, 27, 27), %24 : Long(10, 64, 27, 27) = max_pool2d[kernel_size=[3, 3], stride=[2, 2], padding=[0, 0], dilation=[1, 1], ceil_mode=0](%21), uses = [[%25.i0], []];
%26 : Double(10, 192, 27, 27), %27 : Handle = CppOp[ConvForward](%23, %4, %5), uses = [[%28.i0], []];
%28 : Double(10, 192, 27, 27) = threshold[threshold={0}, value={0}, inplace=1](%26), uses = [%29.i0];
%30 : Double(10, 192, 13, 13), %31 : Long(10, 192, 13, 13) = max_pool2d[kernel_size=[3, 3], stride=[2, 2], padding=[0, 0], dilation=[1, 1], ceil_mode=0](%28), uses = [[%32.i0], []];
%33 : Double(10, 384, 13, 13), %34 : Handle = CppOp[ConvForward](%30, %6, %7), uses = [[%35.i0], []];
%35 : Double(10, 384, 13, 13) = threshold[threshold={0}, value={0}, inplace=1](%33), uses = [%36.i0];
%37 : Double(10, 256, 13, 13), %38 : Handle = CppOp[ConvForward](%35, %8, %9), uses = [[%39.i0], []];
%39 : Double(10, 256, 13, 13) = threshold[threshold={0}, value={0}, inplace=1](%37), uses = [%40.i0];
%41 : Double(10, 256, 13, 13), %42 : Handle = CppOp[ConvForward](%39, %10, %11), uses = [[%43.i0], []];
%43 : Double(10, 256, 13, 13) = threshold[threshold={0}, value={0}, inplace=1](%41), uses = [%44.i0];
%45 : Double(10, 256, 6, 6), %46 : Long(10, 256, 6, 6) = max_pool2d[kernel_size=[3, 3], stride=[2, 2], padding=[0, 0], dilation=[1, 1], ceil_mode=0](%43), uses = [[%47.i0], []];
%47 : Double(10, 9216) = view[size=[10, 9216]](%45), uses = [%48.i0];
%49 : Double(10, 9216), %50 : Handle = ^Dropout(0.5, True, False)(%47), uses = [[%53.i1], []];
%51 : Double(9216!, 4096!) = t(%12), uses = [%53.i2];
%52 : Double(10!, 4096) = expand[size=[10, 4096]](%13), uses = [%53.i0];
%53 : Double(10, 4096) = addmm[beta={1}, alpha={1}](%52, %49, %51), uses = [%54.i0];
%54 : Double(10, 4096) = threshold[threshold={0}, value={0}, inplace=1](%53), uses = [%55.i0];
%56 : Double(10, 4096), %57 : Handle = ^Dropout(0.5, True, False)(%54), uses = [[%60.i1], []];
%58 : Double(4096!, 4096!) = t(%14), uses = [%60.i2];
%59 : Double(10!, 4096) = expand[size=[10, 4096]](%15), uses = [%60.i0];
%60 : Double(10, 4096) = addmm[beta={1}, alpha={1}](%59, %56, %58), uses = [%61.i0];
%61 : Double(10, 4096) = threshold[threshold={0}, value={0}, inplace=1](%60), uses = [%64.i1];
%62 : Double(4096!, 1000!) = t(%16), uses = [%64.i2];
%63 : Double(10!, 1000) = expand[size=[10, 1000]](%17), uses = [%64.i0];
%64 : Double(10, 1000) = addmm[beta={1}, alpha={1}](%63, %61, %62), uses = [%0.i0];
return (%64);
}


@ -1,8 +0,0 @@
graph(%1 : Double(10, 10)
-------- stage 1 --------
%4 : Double(10, 10!)) {
%3 : Double(10, 10) = ^MyFn()(%1), uses = [[%0.i0, %5.i0]];
---------------- stage 1 ----------------
%5 : Double(10, 10) = mul(%3, %4), uses = [%0.i1];
return (%3, %5);
}


@ -1,23 +0,0 @@
graph(%1 : Double(2, 2)
%2 : Double(2, 2)
-------- stage 1 --------
%5 : Double(2, 2)
-------- stage 2 --------
%9 : Double(2, 2!)
%10 : Double(2, 2)) {
%3 : Double(2, 2) = mul[other={2}](%2), uses = [%4.i0, %7.i1, %11.i1];
%4 : Double(2, 2) = mul(%3, %1), uses = [%0.i0];
---------------- stage 1 ----------------
%6 : Double(2, 2) = mul(%5, %1), uses = [%8.i0];
%7 : Double(2, 2) = mul(%5, %3), uses = [%0.i1];
%8 : Double(2, 2) = mul[other={2}](%6), uses = [%0.i2];
---------------- stage 2 ----------------
%11 : Double(2, 2) = mul(%9, %3), uses = [%17.i0];
%12 : Double(2, 2) = mul(%9, %5), uses = [%14.i0];
%13 : Double(2, 2) = mul[other={2}](%10), uses = [%15.i0, %16.i0];
%14 : Double(2, 2) = mul[other={2}](%12), uses = [%0.i5];
%15 : Double(2, 2) = mul(%13, %1), uses = [%17.i1];
%16 : Double(2, 2) = mul(%13, %5), uses = [%0.i4];
%18 : Double(2, 2) = CppOp[N5torch8autograd3AddE](%11, %15), uses = [[%0.i3]];
return (%4, %7, %8, %18, %16, %14);
}


@ -1,10 +0,0 @@
graph(%1 : Double(3, 3)
%2 : Double(3, 3)
-------- stage 1 --------
%4 : Double(3, 3)) {
%3 : Double(3, 3) = cross[dim=-1](%1, %2), uses = [%0.i0];
---------------- stage 1 ----------------
%5 : Double(3, 3) = cross[dim=-1](%2, %4), uses = [%0.i1];
%6 : Double(3, 3) = cross[dim=-1](%4, %1), uses = [%0.i2];
return (%3, %5, %6);
}


@ -1,8 +0,0 @@
graph(%1 : Double(2, 2)
%2 : Double(2)
%3 : Double(2)
%4 : Double(2)
%5 : Double(2)) {
%7 : Double(2, 2), %8 : Handle = CppOp[N5torch8autograd16BatchNormForwardE](%1, %2, %3), uses = [[%0.i0], []], scope: BatchNorm2d;
return (%7);
}


@ -1,6 +0,0 @@
graph(%1 : Double(1, 3, 10, 10)
%2 : Double(8, 3, 3, 3)
%3 : Double(8)) {
%5 : Double(1, 8, 8, 8), %6 : Handle = CppOp[ConvForward](%1, %2, %3), uses = [[%0.i0], []];
return (%5);
}


@ -1,12 +0,0 @@
graph(%1 : Double(3, 20)
%2 : Double(3, 20)) {
%7 : Double(6, 20) = fusion_group_0(%1, %2), uses = [[%0.i0]];
return (%7);
}
with fusion_group_0 = graph(%4 : Double(3, 20)
%5 : Double(3, 20)) {
%7 : Double(3, 20) = add[alpha={1}](%4, %5), uses = [%3.i0];
%6 : Double(3, 20) = mul(%4, %5), uses = [%3.i1];
%3 : Double(6, 20) = cat[dim=0](%7, %6), uses = [%0.i0];
return (%3);
}


@ -1,6 +0,0 @@
graph(%1 : Double(20, 16, 50, 40)
%2 : Double(13, 16, 3, 3)) {
%4 : UNKNOWN_TYPE = Undefined(), uses = [%3.i2], scope: Conv2d;
%5 : Double(20, 13, 48, 38), %6 : Handle = CppOp[ConvForward](%1, %2, %4), uses = [[%0.i0], []], scope: Conv2d;
return (%5);
}


@ -1,10 +0,0 @@
graph(%1 : Double(2)
%2 : Double(2)) {
%3 : Double(2) = add[alpha={1}](%1, %2), uses = [%5.i0, %5.i1, %7.i1];
%5 : Double(2) = mul(%3, %3), uses = [%7.i0];
%7 : Double(2) = mul(%5, %3), uses = [%8.i0, %16.i0];
%8 : Double(2) = tanh(%7), uses = [%10.i0, %10.i1];
%10 : Double(2) = add[alpha={1}](%8, %8), uses = [%16.i1];
%16 : Double(2) = add[alpha={1}](%7, %10), uses = [%0.i0];
return (%16);
}


@ -1,4 +0,0 @@
graph(%1 : Double(2, 2)) {
%3 : Double(2, 2), %4 : Handle = ^Dropout(0.6, True, False)(%1), uses = [[%0.i0], []], scope: Dropout;
return (%3);
}


@ -1,26 +0,0 @@
graph(%1 : Double(3, 10)
%2 : Double(3, 20)
%3 : Double(3, 20)
%4 : Double(80, 10)
%5 : Double(80, 20)
%6 : Double(80)
%7 : Double(80)) {
%8 : Double(10!, 80!) = Transpose[perm=[1, 0]](%4), uses = [%9.i0];
%9 : UNKNOWN_TYPE = Transpose(%8), uses = [%10.i1];
%10 : Double(3, 80) = FC(%1, %9, %6), uses = [%14.i0];
%11 : Double(20!, 80!) = Transpose[perm=[1, 0]](%5), uses = [%12.i0];
%12 : UNKNOWN_TYPE = Transpose(%11), uses = [%13.i1];
%13 : Double(3, 80) = FC(%2, %12, %7), uses = [%14.i1];
%14 : Double(3, 80) = Add(%10, %13), uses = [%15.i0];
%16 : Double(3!, 20), %17 : Double(3!, 20), %18 : Double(3!, 20), %19 : Double(3!, 20) = Split[split=[20, 20, 20, 20], axis=1](%14), uses = [[%20.i0], [%21.i0], [%22.i0], [%23.i0]];
%20 : Double(3, 20) = Sigmoid(%16), uses = [%25.i0];
%21 : Double(3, 20) = Sigmoid(%17), uses = [%24.i0];
%22 : Double(3, 20) = Tanh(%18), uses = [%25.i1];
%23 : Double(3, 20) = Sigmoid(%19), uses = [%28.i0];
%24 : Double(3, 20) = Mul(%21, %3), uses = [%26.i0];
%25 : Double(3, 20) = Mul(%20, %22), uses = [%26.i1];
%26 : Double(3, 20) = Add(%24, %25), uses = [%27.i0, %0.i1];
%27 : Double(3, 20) = Tanh(%26), uses = [%28.i1];
%28 : Double(3, 20) = Mul(%23, %27), uses = [%0.i0];
return (%28, %26);
}


@ -1,7 +0,0 @@
graph(%1 : Double(4, 4)
%2 : Double(4, 4)) {
%3 : Double(4, 4) = add[alpha={1}](%1, %2), uses = [%4.i0];
%5 : Double(4!, 2), %6 : Double(4!, 2) = split[split_size=2, dim=1](%3), uses = [[%7.i0], [%7.i1]];
%7 : Double(4, 2) = mul(%5, %6), uses = [%0.i0];
return (%7);
}


@ -1,16 +0,0 @@
graph(%1 : Double(4, 4)
%2 : Double(4, 4)) {
%9 : Double(4!, 2), %10 : Double(4!, 2) = split[split_size=2, dim=1](%1), uses = [[%16.i0], [%16.i2]];
%12 : Double(4!, 2), %13 : Double(4!, 2) = split[split_size=2, dim=1](%2), uses = [[%16.i1], [%16.i3]];
%17 : Double(4, 2) = fusion_group_0(%9, %12, %10, %13), uses = [[%0.i0]];
return (%17);
}
with fusion_group_0 = graph(%4 : Double(4!, 2)
%5 : Double(4!, 2)
%7 : Double(4!, 2)
%8 : Double(4!, 2)) {
%9 : Double(4, 2) = add[alpha={1}](%7, %8), uses = [%3.i1];
%6 : Double(4, 2) = add[alpha={1}](%4, %5), uses = [%3.i0];
%3 : Double(4, 2) = mul(%6, %9), uses = [%0.i0];
return (%3);
}


@ -1,6 +0,0 @@
graph(%1 : Double(1)) {
%2 : Double(1) = clone(%1), uses = [%3.i0];
%3 : Double(1) = add[other={2}, alpha={1}](%2), uses = [%4.i0];
%4 : Double(1) = add[other={3}, alpha={1}](%3), uses = [%0.i0];
return (%4);
}


@ -1,42 +0,0 @@
graph(%1 : Double(3, 10)
%2 : Double(3, 20)
%3 : Double(3, 20)
%4 : Double(80, 10)
%5 : Double(80, 20)
%6 : Double(80)
%7 : Double(80)) {
%8 : Double(10!, 80!) = Transpose[perm=[1, 0]](%4), uses = [%9.i0];
%9 : UNKNOWN_TYPE = Transpose(%8), uses = [%10.i1];
%10 : Double(3, 80) = FC(%1, %9, %6), uses = [%32.i0];
%11 : Double(20!, 80!) = Transpose[perm=[1, 0]](%5), uses = [%12.i0];
%12 : UNKNOWN_TYPE = Transpose(%11), uses = [%13.i1];
%13 : Double(3, 80) = FC(%2, %12, %7), uses = [%33.i0];
%36 : Double(3!, 20), %39 : Double(3!, 20), %42 : Double(3!, 20), %45 : Double(3!, 20) = Split[split=[20, 20, 20, 20], axis=1](%13), uses = [[%29.i8], [%29.i6], [%29.i4], [%29.i2]];
%35 : Double(3!, 20), %38 : Double(3!, 20), %41 : Double(3!, 20), %44 : Double(3!, 20) = Split[split=[20, 20, 20, 20], axis=1](%10), uses = [[%29.i7], [%29.i5], [%29.i3], [%29.i1]];
%30 : Double(3, 20), %31 : Double(3, 20) = fusion_group_0(%3, %44, %45, %41, %42, %38, %39, %35, %36), uses = [[%0.i0], [%0.i1]];
return (%30, %31);
}
with fusion_group_0 = graph(%13 : Double(3, 20)
%23 : Double(3!, 20)
%24 : Double(3!, 20)
%26 : Double(3!, 20)
%27 : Double(3!, 20)
%29 : Double(3!, 20)
%30 : Double(3!, 20)
%32 : Double(3!, 20)
%33 : Double(3!, 20)) {
%34 : Double(3, 20) = Add(%32, %33), uses = [%22.i0];
%31 : Double(3, 20) = Add(%29, %30), uses = [%20.i0];
%28 : Double(3, 20) = Add(%26, %27), uses = [%18.i0];
%25 : Double(3, 20) = Add(%23, %24), uses = [%16.i0];
%22 : Double(3, 20) = Sigmoid(%34), uses = [%11.i0];
%20 : Double(3, 20) = Sigmoid(%31), uses = [%14.i0];
%18 : Double(3, 20) = Tanh(%28), uses = [%11.i1];
%16 : Double(3, 20) = Sigmoid(%25), uses = [%3.i0];
%14 : Double(3, 20) = Mul(%20, %13), uses = [%8.i0];
%11 : Double(3, 20) = Mul(%22, %18), uses = [%8.i1];
%8 : Double(3, 20) = Add(%14, %11), uses = [%5.i0, %0.i1];
%5 : Double(3, 20) = Tanh(%8), uses = [%3.i1];
%3 : Double(3, 20) = Mul(%16, %5), uses = [%0.i0];
return (%3, %8);
}


@ -1,42 +0,0 @@
graph(%1 : Double(3, 10)
%2 : Double(3, 20)
%3 : Double(3, 20)
%4 : Double(80, 10)
%5 : Double(80, 20)
%6 : Double(80)
%7 : Double(80)) {
%8 : Double(10!, 80!) = t(%4), uses = [%10.i2];
%9 : Double(3!, 80) = expand[size=[3, 80]](%6), uses = [%10.i0];
%10 : Double(3, 80) = addmm[beta={1}, alpha={1}](%9, %1, %8), uses = [%32.i0];
%11 : Double(20!, 80!) = t(%5), uses = [%13.i2];
%12 : Double(3!, 80) = expand[size=[3, 80]](%7), uses = [%13.i0];
%13 : Double(3, 80) = addmm[beta={1}, alpha={1}](%12, %2, %11), uses = [%37.i0];
%33 : Double(3!, 20), %34 : Double(3!, 20), %35 : Double(3!, 20), %36 : Double(3!, 20) = split[split_size=20, dim=1](%10), uses = [[%29.i7], [%29.i5], [%29.i3], [%29.i1]];
%38 : Double(3!, 20), %39 : Double(3!, 20), %40 : Double(3!, 20), %41 : Double(3!, 20) = split[split_size=20, dim=1](%13), uses = [[%29.i8], [%29.i6], [%29.i4], [%29.i2]];
%30 : Double(3, 20), %31 : Double(3, 20) = fusion_group_0(%3, %36, %41, %35, %40, %34, %39, %33, %38), uses = [[%0.i0], [%0.i1]];
return (%30, %31);
}
with fusion_group_0 = graph(%13 : Double(3, 20)
%23 : Double(3!, 20)
%24 : Double(3!, 20)
%26 : Double(3!, 20)
%27 : Double(3!, 20)
%29 : Double(3!, 20)
%30 : Double(3!, 20)
%32 : Double(3!, 20)
%33 : Double(3!, 20)) {
%34 : Double(3, 20) = add[alpha={1}](%32, %33), uses = [%22.i0];
%31 : Double(3, 20) = add[alpha={1}](%29, %30), uses = [%20.i0];
%28 : Double(3, 20) = add[alpha={1}](%26, %27), uses = [%18.i0];
%25 : Double(3, 20) = add[alpha={1}](%23, %24), uses = [%16.i0];
%22 : Double(3, 20) = sigmoid(%34), uses = [%11.i0];
%20 : Double(3, 20) = sigmoid(%31), uses = [%14.i0];
%18 : Double(3, 20) = tanh(%28), uses = [%11.i1];
%16 : Double(3, 20) = sigmoid(%25), uses = [%3.i0];
%14 : Double(3, 20) = mul(%20, %13), uses = [%8.i0];
%11 : Double(3, 20) = mul(%22, %18), uses = [%8.i1];
%8 : Double(3, 20) = add[alpha={1}](%14, %11), uses = [%5.i0, %0.i1];
%5 : Double(3, 20) = tanh(%8), uses = [%3.i1];
%3 : Double(3, 20) = mul(%16, %5), uses = [%0.i0];
return (%3, %8);
}


@ -1,9 +0,0 @@
graph(%1 : UNKNOWN_TYPE
%2 : UNKNOWN_TYPE) {
%3 : Double(1) = add[alpha={1}](%1, %2), uses = [%4.i1];
%4 : Double(1) = mul(%1, %3), uses = [%5.i0];
%5 : Double(1) = tanh(%4), uses = [%6.i0];
%6 : Double(1) = sigmoid(%5), uses = [%0.i0];
%7 : UNKNOWN_TYPE = TensorTest[a= 1 1 1 1 [ CPUDoubleTensor{2,2} ]](), uses = [];
return (%6);
}


@ -1,8 +0,0 @@
graph(%1 : Double(1)
%2 : Double(1)) {
%3 : Double(1) = add[alpha={1}](%1, %2), uses = [%4.i1];
%4 : Double(1) = mul(%1, %3), uses = [%5.i0], scope: Foo;
%5 : Double(1) = tanh(%4), uses = [%6.i0], scope: Foo/Bar;
%6 : Double(1) = sigmoid(%5), uses = [%0.i0], scope: Foo;
return (%6);
}


@ -1,9 +0,0 @@
graph(%1 : Double(1, 3, 227, 227)
%2 : Double(64, 3, 11, 11)
%3 : Double(64)) {
%5 : UNKNOWN_TYPE = Conv[kernel_shape=[11, 11], strides=[4, 4], pads=[2, 2, 2, 2], dilations=[1, 1], group=1](%1, %2), uses = [[%6.i0]], scope: Net/Sequential[features]/Conv2d[0];
%6 : Double(1, 64, 56, 56) = Add[broadcast=1, axis=1](%5, %3), uses = [%7.i0], scope: Net/Sequential[features]/Conv2d[0];
%7 : Double(1, 64, 56, 56) = Relu(%6), uses = [%8.i0], scope: Net/Sequential[features]/ReLU[1];
%8 : Double(1, 64, 27, 27) = MaxPool[kernel_shape=[3, 3], pads=[0, 0], strides=[2, 2]](%7), uses = [%0.i0], scope: Net/Sequential[features]/MaxPool2d[2];
return (%8);
}


@ -1,5 +0,0 @@
graph(%1 : Double(2)) {
%2 : Double(2) = Softmax[axis=0](%1), uses = [%3.i0], scope: Net;
%3 : Double(2) = Log(%2), uses = [%0.i0], scope: Net;
return (%3);
}


@ -1,8 +0,0 @@
graph(%1 : Double(1)
%2 : Double(1)) {
%3 : Double(1) = add[alpha={1}](%1, %2), uses = [%4.i1];
%4 : Double(1) = mul(%1, %3), uses = [%5.i0];
%5 : Double(1) = tanh(%4), uses = [%6.i0];
%6 : Double(1) = sigmoid(%5), uses = [%0.i0];
return (%6);
}


@ -20,18 +20,11 @@ fi
pushd "$(dirname "$0")"
echo "Running JIT tests"
$PYCMD test_jit.py $@
echo "Running torch tests"
$PYCMD test_torch.py $@
echo "Running autograd tests"
$PYCMD test_autograd.py $@
$PYCMD test_potrf.py $@
echo "Running torch.distributions tests"
$PYCMD test_distributions.py $@
echo "Running sparse tests"
$PYCMD test_sparse.py $@
@ -80,31 +73,16 @@ distributed_set_up
BACKEND=tcp WORLD_SIZE=3 $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the TCP backend with file init_method"
distributed_set_up
BACKEND=tcp WORLD_SIZE=3 INIT_METHOD='file://'$TEMP_DIR'/shared_init_file' $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the Gloo backend"
distributed_set_up
BACKEND=gloo WORLD_SIZE=3 $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the Gloo backend with file init_method"
distributed_set_up
BACKEND=gloo WORLD_SIZE=3 INIT_METHOD='file://'$TEMP_DIR'/shared_init_file' $PYCMD ./test_distributed.py
distributed_tear_down
if [ -x "$(command -v mpiexec)" ]; then
echo "Running distributed tests for the MPI backend"
distributed_set_up
BACKEND=mpi mpiexec -n 3 $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the MPI backend with file init_method"
distributed_set_up
BACKEND=mpi INIT_METHOD='file://'$TEMP_DIR'/shared_init_file' mpiexec -n 3 $PYCMD ./test_distributed.py
distributed_tear_down
else
echo "Skipping MPI backend tests (MPI not found)"
fi

File diff suppressed because it is too large.


@ -1,6 +1,5 @@
import math
import tempfile
import re
import unittest
from itertools import repeat
@ -17,11 +16,6 @@ if not torch.cuda.is_available():
TestCase = object # noqa: F811
HAS_CUDA = False
HAS_MAGMA = HAS_CUDA
if HAS_CUDA:
torch.ones(1).cuda() # has_magma shows up after cuda is initialized
HAS_MAGMA = torch.cuda.has_magma
def is_floating(t):
return type(t) in [torch.FloatTensor, torch.DoubleTensor,
@ -59,17 +53,6 @@ def make_tensor(t, *sizes):
return t(*sizes).copy_(torch.randn(*sizes))
def make_sparse_tensor(t, n, *sizes):
assert t.is_sparse
tensor = t()
i = tensor._indices()
i = i.new(len(sizes), n).copy_(
torch.cat([torch.LongTensor(1, n).random_(s) for s in sizes], 0))
v = tensor._values()
v = v.new(n).copy_(torch.randn(n))
return t(i, v, torch.Size(sizes))
def small_2d(t):
return make_tensor(t, S, S)
@ -97,10 +80,6 @@ def medium_2d(t):
return make_tensor(t, M, M)
def medium_2d_expanded(t):
return t(1).expand(M, M)
def medium_2d_scaled(t, scale=10):
return make_tensor(t, M, M).mul(scale)
@ -138,22 +117,11 @@ def large_2d_lapack(t):
return t(1000, 1000).normal_()
def long_type(t):
return torch.cuda.LongTensor if 'cuda' in t.__module__ else torch.LongTensor
def new_t(*sizes):
def tmp(t):
return t(*sizes).copy_(torch.randn(*sizes))
return tmp
# Content of each tuple:
# - function name
# - constructor for the tensor, signature: fn(tensor_type) -> tensor
# - constructor for the arguments, signature: fn(tensor_type) -> list
# - postfix name for the test (must be unique for a given function) (default='')
# - tensor types to use (default=types)
# - disable inplace test, if set to True, no inplace test will be done (default=False)
tests = [
('add', small_3d, lambda t: [number(3.14, 3, t)]),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
@ -165,11 +133,6 @@ tests = [
('div', small_3d, lambda t: [number(3.14, 3, t)],),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1', float_types),
('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2', float_types),
('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3', float_types),
('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types),
('pow', small_3d, lambda t: [number(-2., -2, t)], 'pow-2', float_types),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
@ -270,8 +233,6 @@ tests = [
('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim'),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
('put_', new_t(2, 5, 3), lambda t: [long_type(t)([[0], [-2]]), t([[3], [4]])],),
('put_', new_t(2, 2), lambda t: [long_type(t)([[1], [-3]]), t([[1], [2]]), True], 'accumulate'),
('prod', small_2d_oneish, lambda t: [],),
('prod', small_3d, lambda t: [1], 'dim'),
('prod', small_3d, lambda t: [-1], 'neg_dim'),
@ -297,7 +258,6 @@ tests = [
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
('squeeze', new_t(1, 2, 1, 4), lambda t: [-2], 'neg_dim'),
('t', new_t(1, 2), lambda t: [],),
('take', new_t(3, 4), lambda t: [long_type(t)([[0], [-2]])],),
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'),
('to_list', small_3d, lambda t: [],),
@ -306,11 +266,9 @@ tests = [
('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
('trace', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [],),
('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
('tril', medium_2d, lambda t: [2], 'positive'),
('tril', medium_2d, lambda t: [-2], 'negative'),
('triu', medium_2d, lambda t: [],),
('triu', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
('triu', medium_2d, lambda t: [2], 'positive'),
('triu', medium_2d, lambda t: [-2], 'negative'),
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
@ -319,7 +277,6 @@ tests = [
('view_as', small_3d, lambda t: [t(100, 10)],),
('zero', small_3d, lambda t: [],),
('zeros', small_3d, lambda t: [1, 2, 3, 4],),
('eye', small_2d, lambda t: [3, 4],),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
@ -365,8 +322,6 @@ simple_pointwise_float = [
'atan',
'cos',
'cosh',
'erf',
'erfinv',
'exp',
'reciprocal',
'floor',
@ -397,24 +352,18 @@ def get_cycles_per_ms():
return _cycles_per_ms
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5, force_gpu_half=False):
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
def tmp(self):
cpu_tensor = tensor_constructor(t)
type_map = {}
if force_gpu_half:
type_map = {
'torch.FloatTensor': 'torch.cuda.HalfTensor',
'torch.DoubleTensor': 'torch.cuda.HalfTensor',
}
gpu_tensor = to_gpu(cpu_tensor, type_map)
gpu_tensor = to_gpu(cpu_tensor)
cpu_args = arg_constructor(t)
gpu_args = [to_gpu(arg, type_map) for arg in cpu_args]
gpu_args = [to_gpu(arg) for arg in cpu_args]
cpu_result = getattr(cpu_tensor, fn)(*cpu_args)
try:
gpu_result = getattr(gpu_tensor, fn)(*gpu_args)
except RuntimeError as e:
reason = e.args[0]
if 'only supports floating-point types' in reason or 'unimplemented data type' in reason:
if 'unimplemented data type' in reason:
raise unittest.SkipTest('unimplemented data type')
raise
except AttributeError as e:
@ -506,9 +455,6 @@ class TestCuda(TestCase):
x = torch.randn(5, 5).cuda(1)
self.assertEqual(x.int().get_device(), 1)
def test_neg(self):
TestTorch._test_neg(self, lambda t: t.cuda())
def _test_broadcast(self, input):
if torch.cuda.device_count() < 2:
raise unittest.SkipTest("only one GPU detected")
@ -523,55 +469,32 @@ class TestCuda(TestCase):
def test_broadcast_gpu(self):
self._test_broadcast(torch.randn(5, 5))
@staticmethod
def _test_broadcast_coalesced(self, tensors, buffer_size):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_broadcast_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
b_tensors = [comm.broadcast(t, (0, 1)) for t in tensors]
for (_, bt), t in zip(b_tensors, tensors):
self.assertEqual(bt.get_device(), 1)
self.assertEqual(bt, t)
self.assertIsInstance(bt, type(t))
bc_tensors = comm.broadcast_coalesced(tensors, (0, 1), buffer_size=buffer_size)
bc_tensors = comm.broadcast_coalesced(tensors, (0, 1), buffer_size=num_bytes * 5 // 2)
bc_tensors_t = list(zip(*bc_tensors))
self.assertEqual(b_tensors, bc_tensors_t)
for (_, bt), (_, bct) in zip(b_tensors, bc_tensors_t):
self.assertEqual(bt.get_device(), bct.get_device())
self.assertIsInstance(bct, type(bt))
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_broadcast_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 1, 2, 3),
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 10, 2, 3),
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 5, 2, 3),
make_sparse_tensor(torch.cuda.sparse.LongTensor, 7, 3, 3),
make_sparse_tensor(torch.cuda.sparse.FloatTensor, 2, 2, 3),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
make_sparse_tensor(torch.cuda.sparse.LongTensor, 3, 2, 7),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_broadcast_coalesced_dense_only(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)
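# Editor's sketch (not part of the diff): the coalesced-broadcast API exercised by the
# two tests above, assuming at least two visible CUDA devices.
import torch
import torch.cuda.comm as comm

def _broadcast_coalesced_example():
    tensors = [torch.randn(5).cuda(), torch.randn(5).long().cuda()]
    # Copy every tensor to devices 0 and 1; tensors of the same type are flattened
    # into buffers of at most buffer_size bytes so that many small copies coalesce.
    copies = comm.broadcast_coalesced(tensors, (0, 1), buffer_size=128)
    # copies[d] holds the copies placed on device d; regroup them per input tensor,
    # mirroring the zip(*bc_tensors) in the test above.
    return list(zip(*copies))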
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add(self):
x = torch.randn(5, 5)
@ -582,8 +505,18 @@ class TestCuda(TestCase):
self.assertEqual(result.get_device(), 0)
self.assertEqual(result.cpu(), x + y)
@staticmethod
def _test_reduce_add_coalesced(self, tensors, buffer_size):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
dup_tensors = [tensors, list(map(lambda t: t.cuda(1), tensors))]
r_tensors = list(map(comm.reduce_add, zip(*dup_tensors)))
@ -592,46 +525,12 @@ class TestCuda(TestCase):
self.assertEqual(r, t * 2)
self.assertIsInstance(r, type(t))
rc_tensors = comm.reduce_add_coalesced(dup_tensors, buffer_size=buffer_size)
rc_tensors = comm.reduce_add_coalesced(dup_tensors, buffer_size=num_bytes * 5 // 2)
self.assertEqual(r_tensors, rc_tensors)
for r, rc in zip(r_tensors, rc_tensors):
self.assertEqual(rc.get_device(), r.get_device())
self.assertIsInstance(rc, type(r))
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 1, 2, 3),
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 10, 2, 3),
make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 5, 2, 3),
make_sparse_tensor(torch.cuda.sparse.LongTensor, 7, 3, 3),
make_sparse_tensor(torch.cuda.sparse.FloatTensor, 2, 2, 3),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
make_sparse_tensor(torch.cuda.sparse.LongTensor, 3, 2, 7),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
self._test_reduce_add_coalesced(self, tensors, num_bytes * 5 // 2)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add_coalesced_dense_only(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
self._test_reduce_add_coalesced(self, tensors, num_bytes * 5 // 2)
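# Editor's sketch (not part of the diff): reduce_add_coalesced, as used above, takes
# one list of tensors per source device and returns their element-wise sums on the
# destination (current) device.  Assumes two CUDA devices.
import torch
import torch.cuda.comm as comm

def _reduce_add_coalesced_example():
    on_dev0 = [torch.randn(5).cuda(0), torch.randn(5).long().cuda(0)]
    on_dev1 = [t.cuda(1) for t in on_dev0]
    # Each returned tensor equals the sum of the corresponding inputs, i.e. 2x here.
    return comm.reduce_add_coalesced([on_dev0, on_dev1], buffer_size=128)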
def _test_scatter(self, input, chunk_sizes=None, dim=0):
if torch.cuda.device_count() < 2:
raise unittest.SkipTest("only one GPU detected")
@ -732,38 +631,6 @@ class TestCuda(TestCase):
z = torch.cat([x, y], 0)
self.assertEqual(z.get_device(), x.get_device())
def test_cat(self):
SIZE = 10
for dim in range(-3, 3):
pos_dim = dim if dim >= 0 else 3 + dim
x = torch.rand(13, SIZE, SIZE).transpose(0, pos_dim).cuda()
y = torch.rand(17, SIZE, SIZE).transpose(0, pos_dim).cuda()
z = torch.rand(19, SIZE, SIZE).transpose(0, pos_dim).cuda()
res1 = torch.cat((x, y, z), dim)
self.assertEqual(res1.narrow(pos_dim, 0, 13), x, 0)
self.assertEqual(res1.narrow(pos_dim, 13, 17), y, 0)
self.assertEqual(res1.narrow(pos_dim, 30, 19), z, 0)
x = torch.randn(20, SIZE, SIZE).cuda()
self.assertEqual(torch.cat(torch.split(x, 7)), x)
self.assertEqual(torch.cat(torch.chunk(x, 7)), x)
y = torch.randn(1, SIZE, SIZE).cuda()
z = torch.cat([x, y])
self.assertEqual(z.size(), (21, SIZE, SIZE))
def test_cat_bad_input_sizes(self):
x = torch.randn(2, 1).cuda()
y = torch.randn(2, 1, 1).cuda()
z = torch.randn(2, 1, 1).cuda()
self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z]))
x = torch.randn(2, 1, 2).cuda()
y = torch.randn(2, 1, 1).cuda()
z = torch.randn(2, 2, 1).cuda()
self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1))
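# Editor's note (not part of the diff): the removed tests above exercise torch.cat's
# shape rule - inputs must agree in every dimension except the concatenation one.
# A minimal CPU illustration:
import torch

a, b = torch.randn(2, 3), torch.randn(4, 3)
z = torch.cat([a, b], 0)                  # sizes agree outside dim 0 -> (6, 3)
# torch.cat([a, torch.randn(4, 2)], 0)    # would raise RuntimeError: dim 1 differs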
def test_serialization(self):
x = torch.randn(4, 4).cuda()
with tempfile.NamedTemporaryFile() as f:
@ -930,11 +797,6 @@ class TestCuda(TestCase):
tmp3 = torch.cuda.FloatTensor(t.size())
self.assertEqual(tmp3.data_ptr(), ptr[0], 'allocation not re-used')
def test_noncontiguous_pinned_memory(self):
# See issue #3266
x = torch.arange(0, 10).view((2, 5))
self.assertEqual(x.t(), x.t().pin_memory())
def test_caching_pinned_memory(self):
cycles_per_ms = get_cycles_per_ms()
@ -986,9 +848,6 @@ class TestCuda(TestCase):
def test_broadcast(self):
TestTorch._test_broadcast(self, lambda t: t.cuda())
def test_contiguous(self):
TestTorch._test_contiguous(self, lambda t: t.cuda())
def test_broadcast_fallback(self):
TestTorch._test_broadcast_fallback(self, lambda t: t.cuda())
@ -1010,9 +869,6 @@ class TestCuda(TestCase):
def test_btrisolve(self):
TestTorch._test_btrisolve(self, lambda t: t.cuda())
def test_dim_reduction(self):
TestTorch._test_dim_reduction(self, lambda t: t.cuda())
def test_tensor_gather(self):
TestTorch._test_gather(self, lambda t: t.cuda(), False)
@ -1025,69 +881,6 @@ class TestCuda(TestCase):
def test_tensor_scatterFill(self):
TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)
def test_var(self):
cpu_tensor = torch.randn(2, 3, 3)
gpu_tensor = cpu_tensor.cuda()
self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
self.assertEqual(gpu_tensor.var(1), cpu_tensor.var(1))
self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
self.assertEqual(gpu_tensor.std(), cpu_tensor.std())
self.assertEqual(gpu_tensor.std(1), cpu_tensor.std(1))
self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
cpu_tensor = torch.randn(100)
gpu_tensor = cpu_tensor.cuda()
self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
def test_var_unbiased(self):
tensor = torch.randn(100).cuda()
self.assertEqual(tensor.var(0), tensor.var(0, unbiased=True))
self.assertEqual(tensor.var(), tensor.var(unbiased=True))
self.assertEqual(tensor.var(unbiased=False), tensor.var(0, unbiased=False)[0])
tensor = torch.FloatTensor([1.0, 2.0]).cuda()
self.assertEqual(tensor.var(unbiased=True), 0.5)
self.assertEqual(tensor.var(unbiased=False), 0.25)
tensor = torch.randn(100).cuda()
self.assertEqual(tensor.std(0), tensor.std(0, unbiased=True))
self.assertEqual(tensor.std(), tensor.std(unbiased=True))
self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0])
def test_var_large_input(self):
# Large, not-nice input
tensor_cpu = torch.randn(2 * 32 * 1024 + 1, 2, 67)
tensor_cuda = tensor_cpu.cuda()
self.assertEqual(tensor_cpu.var(2), tensor_cuda.var(2).cpu())
def test_var_stability(self):
tensor = torch.FloatTensor([2281.5, 2281.25]).cuda()
# Stability for inner dim
self.assertEqual(tensor.var(0)[0], 0.03125)
# General stability
self.assertEqual(tensor.var(), 0.03125)
# Stability for outer dimensions
tensor = tensor.unsqueeze(1)
self.assertEqual(tensor.var(0)[0], 0.03125)
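# Editor's note (not part of the diff): the constants asserted above follow from the
# definition of sample variance.  For x = [1.0, 2.0] the mean is 1.5 and the sum of
# squared deviations is 0.5, so var = 0.5/(n-1) = 0.5 unbiased and 0.5/n = 0.25 biased.
# For x = [2281.5, 2281.25] the deviations are +/-0.125, so the unbiased variance is
# 2 * 0.125**2 / 1 = 0.03125.  A plain-Python check:
def _sample_var(xs, unbiased=True):
    mean = sum(xs) / len(xs)
    ss = sum((x - mean) ** 2 for x in xs)
    return ss / (len(xs) - 1) if unbiased else ss / len(xs)

assert _sample_var([1.0, 2.0]) == 0.5 and _sample_var([1.0, 2.0], unbiased=False) == 0.25
assert _sample_var([2281.5, 2281.25]) == 0.03125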
@unittest.skipIf(not HAS_MAGMA, "no MAGMA library detected")
def test_symeig(self):
# Small case
tensor = torch.randn(3, 3).cuda()
tensor = torch.mm(tensor, tensor.t())
eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))
# Large case
tensor = torch.randn(257, 257).cuda()
tensor = torch.mm(tensor, tensor.t())
eigval, eigvec = torch.symeig(tensor, eigenvectors=True)
self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t()))
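# Editor's note (not part of the diff): the symeig tests above rely on the
# reconstruction identity A = Q diag(e) Q^T for a symmetric matrix A with eigenvalues
# e and eigenvector columns Q.  The same round trip on the CPU:
import torch

a = torch.randn(4, 4)
a = torch.mm(a, a.t())                       # symmetrize
e, q = torch.symeig(a, eigenvectors=True)
recon = torch.mm(torch.mm(q, e.diag()), q.t())
print((a - recon).abs().max())               # ~0 up to floating-point error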
def test_arange(self):
for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']:
a = torch.cuda.__dict__[t]()
@ -1096,17 +889,6 @@ class TestCuda(TestCase):
torch.arange(0, 10, out=b)
self.assertEqual(a, b.cuda())
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_get_set_rng_state_all(self):
states = torch.cuda.get_rng_state_all()
before0 = torch.cuda.FloatTensor(100, device=0).normal_()
before1 = torch.cuda.FloatTensor(100, device=1).normal_()
torch.cuda.set_rng_state_all(states)
after0 = torch.cuda.FloatTensor(100, device=0).normal_()
after1 = torch.cuda.FloatTensor(100, device=1).normal_()
self.assertEqual(before0, after0, 0)
self.assertEqual(before1, after1, 0)
def test_nvtx(self):
# Just making sure we can see the symbols
torch.cuda.nvtx.range_push("foo")
@ -1119,27 +901,18 @@ if HAS_CUDA:
for t in types:
tensor = t()
gpu_tensor = get_gpu_type(t)()
# Default values
desc = ''
type_subset = types
no_inplace = False
if len(decl) == 3:
name, constr, arg_constr = decl
desc = ''
elif len(decl) == 4:
name, constr, arg_constr, desc = decl
elif len(decl) == 5:
name, constr, arg_constr, desc, type_subset = decl
elif len(decl) == 6:
name, constr, arg_constr, desc, type_subset, no_inplace = decl
if t not in type_subset:
continue
if t not in type_subset:
continue
precision = custom_precision.get(name, TestCuda.precision)
for inplace in (True, False):
if inplace and no_inplace:
continue
if inplace:
name_inner = name + '_'
else:
@ -1156,15 +929,7 @@ if HAS_CUDA:
test_name += '_' + desc
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
setattr(TestCuda,
test_name,
compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
if t == torch.FloatTensor:
assert not hasattr(TestCuda, test_name + '_gpu_half'), "Duplicated test name: " + test_name
setattr(TestCuda,
test_name + '_gpu_half',
compare_cpu_gpu(constr, arg_constr, name_inner, t,
precision, force_gpu_half=True))
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
if __name__ == '__main__':


@ -1,46 +1,13 @@
import math
import sys
import errno
import os
import ctypes
import signal
import torch
import time
import traceback
import unittest
from torch import multiprocessing
from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset
from torch.utils.data.dataset import random_split
from torch.utils.data.dataloader import default_collate, ExceptionWrapper
from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS
from common import TestCase, run_tests, TEST_NUMPY
from common_nn import TEST_CUDA
JOIN_TIMEOUT = 17.0 if IS_WINDOWS else 4.5
class TestDatasetRandomSplit(TestCase):
def test_lengths_must_equal_datset_size(self):
with self.assertRaises(ValueError):
random_split([1, 2, 3, 4], [1, 2])
def test_splits_have_correct_size(self):
splits = random_split([1, 2, 3, 4, 5, 6], [2, 4])
self.assertEqual(len(splits), 2)
self.assertEqual(len(splits[0]), 2)
self.assertEqual(len(splits[1]), 4)
def test_splits_are_mutually_exclusive(self):
data = [5, 2, 3, 4, 1, 6]
splits = random_split(data, [2, 4])
all_values = []
all_values.extend(list(splits[0]))
all_values.extend(list(splits[1]))
data.sort()
all_values.sort()
self.assertListEqual(data, all_values)
class TestTensorDataset(TestCase):
def test_len(self):
@ -95,56 +62,6 @@ class TestConcatDataset(TestCase):
# this one goes to 11
result[11]
def test_add_dataset(self):
d1 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
d2 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
d3 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
result = d1 + d2 + d3
self.assertEqual(21, len(result))
self.assertEqual(0, (d1[0][0] - result[0][0]).abs().sum())
self.assertEqual(0, (d2[0][0] - result[7][0]).abs().sum())
self.assertEqual(0, (d3[0][0] - result[14][0]).abs().sum())
# Stores the first encountered exception in .exception.
# Inspired by https://stackoverflow.com/a/33599967
class ErrorTrackingProcess(multiprocessing.Process):
def __init__(self, *args, **kwargs):
super(ErrorTrackingProcess, self).__init__(*args, **kwargs)
self._pconn, self._cconn = multiprocessing.Pipe()
self._exception = None
def run(self):
# Disable stderr printing at the OS level, so that workers do not print
# to stderr.
# Can't use sys.stderr.close, otherwise Python `raise` will error with
# ValueError: I/O operation on closed file.
os.close(sys.stderr.fileno())
try:
super(ErrorTrackingProcess, self).run()
self._cconn.send(None)
except Exception as e:
self._cconn.send(ExceptionWrapper(sys.exc_info()))
raise
@property
def exception(self):
if self._pconn.poll():
self._exception = self._pconn.recv()
if self._exception is None:
return None
else:
return self._exception.exc_type(self._exception.exc_msg)
# ESRCH means that os.kill could not find an alive process
def send_signal(self, signum, ignore_ESRCH=False):
try:
os.kill(self.pid, signum)
except OSError as e:
if not ignore_ESRCH or e.errno != errno.ESRCH:
raise
class ErrorDataset(Dataset):
@ -155,84 +72,6 @@ class ErrorDataset(Dataset):
return self.size
class SegfaultDataset(Dataset):
def __init__(self, size):
self.size = size
def __getitem__(self, idx):
return ctypes.string_at(0)
def __len__(self):
return self.size
class SleepDataset(Dataset):
def __init__(self, size, sleep_sec):
self.size = size
self.sleep_sec = sleep_sec
def __getitem__(self, idx):
time.sleep(self.sleep_sec)
return idx
def __len__(self):
return self.size
class SeedDataset(Dataset):
def __init__(self, size):
self.size = size
def __getitem__(self, idx):
return torch.initial_seed()
def __len__(self):
return self.size
# Inspired by https://stackoverflow.com/a/26703365
# This will ensure that each worker processes at least one sample
class SynchronizedSeedDataset(Dataset):
def __init__(self, size, num_workers):
assert size >= num_workers
self.count = multiprocessing.Value('i', 0)
self.barrier = multiprocessing.Semaphore(0)
self.num_workers = num_workers
self.size = size
def __getitem__(self, idx):
self.count.value += 1
if self.count.value == self.num_workers:
self.barrier.release()
self.barrier.acquire()
self.barrier.release()
return torch.initial_seed()
def __len__(self):
return self.size
def _test_timeout():
dataset = SleepDataset(10, 10)
dataloader = DataLoader(dataset, batch_size=2, num_workers=2, timeout=1)
_ = next(iter(dataloader))
def _test_segfault():
dataset = SegfaultDataset(10)
dataloader = DataLoader(dataset, batch_size=2, num_workers=2)
_ = next(iter(dataloader))
# test custom init function
def init_fn(worker_id):
torch.manual_seed(12345)
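# Editor's sketch (not part of the diff): how worker_init_fn, exercised by
# test_worker_init_fn below, is wired into a DataLoader in user code.  The dataset
# shapes here are arbitrary.
import torch
from torch.utils.data import DataLoader, TensorDataset

def _seeded_loader():
    dataset = TensorDataset(torch.randn(8, 3), torch.randn(8))
    return DataLoader(dataset, batch_size=2, num_workers=2,
                      worker_init_fn=lambda worker_id: torch.manual_seed(12345 + worker_id))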
class TestDataLoader(TestCase):
def setUp(self):
@ -299,62 +138,6 @@ class TestDataLoader(TestCase):
self.assertTrue(input.is_pinned())
self.assertTrue(target.is_pinned())
def test_multiple_dataloaders(self):
loader1_it = iter(DataLoader(self.dataset, num_workers=1))
loader2_it = iter(DataLoader(self.dataset, num_workers=2))
next(loader1_it)
next(loader1_it)
next(loader2_it)
next(loader2_it)
next(loader1_it)
next(loader2_it)
@unittest.skipIf(True, "flaky test")
def test_segfault(self):
p = ErrorTrackingProcess(target=_test_segfault)
p.start()
p.join(JOIN_TIMEOUT)
try:
self.assertFalse(p.is_alive())
self.assertNotEqual(p.exitcode, 0)
if IS_WINDOWS:
self.assertIsInstance(p.exception, OSError)
self.assertRegex(str(p.exception), r'access violation reading ')
else:
self.assertIsInstance(p.exception, RuntimeError)
self.assertRegex(str(p.exception), r'DataLoader worker \(pid \d+\) is killed by signal: ')
finally:
p.terminate()
def test_timeout(self):
p = ErrorTrackingProcess(target=_test_timeout)
p.start()
p.join(JOIN_TIMEOUT)
try:
self.assertFalse(p.is_alive())
self.assertNotEqual(p.exitcode, 0)
self.assertIsInstance(p.exception, RuntimeError)
self.assertRegex(str(p.exception), r'DataLoader timed out after \d+ seconds')
finally:
p.terminate()
def test_worker_seed(self):
num_workers = 6
dataset = SynchronizedSeedDataset(num_workers, num_workers)
dataloader = DataLoader(dataset, batch_size=1, num_workers=num_workers)
seeds = set()
for batch in dataloader:
seeds.add(batch[0])
self.assertEqual(len(seeds), num_workers)
def test_worker_init_fn(self):
dataset = SeedDataset(4)
dataloader = DataLoader(dataset, batch_size=2, num_workers=2,
worker_init_fn=init_fn)
for batch in dataloader:
self.assertEqual(12345, batch[0])
self.assertEqual(12345, batch[1])
def test_shuffle(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True))
@ -430,17 +213,17 @@ class TestDataLoader(TestCase):
"check that workers exit even if the iterator is not exhausted"
loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True))
workers = loader.workers
worker_manager_thread = loader.worker_manager_thread
pin_thread = loader.pin_thread
for i, sample in enumerate(loader):
if i == 3:
break
del loader
for w in workers:
w.join(JOIN_TIMEOUT)
w.join(1.0) # timeout of one second
self.assertFalse(w.is_alive(), 'subprocess not terminated')
self.assertEqual(w.exitcode, 0)
worker_manager_thread.join(JOIN_TIMEOUT)
self.assertFalse(worker_manager_thread.is_alive())
pin_thread.join(1.0)
self.assertFalse(pin_thread.is_alive())
def test_len(self):
def check_len(dl, expected):
@ -483,23 +266,6 @@ class TestDataLoader(TestCase):
batch = next(iter(loader))
self.assertIsInstance(batch, tt)
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_default_colate_bad_numpy_types(self):
import numpy as np
# Should be a no-op
arr = np.array(['a', 'b', 'c'])
default_collate(arr)
arr = np.array([[['a', 'b', 'c']]])
self.assertRaises(TypeError, lambda: default_collate(arr))
arr = np.array([object(), object(), object()])
self.assertRaises(TypeError, lambda: default_collate(arr))
arr = np.array([[[object(), object(), object()]]])
self.assertRaises(TypeError, lambda: default_collate(arr))
class StringDataset(Dataset):
def __init__(self):


@ -8,13 +8,11 @@ from functools import wraps, reduce
from contextlib import contextmanager
import torch
import torch.cuda
import torch.distributed as dist
from common import TestCase
BACKEND = os.environ['BACKEND']
TEMP_DIR = os.environ['TEMP_DIR']
INIT_METHOD = os.getenv('INIT_METHOD', 'env://')
MASTER_PORT = '29500'
MASTER_ADDR = '127.0.0.1'
@ -23,20 +21,6 @@ if not dist.is_available():
print('Distributed not available, skipping tests')
sys.exit(0)
SKIP_IF_NO_CUDA_EXIT_CODE = 75
def skip_if_no_cuda_distributed(func):
func.skip_if_no_cuda_distributed = True
@wraps(func)
def wrapper(*args, **kwargs):
if not torch.cuda.is_available():
sys.exit(SKIP_IF_NO_CUDA_EXIT_CODE)
return func(*args, **kwargs)
return wrapper
@contextmanager
def _lock():
@ -173,9 +157,8 @@ class _DistTestBase(object):
if src == rank:
continue
tensor = _build_tensor(10, value=-1)
sender = dist.recv(tensor)
self.assertTrue(tensor.eq(sender).all())
recv_ranks.add(sender)
dist.recv(tensor)
recv_ranks.add(tensor.resize_(1)[0])
self.assertEqual(len(recv_ranks), dist.get_world_size() - 1)
self._barrier()
@ -244,7 +227,6 @@ class _DistTestBase(object):
self._test_broadcast_helper(group, group_id, rank)
@unittest.skipIf(BACKEND != 'gloo', "Only Gloo backend supports CUDA allReduce")
@skip_if_no_cuda_distributed
def test_broadcast_cuda(self):
group, group_id, rank = self._init_global_test()
self._test_broadcast_helper(group, group_id, rank, True)
@ -350,7 +332,6 @@ class _DistTestBase(object):
)
@unittest.skipIf(BACKEND != 'gloo', "Only Gloo backend supports CUDA allReduce")
@skip_if_no_cuda_distributed
def test_all_reduce_sum_cuda(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
@ -505,7 +486,7 @@ if BACKEND == 'tcp' or BACKEND == 'gloo':
@wraps(fn)
def wrapper(self):
if self.rank == self.MANAGER_PROCESS_RANK:
self._join_and_reduce(fn)
self._join_and_reduce()
else:
fn(self)
return wrapper
@ -542,7 +523,7 @@ if BACKEND == 'tcp' or BACKEND == 'gloo':
def _run(self, rank):
self.rank = rank
try:
dist.init_process_group(init_method=INIT_METHOD, backend=BACKEND, world_size=int(WORLD_SIZE))
dist.init_process_group(backend=BACKEND)
except RuntimeError as e:
if 'recompile' in e.args[0]:
sys.exit(0)
@ -551,25 +532,13 @@ if BACKEND == 'tcp' or BACKEND == 'gloo':
getattr(self, self.id().split(".")[2])()
sys.exit(0)
def _join_and_reduce(self, fn):
skip_ok = getattr(fn, "skip_if_no_cuda_distributed", False)
def _join_and_reduce(self):
for p in self.processes:
p.join(self.JOIN_TIMEOUT)
if not skip_ok:
self.assertEqual(p.exitcode, 0)
if skip_ok:
first_process = self.processes[0]
# do this first so we don't give an error message about mismatched exit codes if the first isn't valid
assert first_process.exitcode == 0 or first_process.exitcode == SKIP_IF_NO_CUDA_EXIT_CODE
for p in self.processes:
self.assertEqual(p.exitcode, first_process.exitcode)
if first_process.exitcode == SKIP_IF_NO_CUDA_EXIT_CODE:
raise unittest.SkipTest("cuda is not available")
self.assertEqual(p.exitcode, 0)
elif BACKEND == 'mpi':
dist.init_process_group(init_method=INIT_METHOD, backend='mpi')
dist.init_process_group(backend='mpi')
class TestMPI(TestCase, _DistTestBase):
pass


@ -1,94 +0,0 @@
from common import TestCase, run_tests
import math
import torch
from torch.autograd import Variable, gradcheck
from torch.distributions import Bernoulli, Categorical, Normal
class TestDistributions(TestCase):
def _gradcheck_log_prob(self, dist_ctor, ctor_params):
# performs gradient checks on log_prob
distribution = dist_ctor(*ctor_params)
s = distribution.sample()
self.assertEqual(s.size(), distribution.log_prob(s).size())
def apply_fn(*params):
return dist_ctor(*params).log_prob(s)
gradcheck(apply_fn, ctor_params, raise_exception=True)
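# Editor's sketch (not part of the diff): gradcheck, used just above, compares analytic
# gradients against finite differences; double precision keeps the numeric check tight.
import torch
from torch.autograd import Variable, gradcheck

_x = Variable(torch.randn(4).double(), requires_grad=True)
assert gradcheck(lambda v: v * v, (_x,), raise_exception=True)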
def _check_log_prob(self, dist, asset_fn):
# checks that the log_prob matches a reference function
s = dist.sample()
log_probs = dist.log_prob(s)
for i, (val, log_prob) in enumerate(zip(s.data.view(-1), log_probs.data.view(-1))):
asset_fn(i, val, log_prob)
def test_bernoulli(self):
p = Variable(torch.Tensor([0.7, 0.2, 0.4]), requires_grad=True)
r = Variable(torch.Tensor([0.3]), requires_grad=True)
self.assertEqual(Bernoulli(p).sample_n(8).size(), (8, 3))
self.assertEqual(Bernoulli(r).sample_n(8).size(), (8, 1))
self.assertEqual(Bernoulli(r).sample().size(), (1,))
self._gradcheck_log_prob(Bernoulli, (p,))
def ref_log_prob(idx, val, log_prob):
prob = p.data[idx]
self.assertEqual(log_prob, math.log(prob if val else 1 - prob))
self._check_log_prob(Bernoulli(p), ref_log_prob)
def test_bernoulli_3d(self):
p = Variable(torch.Tensor(2, 3, 5).fill_(0.5), requires_grad=True)
self.assertEqual(Bernoulli(p).sample().size(), (2, 3, 5))
self.assertEqual(Bernoulli(p).sample_n(2).size(), (2, 2, 3, 5))
def test_multinomial_1d(self):
p = Variable(torch.Tensor([0.1, 0.2, 0.3]), requires_grad=True)
# TODO: this should return a 0-dim tensor once we have Scalar support
self.assertEqual(Categorical(p).sample().size(), (1,))
self.assertEqual(Categorical(p).sample_n(1).size(), (1, 1))
self._gradcheck_log_prob(Categorical, (p,))
def test_multinomial_2d(self):
probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]]
p = Variable(torch.Tensor(probabilities), requires_grad=True)
self.assertEqual(Categorical(p).sample().size(), (2,))
self.assertEqual(Categorical(p).sample_n(6).size(), (6, 2))
self._gradcheck_log_prob(Categorical, (p,))
def ref_log_prob(idx, val, log_prob):
sample_prob = p.data[idx][val] / p.data[idx].sum()
self.assertEqual(log_prob, math.log(sample_prob))
self._check_log_prob(Categorical(p), ref_log_prob)
def test_normal(self):
mean = Variable(torch.randn(5, 5), requires_grad=True)
std = Variable(torch.randn(5, 5).abs(), requires_grad=True)
mean_1d = Variable(torch.randn(1), requires_grad=True)
std_1d = Variable(torch.randn(1), requires_grad=True)
self.assertEqual(Normal(mean, std).sample().size(), (5, 5))
self.assertEqual(Normal(mean, std).sample_n(7).size(), (7, 5, 5))
self.assertEqual(Normal(mean_1d, std_1d).sample_n(1).size(), (1, 1))
self.assertEqual(Normal(mean_1d, std_1d).sample().size(), (1,))
self.assertEqual(Normal(0.2, .6).sample_n(1).size(), (1, 1))
self.assertEqual(Normal(-0.7, 50.0).sample_n(1).size(), (1, 1))
self._gradcheck_log_prob(Normal, (mean, std))
self._gradcheck_log_prob(Normal, (mean, 1.0))
self._gradcheck_log_prob(Normal, (0.0, std))
def ref_log_prob(idx, x, log_prob):
m = mean.data.view(-1)[idx]
s = std.data.view(-1)[idx]
expected = (math.exp(-(x - m) ** 2 / (2 * s ** 2)) /
math.sqrt(2 * math.pi * s ** 2))
self.assertAlmostEqual(log_prob, math.log(expected), places=3)
self._check_log_prob(Normal(mean, std), ref_log_prob)
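# Editor's note (not part of the diff): ref_log_prob above evaluates the Gaussian
# density N(x; m, s) = exp(-(x - m)**2 / (2 * s**2)) / sqrt(2 * pi * s**2) and then
# takes its log.  The same reference written directly in log space:
import math

def _normal_log_prob(x, m, s):
    return -((x - m) ** 2) / (2 * s ** 2) - math.log(s) - 0.5 * math.log(2 * math.pi)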
if __name__ == '__main__':
run_tests()


@ -1,777 +0,0 @@
import torch
import torch.jit
import torch.nn as nn
import torch.nn.functional as F
import unittest
from itertools import product
from torch.autograd import Variable, Function
from torch.autograd.function import traceable
from common import TestCase, run_tests
import io
try:
import torchvision
HAS_TORCHVISION = True
except ImportError:
HAS_TORCHVISION = False
RUN_CUDA = torch.cuda.is_available()
if torch.cuda.is_available():
CUDA_VERSION = torch._C._cuda_getCompiledVersion()
for d in range(torch.cuda.device_count()):
major = torch.cuda.get_device_capability(d)[0]
if (CUDA_VERSION < 8000 and major >= 6) or (CUDA_VERSION < 9000 and major >= 7):
RUN_CUDA = False
skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
hx, cx = hidden
gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
ingate = F.sigmoid(ingate)
forgetgate = F.sigmoid(forgetgate)
cellgate = F.tanh(cellgate)
outgate = F.sigmoid(outgate)
cy = (forgetgate * cx) + (ingate * cellgate)
hy = outgate * F.tanh(cy)
return hy, cy
def LSTMCellC(*args, **kwargs):
hy, cy = LSTMCell(*args, **kwargs)
return torch.cat((hy, cy))
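# Editor's sketch (not part of the diff): the helper above is the standard LSTM cell
# update (gates i, f, g, o; c' = f*c + i*g; h' = o*tanh(c')).  Assuming nn.LSTMCell
# uses the same input/forget/cell/output gate ordering as the chunk() above, this
# cross-check should print values near zero:
import torch
import torch.nn as nn
from torch.autograd import Variable

_cell = nn.LSTMCell(10, 20)
_x = Variable(torch.randn(3, 10))
_hx, _cx = Variable(torch.randn(3, 20)), Variable(torch.randn(3, 20))
_hy_ref, _cy_ref = _cell(_x, (_hx, _cx))
_hy, _cy = LSTMCell(_x, (_hx, _cx), _cell.weight_ih, _cell.weight_hh,
                    _cell.bias_ih, _cell.bias_hh)
print((_hy - _hy_ref).abs().max(), (_cy - _cy_ref).abs().max())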
class TestJit(TestCase):
maxDiff = None
def assertExpectedTrace(self, trace, *args, **kwargs):
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_dce(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace), *args, **kwargs)
def test_simple(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def f(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
trace, z = torch.jit.trace(f, (x, y), nderivs=0)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
def test_scopes(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def f(x, y):
out = x + y
with torch.jit.scope('Foo', out):
out = x * out
with torch.jit.scope('Bar', out):
out = torch.tanh(out)
out = torch.sigmoid(out)
return out
trace, z = torch.jit.trace(f, (x, y), nderivs=0)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
def test_scopes_intermediate_node(self):
class Net(nn.Module):
def forward(self, x):
return F.log_softmax(x, dim=0)
net = Net()
t = Variable(torch.ones(2), requires_grad=True)
trace, _ = torch.jit.trace(net, (t, ))
torch.onnx._optimize_trace(trace)
self.assertExpectedTrace(trace)
def test_scopes_identity_node(self):
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
def forward(self, x):
x = self.features(x)
return x
model = Net()
t = Variable(torch.ones(1, 3, 227, 227), requires_grad=True)
with torch.onnx.set_training(model, False):
trace, _ = torch.jit.trace(model, (t, ))
torch.onnx._optimize_trace(trace)
self.assertExpectedTrace(trace)
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_lstm_fusion(self):
input = Variable(torch.randn(3, 10).cuda())
hx = Variable(torch.randn(3, 20).cuda())
cx = Variable(torch.randn(3, 20).cuda())
module = nn.LSTMCell(10, 20).cuda() # Just to allocate weights with correct sizes
trace, _ = torch.jit.trace(LSTMCell, (input, (hx, cx)) + tuple(module.parameters()))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_fuse(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_run_lstm_fusion(self):
input = Variable(torch.randn(3, 10).cuda())
hx = Variable(torch.randn(3, 20).cuda())
cx = Variable(torch.randn(3, 20).cuda())
module = nn.LSTMCell(10, 20).cuda() # Just to allocate weights with correct sizes
CompiledLSTMCell = torch.jit.compile(nderivs=0)(LSTMCell)
z = CompiledLSTMCell(input, (hx, cx), *module.parameters())
z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
self.assertEqual(z, z2)
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_run_lstm_fusion_concat(self):
input = Variable(torch.randn(3, 10).cuda())
hx = Variable(torch.randn(3, 20).cuda())
cx = Variable(torch.randn(3, 20).cuda())
module = nn.LSTMCell(10, 20).cuda() # Just to allocate weights with correct sizes
CompiledLSTMCell = torch.jit.compile(nderivs=0)(LSTMCellC)
z = CompiledLSTMCell(input, (hx, cx), *module.parameters())
z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True)
self.assertEqual(z, z2)
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_concat_fusion(self):
hx = Variable(torch.randn(3, 20).cuda())
cx = Variable(torch.randn(3, 20).cuda())
def Foo(hx, cx):
return torch.cat((hx + cx, hx * cx))
trace, _ = torch.jit.trace(Foo, (hx, cx))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_fuse(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_fusion_distribute(self):
def f(x, y):
z1, z2 = (x + y).chunk(2, dim=1)
return z1 * z2
x = Variable(torch.randn(4, 4).cuda())
y = Variable(torch.randn(4, 4).cuda())
trace, _ = torch.jit.trace(f, (x, y), nderivs=0)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace), 'raw')
torch._C._jit_pass_fuse(trace)
torch._C._jit_pass_lint(trace)
self.assertExpected(str(trace))
def test_cse(self):
x = Variable(torch.Tensor([0.4, 0.3]), requires_grad=True)
y = Variable(torch.Tensor([0.7, 0.5]), requires_grad=True)
trace = torch._C._tracer_enter((x, y), 0)
w = (x + y) * (x + y) * (x + y)
t = torch.tanh(w) + torch.tanh(w)
z = (x + y) * (x + y) * (x + y) + t
torch._C._tracer_exit((z,))
torch._C._jit_pass_lint(trace)
torch._C._jit_pass_cse(trace)
self.assertExpected(str(trace))
def test_compile_run_twice(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(nderivs=0, optimize=False)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
z2 = doit(x, y, _assert_compiled=True)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
def test_compile_addc(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True).cuda()
y = Variable(torch.Tensor([0.7]), requires_grad=True).cuda()
@torch.jit.compile(nderivs=0)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y) + 1))
z = doit(x, y)
z2 = doit(x, y, _assert_compiled=True)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y) + 1)))
self.assertEqual(z, z2)
def test_traced_function(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(nderivs=0)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
z2 = doit(x, y, _assert_compiled=True)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
def test_disabled_traced_function(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile(enabled=False)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
z = doit(x, y)
z2 = doit(x, y)
self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y))))
self.assertEqual(z, z2)
def test_assign_traces(self):
"""Check that output Variables are assigned traces before they are saved."""
@traceable
class MyFn(Function):
@staticmethod
def forward(ctx, a):
out = a * 2
ctx.save_for_backward(out)
return out
@staticmethod
def backward(ctx, grad_a):
a, = ctx.saved_variables
return a * grad_a
x = Variable(torch.randn(10, 10), requires_grad=True)
trace, out = torch.jit.trace(MyFn.apply, x, nderivs=1)
out.sum().backward()
torch._C._jit_pass_dce(trace)
self.assertExpected(str(trace))
def test_traced_module(self):
input = Variable(torch.randn(3, 10))
hx = Variable(torch.randn(3, 20))
cx = Variable(torch.randn(3, 20))
@torch.jit.compile(nderivs=0)
class MyLSTMCell(nn.LSTMCell):
pass
lstm = MyLSTMCell(10, 20)
out = lstm(input, (hx, cx))
out2 = lstm(input, (hx, cx), _assert_compiled=True)
self.assertEqual(out, out2)
def test_autograd_closure(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
trace = torch._C._tracer_enter((x, y), 1)
z = torch.sigmoid(x * (x + y))
w = torch.abs(x * x * x + y) + Variable(torch.ones(1))
torch._C._tracer_exit((z, w))
torch._C._jit_pass_lint(trace)
(z * w).backward()
torch._C._jit_pass_dce(trace)
torch._C._jit_pass_lint(trace)
x_grad = x.grad.data.clone()
x.grad.data.zero_()
function = torch._C._jit_createAutogradClosure(trace)
torch._C._jit_pass_lint(trace)
z2, w2 = function()(x, y)
(z2 * w2).backward()
self.assertEqual(z, z2)
self.assertEqual(w, w2)
self.assertEqual(x.grad.data, x_grad)
def test_verify(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
@torch.jit.compile
def f(x, y):
z = torch.sigmoid(x * (x + y))
w = torch.abs(x * x * x + y) + Variable(torch.ones(1))
return z, w
torch.jit.verify(f, (x, y), loss_fn=lambda z, w: z * w, devices=[])
def test_constant(self):
x = Variable(torch.randn(2, 2), requires_grad=True)
trace = torch._C._tracer_enter((x,), 0)
y = Variable(torch.diag(torch.Tensor([2, 2])))
z = x.matmul(y)
torch._C._tracer_exit((z,))
function = torch._C._jit_createAutogradClosure(trace)
z2 = function()(x)
self.assertEqual(z, z2)
y.data.fill_(1000) # make sure the data has been cloned
x2 = Variable(torch.ones(2, 2) * 2, requires_grad=True)
z3 = function()(x2)
self.assertEqual(z3.data, torch.ones(2, 2) * 4)
def test_c_function(self):
x = Variable(torch.randn(1, 3, 10, 10))
m = nn.Conv2d(3, 8, 3, 1)
trace = torch._C._tracer_enter((x,) + tuple(m.parameters()), 0)
y = m(x)
torch._C._tracer_exit((y,))
self.assertExpected(str(trace))
def test_legacy_fail(self):
class MyLegacyFn(Function):
def forward(self, x):
return x
def backward(self, grad_output):
return grad_output
x = Variable(torch.Tensor([0]), requires_grad=True)
trace = torch._C._tracer_enter((x,), 0)
self.assertRaisesRegex(RuntimeError, "MyLegacyFn", lambda: MyLegacyFn()(x))
torch._C._tracer_exit((x,))
def test_inplace_transplant(self):
x = Variable(torch.Tensor([0]), requires_grad=True)
trace = torch._C._tracer_enter((x,), 0)
y = x.clone()
y.add_(2)
y.add_(3)
torch._C._tracer_exit((y,))
self.assertExpected(str(trace))
def test_inplace_flags(self):
class InplaceFn(Function):
@staticmethod
def forward(ctx, x):
ctx.mark_dirty(x)
return x.add_(1)
@staticmethod
def backward(ctx, go):
return go
class RegularFn(Function):
@staticmethod
def forward(ctx, x):
return x.add(1)
@staticmethod
def backward(ctx, go):
return go
x = Variable(torch.Tensor([0]), requires_grad=True)
trace = torch._C._tracer_enter((x,), 0)
y = RegularFn.apply(x)
y = InplaceFn.apply(y)
y = InplaceFn.apply(y)
y = RegularFn.apply(y)
torch._C._tracer_exit((y,))
ops = [n for n in trace.graph().nodes() if n.kind() != 'Select']
for op in ops:
self.assertTrue(op.hasAttribute('inplace'))
inplace_flags = [False, True, True, False]
for op, is_inplace in zip(ops, inplace_flags):
self.assertEqual(op.i('inplace'), is_inplace)
def test_inplace_check(self):
class MyInplaceFn(Function):
@staticmethod
def forward(self, x):
x.add_(1)
self.mark_dirty(x)
return x
@staticmethod
def backward(self, grad):
return grad
@torch.jit.compile(nderivs=0)
def fn(x):
return MyInplaceFn.apply(x)
x = Variable(torch.randn(5, 5))
fn(x) # trace
with self.assertRaisesRegex(RuntimeError, 'inplace MyInplaceFn'):
fn(x, _assert_compiled=True) # create closure
def test_backward(self):
a = Variable(torch.randn(2, 2), requires_grad=True)
b = Variable(torch.randn(2, 2), requires_grad=True)
x = a
y = a * b
trace = torch._C._tracer_enter((x, y), 2)
z = y * 2 * x
torch._C._tracer_exit((z,))
torch._C._jit_pass_lint(trace)
# Run first backward
grad, = torch.autograd.grad(z, x, Variable(torch.ones(2, 2), requires_grad=True), create_graph=True)
torch._C._jit_pass_lint(trace)
# Run second backward
grad.sum().backward(create_graph=True)
torch._C._jit_pass_lint(trace)
# Run dead code elimination to remove unused trace nodes
torch._C._jit_pass_dce(trace)
self.assertExpected(str(trace))
def test_backward_opaque(self):
x = Variable(torch.randn(3, 3), requires_grad=True)
y = Variable(torch.randn(3, 3), requires_grad=True)
trace = torch._C._tracer_enter((x, y), 2)
z = x.cross(y)
torch._C._tracer_exit((z,))
torch._C._jit_pass_lint(trace)
# Run first backward
grad, = torch.autograd.grad(z, x, Variable(torch.ones(3, 3), requires_grad=True), create_graph=True)
torch._C._jit_pass_lint(trace)
# Run dead code elimination to remove unused trace nodes
torch._C._jit_pass_dce(trace)
self.assertExpected(str(trace))
def test_backward_closure(self):
"""Check that autograd closures handle multiple stages correctly."""
x = Variable(torch.randn(1), requires_grad=True)
@torch.jit.compile(nderivs=2)
def fn(x):
return x * x
# Generate trace
grad_x, = torch.autograd.grad(fn(x), (x,), create_graph=True)
self.assertFalse(fn.has_trace_for(x))
grad_x.backward()
self.assertTrue(fn.has_trace_for(x))
x_grad = x.grad.data.clone()
x.grad.data.zero_()
# Run the trace
grad_x, = torch.autograd.grad(fn(x, _assert_compiled=True), (x,), create_graph=True)
grad_x.backward()
self.assertEqual(x.grad.data, x_grad)
def test_trace_expire(self):
x = Variable(torch.randn(2, 2), requires_grad=True)
y = Variable(torch.randn(2, 2), requires_grad=True)
def record_trace(num_backwards):
trace = torch._C._tracer_enter((x, y), num_backwards)
z = y * 2 * x
torch._C._tracer_exit((z,))
return z, trace
def check(expired, complete):
self.assertEqual(trace.is_expired, expired)
self.assertEqual(trace.is_complete, complete)
z, trace = record_trace(0)
check(False, True)
del z
check(False, True)
z, trace = record_trace(1)
check(False, False)
del z
check(True, False)
z, trace = record_trace(1)
check(False, False)
z.sum().backward()
check(False, True)
del z
check(False, True)
def test_multiuse_fn(self):
x = Variable(torch.randn(2, 2), requires_grad=True)
w = Variable(torch.randn(2, 2), requires_grad=True)
@torch.jit.compile
def cell(x, w):
return x * w + 2
out = cell(cell(cell(x, w), w), w)
self.assertFalse(cell.has_trace_for(x, w))
out.sum().backward()
self.assertTrue(cell.has_trace_for(x, w))
torch.jit.verify(cell, (x, w), devices=[])
def test_output_unflatten(self):
"""Check that outputs of traced functions retain the original structure and nesting"""
x = Variable(torch.randn(2, 2), requires_grad=True)
def fn(x):
return (x * 2, (x ** 2, x + 4, (x + 2,), ), x * 4)
expected_out = fn(x)
fn = torch.jit.compile(fn)
def recursive_sum(obj):
if isinstance(obj, Variable):
return obj.sum()
else:
return sum(recursive_sum(o) for o in obj)
recursive_sum(fn(x)).backward()
self.assertTrue(fn.has_trace_for(x))
self.assertEqual(fn(x, _assert_compiled=True), expected_out)
def test_input_flatten(self):
"""Check that inputs to traced functions are flattened"""
def make_var():
return Variable(torch.randn(1), requires_grad=True)
x = (make_var(), (make_var(), make_var()))
def fn(x, t):
y, z = t
return x * y * z
expected_out = fn(*x)
fn = torch.jit.compile(fn)
fn(*x).backward()
self.assertTrue(fn.has_trace_for(*x))
self.assertEqual(fn(*x, _assert_compiled=True), expected_out)
def test_flags(self):
x = Variable(torch.randn(2, 2))
y = Variable(torch.randn(2, 2))
@torch.jit.compile
def fn(x, y):
return (x * x + y * y + x * y).sum()
grads = {}
for rx, ry in product((True, False), repeat=2):
x.requires_grad = rx
y.requires_grad = ry
self.assertFalse(fn.has_trace_for(x, y))
out = fn(x, y)
self.assertFalse(fn.has_trace_for(x, y))
for v, name, compute in [(x, 'x', rx), (y, 'y', ry)]:
if not compute:
continue
grad_v, = torch.autograd.grad(out, v, retain_graph=True)
expected_grad = grads.setdefault(name, grad_v)
self.assertEqual(grad_v, expected_grad)
self.assertEqual(fn.has_trace_for(x, y), rx or ry)
def test_volatile_fallback(self):
"""Check that Traceable falls back to num_backwards=0 if given volatile inputs"""
x = Variable(torch.randn(2, 2))
y = Variable(torch.randn(2, 2), requires_grad=True)
@torch.jit.compile
def fn(x, y):
return x * x + x * y
out = fn(x, y)
self.assertFalse(fn.has_trace_for(x, y))
x.volatile = True
self.assertFalse(fn.has_trace_for(x, y))
out = fn(x, y)
self.assertTrue(fn.has_trace_for(x, y))
out2 = fn(x, y, _assert_compiled=True)
self.assertEqual(out, out2)
def test_backward_flag_checks(self):
x = Variable(torch.randn(1), requires_grad=True)
@torch.jit.compile(nderivs=2)
def fn(x):
return x * x
grad_x, = torch.autograd.grad(fn(x), (x,), create_graph=True)
self.assertFalse(fn.has_trace_for(x))
grad_x.backward()
self.assertTrue(fn.has_trace_for(x))
with self.assertRaisesRegex(RuntimeError, 'different flags'):
fn(x).backward(Variable(torch.ones(1), requires_grad=True))
with self.assertRaisesRegex(RuntimeError, 'different flags'):
grad_x, = torch.autograd.grad(fn(x), (x,), create_graph=True)
grad_x.backward(Variable(torch.ones(1), requires_grad=True))
# TODO: Test executing this
def test_python_ir(self):
x = Variable(torch.Tensor([0.4]), requires_grad=True)
y = Variable(torch.Tensor([0.7]), requires_grad=True)
def doit(x, y):
return torch.sigmoid(torch.tanh(x * (x + y)))
traced, _ = torch.jit.trace(doit, (x, y))
g = torch._C._jit_get_graph(traced)
g2 = torch._C.Graph()
g_to_g2 = {}
for node in g.inputs():
g_to_g2[node] = g2.addInput()
for node in g.nodes():
if node.kind() == "PythonOp":
n_ = g2.create(node.pyname(),
[g_to_g2[i] for i in node.inputs()]) \
.setType(node.typeOption()) \
.s_("note", "from_pyop") \
.i_("some_value", len(node.scalar_args()))
assert(n_.i("some_value") == len(node.scalar_args()))
else:
n_ = g2.createClone(node, lambda x: g_to_g2[x])
g_to_g2[node] = g2.appendNode(n_)
for node in g.outputs():
g2.registerOutput(g_to_g2[node])
t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
assert(t_node.attributeNames() == ["a"])
g2.appendNode(t_node)
assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
self.assertExpected(str(g2))
@unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA")
def test_cpp(self):
torch._C._jit_run_cpp_tests()
def test_batchnorm(self):
x = Variable(torch.randn(2, 2).fill_(1.0), requires_grad=True)
trace, _ = torch.jit.trace(nn.BatchNorm2d(2), x)
self.assertExpected(str(trace))
def test_dropout(self):
x = Variable(torch.randn(2, 2).fill_(1.0), requires_grad=True)
trace, _ = torch.jit.trace(nn.Dropout(0.6), x)
self.assertExpected(str(trace))
@unittest.skip("unrecognized NodeKind: SpatialBN")
def test_batchnorm_run_twice(self):
@torch.jit.compile(nderivs=0)
class MyBatchNorm2d(nn.BatchNorm2d):
pass
bn = MyBatchNorm2d(1)
x = Variable(torch.randn(5, 1))
z = bn(x)
z2 = bn(x, _assert_compiled=True)
self.assertEqual(z, z2)
def test_non_decorator_use_fails(self):
MyLSTM = torch.jit.compile(nn.LSTM)
self.assertRaisesRegex(TypeError, "class decorator", lambda: MyLSTM(2, 2))
def test_conv(self):
x = Variable(torch.randn(20, 16, 50, 40).fill_(1.0), requires_grad=True)
trace, _ = torch.jit.trace(nn.Conv2d(16, 13, 3, bias=False), x)
self.assertExpected(str(trace))
def test_reuse_function(self):
@torch.jit.compile(nderivs=0)
def clinear(*args):
return F.linear(*args)
def cast(x):
return x
input = Variable(cast(torch.randn(1, 1)))
weights = Variable(cast(torch.randn(1, 1)))
bias = Variable(cast(torch.randn(1, 1)))
# linear AKA addmm without bias is of particular interest
# because we allocate a zero-filled new variable when we execute,
# and then *fill* it with the result
r1 = clinear(clinear(input, weights), weights, _assert_compiled=True)
r2 = F.linear(F.linear(input, weights), weights)
self.assertEqual(r1, r2)
def test_mini_wlm(self):
"""Exercise null-edge pruning in the tracer."""
@torch.jit.compile
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.encoder = nn.Embedding(2, 2)
def forward(self, input, hidden):
emb = self.encoder(input)
hidden = hidden.clone() # simulate some RNN operation
return emb, hidden
model = MyModel()
x = Variable(torch.LongTensor([[0, 1], [1, 0]]))
y = Variable(torch.FloatTensor([0]))
z, _ = model(x, y)
z.sum().backward()
z, _ = model(x, y, _assert_compiled=True)
z.sum().backward()
@skipIfNoTorchVision
def test_alexnet(self):
x = Variable(torch.randn(10, 3, 224, 224).fill_(1.0), requires_grad=True)
trace, _ = torch.jit.trace(torchvision.models.AlexNet(), x)
self.assertExpected(str(trace))
# NB: Purposely NOT testing protobuf export here
if __name__ == '__main__':
run_tests()


@ -47,151 +47,151 @@ class OldModuleTest(ModuleTest):
# TODO: hessian tests
tests = [
OldModuleTest(nn.Add,
constructor_args=(torch.Size([5, 4]),),
(torch.Size([5, 4]),),
input_size=(3, 5, 4),
desc='3D'),
OldModuleTest(nn.Add,
constructor_args=(1, True),
(1, True),
input_size=(3, 1, 4),
desc='scalar'),
OldModuleTest(nn.AddConstant,
constructor_args=(3.5,),
(3.5,),
input_size=(3, 5, 4),
reference_fn=lambda i, _: i + 3.5,
check_inplace=True),
OldModuleTest(nn.BatchNormalization,
constructor_args=(10,),
(10,),
input_size=(4, 10),
desc='affine'),
OldModuleTest(nn.BatchNormalization,
constructor_args=(10, 1e-3, 0.3, False),
(10, 1e-3, 0.3, False),
input_size=(4, 10),
desc='not_affine'),
OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3,),
(3,),
input_size=(2, 3, 6, 6)),
OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3, 1e-3, 0.8),
(3, 1e-3, 0.8),
input_size=(2, 3, 6, 6),
desc='momentum'),
OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3, 1e-3, 0.8, False),
(3, 1e-3, 0.8, False),
input_size=(2, 3, 6, 6),
desc='no_affine'),
OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3,),
(3,),
input_size=(2, 3, 4, 4, 4)),
OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3, 1e-3, 0.7),
(3, 1e-3, 0.7),
input_size=(2, 3, 4, 4, 4),
desc='momentum'),
OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3, 1e-3, 0.7, False),
(3, 1e-3, 0.7, False),
input_size=(2, 3, 4, 4, 4),
desc='no_affine'),
OldModuleTest(nn.CMul,
constructor_args=(5, 6),
(5, 6),
input_size=(10, 5, 6),
desc='3D'),
OldModuleTest(nn.CMul,
constructor_args=(50, 4),
(50, 4),
input_size=(1, 50, 4),
desc='3D_single_example'),
OldModuleTest(nn.CMul,
constructor_args=(1, 5),
input_fn=lambda: torch.randn(10, 3, 5)[:, 1],
(1, 5),
input=torch.randn(10, 3, 5)[:, 1],
desc='3D_noncontiguous'),
OldModuleTest(nn.Exp,
input_size=(2, 3, 4),
reference_fn=lambda i, _: i.exp()),
OldModuleTest(nn.Log,
input_fn=lambda: torch.rand(2, 3, 2) + 0.1,
input=torch.rand(2, 3, 2) + 0.1,
reference_fn=lambda i, _: i.log()),
OldModuleTest(nn.Clamp,
constructor_args=(-2., 5.),
input_fn=lambda: torch.randn(3, 2, 50) * 6,
(-2., 5.),
input=torch.randn(3, 2, 50) * 6,
reference_fn=lambda i, _: i.clamp(-2, 5)),
OldModuleTest(nn.Abs,
input_size=(3, 20, 5),
reference_fn=lambda i, _: i.abs()),
OldModuleTest(nn.Bilinear,
constructor_args=(2, 3, 10),
(2, 3, 10),
input_size=[(4, 2), (4, 3)]),
OldModuleTest(nn.Bilinear,
constructor_args=(5, 4, 2),
(5, 4, 2),
input_size=[(2, 5), (2, 4)],
desc='small_output'),
OldModuleTest(nn.Euclidean,
constructor_args=(5, 7),
(5, 7),
input_size=(10, 5)),
OldModuleTest(nn.WeightedEuclidean,
constructor_args=(5, 7),
(5, 7),
input_size=(10, 5)),
OldModuleTest(nn.Cosine,
constructor_args=(5, 7),
(5, 7),
input_size=(10, 5)),
OldModuleTest(nn.CAddTable,
input_size=[(5, 7), (5, 7)]),
OldModuleTest(nn.CSubTable,
input_size=[(5, 7), (5, 7)]),
OldModuleTest(nn.CDivTable,
input_fn=lambda: [torch.randn(1, 7), torch.rand(1, 7) + 0.1]),
input=[torch.randn(1, 7), torch.rand(1, 7) + 0.1]),
OldModuleTest(nn.CMulTable,
input_size=[(5, 7), (5, 7)]),
OldModuleTest(nn.Square,
input_size=(10, 2, 4),
reference_fn=lambda i, _: i.mul(i)),
OldModuleTest(nn.Sqrt,
input_fn=lambda: torch.rand(10, 2, 4) + 0.01,
input=torch.rand(10, 2, 4) + 0.01,
reference_fn=lambda i, _: i.sqrt()),
OldModuleTest(nn.Squeeze,
input_size=(2, 1, 1, 4, 5),
reference_fn=lambda i, _: i.squeeze()),
OldModuleTest(nn.Squeeze,
constructor_args=(1,),
(1,),
input_size=(2, 1, 1, 4, 5),
reference_fn=lambda i, _: i.squeeze(1),
desc='dim'),
OldModuleTest(nn.Unsqueeze,
constructor_args=(1,),
(1,),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.view(2, 1, 4, 5)),
OldModuleTest(nn.Unsqueeze,
constructor_args=(0,),
(0,),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.view(1, 2, 4, 5),
desc='fist_dim'),
OldModuleTest(nn.Unsqueeze,
constructor_args=(3,),
(3,),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.view(2, 4, 5, 1),
desc='last_dim'),
OldModuleTest(nn.View,
constructor_args=(-1, 2, 20),
(-1, 2, 20),
input_size=(2, 2, 4, 5),
reference_fn=lambda i, _: i.view(-1, 2, 20),
desc='infer_batch'),
OldModuleTest(nn.View,
constructor_args=(2, 2, 2, 5),
(2, 2, 2, 5),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.view(2, 2, 2, 5),
desc='split_dim'),
OldModuleTest(nn.View,
constructor_args=(2, -1, 2, 5),
(2, -1, 2, 5),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.view(2, -1, 2, 5),
desc='infer_middle'),
OldModuleTest(nn.Sum,
constructor_args=(1,),
(1,),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.sum(1, keepdim=False)),
OldModuleTest(nn.Sum,
constructor_args=(1, True),
(1, True),
input_size=(2, 4, 5),
reference_fn=lambda i, _: i.sum(1, keepdim=False).div(i.size(1)),
desc='sizeAverage'),
OldModuleTest(nn.Mean,
constructor_args=(1,),
(1,),
input_size=(2, 4, 5),
reference_fn=lambda i, _: torch.mean(i, 1, keepdim=False)),
OldModuleTest(lambda: nn.Sequential().add(nn.GradientReversal()).add(nn.GradientReversal()),
@ -211,17 +211,17 @@ tests = [
a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1])))
),
OldModuleTest(nn.JoinTable,
constructor_args=(0,),
(0,),
input_size=[(10, 4), (10, 4)],
reference_fn=lambda i, _: torch.cat(i, 0),
desc='first_dim'),
OldModuleTest(nn.JoinTable,
constructor_args=(2,),
(2,),
input_size=[(2, 4, 2), (2, 4, 2)],
reference_fn=lambda i, _: torch.cat(i, 2),
desc='positive_dim_index'),
OldModuleTest(nn.JoinTable,
constructor_args=(-1,),
(-1,),
input_size=[(2, 4, 2, 4), (2, 4, 2, 4)],
reference_fn=lambda i, _: torch.cat(i, 3),
desc='negative_dim_index'),
@ -235,7 +235,7 @@ tests = [
input_size=(4, 5, 3),
reference_fn=lambda i, _: torch.max(i, 0, False)[0]),
OldModuleTest(nn.Max,
constructor_args=(1,),
(1,),
input_size=(4, 5, 3),
reference_fn=lambda i, _: torch.max(i, 1, False)[0],
desc='with_dimension'),
@ -243,175 +243,176 @@ tests = [
input_size=(4, 5, 3),
reference_fn=lambda i, _: torch.min(i, 0, False)[0]),
OldModuleTest(nn.Min,
constructor_args=(1,),
(1,),
input_size=(4, 5, 3),
reference_fn=lambda i, _: torch.min(i, 1, False)[0],
desc='with_dimension'),
OldModuleTest(nn.MixtureTable,
tuple(),
input_size=[(5, 3), (5, 3, 6)]),
OldModuleTest(nn.LookupTable,
constructor_args=(4, 3),
input_fn=lambda: torch.randperm(2).repeat(1, 2),
(4, 3),
input=torch.randperm(2).repeat(1, 2),
jacobian_input=False),
OldModuleTest(nn.Mul,
input_size=(2, 3, 4, 2),
reference_fn=lambda i, p: i * p[0][0]),
OldModuleTest(nn.MulConstant,
constructor_args=(4,),
(4,),
input_size=(2, 3, 4, 2),
reference_fn=lambda i, _: i * 4,
check_inplace=True),
OldModuleTest(nn.Narrow,
constructor_args=(0, 0),
(0, 0),
input_size=(2, 3, 4, 2),
reference_fn=lambda i, _: i.narrow(0, 0, 1)),
OldModuleTest(nn.Narrow,
constructor_args=(1, 1, 2),
(1, 1, 2),
input_size=(2, 3, 4, 2),
reference_fn=lambda i, _: i.narrow(1, 1, 2),
desc='length'),
OldModuleTest(nn.Transpose,
constructor_args=((1, 2), (1, 3)),
((1, 2), (1, 3)),
input_size=(2, 3, 4, 5),
reference_fn=lambda i, _: i.transpose(1, 2).transpose(1, 3)),
OldModuleTest(nn.Transpose,
constructor_args=((1, 2),),
((1, 2),),
input_size=(2, 3, 4, 5),
reference_fn=lambda i, _: i.transpose(1, 2),
desc='single_arg'),
# TODO: this seems to be very slow
OldModuleTest(nn.Replicate,
constructor_args=(2, 1),
(2, 1),
input_size=(10, 3, 4, 5),
reference_fn=lambda i, _: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)),
OldModuleTest(nn.Padding,
constructor_args=(0, 2, -10),
(0, 2, -10),
input_size=(2, 3, 4, 5)),
OldModuleTest(nn.Padding,
constructor_args=(0, 2, -10, 1),
(0, 2, -10, 1),
input_size=(2, 3, 4, 5),
desc='index'),
OldModuleTest(nn.Padding,
constructor_args=(0, -2, -10, 1),
(0, -2, -10, 1),
input_size=(2, 3, 4, 5),
desc='negative_pad'),
OldModuleTest(nn.PartialLinear,
constructor_args=(5, 6),
(5, 6),
input_size=(4, 5)),
OldModuleTest(lambda: nn.PartialLinear(5, 6).setPartition(torch.Tensor((2, 4))),
input_size=(4, 5),
fullname='PartialLinear_setPartition'),
OldModuleTest(nn.Power,
constructor_args=(2,),
(2,),
input_size=(2, 3, 4, 5)),
OldModuleTest(nn.Power,
constructor_args=(1.5,),
input_fn=lambda: torch.rand(3, 4, 5),
(1.5,),
input=torch.rand(3, 4, 5),
desc='fractional'),
OldModuleTest(nn.Reshape,
constructor_args=(4, 5),
(4, 5),
input_size=(3, 4 * 5),
desc='add_dim'),
OldModuleTest(nn.Reshape,
constructor_args=(4 * 5,),
(4 * 5,),
input_size=(3, 4, 5),
desc='squash_dim'),
OldModuleTest(nn.Select,
constructor_args=(1, 2),
(1, 2),
input_size=(3, 4, 5),
reference_fn=lambda i, _: i.select(1, 2)),
OldModuleTest(nn.SelectTable,
constructor_args=(1,),
(1,),
input_size=[(1,), (2,), (3,), (4,)],
reference_fn=lambda i, _: i[1]),
OldModuleTest(nn.SpatialAveragePooling,
constructor_args=(2, 2),
(2, 2),
input_size=(2, 3, 6, 6)),
OldModuleTest(nn.SpatialAveragePooling,
constructor_args=(2, 2, 2, 2),
(2, 2, 2, 2),
input_size=(2, 3, 6, 6),
desc='stride'),
OldModuleTest(nn.SpatialAveragePooling,
constructor_args=(2, 2, 2, 2, 1, 1),
(2, 2, 2, 2, 1, 1),
input_size=(2, 3, 6, 6),
desc='stride_pad'),
OldModuleTest(nn.SpatialAdaptiveMaxPooling,
constructor_args=(4, 4),
(4, 4),
input_size=(2, 3, 8, 8),
reference_fn=lambda i, _: nn.SpatialMaxPooling(2, 2).forward(i)),
OldModuleTest(nn.SpatialAdaptiveMaxPooling,
constructor_args=(4, 4),
(4, 4),
input_size=(2, 3, 7, 11),
desc='irregular'),
OldModuleTest(nn.SpatialConvolution,
constructor_args=(3, 4, 3, 3),
(3, 4, 3, 3),
input_size=(2, 3, 6, 6)),
OldModuleTest(nn.SpatialConvolution,
constructor_args=(3, 4, 3, 3, 2, 2),
(3, 4, 3, 3, 2, 2),
input_size=(2, 3, 6, 6),
desc='strided'),
OldModuleTest(nn.SpatialConvolution,
constructor_args=(3, 4, 3, 3, 2, 2, 1, 1),
(3, 4, 3, 3, 2, 2, 1, 1),
input_size=(2, 3, 6, 6),
desc='padding'),
OldModuleTest(nn.SpatialConvolutionLocal,
constructor_args=(3, 2, 4, 4, 2, 2),
(3, 2, 4, 4, 2, 2),
input_size=(1, 3, 4, 4)),
OldModuleTest(nn.SpatialConvolutionLocal,
constructor_args=(3, 2, 6, 6, 2, 2, 2, 2),
(3, 2, 6, 6, 2, 2, 2, 2),
input_size=(2, 3, 6, 6),
desc='stride'),
OldModuleTest(nn.SpatialConvolutionLocal,
constructor_args=(3, 2, 6, 6, 2, 2, 2, 2, 1, 1),
(3, 2, 6, 6, 2, 2, 2, 2, 1, 1),
input_size=(2, 3, 6, 6),
desc='stride_pad'),
OldModuleTest(nn.SpatialDivisiveNormalization,
constructor_args=(3,),
(3,),
input_size=(2, 3, 8, 8)),
OldModuleTest(nn.SpatialContrastiveNormalization,
constructor_args=(3,),
(3,),
input_size=(2, 3, 8, 8)),
OldModuleTest(nn.SpatialDilatedConvolution,
constructor_args=(3, 2, 3, 3, 2, 2, 1, 1, 2, 2),
(3, 2, 3, 3, 2, 2, 1, 1, 2, 2),
input_size=(2, 3, 8, 8)),
OldModuleTest(nn.SpatialDilatedConvolution,
constructor_args=(3, 2, 3, 3, 2, 2, 1, 1, 2, 2),
(3, 2, 3, 3, 2, 2, 1, 1, 2, 2),
input_size=(2, 3, 8, 8),
desc='stride_pad'),
OldModuleTest(nn.SpatialMaxPooling,
constructor_args=(3, 3, 2, 2, 1, 1),
(3, 3, 2, 2, 1, 1),
input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialReflectionPadding,
constructor_args=(1, 2, 3, 4),
(1, 2, 3, 4),
input_size=(2, 3, 8, 8)),
OldModuleTest(nn.SpatialReplicationPadding,
constructor_args=(1, 2, 3, 4),
(1, 2, 3, 4),
input_size=(2, 3, 4, 4)),
OldModuleTest(nn.SpatialZeroPadding,
constructor_args=(1, 2, 3, 4),
(1, 2, 3, 4),
input_size=(2, 3, 4, 4)),
OldModuleTest(nn.SpatialConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3),
(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3),
input_size=(3, 5, 5),
desc='oneToOne'),
OldModuleTest(nn.SpatialConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2),
(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2),
input_size=(3, 5, 5),
desc='oneToOne_stride'),
OldModuleTest(nn.SpatialConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3),
(nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3),
input_size=(3, 5, 5),
desc='full'),
OldModuleTest(nn.SpatialFullConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3),
(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3),
input_size=(3, 5, 5),
desc='oneToOne'),
OldModuleTest(nn.SpatialFullConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2),
(nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2),
input_size=(3, 5, 5),
desc='oneToOne_stride'),
OldModuleTest(nn.SpatialFullConvolutionMap,
constructor_args=(nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3),
(nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3),
input_size=(3, 5, 5),
desc='full'),
# TODO: test CUDA
@@ -424,103 +425,99 @@ tests = [
fullname='SpatialFractionalMaxPooling_size',
test_cuda=False),
OldModuleTest(nn.SpatialFullConvolution,
constructor_args=(3, 4, 3, 3, 2, 2, 1, 1, 1, 1),
(3, 4, 3, 3, 2, 2, 1, 1, 1, 1),
input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialLPPooling,
constructor_args=(3, 2, 2, 2, 2, 2),
(3, 2, 2, 2, 2, 2),
input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialSubSampling,
constructor_args=(3, 3, 3, 2, 2),
(3, 3, 3, 2, 2),
input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialSubtractiveNormalization,
constructor_args=(3,),
(3,),
input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialSubtractiveNormalization,
constructor_args=(3, torch.rand(3)),
(3, torch.rand(3)),
input_size=(1, 3, 7, 7),
desc='kernel'),
OldModuleTest(nn.SpatialUpSamplingNearest,
constructor_args=(2,),
(2,),
input_size=(1, 3, 4, 4)),
OldModuleTest(nn.TemporalConvolution,
constructor_args=(4, 5, 3),
(4, 5, 3),
input_size=(2, 10, 4)),
OldModuleTest(nn.TemporalConvolution,
constructor_args=(4, 5, 3, 2),
(4, 5, 3, 2),
input_size=(2, 10, 4),
desc='stride'),
# TODO: this runs in non-batch mode only
OldModuleTest(nn.TemporalSubSampling,
constructor_args=(4, 3),
(4, 3),
input_size=(10, 4)),
OldModuleTest(nn.TemporalSubSampling,
constructor_args=(4, 3, 2),
(4, 3, 2),
input_size=(10, 4),
desc='stride'),
OldModuleTest(nn.VolumetricAveragePooling,
constructor_args=(2, 2, 2),
(2, 2, 2),
input_size=(2, 3, 4, 4, 4)),
OldModuleTest(nn.VolumetricAveragePooling,
constructor_args=(2, 2, 2, 2, 2, 2),
(2, 2, 2, 2, 2, 2),
input_size=(2, 3, 5, 5, 5),
desc='stride'),
OldModuleTest(nn.VolumetricAveragePooling,
constructor_args=(2, 2, 2, 2, 2, 2, 1, 1, 1),
input_size=(2, 3, 5, 5, 5),
desc='stride_pad'),
OldModuleTest(nn.VolumetricConvolution,
constructor_args=(3, 4, 2, 2, 2),
(3, 4, 2, 2, 2),
input_size=(2, 3, 3, 3, 3)),
OldModuleTest(nn.VolumetricConvolution,
constructor_args=(3, 4, 2, 2, 2, 2, 2, 2),
(3, 4, 2, 2, 2, 2, 2, 2),
input_size=(2, 3, 5, 5, 5),
desc='stride'),
OldModuleTest(nn.VolumetricConvolution,
constructor_args=(3, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1),
(3, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1),
input_size=(2, 3, 5, 5, 5),
desc='stride_padding'),
OldModuleTest(nn.VolumetricFullConvolution,
constructor_args=(2, 3, 2, 2, 2),
(2, 3, 2, 2, 2),
input_size=(1, 2, 4, 4, 4)),
OldModuleTest(nn.VolumetricMaxPooling,
constructor_args=(2, 2, 2),
input_fn=lambda: (torch.randn(2, 3, 5, 5, 5) * 1000)),
(2, 2, 2),
input=(torch.randn(2, 3, 5, 5, 5) * 1000)),
OldModuleTest(nn.VolumetricMaxPooling,
constructor_args=(2, 2, 2, 2, 2, 2),
input_fn=lambda: (torch.randn(2, 3, 5, 5, 5) * 1000),
(2, 2, 2, 2, 2, 2),
input=(torch.randn(2, 3, 5, 5, 5) * 1000),
desc='stride'),
OldModuleTest(nn.VolumetricMaxPooling,
constructor_args=(2, 2, 2, 2, 2, 2, 1, 1, 1),
input_fn=lambda: (torch.randn(2, 3, 5, 5, 5) * 1000),
(2, 2, 2, 2, 2, 2, 1, 1, 1),
input=(torch.randn(2, 3, 5, 5, 5) * 1000),
desc='stride_padding'),
OldModuleTest(nn.VolumetricReplicationPadding,
constructor_args=(1, 2, 3, 4, 5, 6),
(1, 2, 3, 4, 5, 6),
input_size=(2, 3, 5, 5, 5)),
CriterionTest(nn.L1Cost,
input_size=(2, 3, 4, 5),
input=torch.randn(2, 3, 4, 5),
target=None),
CriterionTest(nn.L1HingeEmbeddingCriterion,
input_size=[(2, 3, 4, 5), (2, 3, 4, 5)],
input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)],
target=1),
CriterionTest(nn.L1HingeEmbeddingCriterion,
constructor_args=(2,),
input_size=[(2, 3, 4, 5), (2, 3, 4, 5)],
(2,),
input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)],
target=1,
desc='margin'),
CriterionTest(nn.WeightedMSECriterion,
constructor_args_fn=lambda: (torch.rand(3, 4, 5),),
input_size=(2, 3, 4, 5),
target_size=(2, 3, 4, 5)),
(torch.rand(3, 4, 5),),
input=torch.randn(2, 3, 4, 5),
target=torch.randn(2, 3, 4, 5)),
CriterionTest(nn.MarginCriterion,
input_size=(5, 10),
target_fn=lambda: torch.randn(5, 10).sign()),
target=torch.randn(5, 10).sign()),
CriterionTest(nn.ClassSimplexCriterion,
constructor_args=(30,),
input_fn=lambda: torch.randn(5, 30).mul(10).renorm(2, 0, 1),
target_fn=lambda: torch.rand(5).mul(30).floor().long(),
(30,),
input=torch.randn(5, 30).mul(10).renorm(2, 0, 1),
target=torch.rand(5).mul(30).floor().long(),
desc='margin'),
]
# TODO: FlattenTable gradient
@@ -532,7 +529,7 @@ tests = [
for p in (1, 2, 1.5):
tests.append(
OldModuleTest(nn.Normalize,
constructor_args=(p,),
(p,),
input_size=(4, 5),
# Eh, we need to use p as a default, so it's passed by value
reference_fn=lambda i, _, p=p: i.div(i.norm(p, 1, True).expand_as(i)),
@@ -541,7 +538,7 @@ for p in (1, 2, 1.5):
for p in range(1, 4 + 1):
tests.append(
OldModuleTest(nn.PairwiseDistance,
constructor_args=(p,),
(p,),
input_size=[(4, 10), (4, 10)],
desc=str(p))
)


@@ -11,15 +11,14 @@ import torch.cuda
import torch.multiprocessing as mp
from torch.autograd import Variable
from torch.nn import Parameter
from common import TestCase, run_tests, IS_WINDOWS
from common import TestCase, run_tests
TEST_REPEATS = 30
HAS_SHM_FILES = os.path.isdir('/dev/shm')
TEST_CUDA_IPC = torch.cuda.is_available() and \
sys.version_info[0] == 3 and \
sys.platform != 'darwin' and \
sys.platform != 'win32'
sys.platform != 'darwin'
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
@@ -319,7 +318,6 @@ class TestMultiprocessing(TestCase):
self.assertEqual(tensor_size, 5)
self.assertEqual(storage_size, 5)
@unittest.skipIf(IS_WINDOWS, 'not applicable to Windows (only fails with fork)')
@unittest.skipIf(not torch.cuda.is_available(), 'CUDA not available')
def test_cuda_bad_call(self):
# Initialize CUDA

File diff suppressed because it is too large.


@@ -1,4 +1,3 @@
import math
import unittest
import functools
from copy import deepcopy
@@ -9,7 +8,7 @@ import torch.nn.functional as F
from torch.optim import SGD
from torch.autograd import Variable
from torch import sparse
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau
from common import TestCase, run_tests
@@ -62,14 +61,13 @@ class TestOptim(TestCase):
self.assertLessEqual(params.data.dist(solution), initial_dist)
def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
def _test_rosenbrock_sparse(self, constructor):
params_t = torch.Tensor([1.5, 1.5])
params = Variable(params_t, requires_grad=True)
params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
params_c = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
optimizer = constructor([params])
if not sparse_only:
params_c = Variable(params_t.clone(), requires_grad=True)
optimizer_c = constructor([params_c])
optimizer_c = constructor([params_c])
solution = torch.Tensor([1, 1])
initial_dist = params.data.dist(solution)
@@ -101,9 +99,8 @@ class TestOptim(TestCase):
# Do cyclic coordinate descent
w = i % 2
optimizer.step(functools.partial(eval, params, True, w))
if not sparse_only:
optimizer_c.step(functools.partial(eval, params_c, False, w))
self.assertEqual(params.data, params_c.data)
optimizer_c.step(functools.partial(eval, params_c, False, w))
self.assertEqual(params.data, params_c.data)
self.assertLessEqual(params.data.dist(solution), initial_dist)
@@ -232,11 +229,6 @@ class TestOptim(TestCase):
lr=1e-3)
)
def test_sgd_sparse(self):
self._test_rosenbrock_sparse(
lambda params: optim.SGD(params, lr=5e-3)
)
def test_adam(self):
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2),
@@ -255,12 +247,6 @@ class TestOptim(TestCase):
lr=1e-3)
)
def test_sparse_adam(self):
self._test_rosenbrock_sparse(
lambda params: optim.SparseAdam(params, lr=4e-2),
True
)
def test_adadelta(self):
self._test_rosenbrock(
lambda params: optim.Adadelta(params),
@@ -437,10 +423,10 @@ class TestLRScheduler(TestCase):
# lr = 0.05 if epoch < 3
# lr = 0.005 if 3 <= epoch < 6
# lr = 0.0005 if 6 <= epoch < 9
# lr = 0.00005 if epoch >= 9
epochs = 10
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
epochs = 10
self._test(scheduler, targets, epochs)
def test_multi_step_lr(self):
@@ -448,116 +434,106 @@ class TestLRScheduler(TestCase):
# lr = 0.005 if 2 <= epoch < 5
# lr = 0.0005 if 5 <= epoch < 9
# lr = 0.00005 if epoch >= 9
epochs = 10
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
epochs = 10
self._test(scheduler, targets, epochs)
def test_exp_lr(self):
epochs = 10
single_targets = [0.05 * (0.9 ** x) for x in range(epochs)]
targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
single_targets = [0.05 * (0.9 ** x) for x in range(10)]
targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
scheduler = ExponentialLR(self.opt, gamma=0.9)
self._test(scheduler, targets, epochs)
def test_cos_anneal_lr(self):
epochs = 10
eta_min = 1e-10
single_targets = [eta_min + (0.05 - eta_min) *
(1 + math.cos(math.pi * x / epochs)) / 2
for x in range(epochs)]
targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
self._test(scheduler, targets, epochs)
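For reference, the single_targets computed above follow the cosine annealing schedule that CosineAnnealingLR implements; in LaTeX:

    \eta_t = \eta_{\min} + \tfrac{1}{2}\,(\eta_{\max} - \eta_{\min})\,\bigl(1 + \cos(\pi t / T_{\max})\bigr)

with \eta_{\max} the group's base learning rate (0.05 for the first param group here), \eta_{\min} = 10^{-10}, and T_{\max} = 10 epochs in this test.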
def test_reduce_lr_on_plateau1(self):
epochs = 10
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 20]
metrics = [10 - i * 0.0167 for i in range(20)]
scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
threshold=0.01, patience=5, cooldown=5)
epochs = 10
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau2(self):
epochs = 22
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2]
metrics = [10 - i * 0.0165 for i in range(22)]
scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
mode='min', threshold=0.1)
epochs = 22
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau3(self):
epochs = 22
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4]
metrics = [-0.8] * 2 + [-0.234] * 20
scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
threshold_mode='abs')
epochs = 22
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau4(self):
epochs = 20
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 20]
metrics = [1.5 * (1.025 ** i) for i in range(20)] # 1.025 > 1.1**0.25
scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3,
threshold_mode='rel', threshold=0.1)
epochs = 20
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau5(self):
epochs = 20
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
metrics = [1.5 * (1.005 ** i) for i in range(20)]
scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel',
threshold=0.1, patience=5, cooldown=5)
epochs = 20
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau6(self):
epochs = 20
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 20]
metrics = [1.5 * (0.85 ** i) for i in range(20)]
scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
threshold=0.1)
epochs = 20
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau7(self):
epochs = 20
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
metrics = [1] * 7 + [0.6] + [0.5] * 12
scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
threshold=0.1, patience=5, cooldown=5)
epochs = 20
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_reduce_lr_on_plateau8(self):
epochs = 20
for param_group in self.opt.param_groups:
param_group['lr'] = 0.5
targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14]
metrics = [1.5 * (1.005 ** i) for i in range(20)]
scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3],
threshold=0.1, patience=5, cooldown=5)
epochs = 20
self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
def test_lambda_lr(self):
epochs = 10
self.opt.param_groups[0]['lr'] = 0.05
self.opt.param_groups[1]['lr'] = 0.4
targets = [[0.05 * (0.9 ** x) for x in range(epochs)], [0.4 * (0.8 ** x) for x in range(epochs)]]
targets = [[0.05 * (0.9 ** x) for x in range(10)], [0.4 * (0.8 ** x) for x in range(10)]]
scheduler = LambdaLR(self.opt,
lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2])
epochs = 10
self._test(scheduler, targets, epochs)
def _test(self, scheduler, targets, epochs=10):


@@ -1,52 +0,0 @@
import torch
import numpy as np
from test_autograd import _make_cov
from torch.autograd import Variable
from common import TestCase, run_tests, skipIfNoLapack
from torch.autograd._functions.linalg import Potrf
class TestPotrf(TestCase):
def _calc_deriv_numeric(self, A, L, upper):
# numerical forward derivative
dA = Variable(_make_cov(5))
eps = 1e-6
outb = Potrf.apply(A + (eps / 2) * dA, upper)
outa = Potrf.apply(A - (eps / 2) * dA, upper)
dL = (outb - outa) / eps
return dA, dL
def _calc_deriv_sym(self, A, L, upper):
# reverse mode
Lbar = Variable(torch.rand(5, 5).tril())
if upper:
Lbar = Lbar.t()
L.backward(Lbar)
Abar = A.grad
return Abar, Lbar
def _check_total_variation(self, A, L, upper):
dA, dL = self._calc_deriv_numeric(A, L, upper)
Abar, Lbar = self._calc_deriv_sym(A, L, upper)
# compare df = Tr(dA^T Abar) = Tr(dL^T Lbar)
df1 = (dL * Lbar).sum()
df2 = (dA * Abar).sum()
atol = 1e-5
rtol = 1e-3
assert (df1 - df2).abs().data[0] <= atol + rtol * df1.abs().data[0]
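Written out, the identity checked numerically above is the first-order relation between the forward perturbation and the reverse-mode gradient:

    df = \operatorname{Tr}(dA^{\top}\bar{A}) = \operatorname{Tr}(dL^{\top}\bar{L})

i.e. the directional change of the scalar \langle L, \bar{L}\rangle along dA must agree whether it is computed via the numerical forward derivative dL or via the backward-propagated \bar{A}.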
@skipIfNoLapack
def test_potrf(self):
for upper in [True, False]:
A = Variable(_make_cov(5), requires_grad=True)
L = Potrf.apply(A, upper)
self._check_total_variation(A, L, upper)
if __name__ == '__main__':
run_tests()


@@ -110,16 +110,10 @@ class TestSparse(TestCase):
self.assertEqual(i, x._indices())
self.assertEqual(v, x._values())
self.assertEqual(x.ndimension(), 3)
self.assertEqual(self.safeCoalesce(x)._nnz(), 10)
self.assertEqual(x.coalesce()._nnz(), 10)
for i in range(3):
self.assertEqual(x.size(i), 100)
# Make sure that coalesce handles duplicate indices correctly
i = self.IndexTensor([[9, 0, 0, 0, 8, 1, 1, 1, 2, 7, 2, 2, 3, 4, 6, 9]])
v = self.ValueTensor([[idx**2, idx] for idx in range(i.size(1))])
x = self.SparseTensor(i, v, torch.Size([10, 2]))
self.assertEqual(self.safeCoalesce(x)._nnz(), 9)
# Make sure we can access empty indices / values
x = self.SparseTensor()
self.assertEqual(x._indices().numel(), 0)
@@ -551,43 +545,6 @@ class TestSparse(TestCase):
self._test_sparse_mask_shape([50, 30, 20])
self._test_sparse_mask_shape([5, 5, 5, 5, 5, 5])
def _test_zeros(self, shape, out_shape_i, out_shape_v=None):
out_shape = out_shape_i + (out_shape_v or [])
for nnz in [9, 12]:
out, _, _ = self._gen_sparse(len(out_shape_i), nnz, out_shape)
torch.zeros(*shape, out=out)
self.assertEqual(tuple(out.size()), tuple(shape))
self.assertTrue(out._indices().numel() == out._values().numel() == 0)
self.assertEqual(out._nnz(), 0)
self.assertEqual(out._dimI(), len(shape))
self.assertEqual(out._dimV(), 0)
def test_zeros(self):
i_shapes = [2, 3, 4]
v_shapes = [3, 4, 5, 6]
for i_dim in range(1, len(i_shapes) + 1):
for v_dim in range(len(v_shapes) + 1):
self._test_zeros([2, 3, 4], i_shapes[:i_dim], v_shapes[:v_dim])
def _test_zeros_like(self, template_shape_i, template_shape_v=None):
template_shape_v = template_shape_v or []
template_shape = template_shape_i + template_shape_v
for nnz in [9, 12]:
t, _, _ = self._gen_sparse(len(template_shape_i), nnz, template_shape)
res = torch.zeros_like(t)
self.assertEqual(tuple(res.size()), tuple(template_shape))
self.assertTrue(res._indices().numel() == res._values().numel() == 0)
self.assertEqual(res._nnz(), 0)
self.assertEqual(res._dimI(), len(template_shape_i))
self.assertEqual(res._dimV(), len(template_shape_v))
def test_zeros_like(self):
i_shapes = [2, 3, 4]
v_shapes = [3, 4, 5, 6]
for i_dim in range(1, len(i_shapes) + 1):
for v_dim in range(len(v_shapes) + 1):
self._test_zeros_like(i_shapes[:i_dim], v_shapes[:v_dim])
def _test_sparse_mask_hybrid_fixed(self):
i = self.IndexTensor([
[1, 3, 0, 4],
@@ -618,15 +575,6 @@ class TestSparse(TestCase):
self._test_sparse_mask_shape([50, 30, 20], [2])
self._test_sparse_mask_shape([5, 5, 5, 5, 5, 5], [2])
def test_sparse_add_coalesce(self):
i = self.IndexTensor([[1, 2, 1]])
v = self.ValueTensor([3, 4, 5])
x = self.SparseTensor(i, v, torch.Size([3]))
y = self.SparseTensor(i, v, torch.Size([3]))
z = x + y
self.assertFalse(z._indices().numel() != 2 and z.is_coalesced())
@cuda_only
def test_storage_not_null(self):
x = torch.cuda.sparse.FloatTensor(2)
@@ -650,28 +598,6 @@ class TestSparse(TestCase):
v = self.ValueTensor([5]).cuda(0)
self.assertRaises(RuntimeError, lambda: self.SparseTensor(i, v, torch.Size([3])))
def _test_new_device(self, size, device):
with torch.cuda.device(device):
x = torch.cuda.sparse.DoubleTensor(*size)
self.assertEqual(x.get_device(), device)
x1 = x.new()
x2 = x.new(2, 3)
self.assertEqual(x1.get_device(), device)
self.assertEqual(x2.get_device(), device)
@cuda_only
def test_new_device_single_gpu(self):
self._test_new_device((), 0)
self._test_new_device((30, 20), 0)
self._test_new_device((30, 20, 10), 0)
@cuda_only
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_new_device_multi_gpu(self):
self._test_new_device((), 1)
self._test_new_device((30, 20), 1)
self._test_new_device((30, 20, 10), 1)
class TestUncoalescedSparse(TestSparse):
def setUp(self):

File diff suppressed because it is too large.


@@ -16,8 +16,6 @@ from torch.utils.trainer import Trainer
from torch.utils.trainer.plugins import *
from torch.utils.trainer.plugins.plugin import Plugin
from torch.utils.serialization import load_lua
from torch.autograd._functions.utils import prepare_onnx_paddings
from torch.autograd._functions.utils import check_onnx_broadcast
HAS_CUDA = torch.cuda.is_available()
@@ -381,66 +379,6 @@ class TestLuaReader(TestCase):
return input, target.sub(1)
class TestONNXUtils(TestCase):
def test_prepare_onnx_paddings(self):
sizes = [2, 3, 4]
pad = [1, 2, 3, 4]
paddings = prepare_onnx_paddings(len(sizes), pad)
self.assertEqual(paddings, [0, 3, 1, 0, 4, 2])
def test_check_onnx_broadcast(self):
def try_check_onnx_broadcast(dims1, dims2, expect_broadcast, expect_fail):
broadcast = True
fail = False
try:
broadcast = check_onnx_broadcast(dims1, dims2)
except ValueError:
fail = True
self.assertEqual(broadcast, expect_broadcast)
self.assertEqual(fail, expect_fail)
# Case 1, check the case when len(dims1) < len(dims2) and numel(dims2) > 1
dims1 = [3, 4]
dims2 = [2, 3, 4]
try_check_onnx_broadcast(dims1, dims2, True, True)
# Case 2, check the case when len(dims1) < len(dims2) and numel(dims2) == 1
dims1 = [3, 4]
dims2 = [1, 1, 1]
try_check_onnx_broadcast(dims1, dims2, True, False)
# Case 3, check the case when len(dims1) > len(dims2) and numel(dims2) == 1
dims1 = [1, 1]
dims2 = [1]
try_check_onnx_broadcast(dims1, dims2, True, False)
# Case 4, check the case when len(dims1) > len(dims2) and dims1[x:] == dims2
dims1 = [2, 3, 4]
dims2 = [3, 4]
try_check_onnx_broadcast(dims1, dims2, True, False)
# Case 5, check the case when len(dims1) > len(dims2), but dims1[x:] != dims2
dims1 = [2, 3, 4]
dims2 = [1, 4]
try_check_onnx_broadcast(dims1, dims2, True, True)
# Case 6, check the equal case, no broadcast
dims1 = [3, 4]
dims2 = [3, 4]
try_check_onnx_broadcast(dims1, dims2, False, False)
# Case 7, check the case when len(dims1) == len(dims2), but dims1 != dims2
dims1 = [3, 4]
dims2 = [1, 4]
try_check_onnx_broadcast(dims1, dims2, True, True)
# Case 8, check the case when len(dims1) == len(dims2) and numel(dims2) == 1
dims1 = [3, 4]
dims2 = [1, 1]
try_check_onnx_broadcast(dims1, dims2, True, False)
TestLuaReader.init()
if __name__ == '__main__':
run_tests()


@@ -1,44 +0,0 @@
# Deprecated function signatures. These are exposed in Python, but not included
# in the error message suggestions.
- name: add(Tensor self, Scalar alpha, Tensor other)
aten: add(self, other, alpha)
- name: addbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2)
aten: addbmm(self, batch1, batch2, beta, alpha)
- name: addbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2)
aten: addbmm(self, batch1, batch2, beta, 1)
- name: addcdiv(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2)
aten: addcdiv(self, tensor1, tensor2, value)
- name: addcmul(Tensor self, Scalar value, Tensor tensor1, Tensor tensor2)
aten: addcmul(self, tensor1, tensor2, value)
- name: addmm(Scalar beta, Tensor self, Scalar alpha, Tensor mat1, Tensor mat2)
aten: addmm(self, mat1, mat2, beta, alpha)
- name: addmm(Scalar beta, Tensor self, Tensor mat1, Tensor mat2)
aten: addmm(self, mat1, mat2, beta, 1)
- name: addmv(Scalar beta, Tensor self, Scalar alpha, Tensor mat, Tensor vec)
aten: addmv(self, mat, vec, beta, alpha)
- name: addmv(Scalar beta, Tensor self, Tensor mat, Tensor vec)
aten: addmv(self, mat, vec, beta, 1)
- name: addr(Scalar beta, Tensor self, Scalar alpha, Tensor vec1, Tensor vec2)
aten: addr(self, vec1, vec2, beta, alpha)
- name: addr(Scalar beta, Tensor self, Tensor vec1, Tensor vec2)
aten: addr(self, vec1, vec2, beta, 1)
- name: baddbmm(Scalar beta, Tensor self, Scalar alpha, Tensor batch1, Tensor batch2)
aten: baddbmm(self, batch1, batch2, beta, alpha)
- name: baddbmm(Scalar beta, Tensor self, Tensor batch1, Tensor batch2)
aten: baddbmm(self, batch1, batch2, beta, 1)
- name: sub(Tensor self, Scalar alpha, Tensor other)
aten: sub(self, other, alpha)


@@ -1,660 +0,0 @@
# Defines derivative formulas and Python signatures of methods on Variable
#
# NB: The parameter names here MUST be consistent with the parameter names
# in ./torch/lib/ATen/Declarations.cwrap
- name: abs(Tensor self)
self: grad * self.sign()
- name: acos(Tensor self)
self: grad * -((-self * self + 1).sqrt().reciprocal())
- name: add(Tensor self, Scalar other, *, Scalar alpha=1)
self: grad
- name: add(Tensor self, Tensor other, *, Scalar alpha=1)
self: grad
other: maybe_multiply(grad, alpha)
- name: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
self: maybe_multiply(grad, beta)
batch1: grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) }).bmm(batch2.transpose(1, 2)) * alpha
batch2: batch1.transpose(1, 2).bmm(grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) })) * alpha
- name: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1)
self: grad
tensor1: grad * value / tensor2
tensor2: -grad * value * tensor1 / (tensor2 * tensor2)
- name: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1)
self: grad
tensor1: grad * tensor2 * value
tensor2: grad * tensor1 * value
- name: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1)
self: maybe_multiply(grad, beta)
mat1: mm_mat1_backward(grad, mat2, mat1.sizes(), mat1.strides(), alpha)
mat2: mm_mat2_backward(grad, mat1, mat2.sizes(), mat2.strides(), alpha)
- name: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1)
self: maybe_multiply(grad, beta)
mat: grad.ger(vec) * alpha
vec: mat.t().mv(grad) * alpha
- name: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1)
self: maybe_multiply(grad, beta)
vec1: grad.mv(vec2) * alpha
vec2: grad.t().mv(vec1) * alpha
- name: all # fallthrough
- name: any # fallthrough
- name: arange # fallthrough
- name: asin(Tensor self)
self: grad * (-self * self + 1).sqrt().reciprocal()
- name: atan(Tensor self)
self: grad * (self * self + 1).reciprocal()
- name: atan2(Tensor self, Tensor other)
self: grad * other * ((self * self + other * other).reciprocal())
other: grad * -self * ((self * self + other * other).reciprocal())
- name: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1)
self: maybe_multiply(grad, beta)
batch1: grad.bmm(batch2.transpose(1, 2)) * alpha
batch2: batch1.transpose(1, 2).bmm(grad) * alpha
- name: bmm(Tensor self, Tensor mat2)
self: grad.bmm(mat2.transpose(1, 2))
mat2: self.transpose(1, 2).bmm(grad)
- name: btrifact(Tensor self, Tensor info, bool pivot)
self: not_implemented("btrifact")
- name: btrisolve(Tensor self, Tensor LU_data, Tensor LU_pivots)
self: not_implemented("btrisolve")
- name: cat(TensorList tensors, int64_t dim=0)
tensors: cat_tensors_backward(grad, to_arg_sizes(tensors, dim), dim)
- name: cauchy # TODO: reinforce
- name: ceil(Tensor self)
self: zeros_like(grad)
- name: clone(Tensor self)
self: grad
- name: cos(Tensor self)
self: grad * -self.sin()
- name: cosh(Tensor self)
self: grad * self.sinh()
- name: cross(Tensor self, Tensor other, int64_t dim=-1)
self: other.cross(grad, dim)
other: grad.cross(self, dim)
- name: cumprod # complicated
- name: cumsum(Tensor self, int64_t dim)
self: cumsum_backward(grad, dim)
- name: data_ptr # fallthrough
- name: diag(Tensor self, int64_t diagonal=0)
self: grad.diag(diagonal)
- name: dist(Tensor self, Tensor other, Scalar p=2)
self: norm_backward(grad, self - other, p, result)
other: -norm_backward(grad, self - other, p, result)
- name: div(Tensor self, Scalar other)
self: grad / other
- name: div(Tensor self, Tensor other)
self: grad / other
other: -grad * self / (other * other)
- name: dot(Tensor self, Tensor tensor)
self: grad * tensor
tensor: grad * self
- name: eig(Tensor self, bool eigenvectors)
self: not_implemented("eig")
- name: eq(Tensor self, Scalar other)
self: zeros_like(self)
- name: eq(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: equal # fallthrough
- name: erf(Tensor self)
self: 2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
- name: erfinv(Tensor self)
self: 0.5 * sqrt(M_PI) * exp(self.erfinv().pow(2)) * grad
- name: exp(Tensor self)
self: grad * result
- name: expand(Tensor self, IntList size)
self: reduce_to(grad, self.sizes())
__view__: True
- name: eye # fallthrough
- name: fill(Tensor self, Scalar value)
self: zeros_like(grad)
- name: floor(Tensor self)
self: zeros_like(grad)
- name: fmod(Tensor self, Scalar other)
self: grad
- name: fmod(Tensor self, Tensor other)
self: grad
other: 'not_implemented("fmod: other")'
- name: frac(Tensor self)
self: grad
- name: gather(Tensor self, int64_t dim, Tensor index)
self: grad.type().zeros(self.sizes()).scatter_add_(dim, index, grad)
- name: ge(Tensor self, Scalar other)
self: zeros_like(self)
- name: ge(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: gels(Tensor self, Tensor A)
self: not_implemented("gels")
A: not_implemented("gels")
- name: geometric(Tensor self, double p, Generator Generator)
self: zeros_like(grad)
- name: geqrf(Tensor self)
self: not_implemented("geqrf")
- name: ger(Tensor self, Tensor vec2)
self: grad.mv(vec2)
vec2: grad.t().mv(self)
- name: gesv(Tensor self, Tensor A)
self: std::get<0>(gesv(grad, A.t()))
A: -at::mm(std::get<0>(gesv(grad, A.t())), solution.t())
- name: get_device # fallthrough
- name: gt(Tensor self, Scalar other)
self: zeros_like(self)
- name: gt(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: histc(Tensor self, int64_t bins, Scalar min, Scalar max)
self: not_implemented("histc")
- name: index_add(Tensor self, int64_t dim, Tensor index, Tensor source)
self: grad
source: grad.index_select(dim, index)
- name: index_copy(Tensor self, int64_t dim, Tensor index, Tensor source)
self: grad.clone().index_fill_(dim, index, 0)
source: grad.index_select(dim, index)
- name: index_fill(Tensor self, int64_t dim, Tensor index, Scalar value)
self: grad.clone().index_fill_(dim, index, 0)
- name: index_select(Tensor self, int64_t dim, Tensor index)
self: grad.type().zeros(self.sizes()).index_add_(dim, index, grad)
- name: inverse(Tensor self)
self: -at::mm(output.t(), at::mm(grad, output.t()))
- name: is_contiguous # fallthrough
- name: is_same_size # fallthrough
- name: is_set_to # fallthrough
- name: kthvalue(Tensor self, int64_t k, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
- name: le(Tensor self, Scalar other)
self: zeros_like(self)
- name: le(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: lerp(Tensor self, Tensor end, Scalar weight)
self: grad * (1 - weight.toDouble())
end: grad * weight
- name: lgamma(Tensor self)
self: not_implemented("lgamma")
- name: linspace(Scalar start, Scalar end, int64_t steps)
- name: log(Tensor self)
self: grad.div(self)
- name: log1p(Tensor self)
self: grad / (self + 1)
- name: log_normal(Tensor self, double mean, double std, Generator generator)
self: zeros_like(grad)
- name: logspace # fallthrough
- name: lt(Tensor self, Scalar other)
self: zeros_like(self)
- name: lt(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: masked_fill(Tensor self, Tensor mask, Scalar value)
self: grad.clone().masked_fill_(mask, 0)
- name: masked_scatter(Tensor self, Tensor mask, Tensor source)
self: grad.clone().masked_fill_(mask, 0)
source: masked_scatter_backward(grad, mask, source.sizes())
- name: masked_select(Tensor self, Tensor mask)
self: zeros_like(self).masked_scatter_(mask, grad)
- name: max(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, max_indices, self.sizes(), keepdim)
- name: max(Tensor self)
self: select_backward_scalar(grad, self, result)
- name: max(Tensor self, Tensor other)
self: grad.clone().masked_fill_(self <= other, 0)
other: grad.clone().masked_fill_(self > other, 0)
- name: mean(Tensor self, int64_t dim, bool keepdim)
self: sum_backward(grad, self.sizes(), dim, keepdim) / self.size(dim)
- name: mean(Tensor self)
self: grad.expand(self.sizes()) / self.numel()
- name: median(Tensor self)
self: select_backward_scalar(grad, self, result)
- name: median(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
- name: min(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, min_indices, self.sizes(), keepdim)
- name: min(Tensor self)
self: select_backward_scalar(grad, self, result)
- name: min(Tensor self, Tensor other)
self: grad.clone().masked_fill_(self >= other, 0)
other: grad.clone().masked_fill_(self < other, 0)
- name: mm(Tensor self, Tensor mat2)
self: mm_mat1_backward(grad, mat2, self.sizes(), self.strides(), 1)
mat2: mm_mat2_backward(grad, self, mat2.sizes(), mat2.strides(), 1)
- name: mode(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
- name: mul(Tensor self, Scalar other)
self: grad * other
- name: mul(Tensor self, Tensor other)
self: grad * other
other: grad * self
- name: multinomial # TODO: reinforce
- name: mv(Tensor self, Tensor vec)
self: grad.ger(vec)
vec: self.t().mv(grad)
- name: narrow(Tensor self, int64_t dimension, int64_t start, int64_t length)
self: grad._unnarrow(dimension, start, self.size(dimension))
__view__: True
- name: _unnarrow(Tensor self, int64_t dimension, int64_t offset, int64_t dimSize)
self: grad.narrow(dimension, offset, self.size(dimension))
- name: ne(Tensor self, Scalar other)
self: zeros_like(self)
- name: ne(Tensor self, Tensor other)
self: zeros_like(self)
other: zeros_like(other)
- name: neg(Tensor self)
self: grad.neg()
- name: nonzero(Tensor self)
self: zeros_like(grad)
- name: norm(Tensor self, Scalar p=2)
self: norm_backward(grad, self, p, result)
- name: norm(Tensor self, Scalar p, int64_t dim, bool keepdim=False)
self: norm_backward(grad, self, p, destination, dim, keepdim)
- name: numel # fallthrough
- name: ones # fallthrough
- name: orgqr(Tensor self, Tensor input2)
self: not_implemented("orgqr")
input2: not_implemented("orgqr")
- name: ormqr(Tensor self, Tensor input2, Tensor input3, bool left, bool transpose)
self: not_implemented("ormqr")
input2: not_implemented("ormqr")
input3: not_implemented("ormqr")
- name: permute(Tensor self, IntList dims)
self: permute_backwards(grad, dims)
__view__: True
- name: potrf(Tensor self, bool upper)
self: potrf_backward(grad, upper, output)
- name: potri(Tensor self, bool upper)
self: not_implemented("potri")
- name: potrs(Tensor self, Tensor input2, bool upper)
self: not_implemented("potri")
input2: not_implemented("potri")
- name: pow(Tensor self, Scalar exponent)
self: grad * exponent * self.pow(exponent.toDouble() - 1)
- name: pow(Tensor self, Tensor exponent)
self: grad * exponent * self.pow(exponent - 1)
exponent: grad * self.pow(exponent) * self.log()
# TODO: complicated
# - name: prod(Tensor self, int64_t dim, bool keepdim)
# - name: prod(Tensor self)
- name: pstrf(Tensor self, bool upper, Scalar tol)
self: not_implemented("pstrf")
- name: put(Tensor self, Tensor index, Tensor source, bool accumulate)
self: grad.clone().put_(index, zeros_like(source), accumulate)
source: grad.take(index)
- name: qr(Tensor self)
self: not_implemented("qr")
- name: rand # fallthrough
- name: randn # fallthrough
- name: randperm # fallthrough
- name: range # fallthrough
- name: reciprocal(Tensor self)
self: grad / -(self * self)
- name: remainder(Tensor self, Scalar other)
self: grad
- name: remainder(Tensor self, Tensor other)
self: grad
- name: renorm # TODO!
- name: round(Tensor self)
self: zeros_like(grad)
- name: rsqrt(Tensor self)
self: -0.5 * grad * result.pow(3)
- name: scatter(Tensor self, int64_t dim, Tensor index, Tensor src)
self: grad.clone().scatter_(dim, index, 0)
src: grad.gather(dim, index)
- name: scatter(Tensor self, int64_t dim, Tensor index, Scalar value)
self: grad.clone().scatter_(dim, index, 0)
- name: scatter_add(Tensor self, int64_t dim, Tensor index, Tensor src)
self: grad
src: grad.gather(dim, index)
- name: select(Tensor self, int64_t dim, int64_t sliceIndex)
self: maybe_unsqueeze(grad, dim, self.sizes().size() != 1)._unnarrow(dim, sliceIndex, self.size(dim))
__view__: True
- name: set # fallthrough
- name: sigmoid(Tensor self)
self: _sigmoid_backward(grad, result)
- name: sign(Tensor self)
self: zeros_like(grad)
- name: sin(Tensor self)
self: grad * self.cos()
- name: sinh(Tensor self)
self: grad * self.cosh()
- name: size # fallthrough
- name: sort(Tensor self, int64_t dim, bool descending)
self: select_backward(grad, dim, indices, self.sizes(), true)
- name: split(Tensor self, int64_t split_size, int64_t dim=0)
self: split_backward(grads, split_size, dim, self.sizes(), self.type())
- name: sqrt(Tensor self)
self: grad * self.pow(-0.5) / 2
- name: squeeze(Tensor self)
self: unsqueeze_to(grad, self.sizes());
__view__: True
- name: squeeze(Tensor self, int64_t dim)
self: maybe_unsqueeze(grad, dim, self.size(dim) == 1 && self.sizes().size() != 1)
__view__: True
- name: std
- name: storage_offset # fallthrough
- name: stride # fallthrough
- name: sub(Tensor self, Scalar other, *, Scalar alpha)
self: grad
- name: sub(Tensor self, Tensor other, *, Scalar alpha)
self: grad
other: -grad * alpha
- name: sum(Tensor self)
self: grad.expand(self.sizes())
- name: sum(Tensor self, int64_t dim, bool keepdim=False)
self: sum_backward(grad, self.sizes(), dim, keepdim)
- name: svd(Tensor self, bool some)
self: not_implemented("svd")
- name: symeig(Tensor self, bool eigenvectors, bool upper)
self: not_implemented("symeig")
- name: t(Tensor self)
self: grad.t()
__view__: True
- name: take(Tensor self, Tensor index)
self: zeros_like(self).put_(index, grad, true)
- name: tan(Tensor self)
self: grad / self.cos().pow(2)
- name: tanh(Tensor self)
self: _tanh_backward(grad, result)
- name: tensor # fallthrough
- name: topk(Tensor self, int64_t k, int64_t dim, bool largest, bool sorted)
self: select_backward(grad, dim, indices, self.sizes(), true)
- name: trace(Tensor self)
self: trace_backward(grad, self.sizes())
- name: transpose(Tensor self, int64_t dim0, int64_t dim1)
self: grad.transpose(dim0, dim1)
__view__: True
- name: tril(Tensor self, int64_t diagonal=0)
self: grad.tril(diagonal)
- name: triu(Tensor self, int64_t diagonal=0)
self: grad.triu(diagonal)
- name: trtrs(Tensor self, Tensor A, bool upper, bool transpose, bool unitriangular)
self: not_implemented("trtrs")
- name: trunc(Tensor self)
self: zeros_like(grad)
- name: unfold(Tensor self, int64_t dimension, int64_t size, int64_t step)
self: unfold_backward(grad, self.sizes(), dimension, size, step)
- name: uniform # fallthrough
- name: unsqueeze(Tensor self, int64_t dim)
self: grad.squeeze(dim)
__view__: True
- name: var # TODO
- name: view(Tensor self, IntList size)
self: grad.contiguous().view(self.sizes())
__view__: True
- name: zero(Tensor self)
self: zeros_like(grad)
- name: zeros # fallthrough
# NN double backwards support
- name: avg_pool2d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, bool ceil_mode, bool count_include_pad)
grad_output: avg_pool2d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad)
input: zeros_like(input)
- name: avg_pool3d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, bool ceil_mode, bool count_include_pad)
grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad)
input: zeros_like(input)
- name: elu_backward(Tensor grad_output, Scalar alpha, Tensor output)
grad_output: elu_backward(grad, alpha, output)
output: grad * grad_output * (output < 0).toType(grad.type())
- name: glu_backward(Tensor grad_output, Tensor input, int64_t dim)
grad_output: glu_double_backward_grad_output(grad, input, dim)
input: glu_double_backward(grad, grad_output, input, dim)
- name: hardshrink_backward(Tensor grad_output, Tensor input, Scalar lambd)
grad_output: hardshrink_backward(grad, input, lambd)
input: zeros_like(grad)
- name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val)
grad_output: hardtanh_backward(grad, input, min_val, max_val)
input: zeros_like(grad)
- name: kl_div_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
grad_output: kl_div_double_backward_grad_output(grad, input, target, size_average, reduce)
input: zeros_like(grad)
- name: l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
grad_output: l1_loss_double_backward_grad_output(grad, input, target, size_average, reduce)
input: zeros_like(grad)
- name: log_sigmoid_backward(Tensor grad_output, Tensor input, Tensor buffer)
grad_output: log_sigmoid_backward(grad, input, buffer)
input: log_sigmoid_double_backward(grad * grad_output, input)
- name: log_softmax_backward(Tensor grad_output, Tensor input, int64_t dim, Tensor output)
grad_output: grad - (grad * output.exp()).sum(dim, true)
input: log_softmax_double_backward(grad, grad_output, dim, output)
- name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope)
grad_output: leaky_relu_backward(grad, input, negative_slope)
input: zeros_like(grad)
- name: max_pool2d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
grad_output: max_pool2d_double_backward(grad, indices);
input: zeros_like(input)
- name: max_unpool2d_backward(Tensor grad_output, Tensor input, Tensor indices, IntList output_size)
grad_output: max_unpool2d(grad, indices, output_size)
input: zeros_like(input)
- name: mse_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
grad_output: mse_loss_double_backward_grad_output(grad, grad_output, input, target, size_average, reduce)
input: mse_loss_double_backward(grad * grad_output, input, size_average, reduce)
- name: nll_loss_backward(Tensor grad_output, Tensor input, Tensor target, Tensor weight, bool size_average, int64_t ignore_index, bool reduce, Tensor total_weight)
grad_output: nll_loss(grad, target, weight, size_average, ignore_index, reduce)
input: zeros_like(grad)
- name: nll_loss2d_backward(Tensor grad_output, Tensor input, Tensor target, Tensor weight, bool size_average, int64_t ignore_index, bool reduce, Tensor total_weight)
grad_output: nll_loss2d(grad, target, weight, size_average, ignore_index, reduce)
input: zeros_like(grad)
- name: prelu_backward(Tensor grad_output, Tensor input, Tensor weight, std::array<bool, 2> output_mask)
grad_output: zeros_like(grad_output)
input: zeros_like(input)
weight: zeros_like(weight)
- name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, Tensor noise)
grad_output: rrelu_backward(grad, input, lower, upper, training, noise)
input: zeros_like(grad)
- name: smooth_l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce)
grad_output: smooth_l1_loss_double_backward_grad_output(grad, grad_output, input, target, size_average, reduce)
input: smooth_l1_loss_double_backward(grad * grad_output, input, target, size_average, reduce)
- name: softplus_backward(Tensor grad_output, Tensor input, Scalar beta, Scalar threshold, Tensor output)
grad_output: softplus_backward(grad, input, beta, threshold, output)
input: softplus_double_backward(grad * grad_output, input, beta, threshold)
- name: softmax_backward(Tensor grad_output, Tensor input, int64_t dim, Tensor output)
grad_output: softmax_backward(grad, input, dim, output)
input: softmax_double_backward(grad, grad_output, dim, output)
- name: soft_margin_loss_backward(Tensor input, Tensor target, bool size_average)
input: soft_margin_loss_double_backward(grad, input, target, size_average)
- name: softshrink_backward(Tensor grad_output, Tensor input, Scalar lambd)
grad_output: softshrink_backward(grad, input, lambd)
input: zeros_like(grad)
- name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value)
grad_output: threshold_backward(grad, input, threshold, value)
input: zeros_like(grad)
- name: _sigmoid_backward(Tensor grad_output, Tensor output)
grad_output: _sigmoid_backward(grad, output)
output: grad * grad_output * (-2 * output + 1)
- name: _tanh_backward(Tensor grad_output, Tensor output)
grad_output: _tanh_backward(grad, output)
output: -2 * output * grad * grad_output


@@ -1,181 +0,0 @@
from .nested_dict import nested_dict
from tools.shared.module_loader import import_module
CodeTemplate = import_module('code_template', 'torch/lib/ATen/code_template.py').CodeTemplate
PY_VARIABLE_METHOD_VARARGS = CodeTemplate("""\
static PyObject * ${pycname}(PyObject* self, PyObject* args, PyObject* kwargs)
{
HANDLE_TH_ERRORS
static PythonArgParser parser({
${prototypes}
});
${unpack_self}
PyObject* parsed_args[${max_args}];
auto r = parser.parse(args, kwargs, parsed_args);
${dispatch}
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
""")
PY_VARIABLE_METHOD_NOARGS = CodeTemplate("""\
static PyObject * ${pycname}(PyObject* self, PyObject* args)
{
HANDLE_TH_ERRORS
${unpack_self}
return wrap(${dispatch_name}(${actuals}));
END_HANDLE_TH_ERRORS
}
""")
PY_VARIABLE_CASE = CodeTemplate("""\
${cond} (r.idx == ${i}) {
return wrap(${dispatch_name}(${actuals}));
""")
PY_VARIABLE_DISPATCH = CodeTemplate("""\
inline ${return_type} ${dispatch_name}(${formal_args}) {
${AutoNoGIL}
${AutoGPU}
return ${dispatch_call}(${dispatch_args});
}
""")
PY_VARIABLE_METHOD_DEF = CodeTemplate("""\
{"${name}", (PyCFunction)${pycname}, ${flags}, NULL},""")
UNPACK_SELF = "auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata;"
# XXX: if you got here because of an assertion failure, it doesn't mean
# it's enough to just extend the list here. Before you do this, make sure
# to add an appropriate wrap() overload in torch/csrc/autograd/utils/wrap_outputs.h.
SUPPORTED_RETURN_TYPES = {
'Tensor', 'std::tuple<Tensor,Tensor>',
'std::tuple<Tensor,Tensor,Tensor>', 'std::vector<Tensor>',
'Scalar', 'bool', 'int64_t', 'void*'
}
def create_python_bindings(
python_functions, py_methods, py_method_defs, py_method_dispatch,
is_class):
"""python_variable_methods.cpp
Generates Python bindings to Variable methods
"""
unpack_methods = {
'const Tensor &': 'tensor',
'Generator *': 'generator',
'Storage &': 'storage',
'int64_t': 'toInt64',
'bool': 'toBool',
'double': 'toDouble',
}
def first_tensor_arg(arguments):
for arg in arguments:
if arg['simple_type'] in {'Tensor', 'TensorList'}:
return arg['name']
return None
def auto_gpu(option):
tensor_arg = first_tensor_arg(option['arguments'])
if tensor_arg is None:
return ''
return 'AutoGPU auto_gpu({});'.format(tensor_arg)
def emit_dispatch(i, function):
env = {}
simple_return_type = function['return_type'].replace(' &', '')
assert simple_return_type in SUPPORTED_RETURN_TYPES, \
function['name'] + ' returns unsupported type: ' + simple_return_type
actuals = []
formal_args = []
arg_idx = 0
for arg in function['arguments']:
if 'Tensor' in function['method_of'] and arg['name'] == 'self':
formal_args.append('Tensor & {}'.format(arg['name']))
actuals.append('self_')
continue
typename = arg['type']
if typename.startswith('IntList['):
typename = 'IntList'
if typename.startswith('LongTensor'):
typename = 'Tensor'
unpack = unpack_methods.get(typename, typename.lower())
actuals.append('r.{}({})'.format(unpack, arg_idx))
dispatch_type = typename
dispatch_type = 'const Tensor &' if dispatch_type == 'Tensor' else dispatch_type
formal_args.append('{} {}'.format(dispatch_type, arg['name']))
arg_idx += 1
env['i'] = i
env['actuals'] = actuals
env['formal_args'] = formal_args
if 'call_args' in function:
env['dispatch_args'] = function['call_args']
else:
env['dispatch_args'] = [arg['name'] for arg in function['arguments']]
if 'Tensor' in function['method_of']:
env['dispatch_args'] = [arg for arg in env['dispatch_args'] if arg != 'self']
env['dispatch_call'] = 'self.{}'.format(function['name'])
else:
env['dispatch_call'] = 'at::{}'.format(function['name'])
env['AutoNoGIL'] = 'AutoNoGIL no_gil;'
env['AutoGPU'] = auto_gpu(function)
env['cond'] = 'if' if i == 0 else '} else if'
env = nested_dict(env, function)
py_method_dispatch.append(PY_VARIABLE_DISPATCH.substitute(env))
return PY_VARIABLE_CASE.substitute(env)
def process_function(name, functions):
env = {
'name': name,
'dispatch_name': 'dispatch_{}'.format(name),
'pycname': 'THPVariable_{}'.format(name),
'prototypes': [],
'max_args': max(len(o['arguments']) for o in functions),
'unpack_self': [],
'dispatch': [],
}
is_method = 'Tensor' in functions[0]['method_of']
if is_method:
env['unpack_self'] = [UNPACK_SELF]
for o in functions:
prototype = o['prototype']
if is_method:
prototype = prototype.replace('Tensor self, ', '')
prototype = prototype.replace('Tensor self', '')
if 'deprecated' in o:
prototype += '|deprecated'
env['prototypes'].append('"{}",'.format(prototype))
for i, option in enumerate(functions):
env['dispatch'].append(emit_dispatch(i, nested_dict(env, option)))
env['dispatch'].append('}')
if len(functions) == 1 and len(functions[0]['args']) == 1 and is_method:
tmpl = PY_VARIABLE_METHOD_NOARGS
env['actuals'] = ['self_']
env['flags'] = 'METH_NOARGS'
else:
tmpl = PY_VARIABLE_METHOD_VARARGS
env['flags'] = 'METH_VARARGS | METH_KEYWORDS'
if is_class and not is_method:
env['flags'] += ' | METH_STATIC'
py_methods.append(tmpl.substitute(env))
py_method_defs.append(PY_VARIABLE_METHOD_DEF.substitute(env))
for name in sorted(python_functions.keys()):
process_function(name, python_functions[name])


@@ -1,992 +0,0 @@
import argparse
import copy
import os
import re
import yaml
from collections import defaultdict
from tools.shared.module_loader import import_module
from .nested_dict import nested_dict
CodeTemplate = import_module('code_template', 'torch/lib/ATen/code_template.py').CodeTemplate
try:
# use faster C loader if available
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
METHOD_DECLARATION = CodeTemplate("""\
virtual ${return_type} ${method_prefix}${api_name}(${formals}) const override;
""")
METHOD_DEFINITION = CodeTemplate("""\
${return_type} VariableType::${method_prefix}${api_name}(${formals}) const {
${type_definition_body}
}
""")
METHOD_DEFINITION_NYI = CodeTemplate("""\
throw std::runtime_error("VariableType::${api_name} NYI");""")
BASE_CALL = CodeTemplate("""\
baseType->${method_prefix}${base_name}(${unpacked_args})""")
METHOD_DEFINITION_FALLTHROUGH = CodeTemplate("""\
${unpack_args}
return baseType->${method_prefix}${api_name}(${unpacked_args});""")
METHOD_DEFINITION_FALLTHROUGH_VARIABLE = CodeTemplate("""\
${unpack_args}
auto flags = compute_flags({ ${args_with_derivatives} });
auto var = as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));
var.is_volatile() = flags.is_volatile;
return var;
""")
METHOD_DEFINITION_FALLTHROUGH_INPLACE = CodeTemplate("""\
${unpack_args}
baseType->${method_prefix}${api_name}(${unpacked_args});
increment_version(self);
return self;
""")
UNPACK_TENSOR = CodeTemplate("""\
auto${ref} ${arg_name}_ = unpack${suffix}(${arg_name}, "${arg_name}", ${arg_pos});""")
FUNCTION_DECLARATION = CodeTemplate("""\
struct ${op} : public TraceableFunction {
using TraceableFunction::TraceableFunction;
variable_list apply(const variable_list& grads) override;
std::string name() override { return "${op}"; }
void releaseVariables() override {
${release_variables}
}
${saved_variables}
};
""")
FUNCTION_DEFINITION = CodeTemplate("""\
variable_list ${op}::apply(const variable_list& grads) {
variable_list grad_inputs{${num_inputs}};
${body}
ensure_no_aten_scalars(grad_inputs);
return grad_inputs;
}
""")
PY_FUNCTION_DEFINITION = CodeTemplate("""\
static PyTypeObject ${op}Class;
addClass<${op}>(${op}Class, "${op}");
""")
DERIVATIVE_TENSOR = CodeTemplate("""\
if (should_compute_output(${idx})) {
grad_inputs[${idx}] = ${derivative};
}
""")
DERIVATIVE_MULTI = CodeTemplate("""\
if (should_compute_output({ ${idxs} })) {
auto output_mask = std::array<bool, ${n}>{
${masks}
};
std::tie(${grad_inputs}) = ${derivative};
}
""")
DERIVATIVE_TENSORLIST = CodeTemplate("""\
if (should_compute_any_outputs()) {
grad_inputs = ${derivative};
}
""")
METHOD_DEFINITION_DERIVATIVE = CodeTemplate("""\
profiler::RecordFunction profiler("${name}");
${unpack_args}
${buffers}
${check_inplace}
${check_no_requires_grad}
std::shared_ptr<${op}> grad_fn;
auto flags = compute_flags({ ${args_with_derivatives} });
if (flags.requires_grad) {
grad_fn = std::make_shared<${op}>(${op_ctor});
grad_fn->is_executable = true;
grad_fn->next_functions = compute_next_functions({ ${args_with_derivatives} });
${save_inputs}
}
${base_impl_call}
${version_counter}
set_flags(${result}, flags, grad_fn);
${save_outputs}
${record_trace}
return ${return_value};
""")
RECORD_TRACE = CodeTemplate("""\
if (jit::tracer::isTracing({ ${tensor_args} })) {
jit::Node *n = jit::tracer::recordTrace( "${trace_name}", ${trace_inputs}, ${trace_outputs} );
${record_attributes}
}
""")
RECORD_ATTRIBUTE = CodeTemplate("""\
setattr(n, jit::stringToSymbol("${name}"), ${name});""")
CONDITIONAL = CodeTemplate("""\
if (${cond}) {
${statements}
}
""")
FUNCTION_PROTOTYPE = CodeTemplate("""\
${name}(${typed_args})""")
BUFFER_DECLARATION = CodeTemplate("""\
auto ${name} = tensor();
auto& ${name}_ = static_cast<VariableImpl*>(${name}.get())->data;""")
GENERATED_COMMENT = CodeTemplate("""\
generated from tools/autograd/templates/${filename}""")
template_path = os.path.join(os.path.dirname(__file__), 'templates')
VARIABLE_TYPE_H = CodeTemplate.from_file(template_path + '/VariableType.h')
VARIABLE_TYPE_CPP = CodeTemplate.from_file(template_path + '/VariableType.cpp')
FUNCTIONS_H = CodeTemplate.from_file(template_path + '/Functions.h')
FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/Functions.cpp')
PY_VARIABLE_METHODS_CPP = CodeTemplate.from_file(template_path + '/python_variable_methods.cpp')
PY_VARIABLE_DISPATCH_H = CodeTemplate.from_file(template_path + '/python_variable_methods_dispatch.h')
PY_NN_FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/python_nn_functions.cpp')
PY_NN_FUNCTIONS_H = CodeTemplate.from_file(template_path + '/python_nn_functions.h')
PY_NN_DISPATCH_H = CodeTemplate.from_file(template_path + '/python_nn_functions_dispatch.h')
PY_FUNCTIONS_H = CodeTemplate.from_file(template_path + '/python_functions.h')
PY_FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/python_functions.cpp')
derivatives_path = os.path.join(os.path.dirname(__file__), 'derivatives.yaml')
deprecated_path = os.path.join(os.path.dirname(__file__), 'deprecated.yaml')
# Functions with these return types delegate completely to the underlying
# base at::Type
FALLTHROUGH_RETURN_TYPES = {'int64_t', 'void*', 'bool', 'IntList'}
FALLTHROUGH_FUNCTIONS = {
'arange', 'eye', 'linspace', 'logspace', 'tensor', 'ones', 'ones_like',
'rand', 'randn', 'randperm', 'range', 'tensor', 'uniform', 'zeros',
'zeros_like', 'set_',
# these are only implemented on integral types
'__and__', '__iand__', '__ilshift__', '__ior__', '__irshift__', '__ixor__',
'__lshift__', '__or__', '__rshift__', '__xor__',
}
MANUAL_IMPLEMENTATIONS = {
'contiguous', 'resize_', 'resize_as_'
}
# Matches "foo" in "foo, bar" but not "foobar". Used to search for the
# occurrence of a parameter in the derivative formula
IDENT_REGEX = r'(^|\W){}($|\W)'
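As a quick standalone illustration (the formulas below are made-up example inputs, not taken from the generator), the pattern is instantiated per parameter name and only matches whole-identifier occurrences:

    import re

    IDENT_REGEX = r'(^|\W){}($|\W)'

    # 'self' appears as a whole identifier in the first formula, so it matches;
    # 'foo' only occurs inside 'foobar' in the second, so it does not.
    print(bool(re.search(IDENT_REGEX.format('self'), 'grad * self.sign()')))  # True
    print(bool(re.search(IDENT_REGEX.format('foo'), 'foobar + 1')))           # False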
def format_return_type(returns):
if len(returns) == 0:
return 'void'
elif len(returns) == 1:
return returns[0]['type']
else:
return_types = [r['type'] for r in returns]
return 'std::tuple<{}>'.format(','.join(return_types))
def write(dirname, name, template, env):
env['generated_comment'] = GENERATED_COMMENT.substitute(filename=name)
path = os.path.join(dirname, name)
with open(path, 'w') as f:
f.write(template.substitute(env))
def saved_variables(formula, args):
# find which arguments need to be saved
saved = []
for arg in args:
if 'name' not in arg:
# some returned arguments do not have names
continue
name = arg['name']
def replace_sizes(m):
res = name + '_sizes'
saved.append({'name': res, 'type': 'IntList'})
return res
def replace_zeros(m):
r = name + '_info'
saved.append({'name': r, 'type': 'TypeAndSize'})
return r + '.zeros()'
def replace_size_n(m):
res = name + '_argsize_{}'.format(*m.groups())
saved.append({'name': res, 'type': 'int64_t'})
return res
def replace_to_arg_sizes(m):
res = name + '_argsizes_{}'.format(*m.groups())
saved.append({'name': res, 'type': 'IntList'})
return res
# replace self.sizes() with self_sizes
formula = re.sub(r'{}.sizes\(\)'.format(name), replace_sizes, formula)
# replace zeros_like(self) with self_info
formula = re.sub(r'zeros_like\({}\)'.format(name), replace_zeros, formula)
# replace self.size(2) with self_size_2
formula = re.sub(r'{}.size\((\w+)\)'.format(name), replace_size_n, formula)
# replace to_arg_sizes(self, 2) with self_argsizes_2
formula = re.sub(r'to_arg_sizes\({}, (\w+)\)'.format(name), replace_to_arg_sizes, formula)
if re.search(IDENT_REGEX.format(name), formula):
arg = copy.deepcopy(arg)
arg['type'] = arg['type'].replace('const ', '').replace(' &', '')
saved.append(arg)
return formula, saved
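# Sketch of the rewriting above (illustrative, not in the original file): for the
# argument 'self' and the formula 'grad.expand(self.sizes())', the first re.sub
# turns the formula into 'grad.expand(self_sizes)' and records
# {'name': 'self_sizes', 'type': 'IntList'} in `saved`; since 'self' no longer
# appears as a bare identifier afterwards, only the sizes are saved, not the
# tensor itself.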
def create_derivative(declaration, formula, output_indices, var_names):
returns = [r for r in declaration['returns'] if r.get('name') != 'self']
arguments = declaration['arguments']
if any(arg['name'] == 'inplace' for arg in arguments):
for arg in arguments:
if arg['name'] == 'input':
returns += [arg]
arguments = [arg for arg in arguments if arg['name'] != 'input']
formula, saved_inputs = saved_variables(formula, arguments)
formula, saved_outputs = saved_variables(formula, returns)
return {
'formula': formula,
'output_indices': output_indices,
'saved_inputs': saved_inputs,
'saved_outputs': saved_outputs,
'var_names': var_names,
}
def create_autograd_function(name, derivatives, num_inputs, buffers=None):
return {
'name': name,
'op': to_camel_case(name) + 'Backward',
'num_inputs': num_inputs,
'derivatives': derivatives,
'buffers': [] if buffers is None else buffers,
'saved_inputs': all_saved_variables(derivatives, 'saved_inputs'),
'saved_outputs': all_saved_variables(derivatives, 'saved_outputs'),
}
def all_saved_variables(derivatives, key):
seen = set()
saved = []
for d in derivatives:
for saved_arg in d[key]:
if saved_arg['name'] in seen:
continue
seen.add(saved_arg['name'])
saved.append(saved_arg)
return saved
def to_camel_case(name):
return ''.join([p.title() for p in name.split('_')])
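# e.g. (illustrative): to_camel_case('index_select') -> 'IndexSelect', which
# create_autograd_function then turns into the struct name 'IndexSelectBackward'.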
def split_name_params(prototype):
name, params = re.match('(\w+)\((.*)\)', prototype).groups()
return name, params.split(', ')
def load_derivatives(path, declarations_by_signature):
with open(path, 'r') as f:
definitions = yaml.load(f, Loader=Loader)
def canonical_declaration(declarations, name):
for declaration in declarations:
if declaration['name'] == name:
return declaration
# some functions only have in-place variants
assert name + '_' == declarations[0]['name']
return declarations[0]
# Parse each entry from derivatives.yaml
autograd_functions = []
for defn in definitions:
if '(' not in defn['name']:
continue
name, params = split_name_params(defn['name'])
param_types = [p.split(' ')[0] for p in params if p != '*']
signature = '{}({})'.format(name, ', '.join(param_types))
declarations = declarations_by_signature[signature]
if len(declarations) == 0:
raise RuntimeError('no ATen declaration found for: {}'.format(signature))
canonical = canonical_declaration(declarations, name)
num_inputs = 0
derivatives = []
for arg in canonical['arguments']:
if arg['name'] not in defn:
continue
formula = defn[arg['name']]
if arg['type'] == 'TensorList':
num_inputs = ''
output_indices = '*'
else:
output_indices = [num_inputs]
num_inputs += 1
derivatives.append(create_derivative(canonical, formula, output_indices, [arg['name']]))
func = create_autograd_function(name, derivatives, num_inputs)
func['__view__'] = defn.get('__view__', False)
autograd_functions.append(func)
for declaration in declarations:
declaration['derivative'] = func
return autograd_functions
def ensure_unique_names(autograd_functions):
# de-duplicate operation names
functions_by_name = defaultdict(list)
for func in autograd_functions:
functions_by_name[func['op']].append(func)
for op in functions_by_name.keys():
overloads = functions_by_name[op]
if len(overloads) > 1:
for i, func in enumerate(overloads):
func['op'] += str(i)
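# e.g. (illustrative): if two overloads both map to 'AddBackward', they are renamed
# to 'AddBackward0' and 'AddBackward1' so the generated C++ structs don't collide.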
def preprocess_nn_functions(declarations):
declarations_by_name = defaultdict(list)
for d in declarations:
declarations_by_name[d['name']].append(d)
autograd_functions = []
for declaration in declarations:
name = declaration['name']
if name == 'batch_norm' or 'conv' in name:
continue
fwd_name = name + '_forward'
if fwd_name not in declarations_by_name:
continue
declaration['base_name'] = fwd_name
fwd = declarations_by_name[fwd_name][0]
input_num = 0
bwd_name = name + '_backward'
assert len(declarations_by_name[bwd_name]) == 1
bwd = declarations_by_name[bwd_name][0]
def actual(arg):
name = arg['name']
return name if name != 'inplace' else 'false'
actuals = [actual(arg) for arg in bwd['arguments']]
formula = '{}({})'.format(bwd_name, ', '.join(actuals))
formula = formula.replace('grad_output', 'grad')
if not re.search(IDENT_REGEX.format('grad'), formula):
formula = '({}).mul_(grad)'.format(formula)
# we are computing the derivatives w.r.t these variables
var_names = []
for ret in bwd['returns']:
assert ret['name'].startswith('grad_')
var_names.append(ret['name'][5:]) # remove grad_ prefix
output_indices = list(range(len(var_names)))
derivatives = [create_derivative(fwd, formula, output_indices, var_names)]
input_num += len(output_indices)
# find arguments to foo_forward() call which don't exist in foo()
# these are buffers which have to be saved for the backwards call
args_by_name = {arg['name']: arg for arg in declaration['arguments']}
buffers = [arg['name'] for arg in fwd['arguments']
if arg['name'] not in args_by_name]
func = create_autograd_function(name, derivatives, input_num, buffers)
declaration['derivative'] = func
autograd_functions.append(func)
return autograd_functions
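# Rough sketch of what the loop above produces (hypothetical op, not taken from the
# real declarations): for an NN function foo(input, weight) with a matching
# foo_backward(grad_output, input, weight, inplace), the derivative formula becomes
# 'foo_backward(grad, input, weight, false)'; if 'grad' did not appear in the
# resulting formula, the whole expression would be wrapped as '(...).mul_(grad)'.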
def create_autograd_functions(top_env, autogen_functions):
"""Functions.h and Functions.cpp body
These contain the auto-generated subclasses of torch::autograd::Function
    for every differentiable torch function.
"""
function_definitions = top_env['autograd_function_definitions']
function_declarations = top_env['autograd_function_declarations']
py_function_initializers = top_env['py_function_initializers']
def process_function(func):
env = {}
saved_variables = []
release_variables = []
unpack = []
def save_arg(arg, is_output):
name = arg['name']
if arg['type'] == 'Tensor' or (arg['type'] == 'Scalar' and is_output):
saved_variables.append('SavedVariable {}_;'.format(name))
release_variables.append('{}_.data.reset();'.format(name))
ptr = 'shared_from_this()' if is_output else ''
unpack.append('auto {} = {}_.unpack({});'.format(name, name, ptr))
elif arg['type'] == 'IntList':
saved_variables.append('std::vector<int64_t> {};'.format(name))
else:
saved_variables.append('{} {};'.format(arg['type'], name))
for arg in func['saved_inputs']:
save_arg(arg, is_output=False)
for arg in func['saved_outputs']:
save_arg(arg, is_output=True)
env['saved_variables'] = saved_variables
env['release_variables'] = release_variables
def uses_grad(func):
for derivative in func['derivatives']:
formula = derivative['formula']
if re.search(IDENT_REGEX.format('grad'), formula):
return True
return False
body = []
if uses_grad(func):
body.append('auto& grad = grads[0];')
def emit_derivative(derivative):
formula = derivative['formula']
idxs = derivative['output_indices']
if idxs == '*':
return DERIVATIVE_TENSORLIST.substitute(derivative=formula)
elif len(idxs) == 1:
return DERIVATIVE_TENSOR.substitute(idx=idxs[0], derivative=formula)
else:
grad_inputs = ', '.join(['grad_inputs[{}]'.format(i) for i in idxs])
masks = ['should_compute_output({}),'.format(i) for i in idxs]
return DERIVATIVE_MULTI.substitute(
idxs=idxs, derivative=formula, grad_inputs=grad_inputs,
masks=masks, n=len(idxs))
body.extend(unpack)
for derivative in func['derivatives']:
body.append(emit_derivative(derivative))
env['body'] = body
env = nested_dict(env, func)
function_declarations.append(FUNCTION_DECLARATION.substitute(env))
function_definitions.append(FUNCTION_DEFINITION.substitute(env))
py_function_initializers.append(PY_FUNCTION_DEFINITION.substitute(env))
for func in autogen_functions:
process_function(func)
def is_implemented(option):
return (option['return_type'] in FALLTHROUGH_RETURN_TYPES or
option['name'] in FALLTHROUGH_FUNCTIONS or
option['name'].endswith('_backward') or
option.get('derivative') is not None)
def create_variable_type(top_env, aten_declarations):
"""VariableType.h and VariableType.cpp body
This is the at::Type subclass for differentiable tensors. The
implementation of each function dispatches to the base tensor type to
compute the output. The grad_fn is attached to differentiable functions.
"""
type_declarations = top_env['type_derived_method_declarations']
type_definitions = top_env['type_derived_method_definitions']
def skip_function(name):
return (name.endswith('_out') or name.endswith('_forward'))
def find_args_with_derivatives(func, tensor_arg_names):
"""Find arguments that have derivative definitions"""
names = set(name for d in func['derivatives'] for name in d['var_names'])
differentiable = [arg for arg in tensor_arg_names if arg in names]
if len(differentiable) != len(names):
missing = names - set(differentiable)
raise RuntimeError('Missing arguments for derivatives: {}'.format(missing))
return differentiable
def save_variables(option, saved_variables, is_output):
# assign the saved variables to the generated grad_fn
stmts = []
for arg in saved_variables:
name = arg['name']
expr = arg['name']
if is_output and not option['inplace']:
if len(option['returns']) > 1:
# unpack multiple outputs
return_names = [r['name'] for r in option['returns']]
idx = return_names.index(name)
stmts.append('auto& {} = std::get<{}>(ret);'.format(name, idx))
elif name != 'input':
stmts.append('auto& {} = ret;'.format(name))
if '_sizes' in name:
expr = name.replace('_sizes', '.sizes()')
elif name.endswith('_info'):
expr = name.replace('_info', '')
elif '_argsize_' in name:
# turn x_argsize_y into x.size(y)
expr = re.sub(r"(\w+)_argsize_(\w+)", r"\1.size(\2)", name)
elif '_argsizes_' in name:
# turn x_argsizes_y into to_arg_sizes(x, y)
expr = re.sub(r"(\w+)_argsizes_(\w+)", r"to_arg_sizes(\1, \2)", name)
elif arg['type'] == 'Tensor' or (is_output and arg['type'] == 'Scalar'):
name += '_'
var = arg['name']
if var == 'self' and option['inplace']:
var = 'self.clone()'
assert not is_output
if option['inplace'] and is_output:
var = 'self'
ptr = 'grad_fn.get()' if is_output else 'nullptr'
expr = 'SavedVariable({}, {})'.format(var, ptr)
stmts.append('grad_fn->{} = {};'.format(name, expr))
return stmts
def requires_unpack(arg):
return 'Tensor' in arg['dynamic_type']
def get_suffix(dynamic_type, is_nullable):
if is_nullable:
assert dynamic_type == 'Tensor'
return '_opt'
elif dynamic_type == 'IndexTensor':
return '_long'
elif dynamic_type == 'BoolTensor':
return '_byte'
else:
return ''
def unpack_args(env, option):
body = []
unpacked_args = []
for i, arg in enumerate(option['arguments']):
if not requires_unpack(arg):
unpacked_args.append(arg['name'])
continue
dynamic_type = arg['dynamic_type']
is_nullable = arg.get('is_nullable', False)
ref = (not is_nullable) and dynamic_type != 'TensorList'
suffix = get_suffix(dynamic_type, is_nullable)
body.append(UNPACK_TENSOR.substitute(
arg_name=arg['name'],
arg_pos=i,
suffix=suffix,
ref='&' if ref else '',
))
unpacked_args.append(arg['name'] + '_')
if option.get('derivative') is not None:
for arg in option['derivative'].get('buffers', []):
unpacked_args.append(arg + '_')
env['unpacked_args'] = unpacked_args
return body
def emit_buffers(buffers):
res = []
for name in buffers:
res.append(BUFFER_DECLARATION.substitute(name=name))
return res
def emit_record_trace(env, declaration):
# Note [clang-802.0.42 tuple overload bug]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Originally, my plan for emit_record_trace was to keep it as
# simple as possible, if at the expense of some somewhat ugly
# overloads. So this meant we had a 'recordTrace' function
# with overloads like this:
#
# recordTrace(..., const Variable& out)
# recordTrace(..., const std::tuple<Variable, Variable>& out)
#
# Unfortunately, this triggers a bug in clang-802.0.42
# (widely used in macOS Sierra 10.12.6) wherein a Variable is
# implicitly convertible into a std::tuple<Variable, Variable>;
# a minimal repro can be seen below here:
#
# #include <tuple>
# struct T {};
# void f(const std::tuple<T, T>&) {}
# void g(T& x) { f(x); }
#
# To work around this bug, the code generator is a bit more
# complicated, and is taught how to handle this situation.
local = {}
arguments = declaration['arguments']
tensor_args = [arg for arg in arguments if arg['simple_type'] in {'Tensor', 'TensorList'}]
if len(tensor_args) == 1 and tensor_args[0]['simple_type'] == 'TensorList':
# Special case for TensorList. This only works when there
# is a single argument
local['trace_inputs'] = "cast_tensor_list({})".format(declaration['arguments'][0]['name'])
else:
local['trace_inputs'] = CodeTemplate("{ ${tensor_args} }").substitute(env)
local['record_attributes'] = []
for arg in declaration['arguments']:
if arg['simple_type'] in {'Tensor', 'TensorList'}:
continue
local['record_attributes'].append(RECORD_ATTRIBUTE.substitute(name=arg['name']))
if not local['record_attributes']:
local['record_attributes'].append('(void)n;')
local['trace_name'] = declaration['api_name']
if local['trace_name'].endswith('_'):
local['trace_name'] = local['trace_name'][:-1]
combined = nested_dict(local, nested_dict(env, declaration))
return RECORD_TRACE.substitute(combined)
def emit_check_no_requires_grad(tensor_args, args_with_derivatives):
"""Checks that arguments without derivatives don't require grad"""
body = []
for arg in tensor_args:
name = arg['name']
if name in args_with_derivatives:
continue
if name == 'output':
# Double-backwards definitions sometimes take in 'input' and
# 'output', but only define the derivative for input.
continue
if arg['dynamic_type'] in {'IndexTensor', 'BoolTensor'}:
continue
body.append('check_no_requires_grad({}, "{}");'.format(name, name))
return body
def emit_body(declaration):
if not is_implemented(declaration):
return METHOD_DEFINITION_NYI.substitute(declaration)
env = {}
body = []
env['unpack_args'] = unpack_args(env, declaration)
combined = nested_dict(env, declaration)
if declaration['return_type'] in FALLTHROUGH_RETURN_TYPES:
body.extend(METHOD_DEFINITION_FALLTHROUGH.substitute(combined).split('\n'))
return body
arguments = declaration['arguments']
tensor_args = [arg for arg in arguments if arg['simple_type'] in {'Tensor', 'TensorList'}]
env['tensor_args'] = [arg['name'] for arg in tensor_args]
if declaration['inplace']:
env['return_value'] = 'self'
env['result'] = 'static_cast<Variable&>(self)'
env['trace_outputs'] = '{ self }'
elif declaration['return_type'] == 'std::vector<Tensor>':
env['return_value'] = 'as_tensor_list(ret)'
env['result'] = 'ret'
env['trace_outputs'] = 'ret'
else:
env['return_value'] = '{}(std::move(ret))'.format(declaration['return_type'])
env['result'] = 'std::get<0>(ret)' if len(declaration['returns']) > 1 else 'ret'
if len(declaration['returns']) > 1:
                # NB: This won't work if we get heterogeneous outputs
outs = ['std::get<{}>(ret)'.format(i)
for i, v in enumerate(declaration['returns']) if v['type'] == 'Tensor']
else:
outs = ['ret']
env['trace_outputs'] = CodeTemplate("{ ${outs} }").substitute(outs=outs)
if any(arg['simple_type'] in {'Generator', 'Storage'} for arg in arguments):
env['record_trace'] = []
else:
env['record_trace'] = emit_record_trace(env, declaration)
is_view = False
func = declaration.get('derivative')
if func is not None:
env['op'] = func['op']
env['op_ctor'] = ''
env['buffers'] = emit_buffers(func.get('buffers', []))
env['save_inputs'] = save_variables(declaration, func['saved_inputs'], False)
env['save_outputs'] = save_variables(declaration, func['saved_outputs'], True)
env['args_with_derivatives'] = find_args_with_derivatives(func, env['tensor_args'])
is_view = func.get('__view__', False)
else:
env['op'] = 'Error'
env['op_ctor'] = '"the derivative for {} is not implemented"'.format(declaration['api_name'])
env['buffers'] = []
env['save_inputs'] = []
env['save_outputs'] = []
env['args_with_derivatives'] = env['tensor_args']
env['check_no_requires_grad'] = emit_check_no_requires_grad(
tensor_args, env['args_with_derivatives'])
if len(env['save_outputs']) > 0:
env['save_outputs'] = CONDITIONAL.substitute(
cond='grad_fn', statements=env['save_outputs'])
env['check_inplace'] = ''
env['version_counter'] = ''
if declaration['inplace']:
env['check_inplace'] = 'check_inplace(self);'
env['version_counter'] = 'increment_version(self);'
elif any(arg['name'] == 'inplace' for arg in arguments):
assert not is_view, declaration['name']
env['check_inplace'] = 'if (inplace) check_inplace(input);'
env['version_counter'] = 'if (inplace) increment_version(input);'
elif is_view:
env['version_counter'] = 'take_version_counter(ret, self);'
if declaration['name'] in FALLTHROUGH_FUNCTIONS:
tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace']
else METHOD_DEFINITION_FALLTHROUGH_VARIABLE)
body.extend(tmpl.substitute(combined).split('\n'))
return body
base_call = BASE_CALL.substitute(combined)
if not declaration['inplace']:
base_call = 'auto ret = as_variable({})'.format(base_call)
env['base_impl_call'] = base_call + ';'
body.extend(METHOD_DEFINITION_DERIVATIVE.substitute(combined).split('\n'))
return body
def process_function(declaration):
if skip_function(declaration['name']):
return
if declaration.get('derivative') is None and declaration['mode'] == 'native':
# native functions without a derivative don't need Type implementations
return
env = {}
env['type_definition_body'] = emit_body(declaration)
combined = nested_dict(env, declaration)
if 'Type' in combined['method_of']:
type_declarations.append(METHOD_DECLARATION.substitute(combined))
if declaration['name'] not in MANUAL_IMPLEMENTATIONS:
type_definitions.append(METHOD_DEFINITION.substitute(combined))
for declaration in aten_declarations:
process_function(declaration)
def load_aten_declarations(path):
with open(path, 'r') as f:
declarations = yaml.load(f, Loader=Loader)
# enrich declarations with additional information
for declaration in declarations:
for arg in declaration['arguments']:
simple_type = arg['type']
simple_type = simple_type.replace(' &', '').replace('const ', '')
simple_type = simple_type.replace('Generator *', 'Generator')
arg['simple_type'] = simple_type
declaration['formals'] = [arg['type'] + ' ' + arg['name']
for arg in declaration['arguments']]
declaration['args'] = [arg['name'] for arg in declaration['arguments']]
declaration['api_name'] = declaration['name']
declaration['return_type'] = format_return_type(declaration['returns'])
declaration['base_name'] = declaration['name']
        # if the return value is missing a name, call it 'result'
for ret in declaration['returns']:
if 'name' not in ret:
assert len(declaration['returns']) == 1
ret['name'] = 'result'
# Compute the Python function prototype for argument parsing
typed_args = []
positional = True
for arg in declaration['arguments']:
if arg.get('kwarg_only', False) and positional:
typed_args.append('*')
positional = False
typename = arg['simple_type']
if arg.get('size') is not None:
typename = '{}[{}]'.format(typename, arg['size'])
param = typename + ' ' + arg['name']
if arg.get('default') is not None:
default = arg['default']
if default == 'nullptr' or default == '{}':
default = 'None'
param += '=' + str(default)
typed_args.append(param)
# Python function prototype
declaration['typed_args'] = typed_args
declaration['prototype'] = FUNCTION_PROTOTYPE.substitute(declaration)
return declarations
def load_deprecated_signatures(declarations_by_signature):
with open(deprecated_path, 'r') as f:
deprecated_defs = yaml.load(f, Loader=Loader)
declarations = []
def get_signature(name, params, call_args):
# create a mapping of parameter name to parameter type
types = dict([param.split(' ')[::-1] for param in params])
# if the name in the call is not in the parameter list, assume it's
# a literal Scalar
rearranged_types = [types.get(arg, 'Scalar') for arg in call_args]
return '{}({})'.format(name, ', '.join(rearranged_types))
for deprecated in deprecated_defs:
prototype = deprecated['name']
call_args = split_name_params(deprecated['aten'])[1]
name, params = split_name_params(prototype)
signature = get_signature(name, params, call_args)
for declaration in declarations_by_signature[signature]:
declaration = copy.deepcopy(declaration)
declaration['deprecated'] = True
declaration['call_args'] = call_args
if declaration['inplace']:
declaration['prototype'] = prototype.replace(name, name + '_')
else:
declaration['prototype'] = prototype
args_by_name = {arg['name']: arg for arg in declaration['arguments']}
declaration['arguments'] = []
for arg in params:
_, arg_name = arg.split(' ')
declaration['arguments'].append(args_by_name[arg_name])
declarations.append(declaration)
return declarations
def gen_variable_type(declarations, out):
aten_decls = load_aten_declarations(declarations)
def group_declarations_by_signature():
d = defaultdict(list)
for declaration in aten_decls:
name = declaration['name']
base_name = name[:-1] if declaration['inplace'] else name
simple_types = [arg['simple_type'] for arg in declaration['arguments']]
signature = '{}({})'.format(base_name, ', '.join(simple_types))
d[signature].append(declaration)
return d
declarations_by_signature = group_declarations_by_signature()
th_autograd_funcs = load_derivatives(derivatives_path, declarations_by_signature)
nn_autograd_funcs = preprocess_nn_functions(aten_decls)
all_autograd_functions = th_autograd_funcs + nn_autograd_funcs
ensure_unique_names(all_autograd_functions)
def should_generate_python_binding(declaration):
name = declaration['name']
# don't bind (non-native) unimplemented functions to prevent errors in test_autograd.
# Native functions, even if they don't have derivatives specified, should be bound
# so they can be called from python (their derivatives are defined based on the functions
# they call).
if not is_implemented(declaration) and declaration['mode'] != 'native':
return False
# don't bind size or stride since the python signatures are different
if name in ['size', 'stride']:
return False
if name.endswith('_backward'):
return False
# we don't currently support functions which are only defined on Type
# such as zeros(), randn(), etc.
method_of = declaration['method_of']
if 'Tensor' not in method_of and 'namespace' not in method_of:
return False
return True
py_variable_methods = defaultdict(list)
py_nn_functions = defaultdict(list)
for declaration in aten_decls:
name = declaration['name']
if not should_generate_python_binding(declaration):
continue
if declaration['mode'] == 'NN':
py_nn_functions[name].append(declaration)
else:
py_variable_methods[name].append(declaration)
for declaration in load_deprecated_signatures(declarations_by_signature):
py_variable_methods[declaration['name']].append(declaration)
env = {
'autograd_function_declarations': [],
'autograd_function_definitions': [],
'type_derived_method_declarations': [],
'type_derived_method_definitions': [],
'py_methods': [],
'py_method_defs': [],
'py_method_dispatch': [],
'py_function_initializers': [],
'py_nn_functions': [],
'py_nn_function_defs': [],
'py_nn_function_dispatch': [],
}
create_autograd_functions(env, all_autograd_functions)
create_variable_type(env, aten_decls)
from .gen_python_functions import create_python_bindings
create_python_bindings(
py_variable_methods,
env['py_methods'],
env['py_method_defs'],
env['py_method_dispatch'],
is_class=True)
create_python_bindings(
py_nn_functions,
env['py_nn_functions'],
env['py_nn_function_defs'],
env['py_nn_function_dispatch'],
is_class=False)
write(out, 'VariableType.h', VARIABLE_TYPE_H, env)
write(out, 'VariableType.cpp', VARIABLE_TYPE_CPP, env)
write(out, 'Functions.h', FUNCTIONS_H, env)
write(out, 'Functions.cpp', FUNCTIONS_CPP, env)
write(out, 'python_variable_methods.cpp', PY_VARIABLE_METHODS_CPP, env)
write(out, 'python_variable_methods_dispatch.h', PY_VARIABLE_DISPATCH_H, env)
write(out, 'python_nn_functions.cpp', PY_NN_FUNCTIONS_CPP, env)
write(out, 'python_nn_functions.h', PY_NN_FUNCTIONS_H, env)
write(out, 'python_nn_functions_dispatch.h', PY_NN_DISPATCH_H, env)
write(out, 'python_functions.h', PY_FUNCTIONS_H, env)
write(out, 'python_functions.cpp', PY_FUNCTIONS_CPP, env)
def main():
parser = argparse.ArgumentParser(
description='Generate autograd C++ files script')
parser.add_argument('declarations', metavar='DECL',
help='path to Declarations.yaml')
parser.add_argument('out', metavar='OUT',
help='path to output directory')
args = parser.parse_args()
gen_variable_type(args.declarations, args.out)
if __name__ == '__main__':
main()

View File

@ -1,19 +0,0 @@
# TODO: refactor nested_dict into common library with ATen
class nested_dict(object):
"""
A nested dict is a dictionary with a parent. If key lookup fails,
it recursively continues into the parent. Writes always happen to
the top level dict.
"""
def __init__(self, base, parent):
self.base, self.parent = base, parent
def __contains__(self, item):
return item in self.base or item in self.parent
def __getitem__(self, x):
r = self.base.get(x)
if r is not None:
return r
return self.parent[x]
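# Usage sketch (illustrative only):
#   d = nested_dict({'a': 1}, {'a': 0, 'b': 2})
#   d['a']      # -> 1, found in the base dict
#   d['b']      # -> 2, falls through to the parent
#   'b' in d    # -> True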

View File

@ -1,414 +0,0 @@
#include "Functions.h"
#include <ATen/WrapDimUtils.h>
#include <math.h>
// ${generated_comment}
using at::Tensor;
using at::Scalar;
using at::IntList;
using at::TensorList;
namespace torch { namespace autograd { namespace generated {
namespace {
Tensor not_implemented(const char* name) {
throw std::runtime_error(
std::string("the derivative for '") + name + "' is not implemented");
}
Tensor maybe_multiply(const Tensor & t, const Scalar & s) {
bool is_one = false;
if (s.isFloatingPoint()) {
is_one = s.toDouble() == 1;
} else if(s.isIntegral()) {
is_one = s.toLong() == 1;
}
if (is_one) {
return t;
} else {
return t * s;
}
}
// Don't expose ATen scalars to Variable API, because they are not supported yet.
void ensure_no_aten_scalars(variable_list &vars) {
for (auto& v : vars) {
if (v.defined() && v.dim() == 0) {
v.data().as_strided_({1}, {1});
}
}
}
Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_, const Tensor & norm) {
double p = p_.toDouble();
Tensor self_scaled;
Tensor scale_v;
if (p == 0.0) {
return zeros_like(self);
} else if (p == 1.0) {
return self.sign() * grad;
} else if (p < 2.0) {
self_scaled = self.sign() * self.abs().pow(p - 1);
scale_v = grad / norm.pow(p - 1);
} else if (p == 2.0) {
self_scaled = self;
scale_v = grad / norm;
} else {
self_scaled = self * self.abs().pow(p - 2);
scale_v = grad / norm.pow(p - 1);
}
// handle case at 0 where we return a subgradient containing 0
scale_v.masked_fill_(norm == 0, 0);
return self_scaled * scale_v;
}
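// Illustration (not part of the original file): for p > 0 every branch above
// computes the same subgradient, d||x||_p / dx = sign(x) * |x|^(p-1) / ||x||_p^(p-1),
// times the incoming grad; p == 1 and p == 2 are merely special-cased to skip the
// pow() calls, p == 0 returns zeros, and masked_fill_ zeroes the result wherever
// the norm itself is 0.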
Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, Tensor norm, int64_t dim, bool keepdim) {
if (!keepdim && self.dim() > 1) {
grad = grad.unsqueeze(dim);
norm = norm.unsqueeze(dim);
}
return norm_backward(grad, self, p_, norm);
}
Tensor reduce_to(const Tensor & grad, IntList sizes) {
if (sizes.size() == 0) {
return grad.sum().toTensor();
}
Tensor result = grad;
while (result.dim() > (int64_t)sizes.size()) {
result = result.sum(0, false);
}
for (int64_t i = 0; i < result.dim(); ++i) {
if (sizes[i] == 1 && result.sizes()[i] > 1) {
result = result.sum(i, true);
}
}
return result;
}
Tensor permute_backwards(const Tensor & grad, IntList fwd_dims) {
// invert the permutation
std::vector<int64_t> dims(fwd_dims.size());
for (size_t i = 0; i < fwd_dims.size(); i++) {
dims[fwd_dims[i]] = i;
}
return grad.permute(dims);
}
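// Example (illustrative only): if the forward call was x.permute({2, 0, 1}), then
// dims becomes {1, 2, 0} here, i.e. dims[fwd_dims[i]] = i, and applying that
// inverse permutation to grad maps each gradient entry back to the position of the
// corresponding input element.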
Tensor sum_backward(const Tensor & grad, IntList sizes, int64_t dim, bool keepdim) {
if (!keepdim && sizes.size() > 1) {
return grad.unsqueeze(dim).expand(sizes);
} else {
return grad.expand(sizes);
}
}
Tensor cumsum_backward(const Tensor & x, int64_t dim) {
auto ret = at::cumsum(-x, dim);
auto ret_sum = ret.narrow(dim, ret.size(dim) - 1, 1).clone();
ret -= ret_sum.expand(ret.sizes());
ret += x;
return ret;
}
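// Illustration (not part of the original file): this computes the reverse
// cumulative sum of x (the incoming gradient) along dim without an explicit flip,
// e.g. [g0, g1, g2] -> [g0 + g1 + g2, g1 + g2, g2]; that is the correct gradient
// because input element i contributes to every cumsum output j >= i.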
Tensor unsqueeze_to(const Tensor & self, IntList sizes) {
auto result = self;
int64_t nDims = sizes.size();
for (int64_t dim = 0; dim < nDims; dim++) {
if (sizes[dim] == 1) {
result = result.unsqueeze(dim);
}
}
return result;
}
Tensor maybe_unsqueeze(const Tensor & self, int64_t dim, bool unsqueeze) {
if (unsqueeze) {
return self.unsqueeze(dim);
}
return self;
}
variable_list cat_tensors_backward(const Tensor & grad, const std::vector<int64_t> &sizes, int64_t dim) {
variable_list grad_inputs(sizes.size());
int64_t accumulate = 0;
for (size_t i = 0; i < sizes.size(); ++i) {
auto size = sizes[i];
accumulate += size;
grad_inputs[i] = grad.narrow(dim, accumulate - size, size);
}
return grad_inputs;
}
Tensor mm_mat1_backward(const Tensor & grad, const Tensor & mat2, IntList sizes, IntList strides, const Scalar & alpha) {
// if input was column-major, return grad as column-order for efficiency
if (strides[0] == 1 && strides[1] == sizes[0]) {
return maybe_multiply(mat2.mm(grad.t()).t(), alpha);
} else {
return maybe_multiply(grad.mm(mat2.t()), alpha);
}
}
Tensor mm_mat2_backward(const Tensor & grad, const Tensor & mat1, IntList sizes, IntList strides, const Scalar & alpha) {
// if input was column-major, return grad as column-order for efficiency
if (strides[0] == 1 && strides[1] == sizes[0]) {
return maybe_multiply(grad.t().mm(mat1).t(), alpha);
} else {
return maybe_multiply(mat1.t().mm(grad), alpha);
}
}
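// Illustration (not part of the original file): for out = alpha * mat1.mm(mat2)
// the gradients are d(mat1) = alpha * grad.mm(mat2.t()) and
// d(mat2) = alpha * mat1.t().mm(grad); the column-major branches above compute the
// transposed product instead so the result keeps the same memory layout as the
// original (column-major) input.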
Tensor select_backward_scalar(Tensor grad, const Tensor & input, const Tensor & value) {
if (grad.dim() == 1) {
// TODO: remove this once zero-dim tensor work properly in PyTorch
grad = grad.view({});
}
auto grad_input = zeros_like(input);
grad_input.masked_fill_(input == value, Scalar(grad));
return grad_input;
}
Tensor select_backward(Tensor grad, int64_t dim, Tensor indices, IntList sizes, bool keepdim) {
if (!keepdim && sizes.size() > 1) {
grad = grad.unsqueeze(dim);
indices = indices.unsqueeze(dim);
}
return grad.type().zeros(sizes).scatter_(dim, indices, grad);
}
Tensor trace_backward(const Tensor & grad, IntList sizes) {
if (sizes.size() != 2) {
throw std::runtime_error("expected matrix input");
}
// TODO: simplify once toScalarType is virtual
auto& long_type = *VariableImpl::getType(
Variable(grad).data().type().toScalarType(at::kLong));
auto grad_input = grad.type().zeros(sizes[0] * sizes[1]);
auto indices = long_type.arange(0, grad_input.numel(), sizes[1] + 1);
grad_input.index_fill_(0, indices, Scalar(grad.view({})));
return grad_input.view(sizes);
}
Tensor unfold_backward(const Tensor & grad, IntList input_sizes, int64_t dim, int64_t size, int64_t step) {
// TODO: simplify once toScalarType is virtual
auto& long_type = *VariableImpl::getType(
Variable(grad).data().type().toScalarType(at::kLong));
int64_t numel = 1;
for (auto size : input_sizes) {
numel *= size;
}
auto idx = long_type.arange(0, numel).view(input_sizes);
auto idx_unfolded = idx.unfold(dim, size, step).contiguous().view(-1);
auto grad_input = grad.type().zeros({numel});
grad_input.index_add_(0, idx_unfolded, grad.contiguous().view(-1));
return grad_input.view(input_sizes);
}
Tensor masked_scatter_backward(const Tensor & grad, const Tensor & mask, IntList sizes) {
int64_t numel = 1;
for (auto size : sizes) {
numel *= size;
}
auto mask_selected = grad.masked_select(mask);
auto diff_nelem = numel - mask_selected.numel();
if (diff_nelem > 0) {
    // because masked_select returns a 1-d tensor whose length is the number of
    // selected (non-zero) mask elements, we pad the rest with zeros and then
    // reshape back to the original sizes.
auto zeros_fillin = grad.type().zeros({diff_nelem});
mask_selected = at::cat({mask_selected, zeros_fillin}, 0);
}
return mask_selected.view(sizes);
}
Tensor potrf_backward(Tensor grad, bool upper, Tensor L) {
// cf. Iain Murray (2016); arXiv 1602.07527
if (upper) {
L = L.t();
grad = grad.t();
}
auto phi = [](const Tensor & A) -> Tensor {
auto B = A.tril();
B = B - 0.5 * at::diag(at::diag(B));
return B;
};
// make sure not to double-count variation, since
// only half of output matrix is unique
auto Lbar = grad.tril();
auto P = phi(at::mm(L.t(), Lbar));
Tensor S;
std::tie(S, std::ignore) = at::gesv(P + P.t(), L.t());
std::tie(S, std::ignore) = at::gesv(S.t(), L.t());
S = phi(S);
return S;
}
Tensor split_backward(const std::vector<torch::autograd::Variable> &grads, int64_t split_size, int64_t dim, IntList sizes, const Type &type) {
dim = at::maybe_wrap_dim(dim, sizes.size());
int64_t dim_size = sizes[dim];
int64_t num_splits = (dim_size + split_size - 1) / split_size;
  // it's possible some of the grads are not defined (they represent tensors of all zeros).
// Since at::cat can't handle those, let's define them
std::vector<Tensor> grads_all_defined(grads.size());
for (size_t j = 0; j < grads.size(); ++j) {
if (grads[j].defined()) {
grads_all_defined[ j ] = grads[ j ];
} else {
auto length = (int64_t)j < (num_splits - 1) ? split_size : split_size - (split_size * num_splits - dim_size);
std::vector<int64_t> grad_size(sizes);
grad_size[ dim ] = length;
grads_all_defined[ j ] = type.zeros(grad_size);
}
}
auto ret = at::cat(grads_all_defined, dim);
return ret;
}
Tensor glu_double_backward(const Tensor & grad, const Tensor & grad_output, const Tensor & input, int64_t dim) {
auto& gO = grad_output;
auto input_size = input.size(dim) / 2;
auto first_half = input.narrow(dim, 0, input_size);
auto second_half = input.narrow(dim, input_size, input_size);
auto sig_second_half = second_half.sigmoid();
auto one_sub_sig_second_half = 1 - sig_second_half;
auto sig_one_sub_sig = sig_second_half * one_sub_sig_second_half;
auto ggI_first_half = grad.narrow(dim, 0, input_size);
auto ggI_second_half = grad.narrow(dim, input_size, input_size);
auto ggI_second_half_times_first_half = ggI_second_half * first_half;
auto gI_first_half = ggI_second_half * gO * sig_one_sub_sig;
auto second_order_sh = sig_one_sub_sig * one_sub_sig_second_half - sig_second_half * sig_one_sub_sig;
auto gI_second_half = ggI_second_half_times_first_half * gO * second_order_sh + ggI_first_half * gO * sig_one_sub_sig;
return at::cat({gI_first_half, gI_second_half}, dim);
}
Tensor glu_double_backward_grad_output(const Tensor & grad, const Tensor & input, int64_t dim) {
if (dim < 0) dim += input.dim();
std::vector<int64_t> sizes = input.sizes();
sizes[dim] /= 2;
auto tmp = grad * glu_backward(input.type().ones(sizes), input, dim);
return tmp.narrow(dim, 0, sizes[dim]) + tmp.narrow(dim, sizes[dim], sizes[dim]);
}
Tensor kl_div_double_backward_grad_output(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
auto result = kl_div_backward(grad, input, target, size_average, false);
if (reduce && size_average) {
return result.mean().toTensor();
} else if (reduce) {
return result.sum().toTensor();
}
return result;
}
Tensor log_sigmoid_double_backward(const Tensor & grad, const Tensor & input) {
auto z = input.sigmoid();
return grad * (z - 1) * z;
}
Tensor softmax_double_backward(const Tensor & grad, const Tensor & grad_output, int dim, const Tensor & output) {
auto gO = grad_output;
auto ggI = grad;
auto ggI_output = ggI * output;
auto ggI_out_sum = ggI_output.sum(dim, true);
auto ggI_out_sum_output = ggI_out_sum * output;
auto gO_out_sum = (gO * output).sum(dim, true);
// gI calculation
auto gI_t0 = ggI_output * (gO - gO_out_sum);
auto gI_t1 = output * ((ggI_output * gO).sum(dim, true).sub_(gO_out_sum * ggI_out_sum));
auto gI_t2 = ggI_out_sum_output * gO;
auto gI_t3 = ggI_out_sum_output * gO_out_sum;
return gI_t0 - gI_t1 - gI_t2 + gI_t3;
}
Tensor log_softmax_double_backward(const Tensor & grad, const Tensor & grad_output, int dim, const Tensor & output) {
auto z = output.exp();
return z * grad_output.sum(dim, true) * ((grad * z).sum(dim, true) - grad);
}
Tensor l1_loss_double_backward_grad_output(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
auto output = l1_loss_backward(grad, input, target, size_average, false);
if (reduce and size_average) {
return output.mean().toTensor();
} else if (reduce) {
return output.sum().toTensor();
}
return output;
}
Tensor smooth_l1_loss_double_backward(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
auto d = (input - target).abs();
auto grad_input = grad * (d < 1).toType(grad.type());
if (size_average && reduce) {
grad_input /= input.numel();
}
return grad_input;
}
Tensor smooth_l1_loss_double_backward_grad_output(const Tensor & grad, const Tensor & grad_output, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
if (!reduce) {
return smooth_l1_loss_backward(grad, input, target, size_average, reduce);
}
auto r = smooth_l1_loss_backward(ones_like(grad_output), input, target, size_average, true);
return (r * grad).sum().toTensor().view({1});
}
Tensor max_pool2d_double_backward(const Tensor & grad, const Tensor & indices) {
// fold the first two dims together and the last two together
auto fold = [](const Tensor & t) -> Tensor {
auto sizes = t.sizes();
return t.contiguous().view({sizes[0] * sizes[1], sizes[2] * sizes[3]});
};
return fold(grad).gather(1, fold(indices)).view(indices.sizes());
}
Tensor mse_loss_double_backward(const Tensor & grad, const Tensor & input, bool size_average, bool reduce) {
auto grad_input = 2 * grad;
if (size_average && reduce) {
grad_input /= input.numel();
}
return grad_input;
}
Tensor mse_loss_double_backward_grad_output(const Tensor & grad, const Tensor & grad_output, const Tensor & input, const Tensor & target, bool size_average, bool reduce) {
if (!reduce) {
return mse_loss_backward(grad, input, target, size_average, reduce);
}
auto r = mse_loss_backward(ones_like(grad_output), input, target, size_average, true);
return (r * grad).sum().toTensor().view({1});
}
Tensor soft_margin_loss_double_backward(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average) {
auto z = (input * -target).exp();
auto zplus1 = z + 1;
auto grad_input = grad * (target * target) * z / (zplus1 * zplus1);
if (size_average) {
grad_input /= input.numel();
}
return grad_input;
}
Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scalar beta, Scalar threshold) {
auto x = (input * beta);
return _sigmoid_backward(grad, x.sigmoid()) * (x < threshold).toType(grad.type()) * beta;
}
}
${autograd_function_definitions}
}}} // namespace torch::autograd::generated

View File

@ -1,34 +0,0 @@
#pragma once
// ${generated_comment}
#include <ATen/ATen.h>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/autograd/saved_variable.h"
namespace torch { namespace autograd { namespace generated {
using at::Scalar;
using at::Tensor;
using at::IntList;
using at::Type;
struct TypeAndSize {
TypeAndSize() : type(nullptr) {}
/* implicit */
TypeAndSize(const Tensor & t)
: sizes(t.sizes())
, type(&t.type()) {}
Tensor zeros() { return type->zeros(sizes); }
private:
std::vector<int64_t> sizes;
Type* type;
};
${autograd_function_declarations}
}}} // namespace torch::autograd::generated

View File

@ -1,354 +0,0 @@
#include "VariableType.h"
// ${generated_comment}
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/saved_variable.h"
#include "torch/csrc/autograd/generated/Functions.h"
#include "torch/csrc/autograd/functions/tensor.h"
#include "torch/csrc/autograd/functions/basic_ops.h"
#include "torch/csrc/jit/tracer.h"
#include <initializer_list>
#include <iostream>
#include <functional>
#ifdef _MSC_VER
#ifdef Type
#undef Type
#endif
#endif
using namespace at;
using namespace torch::autograd::generated;
namespace torch { namespace autograd {
// Helper methods for working with Attributes (torch/csrc/jit/attributes.h)
// The overloaded accessors are convenient for the generated code (since we
// don't want to make the codegen do the dispatch manually)
static void setattr(jit::Node* n, jit::Symbol name, int64_t v) { n->i_(name, v); }
static void setattr(jit::Node* n, jit::Symbol name, const at::Scalar& v) { n->t_(name, v.toTensor()); }
static void setattr(jit::Node* n, jit::Symbol name, const at::IntList& v) { n->is_(name, v); }
static void setattr(jit::Node* n, jit::Symbol name, bool v) { n->i_(name, v); }
static void setattr(jit::Node* n, jit::Symbol name, double v) { n->f_(name, v); }
template<unsigned long N>
static void setattr(jit::Node* n, jit::Symbol name, std::array<bool, N> v) { n->is_(name, std::vector<int64_t>(v.begin(), v.end())); }
VariableType::VariableType(Context* context, Type* baseType)
: Type(context)
, baseType(baseType) {
str = std::string("Variable[") + baseType->toString() + "]";
}
ScalarType VariableType::scalarType() const {
return baseType->scalarType();
}
Backend VariableType::backend() const {
return baseType->backend();
}
bool VariableType::isCuda() const { return baseType->isCuda(); }
bool VariableType::isSparse() const { return baseType->isSparse(); }
bool VariableType::isDistributed() const { return baseType->isDistributed(); }
std::unique_ptr<Storage> VariableType::storage() const {
return baseType->storage();
}
std::unique_ptr<Storage> VariableType::storage(size_t size) const {
return baseType->storage(size);
}
std::unique_ptr<Storage> VariableType::storageFromBlob(void * data, int64_t size, const std::function<void(void*)> & deleter) const {
return baseType->storageFromBlob(data, size, deleter);
}
Tensor VariableType::unsafeTensorFromTH(void * th_pointer, bool retain) const {
return baseType->unsafeTensorFromTH(th_pointer, retain);
}
std::unique_ptr<Generator> VariableType::generator() const {
return baseType->generator();
}
const char * VariableType::toString() const {
return str.c_str();
}
size_t VariableType::elementSizeInBytes() const {
return baseType->elementSizeInBytes();
}
TypeID VariableType::ID() const {
throw std::runtime_error("VariableType::ID() not implemented");
}
const char * VariableType::typeString() {
return "VariableType";
}
Variable & VariableType::checked_cast(const Type & type, const Tensor & t, const char * name, int pos) {
if(!t.defined()) {
runtime_error("Expected a Tensor of type %s but found an undefined Tensor for argument #%d '%s'",
type.toString(), pos, name);
}
if (&t.type() != &type) {
runtime_error("Expected object of type %s but found type %s for argument #%d '%s'",
type.toString(), t.type().toString(), pos, name);
}
return static_cast<Variable&>(const_cast<Tensor&>(t));
}
Tensor & VariableType::unpack(const Tensor & t, const char * name, int pos) const {
return checked_cast(*this, t, name, pos).data();
}
Tensor & VariableType::unpack_long(const Tensor & t, const char * name, int pos) const {
auto& type = *VariableImpl::getType(baseType->toScalarType(kLong));
return checked_cast(type, t, name, pos).data();
}
Tensor & VariableType::unpack_byte(const Tensor & t, const char * name, int pos) const {
auto& type = *VariableImpl::getType(baseType->toScalarType(kByte));
return checked_cast(type, t, name, pos).data();
}
Tensor & VariableType::unpack_any(const Tensor & t, const char * name, int pos) const {
if (!t.defined()) {
runtime_error("Expected a Tensor of type Variable but found an undefined Tensor for argument #%d '%s'",
pos, name);
}
auto scalarType = t.type().scalarType();
auto& type = *VariableImpl::getType(baseType->toScalarType(scalarType));
return checked_cast(type, t, name, pos).data();
}
Tensor VariableType::unpack_opt(const Tensor & t, const char * name, int pos) const {
if(!t.defined()) {
return Tensor();
}
return unpack(t, name, pos);
}
std::vector<at::Tensor> VariableType::unpack(const at::TensorList &tl, const char *name, int pos) const {
std::vector<at::Tensor> ret(tl.size());
for (size_t i = 0; i < tl.size(); ++i) {
const auto &t = tl[i];
if (!t.defined()) {
runtime_error("Expected a Tensor of type %s but found an undefined Tensor at position #%d "
"for iterable argument #%d '%s'",
toString(), i, pos, name);
}
if (&t.type() == this) {
ret[i] = static_cast<VariableImpl*>(t.pImpl)->data;
} else {
runtime_error("Expected object of type %s but found type %s at position #%d "
"for iterable argument #%d '%s'",
toString(),t.type().toString(), i, pos, name);
}
}
return ret;
}
Variable VariableType::as_variable(Tensor tensor) const {
return make_variable(std::move(tensor));
}
std::tuple<Variable, Variable>
VariableType::as_variable(std::tuple<Tensor, Tensor> tensors) const {
return std::make_tuple<>(
make_variable(std::move(std::get<0>(tensors))),
make_variable(std::move(std::get<1>(tensors))));
}
std::tuple<Variable, Variable, Variable>
VariableType::as_variable(std::tuple<Tensor, Tensor, Tensor> tensors) const {
return std::make_tuple<>(
make_variable(std::move(std::get<0>(tensors))),
make_variable(std::move(std::get<1>(tensors))),
make_variable(std::move(std::get<2>(tensors))));
}
std::vector<Variable> VariableType::as_variable(TensorList tl) const {
std::vector<Variable> variables;
for (auto& t : tl) {
variables.emplace_back(make_variable(std::move(t)));
}
return variables;
}
Variable VariableType::as_variable(const Scalar & scalar) const {
auto tensor = scalar.toTensor();
if (&tensor.type() != baseType) {
tensor = tensor.toType(*baseType);
}
return make_variable(std::move(tensor));
}
struct VariableFlags {
bool requires_grad;
bool is_volatile;
};
template<typename T>
static VariableFlags compute_flags_tmpl(T tensors) {
VariableFlags flags = {false, false};
for (const Tensor& tensor : tensors) {
auto& var = static_cast<const Variable&>(tensor);
if (var.defined()) {
flags.requires_grad |= var.requires_grad();
flags.is_volatile |= var.is_volatile();
}
}
flags.requires_grad &= !flags.is_volatile;
return flags;
}
using TensorRef = std::reference_wrapper<const Tensor>;
using TensorRefList = std::initializer_list<TensorRef>;
// ArrayRef is not covariant, which means there is no
// implicit conversion between TensorList (aka ArrayRef<Tensor>)
// and ArrayRef<Variable>. What we do instead is manually
// construct a variable_list, which itself is implicitly convertible
// into an ArrayRef<Variable> (but don't return an ArrayRef<Variable>;
// ArrayRef is non-owning!)
static variable_list cast_tensor_list(const TensorList& tensors) {
// TODO: Eliminate the intermediate vector allocation
return variable_list(tensors.begin(), tensors.end());
}
static VariableFlags compute_flags(const TensorRefList& tensors) {
return compute_flags_tmpl(tensors);
}
static VariableFlags compute_flags(TensorList tensors) {
return compute_flags_tmpl(tensors);
}
static void check_no_requires_grad(const Tensor& tensor, const char* name) {
auto& var = static_cast<const Variable&>(tensor);
if (var.defined() && var.requires_grad()) {
std::string msg = "the derivative for '";
msg += name;
msg += "' is not implemented";
throw std::runtime_error(msg);
}
}
static function_list compute_next_functions(const std::initializer_list<Tensor>& tensors) {
return Function::flags(tensors).next_functions;
}
static function_list compute_next_functions(TensorList tensors) {
return Function::flags(tensors).next_functions;
}
static void check_inplace(const Tensor& tensor) {
auto& var = static_cast<const Variable&>(tensor);
if (var.requires_grad() && !var.grad_fn()) {
at::runtime_error(
"a leaf Variable that requires grad has been used in an in-place operation.");
}
auto live_refs = var.version_counter().live_refs();
if (live_refs > 1) {
at::runtime_error(
"in-place operations can be only used on variables that don't share "
"storage with any other variables, but detected that there are %d objects "
"sharing it", live_refs);
}
}
static void set_flags(Variable& var, VariableFlags flags, std::shared_ptr<Function> grad_fn) {
var.requires_grad() = flags.requires_grad;
var.is_volatile() = flags.is_volatile;
if (grad_fn) {
var.output_nr() = grad_fn->num_inputs++;
var.grad_fn() = std::move(grad_fn);
}
}
static void set_flags(std::vector<Variable> &vl, VariableFlags flags, std::shared_ptr<Function> grad_fn) {
for (auto& v : vl) {
set_flags(v, flags, grad_fn);
}
}
std::vector<Tensor> as_tensor_list(std::vector<Variable> &vars) {
std::vector<Tensor> tensors;
for (auto& v : vars) {
tensors.emplace_back(std::move(v));
}
return tensors;
}
static void increment_version(const Tensor & t) {
auto& var = static_cast<const Variable&>(t);
var.version_counter().increment();
}
static void take_version_counter(Tensor & dst, const Tensor & src) {
// replaces the version counter in dst with the one in src
// call when dst is a view of src
auto& src_var = static_cast<const Variable&>(src);
auto& dst_var = static_cast<Variable&>(dst);
dst_var.version_counter() = src_var.version_counter();
}
static bool isFloatingPoint(ScalarType s) {
return s == kFloat || s == kDouble || s == kHalf;
}
void VariableType::s_copy(const Tensor & src, Tensor & dst) const {
// TODO: once copy is exposed in Declarations.yaml we may be able to bind
// it automatically
auto& src_ = unpack_any(src, "src", 0);
auto& dst_ = unpack(dst, "dst", 1);
check_inplace(dst);
std::shared_ptr<Identity> grad_fn;
auto flags = compute_flags({ src });
flags.requires_grad &= isFloatingPoint(dst.type().scalarType());
if (flags.requires_grad) {
// TODO: handle type conversions
grad_fn = std::make_shared<Identity>();
grad_fn->is_executable = true;
grad_fn->next_functions = compute_next_functions({ src });
}
baseType->s_copy(src_, dst_);
increment_version(dst);
set_flags(static_cast<Variable&>(dst), flags, std::move(grad_fn));
}
Tensor & VariableType::m_resize_(Tensor & self, IntList size) const {
auto& self_ = unpack(self, "self", 0);
check_inplace(self);
auto& self_var = static_cast<Variable&>(self);
if (self_var.grad_fn()) {
at::runtime_error("cannot resize non-leaf variables");
}
if (self_var.requires_grad()) {
at::runtime_error("cannot resize variables which require grad");
}
baseType->m_resize_(self_, size);
return self;
}
Tensor & VariableType::m_resize_as_(Tensor & self, const Tensor & the_template) const {
return m_resize_(self, the_template.sizes());
}
Tensor VariableType::m_contiguous(const Tensor & self) const {
unpack(self, "self", 0);
if (self.is_contiguous()) {
return self;
}
return self.clone();
}
std::vector<int64_t> to_arg_sizes(TensorList tensors, int64_t dim) {
std::vector<int64_t> arg_sizes(tensors.size());
for (size_t i = 0; i < tensors.size(); ++i) {
arg_sizes[i] = tensors[i].size(dim);
}
return arg_sizes;
}
${type_derived_method_definitions}
}} // namespace torch::autograd

View File

@ -1,62 +0,0 @@
#pragma once
// ${generated_comment}
#include <ATen/ATen.h>
#include <string>
namespace torch { namespace autograd {
struct Variable;
using at::Context;
using at::Generator;
using at::IntList;
using at::Scalar;
using at::SparseTensor;
using at::Storage;
using at::Tensor;
using at::TensorList;
using at::Type;
struct VariableType : public at::Type {
VariableType(Context* context, at::Type* baseType);
virtual at::ScalarType scalarType() const override;
virtual at::Backend backend() const override;
virtual bool isCuda() const override;
virtual bool isSparse() const override;
virtual bool isDistributed() const override;
virtual std::unique_ptr<at::Storage> storage() const override;
virtual std::unique_ptr<at::Storage> storage(size_t size) const override;
virtual std::unique_ptr<at::Storage> storageFromBlob(void * data, int64_t size, const std::function<void(void*)> & deleter) const override;
virtual std::unique_ptr<at::Generator> generator() const override;
virtual const char * toString() const override;
virtual at::TypeID ID() const override;
virtual size_t elementSizeInBytes() const override;
static const char * typeString();
at::Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override;
virtual void s_copy(const Tensor & src, Tensor & dst) const override;
${type_derived_method_declarations}
private:
// checks that t is actually a Variable with the given expected_type
static Variable & checked_cast(const Type & expected_type, const Tensor & t, const char * name, int pos);
at::Tensor & unpack(const Tensor & t, const char * name, int pos) const;
at::Tensor & unpack_long(const Tensor & t, const char * name, int pos) const;
at::Tensor & unpack_byte(const Tensor & t, const char * name, int pos) const;
at::Tensor & unpack_any(const Tensor & t, const char * name, int pos) const;
at::Tensor unpack_opt(const Tensor & t, const char * name, int pos) const;
std::vector<at::Tensor> unpack(const at::TensorList &tl, const char *name, int pos) const;
Variable as_variable(const Scalar & scalar) const;
Variable as_variable(Tensor tensor) const;
std::tuple<Variable, Variable> as_variable(std::tuple<Tensor, Tensor> tensor) const;
std::tuple<Variable, Variable, Variable> as_variable(std::tuple<Tensor, Tensor, Tensor> tensor) const;
std::vector<Variable> as_variable(TensorList tensor) const;
private:
at::Type* baseType;
std::string str;
};
}} // namespace torch::autograd

View File

@ -1,26 +0,0 @@
#include "python_functions.h"
// ${generated_comment}
#include <Python.h>
#include <ATen/ATen.h>
#include "Functions.h"
#include "torch/csrc/autograd/python_cpp_function.h"
namespace torch { namespace autograd { namespace generated {
template<typename C>
static void addClass(PyTypeObject& type, const char* name,
PyGetSetDef* function_properties=NULL, PyMethodDef* function_methods=NULL)
{
_initFunctionPyTypeObject(type, name, function_properties, function_methods);
Py_INCREF(&type);
registerCppFunction(typeid(C), &type);
}
void initialize_autogenerated_functions() {
${py_function_initializers}
}
}}} // namespace torch::autograd::generated

View File

@ -1,11 +0,0 @@
#pragma once
// ${generated_comment}
// Python bindings for automatically generated autograd functions
namespace torch { namespace autograd { namespace generated {
void initialize_autogenerated_functions();
}}} // namespace torch::autograd::generated

View File

@ -1,48 +0,0 @@
#include "python_nn_functions.h"
// ${generated_comment}
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "python_nn_functions_dispatch.h"
using at::Tensor;
using at::Scalar;
using namespace torch::autograd::utils;
namespace torch { namespace autograd {
${py_nn_functions}
static PyMethodDef nn_functions[] = {
${py_nn_function_defs}
{NULL}
};
void initNNFunctions(PyObject* module) {
#if PY_MAJOR_VERSION == 2
PyObject* nn = Py_InitModule("torch._C._nn", nn_functions);
Py_XINCREF(nn); // Py_InitModule returns "borrowed" reference
#else
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
"torch._C._nn",
NULL,
-1,
nn_functions
};
PyObject* nn = PyModule_Create(&def);
#endif
if (!nn) {
throw python_error();
}
// steals a reference to nn
if (PyModule_AddObject(module, "_nn", nn) != 0) {
throw python_error();
}
}
}} // namespace torch::autograd

View File

@ -1,7 +0,0 @@
#include <Python.h>
namespace torch { namespace autograd {
void initNNFunctions(PyObject* module);
}} // namespace torch::autograd

View File

@ -1,19 +0,0 @@
#pragma once
// ${generated_comment}
#include <ATen/ATen.h>
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/utils/auto_gpu.h"
// Contains inline wrappers around ATen functions which release the GIL and
// switch to the correct CUDA device.
namespace torch { namespace autograd {
using namespace at;
using at::Generator;
${py_nn_function_dispatch}
}} // namespace torch::autograd

View File

@ -1,38 +0,0 @@
// ${generated_comment}
#include <Python.h>
#include "torch/csrc/Exceptions.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/utils/wrap_outputs.h"
#include "torch/csrc/utils/python_arg_parser.h"
#include "torch/csrc/utils/python_numbers.h"
#include "python_variable_methods_dispatch.h"
using at::Tensor;
using at::Scalar;
using namespace torch::autograd::utils;
namespace torch { namespace autograd {
${py_methods}
PyMethodDef variable_methods[] = {
{"__add__", (PyCFunction)THPVariable_add, METH_VARARGS | METH_KEYWORDS, NULL},
{"__radd__", (PyCFunction)THPVariable_add, METH_VARARGS | METH_KEYWORDS, NULL},
{"__iadd__", (PyCFunction)THPVariable_add_, METH_VARARGS | METH_KEYWORDS, NULL},
{"__rmul__", (PyCFunction)THPVariable_mul, METH_VARARGS | METH_KEYWORDS, NULL},
{"__mul__", (PyCFunction)THPVariable_mul, METH_VARARGS | METH_KEYWORDS, NULL},
{"__imul__", (PyCFunction)THPVariable_mul_, METH_VARARGS | METH_KEYWORDS, NULL},
{"__sub__", (PyCFunction)THPVariable_sub, METH_VARARGS | METH_KEYWORDS, NULL},
{"__isub__", (PyCFunction)THPVariable_sub_, METH_VARARGS | METH_KEYWORDS, NULL},
{"__div__", (PyCFunction)THPVariable_div, METH_VARARGS | METH_KEYWORDS, NULL},
{"__truediv__", (PyCFunction)THPVariable_div, METH_VARARGS | METH_KEYWORDS, NULL},
{"__idiv__", (PyCFunction)THPVariable_div_, METH_VARARGS | METH_KEYWORDS, NULL},
{"__mod__", (PyCFunction)THPVariable_remainder, METH_VARARGS | METH_KEYWORDS, NULL},
${py_method_defs}
{NULL}
};
}} // namespace torch::autograd

View File

@ -1,24 +0,0 @@
#pragma once
// ${generated_comment}
#include <ATen/ATen.h>
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/utils/auto_gpu.h"
// Contains inline wrappers around ATen functions which release the GIL and
// switch to the correct CUDA device.
namespace torch { namespace autograd {
using at::Tensor;
using at::Scalar;
using at::TensorList;
using at::IntList;
using at::Generator;
using at::SparseTensor;
using at::Storage;
${py_method_dispatch}
}} // namespace torch::autograd

View File

@ -9,14 +9,10 @@ class BeforeAfterCall(CWrapPlugin):
def insert_snippet(self, template, option, offset, name):
prepend_str = option.get(name)
if isinstance(prepend_str, dict):
backend = option['backends'][0]
prepend_str = prepend_str.get(backend, None)
if prepend_str is None:
return
if '$' in prepend_str:
before_call_template = Template(prepend_str)
before_call_template = Template(option[name])
args = {'arg' + str(i): self.cwrap.get_arg_accessor(arg, option) for i, arg
in enumerate(option['arguments'])}
prepend_str = before_call_template.substitute(args)
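As a rough illustration of the substitution performed by insert_snippet above: the configured snippet string is treated as a string.Template whose $argN placeholders are filled from the cwrap argument accessors. A minimal sketch, with made-up snippet and accessor strings standing in for what get_arg_accessor would actually return:

from string import Template

# Hypothetical before_call snippet and accessor strings; in the plugin the
# accessors come from self.cwrap.get_arg_accessor(arg, option).
snippet = Template("THTensor_(resizeAs)(LIBRARY_STATE $arg0, $arg1);")
args = {'arg0': '((THPTensor*)self)->cdata', 'arg1': '((THPTensor*)other)->cdata'}
print(snippet.substitute(args))
# -> THTensor_(resizeAs)(LIBRARY_STATE ((THPTensor*)self)->cdata, ((THPTensor*)other)->cdata);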

View File

@ -3,7 +3,7 @@ from string import Template
# Arguments to the Broadcast Plugin:
# broadcast: args_to_broadcast_against [inplace] [fallback]
# [args_to_broadcast_against]: either a single argument (e.g. "arg1") or a comma-separated
# [args_to_broadcast_against]: either a single argument (e.g. "arg1") or a comma-seperated
# list of two arguments (e.g. "tensor1,tensor2") indicating
# arguments to broadcast specified argument (usually "self") against
# [inplace] will generate code for in-place function, which doesn't allow the in-place
@ -51,37 +51,22 @@ class Broadcast(CWrapPlugin):
def getPreArgStringTemplate(self, type=None):
if type is None:
ret = """THTensor *${arg_op_other}_save = ${arg_op_other};
THTensorPtr ${arg_op_other}_guard(nullptr);\n"""
THTensorPtr ${arg_op_other}_guard(THTensor_(new)(LIBRARY_STATE_NOARGS));\n"""
else:
cpu_t = "TH" + type + "Tensor"
gpu_t = "THCuda" + type + "Tensor"
ret = ("#if !IS_CUDA\n" +
cpu_t + " *${arg_op_other}_save = ${arg_op_other};\n" +
cpu_t + "Ptr ${arg_op_other}_guard(nullptr);\n" +
cpu_t + "Ptr ${arg_op_other}_guard(" + cpu_t + "_new(LIBRARY_STATE_NOARGS));\n" +
"#else\n" +
gpu_t + " *${arg_op_other}_save = ${arg_op_other};\n" +
"THPPointer<" + gpu_t + "> ${arg_op_other}_guard(nullptr);\n" +
"THPPointer<" + gpu_t + "> ${arg_op_other}_guard(\n" + gpu_t + "_new(LIBRARY_STATE_NOARGS));\n" +
"#endif\n")
return Template(ret)
def getNewForExpand(self, type):
if type is None:
ret = """THTensor_(new)(LIBRARY_STATE_NOARGS);\n"""
else:
cpu_t = "TH" + type + "Tensor"
gpu_t = "THCuda" + type + "Tensor"
ret = ("#if !IS_CUDA\n" +
cpu_t + "_new(LIBRARY_STATE_NOARGS);\n" +
"#else\n" +
gpu_t + "_new(LIBRARY_STATE_NOARGS);\n" +
"#endif\n")
return ret
def getExpandTemplate(self, same_size_check, expand_call, success_code, raise_errors):
def getExpandTemplate(self, expand_call, success_code, raise_errors):
if not raise_errors:
return Template(
"bool try_expand = !" + same_size_check + "\n" +
"if (try_expand) {\n" +
"bool expand_success = false;\n" +
"try {\n" +
expand_call +
@ -90,44 +75,29 @@ class Broadcast(CWrapPlugin):
"catch (std::exception &e) {}\n" +
"if(expand_success) {\n" +
success_code +
"\n}" +
"\n}\n")
else:
return Template(
"bool try_expand = !" + same_size_check + "\n" +
"if (try_expand) {\n" +
expand_call + "\n" +
success_code + "\n"
"}\n")
success_code + "\n")
def getOutPlacePreExpand2Template(self, type_op_a, type_op_other, raise_errors):
size_check = """THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other}->size, ${arg_op_other}->nDimension);"""
expand_code = ("${arg_op_a}_guard = \n" + self.getNewForExpand(type_op_a) + "\n" +
"${arg_op_other}_guard = \n" + self.getNewForExpand(type_op_other) + "\n" +
"""expand_outplace2(LIBRARY_STATE ${arg_op_a}_guard.get(), ${arg_op_other}_guard.get(),
${arg_op_a}, ${arg_op_other},
\"${op_a}\", \"${op_other}\", !${raise_errors});""")
def getOutPlacePreExpand2Template(self, raise_errors):
expand_code = """expand_outplace2(LIBRARY_STATE ${arg_op_a}_guard.get(), ${arg_op_other}_guard.get(),
${arg_op_a}, ${arg_op_other},
\"${op_a}\", \"${op_other}\", !${raise_errors});"""
success_code = """${arg_op_a} = ${arg_op_a}_guard.get();
${arg_op_other} = ${arg_op_other}_guard.get();"""
return self.getExpandTemplate(size_check, expand_code, success_code, raise_errors)
return self.getExpandTemplate(expand_code, success_code, raise_errors)
def getOutPlacePreExpand3Template(self, type_op_a, type_op_other1, type_op_other2, raise_errors):
size_check = """(THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other1}->size, ${arg_op_other1}->nDimension) &&
THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other2}->size, ${arg_op_other2}->nDimension));"""
expand_code = ("${arg_op_a}_guard = \n" + self.getNewForExpand(type_op_a) + "\n" +
"${arg_op_other1}_guard = \n" + self.getNewForExpand(type_op_other1) + "\n" +
"${arg_op_other2}_guard = \n" + self.getNewForExpand(type_op_other2) + "\n" +
"""expand_outplace3(LIBRARY_STATE ${arg_op_a}_guard.get(),
def getOutPlacePreExpand3Template(self, raise_errors):
expand_code = """expand_outplace3(LIBRARY_STATE ${arg_op_a}_guard.get(),
${arg_op_other1}_guard.get(), ${arg_op_other2}_guard.get(),
${arg_op_a}, ${arg_op_other1}, ${arg_op_other2},
\"${op_a}\", \"${op_other1}\", \"${op_other2}\", !${raise_errors});""")
\"${op_a}\", \"${op_other1}\", \"${op_other2}\", !${raise_errors});"""
success_code = """${arg_op_a} = ${arg_op_a}_guard.get();
${arg_op_other1} = ${arg_op_other1}_guard.get();
${arg_op_other2} = ${arg_op_other2}_guard.get();"""
return self.getExpandTemplate(size_check, expand_code, success_code, raise_errors)
return self.getExpandTemplate(expand_code, success_code, raise_errors)
OUT_PLACE_PRE_EXPAND_PRE_DIM_TEMPLATE = Template(
"""if(THTensor_(nDimension)(LIBRARY_STATE ${arg_op_dim}) <= ${arg_op_dim_value}) {
@ -147,40 +117,28 @@ class Broadcast(CWrapPlugin):
"""THLongStoragePtr ${arg_op_a}_storage(
THLongStorage_newWithSize3(${arg_op_a}_dim0_size, ${arg_op_a}_dim1_size, ${arg_op_a}_dim2_size));\n""")
def getOutPlacePreExpandPostDimTemplate(self, type_op_a, raise_errors):
size_check = """THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_a}_storage->data, ${arg_op_a}_storage->size);"""
expand_code = ("${arg_op_a}_guard = \n" + self.getNewForExpand(type_op_a) + "\n" +
"""expand(LIBRARY_STATE ${arg_op_a}_guard.get(), ${arg_op_a}, ${arg_op_a}_storage);""")
def getOutPlacePreExpandPostDimTemplate(self, raise_errors):
expand_code = """expand(LIBRARY_STATE ${arg_op_a}_guard.get(), ${arg_op_a}, ${arg_op_a}_storage);"""
success_code = """${arg_op_a} = ${arg_op_a}_guard.get();"""
return self.getExpandTemplate(size_check, expand_code, success_code, raise_errors)
return self.getExpandTemplate(expand_code, success_code, raise_errors)
OUT_PLACE_PRE_TEMPLATE = Template(
"""${code_arg_op_a}${code_arg_op_other1}${code_arg_op_other2}
${expand_code}""")
def getInPlacePreExpand1Template(self, type_op_other, raise_errors):
size_check = """THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other}->size, ${arg_op_other}->nDimension);"""
expand_code = ("${arg_op_other}_guard = \n" + self.getNewForExpand(type_op_other) + "\n" +
"""expand_inplace1(LIBRARY_STATE ${arg_op_other}_guard.get(), ${arg_op_other}, ${arg_op_a},
\"${op_other}\", \"${op_a}\", !${raise_errors});""")
def getInPlacePreExpand1Template(self, raise_errors):
expand_code = """expand_inplace1(LIBRARY_STATE ${arg_op_other}_guard.get(), ${arg_op_other}, ${arg_op_a},
\"${op_other}\", \"${op_a}\", !${raise_errors});"""
success_code = """${arg_op_other} = ${arg_op_other}_guard.get();"""
return self.getExpandTemplate(size_check, expand_code, success_code, raise_errors)
return self.getExpandTemplate(expand_code, success_code, raise_errors)
def getInPlacePreExpand2Template(self, type_op_other1, type_op_other2, raise_errors):
size_check = """(THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other1}->size, ${arg_op_other1}->nDimension) &&
THSize_isSameSizeAs(${arg_op_a}->size, ${arg_op_a}->nDimension,
${arg_op_other2}->size, ${arg_op_other2}->nDimension));"""
expand_code = ("${arg_op_other1}_guard = \n" + self.getNewForExpand(type_op_other1) + "\n" +
"${arg_op_other2}_guard = \n" + self.getNewForExpand(type_op_other2) + "\n" +
"""expand_inplace2(LIBRARY_STATE ${arg_op_other1}_guard.get(), ${arg_op_other2}_guard.get(),
def getInPlacePreExpand2Template(self, raise_errors):
expand_code = """expand_inplace2(LIBRARY_STATE ${arg_op_other1}_guard.get(), ${arg_op_other2}_guard.get(),
${arg_op_other1}, ${arg_op_other2}, ${arg_op_a},
\"${op_other1}\", \"${op_other2}\", \"${op_a}\", !${raise_errors});""")
\"${op_other1}\", \"${op_other2}\", \"${op_a}\", !${raise_errors});"""
success_code = """${arg_op_other1} = ${arg_op_other1}_guard.get();
${arg_op_other2} = ${arg_op_other2}_guard.get();"""
return self.getExpandTemplate(size_check, expand_code, success_code, raise_errors)
return self.getExpandTemplate(expand_code, success_code, raise_errors)
IN_PLACE_PRE_TEMPLATE = Template(
"""${code_arg_op_other1}${code_arg_op_other2}
@ -260,7 +218,6 @@ class Broadcast(CWrapPlugin):
"arg_op_other": arg_op_c,
"raise_errors": raise_errors
}
raise_errors_s = raise_errors == "true"
if in_place:
code_arg_op_other1 = self.getPreArgStringTemplate(type=type_op_b).substitute(op_b_mapping)
@ -268,14 +225,14 @@ class Broadcast(CWrapPlugin):
self.getPreArgStringTemplate(type=type_op_c).substitute(op_c_mapping) if op_c else "")
if op_c:
expand_code = self.getInPlacePreExpand2Template(type_op_b, type_op_c, raise_errors_s).substitute(
expand_code = self.getInPlacePreExpand2Template(raise_errors == "true").substitute(
op_b_mapping,
op_other1=op_b,
op_other2=op_c,
arg_op_other1=arg_op_b,
arg_op_other2=arg_op_c)
else:
expand_code = self.getInPlacePreExpand1Template(type_op_b, raise_errors_s).substitute(op_b_mapping)
expand_code = self.getInPlacePreExpand1Template(raise_errors == "true").substitute(op_b_mapping)
new_code_pre.append(self.IN_PLACE_PRE_TEMPLATE.substitute(
arg_op_a=arg_op_a,
@ -320,7 +277,7 @@ class Broadcast(CWrapPlugin):
arg_op_dim0=dims_kvs[0]["arg_op"],
arg_op_dim1=dims_kvs[1]["arg_op"],
arg_op_dim2=dims_kvs[2]["arg_op"])
expand_code += self.getOutPlacePreExpandPostDimTemplate(None, raise_errors_s).substitute(
expand_code += self.getOutPlacePreExpandPostDimTemplate(raise_errors == "true").substitute(
arg_op_a=arg_op_a,
raise_errors=raise_errors)
post_code = self.POST_TEMPLATE.substitute(arg_op_other=arg_op_a)
@ -332,8 +289,7 @@ class Broadcast(CWrapPlugin):
if op_c else "")
if op_c:
expand_template = self.getOutPlacePreExpand3Template(None, type_op_b, type_op_c, raise_errors_s)
expand_code = expand_template.substitute(
expand_code = self.getOutPlacePreExpand3Template(raise_errors == "true").substitute(
op_b_mapping,
op_other1=op_b,
op_other2=op_c,
@ -341,8 +297,8 @@ class Broadcast(CWrapPlugin):
arg_op_other2=arg_op_c)
else:
expand_code = self.getOutPlacePreExpand2Template(None, type_op_b, raise_errors_s).substitute(
op_b_mapping)
expand_code = self.getOutPlacePreExpand2Template(
raise_errors == "true").substitute(op_b_mapping)
post_code = self.POST_TEMPLATE.substitute(arg_op_other=arg_op_a)
post_code += self.POST_TEMPLATE.substitute(op_b_mapping)
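The broadcast option handled by this plugin is consumed from .cwrap declarations. A hypothetical fragment is sketched below; the argument names and layout are illustrative only and not copied from the real cwrap files:

# Hypothetical fragment of a .cwrap declaration using the broadcast option
# documented at the top of this file: broadcast "other" against "self", with
# fallback to the old pointwise behaviour on size mismatch.
EXAMPLE_BROADCAST_ARGUMENT = """
    - THTensor* self
    - arg: THTensor* other
      broadcast: self fallback
"""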

View File

@ -18,4 +18,4 @@ class ConstantArguments(CWrapPlugin):
def get_arg_accessor(self, arg, option):
if arg['type'] == 'CONSTANT':
return arg.get('default', arg['name'])
return arg['name']

View File

@ -0,0 +1,223 @@
import copy
from string import Template
from . import CWrapPlugin
class GenericNN(CWrapPlugin):
INPUT_TYPE_CHECK = Template("checkTypes(is_cuda, $type, $tensor_args);")
HEADER_TEMPLATE = Template("void $name($args);")
WRAPPER_TEMPLATE = Template("""\
void $name($args)
{
bool is_cuda = $input->isCuda();
auto type = $input->type();
$type_check
$options
} else {
throw std::runtime_error("invalid arguments");
}
}
""")
THNN_TEMPLATE = Template("""\
if (type == thpp::Type::FLOAT) {
THNN_Float$name(
NULL,
$float_args);
} else if (type == thpp::Type::DOUBLE) {
THNN_Double$name(
NULL,
$double_args);
} else {
throw std::runtime_error("unsupported tensor type");
}""")
THCUNN_TEMPLATE = Template("""\
#ifdef WITH_CUDA
if (type == thpp::Type::FLOAT) {
THNN_Cuda$name(
state,
$float_args);
} else if (type == thpp::Type::DOUBLE) {
THNN_CudaDouble$name(
state,
$double_args);
} else if (type == thpp::Type::HALF) {
THNN_CudaHalf$name(
state,
$half_args);
} else {
throw std::runtime_error("unsupported tensor type");
}
#endif
""")
INDEX_TENSOR_TYPES = {'THIndexTensor*', 'THCIndexTensor*'}
REAL_TENSOR_TYPES = {'THTensor*', 'THCTensor*'}
INPUT_ARGUMENT_MAP = {
'THNNState*': 'void*',
'THCState*': 'void*',
'THTensor*': 'thpp::Tensor*',
'THCTensor*': 'thpp::Tensor*',
'THIndexTensor*': 'thpp::Tensor*',
'THCIndexTensor*': 'thpp::Tensor*',
'THIndex_t': 'long',
'accreal': 'double',
}
def __init__(self, header=False):
self.header = header
self.declarations = []
def process_full_file(self, base_wrapper):
if self.header:
wrapper = '#pragma once\n\n'
wrapper += '#include <THPP/Tensor.hpp>\n\n'
else:
wrapper = '#include "THNN_generic.h"\n'
wrapper = '#include "THNN_generic.inc.h"\n\n'
wrapper += 'namespace torch { namespace nn {\n\n'
wrapper += base_wrapper
wrapper += '}} // namespace torch::nn\n'
return wrapper
def process_declarations(self, declarations):
for declaration in declarations:
base_args = declaration['options'][0]['arguments']
for option in declaration['options']:
for idx, arg in enumerate(option['arguments']):
arg['assign_name'] = base_args[idx]['name']
arg['assign_type'] = base_args[idx]['type']
if idx != 1:
arg['ignore_check'] = True
return declarations
def get_arg_accessor(self, arg, option):
return self.get_type_unpack(arg, option)
def process_pre_arg_assign(self, pre_arg_assign, option):
if option['backend'] == 'cunn':
# Enclose arg_assign with CUDA guard
pre_arg_assign.append('#ifdef WITH_CUDA')
return pre_arg_assign
def process_option_code_template(self, template, option):
template = []
if option['backend'] == 'cunn':
template.append('#endif')
def base_cast(arg, CReal, real):
name = 'arg_' + arg['assign_name']
type = arg['type']
if type in self.REAL_TENSOR_TYPES:
return ('(TH{CReal}Tensor*){name}->cdata()'
.format(CReal=CReal, name=name))
elif type in self.INDEX_TENSOR_TYPES:
return '({type}){name}->cdata()'.format(type=type, name=name)
elif type == 'THCState*':
return '({}){}'.format(type, name)
elif type == 'real':
if real == 'half':
return 'THC_float2half({})'.format(name)
return '({real}){name}'.format(real=real, name=name)
return name
def cast(arg, CReal, real):
expr = base_cast(arg, CReal, real)
if arg.get('optional', False):
name = 'arg_' + arg['assign_name']
return '{name} ? {expr} : NULL'.format(name=name, expr=expr)
return expr
if option['backend'] == 'nn':
float_args = []
double_args = []
for idx, arg in enumerate(option['arguments']):
float_args.append(cast(arg, 'Float', 'float'))
double_args.append(cast(arg, 'Double', 'double'))
code = self.THNN_TEMPLATE.substitute(
name=option['cname'],
float_args=',\n'.join(float_args),
double_args=',\n'.join(double_args))
template.append(code)
elif option['backend'] == 'cunn':
float_args = []
double_args = []
half_args = []
for idx, arg in enumerate(option['arguments']):
float_args.append(cast(arg, 'Cuda', 'float'))
double_args.append(cast(arg, 'CudaDouble', 'double'))
half_args.append(cast(arg, 'CudaHalf', 'half'))
code = self.THCUNN_TEMPLATE.substitute(
name=option['cname'],
float_args=',\n'.join(float_args),
double_args=',\n'.join(double_args),
half_args=',\n'.join(half_args))
template.append(code)
template.append('')
return template
def get_type_unpack(self, arg, option):
return Template(arg.get('assign_name', arg['name']))
def get_type_check(self, arg, option):
if option['backend'] == 'cunn':
return Template('is_cuda')
else:
return Template('!is_cuda')
def get_assign_args(self, arguments):
assign_args = []
for arg in arguments:
arg = copy.copy(arg)
new_type = self.INPUT_ARGUMENT_MAP.get(arg['type'])
if new_type is not None:
arg['type'] = new_type
assign_args.append(arg)
return assign_args
def get_wrapper_template(self, declaration):
# get assign arguments string
base_arguments = declaration['options'][0]['arguments']
args = self.get_assign_args(base_arguments)
arg_str = ', '.join([arg['type'] + ' ' + arg['name'] for arg in args])
if self.header:
return Template(self.HEADER_TEMPLATE.safe_substitute(args=arg_str))
def get_checked_args(tensor_types):
checked_args = []
for arg in base_arguments:
if arg['type'] in tensor_types:
name = arg.get('assign_name', arg['name'])
name_str = name
if arg.get('optional', False):
name_str = '?' + name_str
checked_args += ['"' + name_str + '"', name]
checked_args += ['NULL']
return checked_args
real_args = get_checked_args(self.REAL_TENSOR_TYPES)
long_args = get_checked_args(self.INDEX_TENSOR_TYPES)
# check input types
types_checks = []
if len(real_args) > 1:
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
type='type', tensor_args=', '.join(real_args)))
if len(long_args) > 1:
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
type='thpp::Type::LONG', tensor_args=', '.join(long_args)))
return Template(self.WRAPPER_TEMPLATE.safe_substitute(
input=args[0]['name'],
args=arg_str,
type_check='\n '.join(types_checks)))
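To illustrate the checked-argument lists that get_wrapper_template builds above, here is a rough standalone sketch of get_checked_args applied to hypothetical arguments (names invented for the example):

# Sketch of the list get_checked_args produces for the real-tensor arguments of
# a hypothetical updateOutput; optional tensors get a leading '?' in their label.
base_arguments = [
    {'name': 'input',  'type': 'THTensor*'},
    {'name': 'output', 'type': 'THTensor*'},
    {'name': 'weight', 'type': 'THTensor*', 'optional': True},
    {'name': 'target', 'type': 'THIndexTensor*'},
]

def get_checked_args(tensor_types):
    checked_args = []
    for arg in base_arguments:
        if arg['type'] in tensor_types:
            name = arg.get('assign_name', arg['name'])
            name_str = '?' + name if arg.get('optional', False) else name
            checked_args += ['"' + name_str + '"', name]
    checked_args += ['NULL']
    return checked_args

print(get_checked_args({'THTensor*', 'THCTensor*'}))
# -> ['"input"', 'input', '"output"', 'output', '"?weight"', 'weight', 'NULL']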

View File

@ -40,7 +40,6 @@ class StandaloneExtension(CWrapPlugin):
'bool': Template('($arg == Py_True ? true : false)'),
'int': Template('THPUtils_unpackLong($arg)'),
'long': Template('THPUtils_unpackLong($arg)'),
'int64_t': Template('THPUtils_unpackLong($arg)'),
'void*': Template('(void*)THPUtils_unpackLong($arg)'),
'THGenerator*': Template('THPGenerator_CData((THPGenerator*)$arg)'),
}
@ -60,7 +59,6 @@ class StandaloneExtension(CWrapPlugin):
'bool': Template('PyBool_Check($arg)'),
'int': Template('THPUtils_checkLong($arg)'),
'long': Template('THPUtils_checkLong($arg)'),
'int64_t': Template('THPUtils_checkLong($arg)'),
'void*': Template('THPUtils_checkLong($arg)'),
'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'),
}
@ -93,7 +91,6 @@ PyObject * $name(PyObject *_unused, PyObject *args)
'THIntTensor*': 'torch.IntTensor',
'THLongStorage*': 'torch.LongStorage',
'long': 'int',
'int64_t': 'int',
'int': 'int',
'real': 'float',
'half': 'float',

View File

@ -200,27 +200,12 @@ ${cpu}
self.stateless_declarations = []
self.docstrings = []
BACKEND_SUBSTITUTIONS = {
'CPU': 'TH',
'CUDA': 'THCuda',
}
def substitute_tensor_backend(self, arg, option):
if 'Backend' in arg['type']:
arg['type'] = arg['type'].replace('Backend',
self.BACKEND_SUBSTITUTIONS.get(option['backends'][0]))
# handle the fact that THCudaTensor isn't THCudaFloatTensor
if option['backends'][0] == 'CUDA' and 'Float' in arg['type']:
arg['type'] = arg['type'].replace('Float', '')
def get_type_unpack(self, arg, option):
self.substitute_tensor_backend(arg, option)
return self.TYPE_UNPACK.get(arg['type'], None)
def get_type_check(self, arg, option):
if arg['type'] == 'THSize*' and arg.get('long_args', False):
return self.SIZE_VARARG_CHECK
self.substitute_tensor_backend(arg, option)
return self.TYPE_CHECK.get(arg['type'], None)
# TODO: argument descriptions shouldn't be part of THP, but rather a general cwrap thing
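The substitute_tensor_backend helper removed above rewrites 'Backend'-parameterized types per backend, with the quirk that the CUDA float tensor type is named THCudaTensor rather than THCudaFloatTensor. A standalone sketch of the same mapping:

BACKEND_SUBSTITUTIONS = {'CPU': 'TH', 'CUDA': 'THCuda'}

def substitute_tensor_backend(type_str, backend):
    # 'BackendFloatTensor*' -> 'THFloatTensor*' on CPU, 'THCudaTensor*' on CUDA
    type_str = type_str.replace('Backend', BACKEND_SUBSTITUTIONS[backend])
    if backend == 'CUDA' and 'Float' in type_str:
        type_str = type_str.replace('Float', '')
    return type_str

print(substitute_tensor_backend('BackendFloatTensor*', 'CPU'))   # THFloatTensor*
print(substitute_tensor_backend('BackendFloatTensor*', 'CUDA'))  # THCudaTensor*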

View File

@ -8,9 +8,7 @@ class WrapDim(CWrapPlugin):
"""${arg_tensor}->nDimension""")
CODE_TEMPLATE = Template(
"""THPUtils_assert(${ndim} > 0,
"dimension specified as %d, but tensor has no dimensions", ${arg_dim});
THPUtils_assert(${arg_dim} >= -(${ndim}) && ${arg_dim} < (${ndim}),
"""THPUtils_assert(${arg_dim} >= -(${ndim}) && ${arg_dim} < (${ndim}),
"dimension out of range (expected to be in range of [%d, %d], but got %d)",
-(${ndim}), (${ndim})-1, ${arg_dim});
if (${arg_dim} < 0) ${arg_dim} += (${ndim});""")
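The dimension-wrapping template above asserts that the dimension lies in [-ndim, ndim) and then shifts negative dimensions into the positive range; in plain Python the same logic is roughly:

def wrap_dim(dim, ndim):
    # Mirrors the generated check: dim must lie in [-ndim, ndim); negative
    # dims are then shifted into the positive range.
    if not (-ndim <= dim < ndim):
        raise IndexError("dimension out of range (expected to be in range of "
                         "[%d, %d], but got %d)" % (-ndim, ndim - 1, dim))
    return dim + ndim if dim < 0 else dim

print(wrap_dim(-1, 3))  # 2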

View File

@ -431,5 +431,6 @@ from .ReturnArguments import ReturnArguments
from .GILRelease import GILRelease
from .AutoGPU import AutoGPU
from .CuDNNPlugin import CuDNNPlugin
from .GenericNN import GenericNN
from .WrapDim import WrapDim
from .Broadcast import Broadcast

View File

@ -1,41 +0,0 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cmake \
git \
curl \
vim \
ca-certificates \
libnccl2=2.0.5-3+cuda9.0 \
libnccl-dev=2.0.5-3+cuda9.0 \
libjpeg-dev \
libpng-dev &&\
rm -rf /var/lib/apt/lists/*
ENV PYTHON_VERSION=3.6
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
# /opt/conda/bin/conda install conda-build && \
/opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl&& \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH
RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda90
# This must be done before pip so that requirements.txt is available
WORKDIR /opt/pytorch
COPY . .
RUN git submodule update --init
RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
pip install -v .
RUN git clone https://github.com/pytorch/vision.git && cd vision && pip install -v .
WORKDIR /workspace
RUN chmod -R a+w /workspace

View File

@ -1,23 +0,0 @@
#!/bin/sh
# This script should be executed in the pytorch root folder.

TEMP_DIR=tools/temp
set -ex
# Assumed to be run like tools/gen_onnx.sh
(cd torch/lib/nanopb/generator/proto && make)
# It always searches the same dir as the proto, so
# we have got to copy the option file over
mkdir -p $TEMP_DIR
cp torch/csrc/onnx/onnx.options $TEMP_DIR/onnx.options
wget https://raw.githubusercontent.com/onnx/onnx/master/onnx/onnx.proto -O $TEMP_DIR/onnx.proto
protoc --plugin=protoc-gen-nanopb=$PWD/torch/lib/nanopb/generator/protoc-gen-nanopb \
$TEMP_DIR/onnx.proto \
--nanopb_out=-T:.
# NB: -T suppresses timestamp. See https://github.com/nanopb/nanopb/issues/274
# nanopb generated C files are valid CPP! Yay!
cp $TEMP_DIR/onnx.pb.c torch/csrc/onnx/onnx.pb.cpp
cp $TEMP_DIR/onnx.pb.h torch/csrc/onnx/onnx.pb.h
rm -r $TEMP_DIR

View File

View File

@ -1,124 +0,0 @@
import os
import argparse
from collections import defaultdict
from tools.shared.module_loader import import_module
from itertools import count
from ..autograd.gen_variable_type import load_aten_declarations, CodeTemplate, write, \
FALLTHROUGH_RETURN_TYPES, FALLTHROUGH_FUNCTIONS, GENERATED_COMMENT
template_path = os.path.join(os.path.dirname(__file__), 'templates')
ATEN_DISPATCH_H = CodeTemplate.from_file(template_path + '/aten_dispatch.h')
ATEN_DISPATCH_CPP = CodeTemplate.from_file(template_path + '/aten_dispatch.cpp')
ATTR_METHOD_MAP = {
'int64_t': 'i',
'IntList': 'is',
'Scalar': 't',
'bool': 'i',
'double': 'f',
'std::array<bool, 2>': 'is',
'std::array<bool, 3>': 'is',
}
TYPE_CASTS = {
'std::array<bool, 2>': 'as_bool_array<2>',
'std::array<bool, 3>': 'as_bool_array<3>',
'Scalar': 'Scalar',
'IntList': 'std::vector<int64_t>',
}
ATTR_ASSIGNMENT = CodeTemplate("""\
auto ${name} = ${type_cast}(node->${method}(stringToSymbol("${name}")));\
""")
CALL_NAMESPACE = CodeTemplate("at::${name}(${args})")
CALL_METHOD = CodeTemplate("vars[0].${name}(${args})")
CONSTRUCTOR = CodeTemplate("""\
{"${descriptor}", [](Node *node) {
${assignments}
return TensorOp([=](const variable_list& vars) -> variable_list {
return pack_list(${call});
}, "${name}", ${num_inputs});
}},
""")
def is_jit_op(decl):
return (not decl['api_name'].endswith('_') and
not decl['name'].endswith('_out') and
not decl['name'].endswith('_forward') and
not any(arg['simple_type'] == 'Generator' for arg in decl['arguments']) and
not any(arg['simple_type'] == 'SparseTensor' for arg in decl['arguments']) and
not decl['return_type'] in FALLTHROUGH_RETURN_TYPES and
not decl['name'] in FALLTHROUGH_FUNCTIONS)
def gen_jit_dispatch(declarations, out):
aten_decls = load_aten_declarations(declarations)
jit_decls = [d for d in aten_decls if is_jit_op(d)]
def is_tensor_arg(arg):
return arg['simple_type'] in {'Tensor', 'TensorList'}
ops = {}
for decl in jit_decls:
arguments = decl['arguments']
name = decl['name']
scalar_args = [arg for arg in arguments if not is_tensor_arg(arg)]
# Descriptor is a unique identifier for a particular overload of an op
attr_names = sorted([arg['name'] for arg in scalar_args])
num_inputs = len(arguments) - len(scalar_args)
descriptor = '-'.join([decl['name'], str(num_inputs)] + attr_names)
# All scalar args need to be assigned, so they can be captured by a lambda
assignments = [ATTR_ASSIGNMENT.substitute(type=arg['simple_type'],
type_cast=TYPE_CASTS.get(arg['simple_type'], arg['simple_type']),
name=arg['name'],
method=ATTR_METHOD_MAP[arg['simple_type']])
for arg in scalar_args]
# Generate the actual ATen call. This gets a bit tricky because of
# TensorList arguments, and functions that are only available as methods.
if 'namespace' in decl['method_of']:
if any(arg['simple_type'] == 'TensorList' for arg in arguments):
assert sum(map(is_tensor_arg, arguments)) == 1
args = ['as_tensor_list(vars)' if is_tensor_arg(arg) else arg['name']
for arg in arguments]
else:
tensor_id = iter(count(start=0))
args = ['vars[{}]'.format(next(tensor_id)) if is_tensor_arg(arg) else arg['name']
for arg in arguments]
call = CALL_NAMESPACE.substitute(name=name, args=args)
else:
tensor_id = iter(count(start=1))
args = ['vars[{}]'.format(next(tensor_id)) if is_tensor_arg(arg) else arg['name']
for arg in arguments[1:]]
call = CALL_METHOD.substitute(name=name, args=args)
constructor = CONSTRUCTOR.substitute(descriptor=descriptor, name=name, call=call,
assignments=assignments, num_inputs=num_inputs)
assert descriptor not in ops, descriptor
ops[descriptor] = constructor
# Sort the generated snippets to ensure that the generation is deterministic
env = {'constructors': sorted(list(ops.values()))}
write(out, 'aten_dispatch.h', ATEN_DISPATCH_H, env)
write(out, 'aten_dispatch.cpp', ATEN_DISPATCH_CPP, env)
def main():
parser = argparse.ArgumentParser(
description='Generate JIT op dispatch')
parser.add_argument('declarations', metavar='DECL',
help='path to Declarations.yaml')
parser.add_argument('out', metavar='OUT',
help='path to output directory')
args = parser.parse_args()
gen_jit_dispatch(args.declarations, args.out)
if __name__ == '__main__':
main()
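The descriptor built in gen_jit_dispatch above (and matched again by getDescriptor in the generated aten_dispatch.cpp) is just the op name, the number of tensor inputs, and the sorted scalar attribute names joined with dashes. A small sketch with a hypothetical declaration:

# Hypothetical declaration: two tensor inputs plus two scalar attributes.
decl = {
    'name': 'hypothetical_op',
    'arguments': [
        {'name': 'self',    'simple_type': 'Tensor'},
        {'name': 'other',   'simple_type': 'Tensor'},
        {'name': 'alpha',   'simple_type': 'Scalar'},
        {'name': 'inplace', 'simple_type': 'bool'},
    ],
}
scalar_args = [a for a in decl['arguments'] if a['simple_type'] not in {'Tensor', 'TensorList'}]
attr_names = sorted(a['name'] for a in scalar_args)
num_inputs = len(decl['arguments']) - len(scalar_args)
descriptor = '-'.join([decl['name'], str(num_inputs)] + attr_names)
print(descriptor)  # hypothetical_op-2-alpha-inplace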

View File

@ -1,72 +0,0 @@
#include "aten_dispatch.h"
#include "torch/csrc/jit/interned_strings.h"
#include "torch/csrc/utils/functional.h"
#include <unordered_map>
#include <cstring>
// ${generated_comment}
namespace torch { namespace jit {
using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::Tensor;
using at::IntList;
using at::TensorList;
using operator_constructor = std::function<TensorOp(jit::Node*)>;
namespace {
variable_list pack_list(Tensor v) { return { std::move(v) }; }
variable_list pack_list(Scalar v) { return { v.toTensor() }; }
variable_list pack_list(std::vector<Tensor> t) { return fmap<Variable>(t); }
variable_list pack_list(std::tuple<Tensor, Tensor> v) {
return { std::move(std::get<0>(v)), std::move(std::get<1>(v)) };
}
variable_list pack_list(std::tuple<Tensor, Tensor, Tensor> v) {
return { std::get<0>(v), std::get<1>(v), std::get<2>(v) };
}
std::vector<Tensor> as_tensor_list(const variable_list& vars) {
return fmap(vars, [](Variable v) { return static_cast<Tensor>(v); });
}
template<size_t N>
std::array<bool, N> as_bool_array(const std::vector<int64_t>& vec) {
std::array<bool, N> res;
JIT_ASSERT(vec.size() == N);
std::copy(vec.begin(), vec.end(), res.begin());
return res;
}
std::unordered_map<std::string, operator_constructor> constructors = {
${constructors}
};
std::string getDescriptor(jit::Node* n) {
std::stringstream s;
s << symbolToString(n->kind()) << "-" << n->inputs().size();
std::vector<const char*> attr_names = fmap(n->attributeNames(), &symbolToString);
std::sort(attr_names.begin(), attr_names.end(), [](const char *a, const char *b) {
return std::strcmp(a, b) < 0;
});
for (const auto & name : attr_names)
s << "-" << name;
return s.str();
}
} // anonymous namespace
TensorOp getTensorOp(jit::Node* n) {
auto signature = getDescriptor(n);
try {
return constructors.at(signature)(n);
} catch (std::out_of_range &e) {
throw std::runtime_error("Unsupported op descriptor: " + signature + ". "
"File a bug report.");
}
};
}} // namespace torch::jit

View File

@ -1,25 +0,0 @@
#include "torch/csrc/jit/ir.h"
#include "torch/csrc/autograd/function.h"
#include <functional>
// ${generated_comment}
namespace torch { namespace jit {
struct TensorOp {
using op_type = std::function<autograd::variable_list(const autograd::variable_list&)>;
TensorOp(op_type op, std::string name, size_t num_inputs)
: op(op)
, name(name)
, num_inputs(num_inputs) {}
const op_type op;
const std::string name;
const size_t num_inputs;
};
TensorOp getTensorOp(jit::Node* n);
}} // namespace torch::jit;

View File

@ -1,2 +1,2 @@
from .generate_wrappers import generate_wrappers, wrap_function, \
import_module
import_module, wrap_generic_function

View File

@ -2,7 +2,7 @@ import os
import sys
from string import Template, ascii_lowercase
from ..cwrap import cwrap
from ..cwrap.plugins import StandaloneExtension, NullableArguments, AutoGPU
from ..cwrap.plugins import StandaloneExtension, GenericNN, NullableArguments, AutoGPU
from ..shared import import_module
BASE_PATH = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
@ -98,6 +98,7 @@ def wrap_function(name, type, arguments):
def generate_wrappers():
wrap_nn()
wrap_cunn()
wrap_generic()
def wrap_nn():
@ -128,3 +129,67 @@ def wrap_cunn():
NullableArguments(),
AutoGPU(has_self=False),
])
GENERIC_FUNCTION_TEMPLATE = Template("""\
[[
name: $name
return: void
options:
""")
def wrap_generic_function(name, backends):
declaration = ''
declaration += GENERIC_FUNCTION_TEMPLATE.substitute(name=name)
for backend in backends:
declaration += ' - cname: ' + name + '\n'
declaration += ' backend: ' + backend['name'] + '\n'
declaration += ' arguments:\n'
for arg in backend['arguments']:
declaration += ' - arg: ' + arg.type + ' ' + arg.name + '\n'
if arg.is_optional:
declaration += ' optional: True\n'
declaration += ']]\n\n\n'
return declaration
def wrap_generic():
from collections import OrderedDict
defs = OrderedDict()
def should_wrap_function(name):
if name.startswith('LookupTable_'):
return False
return (name.endswith('updateOutput') or
name.endswith('updateGradInput') or
name.endswith('accGradParameters') or
name.endswith('backward'))
def add_functions(name, functions):
for fn in functions:
if not should_wrap_function(fn.name):
continue
if fn.name not in defs:
defs[fn.name] = []
defs[fn.name] += [{
'name': name,
'arguments': fn.arguments[1:],
}]
add_functions('nn', thnn_utils.parse_header(thnn_utils.THNN_H_PATH))
add_functions('cunn', thnn_utils.parse_header(thnn_utils.THCUNN_H_PATH))
wrapper = ''
for name, backends in defs.items():
wrapper += wrap_generic_function(name, backends)
with open('torch/csrc/nn/THNN_generic.cwrap', 'w') as f:
f.write(wrapper)
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
GenericNN(header=True),
], default_plugins=False, destination='torch/csrc/nn/THNN_generic.h')
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
GenericNN(),
], default_plugins=False)
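wrap_generic_function above assembles a .cwrap declaration string from the parsed THNN/THCUNN headers. A rough sketch of its output for a hypothetical function, with the argument objects stubbed by a namedtuple since the real ones come from thnn_utils.parse_header:

from collections import namedtuple

# Stand-in for the argument objects produced by thnn_utils.parse_header.
Arg = namedtuple('Arg', ['type', 'name', 'is_optional'])

backends = [{
    'name': 'nn',
    'arguments': [Arg('THTensor*', 'input', False),
                  Arg('THTensor*', 'output', False),
                  Arg('THTensor*', 'weight', True)],
}]

# Calling wrap_generic_function('Example_updateOutput', backends) would then
# emit roughly:
#
# [[
#   name: Example_updateOutput
#   return: void
#   options:
#     - cname: Example_updateOutput
#       backend: nn
#       arguments:
#         - arg: THTensor* input
#         - arg: THTensor* output
#         - arg: THTensor* weight
#           optional: True
# ]]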

Some files were not shown because too many files have changed in this diff.