Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-26 08:34:52 +08:00)
Compare commits
276 Commits
| SHA1 |
|---|
| ac9245aeb3 | |||
| 60736bdf99 | |||
| 7d58765cee | |||
| 76f7d749e4 | |||
| 0b7374eb44 | |||
| 6fff764155 | |||
| 8ced72ccb8 | |||
| b1ae7f90d5 | |||
| 8b61ee522e | |||
| 76ca3eb191 | |||
| fea50a51ee | |||
| 51e589ed73 | |||
| 2e87643761 | |||
| ba9a85f271 | |||
| 0714d7a3ca | |||
| c238ee3681 | |||
| f5338a1fb8 | |||
| d96ad41191 | |||
| f17cfe4293 | |||
| aec182ae72 | |||
| c93c884ee2 | |||
| c42a2d4d24 | |||
| f89252c336 | |||
| 490c15fae9 | |||
| f2d72ba10f | |||
| 2108b42b92 | |||
| bae8df62d3 | |||
| 98775b6bb4 | |||
| b7cc2a501f | |||
| 0720ba53b3 | |||
| ff5fa11129 | |||
| 5e7f5db332 | |||
| b5f7592140 | |||
| f366e5fc81 | |||
| 48f087f6ce | |||
| 7ad948ffa9 | |||
| 3277d83648 | |||
| 1487278fdf | |||
| 977630bc15 | |||
| 12efd53dba | |||
| 37e05485d9 | |||
| c76770f40e | |||
| da725830c2 | |||
| fc6fcf23f7 | |||
| b190f1b5bc | |||
| dfca8dfdc5 | |||
| b46d5e0b04 | |||
| f19a11a306 | |||
| cfcf69703f | |||
| e22b8e0d17 | |||
| fbfba6bdca | |||
| 3cc89afde6 | |||
| 1e4aee057c | |||
| 8dfcf7e35a | |||
| 76de151ddd | |||
| 2676cc46c2 | |||
| 1bf7bc9768 | |||
| 3c41c9fe46 | |||
| 6ff7750364 | |||
| 4d25c3d048 | |||
| 267b7ade50 | |||
| 80429ad9f7 | |||
| 5ca6516ecb | |||
| 67f94557ff | |||
| 61bd5a0643 | |||
| 748d011c8b | |||
| 5d5cfe2e57 | |||
| 7cbe255296 | |||
| 4ef303698c | |||
| 83e8b3f6c3 | |||
| 502ebed796 | |||
| 68ff58d771 | |||
| 969c1602e6 | |||
| 5e1d6a3691 | |||
| 533cfc0381 | |||
| 2b23712dc3 | |||
| 88275da5e8 | |||
| bd7a5ad6f0 | |||
| 1f6f82dbcf | |||
| 1f8939937a | |||
| b3d41a5f96 | |||
| fec2d493a9 | |||
| 86ee75f63f | |||
| 31941918cf | |||
| 19a65d2bea | |||
| 819d4b2b83 | |||
| b87c113cf4 | |||
| b25182971f | |||
| 1ee2c47e37 | |||
| 2dc563f1f1 | |||
| 15ba71a275 | |||
| e5b3fc49d6 | |||
| ae1766951d | |||
| 02d08dafd9 | |||
| 13a5090695 | |||
| 8e32e4c04c | |||
| cf991310c3 | |||
| 938706099e | |||
| 3330287dc7 | |||
| 38c8520adf | |||
| 492e1746af | |||
| 91a8109cfd | |||
| 161490d34a | |||
| 9c302852eb | |||
| 8654fcfd60 | |||
| b3d527d9a0 | |||
| 4d495218c9 | |||
| 13a041284c | |||
| c60c1a003d | |||
| 97add1a5ea | |||
| ca02930e47 | |||
| 20d5e95077 | |||
| eb4a7dc11d | |||
| f722498b72 | |||
| aadfb6fe83 | |||
| 6c273594c9 | |||
| e475c82fa1 | |||
| 0c2e6665df | |||
| 6295e6e94b | |||
| 670a4aa708 | |||
| 1bdc2e64ed | |||
| c587be1e50 | |||
| bd481596f5 | |||
| a504d56b43 | |||
| 91c4dfccea | |||
| 27f618c44d | |||
| a14482a1df | |||
| aa50c5734b | |||
| 293001a4fe | |||
| 638cfdf150 | |||
| 5f80a14525 | |||
| 1342fd3975 | |||
| 8d4af38489 | |||
| 575a064e66 | |||
| 3ab21a3c4f | |||
| 2f592e6c7d | |||
| 5661ffb766 | |||
| 9b74503daa | |||
| 24848f1cd8 | |||
| a31a07ede9 | |||
| c8c4c9b23d | |||
| e1ed9303f0 | |||
| a43aab13c2 | |||
| c698b4a45e | |||
| c6a0ffab50 | |||
| 8ba7cc30d1 | |||
| 61bf08ca24 | |||
| 6ada3c0c16 | |||
| 60061fbe79 | |||
| 46e7042add | |||
| d0c182773b | |||
| b6f60585b5 | |||
| 4b0e3ee219 | |||
| 838842d4b2 | |||
| e71cf20192 | |||
| adb4cb2b5b | |||
| 6073f9b46c | |||
| 8e8022b735 | |||
| da82d2dd70 | |||
| 82176473a5 | |||
| 2d269a9a72 | |||
| 240372a991 | |||
| 5b10411c8c | |||
| 4c474a9939 | |||
| 7ea6ae57c8 | |||
| 42633f8986 | |||
| 84248690a9 | |||
| 53409ca0fb | |||
| c2c1710047 | |||
| 876202503f | |||
| 946a7d9bc3 | |||
| 608bcd3b15 | |||
| 632b02a477 | |||
| 0db9c63300 | |||
| 873ed4e6b6 | |||
| 01bd43037d | |||
| 68c9e3f232 | |||
| a25c8555eb | |||
| dfd1dff383 | |||
| 8f391d4d51 | |||
| 2a6b7685ae | |||
| eb9573107d | |||
| ee43cd7adc | |||
| 4ca26fbc1b | |||
| c165226325 | |||
| 49295ebe54 | |||
| 455038e470 | |||
| ca7f02ea0c | |||
| 04aba1caec | |||
| f6c1bbfa48 | |||
| 4e2c8c6db5 | |||
| c26b9c0a5e | |||
| aaf41c61a6 | |||
| dd844f741b | |||
| 7117a9012e | |||
| 1bdc28161a | |||
| 5e150caf38 | |||
| c0c62d099a | |||
| b9ece39685 | |||
| 15ef008877 | |||
| b14d6318f8 | |||
| 7c44506441 | |||
| 937ba581d7 | |||
| 2ae54f1194 | |||
| a217fefee1 | |||
| 34b7fed802 | |||
| 5221745c21 | |||
| 000ca44b16 | |||
| 8f3d44033b | |||
| 7cc14c595a | |||
| 797544c47a | |||
| 0426f2f3ec | |||
| 336eeee895 | |||
| 593f867e3e | |||
| 385913be1c | |||
| 6aaa14f5fe | |||
| 07f5b21ef1 | |||
| e454870396 | |||
| 2822013437 | |||
| 72c1982734 | |||
| 0de2ea305a | |||
| d899385a3d | |||
| c6d6cbe8a6 | |||
| 85e82e85d8 | |||
| a1534cc37d | |||
| 8c8dc791ef | |||
| 63edca44f2 | |||
| 8d90ab2d9b | |||
| bd5303010d | |||
| 16d2c3d7b3 | |||
| 407a92dc26 | |||
| 0a893abc7b | |||
| 34fa5e0dc7 | |||
| 712686ce91 | |||
| 518864a7e0 | |||
| 750fb5cc73 | |||
| 0f4749907a | |||
| bd2dc63ef6 | |||
| 19a8795450 | |||
| d9dccfdd71 | |||
| 7547a06c4f | |||
| 8929b75795 | |||
| 4d37ef878c | |||
| 126e77d5c6 | |||
| 53eec78bea | |||
| a4edaec81a | |||
| 92481b59d3 | |||
| 6c77fa9121 | |||
| aeb7a72620 | |||
| 73d232ee45 | |||
| c0c65bf915 | |||
| f6cee952af | |||
| e74184f679 | |||
| 3884d36176 | |||
| e7c6886a00 | |||
| ed8e92f63d | |||
| fb97df5d65 | |||
| e9b05c71b4 | |||
| 7926324385 | |||
| 1527b37c26 | |||
| de4659659b | |||
| a96a8c8336 | |||
| 691aa19b88 | |||
| 6b07dc9e22 | |||
| 8aa259b52b | |||
| ac9312e9f8 | |||
| 91a17b702b | |||
| a9785bba44 | |||
| fc354a0d6e | |||
| 262611fcd3 | |||
| b8a34f3033 | |||
| 41ddc2a786 | |||
| e4886f6589 | |||
| 6328981fcf | |||
| 2b948c42cd | |||
| b2ae054410 |
.gitignore (vendored, 1 change)

@@ -2,6 +2,7 @@ build/
dist/
torch.egg-info/
*/**/__pycache__
torch/version.py
torch/csrc/generic/TensorMethods.cpp
torch/lib/*.so*
torch/lib/*.dylib*
.travis.yml

@@ -18,7 +18,8 @@ install:
  - export CC="ccache gcc-4.8"
  - export CXX="ccache g++-4.8"
  - ccache --show-stats
  - travis_retry pip install -r requirements.txt
  - travis_retry pip install --upgrade pip setuptools wheel
  - travis_retry pip install -r requirements.txt --only-binary=scipy
  - python setup.py install

script:
@@ -43,5 +44,5 @@ matrix:
      env: LINT_CHECK
      python: "2.7"
      addons: true
      install: pip install pep8
      script: pep8
      install: pip install flake8
      script: flake8
README.md (42 changes)

@@ -30,15 +30,32 @@ We are in an early-release Beta. Expect some adventures and rough edges.

At a granular level, PyTorch is a library that consists of the following components:

| \_ | \_ |
| ------------------------ | --- |
| torch | a Tensor library like NumPy, with strong GPU support |
| torch.autograd | a tape based automatic differentiation library that supports all differentiable Tensor operations in torch |
| torch.nn | a neural networks library deeply integrated with autograd designed for maximum flexibility |
| torch.optim | an optimization package to be used with torch.nn with standard optimization methods such as SGD, RMSProp, LBFGS, Adam etc. |
| torch.multiprocessing | python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. |
| torch.utils | DataLoader, Trainer and other utility functions for convenience |
| torch.legacy(.nn/.optim) | legacy code that has been ported over from torch for backward compatibility reasons |
<table>
<tr>
    <td><b> torch </b></td>
    <td> a Tensor library like NumPy, with strong GPU support </td>
</tr>
<tr>
    <td><b> torch.autograd </b></td>
    <td> a tape based automatic differentiation library that supports all differentiable Tensor operations in torch </td>
</tr>
<tr>
    <td><b> torch.nn </b></td>
    <td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td>
</tr>
<tr>
    <td><b> torch.multiprocessing </b></td>
    <td> python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. </td>
</tr>
<tr>
    <td><b> torch.utils </b></td>
    <td> DataLoader, Trainer and other utility functions for convenience </td>
</tr>
<tr>
    <td><b> torch.legacy(.nn/.optim) </b></td>
    <td> legacy code that has been ported over from torch for backward compatibility reasons </td>
</tr>
</table>
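As a rough sketch of how the components in the table combine in practice (the layer sizes, data, and learning rate below are invented for illustration; only the module names come from the table):

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# torch: tensors / torch.autograd: Variable + backward() /
# torch.nn: layers and losses / torch.optim: parameter updates
model = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 1))
optimizer = optim.SGD(model.parameters(), lr=0.01)

x = Variable(torch.randn(32, 10))   # a made-up batch of inputs
y = Variable(torch.randn(32, 1))    # made-up regression targets

optimizer.zero_grad()
loss = nn.MSELoss()(model(x), y)    # forward pass through nn layers
loss.backward()                     # autograd fills in the gradients
optimizer.step()                    # optim applies the update
```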
Usually one uses PyTorch either as:

@@ -128,10 +145,9 @@ There is no wrapper code that needs to be written. [You can see an example here]
## Installation

### Binaries
- Anaconda
```bash
conda install pytorch torchvision -c soumith
```
Commands to install from binaries via Conda or pip wheels are on our website:

[http://pytorch.org](http://pytorch.org)

### From source
@@ -63,11 +63,16 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
    "}\n")

  execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
                  "-ccbin" ${CMAKE_CXX_COMPILER}
                  WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
                  RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
                  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

  if(nvcc_res EQUAL 0)
    # only keep the last line of nvcc_out
    STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
    STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
    list(GET nvcc_out -1 nvcc_out)
    string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
    set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
  endif()
@@ -116,13 +121,13 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
      set(add_ptx TRUE)
      set(arch_name ${CMAKE_MATCH_1})
    endif()
    if(arch_name MATCHES "([0-9]\\.[0-9])$")
    if(arch_name MATCHES "(^[0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
      set(arch_bin ${CMAKE_MATCH_1})
      set(arch_ptx ${arch_bin})
    else()
      # Look for it in our list of known architectures
      if(${arch_name} STREQUAL "Fermi")
        set(arch_bin 2.0 "2.1(2.0)")
        set(arch_bin "2.0 2.1(2.0)")
      elseif(${arch_name} STREQUAL "Kepler+Tegra")
        set(arch_bin 3.2)
      elseif(${arch_name} STREQUAL "Kepler+Tesla")
@@ -173,11 +178,11 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
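The second and third hunks above turn architecture specs such as `5.2` or `2.1(2.0)` into `-gencode` flags, where the value in parentheses becomes the virtual `compute_` architecture and the outer value the real `sm_` target. A small Python illustration of that mapping (this mirrors the CMake logic for clarity; it is not part of the build system):

```python
import re

def gencode_flag(arch):
    """Map a '52' or '21(20)' style spec to an nvcc -gencode flag."""
    m = re.match(r"([0-9]+)\(([0-9]+)\)$", arch)
    if m:
        # The user explicitly specified the virtual ARCH for this CODE target.
        return "-gencode arch=compute_{},code=sm_{}".format(m.group(2), m.group(1))
    # Otherwise assume ARCH == CODE.
    return "-gencode arch=compute_{},code=sm_{}".format(arch, arch)

print(gencode_flag("52"))      # -gencode arch=compute_52,code=sm_52
print(gencode_flag("21(20)"))  # -gencode arch=compute_20,code=sm_21
```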
@@ -74,9 +74,11 @@ author = 'Torch Contributors'
# built documents.
#
# The short X.Y version.
version = '0.1.6'
# TODO: change to [:2] at v1.0
version = '.'.join(torch.__version__.split('+')[0].split('.')[:3])
# The full version, including alpha/beta/rc tags.
release = '0.1.6'
# TODO: verify this works as expected
release = torch.__version__.split('+')[0]

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
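To see what the new `conf.py` lines compute, assume a hypothetical `torch.__version__` of `'0.1.10+ac9245a'` (the `+sha` suffix comes from the `setup.py` change later in this comparison):

```python
# Hypothetical value; the real string is written to torch/version.py at build time.
torch_version = '0.1.10+ac9245a'

version = '.'.join(torch_version.split('+')[0].split('.')[:3])
release = torch_version.split('+')[0]

print(version)  # 0.1.10
print(release)  # 0.1.10
```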
@@ -22,6 +22,24 @@ Containers
.. autoclass:: Module
    :members:

:hidden:`Sequential`
~~~~~~~~~~~~~~~~~~~~

.. autoclass:: Sequential
    :members:

:hidden:`ModuleList`
~~~~~~~~~~~~~~~~~~~~

.. autoclass:: ModuleList
    :members:

:hidden:`ParameterList`
~~~~~~~~~~~~~~~~~~~~

.. autoclass:: ParameterList
    :members:

Convolution Layers
----------------------------------

@@ -445,13 +463,13 @@ Vision layers
    :members:

:hidden:`UpsamplingNearest2d`
~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: UpsamplingNearest2d
    :members:

:hidden:`UpsamplingBilinear2d`
~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: UpsamplingBilinear2d
    :members:
@@ -466,6 +484,36 @@ Multi-GPU layers
.. autoclass:: DataParallel
    :members:


Utilities
---------

:hidden:`clip_grad_norm`
~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torch.nn.utils.clip_grad_norm


.. currentmodule:: torch.nn.utils.rnn

:hidden:`PackedSequence`
~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torch.nn.utils.rnn.PackedSequence


:hidden:`pack_padded_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torch.nn.utils.rnn.pack_padded_sequence


:hidden:`pad_packed_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torch.nn.utils.rnn.pad_packed_sequence


torch.nn.functional
===================
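The `torch.nn.utils.rnn` entries added above document the helpers for variable-length RNN input. A minimal sketch of how they fit together (the sizes and lengths are invented; it assumes the batch is sorted by decreasing length, as these helpers require):

```python
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lengths = [5, 4, 2]                          # true lengths, descending
padded = Variable(torch.randn(5, 3, 10))     # (max_len, batch, features), zero-padded

packed = pack_padded_sequence(padded, lengths)      # -> PackedSequence
rnn = nn.LSTM(input_size=10, hidden_size=20)
packed_out, _ = rnn(packed)                         # RNNs accept PackedSequence directly
out, out_lengths = pad_packed_sequence(packed_out)  # back to a padded tensor

print(out.size())   # (5, 3, 20)
print(out_lengths)  # per-sequence lengths: 5, 4, 2
```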
@@ -1,3 +1,5 @@
.. _cuda-semantics:

CUDA semantics
==============

@@ -61,3 +63,21 @@ call. This can be used to overlap data transfers with computation.

You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
pinned memory by passing ``pinned=True`` to its constructor.

.. _cuda-nn-dataparallel-instead:

Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.

As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,
test your use case.

There are significant caveats to using CUDA models with
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
requirements exactly, it is likely that your program will have incorrect or
undefined behavior.
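A minimal sketch of the `nn.DataParallel` pattern the note recommends (the device ids and sizes are illustrative; it assumes at least two visible GPUs):

```python
import torch
import torch.nn as nn
from torch.autograd import Variable

model = nn.Linear(10, 5)
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    # Replicates the module on each GPU and scatters the batch along dim 0.
    model = nn.DataParallel(model.cuda(), device_ids=[0, 1])

inputs = Variable(torch.randn(64, 10))
if torch.cuda.is_available():
    inputs = inputs.cuda()
outputs = model(inputs)   # results are gathered back onto device_ids[0]
```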
@@ -132,7 +132,7 @@ This is how a ``Linear`` module can be implemented::
        # nn.Parameters can never be volatile and, different than Variables,
        # they require gradients by default.
        self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
        if bias is not None:
        if bias:
            self.bias = nn.Parameter(torch.Tensor(output_features))
        else:
            # You should always register all possible parameters, but the
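For context, a complete minimal version of the `Linear` module that this note is editing could look like the sketch below (the forward formula and the uniform initialization are illustrative additions, not text from the docs page itself):

```python
import torch
import torch.nn as nn

class Linear(nn.Module):
    def __init__(self, input_features, output_features, bias=True):
        super(Linear, self).__init__()
        # nn.Parameter tensors are registered with the module and
        # require gradients by default.
        self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(output_features))
        else:
            # Register the absent parameter so named_parameters()/state_dict()
            # still know about it.
            self.register_parameter('bias', None)
        self.weight.data.uniform_(-0.1, 0.1)
        if self.bias is not None:
            self.bias.data.uniform_(-0.1, 0.1)

    def forward(self, input):
        output = input.mm(self.weight)
        if self.bias is not None:
            output = output + self.bias.unsqueeze(0).expand_as(output)
        return output
```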
@@ -33,6 +33,8 @@ by the CUDA runtime.
kinds of data should be done with care. Note that this restriction doesn't
apply to shared CPU memory.

See also: :ref:`cuda-nn-dataparallel-instead`


Best practices and tips
-----------------------
docs/source/notes/serialization.rst (new file, 34 lines)

@@ -0,0 +1,34 @@

Serialization semantics
=======================

Best practices
--------------

.. _recommend-saving-models:

Recommended approach for saving a model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

There are two main approaches for serializing and restoring a model.

The first (recommended) saves and loads only the model parameters::

    torch.save(the_model.state_dict(), PATH)

Then later::

    the_model = TheModelClass(*args, **kwargs)
    the_model.load_state_dict(torch.load(PATH))

The second saves and loads the entire model::

    torch.save(the_model, PATH)

Then later::

    the_model = torch.load(PATH)

However in this case, the serialized data is bound to the specific classes
and the exact directory structure used, so it can break in various ways when
used in other projects, or after some serious refactors.
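A self-contained sketch of the recommended (first) approach above; the model class and file name are placeholders for the example:

```python
import torch
import torch.nn as nn

class TheModelClass(nn.Module):
    def __init__(self):
        super(TheModelClass, self).__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

PATH = 'model_params.pth'   # placeholder path

# Save only the parameters (the recommended approach).
the_model = TheModelClass()
torch.save(the_model.state_dict(), PATH)

# Later: rebuild the module, then restore its parameters.
restored = TheModelClass()
restored.load_state_dict(torch.load(PATH))
```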
@@ -38,6 +38,7 @@ Indexing, Slicing, Joining, Mutating Ops
.. autofunction:: t
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze


Random sampling
@@ -1,8 +0,0 @@
[pep8]
max-line-length = 120
ignore = E402,E721,E731,W503
exclude = docs/src

[flake8]
max-line-length = 120
ignore = E305,E402,E721,E731,F401,F403,F405,F811,F812,F821,F841
setup.py (58 changes)

@@ -1,6 +1,8 @@
from setuptools import setup, Extension, distutils, Command, find_packages
import setuptools.command.build_ext
import setuptools.command.install
import setuptools.command.develop
import setuptools.command.build_py
import distutils.unixccompiler
import distutils.command.build
import distutils.command.clean
@@ -94,6 +96,28 @@ class build_module(Command):
        self.run_command('build_ext')


class build_py(setuptools.command.build_py.build_py):

    def run(self):
        self.create_version_file()
        setuptools.command.build_py.build_py.run(self)

    @staticmethod
    def create_version_file():
        global version, cwd
        print('-- Building version ' + version)
        version_path = os.path.join(cwd, 'torch', 'version.py')
        with open(version_path, 'w') as f:
            f.write("__version__ = '{}'\n".format(version))


class develop(setuptools.command.develop.develop):

    def run(self):
        build_py.create_version_file()
        setuptools.command.develop.develop.run(self)


class build_ext(setuptools.command.build_ext.build_ext):

    def run(self):
@@ -168,6 +192,7 @@ class clean(distutils.command.clean.clean):
################################################################################

include_dirs = []
library_dirs = []
extra_link_args = []
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
@@ -188,7 +213,7 @@ include_dirs += [
    tmp_install_path + "/include/THNN",
]

extra_link_args.append('-L' + lib_path)
library_dirs.append(lib_path)

# we specify exact lib names to avoid conflict with lua-torch installs
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
@@ -220,14 +245,23 @@ main_sources = [
    "torch/csrc/Exceptions.cpp",
    "torch/csrc/Tensor.cpp",
    "torch/csrc/Storage.cpp",
    "torch/csrc/DynamicTypes.cpp",
    "torch/csrc/byte_order.cpp",
    "torch/csrc/utils.cpp",
    "torch/csrc/utils/object_ptr.cpp",
    "torch/csrc/allocators.cpp",
    "torch/csrc/serialization.cpp",
    "torch/csrc/autograd/init.cpp",
    "torch/csrc/autograd/variable.cpp",
    "torch/csrc/autograd/function.cpp",
    "torch/csrc/autograd/engine.cpp",
    "torch/csrc/autograd/function.cpp",
    "torch/csrc/autograd/variable.cpp",
    "torch/csrc/autograd/grad_buffer.cpp",
    "torch/csrc/autograd/python_function.cpp",
    "torch/csrc/autograd/python_cpp_function.cpp",
    "torch/csrc/autograd/python_variable.cpp",
    "torch/csrc/autograd/python_engine.cpp",
    "torch/csrc/autograd/functions/batch_normalization.cpp",
    "torch/csrc/autograd/functions/init.cpp",
    "torch/csrc/nn/THNN_generic.cpp",
]

@@ -262,10 +296,11 @@ if WITH_CUDA:
            break
    include_dirs.append(cuda_include_path)
    include_dirs.append(tmp_install_path + "/include/THCUNN")
    extra_link_args.append('-L' + cuda_lib_path)
    library_dirs.append(cuda_lib_path)
    extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
    extra_compile_args += ['-DWITH_CUDA']
    extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
    main_libraries += ['cudart']
    main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
    main_sources += [
        "torch/csrc/cuda/Module.cpp",
@@ -280,7 +315,7 @@ if WITH_CUDA:
if WITH_CUDNN:
    main_libraries += ['cudnn']
    include_dirs.append(CUDNN_INCLUDE_DIR)
    extra_link_args.append('-L' + CUDNN_LIB_DIR)
    library_dirs.append(CUDNN_LIB_DIR)
    main_sources += [
        "torch/csrc/cudnn/BatchNorm.cpp",
        "torch/csrc/cudnn/Conv.cpp",
@@ -314,6 +349,7 @@ C = Extension("torch._C",
              language='c++',
              extra_compile_args=main_compile_args + extra_compile_args,
              include_dirs=include_dirs,
              library_dirs=library_dirs,
              extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
              )
extensions.append(C)
@@ -352,18 +388,28 @@ if WITH_CUDA:
    )
    extensions.append(THCUNN)

version = "0.1"
version = '0.1.10'
if os.getenv('PYTORCH_BUILD_VERSION'):
    assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
    version = os.getenv('PYTORCH_BUILD_VERSION') \
        + '_' + os.getenv('PYTORCH_BUILD_NUMBER')
else:
    try:
        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
        version += '+' + sha[:7]
    except subprocess.CalledProcessError:
        pass


setup(name="torch", version=version,
      ext_modules=extensions,
      cmdclass={
          'build': build,
          'build_py': build_py,
          'build_ext': build_ext,
          'build_deps': build_deps,
          'build_module': build_module,
          'develop': develop,
          'install': install,
          'clean': clean,
      },
test/common.py (117 changes)
@ -1,13 +1,15 @@
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import unittest
|
||||
import contextlib
|
||||
from functools import wraps
|
||||
from itertools import product
|
||||
from copy import deepcopy
|
||||
|
||||
import torch
|
||||
import torch.cuda
|
||||
from torch.autograd import Variable, Function
|
||||
from torch.autograd import Variable
|
||||
|
||||
|
||||
torch.set_default_tensor_type('torch.DoubleTensor')
|
||||
@ -30,6 +32,24 @@ try:
|
||||
except ImportError:
|
||||
TEST_NUMPY = False
|
||||
|
||||
TEST_SCIPY = True
|
||||
try:
|
||||
import scipy
|
||||
except ImportError:
|
||||
TEST_SCIPY = False
|
||||
|
||||
|
||||
def skipIfNoLapack(fn):
|
||||
@wraps(fn)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
fn(*args, **kwargs)
|
||||
except Exception as e:
|
||||
if 'Lapack library not found' in e.args[0]:
|
||||
raise unittest.SkipTest('Compiled without Lapack')
|
||||
raise
|
||||
return wrapper
|
||||
|
||||
|
||||
def get_cpu_type(t):
|
||||
assert t.__module__ == 'torch.cuda'
|
||||
@ -98,11 +118,18 @@ class TestCase(unittest.TestCase):
|
||||
y = y.data
|
||||
|
||||
if torch.is_tensor(x) and torch.is_tensor(y):
|
||||
max_err = 0
|
||||
super(TestCase, self).assertEqual(x.size(), y.size())
|
||||
for index in iter_indices(x):
|
||||
max_err = max(max_err, abs(x[index] - y[index]))
|
||||
self.assertLessEqual(max_err, prec, message)
|
||||
def assertTensorsEqual(a, b):
|
||||
max_err = 0
|
||||
super(TestCase, self).assertEqual(a.size(), b.size())
|
||||
for index in iter_indices(a):
|
||||
max_err = max(max_err, abs(a[index] - b[index]))
|
||||
self.assertLessEqual(max_err, prec, message)
|
||||
self.assertEqual(x.is_sparse, y.is_sparse, message)
|
||||
if x.is_sparse:
|
||||
assertTensorsEqual(x.indices(), y.indices())
|
||||
assertTensorsEqual(x.values(), y.values())
|
||||
else:
|
||||
assertTensorsEqual(x, y)
|
||||
elif type(x) == str and type(y) == str:
|
||||
super(TestCase, self).assertEqual(x, y)
|
||||
elif is_iterable(x) and is_iterable(y):
|
||||
@ -150,65 +177,23 @@ class TestCase(unittest.TestCase):
|
||||
raise AssertionError("object not found in iterable")
|
||||
|
||||
|
||||
def make_jacobian(input, num_out):
|
||||
if isinstance(input, Variable) and not input.requires_grad:
|
||||
return None
|
||||
if torch.is_tensor(input) or isinstance(input, Variable):
|
||||
return torch.zeros(input.nelement(), num_out)
|
||||
def download_file(url, path, binary=True):
|
||||
if sys.version_info < (3,):
|
||||
import urllib2
|
||||
request = urllib2
|
||||
error = urllib2
|
||||
else:
|
||||
return type(input)(filter(lambda x: x is not None,
|
||||
(make_jacobian(elem, num_out) for elem in input)))
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
request = urllib.request
|
||||
error = urllib.error
|
||||
|
||||
|
||||
def iter_tensors(x, only_requiring_grad=False):
|
||||
if torch.is_tensor(x):
|
||||
yield x
|
||||
elif isinstance(x, Variable):
|
||||
if x.requires_grad or not only_requiring_grad:
|
||||
yield x.data
|
||||
else:
|
||||
for elem in x:
|
||||
for result in iter_tensors(elem, only_requiring_grad):
|
||||
yield result
|
||||
|
||||
|
||||
def contiguous(input):
|
||||
if torch.is_tensor(input):
|
||||
return input.contiguous()
|
||||
elif isinstance(input, Variable):
|
||||
return input.contiguous()
|
||||
else:
|
||||
return type(input)(contiguous(e) for e in input)
|
||||
|
||||
|
||||
def get_numerical_jacobian(fn, input, target):
|
||||
perturbation = 1e-6
|
||||
# To be able to use .view(-1) input must be contiguous
|
||||
input = contiguous(input)
|
||||
output_size = fn(input).numel()
|
||||
jacobian = make_jacobian(target, output_size)
|
||||
|
||||
# It's much easier to iterate over flattened lists of tensors.
|
||||
# These are reference to the same objects in jacobian, so any changes
|
||||
# will be reflected in it as well.
|
||||
x_tensors = [t for t in iter_tensors(target, True)]
|
||||
j_tensors = [t for t in iter_tensors(jacobian)]
|
||||
|
||||
outa = torch.DoubleTensor(output_size)
|
||||
outb = torch.DoubleTensor(output_size)
|
||||
|
||||
# TODO: compare structure
|
||||
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
|
||||
flat_tensor = x_tensor.view(-1)
|
||||
for i in range(flat_tensor.nelement()):
|
||||
orig = flat_tensor[i]
|
||||
flat_tensor[i] = orig - perturbation
|
||||
outa.copy_(fn(input))
|
||||
flat_tensor[i] = orig + perturbation
|
||||
outb.copy_(fn(input))
|
||||
flat_tensor[i] = orig
|
||||
|
||||
outb.add_(-1, outa).div_(2 * perturbation)
|
||||
d_tensor[i] = outb
|
||||
|
||||
return jacobian
|
||||
    if os.path.exists(path):
        return True
    try:
        data = request.urlopen(url, timeout=15).read()
        with open(path, 'wb' if binary else 'w') as f:
            f.write(data)
        return True
    except error.URLError as e:
        return False
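The `download_file` helper added above fetches a test fixture once and caches it on disk, returning `False` on network failure. A hypothetical usage from a test (the URL and path are placeholders, not fixtures that actually exist):

```python
import unittest
import torch
from common import download_file   # the helper defined above

if download_file('https://example.com/fixtures/data.pt', '/tmp/data.pt'):
    fixture = torch.load('/tmp/data.pt')
else:
    raise unittest.SkipTest('could not download test fixture')
```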
@ -2,11 +2,13 @@ import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from copy import deepcopy
|
||||
from itertools import product
|
||||
|
||||
import torch
|
||||
import torch.cuda
|
||||
from torch.autograd import Variable
|
||||
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
|
||||
from common import TestCase, to_gpu, freeze_rng_state
|
||||
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors, contiguous
|
||||
import torch.backends.cudnn
|
||||
|
||||
# tarfile module tries to obtain a file object name in python 3.3
|
||||
@ -245,6 +247,13 @@ criterion_tests = [
|
||||
input_size=(2, 3, 5, 5),
|
||||
target=torch.rand(2, 5, 5).mul(3).floor().long()
|
||||
),
|
||||
dict(
|
||||
module_name='NLLLoss2d',
|
||||
constructor_args=(torch.rand(3),),
|
||||
input_size=(2, 3, 5, 5),
|
||||
target=torch.rand(2, 5, 5).mul(3).floor().long(),
|
||||
desc='weights'
|
||||
),
|
||||
dict(
|
||||
module_name='HingeEmbeddingLoss',
|
||||
input=torch.rand(10),
|
||||
@ -328,15 +337,19 @@ class NNTestCase(TestCase):
|
||||
|
||||
def _flatten_tensors(self, x):
|
||||
if torch.is_tensor(x):
|
||||
return x.view(-1)
|
||||
if x.is_sparse:
|
||||
return x.to_dense().view(-1)
|
||||
else:
|
||||
return x.view(-1)
|
||||
elif isinstance(x, Variable):
|
||||
return x.data.view(-1)
|
||||
return self._flatten_tensors(x.data)
|
||||
else:
|
||||
return tuple(self._flatten_tensors(a) for a in x)
|
||||
|
||||
def _zero_grad_input(self, input):
|
||||
if isinstance(input, Variable):
|
||||
input.grad.data.zero_()
|
||||
if input.requires_grad and input.grad is not None:
|
||||
input.grad.data.zero_()
|
||||
elif torch.is_tensor(input):
|
||||
return
|
||||
else:
|
||||
@ -400,9 +413,9 @@ class NNTestCase(TestCase):
|
||||
# TODO: enable non-contig tests
|
||||
input = contiguous(input)
|
||||
if jacobian_input:
|
||||
res += get_numerical_jacobian(fw, input, input),
|
||||
res += get_numerical_jacobian(fw, input, input, eps=1e-6),
|
||||
if jacobian_parameters:
|
||||
res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
|
||||
res += torch.cat(list(get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param), 0),
|
||||
return res
|
||||
|
||||
def check_jacobian(self, module, input, jacobian_input=True):
|
||||
@ -516,6 +529,8 @@ class ModuleTest(TestBase):
|
||||
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
|
||||
test_case.assertEqual(out, expected_out)
|
||||
|
||||
self.test_noncontig(test_case, module, input)
|
||||
|
||||
# TODO: do this with in-memory files as soon as torch.save will support it
|
||||
with TemporaryFile() as f:
|
||||
test_case._forward(module, input)
|
||||
@ -526,6 +541,51 @@ class ModuleTest(TestBase):
|
||||
|
||||
self._do_test(test_case, module, input)
|
||||
|
||||
    def noncontiguize(self, obj):
        if isinstance(obj, list):
            return [self.noncontiguize(o) for o in obj]
        tensor = obj.data if isinstance(obj, Variable) else obj
        ndim = tensor.dim()
        noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
        assert noncontig.numel() == 1 or not noncontig.is_contiguous()
        if isinstance(obj, Variable):
            return Variable(noncontig, requires_grad=obj.requires_grad)
        return noncontig
|
||||
|
||||
def test_noncontig(self, test_case, module, input):
|
||||
test_case._zero_grad_parameters(module)
|
||||
test_case._zero_grad_input(input)
|
||||
with freeze_rng_state():
|
||||
output = test_case._forward(module, input)
|
||||
grad_output = output
|
||||
if isinstance(grad_output, Variable):
|
||||
grad_output = grad_output.data.clone()
|
||||
else:
|
||||
grad_output = grad_output.clone()
|
||||
output = output.clone()
|
||||
grad_output.normal_()
|
||||
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
|
||||
d_param = deepcopy(test_case._get_parameters(module)[1])
|
||||
|
||||
nc_input = self.noncontiguize(input)
|
||||
nc_grad_output = self.noncontiguize(grad_output)
|
||||
for contig_i, contig_g in product((True, False), repeat=2):
|
||||
i = input if contig_i else nc_input
|
||||
go = grad_output if contig_g else nc_grad_output
|
||||
test_case._zero_grad_parameters(module)
|
||||
test_case._zero_grad_input(i)
|
||||
with freeze_rng_state():
|
||||
try:
|
||||
out = test_case._forward(module, i)
|
||||
except Exception:
|
||||
# Some modules will fail because of non contiguous inputs and we're ok with that
|
||||
continue
|
||||
grad = test_case._backward(module, i, out, go)
|
||||
|
||||
test_case.assertEqual(out, output)
|
||||
test_case.assertEqual(grad, d_input, 1e-4)
|
||||
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
|
||||
|
||||
def test_cuda(self, test_case):
|
||||
if not TEST_CUDA or not self.should_test_cuda:
|
||||
raise unittest.SkipTest('Excluded from CUDA tests')
|
||||
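The `noncontiguize` helper used by `test_noncontig` above builds a tensor with the same values but a non-contiguous layout, by stacking the tensor with a zeroed clone along a new trailing dimension and selecting the original slice back out. The trick in isolation (sizes are arbitrary):

```python
import torch

a = torch.randn(2, 3)
ndim = a.dim()

# Stack a zeroed clone with the original, then select the original slice:
# same values, but strided (non-contiguous) storage.
nc = torch.stack([a.clone().zero_(), a], ndim).select(ndim, 1)

print(nc.is_contiguous())   # False
print(torch.equal(nc, a))   # True: the values are unchanged
```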
@ -536,8 +596,6 @@ class ModuleTest(TestBase):
|
||||
|
||||
cpu_module = self.constructor(*self.constructor_args)
|
||||
gpu_module = self.constructor(*self.constructor_args).float().cuda()
|
||||
test_case._zero_grad_parameters(cpu_module)
|
||||
test_case._zero_grad_parameters(gpu_module)
|
||||
cpu_param = test_case._get_parameters(cpu_module)
|
||||
gpu_param = test_case._get_parameters(gpu_module)
|
||||
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
|
||||
@ -547,6 +605,10 @@ class ModuleTest(TestBase):
|
||||
gpu_p = gpu_p.data
|
||||
gpu_p.copy_(cpu_p)
|
||||
|
||||
test_case._zero_grad_input(cpu_input)
|
||||
test_case._zero_grad_input(gpu_input)
|
||||
test_case._zero_grad_parameters(cpu_module)
|
||||
test_case._zero_grad_parameters(gpu_module)
|
||||
cpu_output = test_case._forward(cpu_module, cpu_input)
|
||||
gpu_output = test_case._forward(gpu_module, gpu_input)
|
||||
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
|
||||
@ -560,6 +622,8 @@ class ModuleTest(TestBase):
|
||||
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
|
||||
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
|
||||
test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
|
||||
|
||||
self.test_noncontig(test_case, gpu_module, gpu_input)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
# TODO: remove this after CUDA scatter_ is implemented
|
||||
|
||||
@ -6,9 +6,9 @@ import torch
|
||||
import unittest
|
||||
from copy import deepcopy
|
||||
from collections import OrderedDict
|
||||
from torch.autograd import gradcheck
|
||||
|
||||
from common import make_jacobian, TestCase, iter_tensors, \
|
||||
get_numerical_jacobian, run_tests
|
||||
from common import TestCase, run_tests
|
||||
from torch.autograd._functions import *
|
||||
from torch.autograd import Variable, Function
|
||||
|
||||
@ -20,37 +20,6 @@ else:
|
||||
PRECISION = 1e-4
|
||||
|
||||
|
||||
def iter_gradients(x):
|
||||
if isinstance(x, Variable):
|
||||
if x.requires_grad:
|
||||
yield x.grad.data
|
||||
else:
|
||||
for elem in x:
|
||||
for result in iter_gradients(elem):
|
||||
yield result
|
||||
|
||||
|
||||
def zero_gradients(i):
|
||||
for t in iter_gradients(i):
|
||||
t.zero_()
|
||||
|
||||
|
||||
def get_analytical_jacobian(input, output):
|
||||
jacobian = make_jacobian(input, output.numel())
|
||||
grad_output = output.data.clone().zero_()
|
||||
flat_grad_output = grad_output.view(-1)
|
||||
|
||||
for i in range(flat_grad_output.numel()):
|
||||
flat_grad_output.zero_()
|
||||
flat_grad_output[i] = 1
|
||||
zero_gradients(input)
|
||||
output.backward(grad_output, retain_variables=True)
|
||||
for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
|
||||
jacobian_x[:, i] = d_x
|
||||
|
||||
return jacobian
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def backward_engine(engine):
|
||||
_prev_engine = Variable._execution_engine
|
||||
@ -74,6 +43,7 @@ class TestAutograd(TestCase):
|
||||
counter[0] += inc
|
||||
|
||||
z = x ** 2 + x * 2 + x * y + y
|
||||
x.register_hook(lambda *args: bw_hook(0, *args))
|
||||
test = z.register_hook(lambda *args: bw_hook(1, *args))
|
||||
z.backward(torch.ones(5, 5), retain_variables=True)
|
||||
self.assertEqual(counter[0], 1)
|
||||
@ -158,6 +128,49 @@ class TestAutograd(TestCase):
|
||||
def test_backward(self):
|
||||
self._test_backward()
|
||||
|
||||
def test_sparse_backward(self):
|
||||
class FixedGradientFunction(Function):
|
||||
|
||||
def __init__(self, grad):
|
||||
self.grad = grad
|
||||
|
||||
def forward(self, x):
|
||||
return x
|
||||
|
||||
def backward(self, grad_x):
|
||||
return self.grad
|
||||
|
||||
size = torch.Size([6, 3, 2])
|
||||
i1 = torch.LongTensor([
|
||||
[0, 3, 4],
|
||||
[0, 2, 2],
|
||||
])
|
||||
v1 = torch.DoubleTensor([[1, 2], [4, 5], [7, 8]])
|
||||
sparse_grad1 = torch.sparse.DoubleTensor(i1, v1, size)
|
||||
i2 = torch.LongTensor([
|
||||
[0, 1, 3, 4],
|
||||
[0, 1, 2, 2],
|
||||
])
|
||||
v2 = torch.DoubleTensor([[1, 2], [4, 3], [4, 5], [7, 8]])
|
||||
sparse_grad2 = torch.sparse.DoubleTensor(i2, v2, size)
|
||||
dense_grad = torch.rand(size).double()
|
||||
sparse_fn1 = FixedGradientFunction(sparse_grad1)
|
||||
sparse_fn2 = FixedGradientFunction(sparse_grad2)
|
||||
dense_fn = FixedGradientFunction(dense_grad)
|
||||
|
||||
# sparse first
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
(sparse_fn1(x) + dense_fn(x) + sparse_fn2(x)).sum().backward()
|
||||
self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
|
||||
# dense first
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
(dense_fn(x) + sparse_fn1(x) + sparse_fn2(x)).sum().backward()
|
||||
self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
|
||||
# sparse only
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
(sparse_fn1(x) + sparse_fn2(x)).sum().backward()
|
||||
self.assertEqual(x.grad.data, sparse_grad1 + sparse_grad2)
|
||||
|
||||
@unittest.skip("BasicEngine is out of date")
|
||||
def test_backward_basic_engine(self):
|
||||
with backward_engine(torch.autograd.engine.BasicEngine):
|
||||
@ -224,14 +237,50 @@ class TestAutograd(TestCase):
|
||||
|
||||
def test_indexing(self):
|
||||
x = torch.range(1, 16).resize_(4, 4)
|
||||
y = Variable(x)
|
||||
self.assertEqual(x[1], y[1].data)
|
||||
self.assertEqual(x[1, 1], y[1, 1].data[0])
|
||||
self.assertEqual(x[1:], y[1:].data)
|
||||
self.assertEqual(x[:2], y[:2].data)
|
||||
self.assertEqual(x[:2, 2], y[:2, 2].data)
|
||||
self.assertEqual(x[1:2, 2], y[1:2, 2].data)
|
||||
self.assertEqual(x[1, 2:], y[1, 2:].data)
|
||||
y = Variable(x, requires_grad=True)
|
||||
|
||||
def check_index(idx):
|
||||
if y.grad is not None:
|
||||
y.grad.data.zero_()
|
||||
indexed_tensor = x[idx]
|
||||
indexed_var = y[idx]
|
||||
|
||||
indexed_var_t = indexed_var.data
|
||||
if not torch.is_tensor(indexed_tensor):
|
||||
indexed_var_t = indexed_var_t[0]
|
||||
self.assertEqual(indexed_tensor, indexed_var)
|
||||
|
||||
indexed_var.sum().backward()
|
||||
expected_grad = torch.zeros(4, 4)
|
||||
expected_grad[idx] = 1
|
||||
self.assertEqual(y.grad.data, expected_grad)
|
||||
|
||||
check_index(1)
|
||||
check_index((1, 1))
|
||||
check_index(slice(1, None))
|
||||
check_index(slice(None, 2))
|
||||
check_index((slice(None, 2), 2))
|
||||
check_index((slice(1, 2), 2))
|
||||
check_index((1, slice(2, None)))
|
||||
check_index((slice(None, None), slice(2, None)))
|
||||
check_index(torch.LongTensor([0, 2]))
|
||||
check_index(torch.rand(4, 4).bernoulli().byte())
|
||||
check_index((Ellipsis, slice(2, None)))
|
||||
|
||||
def test_basic_op_grad(self):
|
||||
"""Grad output might need to be reshaped to match the second argument."""
|
||||
x = Variable(torch.randn(4, 6), requires_grad=True)
|
||||
b = Variable(torch.rand(12, 1) + 1e-2, requires_grad=True)
|
||||
|
||||
def y():
|
||||
# .mm() depends on the grad_output being of correct size
|
||||
return b.mm(Variable(torch.rand(1, 2) + 1e-2))
|
||||
|
||||
(x + y()).sum().backward()
|
||||
(x - y()).sum().backward()
|
||||
(x * y()).sum().backward()
|
||||
(x / y()).sum().backward()
|
||||
(x.abs() ** y()).sum().backward()
|
||||
|
||||
def test_requires_grad(self):
|
||||
x = Variable(torch.randn(5, 5))
|
||||
@ -253,6 +302,53 @@ class TestAutograd(TestCase):
|
||||
y._backward_hooks['test'] = error
|
||||
b.backward(torch.ones(5, 5))
|
||||
|
||||
def test_requires_grad_inplace(self):
|
||||
a = Variable(torch.randn(5, 5))
|
||||
b = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
a += b
|
||||
self.assertTrue(a.requires_grad)
|
||||
|
||||
# non-leaf Variable
|
||||
a = Variable(torch.randn(5, 5)) + 0
|
||||
b = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
a += b
|
||||
self.assertTrue(a.requires_grad)
|
||||
|
||||
def test_duplicate_backward_root(self):
|
||||
a = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
b = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
|
||||
x = a * b
|
||||
grad_output = x.data.clone().normal_()
|
||||
torch.autograd.backward([x, x], [grad_output, grad_output])
|
||||
|
||||
self.assertEqual(a.grad.data, b.data * grad_output * 2)
|
||||
self.assertEqual(b.grad.data, a.data * grad_output * 2)
|
||||
|
||||
def test_backward_no_grad(self):
|
||||
a = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
b = a + 2
|
||||
with self.assertRaises(RuntimeError):
|
||||
torch.autograd.backward([b], [None])
|
||||
|
||||
def test_previous_functions(self):
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
y = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
|
||||
a = x + y
|
||||
self.assertIsNotNone(a.creator)
|
||||
previous_functions = a.creator.previous_functions
|
||||
self.assertEqual(len(previous_functions), 2)
|
||||
self.assertIs(previous_functions[0][0], x)
|
||||
self.assertEqual(previous_functions[0][1], 0)
|
||||
self.assertIs(previous_functions[1][0], y)
|
||||
self.assertEqual(previous_functions[1][1], 0)
|
||||
|
||||
b = a + 5
|
||||
previous_functions = b.creator.previous_functions
|
||||
self.assertEqual(len(previous_functions), 1)
|
||||
self.assertIs(previous_functions[0][0], a.creator)
|
||||
|
||||
def test_inplace(self):
|
||||
x = Variable(torch.ones(5, 5), requires_grad=True)
|
||||
y = Variable(torch.ones(5, 5) * 4, requires_grad=True)
|
||||
@ -408,15 +504,31 @@ class TestAutograd(TestCase):
|
||||
y = x * 2
|
||||
y = y.detach()
|
||||
self.assertFalse(y.requires_grad)
|
||||
self.assertFalse(y.creator.requires_grad)
|
||||
self.assertIsNone(y.creator)
|
||||
z = x + y
|
||||
z.sum().backward()
|
||||
# This is an incorrect gradient, but we assume that's what the user
|
||||
# wanted. detach() is an advanced option.
|
||||
self.assertEqual(x.grad.data, torch.ones(10, 10))
|
||||
|
||||
# detach() should preserve volatile flag
|
||||
x = Variable(torch.randn(10, 10), volatile=True)
|
||||
y = x * 2
|
||||
y = y.detach()
|
||||
self.assertTrue(y.volatile)
|
||||
|
||||
# in-place detach
|
||||
x = Variable(torch.randn(10, 10), requires_grad=True)
|
||||
y = Variable(torch.randn(10, 10), requires_grad=True)
|
||||
a = x * 2
|
||||
(y + a).sum().backward(retain_variables=True)
|
||||
a.detach_()
|
||||
self.assertFalse(a.requires_grad)
|
||||
(y + a).sum().backward() # this won't backprop to x
|
||||
self.assertEqual(x.grad.data, torch.ones(10, 10) * 2)
|
||||
self.assertEqual(y.grad.data, torch.ones(10, 10) * 2)
|
||||
|
||||
def test_type_conversions(self):
|
||||
import torch.cuda
|
||||
x = Variable(torch.randn(5, 5))
|
||||
self.assertIs(type(x.float().data), torch.FloatTensor)
|
||||
self.assertIs(type(x.int().data), torch.IntTensor)
|
||||
@ -435,6 +547,15 @@ class TestAutograd(TestCase):
|
||||
self.assertIs(type(x2.data), torch.cuda.FloatTensor)
|
||||
self.assertIs(x2.get_device(), 1)
|
||||
|
||||
def test_isolated_node(self):
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
y = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
|
||||
a = x + y
|
||||
b = torch.max(a, 1)[1].repeat(1, 5).double()
|
||||
o = (b + a).sum()
|
||||
o.backward()
|
||||
|
||||
def test_return_leaf(self):
|
||||
class Identity(Function):
|
||||
|
||||
@ -609,6 +730,31 @@ class TestAutograd(TestCase):
|
||||
y.sum().backward()
|
||||
self.assertEqual(x.grad.data, x.data.clone().fill_(1))
|
||||
|
||||
def test_reinforce_check(self):
|
||||
x = Variable(torch.randn(5, 5), requires_grad=True)
|
||||
|
||||
# these should be ok
|
||||
y = torch.normal(x)
|
||||
y.reinforce(torch.randn(5, 5))
|
||||
y = torch.normal(x)
|
||||
y.reinforce(2)
|
||||
|
||||
# can't call reinforce on non-stochastic variables
|
||||
self.assertRaises(RuntimeError, lambda: x.reinforce(2))
|
||||
|
||||
# can't call reinforce twice
|
||||
y = torch.normal(x)
|
||||
y.reinforce(2)
|
||||
self.assertRaises(RuntimeError, lambda: y.reinforce(2))
|
||||
|
||||
# check type of reward
|
||||
y = torch.normal(x)
|
||||
self.assertRaises(TypeError, lambda: y.reinforce(torch.randn(5, 5).long()))
|
||||
|
||||
# check size of reward
|
||||
y = torch.normal(x)
|
||||
self.assertRaises(ValueError, lambda: y.reinforce(torch.randn(4, 5)))
|
||||
|
||||
def test_stochastic(self):
|
||||
x = Variable(torch.rand(2, 10), requires_grad=True)
|
||||
stddevs = Variable(torch.rand(2, 10) * 5, requires_grad=True)
|
||||
@ -646,6 +792,18 @@ class TestAutograd(TestCase):
|
||||
|
||||
self.assertGreater(x.grad.data.abs().sum(), 0)
|
||||
|
||||
def test_stochastic_require_grad(self):
|
||||
# This tests a DSD function sequence (D=deterministic, S=stochastic),
|
||||
# where all functions require grad.
|
||||
x = Variable(torch.randn(2, 10), requires_grad=True)
|
||||
y = Variable(torch.randn(2, 10), requires_grad=True)
|
||||
z = torch.normal(x + 2, 2)
|
||||
o = z + y
|
||||
z.reinforce(torch.randn(2, 10))
|
||||
o.sum().backward()
|
||||
self.assertEqual(y.grad.data, torch.ones(2, 10))
|
||||
self.assertGreater(x.grad.data.abs().sum(), 0)
|
||||
|
||||
def test_stochastic_sequence(self):
|
||||
x = Variable(torch.rand(10).clamp_(0, 1), requires_grad=True)
|
||||
b = x.bernoulli()
|
||||
@ -754,7 +912,10 @@ function_tests = [
|
||||
(Index, (slice(0, 3),), (torch.rand(S, S, S),), 'slice'),
|
||||
(Index, ((slice(0, 3), 1),), (torch.rand(S, S, S),), 'slice_index'),
|
||||
(View, (S * S, S), (torch.rand(S, S, S),)),
|
||||
(Expand, ((S, 5, S, 5),), ((S, 1, S, 1),)),
|
||||
(Expand, ((5, S, 5, S, 5),), ((1, S, 1, S, 1),)),
|
||||
(Expand, ((S, S, S),), ((S, 1),), 'new_dim'),
|
||||
(Expand, ((S, S, S),), ((1, S),), 'new_dim_front'),
|
||||
(Expand, ((S, S, S),), ((1,),), 'scalar'),
|
||||
(Exp, (), (torch.rand(S, S, S),)),
|
||||
(Log, (), (torch.rand(S, S, S) + 1e-2,)),
|
||||
(Log1p, (), (torch.rand(S, S, S),)),
|
||||
@ -804,7 +965,7 @@ function_tests = [
|
||||
(Addr, (0.1, 0.4), ((S, M), (S,), (M,)), 'coef'),
|
||||
(Dot, (), ((L,), (L,)),),
|
||||
(Max, (), ((S, S, S),),),
|
||||
(Repeat, (torch.Size([2, 3, 1, 4]),), ((S, S, S, S),)),
|
||||
(Repeat, (torch.Size([2, 3, 1, 2]),), ((S, S, S, S),)),
|
||||
(Min, (), ((S, S, S),),),
|
||||
(Max, (0,), ((S, S, S),), 'dim'),
|
||||
(Min, (0,), ((S, S, S),), 'dim'),
|
||||
@ -819,8 +980,8 @@ function_tests = [
|
||||
(Norm, (3, 0), ((S, S, S),), '3_dim'),
|
||||
(Addcmul, (), ((S, S), (S, S), (S, S))),
|
||||
(Addcmul, (0.6,), ((S, S), (S, S), (S, S)), 'scale'),
|
||||
(Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 1e-2)),
|
||||
(Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 1e-2), 'scale'),
|
||||
(Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 5e-2)),
|
||||
(Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 5e-2), 'scale'),
|
||||
(IndexAdd, (0,), ((S, S), index_variable(2, S), (2, S))),
|
||||
# (IndexCopy, (0,), ((S, S), index_variable(2, S), (2, S)) ),
|
||||
(IndexFill, (0, 2), ((S, S), index_variable(2, S))),
|
||||
@ -870,8 +1031,10 @@ method_tests = [
|
||||
('t', (1, 2), ()),
|
||||
('view', (S, S, S), (S * S, S),),
|
||||
('view_as', (S, S, S), ((S * S, S),)),
|
||||
('expand', (S, 1, S), (S, S, S)),
|
||||
('expand', (S, 1, 1), (S, S, S)),
|
||||
('expand', (torch.Size([S, 1, S]),), (S, S, S), 'size'),
|
||||
('expand', (S, 1), (S, S, S), 'new_dim'),
|
||||
('expand', (1,), (S, S, S), 'scalar'),
|
||||
('exp', (S, S, S), ()),
|
||||
('log', (S, S, S), ()),
|
||||
('log1p', (S, S, S), ()),
|
||||
@ -973,18 +1136,18 @@ method_tests = [
|
||||
# TODO: clamp with min/max
|
||||
|
||||
|
||||
def create_input(call_args):
|
||||
def create_input(call_args, requires_grad=True):
|
||||
if not isinstance(call_args, tuple):
|
||||
call_args = (call_args,)
|
||||
|
||||
def map_arg(arg):
|
||||
if isinstance(arg, tuple) and not isinstance(arg[0], Variable):
|
||||
return Variable(torch.randn(*arg).double(), requires_grad=True)
|
||||
return Variable(torch.randn(*arg).double(), requires_grad=requires_grad)
|
||||
elif torch.is_tensor(arg):
|
||||
if isinstance(arg, torch.FloatTensor):
|
||||
return Variable(arg.double(), requires_grad=True)
|
||||
return Variable(arg.double(), requires_grad=requires_grad)
|
||||
else:
|
||||
return Variable(arg, requires_grad=True)
|
||||
return Variable(arg, requires_grad=requires_grad)
|
||||
else:
|
||||
return arg
|
||||
return tuple(map_arg(arg) for arg in call_args)
|
||||
@ -1011,26 +1174,12 @@ for test in function_tests:
|
||||
def do_test(self, cls=cls, constructor_args=constructor_args,
|
||||
call_args=call_args, test_name=test_name):
|
||||
input = create_input(call_args)
|
||||
output = cls(*constructor_args)(*input)
|
||||
if not isinstance(output, tuple):
|
||||
output = (output,)
|
||||
for i, o in enumerate(output):
|
||||
if not o.requires_grad:
|
||||
continue
|
||||
analytical = get_analytical_jacobian(input, o)
|
||||
|
||||
def fn(input):
|
||||
tmp = cls(*constructor_args)(*input)
|
||||
if not isinstance(tmp, tuple):
|
||||
tmp = (tmp,)
|
||||
return tmp[i].data
|
||||
numerical = get_numerical_jacobian(fn, input, input)
|
||||
self.assertLessEqual(
|
||||
max(a.add(-1, n).abs().max() for a, n in zip(analytical, numerical)),
|
||||
PRECISION
|
||||
)
|
||||
self.assertEqual(gradcheck(cls(*constructor_args), input, eps=1e-6, atol=PRECISION), True)
|
||||
|
||||
if test_name not in ignore_inplace and issubclass(cls, InplaceFunction):
|
||||
output = cls(*constructor_args)(*input)
|
||||
if not isinstance(output, tuple):
|
||||
output = (output,)
|
||||
inplace_input = deepcopy(input)
|
||||
inplace_input_copy = tuple(i + 0 for i in inplace_input)
|
||||
fn = cls(*constructor_args, inplace=True)
|
||||
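The hunk above replaces the hand-rolled analytical/numerical Jacobian comparison with `torch.autograd.gradcheck`. A standalone usage sketch (the function and sizes are arbitrary; double precision keeps the finite-difference Jacobian accurate):

```python
import torch
from torch.autograd import Variable, gradcheck

inputs = (Variable(torch.randn(4, 4).double(), requires_grad=True),)

def fn(x):
    return (x * 2 + 1).sum()

# Compares autograd's gradients against finite differences; True if they match.
print(gradcheck(fn, inputs, eps=1e-6, atol=1e-4))
```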
@ -1068,8 +1217,8 @@ for test in method_tests:
|
||||
|
||||
def do_test(self, name=name, self_size=self_size, args=args, test_name=test_name):
|
||||
def check(name):
|
||||
self_variable = create_input((self_size,))[0]
|
||||
args_variable = create_input(args)
|
||||
self_variable = create_input((self_size,), requires_grad=False)[0]
|
||||
args_variable = create_input(args, requires_grad=False)
|
||||
self_tensor = deepcopy(self_variable.data)
|
||||
args_tensor = deepcopy(unpack_variables(args_variable))
|
||||
output_variable = getattr(self_variable, name)(*args_variable)
|
||||
|
||||
@ -9,10 +9,11 @@ import torch.cuda.comm as comm
|
||||
|
||||
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests
|
||||
|
||||
HAS_CUDA = True
|
||||
if not torch.cuda.is_available():
|
||||
print('CUDA not available, skipping tests')
|
||||
import sys
|
||||
sys.exit()
|
||||
TestCase = object # noqa: F811
|
||||
HAS_CUDA = False
|
||||
|
||||
|
||||
def is_floating(t):
|
||||
@ -59,6 +60,13 @@ def small_2d_scaled(t, scale=10):
|
||||
return make_tensor(t, S, S).mul(scale)
|
||||
|
||||
|
||||
def small_2d_oneish(t):
|
||||
if is_floating(t):
|
||||
return make_tensor(t, S, S).clamp(min=0.99, max=1.01)
|
||||
else:
|
||||
return t(S, S).fill_(1)
|
||||
|
||||
|
||||
def small_3d(t):
|
||||
return make_tensor(t, S, S, S)
|
||||
|
||||
@ -206,7 +214,7 @@ tests = [
|
||||
('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
|
||||
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
|
||||
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
|
||||
('prod', small_3d, lambda t: [],),
|
||||
('prod', small_2d_oneish, lambda t: [],),
|
||||
('prod', small_3d, lambda t: [1], 'dim'),
|
||||
('sum', small_2d, lambda t: [],),
|
||||
('sum', small_3d, lambda t: [1], 'dim'),
|
||||
@ -233,6 +241,7 @@ tests = [
|
||||
('triu', medium_2d, lambda t: [],),
|
||||
('triu', medium_2d, lambda t: [2], 'positive'),
|
||||
('triu', medium_2d, lambda t: [-2], 'negative'),
|
||||
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
|
||||
('view', small_3d, lambda t: [100, 10],),
|
||||
('view_as', small_3d, lambda t: [t(100, 10)],),
|
||||
('zero', small_3d, lambda t: [],),
|
||||
@ -338,21 +347,21 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
|
||||
|
||||
class TestCuda(TestCase):
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_autogpu(self):
|
||||
if torch.cuda.device_count() > 1:
|
||||
x = torch.randn(5, 5).cuda()
|
||||
y = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(x.get_device(), 0)
|
||||
self.assertEqual(x.get_device(), 0)
|
||||
with torch.cuda.device(1):
|
||||
z = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(z.get_device(), 1)
|
||||
q = x.add(y)
|
||||
self.assertEqual(q.get_device(), 0)
|
||||
w = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(w.get_device(), 1)
|
||||
z = z.cuda()
|
||||
self.assertEqual(z.get_device(), 0)
|
||||
x = torch.randn(5, 5).cuda()
|
||||
y = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(x.get_device(), 0)
|
||||
self.assertEqual(x.get_device(), 0)
|
||||
with torch.cuda.device(1):
|
||||
z = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(z.get_device(), 1)
|
||||
q = x.add(y)
|
||||
self.assertEqual(q.get_device(), 0)
|
||||
w = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(w.get_device(), 1)
|
||||
z = z.cuda()
|
||||
self.assertEqual(z.get_device(), 0)
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_copy_device(self):
|
||||
@ -374,7 +383,7 @@ class TestCuda(TestCase):
|
||||
self.assertEqual(z.get_device(), 0)
|
||||
self.assertIs(z.cuda(0), z)
|
||||
|
||||
def test_serialization(self):
|
||||
def test_serialization_array_with_storage(self):
|
||||
x = torch.randn(5, 5).cuda()
|
||||
y = torch.IntTensor(2, 5).fill_(0).cuda()
|
||||
q = [x, y, x, y.storage()]
|
||||
@ -512,6 +521,13 @@ class TestCuda(TestCase):
|
||||
self.assertEqual(x, y)
|
||||
self.assertEqual(torch.cuda.initial_seed(), 2)
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_cat_autogpu(self):
|
||||
x = torch.randn(4, 4).cuda(1)
|
||||
y = torch.randn(4, 4).cuda(1)
|
||||
z = torch.cat([x, y], 0)
|
||||
self.assertEqual(z.get_device(), x.get_device())
|
||||
|
||||
def test_serialization(self):
|
||||
x = torch.randn(4, 4).cuda()
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
@ -522,7 +538,7 @@ class TestCuda(TestCase):
|
||||
self.assertIs(type(x_copy), type(x))
|
||||
self.assertEqual(x_copy.get_device(), x.get_device())
|
||||
|
||||
def test_serialization_empty(self):
|
||||
def test_serialization_array_with_empty(self):
|
||||
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
torch.save(x, f)
|
||||
@ -665,40 +681,67 @@ class TestCuda(TestCase):
|
||||
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
|
||||
self.assertEqual(list(gpu_tensor), [1])
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_caching_pinned_memory_multi_gpu(self):
|
||||
# checks that the events preventing pinned memory from being re-used
|
||||
# too early are recorded on the correct GPU
|
||||
cycles_per_ms = get_cycles_per_ms()
|
||||
|
||||
for decl in tests:
|
||||
for t in types:
|
||||
tensor = t()
|
||||
gpu_tensor = get_gpu_type(t)()
|
||||
if len(decl) == 3:
|
||||
name, constr, arg_constr = decl
|
||||
desc = ''
|
||||
elif len(decl) == 4:
|
||||
name, constr, arg_constr, desc = decl
|
||||
elif len(decl) == 5:
|
||||
name, constr, arg_constr, desc, type_subset = decl
|
||||
if t not in type_subset:
|
||||
continue
|
||||
t = torch.FloatTensor([1]).pin_memory()
|
||||
ptr = t.data_ptr()
|
||||
gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
|
||||
gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)
|
||||
|
||||
precision = custom_precision.get(name, TestCuda.precision)
|
||||
for inplace in (True, False):
|
||||
if inplace:
|
||||
name_inner = name + '_'
|
||||
else:
|
||||
name_inner = name
|
||||
if not hasattr(tensor, name_inner):
|
||||
continue
|
||||
if not hasattr(gpu_tensor, name_inner):
|
||||
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
|
||||
name_inner, gpu_tensor.__class__.__name__))
|
||||
continue
|
||||
with torch.cuda.device(1):
|
||||
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
|
||||
gpu_tensor1.copy_(t, async=True)
|
||||
|
||||
test_name = 'test_' + t.__name__ + '_' + name_inner
|
||||
if desc:
|
||||
test_name += '_' + desc
|
||||
del t
|
||||
t = torch.FloatTensor([2]).pin_memory()
|
||||
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
|
||||
|
||||
with torch.cuda.device(0):
|
||||
gpu_tensor0.copy_(t, async=True)
|
||||
|
||||
self.assertEqual(gpu_tensor1[0], 1)
|
||||
self.assertEqual(gpu_tensor0[0], 2)
|
||||
|
||||
|
||||
if HAS_CUDA:
|
||||
for decl in tests:
|
||||
for t in types:
|
||||
tensor = t()
|
||||
gpu_tensor = get_gpu_type(t)()
|
||||
if len(decl) == 3:
|
||||
name, constr, arg_constr = decl
|
||||
desc = ''
|
||||
elif len(decl) == 4:
|
||||
name, constr, arg_constr, desc = decl
|
||||
elif len(decl) == 5:
|
||||
name, constr, arg_constr, desc, type_subset = decl
|
||||
if t not in type_subset:
|
||||
continue
|
||||
|
||||
precision = custom_precision.get(name, TestCuda.precision)
|
||||
for inplace in (True, False):
|
||||
if inplace:
|
||||
name_inner = name + '_'
|
||||
else:
|
||||
name_inner = name
|
||||
if not hasattr(tensor, name_inner):
|
||||
continue
|
||||
if not hasattr(gpu_tensor, name_inner):
|
||||
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
|
||||
name_inner, gpu_tensor.__class__.__name__))
|
||||
continue
|
||||
|
||||
test_name = 'test_' + t.__name__ + '_' + name_inner
|
||||
if desc:
|
||||
test_name += '_' + desc
|
||||
|
||||
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
|
||||
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
|
||||
|
||||
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
|
||||
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_tests()
|
||||
|
||||
@@ -4,7 +4,7 @@ import torch
 import traceback
 import unittest
 from torch.utils.data import Dataset, TensorDataset, DataLoader
-from common import TestCase, run_tests
+from common import TestCase, run_tests, TEST_NUMPY
 from common_nn import TEST_CUDA

@ -27,8 +27,8 @@ class TestTensorDataset(TestCase):
|
||||
l = torch.randn(15)
|
||||
source = TensorDataset(t, l)
|
||||
for i in range(15):
|
||||
self.assertEqual(t[i:i + 1], source[i][0])
|
||||
self.assertEqual(l[i:i + 1], source[i][1])
|
||||
self.assertEqual(t[i], source[i][0])
|
||||
self.assertEqual(l[i], source[i][1])
|
||||
|
||||
|
||||
class ErrorDataset(Dataset):
|
||||
@ -52,7 +52,7 @@ class TestDataLoader(TestCase):
|
||||
for i, (sample, target) in enumerate(loader):
|
||||
idx = i * batch_size
|
||||
self.assertEqual(sample, self.data[idx:idx + batch_size])
|
||||
self.assertEqual(target, self.labels[idx:idx + batch_size].view(-1, 1))
|
||||
self.assertEqual(target, self.labels[idx:idx + batch_size])
|
||||
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
|
||||
|
||||
def _test_shuffle(self, loader):
|
||||
@ -66,7 +66,7 @@ class TestDataLoader(TestCase):
|
||||
self.assertFalse(found_data[data_point_idx])
|
||||
found_data[data_point_idx] += 1
|
||||
break
|
||||
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
|
||||
self.assertEqual(target, self.labels[data_point_idx])
|
||||
found_labels[data_point_idx] += 1
|
||||
self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
|
||||
self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
|
||||
@ -123,6 +123,22 @@ class TestDataLoader(TestCase):
|
||||
self.assertTrue(input.is_pinned())
|
||||
self.assertTrue(target.is_pinned())
|
||||
|
||||
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
|
||||
def test_numpy(self):
|
||||
import numpy as np
|
||||
|
||||
class TestDataset(torch.utils.data.Dataset):
|
||||
def __getitem__(self, i):
|
||||
return np.ones((2, 3, 4)) * i
|
||||
|
||||
def __len__(self):
|
||||
return 1000
|
||||
|
||||
loader = DataLoader(TestDataset(), batch_size=12)
|
||||
batch = next(iter(loader))
|
||||
self.assertIsInstance(batch, torch.DoubleTensor)
|
||||
self.assertEqual(batch.size(), torch.Size([12, 2, 3, 4]))
|
||||
|
||||
def test_error(self):
|
||||
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
|
||||
|
||||
|
||||
@ -1154,6 +1154,15 @@ class TestNN(NNTestCase):
|
||||
module.__repr__()
|
||||
str(module)
|
||||
|
||||
def test_accUpdateGradParameters(self):
|
||||
module = nn.LookupTable(5, 3)
|
||||
module.weight.fill_(2)
|
||||
input = torch.LongTensor([1, 3])
|
||||
output = module.updateOutput(input)
|
||||
module.backwardUpdate(input, output, 0.1)
|
||||
self.assertEqual(module.weight[0, 0], 2)
|
||||
self.assertEqual(module.weight[3, 0], 1.8)
|
||||
|
||||
def _build_net(self):
|
||||
return (nn.Sequential()
|
||||
.add(nn.Concat(0)
|
||||
|
||||
@ -19,6 +19,7 @@ HAS_SHM_FILES = os.path.isdir('/dev/shm')
|
||||
TEST_CUDA_IPC = torch.cuda.is_available() and \
|
||||
sys.version_info[0] == 3 and \
|
||||
sys.platform != 'darwin'
|
||||
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
|
||||
|
||||
|
||||
def simple_fill(queue, event):
|
||||
@ -79,9 +80,8 @@ def autograd_sharing(queue, ready, master_modified):
|
||||
is_ok = var.data.equal(expected_var)
|
||||
var.data[:] = torch.ones(5, 5)
|
||||
|
||||
if var.grad is not None:
|
||||
is_ok &= var.grad.data.equal(torch.ones(5, 5) * 4)
|
||||
var.grad.data[:] = torch.ones(5, 5)
|
||||
is_ok &= var.grad is None
|
||||
var._grad = Variable(torch.ones(5, 5), requires_grad=False)
|
||||
|
||||
queue.put(is_ok)
|
||||
|
||||
@ -289,6 +289,7 @@ class TestMultiprocessing(TestCase):
|
||||
self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
|
||||
|
||||
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
|
||||
@unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
|
||||
def test_cuda_small_tensors(self):
|
||||
# Check multiple small tensors which will likely use the same
|
||||
# underlying cached allocation
|
||||
@ -357,20 +358,19 @@ class TestMultiprocessing(TestCase):
|
||||
queue = mp.Queue()
|
||||
p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
|
||||
p.start()
|
||||
var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
|
||||
queue.put(var)
|
||||
|
||||
ready.wait()
|
||||
var.data[0, 0] = 1000
|
||||
if var.grad is not None:
|
||||
var.grad.data[:] = torch.ones(5, 5) * 4
|
||||
var.grad.data[:] = torch.ones(5, 5) * 4
|
||||
master_modified.set()
|
||||
|
||||
worker_ok = queue.get()
|
||||
self.assertTrue(worker_ok)
|
||||
|
||||
self.assertEqual(var.data, torch.ones(5, 5))
|
||||
if var.grad is not None:
|
||||
self.assertEqual(var.grad.data, torch.ones(5, 5))
|
||||
self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
|
||||
p.join()
|
||||
|
||||
def test_variable_sharing(self):
|
||||
|
||||
@ -6,12 +6,10 @@ import torch.cuda
|
||||
|
||||
from common import TestCase, run_tests
|
||||
|
||||
if not torch.cuda.is_available():
|
||||
print('CUDA not available, skipping tests')
|
||||
import sys
|
||||
sys.exit()
|
||||
|
||||
nGPUs = torch.cuda.device_count()
|
||||
if nGPUs == 0:
|
||||
print('CUDA not available, skipping tests')
|
||||
TestCase = object # noqa: F811
|
||||
|
||||
|
||||
class TestNCCL(TestCase):
|
||||
|
||||
test/test_nn.py: 830 lines changed (diff suppressed because it is too large)
@@ -14,13 +14,15 @@ class TestSparse(TestCase):

     @staticmethod
     def _gen_sparse(d, nnz, with_size):
-        v = torch.randn(nnz)
         if isinstance(with_size, Number):
+            v = torch.randn(nnz)
             i = (torch.rand(d, nnz) * with_size).type(torch.LongTensor)
             x = SparseTensor(i, v)
         else:
+            v_size = [nnz] + list(with_size[d:])
+            v = torch.randn(*v_size)
             i = torch.rand(d, nnz) * \
-                torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1)
+                torch.Tensor(with_size[:d]).repeat(nnz, 1).transpose(0, 1)
             i = i.type(torch.LongTensor)
             x = SparseTensor(i, v, torch.Size(with_size))

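The new code path above generates "hybrid" sparse tensors: d sparse dimensions with a dense value slice attached to each non-zero entry, so indices are d x nnz and values are nnz x with_size[d:]. A hedged sketch of such a tensor built by hand (the torch.sparse.FloatTensor constructor name is assumed; the tests themselves go through their SparseTensor alias):

import torch

i = torch.LongTensor([[0, 1, 1],
                      [2, 0, 2]])           # 2 sparse dims, 3 non-zero entries
v = torch.Tensor([[3, 4], [5, 6], [7, 8]])  # each entry carries a dense slice of size 2
x = torch.sparse.FloatTensor(i, v, torch.Size([2, 3, 2]))
print(x.to_dense().size())                  # torch.Size([2, 3, 2])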
@ -73,6 +75,33 @@ class TestSparse(TestCase):
|
||||
x.to_dense()
|
||||
self.assertEqual(res, x.to_dense())
|
||||
|
||||
def test_to_dense_hybrid(self):
|
||||
i = torch.LongTensor([
|
||||
[0, 1, 2, 2],
|
||||
[0, 0, 0, 3],
|
||||
])
|
||||
v = torch.Tensor([[2, 3], [1, 2], [3, 4], [4, 5]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 2]))
|
||||
res = torch.Tensor([
|
||||
[[2, 3],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0]],
|
||||
[[1, 2],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0]],
|
||||
[[3, 4],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[4, 5]],
|
||||
])
|
||||
|
||||
x.to_dense() # Tests double to_dense for memory corruption
|
||||
x.to_dense()
|
||||
x.to_dense()
|
||||
self.assertEqual(res, x.to_dense())
|
||||
|
||||
def test_contig(self):
|
||||
i = torch.LongTensor([
|
||||
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
|
||||
@ -126,6 +155,65 @@ class TestSparse(TestCase):
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
def test_contig_hybrid(self):
|
||||
i = torch.LongTensor([
|
||||
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
|
||||
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
|
||||
])
|
||||
v = torch.Tensor([
|
||||
[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
|
||||
[6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
|
||||
])
|
||||
x = SparseTensor(i, v, torch.Size([100, 100, 2]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
|
||||
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
|
||||
])
|
||||
exp_v = torch.Tensor([
|
||||
[2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
|
||||
[3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
|
||||
])
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
i = torch.LongTensor([
|
||||
[2, 0, 2, 1],
|
||||
[0, 0, 3, 0],
|
||||
[1, 0, 4, 0],
|
||||
])
|
||||
v = torch.Tensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 1, 2, 2],
|
||||
[0, 0, 0, 3],
|
||||
[0, 0, 1, 4],
|
||||
])
|
||||
exp_v = torch.Tensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]])
|
||||
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
# Duplicate indices
|
||||
i = torch.LongTensor([
|
||||
[0, 0, 2, 0],
|
||||
[0, 0, 3, 0],
|
||||
[0, 0, 4, 0],
|
||||
])
|
||||
v = torch.Tensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 2],
|
||||
[0, 3],
|
||||
[0, 4],
|
||||
])
|
||||
exp_v = torch.Tensor([[6, 4, 5], [4, 3, 4]])
|
||||
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
def test_transpose(self):
|
||||
x = self._gen_sparse(4, 20, 5)[0]
|
||||
y = x.to_dense()
|
||||
@ -187,33 +275,97 @@ class TestSparse(TestCase):
|
||||
test_shape(1000, 100, 100)
|
||||
test_shape(3000, 64, 300)
|
||||
|
||||
def _test_spadd_shape(self, shape_i, shape_v=None):
|
||||
shape = shape_i + (shape_v or [])
|
||||
x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
|
||||
y = torch.randn(*shape)
|
||||
r = random.random()
|
||||
|
||||
expected = y + r * x.to_dense()
|
||||
res = torch.add(y, r, x)
|
||||
|
||||
self.assertEqual(res, expected)
|
||||
|
||||
# Non contiguous dense tensor
|
||||
s = list(shape)
|
||||
s[0] = shape[-1]
|
||||
s[-1] = shape[0]
|
||||
y = torch.randn(*s).transpose_(0, len(s) - 1)
|
||||
r = random.random()
|
||||
|
||||
expected = y + r * x.to_dense()
|
||||
res = torch.add(y, r, x)
|
||||
|
||||
self.assertEqual(res, expected)
|
||||
|
||||
def test_spadd(self):
|
||||
def test_shape(*shape):
|
||||
x, _, _ = self._gen_sparse(len(shape), 10, shape)
|
||||
y = torch.randn(*shape)
|
||||
r = random.random()
|
||||
self._test_spadd_shape([5, 6])
|
||||
self._test_spadd_shape([10, 10, 10])
|
||||
self._test_spadd_shape([50, 30, 20])
|
||||
self._test_spadd_shape([5, 5, 5, 5, 5, 5])
|
||||
|
||||
expected = y + r * x.to_dense()
|
||||
res = torch.add(y, r, x)
|
||||
def test_spadd_hybrid(self):
|
||||
self._test_spadd_shape([5, 6], [2, 3])
|
||||
self._test_spadd_shape([10, 10, 10], [3])
|
||||
self._test_spadd_shape([50, 30, 20], [2])
|
||||
self._test_spadd_shape([5, 5, 5, 5, 5, 5], [2])
|
||||
|
||||
self.assertEqual(res, expected)
|
||||
def _test_basic_ops_shape(self, shape_i, shape_v=None):
|
||||
shape = shape_i + (shape_v or [])
|
||||
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape)
|
||||
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape)
|
||||
|
||||
# Non contiguous dense tensor
|
||||
s = list(shape)
|
||||
s[0] = shape[-1]
|
||||
s[-1] = shape[0]
|
||||
y = torch.randn(*s).transpose_(0, len(s) - 1)
|
||||
r = random.random()
|
||||
y1 = x1 + x2
|
||||
y2 = x1.clone()
|
||||
y2.add_(x2)
|
||||
expected = x1.to_dense() + x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
expected = y + r * x.to_dense()
|
||||
res = torch.add(y, r, x)
|
||||
y1 = x1 - x2
|
||||
y2 = x1.clone()
|
||||
y2.sub_(x2)
|
||||
expected = x1.to_dense() - x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
self.assertEqual(res, expected)
|
||||
y1 = x1 * x2
|
||||
y2 = x1.clone()
|
||||
y2.mul_(x2)
|
||||
expected = x1.to_dense() * x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
test_shape(5, 6)
|
||||
test_shape(10, 10, 10)
|
||||
test_shape(50, 30, 20)
|
||||
test_shape(5, 5, 5, 5, 5, 5)
|
||||
y1 = x1 * 37.5
|
||||
y2 = x1.clone()
|
||||
y2.mul_(37.5)
|
||||
expected = x1.to_dense() * 37.5
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y1 = x1 / 37.5
|
||||
y2 = x1.clone()
|
||||
y2.div_(37.5)
|
||||
expected = x1.to_dense() / 37.5
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y = x1.clone()
|
||||
y.zero_()
|
||||
expected = torch.zeros(x1.size())
|
||||
self.assertEqual(y.to_dense(), expected)
|
||||
|
||||
def test_basic_ops(self):
|
||||
self._test_basic_ops_shape([5, 6])
|
||||
self._test_basic_ops_shape([10, 10, 10])
|
||||
self._test_basic_ops_shape([50, 30, 20])
|
||||
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5])
|
||||
|
||||
def test_basic_ops_hybrid(self):
|
||||
self._test_basic_ops_shape([5, 6], [2, 3])
|
||||
self._test_basic_ops_shape([10, 10, 10], [3])
|
||||
self._test_basic_ops_shape([50, 30, 20], [2])
|
||||
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5], [2])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
import random
|
||||
import torch
|
||||
@ -6,9 +7,8 @@ import torch.cuda
|
||||
import tempfile
|
||||
import unittest
|
||||
import warnings
|
||||
from itertools import product, chain
|
||||
from functools import wraps
|
||||
from common import TestCase, iter_indices, TEST_NUMPY, run_tests
|
||||
from itertools import product, combinations
|
||||
from common import TestCase, iter_indices, TEST_NUMPY, run_tests, download_file, skipIfNoLapack
|
||||
|
||||
if TEST_NUMPY:
|
||||
import numpy as np
|
||||
@ -16,18 +16,6 @@ if TEST_NUMPY:
|
||||
SIZE = 100
|
||||
|
||||
|
||||
def skipIfNoLapack(fn):
|
||||
@wraps(fn)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
fn(*args, **kwargs)
|
||||
except Exception as e:
|
||||
if 'Lapack library not found' in e.args[0]:
|
||||
raise unittest.SkipTest('Compiled without Lapack')
|
||||
raise
|
||||
return wrapper
|
||||
|
||||
|
||||
class TestTorch(TestCase):
|
||||
|
||||
def test_dot(self):
|
||||
@ -797,9 +785,11 @@ class TestTorch(TestCase):
|
||||
def assertIsOrdered(self, order, x, mxx, ixx, task):
|
||||
SIZE = 4
|
||||
if order == 'descending':
|
||||
check_order = lambda a, b: a >= b
|
||||
def check_order(a, b):
|
||||
return a >= b
|
||||
elif order == 'ascending':
|
||||
check_order = lambda a, b: a <= b
|
||||
def check_order(a, b):
|
||||
return a <= b
|
||||
else:
|
||||
error('unknown order "{}", must be "ascending" or "descending"'.format(order))
|
||||
|
||||
@ -1652,7 +1642,7 @@ class TestTorch(TestCase):
|
||||
self._test_conv_corr_eq(lambda x, k: torch.xcorr3(x, k), reference)
|
||||
|
||||
@unittest.skip("Not implemented yet")
|
||||
def test_xcorr3_xcorr2_eq(self):
|
||||
def test_xcorr3_xcorr2_eq_full(self):
|
||||
def reference(x, k, o3, o32):
|
||||
for i in range(x.size(1)):
|
||||
for j in range(k.size(1)):
|
||||
@ -1660,7 +1650,7 @@ class TestTorch(TestCase):
|
||||
self._test_conv_corr_eq(lambda x, k: torch.xcorr3(x, k, 'F'), reference)
|
||||
|
||||
@unittest.skip("Not implemented yet")
|
||||
def test_conv3_conv2_eq(self):
|
||||
def test_conv3_conv2_eq_valid(self):
|
||||
def reference(x, k, o3, o32):
|
||||
for i in range(o3.size(1)):
|
||||
for j in range(k.size(1)):
|
||||
@ -1867,7 +1857,7 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(reference[2, 2, 2], 27, 0)
|
||||
self.assertEqual(reference[:], self._consecutive((3, 3, 3)), 0)
|
||||
|
||||
# Check Ellipsis
|
||||
# indexing with Ellipsis
|
||||
self.assertEqual(reference[..., 2], torch.Tensor([[3, 6, 9],
|
||||
[12, 15, 18],
|
||||
[21, 24, 27]]), 0)
|
||||
@ -1879,18 +1869,61 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(reference[2, ..., 2, 2], 27, 0)
|
||||
self.assertEqual(reference[2, 2, ..., 2], 27, 0)
|
||||
self.assertEqual(reference[2, 2, 2, ...], 27, 0)
|
||||
|
||||
# LongTensor indexing
|
||||
reference = self._consecutive((5, 5, 5))
|
||||
idx = torch.LongTensor([2, 4])
|
||||
self.assertEqual(reference[idx], torch.stack([reference[2], reference[4]]))
|
||||
self.assertEqual(reference[2, idx], torch.stack([reference[2, 2], reference[2, 4]]))
|
||||
self.assertEqual(reference[3, idx, 1], torch.stack([reference[3, 2], reference[3, 4]])[:, 1])
|
||||
self.assertEqual(reference[...], reference, 0)
|
||||
|
||||
reference_5d = self._consecutive((3, 3, 3, 3, 3))
|
||||
self.assertEqual(reference_5d[..., 1, 0], reference_5d[:, :, :, 1, 0], 0)
|
||||
self.assertEqual(reference_5d[2, ..., 1, 0], reference_5d[2, :, :, 1, 0], 0)
|
||||
self.assertEqual(reference_5d[2, 1, 0, ..., 1], reference_5d[2, 1, 0, :, 1], 0)
|
||||
self.assertEqual(reference_5d[...], reference_5d, 0)
|
||||
|
||||
# LongTensor indexing
|
||||
reference = self._consecutive((5, 5, 5))
|
||||
idx = torch.LongTensor([2, 4])
|
||||
self.assertEqual(reference[idx], torch.stack([reference[2], reference[4]]))
|
||||
# TODO: enable one indexing is implemented like in numpy
|
||||
# self.assertEqual(reference[2, idx], torch.stack([reference[2, 2], reference[2, 4]]))
|
||||
# self.assertEqual(reference[3, idx, 1], torch.stack([reference[3, 2], reference[3, 4]])[:, 1])
|
||||
|
||||
# None indexing
|
||||
self.assertEqual(reference[2, None], reference[2].unsqueeze(0))
|
||||
self.assertEqual(reference[2, None, None], reference[2].unsqueeze(0).unsqueeze(0))
|
||||
self.assertEqual(reference[2:4, None], reference[2:4].unsqueeze(1))
|
||||
self.assertEqual(reference[None, 2, None, None], reference.unsqueeze(0)[:, 2].unsqueeze(0).unsqueeze(0))
|
||||
self.assertEqual(reference[None, 2:5, None, None], reference.unsqueeze(0)[:, 2:5].unsqueeze(2).unsqueeze(2))
|
||||
|
||||
# indexing with step
|
||||
reference = self._consecutive((10, 10, 10))
|
||||
self.assertEqual(reference[1:5:2], torch.stack([reference[1], reference[3]], 0))
|
||||
self.assertEqual(reference[1:6:2], torch.stack([reference[1], reference[3], reference[5]], 0))
|
||||
self.assertEqual(reference[1:9:4], torch.stack([reference[1], reference[5]], 0))
|
||||
self.assertEqual(reference[2:4, 1:5:2], torch.stack([reference[2:4, 1], reference[2:4, 3]], 1))
|
||||
self.assertEqual(reference[3, 1:6:2], torch.stack([reference[3, 1], reference[3, 3], reference[3, 5]], 0))
|
||||
self.assertEqual(reference[None, 2, 1:9:4], torch.stack([reference[2, 1], reference[2, 5]], 0).unsqueeze(0))
|
||||
self.assertEqual(reference[:, 2, 1:6:2],
|
||||
torch.stack([reference[:, 2, 1], reference[:, 2, 3], reference[:, 2, 5]], 1))
|
||||
|
||||
lst = [list(range(i, i + 10)) for i in range(0, 100, 10)]
|
||||
tensor = torch.DoubleTensor(lst)
|
||||
for i in range(100):
|
||||
idx1_start = random.randrange(10)
|
||||
idx1_end = idx1_start + random.randrange(1, 10 - idx1_start + 1)
|
||||
idx1_step = random.randrange(1, 8)
|
||||
idx1 = slice(idx1_start, idx1_end, idx1_step)
|
||||
if random.randrange(2) == 0:
|
||||
idx2_start = random.randrange(10)
|
||||
idx2_end = idx2_start + random.randrange(1, 10 - idx2_start + 1)
|
||||
idx2_step = random.randrange(1, 8)
|
||||
idx2 = slice(idx2_start, idx2_end, idx2_step)
|
||||
lst_indexed = list(map(lambda l: l[idx2], lst[idx1]))
|
||||
tensor_indexed = tensor[idx1, idx2]
|
||||
else:
|
||||
lst_indexed = lst[idx1]
|
||||
tensor_indexed = tensor[idx1]
|
||||
self.assertEqual(torch.DoubleTensor(lst_indexed), tensor_indexed)
|
||||
|
||||
self.assertRaises(ValueError, lambda: reference[1:9:0])
|
||||
self.assertRaises(ValueError, lambda: reference[1:9:-1])
|
||||
|
||||
self.assertRaises(IndexError, lambda: reference[1, 1, 1, 1])
|
||||
self.assertRaises(IndexError, lambda: reference[1, 1, 1, 1:1])
|
||||
@ -1920,6 +1953,7 @@ class TestTorch(TestCase):
|
||||
checkPartialAssign((0, 1))
|
||||
checkPartialAssign((1, 2))
|
||||
checkPartialAssign((0, 2))
|
||||
checkPartialAssign(torch.LongTensor((0, 2)))
|
||||
|
||||
with self.assertRaises(IndexError):
|
||||
reference[1, 1, 1, 1] = 1
|
||||
@ -1940,10 +1974,8 @@ class TestTorch(TestCase):
|
||||
with self.assertRaises(TypeError):
|
||||
reference[0.0, :, 0.0] = 1
|
||||
|
||||
# LongTensor assignments are not supported yet
|
||||
with self.assertRaises(RuntimeError):
|
||||
reference[torch.LongTensor([2, 4])] = 1
|
||||
with self.assertRaises(RuntimeError):
|
||||
# LongTensor assignments are not fully supported yet
|
||||
with self.assertRaises(TypeError):
|
||||
reference[0, torch.LongTensor([2, 4])] = 1
|
||||
|
||||
def test_index_copy(self):
|
||||
@ -2152,15 +2184,35 @@ class TestTorch(TestCase):
|
||||
self.assertEqual((tensor_view - tensor).abs().max(), 0)
|
||||
self.assertEqual(empty.view_as(empty), empty)
|
||||
self.assertEqual(empty.view(0), empty)
|
||||
self.assertRaises(RuntimeError, lambda: tensor.view(15, 0))
|
||||
self.assertRaises(RuntimeError, lambda: tensor.view(7, -1))
|
||||
self.assertRaises(RuntimeError, lambda: tensor.view(15, -1, -1))
|
||||
|
||||
def test_expand(self):
|
||||
result = torch.Tensor()
|
||||
tensor = torch.rand(8, 1)
|
||||
template = torch.rand(8, 5)
|
||||
tensor = torch.rand(1, 8, 1)
|
||||
tensor2 = torch.rand(5)
|
||||
template = torch.rand(4, 8, 5)
|
||||
target = template.size()
|
||||
self.assertEqual(tensor.expand_as(template).size(), target)
|
||||
self.assertEqual(tensor.expand(8, 5).size(), target)
|
||||
self.assertEqual(tensor.expand(torch.Size([8, 5])).size(), target)
|
||||
self.assertEqual(tensor.expand(4, 8, 5).size(), target)
|
||||
self.assertEqual(tensor.expand(target).size(), target)
|
||||
self.assertEqual(tensor2.expand_as(template).size(), target)
|
||||
self.assertEqual(tensor2.expand(4, 8, 5).size(), target)
|
||||
self.assertEqual(tensor2.expand(target).size(), target)
|
||||
|
||||
# test double expand
|
||||
self.assertEqual(tensor2.expand(1, 5).expand(2, 2, 5), tensor2.repeat(2, 2, 1))
|
||||
|
||||
# test non-contiguous
|
||||
noncontig = torch.randn(5, 2, 1, 3)[:, 0]
|
||||
assert not noncontig.is_contiguous()
|
||||
self.assertEqual(noncontig.expand(2, 5, 4, 3), noncontig.contiguous().repeat(2, 1, 4, 1))
|
||||
|
||||
# make sure it's compatible with unsqueeze
|
||||
expanded = tensor2.expand(1, 1, 5)
|
||||
unsqueezed = tensor2.unsqueeze(0).unsqueeze(1)
|
||||
self.assertEqual(expanded, unsqueezed)
|
||||
self.assertEqual(expanded.stride(), unsqueezed.stride())
|
||||
|
||||
def test_repeat(self):
|
||||
result = torch.Tensor()
|
||||
@ -2425,7 +2477,9 @@ class TestTorch(TestCase):
|
||||
a_clone = a.clone()
|
||||
b = copy(a)
|
||||
b.fill_(1)
|
||||
self.assertEqual(a, a_clone)
|
||||
# copy is a shallow copy, only copies the tensor view,
|
||||
# not the data
|
||||
self.assertEqual(a, b)
|
||||
|
||||
def test_pickle(self):
|
||||
if sys.version_info[0] == 2:
|
||||
@ -2497,6 +2551,11 @@ class TestTorch(TestCase):
|
||||
b = [a[i % 2] for i in range(4)]
|
||||
b += [a[0].storage()]
|
||||
b += [a[0].storage()[1:4]]
|
||||
b += [torch.range(1, 10).int()]
|
||||
t1 = torch.FloatTensor().set_(a[0].storage()[1:4], 0, (3,), (1,))
|
||||
t2 = torch.FloatTensor().set_(a[0].storage()[1:4], 0, (3,), (1,))
|
||||
b += [(t1.storage(), t1.storage(), t2.storage())]
|
||||
b += [a[0].storage()[0:2]]
|
||||
for use_name in (False, True):
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
handle = f if not use_name else f.name
|
||||
@ -2516,6 +2575,89 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(c[1], c[3], 0)
|
||||
self.assertEqual(c[4], c[5][1:4], 0)
|
||||
|
||||
        # check that serializing the same storage view object unpickles
        # it as one object not two (and vice versa)
        views = c[7]
        self.assertEqual(views[0]._cdata, views[1]._cdata)
        self.assertEqual(views[0], views[2])
        self.assertNotEqual(views[0]._cdata, views[2]._cdata)

        rootview = c[8]
        self.assertEqual(rootview.data_ptr(), c[0].data_ptr())

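To make the intent of those assertions concrete, here is a hedged sketch of the behaviour they pin down (illustrative only; it mirrors the tuple built earlier in the test, where t1 and t2 are two tensors created from the same storage slice):

import tempfile
import torch

s = torch.FloatStorage(10).fill_(0)
t1 = torch.FloatTensor().set_(s[1:4], 0, (3,), (1,))
t2 = torch.FloatTensor().set_(s[1:4], 0, (3,), (1,))
group = (t1.storage(), t1.storage(), t2.storage())

with tempfile.NamedTemporaryFile() as f:
    torch.save(group, f)
    f.seek(0)
    a, b, c = torch.load(f)

# a and b wrap the same underlying storage view, so they come back as one object;
# c is an equal but separately created view and stays separate
assert a._cdata == b._cdata
assert a._cdata != c._cdata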
def test_half_tensor(self):
|
||||
x = torch.randn(5, 5).float()
|
||||
y = torch.randn(5, 5).float()
|
||||
xh, yh = x.half(), y.half()
|
||||
|
||||
self.assertEqual(x.half().float(), x, 1e-3)
|
||||
|
||||
z = torch.Tensor(5, 5)
|
||||
self.assertEqual(z.copy_(xh), x, 1e-3)
|
||||
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
torch.save(xh, f)
|
||||
f.seek(0)
|
||||
xh2 = torch.load(f)
|
||||
self.assertEqual(xh, xh2)
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
|
||||
def test_half_tensor_cuda(self):
|
||||
x = torch.randn(5, 5).half()
|
||||
self.assertEqual(x.cuda().cpu(), x)
|
||||
|
||||
xc = x.cuda()
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
torch.save(xc, f)
|
||||
f.seek(0)
|
||||
xc2 = torch.load(f)
|
||||
self.assertIsInstance(xc2, type(xc))
|
||||
self.assertEqual(xc, xc2)
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
|
||||
def test_serialization_cuda(self):
|
||||
device_count = torch.cuda.device_count()
|
||||
t0 = torch.cuda.FloatTensor(5).fill_(1)
|
||||
torch.cuda.set_device(device_count - 1)
|
||||
tn = torch.cuda.FloatTensor(3).fill_(2)
|
||||
torch.cuda.set_device(0)
|
||||
b = (t0, tn)
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
torch.save(b, f)
|
||||
f.seek(0)
|
||||
c = torch.load(f)
|
||||
self.assertEqual(b, c, 0)
|
||||
u0, un = c
|
||||
self.assertEqual(u0.get_device(), 0)
|
||||
self.assertEqual(un.get_device(), device_count - 1)
|
||||
|
||||
def test_serialization_backwards_compat(self):
|
||||
a = [torch.range(1 + i, 25 + i).view(5, 5).float() for i in range(2)]
|
||||
b = [a[i % 2] for i in range(4)]
|
||||
b += [a[0].storage()]
|
||||
b += [a[0].storage()[1:4]]
|
||||
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_serialized.pt'
|
||||
data_dir = os.path.join(os.path.dirname(__file__), 'data')
|
||||
test_file_path = os.path.join(data_dir, 'legacy_serialized.pt')
|
||||
succ = download_file(DATA_URL, test_file_path)
|
||||
if not succ:
|
||||
warnings.warn(("Couldn't download the test file for backwards compatibility! "
|
||||
"Tests will be incomplete!"), RuntimeWarning)
|
||||
return
|
||||
c = torch.load(test_file_path)
|
||||
self.assertEqual(b, c, 0)
|
||||
self.assertTrue(isinstance(c[0], torch.FloatTensor))
|
||||
self.assertTrue(isinstance(c[1], torch.FloatTensor))
|
||||
self.assertTrue(isinstance(c[2], torch.FloatTensor))
|
||||
self.assertTrue(isinstance(c[3], torch.FloatTensor))
|
||||
self.assertTrue(isinstance(c[4], torch.FloatStorage))
|
||||
c[0].fill_(10)
|
||||
self.assertEqual(c[0], c[2], 0)
|
||||
self.assertEqual(c[4], torch.FloatStorage(25).fill_(10), 0)
|
||||
c[1].fill_(20)
|
||||
self.assertEqual(c[1], c[3], 0)
|
||||
self.assertEqual(c[4], c[5][1:4], 0)
|
||||
|
||||
def test_serialization_container(self):
|
||||
def import_module(name, filename):
|
||||
if sys.version_info >= (3, 5):
|
||||
@ -2600,6 +2742,8 @@ class TestTorch(TestCase):
|
||||
y = x.clone().unsqueeze_(2)
|
||||
self.assertEqual(y, x.contiguous().view(2, 4, 1))
|
||||
|
||||
self.assertRaises(RuntimeError, lambda: torch.Tensor().unsqueeze(0))
|
||||
|
||||
def test_iter(self):
|
||||
x = torch.randn(5, 5)
|
||||
for i, sub in enumerate(x):
|
||||
@ -2724,6 +2868,7 @@ class TestTorch(TestCase):
|
||||
np.float,
|
||||
np.int64,
|
||||
np.int32,
|
||||
np.int16,
|
||||
np.uint8
|
||||
]
|
||||
for dtype in dtypes:
|
||||
@ -2835,8 +2980,30 @@ class TestTorch(TestCase):
|
||||
self.assertEqual(x[0], 1)
|
||||
self.assertEqual(x[1], 2)
|
||||
self.assertEqual(x[2], 3)
|
||||
self.assertEqual(len(x), 3)
|
||||
self.assertRaises(TypeError, lambda: torch.Size(torch.ones(3)))
|
||||
|
||||
self.assertIsInstance(x * 2, torch.Size)
|
||||
self.assertIsInstance(x[:-1], torch.Size)
|
||||
self.assertIsInstance(x + x, torch.Size)
|
||||
|
||||
def test_transpose_neg(self):
|
||||
x = torch.randn(10, 20, 30)
|
||||
ndim = 3
|
||||
|
||||
for i, j in combinations(range(ndim), 2):
|
||||
a = x.transpose(i, j)
|
||||
b = x.transpose(i - ndim, j - ndim)
|
||||
self.assertEqual(a, b)
|
||||
|
||||
a = torch.transpose(x, i, j)
|
||||
b = torch.transpose(x, i - ndim, j - ndim)
|
||||
self.assertEqual(a, b)
|
||||
|
||||
a = x.clone()
|
||||
x.transpose_(i, j)
|
||||
x.transpose_(i - ndim, j - ndim)
|
||||
self.assertEqual(a, x)
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_tests()
|
||||
|
||||
@ -6,7 +6,6 @@ import shutil
|
||||
import random
|
||||
import tempfile
|
||||
import unittest
|
||||
import sys
|
||||
import traceback
|
||||
import torch
|
||||
import torch.cuda
|
||||
@ -19,7 +18,7 @@ from torch.utils.serialization import load_lua
|
||||
|
||||
HAS_CUDA = torch.cuda.is_available()
|
||||
|
||||
from common import TestCase, run_tests
|
||||
from common import TestCase, run_tests, download_file
|
||||
|
||||
try:
|
||||
import cffi
|
||||
@ -296,35 +295,13 @@ class TestLuaReader(TestCase):
|
||||
self.assertEqual(grad_input, test['grad_input'])
|
||||
return do_test
|
||||
|
||||
@classmethod
|
||||
def _download_data(cls, test_file_path):
|
||||
if os.path.exists(test_file_path):
|
||||
return
|
||||
print('Downloading test file for TestLuaReader.')
|
||||
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
|
||||
urllib = cls._get_urllib('request')
|
||||
data = urllib.urlopen(DATA_URL, timeout=15).read()
|
||||
with open(test_file_path, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
@staticmethod
|
||||
def _get_urllib(submodule):
|
||||
if sys.version_info < (3,):
|
||||
import urllib2
|
||||
return urllib2
|
||||
else:
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
return getattr(urllib, submodule)
|
||||
|
||||
@classmethod
|
||||
def init(cls):
|
||||
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
|
||||
data_dir = os.path.join(os.path.dirname(__file__), 'data')
|
||||
test_file_path = os.path.join(data_dir, 'legacy_modules.t7')
|
||||
urllib = cls._get_urllib('error')
|
||||
try:
|
||||
cls._download_data(test_file_path)
|
||||
except urllib.URLError as e:
|
||||
succ = download_file(DATA_URL, test_file_path)
|
||||
if not succ:
|
||||
warnings.warn(("Couldn't download the test file for TestLuaReader! "
|
||||
"Tests will be incomplete!"), RuntimeWarning)
|
||||
return
|
||||
|
||||
@ -65,7 +65,7 @@ void $name($args)
|
||||
'THCTensor*': 'thpp::Tensor*',
|
||||
'THIndexTensor*': 'thpp::Tensor*',
|
||||
'THIndex_t': 'long',
|
||||
'real': 'double',
|
||||
'accreal': 'double',
|
||||
}
|
||||
|
||||
def __init__(self, header=False):
|
||||
|
||||
@ -53,9 +53,9 @@ class KwargsPlugin(CWrapPlugin):
|
||||
name not in seen_args):
|
||||
seen_args.add(name)
|
||||
args.append(name)
|
||||
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args])
|
||||
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(a) for a in args])
|
||||
lookups = '\n '.join(
|
||||
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args])
|
||||
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=a) for a in args])
|
||||
start_idx = code.find('{') + 1
|
||||
new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups)
|
||||
return code[:start_idx] + new_code + code[start_idx:]
|
||||
|
||||
@ -18,6 +18,7 @@ class THPPlugin(CWrapPlugin):
|
||||
|
||||
'THCudaTensor*': Template('((THCPFloatTensor*)$arg)->cdata'),
|
||||
'THCudaDoubleTensor*': Template('((THCPDoubleTensor*)$arg)->cdata'),
|
||||
'THCudaLongTensor*': Template('((THCPLongTensor*)$arg)->cdata'),
|
||||
|
||||
'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'),
|
||||
'THSDoubleTensor*': Template('((THSPDoubleTensor*)$arg)->cdata'),
|
||||
@ -53,6 +54,7 @@ class THPPlugin(CWrapPlugin):
|
||||
|
||||
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
|
||||
'THCudaDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPDoubleTensorClass'),
|
||||
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
|
||||
|
||||
'THSDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPDoubleTensorClass'),
|
||||
'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'),
|
||||
@ -84,6 +86,7 @@ class THPPlugin(CWrapPlugin):
|
||||
'THSTensor*': Template('return THSPTensor_(New)($result);'),
|
||||
'THLongTensor*': Template('return THPLongTensor_New($result);'),
|
||||
'THLongStorage*': Template('return THPLongStorage_New($result);'),
|
||||
'THCudaLongTensor*': Template('return THCPLongTensor_New($result);'),
|
||||
# TODO: make it smarter - it should return python long if result doesn't fit into an int
|
||||
'long': Template('return PyInt_FromLong($result);'),
|
||||
'accreal': Template('return THPUtils_(newAccreal)($result);'),
|
||||
@ -167,6 +170,7 @@ ${cpu}
|
||||
'THDoubleTensor*': '" THPModuleStr "DoubleTensor',
|
||||
'THCudaTensor*': 'torch.cuda.FloatTensor',
|
||||
'THCudaDoubleTensor*': 'torch.cuda.DoubleTensor',
|
||||
'THCudaLongTensor*': 'torch.cuda.LongTensor',
|
||||
'THSize*': 'torch.Size',
|
||||
'THStride*': 'tuple',
|
||||
'long': 'int',
|
||||
@ -303,8 +307,6 @@ ${cpu}
|
||||
|
||||
def process_declarations(self, declarations):
|
||||
new_declarations = []
|
||||
register_only = [d for d in declarations if d.get('only_register', False)]
|
||||
declarations = [d for d in declarations if not d.get('only_register', False)]
|
||||
|
||||
def has_arg_type(declaration, type_name):
|
||||
return any(arg['type'] == type_name
|
||||
@ -322,8 +324,16 @@ ${cpu}
|
||||
for arg in option['arguments'])
|
||||
|
||||
for declaration in declarations:
|
||||
# Disable all methods for THHalfTensor, unless cpu_half is True
|
||||
if not declaration.get('cpu_half', False):
|
||||
defined_if = '!defined(TH_REAL_IS_HALF)'
|
||||
if 'defined_if' in declaration:
|
||||
defined_if += ' && (' + declaration['defined_if'] + ')'
|
||||
declaration['defined_if'] = defined_if
|
||||
|
||||
if declaration.get('only_register', False):
|
||||
continue
|
||||
|
||||
declaration.setdefault('python_name', declaration['name'])
|
||||
declaration.setdefault('variables', [])
|
||||
if has_arg_type(declaration, 'THSize*'):
|
||||
@ -353,7 +363,9 @@ ${cpu}
|
||||
if arg['name'] == 'self':
|
||||
arg['ignore_check'] = True
|
||||
|
||||
declarations = [d for d in declarations if not d.get('only_stateless', False)]
|
||||
register_only = [d for d in declarations if d.get('only_register', False)]
|
||||
declarations = [d for d in declarations
|
||||
if (not d.get('only_stateless', False)) and (not d.get('only_register', False))]
|
||||
self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only))
|
||||
self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only))
|
||||
|
||||
@ -390,11 +402,14 @@ ${cpu}
|
||||
if 'defined_if' in declaration:
|
||||
entry = self.preprocessor_guard(entry, declaration['defined_if'])
|
||||
tensor_methods += entry
|
||||
return self.TENSOR_METHODS_DECLARATION.substitute(
|
||||
generated = self.TENSOR_METHODS_DECLARATION.substitute(
|
||||
methods=tensor_methods,
|
||||
stateless=('' if not stateless else 'stateless_'),
|
||||
sparse=('' if not sparse else 'S'),
|
||||
)
|
||||
if sparse:
|
||||
generated = '#ifndef TH_REAL_IS_HALF\n' + generated + '\n#endif\n\n'
|
||||
return generated
|
||||
|
||||
def process_full_file(self, code):
|
||||
# We have to find a place before all undefs
|
||||
|
||||
@ -1 +1,2 @@
|
||||
from .generate_wrappers import generate_wrappers, wrap_function, import_module
|
||||
from .generate_wrappers import generate_wrappers, wrap_function, \
|
||||
import_module, wrap_generic_function
|
||||
|
||||
@ -52,22 +52,27 @@ TYPE_TRANSFORMS = {
|
||||
'Float': {
|
||||
'THTensor*': 'THFloatTensor*',
|
||||
'real': 'float',
|
||||
'accreal': 'double',
|
||||
},
|
||||
'Double': {
|
||||
'THTensor*': 'THDoubleTensor*',
|
||||
'real': 'double',
|
||||
'accreal': 'double',
|
||||
},
|
||||
'CudaHalf': {
|
||||
'THCTensor*': 'THCudaHalfTensor*',
|
||||
'real': 'half',
|
||||
'accreal': 'float',
|
||||
},
|
||||
'Cuda': {
|
||||
'THCTensor*': 'THCudaTensor*',
|
||||
'real': 'float',
|
||||
'accreal': 'float',
|
||||
},
|
||||
'CudaDouble': {
|
||||
'THCTensor*': 'THCudaDoubleTensor*',
|
||||
'real': 'double',
|
||||
'accreal': 'double',
|
||||
},
|
||||
}
|
||||
for t, transforms in TYPE_TRANSFORMS.items():
|
||||
|
||||
@ -10,6 +10,7 @@ on an NVIDIA GPU with compute capability >= 2.0.
|
||||
|
||||
import sys
|
||||
from ._utils import _import_dotted_name
|
||||
from .version import __version__
|
||||
|
||||
__all__ = [
|
||||
'typename', 'is_tensor', 'is_storage', 'set_default_tensor_type',
|
||||
@ -30,6 +31,13 @@ __all__ = [
|
||||
# automatically filled by the dynamic loader.
|
||||
import os as _dl_flags
|
||||
|
||||
# if we have numpy, it *must* be imported before the call to setdlopenflags()
|
||||
# or there is risk that later c modules will segfault when importing numpy
|
||||
try:
|
||||
import numpy as np
|
||||
except:
|
||||
pass
|
||||
|
||||
# first check if the os package has the required flags
|
||||
if not hasattr(_dl_flags, 'RTLD_GLOBAL') or not hasattr(_dl_flags, 'RTLD_NOW'):
|
||||
try:
|
||||
@ -75,10 +83,20 @@ def typename(o):
|
||||
|
||||
|
||||
def is_tensor(obj):
|
||||
r"""Returns True if `obj` is a pytorch tensor.
|
||||
|
||||
Args:
|
||||
obj (Object): Object to test
|
||||
"""
|
||||
return obj.__class__ in _tensor_classes
|
||||
|
||||
|
||||
def is_storage(obj):
|
||||
r"""Returns True if `obj` is a pytorch storage object.
|
||||
|
||||
Args:
|
||||
obj (Object): Object to test
|
||||
"""
|
||||
return obj.__class__ in _storage_classes
|
||||
|
||||
|
||||
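A small usage sketch for the two helpers documented above (nothing beyond the public API shown in the hunk is assumed):

import torch

x = torch.randn(2, 3)
print(torch.is_tensor(x))             # True
print(torch.is_tensor([1, 2, 3]))     # False: a plain list is not a tensor
print(torch.is_storage(x.storage()))  # True
print(torch.is_storage(x))            # False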
@ -140,6 +158,10 @@ class FloatStorage(_C.FloatStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class HalfStorage(_C.HalfStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class LongStorage(_C.LongStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
@ -180,6 +202,16 @@ class FloatTensor(_C.FloatTensorBase, _TensorBase):
|
||||
return FloatStorage
|
||||
|
||||
|
||||
class HalfTensor(_C.HalfTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return HalfStorage
|
||||
|
||||
|
||||
class LongTensor(_C.LongTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
|
||||
@ -1632,6 +1632,20 @@ Fills this tensor with numbers sampled from the uniform distribution:
|
||||
P(x) = \dfrac{1}{to - from}
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.FloatTensorBase.unsqueeze,
|
||||
"""
|
||||
unsqueeze(dim)
|
||||
|
||||
See :func:`torch.unsqueeze`
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.FloatTensorBase.unsqueeze_,
|
||||
"""
|
||||
unsqueeze_(dim)
|
||||
|
||||
In-place version of :meth:`~Tensor.unsqueeze`
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.FloatTensorBase.var,
|
||||
"""
|
||||
var() -> float
|
||||
@ -1639,6 +1653,31 @@ var() -> float
|
||||
See :func:`torch.var`
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.FloatTensorBase.view,
|
||||
"""
|
||||
view(*args) -> Tensor
|
||||
|
||||
Returns a new tensor with the same data but different size.
|
||||
|
||||
The returned tensor shares the same data and must have the same number
|
||||
of elements, but may have a different size. A tensor must be
|
||||
:func:`contiguous` to be viewed.
|
||||
|
||||
Args:
|
||||
args (torch.Size or int...): Desired size
|
||||
|
||||
Example:
|
||||
>>> x = torch.randn(4, 4)
|
||||
>>> x.size()
|
||||
torch.Size([4, 4])
|
||||
>>> y = x.view(16)
|
||||
>>> y.size()
|
||||
torch.Size([16])
|
||||
>>> z = x.view(-1, 8) # the size -1 is inferred from other dimensions
|
||||
>>> z.size()
|
||||
torch.Size([2, 8])
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.FloatTensorBase.zero_,
|
||||
"""
|
||||
zero_()
|
||||
|
||||
@ -58,7 +58,10 @@ for t in ['Float', 'Double']:
|
||||
type2backend.backends['torch.{}Tensor'.format(t)] = backend
|
||||
type2backend.backends[getattr(torch, '{}Tensor'.format(t))] = backend
|
||||
|
||||
backend = Backend('Cuda', 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
|
||||
type2backend.backends['THNNCudaBackend'] = backend
|
||||
type2backend.backends['torch.cuda.FloatTensor'] = backend
|
||||
type2backend.backends[torch.cuda.FloatTensor] = backend
|
||||
|
||||
for t in ['Half', '', 'Double']:
|
||||
backend = Backend('Cuda' + t, 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
|
||||
type2backend.backends['THNNCuda{}Backend'.format(t)] = backend
|
||||
py_name = 'Float' if t == '' else t
|
||||
type2backend.backends['torch.cuda.{}Tensor'.format(py_name)] = backend
|
||||
type2backend.backends[getattr(torch.cuda, '{}Tensor'.format(py_name))] = backend
|
||||
|
||||
@ -3621,7 +3621,6 @@ Example::
|
||||
>>> y = torch.squeeze(x, 1)
|
||||
>>> y.size()
|
||||
(2L, 2L, 1L, 2L)
|
||||
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.std,
|
||||
@ -3992,13 +3991,13 @@ Example::
|
||||
|
||||
>>> torch.topk(x, 3)
|
||||
(
|
||||
2
|
||||
1
|
||||
5
|
||||
4
|
||||
3
|
||||
[torch.FloatTensor of size 3]
|
||||
,
|
||||
1
|
||||
0
|
||||
4
|
||||
3
|
||||
2
|
||||
[torch.LongTensor of size 3]
|
||||
)
|
||||
@ -4214,6 +4213,33 @@ Example::
|
||||
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.unsqueeze,
|
||||
"""
|
||||
unsqueeze(input, dim, out=None)
|
||||
|
||||
Returns a new tensor with a dimension of size one inserted at the
|
||||
specified position.
|
||||
|
||||
The returned tensor shares the same underlying data with this tensor.
|
||||
|
||||
Args:
|
||||
input (Tensor): the input `Tensor`
|
||||
dim (int): The index at which to insert the singleton dimension
|
||||
out (Tensor, optional): The result `Tensor`
|
||||
|
||||
Example:
|
||||
>>> x = torch.Tensor([1, 2, 3, 4])
|
||||
>>> torch.unsqueeze(x, 0)
|
||||
1 2 3 4
|
||||
[torch.FloatTensor of size 1x4]
|
||||
>>> torch.unsqueeze(x, 1)
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
[torch.FloatTensor of size 4x1]
|
||||
""")
|
||||
|
||||
add_docstr(torch._C.var,
|
||||
"""
|
||||
.. function:: var(input) -> float
|
||||
|
||||
@@ -21,6 +21,15 @@ def _type(self, new_type=None, async=False):
         new_type = _import_dotted_name(new_type)
     if new_type == type(self):
         return self
+    if self.is_sparse:
+        if not new_type.is_sparse:
+            raise RuntimeError("Cannot cast sparse tensor to dense tensor")
+        new_type_name = new_type.__module__ + '.' + new_type.__name__
+        new_values_type_name = new_type_name.replace('.sparse', '')
+        new_values = self.values().type(new_values_type_name, async)
+        return new_type(self.indices(), new_values, self.size())
+    if new_type.is_sparse:
+        raise RuntimeError("Cannot cast dense tensor to sparse tensor")
     return new_type(self.size()).copy_(self, async)

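For orientation, a hedged sketch of how those casting rules look from user code (the torch.sparse constructors used elsewhere in this changeset are assumed; async arguments are omitted):

import torch

i = torch.LongTensor([[0, 1], [2, 0]])
v = torch.FloatTensor([3, 4])
sp = torch.sparse.FloatTensor(i, v, torch.Size([3, 3]))

sp_double = sp.type('torch.sparse.DoubleTensor')      # ok: values are cast, indices are kept
# sp.type('torch.FloatTensor')                        # raises: cannot cast sparse tensor to dense tensor
# torch.randn(3, 3).type('torch.sparse.FloatTensor')  # raises: cannot cast dense tensor to sparse tensor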
@ -39,16 +48,20 @@ def _cuda(self, device=None, async=False):
|
||||
if self.is_cuda:
|
||||
if device is None:
|
||||
device = torch.cuda.current_device()
|
||||
if self.get_device() != device:
|
||||
with torch.cuda.device(device):
|
||||
return type(self)(self.size()).copy_(self, async)
|
||||
else:
|
||||
if self.get_device() == device:
|
||||
return self
|
||||
else:
|
||||
if device is None:
|
||||
device = -1
|
||||
with torch.cuda.device(device):
|
||||
return self.type(getattr(torch.cuda, self.__class__.__name__), async)
|
||||
with torch.cuda.device(device):
|
||||
if self.is_sparse:
|
||||
new_type = getattr(torch.cuda.sparse, self.__class__.__name__)
|
||||
indices = self.indices().cuda(device, async)
|
||||
values = self.values().cuda(device, async)
|
||||
return new_type(indices, values, self.size())
|
||||
else:
|
||||
new_type = getattr(torch.cuda, self.__class__.__name__)
|
||||
return new_type(self.size()).copy_(self, async)
|
||||
|
||||
|
||||
def _range(*args, **kwargs):
|
||||
|
||||
@@ -9,6 +9,7 @@ import torch
 from .variable import Variable
 from .function import Function, NestedIOFunction
 from .stochastic_function import StochasticFunction
+from .gradcheck import gradcheck

 __all__ = ['Variable', 'Function', 'StochasticFunction', 'backward']

@@ -3,9 +3,16 @@ from ..function import Function, InplaceFunction
 import math


+def maybe_view(tensor, size):
+    if tensor.size() == size:
+        return tensor
+    return tensor.contiguous().view(size)
+
+
class Add(InplaceFunction):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
if self.inplace:
|
||||
self.mark_dirty(a)
|
||||
return a.add_(b)
|
||||
@ -13,12 +20,13 @@ class Add(InplaceFunction):
|
||||
return a.add(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
return grad_output, grad_output
|
||||
return grad_output, maybe_view(grad_output, self.b_size)
|
||||
|
||||
|
||||
class Sub(InplaceFunction):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
if self.inplace:
|
||||
self.mark_dirty(a)
|
||||
return a.sub_(b)
|
||||
@ -26,40 +34,43 @@ class Sub(InplaceFunction):
|
||||
return a.sub(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
return grad_output, grad_output.neg()
|
||||
return grad_output, maybe_view(grad_output.neg(), self.b_size)
|
||||
|
||||
|
||||
class Mul(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.mul(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.mul(b), grad_output.mul(a)
|
||||
return grad_output.mul(b), maybe_view(grad_output.mul(a), self.b_size)
|
||||
|
||||
|
||||
class Div(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.div(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.div(b), grad_output.neg().mul(a).div_(b).div_(b)
|
||||
return grad_output.div(b), maybe_view(grad_output.neg().mul(a).div_(b).div_(b), self.b_size)
|
||||
|
||||
|
||||
class Pow(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.pow(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.mul(b).mul_(a.pow(b - 1)), grad_output.mul(a.pow(b)).mul_(a.log())
|
||||
return grad_output.mul(b).mul_(a.pow(b - 1)), maybe_view(grad_output.mul(a.pow(b)).mul_(a.log()), self.b_size)
|
||||
|
||||
|
||||
class AddConstant(InplaceFunction):
|
||||
|
||||
@ -168,7 +168,7 @@ class Addr(_BlasBase):
|
||||
|
||||
if self.needs_input_grad[2]:
|
||||
# TODO: maybe it's better to do transpose + mv + transpose
|
||||
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output)
|
||||
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
|
||||
if self.beta != 1:
|
||||
grad_vector2 *= self.beta
|
||||
|
||||
|
||||
@ -18,9 +18,8 @@ class Index(Function):
|
||||
return result
|
||||
|
||||
def backward(self, grad_output):
|
||||
# TODO: this won't have to be zeroed
|
||||
grad_input = grad_output.new(self.input_size).zero_()
|
||||
grad_input.index(self.index).copy_(grad_output)
|
||||
grad_input._set_index(self.index, grad_output)
|
||||
return grad_input
|
||||
|
||||
|
||||
@ -99,7 +98,7 @@ class View(Function):
|
||||
|
||||
def backward(self, grad_output):
|
||||
# TODO: not sure if this clone is necessary
|
||||
return grad_output.clone().view(self.input_size)
|
||||
return grad_output.contiguous().view(self.input_size)
|
||||
|
||||
|
||||
class Expand(Function):
|
||||
@ -110,10 +109,11 @@ class Expand(Function):
|
||||
self.expanded_dims = []
|
||||
|
||||
def forward(self, i):
|
||||
self.expanded_dims = [dim for dim, (expanded, original)
|
||||
in enumerate(zip(self.sizes, i.size()))
|
||||
if expanded != original]
|
||||
result = i.expand(*self.sizes)
|
||||
unsqueezed = (1,) * (len(self.sizes) - len(i.size()))
|
||||
self.expanded_dims = [dim for dim, (expanded, original)
|
||||
in enumerate(zip(self.sizes, unsqueezed + i.size()))
|
||||
if expanded != original]
|
||||
self.mark_shared_storage((i, result))
|
||||
return result
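The forward above now left-pads the input's size with singleton dimensions before deciding which dims were actually expanded. From the user's side this is the case where expand receives more dimensions than the input has, which the updated test_expand in this changeset also covers. A small illustrative sketch:

import torch

t = torch.rand(5)
e = t.expand(4, 8, 5)   # treated as if t were first viewed as (1, 1, 5)
print(e.size())         # torch.Size([4, 8, 5])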
|
||||
|
||||
|
||||
@ -2,7 +2,6 @@ import torch
|
||||
import torch._C as _C
|
||||
import torch.utils.hooks as hooks
|
||||
from collections import OrderedDict
|
||||
from itertools import chain
|
||||
|
||||
|
||||
class Function(_C._FunctionBase):
|
||||
@@ -98,9 +97,9 @@ class Function(_C._FunctionBase):
         **This should be called at most once, only from inside the**
         :func:`forward` **method, and all arguments should be outputs.**

-        This will mark outputs as non requiring gradient, increasing the
+        This will mark outputs as not requiring gradients, increasing the
         efficiency of backward computation. You still need to accept a gradient
-        for this output in :meth:`~Function.backward`, but it's always going to
+        for each output in :meth:`~Function.backward`, but it's always going to
         be ``None``.

         This is used e.g. for indices returned from a max :class:`Function`.

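As a concrete illustration of the documented behaviour, a minimal sketch using the old-style Function API assumed by this file; the class and its logic are hypothetical, not part of the diff:

import torch
from torch.autograd import Function, Variable

class ReLUWithMask(Function):
    # hypothetical example: returns the rectified input together with the 0/1
    # mask that was applied; the mask itself is marked non-differentiable
    def forward(self, input):
        mask = input.gt(0).type_as(input)
        self.mark_non_differentiable(mask)
        self.save_for_backward(mask)
        return input * mask, mask

    def backward(self, grad_output, grad_mask):
        # grad_mask is always None here, because mask was marked in forward
        mask, = self.saved_tensors
        return grad_output * mask

# usage: out, mask = ReLUWithMask()(Variable(torch.randn(5), requires_grad=True))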
@ -204,11 +203,17 @@ class NestedIOFunction(Function):
|
||||
nested_variables = _unflatten(flat_output, self._nested_output)
|
||||
return nested_variables
|
||||
|
||||
def _do_backward(self, gradients, retain_variables):
|
||||
self.retain_variables = retain_variables
|
||||
result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
|
||||
if not retain_variables:
|
||||
del self._nested_output
|
||||
del self._to_save_nested
|
||||
return result
|
||||
|
||||
def backward(self, *gradients):
|
||||
nested_gradients = _unflatten(gradients, self._nested_output)
|
||||
del self._nested_output
|
||||
result = self.backward_extended(*nested_gradients)
|
||||
del self._to_save_nested
|
||||
return tuple(_iter_None_tensors(result))
|
||||
|
||||
__call__ = _do_forward
|
||||
|
||||
torch/autograd/gradcheck.py (new file, 149 lines)
@@ -0,0 +1,149 @@
import torch
from torch.autograd import Variable


def iter_gradients(x):
    if isinstance(x, Variable):
        if x.requires_grad:
            yield x.grad.data if x.grad is not None else None
    else:
        for elem in x:
            for result in iter_gradients(elem):
                yield result


def zero_gradients(i):
    for t in iter_gradients(i):
        if t is not None:
            t.zero_()


def make_jacobian(input, num_out):
    if isinstance(input, Variable) and not input.requires_grad:
        return None
    if torch.is_tensor(input) or isinstance(input, Variable):
        return torch.zeros(input.nelement(), num_out)
    else:
        return type(input)(filter(lambda x: x is not None,
                                  (make_jacobian(elem, num_out) for elem in input)))


def iter_tensors(x, only_requiring_grad=False):
    if torch.is_tensor(x):
        yield x
    elif isinstance(x, Variable):
        if x.requires_grad or not only_requiring_grad:
            yield x.data
    else:
        for elem in x:
            for result in iter_tensors(elem, only_requiring_grad):
                yield result


def contiguous(input):
    if torch.is_tensor(input):
        return input.contiguous()
    elif isinstance(input, Variable):
        return input.contiguous()
    else:
        return type(input)(contiguous(e) for e in input)


def get_numerical_jacobian(fn, input, target, eps=1e-3):
    # To be able to use .view(-1) input must be contiguous
    input = contiguous(input)
    output_size = fn(input).numel()
    jacobian = make_jacobian(target, output_size)

    # It's much easier to iterate over flattened lists of tensors.
    # These are reference to the same objects in jacobian, so any changes
    # will be reflected in it as well.
    x_tensors = [t for t in iter_tensors(target, True)]
    j_tensors = [t for t in iter_tensors(jacobian)]

    outa = torch.DoubleTensor(output_size)
    outb = torch.DoubleTensor(output_size)

    # TODO: compare structure
    for x_tensor, d_tensor in zip(x_tensors, j_tensors):
        flat_tensor = x_tensor.view(-1)
        for i in range(flat_tensor.nelement()):
            orig = flat_tensor[i]
            flat_tensor[i] = orig - eps
            outa.copy_(fn(input))
            flat_tensor[i] = orig + eps
            outb.copy_(fn(input))
            flat_tensor[i] = orig

            outb.add_(-1, outa).div_(2 * eps)
            d_tensor[i] = outb

    return jacobian


def get_analytical_jacobian(input, output):
    jacobian = make_jacobian(input, output.numel())
    grad_output = output.data.clone().zero_()
    flat_grad_output = grad_output.view(-1)

    for i in range(flat_grad_output.numel()):
        flat_grad_output.zero_()
        flat_grad_output[i] = 1
        zero_gradients(input)
        output.backward(grad_output, retain_variables=True)
        for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
            if d_x is None:
                jacobian_x[:, i].zero_()
            else:
                jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x

    return jacobian


def _as_tuple(x):
    if isinstance(x, tuple):
        return x
    elif isinstance(x, list):
        return tuple(x)
    else:
        return x,


def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3):
    """Check gradients computed via small finite differences
    against analytical gradients

    The check between numerical and analytical has the same behaviour as
    numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
    meaning it check that
    absolute(a - n) <= (atol + rtol * absolute(n))
    is true for all elements of analytical jacobian a and numerical jacobian n.

    Args:
        func: Python function that takes Variable inputs and returns
            a tuple of Variables
        inputs: tuple of Variables
        eps: perturbation for finite differences
        atol: absolute tolerance
        rtol: relative tolerance

    Returns:
        True if all differences satisfy allclose condition
    """
    output = func(*inputs)
    output = _as_tuple(output)

    for i, o in enumerate(output):
        if not o.requires_grad:
            continue

        def fn(input):
            return _as_tuple(func(*input))[i].data

        numerical = get_numerical_jacobian(fn, inputs, inputs, eps)
        analytical = get_analytical_jacobian(_as_tuple(inputs), o)

        for a, n in zip(analytical, numerical):
            if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
                return False
    return True

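For orientation, a short usage sketch of the new checker (illustrative only; it relies on gradcheck being re-exported from torch.autograd, which another hunk in this changeset adds, and on double-precision inputs so the central difference (f(x+eps) - f(x-eps)) / (2*eps) used above is accurate):

import torch
from torch.autograd import Variable, gradcheck

inputs = (Variable(torch.randn(4, 3).double(), requires_grad=True),
          Variable(torch.randn(4, 3).double(), requires_grad=True))

def func(x, y):
    return (x * y + y.exp(),)

print(gradcheck(func, inputs, eps=1e-6, atol=1e-4))  # True if gradients match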
@ -1,3 +1,5 @@
|
||||
import torch
|
||||
from numbers import Number
|
||||
from .function import Function
|
||||
|
||||
_NOT_PROVIDED = object()
|
||||
@ -17,5 +19,26 @@ class StochasticFunction(Function):
|
||||
self.reward = None
|
||||
return result
|
||||
|
||||
def _do_forward(self, *inputs):
|
||||
result = super(StochasticFunction, self)._do_forward(*inputs)
|
||||
# save output type and size, to check the type of reward
|
||||
assert isinstance(result, torch.autograd.Variable), \
|
||||
"stochastic functions support only a single output at the moment"
|
||||
self.reward_info = (type(inputs[0].data), result.size())
|
||||
return result
|
||||
|
||||
__call__ = _do_forward
|
||||
|
||||
def _reinforce(self, reward):
|
||||
is_number = isinstance(reward, Number)
|
||||
if not is_number and type(reward) != self.reward_info[0]:
|
||||
raise TypeError("mismatch between reward and output type: got {}, "
|
||||
"but expected {}".format(torch.typename(reward),
|
||||
torch.typename(self.reward_info[0])))
|
||||
if not is_number and reward.size() != self.reward_info[1]:
|
||||
raise ValueError("got reward of size {}, but expected a tensor of size {}".format(
|
||||
'x'.join(map(str, reward.size())),
|
||||
'x'.join(map(str, self.reward_info[1]))))
|
||||
if self.reward is not _NOT_PROVIDED:
|
||||
raise RuntimeError("you can only reinforce a stochastic Function once")
|
||||
self.reward = reward
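Downstream, Variable.reinforce(reward) forwards to this _reinforce hook (see the variable.py hunk below, which calls self.creator._reinforce(reward)). A rough sketch of the intended flow, assuming multinomial() is exposed as a stochastic sampling method on Variables in this version; the reward tensor must match the input's data type and the sample's size, and may be supplied only once:

# Sketch only: reward a sampled action once, then backprop through the sampler.
import torch
from torch.autograd import Variable

probs = Variable(torch.DoubleTensor([0.25, 0.25, 0.5]), requires_grad=True)
action = probs.multinomial()                      # output of a StochasticFunction (assumed)
reward = torch.DoubleTensor(action.size()).fill_(1.0)
action.reinforce(reward)                          # type/size checked by _reinforce above
action.backward()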
@@ -1,6 +1,7 @@
import sys
import torch._C as _C
from collections import OrderedDict
import torch.sparse as sparse
import torch.utils.hooks as hooks

from ._functions import *
@@ -56,30 +57,6 @@ class Variable(_C._VariableBase):
        'is_cuda',
    }

    @property
    def grad(self):
        if self.requires_grad and self._grad is None:
            # TODO: this won't have to be zeroed in the future
            self._grad = Variable(self.data.new(self.data.size()).zero_())
        return self._grad

    @property
    def requires_grad(self):
        return self._requires_grad

    @requires_grad.setter
    def requires_grad(self, value):
        if self.creator is not None:
            if value is False:
                hint = (" If you want to use a computed variable in a subgraph "
                        "that doesn't require differentiation use "
                        "var_no_grad = var.detach().")
            else:
                hint = ''
            raise RuntimeError("you can only change requires_grad flags of "
                               "leaf variables." + hint)
        self._requires_grad = value

    def __getattr__(self, name):
        if name in self._fallthrough_methods:
            return getattr(self.data, name)
@@ -108,19 +85,30 @@ class Variable(_C._VariableBase):
        if self.creator is not None:
            raise RuntimeError("Only Variables created explicitly by the user "
                               "(graph leaves) support the deepcopy protocol at the moment")
        result = type(self)(self.data.clone(), requires_grad=self.requires_grad,
                            volatile=self.volatile)
        result = type(self)(self.data.clone())
        result.requires_grad = self.requires_grad
        result.volatile = self.volatile
        memo[id(self)] = result
        return result

    def __reduce_ex__(self, proto):
        state = (self.requires_grad, self.volatile, self._backward_hooks)
        if proto > 1:
            return super(Variable, self).__reduce_ex__(proto)
        return type(self), (self.data,), state
        if sys.version_info[0] == 2:
            from copy_reg import __newobj__
        else:
            from copyreg import __newobj__
        return __newobj__, (type(self),), self.__getstate__()
        return __newobj__, (type(self), self.data), state

    def __setstate__(self, state):
        if len(state) == 5:
            # legacy serialization of Variable
            self.data = state[0]
            state = (state[3], state[4], state[2])
        if self.creator is not None:
            raise RuntimeError('__setstate__ can be only called on leaf variables')
        self.requires_grad, self.volatile, self._backward_hooks = state
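Per the __deepcopy__ shown in this hunk, only graph leaves may be copied; the clone gets its own data tensor and carries over the requires_grad and volatile flags. A short sketch of the expected behaviour (sizes are illustrative):

# Sketch only: deepcopy of a leaf Variable, per __deepcopy__ above.
import copy
import torch
from torch.autograd import Variable

v = Variable(torch.DoubleTensor(2, 2).fill_(1), requires_grad=True)
w = copy.deepcopy(v)                     # allowed: v is a graph leaf (no creator)
assert w.requires_grad == v.requires_grad
assert torch.equal(w.data, v.data)       # data is cloned, not shared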

    def __repr__(self):
        return 'Variable containing:' + self.data.__repr__()
@@ -225,8 +213,25 @@ class Variable(_C._VariableBase):
        self.creator._reinforce(reward)

    def detach(self):
        """Detaches the Variable from the graph that created it."""
        return NoGrad()(self)
        """Returns a new Variable, detached from the current graph.

        The result will never require gradient. If the input is volatile, the
        output will be volatile too.

        .. note::

            The returned Variable uses the same data tensor as the original one,
            and in-place modifications on either of them will be seen, and may
            trigger errors in correctness checks.
        """
        result = NoGrad()(self)  # this is needed, because it merges version counters
        result._creator = None
        return result

    def detach_(self):
        """Detaches the Variable from the graph that created it, making it a leaf."""
        self._creator = None
        self.requires_grad = False
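The two methods above differ only in whether a new Variable is returned or the receiver itself is cut loose. A short sketch of both, assuming the NoGrad-based detach shown here (shared storage, result never requires grad):

# Sketch only: detach() returns a new leaf; detach_() modifies in place.
import torch
from torch.autograd import Variable

x = Variable(torch.DoubleTensor(3).fill_(2), requires_grad=True)
y = x * 3                      # non-leaf: has a creator
z = y.detach()                 # new Variable sharing y's data
assert not z.requires_grad
z.data.fill_(0)                # in-place edits are visible through y.data as well

y.detach_()                    # y itself becomes a leaf and stops requiring grad
assert not y.requires_grad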

    def contiguous(self):
        self.data = self.data.contiguous()
@@ -426,12 +431,6 @@ class Variable(_C._VariableBase):
    def trunc(self):
        return Trunc()(self)

    def floor(self):
        return Floor()(self)

    def ceil(self):
        return Ceil()(self)

    def fmod(self, value):
        return Fmod(value)(self)

@@ -487,9 +486,6 @@ class Variable(_C._VariableBase):
    def split(self, split_size, dim=0):
        return torch.split(self, split_size, dim)

    def chunk(self, n_chunks, dim=0):
        return torch.chunk(self, n_chunks, dim)

    def repeat(self, *repeats):
        if len(repeats) == 1 and isinstance(repeats[0], torch.Size):
            repeats = repeats[0]

@@ -179,17 +179,19 @@ class TensorDescriptorArray(object):
    def __getitem__(self, key):
        return ctypes.c_void_p(self.ptrs[key])

    def set(self, tensor):
        self._type = tensor.type()
        self._size = tensor.size()
        self._stride = tensor.stride()
    def set_all(self, tensor):
        _type = _typemap[tensor.type()]
        _ndim = tensor.dim()
        _size = int_array(tensor.size())
        _stride = int_array(tensor.stride())
        for ptr in self.ptrs:
            check_error(lib.cudnnSetTensorNdDescriptor(
                ctypes.c_void_p(ptr), _typemap[tensor.type()], tensor.dim(),
                int_array(tensor.size()), int_array(tensor.stride())))
                ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))

    def as_tuple(self):
        return (self._type, tuple(self._size), tuple(self._stride))
    def set_raw(self, i, _type, _ndim, _size, _stride):
        ptr = self.ptrs[i]
        check_error(lib.cudnnSetTensorNdDescriptor(
            ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))

class ConvolutionDescriptor(object):
@@ -241,24 +243,42 @@ class DropoutDescriptor(object):
    def __init__(self, handle, dropout, seed):
        ptr = ctypes.c_void_p()
        check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr)))

        self._as_parameter_ = ptr
        self.state = None
        self.dropout = dropout
        self.handle = handle

        dropout_states_size = ctypes.c_long()
        check_error(lib.cudnnDropoutGetStatesSize(
            handle,
            ctypes.byref(dropout_states_size)))
        self._set(dropout, seed)

        self.state = torch.cuda.ByteTensor(dropout_states_size.value)
    def set_dropout(self, dropout, seed):
        if dropout != self.dropout:
            self._set(dropout, seed)

    def _set(self, dropout, seed):
        if self.state is None and dropout > 0:
            dropout_states_size = ctypes.c_long()
            check_error(lib.cudnnDropoutGetStatesSize(
                self.handle,
                ctypes.byref(dropout_states_size)))
            self.state = torch.cuda.ByteTensor(dropout_states_size.value)
            state_ptr = self.state.data_ptr()
            state_size = self.state.size(0)
        else:
            state_ptr = None
            state_size = 0

        check_error(lib.cudnnSetDropoutDescriptor(
            self,
            handle,
            self.handle,
            ctypes.c_float(dropout),
            ctypes.c_void_p(self.state.data_ptr()),
            ctypes.c_size_t(self.state.size(0)),
            ctypes.c_void_p(state_ptr),
            ctypes.c_size_t(state_size),
            ctypes.c_ulonglong(seed),
        ))

        self.dropout = dropout

    def __del__(self):
        check_error(lib.cudnnDestroyDropoutDescriptor(self))
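With this refactor the dropout RNG state is only allocated once dropout becomes non-zero, and set_dropout re-issues cudnnSetDropoutDescriptor only when the probability changes. A rough sketch of the same caching pattern in plain Python (hypothetical stand-in class, no cudnn calls):

# Sketch only: allocate-on-first-use plus change-only updates, mirroring
# DropoutDescriptor.set_dropout/_set above.
class CachedDropout(object):
    def __init__(self, dropout, seed):
        self.state = None
        self.dropout = None
        self._set(dropout, seed)

    def set_dropout(self, dropout, seed):
        if dropout != self.dropout:          # skip redundant descriptor updates
            self._set(dropout, seed)

    def _set(self, dropout, seed):
        if self.state is None and dropout > 0:
            self.state = bytearray(64)       # stands in for the cudnn RNG state buffer
        self.dropout = dropout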

@@ -368,17 +388,30 @@ def int_array(itr):


def descriptor(tensor, N=None):
    padded_size = tensor.size() + ((1,) * (5 - tensor.dim()))
    tensor = tensor.view(padded_size)
    if N is not None:
        descriptor = TensorDescriptorArray(N)
        descriptor.set_all(tensor)
    else:
        descriptor = TensorDescriptor()
        if tensor.dim() == 2:
            tensor = tensor.view(tensor.size(0), tensor.size(1), 1, 1)
        elif tensor.dim() == 3:
            tensor = tensor.view(tensor.size(0), tensor.size(1), tensor.size(2), 1)
        descriptor.set(tensor)
        descriptor.set(tensor)
    return descriptor


def descriptor_sequence(tensor, batch_sizes):
    descriptors = TensorDescriptorArray(len(batch_sizes))
    _type = _typemap[tensor.type()]
    _ndim = 5
    dim_pad = (1,) * (5 - tensor.dim())
    _size = int_array(tensor.size() + dim_pad)
    _stride = int_array(tensor.stride() + dim_pad)
    for i, batch_size in enumerate(batch_sizes):
        _size[0] = batch_size
        descriptors.set_raw(i, _type, _ndim, _size, _stride)
    return descriptors
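descriptor_sequence pads each tensor's size and stride out to cuDNN's five dimensions and then only varies the leading (batch) dimension per time step. A small pure-Python sketch of that padding logic (no ctypes; names are illustrative):

# Sketch only: the 5-D padding and per-step batch-size rewrite used above.
def padded_sizes(size, stride, batch_sizes, ndim=5):
    dim_pad = (1,) * (ndim - len(size))
    base_size = tuple(size) + dim_pad
    base_stride = tuple(stride) + dim_pad
    out = []
    for batch_size in batch_sizes:
        out.append(((batch_size,) + base_size[1:], base_stride))
    return out

# e.g. a packed batch with per-step batch sizes [4, 3, 1]:
print(padded_sizes((4, 10), (10, 1), [4, 3, 1]))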


_autotuner_forward = {}
_autotuner_backward_data = {}
_autotuner_backward_filter = {}

@@ -34,20 +34,20 @@ class Unserializable(object):
        self.inner = None


def init_dropout_descriptor(fn, handle):
    return cudnn.DropoutDescriptor(
        handle,
        fn.dropout,
        fn.dropout_seed
    )


def init_rnn_descriptor(fn, handle):
    dropout_desc_name = 'desc_' + str(torch.cuda.current_device())
    dropout_p = fn.dropout if fn.train else 0
    if (dropout_desc_name not in fn.dropout_state) or (fn.dropout_state[dropout_desc_name].get() is None):
        fn.dropout_state[dropout_desc_name] = Unserializable(
            cudnn.DropoutDescriptor(handle, dropout_p, fn.dropout_seed)
        )
    dropout_desc = fn.dropout_state[dropout_desc_name].get()
    dropout_desc.set_dropout(dropout_p, fn.dropout_seed)
    return cudnn.RNNDescriptor(
        handle,
        fn.hidden_size,
        fn.num_layers,
        fn.dropout_state['desc'].get(),
        dropout_desc,
        fn.input_mode,
        fn.bidirectional,
        fn.mode,
@@ -62,16 +62,22 @@ def init_weight_descriptor(fn, weight):
    return w_desc


def _input_size(fn):
    return (fn.seq_length, fn.mini_batch, fn.input_size)
def _input_size(fn, input):
    if fn.batch_sizes is not None:
        return (input.size(0), fn.input_size)
    else:
        return (fn.seq_length, fn.mini_batch, fn.input_size)


def _hidden_size(fn):
    return (fn.num_layers * fn.num_directions, fn.mini_batch, fn.hidden_size)


def _output_size(fn):
    return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
def _output_size(fn, input):
    if fn.batch_sizes is not None:
        return (input.size(0), fn.hidden_size * fn.num_directions)
    else:
        return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
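With packed input (fn.batch_sizes set) the sizes are keyed off the flattened step dimension rather than (seq_len, batch, feature). A quick worked example of the two cases, using made-up fields on a hypothetical stand-in object:

# Sketch only: expected shapes for padded vs. packed RNN input, mirroring
# _input_size/_output_size above.
class Fn(object):
    pass

fn = Fn()
fn.seq_length, fn.mini_batch = 5, 4
fn.input_size, fn.hidden_size, fn.num_directions = 10, 20, 1

# Padded case: (seq_len, batch, feature)
print((fn.seq_length, fn.mini_batch, fn.input_size))                        # (5, 4, 10)
print((fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions))   # (5, 4, 20)

# Packed case: (sum(batch_sizes), feature), i.e. input.size(0) rows
fn.batch_sizes = [4, 4, 3, 2, 1]
total_steps = sum(fn.batch_sizes)
print((total_steps, fn.input_size))                                         # (14, 10)
print((total_steps, fn.hidden_size * fn.num_directions))                    # (14, 20)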
def get_num_weights(handle, rnn_desc, x_desc, datatype):
|
||||
@ -183,6 +189,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
lib = cudnn.lib
|
||||
handle = cudnn.get_handle()
|
||||
fn.datatype = cudnn._typemap[input.type()]
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
hx, cx = hx
|
||||
@ -190,22 +197,30 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
else:
|
||||
cx, cy = None, None
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
|
||||
if input.dim() != 3:
|
||||
if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
|
||||
raise RuntimeError(
|
||||
'input must have 3 dimensions, got {}'.format(input.dim()))
|
||||
if fn.input_size != input.size(2):
|
||||
raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
|
||||
fn.input_size
|
||||
if fn.input_size != input.size(-1):
|
||||
raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
|
||||
fn.input_size, input.size(-1)
|
||||
))
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v5.1 and above')
|
||||
|
||||
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
|
||||
if is_input_packed:
|
||||
fn.seq_length = len(fn.batch_sizes)
|
||||
fn.mini_batch = fn.batch_sizes[0]
|
||||
fn.input_size = input.size(-1)
|
||||
else:
|
||||
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
|
||||
hidden_size = _hidden_size(fn)
|
||||
output_size = _output_size(fn)
|
||||
output_size = _output_size(fn, input)
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
output.resize_(*output_size)
|
||||
hy.resize_(*hidden_size)
|
||||
@ -214,13 +229,13 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
y = output
|
||||
|
||||
# init descriptors
|
||||
if ('desc' not in fn.dropout_state) or (fn.dropout_state['desc'].get() is None):
|
||||
fn.dropout_state['desc'] = Unserializable(
|
||||
init_dropout_descriptor(fn, handle)
|
||||
)
|
||||
fn.rnn_desc = init_rnn_descriptor(fn, handle)
|
||||
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
|
||||
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
|
||||
if is_input_packed:
|
||||
fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
|
||||
fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
|
||||
else:
|
||||
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
|
||||
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
|
||||
fn.hx_desc = cudnn.descriptor(hx)
|
||||
fn.hy_desc = cudnn.descriptor(hx)
|
||||
fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
|
||||
@ -229,7 +244,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
# create the weight buffer and copy the weights into it
|
||||
num_weights = get_num_weights(
|
||||
handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
|
||||
fn.weight_buf = input.new(num_weights)
|
||||
fn.weight_buf = x.new(num_weights)
|
||||
fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
|
||||
w = fn.weight_buf
|
||||
# this zero might not seem necessary, but it is in the case
|
||||
@ -255,7 +270,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
ctypes.byref(workspace_size)
|
||||
))
|
||||
fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
|
||||
if fn.train:
|
||||
if fn.requires_grad:
|
||||
reserve_size = ctypes.c_long()
|
||||
check_error(lib.cudnnGetRNNTrainingReserveSize(
|
||||
handle,
|
||||
@ -295,12 +310,13 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0)
|
||||
))
|
||||
|
||||
if fn.batch_first:
|
||||
output = output.transpose_(0, 1)
|
||||
if fn.batch_first and not is_input_packed:
|
||||
output.transpose_(0, 1)
|
||||
|
||||
|
||||
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
|
||||
with torch.cuda.device_of(input):
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
handle = cudnn.get_handle()
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
@ -310,15 +326,17 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
else:
|
||||
cx, grad_cx, grad_cy = None, None, None
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
grad_output = grad_output.transpose(0, 1)
|
||||
output = output.transpose(0, 1)
|
||||
|
||||
input_size = _input_size(fn)
|
||||
input_size = _input_size(fn, input)
|
||||
hidden_size = _hidden_size(fn)
|
||||
output_size = _output_size(fn)
|
||||
output_size = _output_size(fn, input)
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
dy = grad_output.contiguous()
|
||||
y = output
|
||||
@ -331,12 +349,12 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_grad can only be called when training!')
|
||||
if not fn.requires_grad:
|
||||
raise RuntimeError('backward_grad can only be called when the function requires grad!')
|
||||
if tuple(input.size()) != input_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
input_size, tuple(input.size())))
|
||||
if tuple(output.size()) != _output_size(fn):
|
||||
if tuple(output.size()) != output_size:
|
||||
raise RuntimeError('Expected output size {}, got {}'.format(
|
||||
output_size, output.size()))
|
||||
if hx is not None and tuple(hx.size()) != hidden_size:
|
||||
@ -351,6 +369,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
if dcy is not None and tuple(dcy.size()) != hidden_size:
|
||||
raise RuntimeError('Expected d_cell size {}, got {}'.format(
|
||||
hidden_size, dcy.size()))
|
||||
if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
|
||||
raise RuntimeError('Gradients aren\'t CUDA tensors')
|
||||
|
||||
check_error(cudnn.lib.cudnnRNNBackwardData(
|
||||
handle,
|
||||
@ -370,7 +390,7 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
|
||||
))
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
grad_input = grad_input.transpose_(0, 1)
|
||||
|
||||
|
||||
@ -389,30 +409,32 @@ def _num_linear_layers(fn):
|
||||
|
||||
def backward_weight(fn, input, hx, output, weight, grad_weight):
|
||||
with torch.cuda.device_of(input):
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
handle = cudnn.get_handle()
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
hx, cx = hx
|
||||
else:
|
||||
cx = None
|
||||
if fn.batch_first:
|
||||
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
output = output.transpose(0, 1)
|
||||
input_size = _input_size(fn)
|
||||
input_size = _input_size(fn, input)
|
||||
hidden_size = _hidden_size(fn)
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_weight can only be called when training!')
|
||||
if not fn.requires_grad:
|
||||
raise RuntimeError('backward_weight can only be called when the function requires grad!')
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
|
||||
if tuple(input.size()) != input_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
input_size, tuple(input.size())))
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_weight can only be called when training!')
|
||||
if tuple(hx.size()) != hidden_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
hidden_size, hx.size()))
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
y = output
|
||||
dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()
|
||||
|
||||
torch/csrc/DynamicTypes.cpp (new file, 181 lines)
@@ -0,0 +1,181 @@
|
||||
#include "DynamicTypes.h"
|
||||
|
||||
#include "THP.h"
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <THPP/tensors/THTensor.hpp>
|
||||
#include <THPP/tensors/THSTensor.hpp>
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#include <THC/THC.h>
|
||||
#include <THCS/THCS.h>
|
||||
#include <THPP/tensors/THCTensor.hpp>
|
||||
#include <THPP/tensors/THCSTensor.hpp>
|
||||
extern THCState* state;
|
||||
#endif
|
||||
|
||||
|
||||
using namespace thpp;
|
||||
|
||||
namespace torch {
|
||||
|
||||
struct TensorType {
|
||||
Type data_type;
|
||||
bool is_cuda;
|
||||
bool is_sparse;
|
||||
|
||||
friend bool operator==(const TensorType &t1, const TensorType &t2)
|
||||
{
|
||||
return (t1.data_type == t2.data_type &&
|
||||
t1.is_cuda == t2.is_cuda &&
|
||||
t1.is_sparse == t2.is_sparse);
|
||||
}
|
||||
|
||||
friend bool operator!=(const TensorType &t1, const TensorType &t2)
|
||||
{
|
||||
return !(t1 == t2);
|
||||
}
|
||||
};
|
||||
|
||||
struct TensorTypeHasher
|
||||
{
|
||||
std::size_t operator()(const TensorType& k) const
|
||||
{
|
||||
size_t hash = static_cast<size_t>(k.data_type);
|
||||
hash = (hash << 8) + k.is_cuda;
|
||||
hash = (hash << 1) + k.is_sparse;
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
static std::unordered_map<std::string, Type> type_names = {
|
||||
{"Float", Type::FLOAT},
|
||||
{"Double", Type::DOUBLE},
|
||||
{"Half", Type::HALF},
|
||||
{"Byte", Type::UCHAR},
|
||||
{"Char", Type::CHAR},
|
||||
{"Short", Type::SHORT},
|
||||
{"Int", Type::INT},
|
||||
{"Long", Type::LONG},
|
||||
};
|
||||
static std::unordered_map<PyTypeObject*, TensorType> pytype_to_tensortype;
|
||||
static std::unordered_map<TensorType, PyTypeObject*, TensorTypeHasher> tensortype_to_pytype;
|
||||
|
||||
void registerPyTypeObject(PyTypeObject *pytype, const std::string& name, bool is_cuda, bool is_sparse)
|
||||
{
|
||||
TensorType type;
|
||||
type.data_type = type_names.at(name);
|
||||
type.is_cuda = is_cuda;
|
||||
type.is_sparse = is_sparse;
|
||||
|
||||
pytype_to_tensortype[pytype] = type;
|
||||
tensortype_to_pytype[type] = pytype;
|
||||
}
|
||||
|
||||
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor)
|
||||
{
|
||||
TensorType type;
|
||||
type.data_type = tensor.type();
|
||||
type.is_cuda = tensor.isCuda();
|
||||
type.is_sparse = tensor.isSparse();
|
||||
|
||||
return tensortype_to_pytype.at(type);
|
||||
}
|
||||
|
||||
static std::unique_ptr<Tensor> createTensor(void *tensor, Type type, bool is_cuda, bool is_sparse)
|
||||
{
|
||||
if (is_cuda) {
|
||||
#ifdef WITH_CUDA
|
||||
if (is_sparse) {
|
||||
if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<unsigned char>(state, (THCSByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<char>(state, (THCSCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<short>(state, (THCSShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<int>(state, (THCSIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<long>(state, (THCSLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<float>(state, (THCSFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<double>(state, (THCSDoubleTensor*)tensor));
|
||||
} else if (type == Type::HALF) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<half>(state, (THCSHalfTensor*)tensor));
|
||||
}
|
||||
} else if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<unsigned char>(state, (THCudaByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<char>(state, (THCudaCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<short>(state, (THCudaShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<int>(state, (THCudaIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<long>(state, (THCudaLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<float>(state, (THCudaTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<double>(state, (THCudaDoubleTensor*)tensor));
|
||||
} else if (type == Type::HALF) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<half>(state, (THCudaHalfTensor*)tensor));
|
||||
}
|
||||
#else
|
||||
throw std::runtime_error("Compiled without CUDA support");
|
||||
#endif
|
||||
} else if (is_sparse) {
|
||||
if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<unsigned char>((THSByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<char>((THSCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<short>((THSShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<int>((THSIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<long>((THSLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<float>((THSFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<double>((THSDoubleTensor*)tensor));
|
||||
}
|
||||
} else if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<unsigned char>((THByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<char>((THCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<short>((THShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<int>((THIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<long>((THLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<float>((THFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<double>((THDoubleTensor*)tensor));
|
||||
}
|
||||
throw std::invalid_argument("Unsupported tensor type");
|
||||
}
|
||||
|
||||
std::unique_ptr<Tensor> createTensor(PyObject *data)
|
||||
{
|
||||
auto tensor_type = pytype_to_tensortype.at(Py_TYPE(data));
|
||||
auto type = tensor_type.data_type;
|
||||
auto tensor = ((THPVoidTensor *)data)->cdata;
|
||||
auto wrapper = createTensor(tensor, type, tensor_type.is_cuda, tensor_type.is_sparse);
|
||||
wrapper->retain();
|
||||
return wrapper;
|
||||
}
|
||||
|
||||
PyObject* createPyObject(const thpp::Tensor& tensor)
|
||||
{
|
||||
auto type = getPyTypeObject(tensor);
|
||||
PyObject *obj = type->tp_alloc(type, 0);
|
||||
if (obj) {
|
||||
((THPVoidTensor*)obj)->cdata = (THVoidTensor *)const_cast<thpp::Tensor&>(tensor).retain().cdata();
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
torch/csrc/DynamicTypes.h (new file, 25 lines)
@@ -0,0 +1,25 @@
#pragma once

// Provides conversions between Python tensor objects and thpp::Tensors.

#include <memory>
#include <Python.h>
#include <THPP/THPP.h>

namespace torch {

// Register a PyTypeObject* with the given attributes
void registerPyTypeObject(
    PyTypeObject *pytype, const std::string& name,
    bool is_cuda, bool is_sparse);

// Gets the PyTypeObject* corresponding to the Tensor
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor);

// Creates a Tensor from a Python tensor object
std::unique_ptr<thpp::Tensor> createTensor(PyObject *data);

// Creates Python tensor object from a Tensor
PyObject* createPyObject(const thpp::Tensor& tensor);

} // namespace torch
@ -5,12 +5,16 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "THP.h"
|
||||
// Throwing this exception means that the python error flags have been already
|
||||
// set and control should be immediately returned to the interpreter.
|
||||
class python_error : public std::exception {};
|
||||
|
||||
#define HANDLE_TH_ERRORS \
|
||||
try {
|
||||
|
||||
#define END_HANDLE_TH_ERRORS_RET(retval) \
|
||||
} catch (python_error &e) { \
|
||||
return retval; \
|
||||
} catch (std::exception &e) { \
|
||||
PyErr_SetString(PyExc_RuntimeError, e.what()); \
|
||||
return retval; \
|
||||
@ -21,6 +25,7 @@
|
||||
extern PyObject *THPException_FatalError;
|
||||
|
||||
#ifdef _THP_CORE
|
||||
|
||||
struct THException: public std::exception {
|
||||
THException(const char* msg): msg(msg) {};
|
||||
|
||||
|
||||
@ -33,25 +33,25 @@ static bool THPModule_loadClasses(PyObject *self)
|
||||
THPUtils_setError("class loader couldn't access torch module");
|
||||
return false;
|
||||
}
|
||||
PyObject* module_dict = PyModule_GetDict(torch_module);
|
||||
|
||||
ASSERT_NOT_NULL(tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_tensor_classes"));
|
||||
ASSERT_NOT_NULL(tensor_classes = PyObject_GetAttrString(torch_module, (char*)"_tensor_classes"));
|
||||
if (!THPDoubleTensor_postInit(torch_module)) return false;
|
||||
if (!THPFloatTensor_postInit(torch_module)) return false;
|
||||
if (!THPHalfTensor_postInit(torch_module)) return false;
|
||||
if (!THPLongTensor_postInit(torch_module)) return false;
|
||||
if (!THPIntTensor_postInit(torch_module)) return false;
|
||||
if (!THPShortTensor_postInit(torch_module)) return false;
|
||||
if (!THPCharTensor_postInit(torch_module)) return false;
|
||||
if (!THPByteTensor_postInit(torch_module)) return false;
|
||||
|
||||
ASSERT_NOT_NULL(THPDoubleStorageClass = PyMapping_GetItemString(module_dict,(char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THPFloatStorageClass = PyMapping_GetItemString(module_dict,(char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THPLongStorageClass = PyMapping_GetItemString(module_dict,(char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THPIntStorageClass = PyMapping_GetItemString(module_dict,(char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THPShortStorageClass = PyMapping_GetItemString(module_dict,(char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THPCharStorageClass = PyMapping_GetItemString(module_dict,(char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THPByteStorageClass = PyMapping_GetItemString(module_dict,(char*)"ByteStorage"));
|
||||
|
||||
ASSERT_NOT_NULL(THPDoubleTensorClass = PyMapping_GetItemString(module_dict,(char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THPFloatTensorClass = PyMapping_GetItemString(module_dict,(char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THPLongTensorClass = PyMapping_GetItemString(module_dict,(char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THPIntTensorClass = PyMapping_GetItemString(module_dict,(char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THPShortTensorClass = PyMapping_GetItemString(module_dict,(char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THPCharTensorClass = PyMapping_GetItemString(module_dict,(char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THPByteTensorClass = PyMapping_GetItemString(module_dict,(char*)"ByteTensor"));
|
||||
ASSERT_NOT_NULL(THPDoubleStorageClass = PyObject_GetAttrString(torch_module,(char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THPFloatStorageClass = PyObject_GetAttrString(torch_module,(char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THPHalfStorageClass = PyObject_GetAttrString(torch_module,(char*)"HalfStorage"));
|
||||
ASSERT_NOT_NULL(THPLongStorageClass = PyObject_GetAttrString(torch_module,(char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THPIntStorageClass = PyObject_GetAttrString(torch_module,(char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THPShortStorageClass = PyObject_GetAttrString(torch_module,(char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THPCharStorageClass = PyObject_GetAttrString(torch_module,(char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THPByteStorageClass = PyObject_GetAttrString(torch_module,(char*)"ByteStorage"));
|
||||
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
@ -72,6 +72,7 @@ static bool THPModule_assignStateless(PyObject *self)
|
||||
PyObject *stateless;
|
||||
INIT_STATELESS(Double);
|
||||
INIT_STATELESS(Float);
|
||||
INIT_STATELESS(Half);
|
||||
INIT_STATELESS(Long);
|
||||
INIT_STATELESS(Int);
|
||||
INIT_STATELESS(Short);
|
||||
@ -92,6 +93,7 @@ static PyObject * THPModule_initExtension(PyObject *self, PyObject *shm_manager_
|
||||
libshm_init(THPUtils_bytesAsString(shm_manager_path));
|
||||
if (!THPModule_loadClasses(self)) return NULL;
|
||||
if (!THPModule_assignStateless(self)) return NULL;
|
||||
if (!THPAutograd_initFunctions(self)) return NULL;
|
||||
return PyBool_FromLong(true);
|
||||
}
|
||||
|
||||
@ -139,6 +141,8 @@ PyObject * THPModule_fromNumpy(PyObject *_unused, PyObject *array)
|
||||
return PyObject_CallFunctionObjArgs(THPLongTensorClass, array, NULL);
|
||||
} else if (type == NPY_INT32) {
|
||||
return PyObject_CallFunctionObjArgs(THPIntTensorClass, array, NULL);
|
||||
} else if (type == NPY_INT16) {
|
||||
return PyObject_CallFunctionObjArgs(THPShortTensorClass, array, NULL);
|
||||
} else if (type == NPY_UINT8) {
|
||||
return PyObject_CallFunctionObjArgs(THPByteTensorClass, array, NULL);
|
||||
}
|
||||
@ -243,6 +247,7 @@ IMPLEMENT_STATELESS(topk)
|
||||
IMPLEMENT_STATELESS(t)
|
||||
IMPLEMENT_STATELESS(transpose)
|
||||
IMPLEMENT_STATELESS(squeeze)
|
||||
IMPLEMENT_STATELESS(unsqueeze)
|
||||
IMPLEMENT_STATELESS(renorm)
|
||||
IMPLEMENT_STATELESS(dist)
|
||||
IMPLEMENT_STATELESS(linspace)
|
||||
@ -492,6 +497,8 @@ extern PyObject * THCPModule_cudaHostAllocator(PyObject *_unused);
|
||||
extern PyObject * THCPModule_cudaSynchronize(PyObject *_unused);
|
||||
extern PyObject * THCPModule_getLibPath(PyObject *_unused);
|
||||
extern PyObject * THCPModule_cudaSleep(PyObject *_unused, PyObject *cycles);
|
||||
extern PyObject * THCPModule_cudaLockMutex(PyObject *module);
|
||||
extern PyObject * THCPModule_cudaUnlockMutex(PyObject *module);
|
||||
|
||||
extern PyObject * THCSPModule_initExtension(PyObject *self);
|
||||
#endif
|
||||
@ -522,6 +529,8 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"_cuda_getLibPath", (PyCFunction)THCPModule_getLibPath, METH_NOARGS, NULL},
|
||||
{"_cuda_sleep", (PyCFunction)THCPModule_cudaSleep, METH_O, NULL},
|
||||
{"_cuda_sparse_init", (PyCFunction)THCSPModule_initExtension, METH_NOARGS, NULL},
|
||||
{"_cuda_lock_mutex", (PyCFunction)THCPModule_cudaLockMutex, METH_NOARGS, NULL},
|
||||
{"_cuda_unlock_mutex", (PyCFunction)THCPModule_cudaUnlockMutex, METH_NOARGS, NULL},
|
||||
#endif
|
||||
{"_safe_call", (PyCFunction)THPModule_safeCall, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"_set_default_tensor_type", (PyCFunction)THPModule_setDefaultTensorType, METH_O, NULL},
|
||||
@ -593,6 +602,7 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"t", (PyCFunction)THPModule_t, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"unsqueeze", (PyCFunction)THPModule_unsqueeze, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"nonzero", (PyCFunction)THPModule_nonzero, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"dist", (PyCFunction)THPModule_dist, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
@ -649,6 +659,7 @@ static PyMethodDef TorchMethods[] = {
|
||||
// Sparse functions
|
||||
{"smm", (PyCFunction)THSPModule_sspmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"saddmm", (PyCFunction)THSPModule_sspaddmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"dsmm", (PyCFunction)THSPModule_spmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
@ -764,6 +775,7 @@ PyMODINIT_FUNC PyInit__C()
|
||||
|
||||
ASSERT_TRUE(THPDoubleStorage_init(module));
|
||||
ASSERT_TRUE(THPFloatStorage_init(module));
|
||||
ASSERT_TRUE(THPHalfStorage_init(module));
|
||||
ASSERT_TRUE(THPLongStorage_init(module));
|
||||
ASSERT_TRUE(THPIntStorage_init(module));
|
||||
ASSERT_TRUE(THPShortStorage_init(module));
|
||||
@ -772,6 +784,7 @@ PyMODINIT_FUNC PyInit__C()
|
||||
|
||||
ASSERT_TRUE(THPDoubleTensor_init(module));
|
||||
ASSERT_TRUE(THPFloatTensor_init(module));
|
||||
ASSERT_TRUE(THPHalfTensor_init(module));
|
||||
ASSERT_TRUE(THPLongTensor_init(module));
|
||||
ASSERT_TRUE(THPIntTensor_init(module));
|
||||
ASSERT_TRUE(THPShortTensor_init(module));
|
||||
|
||||
@ -6,20 +6,16 @@ PyObject* sparse_tensor_classes;
|
||||
// SPARSE MODULE INITIALIZATION
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool THSPModule_loadClasses(PyObject *module_dict)
|
||||
static bool THSPModule_loadClasses(PyObject *sparse_module)
|
||||
{
|
||||
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
|
||||
ASSERT_NOT_NULL(sparse_tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_sparse_tensor_classes"));
|
||||
ASSERT_NOT_NULL(THSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
|
||||
|
||||
if (!THSPDoubleTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPFloatTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPLongTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPIntTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPShortTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPCharTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPByteTensor_postInit(sparse_module)) return false;
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
}
|
||||
|
||||
static bool THSPModule_assignStateless()
|
||||
@ -50,18 +46,11 @@ static bool THSPModule_assignStateless()
|
||||
// Callback for python part. Used for additional initialization of python classes
|
||||
PyObject *THSPModule_initExtension(PyObject *self)
|
||||
{
|
||||
#define ASSERT_TRUE(cond) if (!(cond)) { Py_RETURN_FALSE; }
|
||||
PyObject *module = PyImport_ImportModule("torch.sparse");
|
||||
if (!module) {
|
||||
THPUtils_setError("class loader couldn't access torch.sparse module");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject* module_dict = PyModule_GetDict(module);
|
||||
ASSERT_TRUE(THSPModule_loadClasses(module_dict));
|
||||
ASSERT_TRUE(THSPModule_assignStateless());
|
||||
Py_RETURN_TRUE;
|
||||
#undef ASSERT_TRUE
|
||||
if (!module) return NULL;
|
||||
if (!THSPModule_loadClasses(module)) return NULL;
|
||||
if (!THSPModule_assignStateless()) return NULL;
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -80,19 +69,19 @@ bool THPModule_isSparseTensor(PyObject *obj)
|
||||
#define IMPLEMENT_SPARSE_STATELESS(name) \
|
||||
static PyObject * TH_CONCAT_2(THSPModule_, name)(PyObject *_unused, PyObject *args, PyObject *kwargs) \
|
||||
{ \
|
||||
PyObject *tensor = THSPFloatTensorClass; \
|
||||
PyObject *tensor = THSPFloatTensorClass; \
|
||||
PyObject *key, *value; \
|
||||
Py_ssize_t pos = 0; \
|
||||
for (int i = 0; i < PyTuple_Size(args); i++) { \
|
||||
PyObject *item = PyTuple_GET_ITEM(args, i); \
|
||||
if (THPModule_isTensor(item) || THPVariable_CheckType(item, THPModule_isSparseTensor)) { \
|
||||
if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
|
||||
tensor = item; \
|
||||
goto dispatch; \
|
||||
} \
|
||||
} \
|
||||
if (kwargs) { \
|
||||
while (PyDict_Next(kwargs, &pos, &key, &value)) { \
|
||||
if (THPModule_isTensor(value) || THPVariable_CheckType(value, THPModule_isSparseTensor)) { \
|
||||
if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
|
||||
tensor = value; \
|
||||
goto dispatch; \
|
||||
} \
|
||||
@ -109,6 +98,7 @@ dispatch: \
|
||||
return PyObject_Call(method, args, kwargs); \
|
||||
}
|
||||
|
||||
IMPLEMENT_SPARSE_STATELESS(spmm);
|
||||
IMPLEMENT_SPARSE_STATELESS(sspmm);
|
||||
IMPLEMENT_SPARSE_STATELESS(sspaddmm);
|
||||
|
||||
|
||||
@ -54,6 +54,50 @@ static PyObject * THPSize_repr(THPSize *self)
|
||||
#endif
|
||||
}
|
||||
|
||||
extern PyTypeObject THPSizeType;
|
||||
|
||||
template<typename FnType, FnType fn, typename ...Args>
|
||||
static PyObject* wrap_tuple_fn(Args ... args)
|
||||
{
|
||||
PyObject *result = (*fn)(std::forward<Args>(args)...);
|
||||
if (!result) return NULL;
|
||||
if (PyTuple_Check(result)) {
|
||||
return PyObject_CallFunctionObjArgs((PyObject*)&THPSizeType, result, NULL);
|
||||
}
|
||||
Py_INCREF(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static auto sq_concat = PyTuple_Type.tp_as_sequence->sq_concat;
|
||||
static auto sq_repeat = PyTuple_Type.tp_as_sequence->sq_repeat;
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
static auto sq_slice = PyTuple_Type.tp_as_sequence->sq_slice;
|
||||
#endif
|
||||
static auto mp_subscript = PyTuple_Type.tp_as_mapping->mp_subscript;
|
||||
|
||||
|
||||
static PySequenceMethods THPSize_as_sequence = {
|
||||
PyTuple_Type.tp_as_sequence->sq_length,
|
||||
wrap_tuple_fn<decltype(&sq_concat), &sq_concat>,
|
||||
wrap_tuple_fn<decltype(&sq_repeat), &sq_repeat>,
|
||||
PyTuple_Type.tp_as_sequence->sq_item,
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
wrap_tuple_fn<decltype(&sq_slice), &sq_slice>,
|
||||
#else
|
||||
0, /* sq_slice */
|
||||
#endif
|
||||
0, /* sq_ass_item */
|
||||
0, /* sq_ass_slice */
|
||||
PyTuple_Type.tp_as_sequence->sq_contains
|
||||
};
|
||||
|
||||
static PyMappingMethods THPSize_as_mapping = {
|
||||
PyTuple_Type.tp_as_mapping->mp_length,
|
||||
wrap_tuple_fn<decltype(&mp_subscript), &mp_subscript>,
|
||||
0
|
||||
};
|
||||
|
||||
|
||||
PyTypeObject THPSizeType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch.Size", /* tp_name */
|
||||
@ -66,8 +110,8 @@ PyTypeObject THPSizeType = {
|
||||
0, /* tp_reserved */
|
||||
(reprfunc)THPSize_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
&THPSize_as_sequence, /* tp_as_sequence */
|
||||
&THPSize_as_mapping, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
#define THP_HOST_HALF
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <TH/TH.h>
|
||||
#include <libshm.h>
|
||||
@ -9,3 +11,6 @@
|
||||
|
||||
#include "generic/Storage.cpp"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Storage.cpp"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
@ -10,6 +10,8 @@
|
||||
PyObject_IsInstance(obj, THPDoubleStorageClass)
|
||||
#define THPFloatStorage_Check(obj) \
|
||||
PyObject_IsInstance(obj, THPFloatStorageClass)
|
||||
#define THPHalfStorage_Check(obj) \
|
||||
PyObject_IsInstance(obj, THPFloatStorageClass)
|
||||
#define THPLongStorage_Check(obj) \
|
||||
PyObject_IsInstance(obj, THPLongStorageClass)
|
||||
#define THPIntStorage_Check(obj) \
|
||||
@ -23,6 +25,7 @@
|
||||
|
||||
#define THPDoubleStorage_CData(obj) (obj)->cdata
|
||||
#define THPFloatStorage_CData(obj) (obj)->cdata
|
||||
#define THPHalfStorage_CData(obj) (obj)->cdata
|
||||
#define THPLongStorage_CData(obj) (obj)->cdata
|
||||
#define THPIntStorage_CData(obj) (obj)->cdata
|
||||
#define THPShortStorage_CData(obj) (obj)->cdata
|
||||
@ -37,4 +40,7 @@
|
||||
#include "generic/Storage.h"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Storage.h"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
#define THP_HOST_HALF
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
@ -9,6 +11,10 @@
|
||||
|
||||
#include "THP.h"
|
||||
#include "copy_utils.h"
|
||||
#include "DynamicTypes.h"
|
||||
|
||||
#include "generic/Tensor.cpp"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Tensor.cpp"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
#define THPDoubleTensor_Check(obj) PyObject_IsInstance(obj, THPDoubleTensorClass)
|
||||
#define THPFloatTensor_Check(obj) PyObject_IsInstance(obj, THPFloatTensorClass)
|
||||
#define THPHalfTensor_Check(obj) PyObject_IsInstance(obj, THPHalfTensorClass)
|
||||
#define THPLongTensor_Check(obj) PyObject_IsInstance(obj, THPLongTensorClass)
|
||||
#define THPIntTensor_Check(obj) PyObject_IsInstance(obj, THPIntTensorClass)
|
||||
#define THPShortTensor_Check(obj) PyObject_IsInstance(obj, THPShortTensorClass)
|
||||
@ -16,6 +17,7 @@
|
||||
|
||||
#define THPDoubleTensor_CData(obj) (obj)->cdata
|
||||
#define THPFloatTensor_CData(obj) (obj)->cdata
|
||||
#define THPHalfTensor_CData(obj) (obj)->cdata
|
||||
#define THPLongTensor_CData(obj) (obj)->cdata
|
||||
#define THPIntTensor_CData(obj) (obj)->cdata
|
||||
#define THPShortTensor_CData(obj) (obj)->cdata
|
||||
@ -63,4 +65,7 @@
|
||||
#include "generic/Tensor.h"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Tensor.h"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
#endif
|
||||
|
||||
@ -2,9 +2,10 @@
|
||||
#define THP_AUTOGRAD_H
|
||||
|
||||
PyObject * THPAutograd_initExtension(PyObject *_unused);
|
||||
bool THPAutograd_initFunctions(PyObject* module);
|
||||
|
||||
#include "variable.h"
|
||||
#include "function.h"
|
||||
#include "engine.h"
|
||||
#include "torch/csrc/autograd/python_function.h"
|
||||
#include "torch/csrc/autograd/python_variable.h"
|
||||
#include "torch/csrc/autograd/python_engine.h"
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,342 +1,187 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include "torch/csrc/autograd/engine.h"
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <THPP/THPP.h>
|
||||
|
||||
#include "THP.h"
|
||||
using thpp::Tensor;
|
||||
|
||||
PyObject *THPEngineClass = NULL;
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
// used for topological sort
|
||||
using dependencies_type = std::unordered_map<THPFunction *, int>;
|
||||
// stores gradient buffers
|
||||
using grad_list_type = std::vector<THPObjectPtr>;
|
||||
// used for need_copy set (to ensure correct gradient buffering)
|
||||
using buffer_set_type = std::set<std::pair<size_t, int>>;
|
||||
// gradient buffer - a list of gradient tensors + id
|
||||
struct grad_buffer_type: public grad_list_type {
|
||||
template<typename... Args>
|
||||
grad_buffer_type(size_t buffer_id, Args&&... args):
|
||||
grad_list_type(std::forward<Args>(args)...),
|
||||
buffer_id(buffer_id) {};
|
||||
grad_buffer_type(grad_buffer_type &&other):
|
||||
grad_list_type(std::move(other)),
|
||||
buffer_id(other.buffer_id) {};
|
||||
grad_buffer_type& operator=(grad_buffer_type &&other) {
|
||||
grad_list_type::operator=(std::move(other));
|
||||
buffer_id = other.buffer_id;
|
||||
return *this;
|
||||
};
|
||||
|
||||
size_t buffer_id;
|
||||
};
|
||||
// used for the queue of nodes ready for processing
|
||||
using ready_queue_type = std::deque<std::pair<THPFunction *, grad_buffer_type>>;
|
||||
|
||||
// Computes graph dependencies (using a super simple topological sort)
|
||||
void THPEngine_compute_dependencies(std::vector<THPFunction*> queue,
|
||||
dependencies_type& dependencies, ready_queue_type& ready)
|
||||
{
|
||||
std::set<THPFunction *> seen;
|
||||
while (queue.size() > 0) {
|
||||
THPFunction *fn = queue.back(); queue.pop_back();
|
||||
for (int i = 0; i < fn->num_inputs; i++) {
|
||||
THPFunction *prev_fn = (THPFunction*)fn->previous_functions[i].get();
|
||||
// We can ignore variables (their backprop is called every time we have
|
||||
// gradient ready).
|
||||
if (THPVariable_Check((PyObject*)prev_fn))
|
||||
continue;
|
||||
// Stochastic functions are ready for backward immediately
|
||||
if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass) &&
|
||||
prev_fn->requires_grad &&
|
||||
seen.count(prev_fn) == 0) {
|
||||
ready.emplace_back(prev_fn, grad_buffer_type(0));
|
||||
} else if (fn->requires_grad && prev_fn->requires_grad) {
|
||||
dependencies[prev_fn] += 1;
|
||||
auto Engine::compute_dependencies(function_queue queue, ready_queue_type& ready) -> dependencies_type {
|
||||
// First, search the graph and find all stochastic functions. Append them to the queue.
|
||||
std::unordered_set<Function*> seen;
|
||||
function_queue search_queue(queue);
|
||||
while (search_queue.size() > 0) {
|
||||
auto fn = search_queue.back(); search_queue.pop_back();
|
||||
for (auto& prev_fn_pair : fn->previous_functions) {
|
||||
auto& prev_fn = prev_fn_pair.first;
|
||||
Function* prev_ptr = prev_fn.get();
|
||||
if (!prev_ptr) continue;
|
||||
if (prev_ptr->is_stochastic && prev_ptr->requires_grad && seen.count(prev_ptr) == 0) {
|
||||
ready.emplace_back(prev_fn, GradBuffer(0));
|
||||
queue.push_back(prev_ptr);
|
||||
}
|
||||
if (seen.count(prev_fn) == 0) {
|
||||
seen.insert(prev_fn);
|
||||
queue.push_back(prev_fn);
|
||||
if (seen.count(prev_ptr) == 0) {
|
||||
seen.insert(prev_ptr);
|
||||
search_queue.push_back(prev_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Frees backward dependency and returns true if prev_fn is ready for backward
|
||||
bool THPEngine_free_backward_dependency(dependencies_type &dependencies,
|
||||
THPFunction *prev_fn)
|
||||
{
|
||||
int deps = --dependencies[prev_fn];
|
||||
if (deps < 0) {
|
||||
std::string msg = "dependencies is negative: ";
|
||||
msg += Py_TYPE((PyObject*)prev_fn)->tp_name;
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
if (deps == 0) {
|
||||
dependencies.erase(prev_fn);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Accumulates d_prev_fn gradient tensor into output_idx position of prev_grad buffer
|
||||
bool THPEngine_add_grad(buffer_set_type &need_copy, grad_buffer_type &prev_grad,
|
||||
int output_nr, PyObject *d_prev_fn)
|
||||
{
|
||||
// TODO: we should probably clean up need_copy, because most tensors will
|
||||
// probably never hit the else clause
|
||||
auto set_key = std::make_pair(prev_grad.buffer_id, output_nr);
|
||||
if (!prev_grad[output_nr]) {
|
||||
Py_INCREF(d_prev_fn);
|
||||
prev_grad[output_nr] = d_prev_fn;
|
||||
need_copy.insert(set_key);
|
||||
} else {
|
||||
PyObject *grad_tensor = prev_grad[output_nr];
|
||||
if (need_copy.count(set_key) != 0) {
|
||||
grad_tensor = PyObject_CallMethod(grad_tensor, "clone", "");
|
||||
if (!grad_tensor)
|
||||
return false;
|
||||
need_copy.erase(set_key);
|
||||
prev_grad[output_nr] = grad_tensor;
|
||||
}
|
||||
THPObjectPtr result = PyObject_CallMethod(grad_tensor, "add_", "O", d_prev_fn);
|
||||
if (!result)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Main backward function
|
||||
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *variables = NULL;
|
||||
PyObject *grad_variables = NULL;
|
||||
unsigned char retain_variables = 0;
|
||||
size_t next_buf_id = 0;
|
||||
const char *accepted_kwargs[] = {"variables", "grad_variables",
|
||||
"retain_variables", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
|
||||
&variables, &grad_variables, &retain_variables))
|
||||
return NULL;
|
||||
PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;
|
||||
|
||||
THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
|
||||
"retain_variables argument is expected to be a bool, but got %s",
|
||||
THPUtils_typename(retain_variables_obj));
|
||||
THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
|
||||
"be a tuple, but got %s", THPUtils_typename(variables));
|
||||
THPUtils_assert(PyTuple_Check(grad_variables), "variables argument is "
|
||||
"expected to be a tuple, but got %s", THPUtils_typename(grad_variables));
|
||||
|
||||
Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
|
||||
Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
|
||||
THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
|
||||
"gradients", num_variables, num_gradients);
|
||||
|
||||
ready_queue_type ready;
|
||||
std::unordered_map<THPFunction *, grad_buffer_type> not_ready;
|
||||
// Now, queue contains all nodes that will start propagating gradients. We no longer have
|
||||
// to expand functions that don't require grad.
|
||||
dependencies_type dependencies;
|
||||
buffer_set_type need_copy;
|
||||
seen.clear();
|
||||
// Just to make sure that they will never be added to the queue again
|
||||
seen.insert(queue.begin(), queue.end());
|
||||
while (queue.size() > 0) {
|
||||
auto fn = std::move(queue.back()); queue.pop_back();
|
||||
// This is needed only to filter out backward roots that don't require grad
|
||||
if (!fn->requires_grad) continue;
|
||||
for (auto& prev_fn_pair : fn->previous_functions) {
|
||||
Function* prev_ptr = prev_fn_pair.first.get();
|
||||
if (!prev_ptr) continue;
|
||||
if (dynamic_cast<Variable*>(prev_ptr)) continue;
|
||||
if (!prev_ptr->requires_grad) continue;
|
||||
if (prev_ptr->is_stochastic) continue; // Stochastic nodes were in the queue already
|
||||
dependencies[prev_ptr] += 1;
|
||||
if (seen.count(prev_ptr) == 0) {
|
||||
seen.insert(prev_ptr);
|
||||
queue.push_back(prev_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
return dependencies;
|
||||
}
|
||||
|
||||
auto Engine::backward(const variable_list& variables,
|
||||
tensor_list& grad_variables,
|
||||
bool retain_variables) -> void {
|
||||
function_queue creators;
|
||||
std::unordered_map<std::shared_ptr<Function>, std::unique_ptr<GradBuffer>> creator_grad;
|
||||
ready_queue_type ready;
|
||||
|
||||
bool did_leaf_backward = false;
|
||||
std::vector<THPFunction*> creators;
|
||||
for (int i = 0; i < num_variables; i++) {
|
||||
THPVariable *variable = (THPVariable*)PyTuple_GET_ITEM(variables, i);
|
||||
PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
|
||||
THPUtils_assert(THPVariable_Check((PyObject*)variable), "element %d of variables "
|
||||
"tuple is not a Variable", i);
|
||||
// If someone calls .backward() on a leaf, it's simple...
|
||||
if (variable->creator == NULL) {
|
||||
if (variable->requires_grad) {
|
||||
THPObjectPtr result = PyObject_CallMethod((PyObject*)variable,
|
||||
"_do_backward", "(O)O", grad, retain_variables_obj);
|
||||
if (!result) return NULL;
|
||||
int size = variables.size();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto& var = variables[i];
|
||||
auto& grad = grad_variables[i];
|
||||
if (!var->creator) {
|
||||
// If someone calls .backward() on a leaf, it's simple...
|
||||
if (var->requires_grad) {
|
||||
var->backward(std::make_shared<Variable>(std::move(grad), false, true));
|
||||
did_leaf_backward = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
THPFunction *creator = (THPFunction*)variable->creator;
|
||||
creators.push_back(creator);
|
||||
// Initialize the queue
|
||||
if (creator->requires_grad) {
|
||||
grad_buffer_type buf(next_buf_id++, creator->num_outputs);
|
||||
Py_INCREF(grad);
|
||||
buf[variable->output_nr] = grad;
|
||||
ready.emplace_front(creator, std::move(buf));
|
||||
} else {
|
||||
auto& creator = var->creator;
|
||||
auto& buf = creator_grad[creator];
|
||||
if (creator->requires_grad) {
|
||||
if (!buf) buf.reset(new GradBuffer(creator->num_outputs));
|
||||
buf->addGrad(var->output_nr, Variable::of(std::move(grad)));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto& entry: creator_grad) {
|
||||
const auto& creator = entry.first;
|
||||
auto& buf = entry.second; // WARNING: this is nullptr if !creator->requires_grad
|
||||
creators.push_back(creator.get());
|
||||
if (creator->requires_grad) {
|
||||
ready.emplace_back(creator, std::move(*buf));
|
||||
}
|
||||
}
|
||||
creator_grad.clear(); // Clear the shared pointers
|
||||
|
||||
THPEngine_compute_dependencies(std::move(creators), dependencies, ready);
|
||||
auto dependencies = compute_dependencies(std::move(creators), ready);
|
||||
|
||||
THPUtils_assert(did_leaf_backward || ready.size() > 0, "there are no graph "
|
||||
"nodes that require computing gradients");
|
||||
if (!did_leaf_backward && ready.size() == 0) {
|
||||
throw std::runtime_error(
|
||||
"there are no graph nodes that require computing gradients");
|
||||
}
|
||||
|
||||
std::unordered_map<Function*, GradBuffer> not_ready;
|
||||
while (ready.size() > 0) {
|
||||
std::pair<THPFunction *, grad_buffer_type> ready_pair =
|
||||
std::move(ready.back()); ready.pop_back();
|
||||
THPFunction *fn = ready_pair.first;
|
||||
grad_buffer_type &fn_grad_buffer = ready_pair.second;
|
||||
auto ready_pair = std::move(ready.back()); ready.pop_back();
|
||||
auto& fn = ready_pair.first;
|
||||
|
||||
// Prepare a tuple for a call to _do_backward
|
||||
THPObjectPtr grad_tuple = PyTuple_New(fn_grad_buffer.size());
|
||||
if (!grad_tuple) return NULL;
|
||||
for (unsigned int i = 0; i < fn_grad_buffer.size(); i++) {
|
||||
PyObject *_grad;
|
||||
if (fn_grad_buffer[i]) {
|
||||
_grad = fn_grad_buffer[i].release();
|
||||
} else {
|
||||
_grad = Py_None;
|
||||
Py_INCREF(_grad);
|
||||
}
|
||||
PyTuple_SET_ITEM(grad_tuple.get(), i, _grad);
|
||||
auto grad_inputs = fn->apply(GradBuffer::variables(std::move(ready_pair.second)));
|
||||
if (!retain_variables) {
|
||||
fn->releaseVariables();
|
||||
}
|
||||
|
||||
// Call _do_backward and make sure grad_input is sound
|
||||
THPObjectPtr grad_input = PyObject_CallMethod((PyObject*)fn, "_do_backward",
|
||||
"OO", grad_tuple.get(), retain_variables_obj);
|
||||
if (!grad_input)
|
||||
return NULL;
|
||||
THPUtils_assert(PyTuple_Check(grad_input), "error, _do_backward should "
|
||||
"return a tuple, but got %s", THPUtils_typename(grad_input));
|
||||
int num_grads = PyTuple_GET_SIZE(grad_input.get());
|
||||
if (grad_inputs.size() != fn->previous_functions.size()) {
|
||||
// Build the error message; std::to_string appends the counts as text rather
// than letting size_t convert to a single char.
std::string msg("Function returned an invalid number of gradients - expected ");
msg += std::to_string(fn->previous_functions.size());
msg += ", but got ";
msg += std::to_string(grad_inputs.size());
throw std::runtime_error(msg);
|
||||
}
|
||||
|
||||
// Process tensors inside grad_input
|
||||
for (int i = 0; i < num_grads; i++) {
|
||||
PyObject *prev_obj = fn->previous_functions[i].get();
|
||||
PyObject *grad_prev = PyTuple_GET_ITEM(grad_input.get(), i);
|
||||
int size = grad_inputs.size();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
auto& grad_input = grad_inputs[i];
|
||||
auto& prev_fn = fn->previous_functions[i].first;
|
||||
int output_nr = fn->previous_functions[i].second;
|
||||
|
||||
// A shortcut for variables - there's no need to buffer gradients for them
|
||||
// as their _do_backward is super fast (and we can save memory).
|
||||
// FIXME: this might call leaf variable hooks multiple times
|
||||
if (THPVariable_Check(prev_obj)) {
|
||||
THPVariable *prev_var = (THPVariable*)prev_obj;
|
||||
if (prev_var->requires_grad) {
|
||||
THPObjectPtr ret = PyObject_CallMethod(prev_obj, "_do_backward",
|
||||
"(O)O", grad_prev, retain_variables_obj);
|
||||
if (!ret) return NULL;
|
||||
// null inputs have no previous_function and we skip them here
|
||||
if (!prev_fn) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto var = dynamic_cast<Variable*>(prev_fn.get())) {
|
||||
if (var->requires_grad) {
|
||||
var->backward(grad_input);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// No need to do any work for functions that don't require gradients
|
||||
THPFunction *prev_fn = (THPFunction*)prev_obj;
|
||||
if (!prev_fn->requires_grad)
|
||||
continue;
|
||||
// Stochastic functions are immediately ready
|
||||
if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass))
|
||||
// Stochastic functions are placed in the ready queue by
|
||||
// compute_dependencies, so we can skip them here.
|
||||
if (prev_fn->is_stochastic || !prev_fn->requires_grad) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the function is ready for backward and see if it has any
|
||||
// buffers allocated
|
||||
int output_idx = fn->previous_functions[i].output_nr;
|
||||
bool is_ready = THPEngine_free_backward_dependency(dependencies, prev_fn);
|
||||
auto not_ready_it = not_ready.find(prev_fn);
|
||||
// Check if the function is ready for backward
|
||||
bool is_ready = false;
|
||||
auto it = dependencies.find(prev_fn.get());
|
||||
if (it == dependencies.end()) {
|
||||
throw std::runtime_error("dependency not found");
|
||||
} else if (--it->second == 0) {
|
||||
dependencies.erase(it);
|
||||
is_ready = true;
|
||||
}
|
||||
|
||||
auto not_ready_it = not_ready.find(prev_fn.get());
|
||||
if (is_ready) {
|
||||
// this is only a temporary, so no need for a correct id
|
||||
grad_buffer_type prev_buffer(-1);
|
||||
if (not_ready_it == not_ready.end()) {
|
||||
// The function is ready and no buffers have been allocated for it.
|
||||
prev_buffer = grad_buffer_type(next_buf_id++, prev_fn->num_outputs);
|
||||
Py_INCREF(grad_prev);
|
||||
prev_buffer[output_idx] = grad_prev;
|
||||
// The function is ready and no buffers have been allocated for it
|
||||
GradBuffer prev_buffer(prev_fn->num_outputs);
|
||||
prev_buffer.addGrad(output_nr, std::move(grad_input));
|
||||
ready.emplace_front(prev_fn, std::move(prev_buffer));
|
||||
} else {
|
||||
// The function is ready and it already has a buffer allocated.
|
||||
prev_buffer = std::move(not_ready_it->second);
|
||||
auto prev_buffer = std::move(not_ready_it->second);
|
||||
not_ready.erase(not_ready_it);
|
||||
if (!THPEngine_add_grad(need_copy, prev_buffer, output_idx, grad_prev))
|
||||
return NULL;
|
||||
prev_buffer.addGrad(output_nr, std::move(grad_input));
|
||||
ready.emplace_front(prev_fn, std::move(prev_buffer));
|
||||
}
|
||||
// Put the function into the ready queue.
|
||||
ready.emplace_front(prev_fn, std::move(prev_buffer));
|
||||
} else {
|
||||
// Allocate a buffer if necessary
|
||||
// Allocate a buffer if necessary and accumulate gradient
|
||||
if (not_ready_it == not_ready.end()) {
|
||||
int num_prev_fn_outputs = prev_fn->num_outputs;
|
||||
std::tie(not_ready_it, std::ignore) =
|
||||
not_ready.emplace(prev_fn, grad_buffer_type(next_buf_id++, num_prev_fn_outputs));
|
||||
GradBuffer prev_buffer(prev_fn->num_outputs);
|
||||
prev_buffer.addGrad(output_nr, std::move(grad_input));
|
||||
not_ready.emplace(prev_fn.get(), std::move(prev_buffer));
|
||||
} else {
|
||||
auto &prev_buffer = not_ready_it->second;
|
||||
prev_buffer.addGrad(output_nr, std::move(grad_input));
|
||||
}
|
||||
// Accumulate the gradient into the buffer
|
||||
grad_buffer_type &grad_buffer = not_ready_it->second;
|
||||
if (!THPEngine_add_grad(need_copy, grad_buffer, output_idx, grad_prev))
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!not_ready.empty()) {
|
||||
std::string names;
|
||||
for (auto &it : not_ready) {
|
||||
if (!names.empty()) names += ", ";
|
||||
names += Py_TYPE((PyObject *)it.first)->tp_name;
|
||||
}
|
||||
THPUtils_assert(not_ready.empty(),
|
||||
"could not compute gradients for some functions (%s)", names.c_str());
|
||||
throw std::runtime_error("could not compute gradients for some functions");
|
||||
}
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
return type->tp_alloc(type, 0);
|
||||
}
|
||||
|
||||
static struct PyMethodDef THPEngine_methods[] = {
|
||||
{(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
|
||||
PyTypeObject THPEngineType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._EngineBase", /* tp_name */
|
||||
sizeof(THPEngine), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
0, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
|
||||
NULL, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
THPEngine_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
THPEngine_new /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
bool THPEngine_initModule(PyObject *module)
|
||||
{
|
||||
if (PyType_Ready(&THPEngineType) < 0)
|
||||
return false;
|
||||
Py_INCREF(&THPEngineType);
|
||||
PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
|
||||
return true;
|
||||
}
|
||||
}} // namespace torch::autograd
|
||||
|
||||
@ -1,10 +1,35 @@
#ifndef THP_ENGINE_H
#define THP_ENGINE_H
#pragma once

struct THPEngine {
  PyObject_HEAD
// Engine implements backpropagation from output variables and their gradients
// to "root" variables (variables created by the user with requires_grad=True).

#include <deque>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/grad_buffer.h"

namespace torch { namespace autograd {

struct Engine {
  using ready_queue_type = std::deque<std::pair<std::shared_ptr<Function>, GradBuffer>>;
  using function_queue = std::vector<Function*>;
  using dependencies_type = std::unordered_map<Function*, int>;

  // Given a list of output variables and their gradients, computes the
  // gradients of "root" variables by backpropagation.
  static void backward(
      const variable_list& variables,
      tensor_list& grad_variables,
      bool retain_variables);

private:
  static dependencies_type compute_dependencies(
      function_queue queue,
      ready_queue_type& ready);
};

bool THPEngine_initModule(PyObject *module);

#endif
}} // namespace torch::autograd

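// A minimal, self-contained sketch of the ready-queue discipline assumed by the
// header above: Engine::backward pushes newly ready functions with emplace_front
// and consumes them with pop_back, so the deque behaves as a FIFO. The int
// payload is a hypothetical stand-in for the (Function, GradBuffer) pairs held
// by ready_queue_type.
#include <cassert>
#include <deque>

int main() {
  std::deque<int> ready;
  for (int fn = 1; fn <= 3; ++fn) {
    ready.emplace_front(fn);   // discovered in order 1, 2, 3
  }
  int order = 1;
  while (!ready.empty()) {
    int fn = ready.back();
    ready.pop_back();
    assert(fn == order++);     // consumed in the same order: 1, 2, 3
  }
  return 0;
}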
@ -1,976 +1,31 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include "function.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <exception>
|
||||
#include <THPP/THPP.h>
|
||||
|
||||
#include "THP.h"
|
||||
#include "variable.h"
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#include "cuda/AutoGPU.h"
|
||||
#endif
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
// Throwing this exception means that the Python error flags have already been
|
||||
// set and control should be immediately returned to the interpreter.
|
||||
class python_error : public std::exception {};
|
||||
|
||||
#define THPFunction_assert(condition, ...) \
|
||||
if (!(condition)) { THPUtils_setError(__VA_ARGS__); throw python_error(); }
|
||||
|
||||
|
||||
PyObject *THPFunctionClass = NULL;
|
||||
PyObject *THPStochasticFunctionClass = NULL;
|
||||
|
||||
// Traverse and clear are required for supporting Python's GC cycle handling.
|
||||
static int THPFunction_traverse(THPFunction *self, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(self->needs_input_grad);
|
||||
Py_VISIT(self->backward_hooks);
|
||||
for (int i = 0; i < self->num_inputs; i++)
|
||||
Py_VISIT(self->previous_functions[i].get());
|
||||
if (self->saved_variables) {
|
||||
for (unsigned int i = 0; i < self->saved_variables->size(); i++)
|
||||
Py_VISIT(std::get<0>(self->saved_variables->at(i)));
|
||||
}
|
||||
if (self->output_backward_hooks) {
|
||||
for (int i = 0; i < self->num_inputs; i++)
|
||||
Py_VISIT(self->output_backward_hooks[i].get());
|
||||
}
|
||||
|
||||
Py_VISIT(self->to_save);
|
||||
Py_VISIT(self->shared_pairs);
|
||||
Py_VISIT(self->non_differentiable);
|
||||
Py_VISIT(self->dirty_tensors);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int THPFunction_clear(THPFunction *self)
|
||||
{
|
||||
self->num_inputs = 0;
|
||||
self->num_outputs = 0;
|
||||
|
||||
Py_CLEAR(self->needs_input_grad);
|
||||
Py_CLEAR(self->backward_hooks);
|
||||
|
||||
Py_CLEAR(self->to_save);
|
||||
Py_CLEAR(self->shared_pairs);
|
||||
Py_CLEAR(self->non_differentiable);
|
||||
Py_CLEAR(self->dirty_tensors);
|
||||
|
||||
THPFunctionPtr *previous_functions = self->previous_functions;
|
||||
self->previous_functions = NULL;
|
||||
delete[] previous_functions;
|
||||
|
||||
auto saved_variables = self->saved_variables;
|
||||
self->saved_variables = NULL;
|
||||
delete saved_variables;
|
||||
|
||||
auto output_backward_hooks = self->output_backward_hooks;
|
||||
self->output_backward_hooks = NULL;
|
||||
delete[] output_backward_hooks;
|
||||
|
||||
auto output_info = self->output_info;
|
||||
self->output_info = NULL;
|
||||
delete output_info;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void THPFunction_dealloc(THPFunction* self)
|
||||
{
|
||||
PyObject_GC_UnTrack(self);
|
||||
THPFunction_clear(self);
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
PyObject *THPFunction_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
THPFunction *self = (THPFunction*)type->tp_alloc(type, 0);
|
||||
if (!self)
|
||||
return NULL;
|
||||
// Python zero-initializes the object memory, so there's no need to initialize
|
||||
// most fields
|
||||
self->num_outputs = -1;
|
||||
return (PyObject*)self;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Forward
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
using t2var_type = std::unordered_map<PyObject *, THPVariable *>;
|
||||
|
||||
static void _mark_dirty(THPFunction *self, t2var_type &t2var,
|
||||
std::unordered_set<PyObject *> &dirty_inputs)
|
||||
{
|
||||
// Increase versions of modified tensors
|
||||
if (!self->dirty_tensors) return;
|
||||
|
||||
THPFunction_assert(PyTuple_Check(self->dirty_tensors), "autograd "
|
||||
"internal error: dirty_tensors attribute is expected to be a tuple "
|
||||
"but is %s", THPUtils_typename(self->dirty_tensors));
|
||||
Py_ssize_t num_dirty = PyTuple_GET_SIZE(self->dirty_tensors);
|
||||
for (int i = 0; i < num_dirty; i++) {
|
||||
PyObject *tensor = PyTuple_GET_ITEM(self->dirty_tensors, i);
|
||||
dirty_inputs.insert(tensor);
|
||||
THPVariable *variable;
|
||||
try {
|
||||
variable = t2var.at(tensor);
|
||||
} catch (std::out_of_range &e) {
|
||||
THPFunction_assert(THPModule_isTensor(tensor), "mark_dirty can "
|
||||
"only accept tensors, but argument %d is of type %s", i,
|
||||
THPUtils_typename(tensor));
|
||||
THPFunction_assert(false, "mark_dirty only accepts input tensors, but "
|
||||
"argument %d isn't one", i);
|
||||
}
|
||||
auto &v_counter = *variable->version_counter;
|
||||
THPFunction_assert(v_counter.var_refcnt() == 1, "in-place operations can be "
|
||||
"only used on variables that don't share storage with any other "
|
||||
"variables, but detected that there are %d objects sharing it",
|
||||
v_counter.var_refcnt());
|
||||
v_counter++;
|
||||
}
|
||||
// We're never going to need this again, so let's remove the references now
|
||||
Py_DECREF(self->dirty_tensors);
|
||||
self->dirty_tensors = NULL;
|
||||
}
|
||||
|
||||
static void _wrap_outputs(THPFunction *self, t2var_type &t2var,
|
||||
std::unordered_set<PyObject *> &dirty_inputs, PyObject *raw_output,
|
||||
PyObject *outputs)
|
||||
{
|
||||
// Wrap outputs in Variables
|
||||
Py_ssize_t num_outputs = PyTuple_GET_SIZE(raw_output);
|
||||
self->output_info = new std::vector<output_info_type>(num_outputs);
|
||||
auto &output_info = *self->output_info;
|
||||
for (int i = 0; i < num_outputs; i++) {
|
||||
PyObject *output = PyTuple_GET_ITEM(raw_output, i);
|
||||
THPVariable *output_var;
|
||||
auto it = t2var.find(output);
|
||||
if (it == t2var.end()) {
|
||||
// A completely new tensor - just wrap it and continue
|
||||
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self, self->requires_grad);
|
||||
} else {
|
||||
// If one of the outputs was also an input tensor it's a bit more complicated.
|
||||
THPVariable *input_var = it->second;
|
||||
if (input_var->creator) {
|
||||
// If it's not a leaf we want to move it in the graph so backprop
|
||||
// will be computed correctly:
|
||||
// creator <- variable <- self ==> creator <- self <- variable
|
||||
Py_INCREF(input_var);
|
||||
output_var = input_var;
|
||||
Py_DECREF(input_var->creator);
|
||||
Py_INCREF(self);
|
||||
input_var->creator = (PyObject*)self;
|
||||
auto Function::flags(const variable_list& inputs) -> FunctionFlags {
|
||||
int num_inputs = inputs.size();
|
||||
FunctionFlags f;
|
||||
f.requires_grad = false;
|
||||
f.is_volatile = false;
|
||||
f.previous_functions.resize(num_inputs);
|
||||
for (int i = 0; i != num_inputs; ++i) {
|
||||
auto& var = inputs[i];
|
||||
if (var) {
|
||||
f.requires_grad |= var->requires_grad;
|
||||
f.is_volatile |= var->is_volatile;
|
||||
if (var->creator) {
|
||||
f.previous_functions[i] = std::make_pair<>(var->creator, var->output_nr);
|
||||
} else {
|
||||
// If the Variable has been changed, we have to move it after the
|
||||
// current function to ensure the gradient is computed correctly.
|
||||
// There are two cases now:
|
||||
// 1. If it requires grad, it is an error, and this will be caught
|
||||
// when its _do_backward is called, because it won't be a leaf anymore.
|
||||
// Also we'll change its version.
|
||||
// 2. If it doesn't require grad, we can safely move it in the graph,
|
||||
// because its _do_backward will never be called.
|
||||
if (dirty_inputs.count(output) > 0) {
|
||||
Py_INCREF(input_var);
|
||||
output_var = input_var;
|
||||
Py_INCREF(self);
|
||||
output_var->creator = (PyObject*)self;
|
||||
if (!output_var->requires_grad && self->requires_grad) {
|
||||
// Now, there's another subtlety. We move the input in the graph
|
||||
// and we change its requires_grad to True. However, remember
|
||||
// that we're still holding a reference to it as a previous
// function. The backward engine will think that it was really a
// leaf that initially did require grad and call its _do_backward
|
||||
// and that will throw. Because of this, we need to allocate
|
||||
// a dummy leaf that doesn't require grad and put it as our
|
||||
// previous function.
|
||||
output_var->requires_grad = self->requires_grad;
|
||||
PyObject* dummy_prev_fn = THPVariable_New(output, NULL, false);
|
||||
if (!dummy_prev_fn) throw python_error();
|
||||
self->previous_functions[i] = THPFunctionPtr(dummy_prev_fn, 0);
|
||||
}
|
||||
} else {
|
||||
// An input has been returned, but it wasn't modified. It's better
|
||||
// not to move the Variable, because there are some legitimate cases
|
||||
// where making it non-leaf would break stuff (e.g. broadcast). Also,
|
||||
// returning the input Variable is not a good option either,
|
||||
// because if someone registers hooks on it, they will fire with grads
|
||||
// from all usages, not only from usages of this output. This is why
|
||||
// we'll return a copy and join their version counters. This has
|
||||
// a side-effect of making in-place ops on any of these Variables an
|
||||
// immediate error, but it would be raised anyway once someone
|
||||
// calls backward.
|
||||
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self,
|
||||
self->requires_grad);
|
||||
if (!output_var) throw python_error();
|
||||
output_var->version_counter->join_with(*input_var->version_counter);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!output_var) throw python_error();
|
||||
|
||||
torch::THPVoidTensor *output_obj = (torch::THPVoidTensor*)output_var->data;
|
||||
torch::THVoidTensor *output_tensor = output_obj->cdata;
|
||||
long ndim = output_tensor->nDimension;
|
||||
int device_id = -1;
|
||||
THPObjectPtr is_cuda = PyObject_GetAttrString(output_var->data, "is_cuda");
|
||||
if (is_cuda.get() == Py_True) {
|
||||
THPObjectPtr device_id_obj = PyObject_CallMethod(output_var->data,
|
||||
"get_device", "");
|
||||
THPFunction_assert(THPUtils_checkLong(device_id_obj), "get_device "
|
||||
"should return an int, but got %s", THPUtils_typename(device_id_obj));
|
||||
device_id = THPUtils_unpackLong(device_id_obj);
|
||||
}
|
||||
output_info[i] = std::make_tuple(
|
||||
(PyObject*)Py_TYPE(output_var->data),
|
||||
device_id,
|
||||
std::vector<long>(output_tensor->size, output_tensor->size + ndim)
|
||||
);
|
||||
t2var[output] = output_var;
|
||||
output_var->output_nr = i;
|
||||
PyTuple_SET_ITEM(outputs, i, (PyObject*)output_var);
|
||||
}
|
||||
}
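// A minimal, self-contained sketch of the version-counter idea relied on above:
// when two variables share storage, their counters are "joined" so an in-place
// modification through either alias is visible to both. ToyVersionCounter is a
// hypothetical stand-in for THPVariableVersion, kept deliberately simple.
#include <cassert>
#include <memory>

struct ToyVersionCounter {
  std::shared_ptr<int> version = std::make_shared<int>(0);
  void join_with(ToyVersionCounter& other) { version = other.version; }
  void bump() { ++*version; }
  int current() const { return *version; }
};

int main() {
  ToyVersionCounter input, output;
  output.join_with(input);               // output shares input's storage

  int saved_version = output.current();  // what save_for_backward would record
  input.bump();                          // in-place op through the *other* alias

  // A saved-tensor check (see THPFunction_saved_tensors) would now fail.
  assert(output.current() != saved_version);
  return 0;
}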
|
||||
|
||||
static void _save_variables(THPFunction*self, t2var_type &t2var)
|
||||
{
|
||||
if (!self->to_save) return;
|
||||
|
||||
THPFunction_assert(PyTuple_Check(self->to_save), "autograd internal "
|
||||
"error: to_save attribute is expected to be a tuple but is %s",
|
||||
THPUtils_typename(self->to_save));
|
||||
Py_ssize_t num_saved = PyTuple_GET_SIZE(self->to_save);
|
||||
self->saved_variables = new std::vector<saved_var_info_type>();
|
||||
self->saved_variables->reserve(num_saved);
|
||||
for (int i = 0; i < num_saved; i++) {
|
||||
PyObject *tensor = PyTuple_GET_ITEM(self->to_save, i);
|
||||
if (tensor == Py_None) {
|
||||
Py_INCREF(tensor);
|
||||
self->saved_variables->emplace_back(tensor, 0, nullptr);
|
||||
continue;
|
||||
}
|
||||
|
||||
THPVariable *variable;
|
||||
try {
|
||||
variable = t2var.at(tensor);
|
||||
} catch(std::out_of_range &e) {
|
||||
THPFunction_assert(THPModule_isTensor(tensor),
|
||||
"save_for_backward can only save tensors, but argument %d is of "
|
||||
"type %s", i, THPUtils_typename(tensor));
|
||||
THPFunction_assert(false, "save_for_backward can only save input or output "
|
||||
"tensors, but argument %d doesn't satisfy this condition", i);
|
||||
}
|
||||
|
||||
Py_INCREF(tensor);
|
||||
self->saved_variables->emplace_back(
|
||||
tensor,
|
||||
**variable->version_counter,
|
||||
std::unique_ptr<THPVariableVersion>(variable->version_counter->new_saved_ref())
|
||||
);
|
||||
}
|
||||
// Free .to_save
|
||||
Py_DECREF(self->to_save);
|
||||
self->to_save = NULL;
|
||||
}
|
||||
|
||||
static void _join_version_counters(THPFunction *self, t2var_type &t2var)
|
||||
{
|
||||
if (!self->shared_pairs) return;
|
||||
THPFunction_assert(PyTuple_Check(self->shared_pairs), "autograd internal "
|
||||
"error: shared_pairs attribute is expected to be a tuple but is %s",
|
||||
THPUtils_typename(self->shared_pairs));
|
||||
Py_ssize_t num_shared = PyTuple_GET_SIZE(self->shared_pairs);
|
||||
for (int i = 0; i < num_shared; i++) {
|
||||
PyObject *shared_tuple = PyTuple_GET_ITEM(self->shared_pairs, i);
|
||||
THPFunction_assert(PyTuple_Check(shared_tuple), "mark_shared_storages "
|
||||
"accepts a number of pairs, but one of the arguments is of type %s",
|
||||
THPUtils_typename(shared_tuple));
|
||||
THPFunction_assert(PyTuple_GET_SIZE(shared_tuple) == 2,
|
||||
"mark_shared_storages accepts pairs, but argument %d is a tuple of "
|
||||
"%d elements", i, PyTuple_GET_SIZE(shared_tuple));
|
||||
|
||||
// Now we're sure it's really a pair!
|
||||
THPVariable *v1, *v2;
|
||||
try {
|
||||
v1 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 0));
|
||||
v2 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 1));
|
||||
} catch(std::out_of_range &e) {
|
||||
// One of the tuple items wasn't present in t2var, so there are two cases:
|
||||
// 1. it's not a tensor
|
||||
// 2. it's not an input nor an output
|
||||
PyObject *t1 = PyTuple_GET_ITEM(shared_tuple, 0);
|
||||
PyObject *t2 = PyTuple_GET_ITEM(shared_tuple, 1);
|
||||
THPFunction_assert(THPModule_isTensor(t1) && THPModule_isTensor(t2),
|
||||
"mark_shared_storages accepts pairs of tensors, but one of them "
|
||||
"contains %s and %s", THPUtils_typename(t1), THPUtils_typename(t2));
|
||||
THPFunction_assert(false, "mark_shared_storages only accepts pairs of input "
|
||||
"and output tensors, but argument %d doesn't satify this "
|
||||
"condition", i);
|
||||
}
|
||||
v2->version_counter->join_with(*v1->version_counter);
|
||||
}
|
||||
// Free .shared_pairs
|
||||
Py_DECREF(self->shared_pairs);
|
||||
self->shared_pairs = NULL;
|
||||
}
|
||||
|
||||
static void _mark_non_differentiable(THPFunction *self, t2var_type &t2var)
|
||||
{
|
||||
if (!self->non_differentiable) return;
|
||||
|
||||
THPFunction_assert(PyTuple_Check(self->non_differentiable), "autograd "
|
||||
"internal error: non_differentiable attribute is expected to be a "
|
||||
"tuple but is %s", THPUtils_typename(self->non_differentiable));
|
||||
Py_ssize_t num_nondiff = PyTuple_GET_SIZE(self->non_differentiable);
|
||||
for (int i = 0; i < num_nondiff; i++) {
|
||||
PyObject *t = PyTuple_GET_ITEM(self->non_differentiable, i);
|
||||
THPVariable *var;
|
||||
try {
|
||||
var = t2var.at(t);
|
||||
THPFunction_assert(var->creator == (PyObject*)self,
|
||||
"mark_non_differentiable only accepts output tensors, but "
|
||||
"argument %d isn't an output", i);
|
||||
} catch (std::out_of_range &e) {
|
||||
THPFunction_assert(THPModule_isTensor(t), "mark_non_differentiable "
|
||||
"only accepts tensor arguments, but got %s", THPUtils_typename(t));
|
||||
THPFunction_assert(false, "mark_non_differentiable only accepts function "
|
||||
"outputs");
|
||||
}
|
||||
var->requires_grad = 0;
|
||||
}
|
||||
Py_DECREF(self->non_differentiable);
|
||||
self->non_differentiable = NULL;
|
||||
}
|
||||
|
||||
static bool _ensure_tuple(THPObjectPtr& obj)
|
||||
{
|
||||
if (PyTuple_Check(obj.get()))
|
||||
return false;
|
||||
|
||||
PyObject *tuple = PyTuple_New(1);
|
||||
if (!tuple) throw python_error();
|
||||
PyTuple_SET_ITEM(tuple, 0, obj.release());
|
||||
obj = tuple;
|
||||
return true;
|
||||
}
|
||||
|
||||
PyObject *THPFunction_do_forward(THPFunction *self, PyObject *inputs)
|
||||
{
|
||||
try {
|
||||
Py_ssize_t num_inputs = inputs ? PyTuple_GET_SIZE(inputs) : 0;
|
||||
|
||||
// Unpack inputs and check if they require gradients or are volatile
|
||||
THPObjectPtr unpacked_inputs = PyTuple_New(num_inputs);
|
||||
self->needs_input_grad = PyTuple_New(num_inputs);
|
||||
self->requires_grad = false;
|
||||
bool is_volatile = false;
|
||||
for (int i = 0; i < num_inputs; i++) {
|
||||
PyObject *input = PyTuple_GET_ITEM(inputs, i);
|
||||
THPUtils_assert(THPVariable_Check(input), "expected a Variable argument, "
|
||||
"but got %s", THPUtils_typename(input));
|
||||
THPVariable *variable = (THPVariable*)input;
|
||||
|
||||
// Unpack the variable - SET_ITEM steals a reference so INCREF it
|
||||
Py_INCREF(variable->data);
|
||||
PyTuple_SET_ITEM(unpacked_inputs.get(), i, variable->data);
|
||||
|
||||
// We can't move this to C, because it's going to be accessed from user code.
|
||||
PyTuple_SET_ITEM(self->needs_input_grad, i, PyBool_FromLong(variable->requires_grad));
|
||||
|
||||
is_volatile = is_volatile || variable->is_volatile;
|
||||
self->requires_grad = self->requires_grad || variable->requires_grad;
|
||||
}
|
||||
|
||||
|
||||
// Now we're ready to call a forward (implemented in Python)
|
||||
THPObjectPtr forward_fn = PyObject_GetAttrString((PyObject*)self, "forward");
|
||||
THPUtils_assert(forward_fn.get(), "function %s doesn't implement a required "
|
||||
"'forward' method", THPUtils_typename((PyObject*)self));
|
||||
THPObjectPtr raw_output = PyObject_CallObject(forward_fn, unpacked_inputs);
|
||||
if (!raw_output) return NULL;
|
||||
// Wrap output in a tuple, if it's not one already
|
||||
bool unpack_output = _ensure_tuple(raw_output);
|
||||
int num_outputs = PyTuple_GET_SIZE(raw_output.get());
|
||||
|
||||
|
||||
THPObjectPtr outputs = PyTuple_New(num_outputs);
|
||||
if (!outputs) return NULL;
|
||||
if (is_volatile) {
|
||||
// If one of the inputs is volatile, let's take a fast path - we want to
// minimize the overhead of inference
|
||||
for (int i = 0; i < num_outputs; i++) {
|
||||
PyObject *output = PyTuple_GET_ITEM(raw_output.get(), i);
|
||||
THPVariable *output_var = (THPVariable*)THPVariable_NewVolatile(output);
|
||||
if (!output_var) return NULL;
|
||||
output_var->output_nr = i;
|
||||
PyTuple_SET_ITEM(outputs.get(), i, (PyObject*)output_var);
|
||||
}
|
||||
} else {
|
||||
// We're not volatile, so there's a lot of bookkeeping to do...
|
||||
self->num_inputs = num_inputs;
|
||||
self->num_outputs = num_outputs;
|
||||
t2var_type t2var;
|
||||
|
||||
// Save previous functions and initialize t2var map
|
||||
self->previous_functions = new THPFunctionPtr[num_inputs];
|
||||
for (int i = 0; i < num_inputs; i++) {
|
||||
THPVariable *input_var = (THPVariable*)PyTuple_GET_ITEM(inputs, i);
|
||||
t2var.emplace(input_var->data, input_var);
|
||||
|
||||
// Save previous function in a helper class (that has a smart pointer to
|
||||
// the object and remembers which output we used).
|
||||
PyObject *prev_fn = input_var->creator ? input_var->creator : (PyObject*)input_var;
|
||||
Py_INCREF(prev_fn);
|
||||
self->previous_functions[i] = THPFunctionPtr(prev_fn, input_var->output_nr);
|
||||
}
|
||||
|
||||
std::unordered_set<PyObject *> dirty_inputs;
|
||||
_mark_dirty(self, t2var, dirty_inputs);
|
||||
_wrap_outputs(self, t2var, dirty_inputs, raw_output, outputs);
|
||||
_join_version_counters(self, t2var);
|
||||
if (self->requires_grad ||
|
||||
PyObject_IsInstance((PyObject*)self, THPStochasticFunctionClass)) {
|
||||
_save_variables(self, t2var);
|
||||
_mark_non_differentiable(self, t2var);
|
||||
}
|
||||
}
|
||||
|
||||
// Unpack the output, unless .forward() returned a tuple
|
||||
if (unpack_output) {
|
||||
PyObject *output = PyTuple_GET_ITEM(outputs.get(), 0);
|
||||
Py_INCREF(output);
|
||||
return output;
|
||||
}
|
||||
|
||||
return outputs.release();
|
||||
|
||||
} catch (python_error& e) {
|
||||
return NULL;
|
||||
} catch (std::exception& e) {
|
||||
THPUtils_setError(e.what());
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Backward
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// We need a reference to a smart pointer that will outlive the duration of
|
||||
// a function call, so that the char* pointer is valid even after it returns
|
||||
static char* _try_get_name(PyObject *hook, THPObjectPtr& tmp) {
|
||||
tmp = PyObject_GetAttrString(hook, "__name__");
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
if (tmp && PyString_Check(tmp.get())) {
|
||||
return PyString_AS_STRING(tmp.get());
|
||||
}
|
||||
#else
|
||||
if (tmp && PyUnicode_Check(tmp.get())) {
|
||||
tmp = PyUnicode_AsASCIIString(tmp.get());
|
||||
return PyBytes_AS_STRING(tmp.get());
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define OPTIONAL_HOOK_NAME \
|
||||
hook_name ? "'" : "", \
|
||||
hook_name ? hook_name : "", \
|
||||
hook_name ? "' " : ""
|
||||
|
||||
static void _ensure_correct_hook_result_single(PyObject *original,
|
||||
PyObject *returned, PyObject *hook)
|
||||
{
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
static PyObject *IS_SAME_SIZE_NAME = PyString_FromString("is_same_size");
|
||||
#else
|
||||
static PyObject *IS_SAME_SIZE_NAME = PyUnicode_FromString("is_same_size");
|
||||
#endif
|
||||
THPObjectPtr tmp;
|
||||
// Check that the type matches
|
||||
if(Py_TYPE(original) != Py_TYPE(returned)) {
|
||||
char *hook_name = _try_get_name(hook, tmp);
|
||||
THPUtils_setError("backward hook %s%s%shas changed the type of "
|
||||
"grad_input (was %s, but got %s)",
|
||||
OPTIONAL_HOOK_NAME,
|
||||
THPUtils_typename(original),
|
||||
THPUtils_typename(returned)
|
||||
);
|
||||
throw python_error();
|
||||
}
|
||||
|
||||
// Special case - None gradient. The type matches so it's everything we
|
||||
// had to check.
|
||||
if (original == Py_None) return;
|
||||
|
||||
THPVariable *original_var = (THPVariable*)original;
|
||||
THPVariable *returned_var = (THPVariable*)returned;
|
||||
|
||||
// Check that data types match
|
||||
if (Py_TYPE(original_var->data) != Py_TYPE(returned_var->data)) {
|
||||
char *hook_name = _try_get_name(hook, tmp);
|
||||
THPUtils_setError("backward hook %s%s%shas changed the type of "
|
||||
"grad_input data (was %s, but got %s)",
|
||||
OPTIONAL_HOOK_NAME,
|
||||
THPUtils_typename(original_var->data),
|
||||
THPUtils_typename(returned_var->data)
|
||||
);
|
||||
throw python_error();
|
||||
}
|
||||
|
||||
// Check that the size matches
|
||||
THPObjectPtr is_same_size = PyObject_CallMethodObjArgs(original,
|
||||
IS_SAME_SIZE_NAME, returned, NULL);
|
||||
if(is_same_size.get() != Py_True) {
|
||||
char *hook_name = _try_get_name(hook, tmp);
|
||||
THPUtils_setError("backward hook %s%s%shas changed the size of "
|
||||
"grad_input",
|
||||
OPTIONAL_HOOK_NAME
|
||||
);
|
||||
throw python_error();
|
||||
}
|
||||
}
|
||||
|
||||
static void _ensure_correct_hook_result(THPObjectPtr& grad_input,
|
||||
THPObjectPtr& result, PyObject *hook)
|
||||
{
|
||||
THPObjectPtr tmp;
|
||||
// Check that the tuple sizes match
|
||||
if (PyTuple_GET_SIZE(result.get()) != PyTuple_GET_SIZE(grad_input.get())) {
|
||||
char *hook_name = _try_get_name(hook, tmp);
|
||||
THPUtils_setError("backward hook %s%s%sreturned an incorrect number "
|
||||
"of gradients (got %ld, but expected %ld)",
|
||||
OPTIONAL_HOOK_NAME,
|
||||
PyTuple_GET_SIZE(result.get()),
|
||||
PyTuple_GET_SIZE(grad_input.get())
|
||||
);
|
||||
throw python_error();
|
||||
}
|
||||
|
||||
Py_ssize_t size = PyTuple_GET_SIZE(grad_input.get());
|
||||
for (int i = 0; i < size; i++) {
|
||||
PyObject *original = PyTuple_GET_ITEM(grad_input.get(), i);
|
||||
PyObject *returned = PyTuple_GET_ITEM(result.get(), i);
|
||||
_ensure_correct_hook_result_single(original, returned, hook);
|
||||
}
|
||||
}
|
||||
|
||||
static void _call_output_hooks(THPFunction *self, THPObjectPtr& grad_output)
|
||||
{
|
||||
if (!self->output_backward_hooks) return;
|
||||
|
||||
PyObject *key, *value;
|
||||
Py_ssize_t pos = 0;
|
||||
// We can't reuse the tuple we got, so allocate a new one.
|
||||
THPObjectPtr new_grad_output = PyTuple_New(self->num_outputs);
|
||||
if (!new_grad_output) throw python_error();
|
||||
|
||||
// FIXME: until multiple backward only
|
||||
bool updated_gradient = false;
|
||||
for (int i = 0; i < self->num_outputs; i++) {
|
||||
// Copy grad to a new tuple
|
||||
PyObject *old_grad = PyTuple_GET_ITEM(grad_output.get(), i);
|
||||
// FIXME: no need to pack them again after changing grads to Variables
|
||||
PyObject *old_grad_var;
|
||||
if (old_grad == Py_None) {
|
||||
old_grad_var = Py_None;
|
||||
Py_INCREF(Py_None);
|
||||
} else {
|
||||
old_grad_var = THPVariable_NewVolatile(old_grad);
|
||||
if (!old_grad_var) throw python_error();
|
||||
}
|
||||
PyTuple_SET_ITEM(new_grad_output.get(), i, old_grad_var);
|
||||
|
||||
// Make sure that we're really going to operate on a dict
|
||||
PyObject *hook_dict = self->output_backward_hooks[i];
|
||||
if (!hook_dict) continue;
|
||||
THPFunction_assert(PyDict_Check(hook_dict), "backward_hooks "
|
||||
"attribute has to be a dictionary");
|
||||
|
||||
while (PyDict_Next(hook_dict, &pos, &key, &value)) {
|
||||
THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
|
||||
old_grad_var, NULL);
|
||||
if (!result) throw python_error();
|
||||
|
||||
// If the hook returns something other than None, we treat that as a sign
|
||||
// to replace this grad with the return value.
|
||||
if (result.get() != Py_None) {
|
||||
updated_gradient = true;
|
||||
|
||||
// Check all possible inconsistencies of the output that we can detect
|
||||
// (sizes, types, etc.)
|
||||
_ensure_correct_hook_result_single(old_grad_var, result, value);
|
||||
|
||||
// Replace the old gradient
|
||||
PyTuple_SET_ITEM(new_grad_output.get(), i, result.release());
|
||||
Py_XDECREF(old_grad_var);
|
||||
old_grad_var = PyTuple_GET_ITEM(new_grad_output.get(), i);
|
||||
f.previous_functions[i] = std::make_pair<>(var, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: no need to do this after multiple backward
|
||||
if (updated_gradient) {
|
||||
THPObjectPtr unpacked_grad_output = PyTuple_New(self->num_outputs);
|
||||
if (!unpacked_grad_output) throw python_error();
|
||||
for (int i = 0; i < self->num_outputs; i++) {
|
||||
PyObject *grad = PyTuple_GET_ITEM(new_grad_output.get(), i);
|
||||
if (grad == Py_None) {
|
||||
Py_INCREF(Py_None);
|
||||
PyTuple_SET_ITEM(unpacked_grad_output.get(), i, Py_None);
|
||||
} else {
|
||||
THPVariable *var = (THPVariable*)grad;
|
||||
Py_INCREF(var->data);
|
||||
PyTuple_SET_ITEM(unpacked_grad_output.get(), i, var->data);
|
||||
}
|
||||
}
|
||||
grad_output = unpacked_grad_output.release();
|
||||
}
|
||||
f.requires_grad &= !f.is_volatile;
|
||||
return f;
|
||||
}
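// A minimal, self-contained sketch of the flag propagation performed by
// Function::flags above: requires_grad is the OR of the inputs' requires_grad,
// is_volatile is the OR of their is_volatile, and volatility wins. ToyVar and
// ToyFlags are hypothetical stand-ins for Variable and FunctionFlags.
#include <cassert>
#include <vector>

struct ToyVar { bool requires_grad; bool is_volatile; };
struct ToyFlags { bool requires_grad = false; bool is_volatile = false; };

ToyFlags toy_flags(const std::vector<ToyVar>& inputs) {
  ToyFlags f;
  for (const auto& var : inputs) {
    f.requires_grad |= var.requires_grad;
    f.is_volatile |= var.is_volatile;
  }
  f.requires_grad &= !f.is_volatile;  // a volatile input disables grad tracking
  return f;
}

int main() {
  ToyFlags a = toy_flags({{true, false}, {false, false}});
  assert(a.requires_grad && !a.is_volatile);

  ToyFlags b = toy_flags({{true, false}, {false, true}});  // one volatile input
  assert(!b.requires_grad && b.is_volatile);
  return 0;
}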
|
||||
|
||||
static void _call_function_hooks(THPFunction *self, THPObjectPtr& grad_input, THPObjectPtr& grad_output)
|
||||
{
|
||||
if (!self->backward_hooks) return;
|
||||
|
||||
PyObject *key, *value;
|
||||
Py_ssize_t pos = 0;
|
||||
|
||||
THPFunction_assert(PyDict_Check(self->backward_hooks), "backward_hooks "
|
||||
"attribute has to be a dictionary");
|
||||
|
||||
// FIXME: until multiple backward only
|
||||
bool updated_gradient = false;
|
||||
THPObjectPtr packed_grad_input = PyTuple_New(self->num_inputs);
|
||||
if (!packed_grad_input.get()) throw python_error();
|
||||
for (int i = 0; i < self->num_inputs; i++) {
|
||||
PyObject *tensor = PyTuple_GET_ITEM(grad_input.get(), i);
|
||||
PyObject *var;
|
||||
if (tensor == Py_None) {
|
||||
var = Py_None;
|
||||
Py_INCREF(Py_None);
|
||||
} else {
|
||||
var = THPVariable_NewVolatile(tensor);
|
||||
}
|
||||
if (!var) throw python_error();
|
||||
PyTuple_SET_ITEM(packed_grad_input.get(), i, var);
|
||||
}
|
||||
THPObjectPtr packed_grad_output = PyTuple_New(self->num_outputs);
|
||||
if (!packed_grad_output.get()) throw python_error();
|
||||
for (int i = 0; i < self->num_outputs; i++) {
|
||||
PyObject *tensor = PyTuple_GET_ITEM(grad_output.get(), i);
|
||||
PyObject *var;
|
||||
if (tensor == Py_None) {
|
||||
var = Py_None;
|
||||
Py_INCREF(Py_None);
|
||||
} else {
|
||||
var = THPVariable_NewVolatile(tensor);
|
||||
}
|
||||
if (!var) throw python_error();
|
||||
PyTuple_SET_ITEM(packed_grad_output.get(), i, var);
|
||||
}
|
||||
|
||||
while (PyDict_Next(self->backward_hooks, &pos, &key, &value)) {
|
||||
THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
|
||||
packed_grad_input.get(), packed_grad_output.get(), NULL);
|
||||
if (!result) throw python_error();
|
||||
|
||||
// If the hook returns something other than None, we treat that as a sign
|
||||
// to replace grad_input with its return value.
|
||||
if (result.get() != Py_None) {
|
||||
updated_gradient = true;
|
||||
// Make sure we're working with a tuple
|
||||
_ensure_tuple(result);
|
||||
// Check all possible inconsistencies of the output that we can detect
|
||||
// (sizes, types, etc.)
|
||||
_ensure_correct_hook_result(packed_grad_input, result, value);
|
||||
packed_grad_input = result.release();
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: until multiple backward only
|
||||
if (updated_gradient) {
|
||||
THPObjectPtr unpacked_grad_input = PyTuple_New(self->num_inputs);
|
||||
if (!unpacked_grad_input) throw python_error();
|
||||
for (int i = 0; i < self->num_inputs; i++) {
|
||||
PyObject *grad = PyTuple_GET_ITEM(packed_grad_input.get(), i);
|
||||
if (grad == Py_None) {
|
||||
Py_INCREF(Py_None);
|
||||
PyTuple_SET_ITEM(unpacked_grad_input.get(), i, Py_None);
|
||||
} else {
|
||||
THPVariable *var = (THPVariable*)grad;
|
||||
Py_INCREF(var->data);
|
||||
PyTuple_SET_ITEM(unpacked_grad_input.get(), i, var->data);
|
||||
}
|
||||
}
|
||||
grad_input = unpacked_grad_input.release();
|
||||
}
|
||||
}
|
||||
|
||||
static void _prepare_grad_output(THPFunction *self, THPObjectPtr& raw_grad_output)
|
||||
{
|
||||
#ifdef WITH_CUDA
|
||||
THCPAutoGPU gpu_guard(-1);
|
||||
#endif
|
||||
int num_grad_output = PyTuple_GET_SIZE(raw_grad_output.get());
|
||||
// First, check if any of grad_outputs is None. If not, there's nothing to do
|
||||
bool has_none = false;
|
||||
for (int i = 0; i < num_grad_output; i++) {
|
||||
if (PyTuple_GET_ITEM(raw_grad_output.get(), i) == Py_None) {
|
||||
has_none = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!has_none)
|
||||
return;
|
||||
|
||||
THPObjectPtr grad_output;
|
||||
grad_output = PyTuple_New(num_grad_output);
|
||||
if (!grad_output) throw python_error();
|
||||
|
||||
// Look for Nones and replace them with new buffers
|
||||
for (int i = 0; i < num_grad_output; i++) {
|
||||
PyObject *grad = PyTuple_GET_ITEM(raw_grad_output.get(), i);
|
||||
if (grad == Py_None) {
|
||||
auto &info = (*self->output_info)[i];
|
||||
PyObject *tensor_cls = std::get<0>(info);
|
||||
#ifdef WITH_CUDA
|
||||
gpu_guard.setDevice(std::get<1>(info));
|
||||
#endif
|
||||
std::vector<long> &sizes = std::get<2>(info);
|
||||
THPObjectPtr grad_size = THPSize_New(sizes.size(), sizes.data());
|
||||
THPObjectPtr new_grad = PyObject_CallFunctionObjArgs(tensor_cls, grad_size.get(), NULL);
|
||||
if (!new_grad) throw python_error();
|
||||
THPObjectPtr result = PyObject_CallMethod(new_grad.get(), "zero_", "");
|
||||
if (!result) throw python_error();
|
||||
grad = new_grad.release();
|
||||
} else {
|
||||
Py_INCREF(grad);
|
||||
}
|
||||
PyTuple_SET_ITEM(grad_output.get(), i, grad);
|
||||
}
|
||||
raw_grad_output = grad_output.release();
|
||||
}
|
||||
|
||||
static void _trim_grad_input(THPFunction *self, THPObjectPtr& grad_input)
|
||||
{
|
||||
int num_grads = PyTuple_GET_SIZE(grad_input.get());
|
||||
int num_prev_fns = self->num_inputs;
|
||||
if (num_grads > num_prev_fns) {
|
||||
// Check that all extra grads are none
|
||||
bool all_none = true;
|
||||
for (int i = num_prev_fns; i < num_grads; i++) {
|
||||
all_none = (PyTuple_GET_ITEM(grad_input.get(), i) == Py_None);
|
||||
if (!all_none) break;
|
||||
}
|
||||
// If yes, slice the tuple
|
||||
if (all_none) {
|
||||
num_grads = num_prev_fns;
|
||||
grad_input = PyTuple_GetSlice(grad_input.get(), 0, num_grads);
|
||||
if (!grad_input) throw python_error();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PyObject * THPFunction_do_backward(THPFunction *self, PyObject *args)
|
||||
{
|
||||
try {
|
||||
Py_ssize_t num_args = args ? PyTuple_GET_SIZE(args) : 0;
|
||||
THPUtils_assert(num_args == 2, "_do_backward expects exactly two arguments");
|
||||
PyObject *raw_grad_output = PyTuple_GET_ITEM(args, 0);
|
||||
PyObject *retain_variables = PyTuple_GET_ITEM(args, 1);
|
||||
if (!PyTuple_Check(raw_grad_output) || !PyBool_Check(retain_variables)) {
|
||||
THPUtils_invalidArguments(args, NULL, "_do_backward", 1, "(tuple, bool)");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Some of the outputs might have been unused, so we have to allocate
|
||||
// zero-filled buffers instead
|
||||
Py_INCREF(raw_grad_output);
|
||||
THPObjectPtr grad_output = raw_grad_output;
|
||||
_prepare_grad_output(self, grad_output);
|
||||
|
||||
// Call output hooks (this can modify grad_output!)
|
||||
_call_output_hooks(self, grad_output);
|
||||
|
||||
// self.backward(*grad_output)
|
||||
THPObjectPtr backward_fn = PyObject_GetAttrString((PyObject*)self, "backward");
|
||||
THPUtils_assert(backward_fn.get(), "function %s doesn't implement a required "
|
||||
"'backward' method", THPUtils_typename((PyObject*)self));
|
||||
THPObjectPtr grad_input = PyObject_CallObject(backward_fn, grad_output.get());
|
||||
if (!grad_input) return NULL;
|
||||
_ensure_tuple(grad_input);
|
||||
|
||||
// We allow functions to return more gradients than there were outputs,
|
||||
// if and only if the additional ones are all None
|
||||
_trim_grad_input(self, grad_input);
|
||||
int num_grads = PyTuple_GET_SIZE(grad_input.get());
|
||||
int num_prev_fns = self->num_inputs;
|
||||
THPUtils_assert(num_grads == num_prev_fns, "%s returned an invalid number of "
|
||||
"gradient tensors (expected %d, but got %d)", THPUtils_typename(self),
|
||||
num_prev_fns, num_grads);
|
||||
|
||||
// Call function hooks (this can modify grad_input!)
|
||||
_call_function_hooks(self, grad_input, grad_output);
|
||||
|
||||
// Free buffers only if they're not going to be ever used again
|
||||
if (retain_variables == Py_False) {
|
||||
delete self->saved_variables;
|
||||
self->saved_variables = nullptr;
|
||||
self->has_freed_buffers = 1;
|
||||
}
|
||||
|
||||
return grad_input.release();
|
||||
|
||||
} catch (python_error& e) {
|
||||
return NULL;
|
||||
} catch (std::exception& e) {
|
||||
THPUtils_setError(e.what());
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Other methods / attributes
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
PyObject* THPFunction__register_hook_dict(THPFunction *self, PyObject *_var)
|
||||
{
|
||||
THPUtils_assert(THPVariable_Check(_var), "_register_hook_dict expected a variable");
|
||||
THPVariable *var = (THPVariable*)_var;
|
||||
|
||||
if (!self->output_backward_hooks)
|
||||
self->output_backward_hooks = new THPObjectPtr[self->num_inputs];
|
||||
Py_INCREF(var->backward_hooks);
|
||||
self->output_backward_hooks[var->output_nr] = var->backward_hooks;
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
PyObject *THPFunction_saved_tensors(THPFunction *self, void *_unused)
|
||||
{
|
||||
THPUtils_assert(!self->has_freed_buffers, "Trying to backward through the "
|
||||
"graph second time, but the buffers have already been freed. Please "
|
||||
"specify retain_variables=True when calling backward for the first time.");
|
||||
if (!self->saved_variables)
|
||||
return PyTuple_New(0);
|
||||
|
||||
int num_saved = self->saved_variables->size();
|
||||
THPObjectPtr saved_tensors = PyTuple_New(num_saved);
|
||||
if (!saved_tensors)
|
||||
return NULL;
|
||||
for (int i = 0; i < num_saved; i++) {
|
||||
saved_var_info_type &tuple = (*self->saved_variables)[i];
|
||||
PyObject *tensor = std::get<0>(tuple);
|
||||
if (tensor != Py_None) {
|
||||
int expected_version = std::get<1>(tuple);
|
||||
int current_version = **(std::get<2>(tuple));
|
||||
THPUtils_assert(expected_version == current_version, "one of the variables "
|
||||
"needed for gradient computation has been modified by an "
|
||||
"inplace operation");
|
||||
}
|
||||
Py_INCREF(tensor);
|
||||
PyTuple_SET_ITEM(saved_tensors.get(), i, tensor);
|
||||
}
|
||||
return saved_tensors.release();
|
||||
}
|
||||
|
||||
PyObject *THPFunction_previous_functions(THPFunction *self, void *_unused)
|
||||
{
|
||||
THPObjectPtr previous_functions = PyTuple_New(self->num_inputs);
|
||||
if (!previous_functions)
|
||||
return NULL;
|
||||
for (int i = 0; i < self->num_inputs; i++) {
|
||||
THPObjectPtr fn_tuple = PyTuple_New(2);
|
||||
if (!fn_tuple)
|
||||
return NULL;
|
||||
Py_INCREF(self->previous_functions[i].get());
|
||||
PyTuple_SET_ITEM(fn_tuple.get(), 0, self->previous_functions[i].get());
|
||||
PyTuple_SET_ITEM(fn_tuple.get(), 1, PyInt_FromLong(self->previous_functions[i].output_nr));
|
||||
PyTuple_SET_ITEM(previous_functions.get(), i, fn_tuple.release());
|
||||
}
|
||||
return previous_functions.release();
|
||||
}
|
||||
|
||||
|
||||
typedef PyObject *(*getter)(PyObject *, void *);
|
||||
typedef int (*setter)(PyObject *, PyObject *, void *);
|
||||
|
||||
static struct PyGetSetDef THPFunction_properties[] = {
|
||||
{"saved_tensors", (getter)THPFunction_saved_tensors, NULL, NULL, NULL},
|
||||
{"previous_functions", (getter)THPFunction_previous_functions, NULL, NULL, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
static struct PyMemberDef THPFunction_members[] = {
|
||||
{(char*)"_backward_hooks", T_OBJECT, offsetof(THPFunction, backward_hooks), 0, NULL},
|
||||
{(char*)"to_save", T_OBJECT, offsetof(THPFunction, to_save), 0, NULL},
|
||||
{(char*)"shared_pairs", T_OBJECT, offsetof(THPFunction, shared_pairs), 0, NULL},
|
||||
{(char*)"non_differentiable", T_OBJECT, offsetof(THPFunction, non_differentiable), 0, NULL},
|
||||
{(char*)"dirty_tensors", T_OBJECT, offsetof(THPFunction, dirty_tensors), 0, NULL},
|
||||
{(char*)"needs_input_grad", T_OBJECT, offsetof(THPFunction, needs_input_grad), 0, NULL},
|
||||
{(char*)"requires_grad", T_BOOL, offsetof(THPFunction, requires_grad), 0, NULL},
|
||||
{(char*)"num_inputs", T_INT, offsetof(THPFunction, num_inputs), 0, NULL},
|
||||
{(char*)"num_outputs", T_INT, offsetof(THPFunction, num_outputs), 0, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
static struct PyMethodDef THPFunction_methods[] = {
|
||||
{(char*)"_do_forward", (PyCFunction)THPFunction_do_forward, METH_VARARGS, NULL},
|
||||
{(char*)"_do_backward", (PyCFunction)THPFunction_do_backward, METH_VARARGS, NULL},
|
||||
{(char*)"_register_hook_dict", (PyCFunction)THPFunction__register_hook_dict, METH_O, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
PyTypeObject THPFunctionType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._FunctionBase", /* tp_name */
|
||||
sizeof(THPFunction), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
(destructor)THPFunction_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
||||
NULL, /* tp_doc */
|
||||
(traverseproc)THPFunction_traverse, /* tp_traverse */
|
||||
(inquiry)THPFunction_clear, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
THPFunction_methods, /* tp_methods */
|
||||
THPFunction_members, /* tp_members */
|
||||
THPFunction_properties, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
THPFunction_new /* tp_new */
|
||||
};
|
||||
|
||||
bool THPFunction_initModule(PyObject *module)
|
||||
{
|
||||
if (PyType_Ready(&THPFunctionType) < 0)
|
||||
return false;
|
||||
Py_INCREF(&THPFunctionType);
|
||||
PyModule_AddObject(module, "_FunctionBase", (PyObject *)&THPFunctionType);
|
||||
return true;
|
||||
}
|
||||
}} // namespace torch::autograd
|
||||
|
||||
@ -1,61 +1,73 @@
|
||||
#ifndef THP_FUNCTION_H
|
||||
#define THP_FUNCTION_H
|
||||
#pragma once
|
||||
|
||||
struct THPFunction;
|
||||
// Function is an abstract class that represents a single operation from one or
// more variables to one or more variables.
//
// Subclasses may represent "forward" or "backward" operations (i.e. functions
// and their derivatives). Some functions may be used as both.
|
||||
|
||||
struct THPFunctionPtr: public THPObjectPtr {
|
||||
THPFunctionPtr(): THPObjectPtr(nullptr), output_nr(-1) {};
|
||||
#include <memory>
|
||||
#include <THPP/THPP.h>
|
||||
#include <vector>
|
||||
|
||||
THPFunctionPtr(PyObject *fn, int output_nr):
|
||||
THPObjectPtr(fn), output_nr(output_nr) {};
|
||||
#include "torch/csrc/autograd/saved_variable.h"
|
||||
|
||||
THPFunctionPtr(THPFunction *fn, int output_nr):
|
||||
THPObjectPtr((PyObject*)fn), output_nr(output_nr) {};
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
THPFunctionPtr(THPFunctionPtr &&other):
|
||||
THPObjectPtr(std::move(other)), output_nr(other.output_nr) {}
|
||||
struct Function;
|
||||
struct Variable;
|
||||
|
||||
THPPointer& operator =(THPFunctionPtr &&other) {
|
||||
output_nr = other.output_nr;
|
||||
THPObjectPtr::operator=(std::move(other));
|
||||
return *this;
|
||||
}
|
||||
using tensor_list = std::vector<std::unique_ptr<thpp::Tensor>>;
|
||||
using variable_list = std::vector<std::shared_ptr<Variable>>;
|
||||
using function_list = std::vector<std::pair<std::shared_ptr<Function>, int>>;
|
||||
|
||||
int output_nr;
|
||||
// State used to create "backward" functions
|
||||
struct FunctionFlags {
|
||||
bool requires_grad;
|
||||
bool is_volatile;
|
||||
function_list previous_functions;
|
||||
};
|
||||
|
||||
// (class, gpu id, sizes)
|
||||
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
|
||||
// (tensor, version when saved, version counter)
|
||||
// or
|
||||
// (None, 0, nullptr)
|
||||
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<THPVariableVersion>>;
|
||||
struct Function {
|
||||
Function()
|
||||
: num_outputs(0)
|
||||
, previous_functions()
|
||||
, requires_grad(false)
|
||||
, is_volatile(false)
|
||||
, is_stochastic(false)
|
||||
{}
|
||||
|
||||
struct THPFunction {
|
||||
PyObject_HEAD
|
||||
Function(FunctionFlags flags)
|
||||
: num_outputs(0)
|
||||
, previous_functions(std::move(flags.previous_functions))
|
||||
, requires_grad(flags.requires_grad)
|
||||
, is_volatile(flags.is_volatile)
|
||||
, is_stochastic(false)
|
||||
{}
|
||||
|
||||
PyObject *needs_input_grad;
|
||||
PyObject *backward_hooks;
|
||||
THPObjectPtr *output_backward_hooks;
|
||||
Function(const Function& other) = delete;
|
||||
Function(Function&& other) = delete;
|
||||
virtual ~Function() {}
|
||||
|
||||
PyObject *to_save;
|
||||
PyObject *shared_pairs;
|
||||
PyObject *non_differentiable;
|
||||
PyObject *dirty_tensors;
|
||||
// Implements the operation
|
||||
virtual variable_list apply(const variable_list& inputs) = 0;
|
||||
|
||||
THPFunctionPtr *previous_functions;
|
||||
std::vector<output_info_type> *output_info;
|
||||
std::vector<saved_var_info_type> *saved_variables;
|
||||
int num_inputs;
|
||||
int num_outputs;
|
||||
char requires_grad;
|
||||
char has_freed_buffers;
|
||||
// Computes requires_grad, is_volatile, and previous_functions from a list
|
||||
// of input variables
|
||||
static FunctionFlags flags(const variable_list& inputs);
|
||||
|
||||
// Releases saved variables if the operation won't be reused
|
||||
virtual inline void releaseVariables() {}
|
||||
|
||||
// These variables are usually only meaningful for "backward" functions.
|
||||
// num_outputs is the number of outputs of the corresponding "forward" function;
|
||||
// it's actually the number of inputs of this function.
|
||||
int num_outputs;
|
||||
function_list previous_functions;
|
||||
bool requires_grad;
|
||||
bool is_volatile;
|
||||
bool is_stochastic;
|
||||
};
|
||||
|
||||
bool THPFunction_initModule(PyObject *module);
|
||||
extern PyObject *THPFunctionClass;
|
||||
extern PyObject *THPStochasticFunctionClass;
|
||||
|
||||
#define THPFunction_Check(obj) PyObject_IsInstance(obj, THPFunctionClass)
|
||||
|
||||
#endif
|
||||
}} // namespace torch::autograd
|
||||
|
||||
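// A minimal, self-contained sketch of how a concrete operation plugs into the
// Function interface declared above. ToyVariable, toy_variable_list and Negate
// are hypothetical stand-ins; the real classes carry tensors, flags and
// previous_functions, but the shape of apply() is the same.
#include <cassert>
#include <memory>
#include <vector>

struct ToyVariable { double value; };
using toy_variable_list = std::vector<std::shared_ptr<ToyVariable>>;

struct ToyFunction {
  virtual ~ToyFunction() {}
  // Implements the operation (mirrors Function::apply).
  virtual toy_variable_list apply(const toy_variable_list& inputs) = 0;
};

struct Negate : public ToyFunction {
  toy_variable_list apply(const toy_variable_list& inputs) override {
    toy_variable_list outputs(1);
    outputs[0] = std::make_shared<ToyVariable>(ToyVariable{-inputs[0]->value});
    return outputs;
  }
};

int main() {
  Negate fn;
  toy_variable_list in{std::make_shared<ToyVariable>(ToyVariable{3.0})};
  auto out = fn.apply(in);
  assert(out[0]->value == -3.0);
  return 0;
}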
torch/csrc/autograd/functions/batch_normalization.cpp (new file, 166 lines)
@ -0,0 +1,166 @@
|
||||
#include "batch_normalization.h"
|
||||
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
#include "torch/csrc/nn/THNN_generic.h"
|
||||
|
||||
#ifdef WITH_CUDNN
|
||||
#include "torch/csrc/cudnn/BatchNorm.h"
|
||||
#include "torch/csrc/cudnn/Handles.h"
|
||||
#include "torch/csrc/cudnn/Types.h"
|
||||
extern THCState* state;
|
||||
#endif
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
using thpp::Tensor;
|
||||
|
||||
auto BatchNormForward::apply(const variable_list& inputs) -> variable_list {
|
||||
if (inputs.size() != 3) throw std::runtime_error("expected three inputs");
|
||||
|
||||
auto& input = inputs[0];
|
||||
auto& weight = inputs[1];
|
||||
auto& bias = inputs[2];
|
||||
|
||||
bool use_cudnn = false;
|
||||
#ifdef WITH_CUDNN
|
||||
use_cudnn = (input->data->isCuda()
|
||||
&& input->data->type() != thpp::Type::HALF
|
||||
&& weight && bias);
|
||||
#endif
|
||||
|
||||
auto output = input->data->newTensor();
|
||||
output->resizeAs(*input->data);
|
||||
|
||||
std::unique_ptr<Tensor> save_mean(output->newTensor());
|
||||
save_mean->resizeAs(*running_mean);
|
||||
std::unique_ptr<Tensor> save_std(output->newTensor());
|
||||
save_std->resizeAs(*running_var);
|
||||
|
||||
if (use_cudnn) {
|
||||
#ifdef WITH_CUDNN
|
||||
torch::cudnn::cudnn_batch_norm_forward(
|
||||
state,
|
||||
torch::cudnn::getCudnnHandle(),
|
||||
torch::cudnn::getCudnnDataType(*input->data),
|
||||
(THVoidTensor*)input->data->cdata(),
|
||||
(THVoidTensor*)output->cdata(),
|
||||
(THVoidTensor*)weight->data->cdata(),
|
||||
(THVoidTensor*)bias->data->cdata(),
|
||||
(THVoidTensor*)running_mean->cdata(),
|
||||
(THVoidTensor*)running_var->cdata(),
|
||||
(THVoidTensor*)save_mean->cdata(),
|
||||
(THVoidTensor*)save_std->cdata(),
|
||||
training,
|
||||
momentum,
|
||||
eps);
|
||||
#endif
|
||||
} else {
|
||||
torch::nn::BatchNormalization_updateOutput(
|
||||
input->data.get(),
|
||||
output.get(),
|
||||
weight ? weight->data.get() : nullptr,
|
||||
bias ? bias->data.get() : nullptr,
|
||||
running_mean.get(),
|
||||
running_var.get(),
|
||||
save_mean.get(),
|
||||
save_std.get(),
|
||||
training,
|
||||
momentum,
|
||||
eps);
|
||||
}
|
||||
|
||||
auto creator = std::make_shared<BatchNormBackward>(
|
||||
flags(inputs),
|
||||
std::unique_ptr<thpp::Tensor>(running_mean->clone_shallow()),
|
||||
std::unique_ptr<thpp::Tensor>(running_var->clone_shallow()),
|
||||
std::move(save_mean),
|
||||
std::move(save_std),
|
||||
input->save(),
|
||||
Variable::save_opt(weight.get()),
|
||||
Variable::save_opt(bias.get()),
|
||||
training,
|
||||
momentum,
|
||||
eps);
|
||||
variable_list results(1);
|
||||
results[0] = std::make_shared<Variable>(std::move(output), creator);
|
||||
return results;
|
||||
};
|
||||
|
||||
auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_list {
|
||||
auto& input = this->input.unpack();
|
||||
auto& weight = this->weight.unpack();
|
||||
auto& bias = this->bias.unpack();
|
||||
|
||||
bool use_cudnn = false;
|
||||
#ifdef WITH_CUDNN
|
||||
use_cudnn = (input->isCuda()
|
||||
&& input->type() != thpp::Type::HALF
|
||||
&& weight && bias && training);
|
||||
#endif
|
||||
|
||||
std::unique_ptr<Tensor> grad_input = input->newTensor();
|
||||
grad_input->resizeAs(*input);
|
||||
|
||||
std::unique_ptr<Tensor> grad_weight;
|
||||
if (weight) {
|
||||
grad_weight = weight->newTensor();
|
||||
grad_weight->resizeAs(*weight);
|
||||
if (!use_cudnn) {
|
||||
grad_weight->zero();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Tensor> grad_bias;
|
||||
if (bias) {
|
||||
grad_bias = bias->newTensor();
|
||||
grad_bias->resizeAs(*bias);
|
||||
grad_bias->zero();
|
||||
if (!use_cudnn) {
|
||||
grad_bias->zero();
|
||||
}
|
||||
}
|
||||
|
||||
if (use_cudnn) {
|
||||
#ifdef WITH_CUDNN
|
||||
torch::cudnn::cudnn_batch_norm_backward(
|
||||
state,
|
||||
torch::cudnn::getCudnnHandle(),
|
||||
torch::cudnn::getCudnnDataType(*input),
|
||||
(THVoidTensor*)input->cdata(),
|
||||
(THVoidTensor*)grad_outputs[0]->data->cdata(),
|
||||
(THVoidTensor*)grad_input->cdata(),
|
||||
(THVoidTensor*)grad_weight->cdata(),
|
||||
(THVoidTensor*)grad_bias->cdata(),
|
||||
(THVoidTensor*)weight->cdata(),
|
||||
(THVoidTensor*)running_mean->cdata(),
|
||||
(THVoidTensor*)running_var->cdata(),
|
||||
(THVoidTensor*)save_mean->cdata(),
|
||||
(THVoidTensor*)save_std->cdata(),
|
||||
training,
|
||||
eps);
|
||||
#endif
|
||||
} else {
|
||||
torch::nn::BatchNormalization_backward(
|
||||
input.get(),
|
||||
grad_outputs[0]->data.get(),
|
||||
grad_input.get(),
|
||||
grad_weight.get(),
|
||||
grad_bias.get(),
|
||||
weight.get(),
|
||||
running_mean.get(),
|
||||
running_var.get(),
|
||||
save_mean.get(),
|
||||
save_std.get(),
|
||||
training,
|
||||
1.0,
|
||||
eps);
|
||||
}
|
||||
|
||||
variable_list results(3);
|
||||
results[0] = Variable::of(std::move(grad_input));
|
||||
results[1] = Variable::of(std::move(grad_weight));
|
||||
results[2] = Variable::of(std::move(grad_bias));
|
||||
return results;
|
||||
};
|
||||
|
||||
}} // namespace torch::autograd
|
||||
72
torch/csrc/autograd/functions/batch_normalization.h
Normal file
72
torch/csrc/autograd/functions/batch_normalization.h
Normal file
@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <THPP/THPP.h>
|
||||
|
||||
#include "torch/csrc/autograd/function.h"
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
struct BatchNormForward : public Function {
|
||||
BatchNormForward(
|
||||
std::unique_ptr<thpp::Tensor> running_mean,
|
||||
std::unique_ptr<thpp::Tensor> running_var,
|
||||
bool training,
|
||||
double momentum,
|
||||
double eps)
|
||||
: running_mean(std::move(running_mean))
|
||||
, running_var(std::move(running_var))
|
||||
, training(training)
|
||||
, momentum(momentum)
|
||||
, eps(eps) {}
|
||||
|
||||
virtual variable_list apply(const variable_list& inputs) override;
|
||||
|
||||
std::unique_ptr<thpp::Tensor> running_mean;
|
||||
std::unique_ptr<thpp::Tensor> running_var;
|
||||
bool training;
|
||||
double momentum;
|
||||
double eps;
|
||||
};
|
||||
|
||||
struct BatchNormBackward : public Function {
|
||||
BatchNormBackward(
|
||||
FunctionFlags flags,
|
||||
std::unique_ptr<thpp::Tensor> running_mean,
|
||||
std::unique_ptr<thpp::Tensor> running_var,
|
||||
std::unique_ptr<thpp::Tensor> save_mean,
|
||||
std::unique_ptr<thpp::Tensor> save_std,
|
||||
SavedVariable input,
|
||||
SavedVariable weight,
|
||||
SavedVariable bias,
|
||||
bool training,
|
||||
double momentum,
|
||||
double eps)
|
||||
: Function(std::move(flags))
|
||||
, running_mean(std::move(running_mean))
|
||||
, running_var(std::move(running_var))
|
||||
, save_mean(std::move(save_mean))
|
||||
, save_std(std::move(save_std))
|
||||
, input(std::move(input))
|
||||
, weight(std::move(weight))
|
||||
, bias(std::move(bias))
|
||||
, training(training)
|
||||
, momentum(momentum)
|
||||
, eps(eps) {}
|
||||
|
||||
virtual variable_list apply(const variable_list& gradOutputs) override;
|
||||
|
||||
std::unique_ptr<thpp::Tensor> running_mean;
|
||||
std::unique_ptr<thpp::Tensor> running_var;
|
||||
std::unique_ptr<thpp::Tensor> save_mean;
|
||||
std::unique_ptr<thpp::Tensor> save_std;
|
||||
SavedVariable input;
|
||||
SavedVariable weight;
|
||||
SavedVariable bias;
|
||||
bool training;
|
||||
double momentum;
|
||||
double eps;
|
||||
};
|
||||
|
||||
}}
|
||||
56
torch/csrc/autograd/functions/init.cpp
Normal file
56
torch/csrc/autograd/functions/init.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
#include <Python.h>
|
||||
#include "batch_normalization.h"
|
||||
#include "torch/csrc/autograd/python_cpp_function.h"
|
||||
|
||||
using namespace torch::autograd;
|
||||
|
||||
static PyTypeObject BatchNormClass;
|
||||
static PyTypeObject BatchNormBackwardClass;
|
||||
|
||||
struct BatchNormCtor {
|
||||
BatchNormForward* operator()(PyObject* args) {
|
||||
std::unique_ptr<thpp::Tensor> running_mean;
|
||||
std::unique_ptr<thpp::Tensor> running_var;
|
||||
char training;
|
||||
double momentum;
|
||||
double eps;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O&O&Bdd:BatchNorm",
|
||||
TensorConverter, &running_mean,
|
||||
TensorConverter, &running_var,
|
||||
&training, &momentum, &eps)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return new BatchNormForward(
|
||||
std::move(running_mean),
|
||||
std::move(running_var),
|
||||
(bool)training,
|
||||
momentum,
|
||||
eps);
|
||||
}
|
||||
};
|
||||
|
||||
struct NoCtor {
|
||||
Function* operator()(PyObject* args) {
|
||||
throw std::runtime_error("Cannot construct");
|
||||
}
|
||||
};
|
||||
|
||||
template<typename C, typename T>
|
||||
static void addClass(PyObject* module, PyTypeObject& type, const char* name)
|
||||
{
|
||||
createForwardFunctionPyTypeObject<T>(type, name);
|
||||
Py_INCREF(&type);
|
||||
PyModule_AddObject(module, name, (PyObject*)&type);
|
||||
registerCppFunction(typeid(C), &type);
|
||||
}
|
||||
|
||||
bool THPAutograd_initFunctions(PyObject* _unused)
|
||||
{
|
||||
THPObjectPtr module = PyImport_ImportModule("torch.nn._functions.thnn");
|
||||
if (!module) return false;
|
||||
addClass<BatchNormForward, BatchNormCtor>(module, BatchNormClass, "BatchNorm");
|
||||
addClass<BatchNormBackward, NoCtor>(module, BatchNormBackwardClass, "BatchNormBackward");
|
||||
return true;
|
||||
}
|
||||
52
torch/csrc/autograd/grad_buffer.cpp
Normal file
52
torch/csrc/autograd/grad_buffer.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
#include "torch/csrc/autograd/grad_buffer.h"
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#include "torch/csrc/cuda/AutoGPU.h"
|
||||
#endif
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
GradBuffer::GradBuffer(size_t size)
|
||||
: buffer(size)
|
||||
{}
|
||||
|
||||
auto GradBuffer::addGrad(size_t pos, std::shared_ptr<Variable>&& var) -> void {
|
||||
auto& item = buffer[pos];
|
||||
if (!var) {
|
||||
return;
|
||||
}
|
||||
auto& tensor = var->data;
|
||||
if (!item.first) {
|
||||
buffer[pos] = std::make_pair<>(std::move(tensor), true);
|
||||
} else {
|
||||
#ifdef WITH_CUDA
|
||||
THCPAutoGPU auto_gpu(tensor->getDevice());
|
||||
#endif
|
||||
if (item.first->isSparse() && !tensor->isSparse()) {
|
||||
auto* sum = tensor->clone();
|
||||
sum->cadd(*sum, *item.first);
|
||||
item.first.reset(sum);
|
||||
} else {
|
||||
if (item.second) {
|
||||
item.first.reset(item.first->clone());
|
||||
}
|
||||
item.first->cadd(*item.first, *tensor);
|
||||
}
|
||||
item.second = false;
|
||||
}
|
||||
}
|
||||
|
||||
auto GradBuffer::variables(GradBuffer&& g) -> std::vector<std::shared_ptr<Variable>> {
|
||||
auto buffer = std::move(g.buffer);
|
||||
int size = buffer.size();
|
||||
std::vector<std::shared_ptr<Variable>> result(size);
|
||||
for (int i = 0; i != size; ++i) {
|
||||
if (buffer[i].first) {
|
||||
result[i] = std::make_shared<Variable>(
|
||||
std::move(buffer[i].first), false, true);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}} // namespace torch::autograd
|
||||
31
torch/csrc/autograd/grad_buffer.h
Normal file
31
torch/csrc/autograd/grad_buffer.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
// The GradBuffer class accumulates a list of gradients for use by a
|
||||
// "backward" function. It implements logic to avoid modiyfing the passed
|
||||
// gradients in-place
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <THPP/THPP.h>
|
||||
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
struct GradBuffer {
|
||||
explicit GradBuffer(size_t size);
|
||||
GradBuffer(const GradBuffer& other) = delete;
|
||||
GradBuffer(GradBuffer&& other) = default;
|
||||
|
||||
// Accumulates the gradient "var" at the specified index
|
||||
void addGrad(size_t idx, std::shared_ptr<Variable>&& var);
|
||||
|
||||
// Returns the gradients as a list of variables. Destroys this GradBuffer.
|
||||
static std::vector<std::shared_ptr<Variable>> variables(GradBuffer&& buffer);
|
||||
|
||||
private:
|
||||
std::vector<std::pair<std::unique_ptr<thpp::Tensor>, bool>> buffer;
|
||||
};
|
||||
|
||||
}} // namespace torch::autograd
|
||||
133
torch/csrc/autograd/python_cpp_function.cpp
Normal file
133
torch/csrc/autograd/python_cpp_function.cpp
Normal file
@ -0,0 +1,133 @@
|
||||
#include "torch/csrc/autograd/python_cpp_function.h"
|
||||
|
||||
#include <Python.h>
|
||||
#include <memory>
|
||||
#include <stdio.h>
|
||||
#include <THPP/THPP.h>
|
||||
#include <typeindex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "torch/csrc/autograd/python_function.h"
|
||||
#include "torch/csrc/autograd/python_variable.h"
|
||||
#include "torch/csrc/utils/auto_gil.h"
|
||||
#include "torch/csrc/DynamicTypes.h"
|
||||
#include "torch/csrc/Exceptions.h"
|
||||
|
||||
using namespace torch::autograd;
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
namespace {
|
||||
|
||||
PyObject* THPCppFunction_call(PyObject* self, PyObject* args, PyObject *kwargs)
|
||||
{
|
||||
if (kwargs && PyDict_Size(kwargs) != 0) {
|
||||
return PyErr_Format(PyExc_TypeError, "keyword arguments are not supported");
|
||||
}
|
||||
|
||||
int num_inputs = PyTuple_GET_SIZE(args);
|
||||
variable_list vars(num_inputs);
|
||||
for (int i = 0; i != num_inputs; ++i) {
|
||||
PyObject* arg = PyTuple_GET_ITEM(args, i);
|
||||
if (arg == Py_None) {
|
||||
continue;
|
||||
}
|
||||
if (!THPVariable_Check(arg)) {
|
||||
return PyErr_Format(PyExc_TypeError, "argument %d is not a Variable", i);
|
||||
}
|
||||
vars[i] = ((THPVariable*)arg)->cdata;
|
||||
}
|
||||
|
||||
variable_list output;
|
||||
|
||||
HANDLE_TH_ERRORS {
|
||||
AutoNoGIL nogil;
|
||||
output = ((THPCppFunction*)self)->cdata->apply(vars);
|
||||
}
|
||||
END_HANDLE_TH_ERRORS
|
||||
|
||||
int num_outputs = output.size();
|
||||
if (num_outputs == 1) {
|
||||
// assume we want to unpack one element tuples for now
|
||||
return THPVariable_Wrap(output[0]);
|
||||
}
|
||||
|
||||
THPObjectPtr tuple = PyTuple_New(num_outputs);
|
||||
for (int i = 0; i != num_outputs; ++i) {
|
||||
PyTuple_SET_ITEM(tuple.get(), i, THPVariable_Wrap(output[i]));
|
||||
}
|
||||
return tuple.release();
|
||||
}
|
||||
|
||||
void THPCppFunction_dealloc(PyObject* self)
|
||||
{
|
||||
((THPCppFunction*)self)->cdata.~shared_ptr();
|
||||
Py_TYPE(self)->tp_free(self);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address)
|
||||
{
|
||||
try {
|
||||
*address = createTensor(obj);
|
||||
} catch (std::exception& e) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"expected a tensor, got %s", Py_TYPE(obj)->tp_name);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name)
|
||||
{
|
||||
type.tp_flags = Py_TPFLAGS_DEFAULT;
|
||||
type.tp_name = name;
|
||||
type.tp_basicsize = sizeof(THPCppFunction);
|
||||
type.tp_call = THPCppFunction_call;
|
||||
type.tp_dealloc = THPCppFunction_dealloc;
|
||||
if (PyType_Ready(&type) < 0) {
|
||||
auto msg = std::string("Unable to instantiate PyTypeObject for ") + name;
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
return &type;
|
||||
}
|
||||
|
||||
static std::unordered_map<std::type_index, THPObjectPtr> cpp_function_types;
|
||||
|
||||
PyObject* functionToPyObject(std::shared_ptr<Function> cdata)
|
||||
{
|
||||
if (auto pfw = dynamic_cast<PyFunction*>(cdata.get())) {
|
||||
PyObject* obj = pfw->obj;
|
||||
Py_INCREF(obj);
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (auto var = std::dynamic_pointer_cast<Variable>(cdata)) {
|
||||
return THPVariable_Wrap(var);
|
||||
}
|
||||
|
||||
auto it = cpp_function_types.find(std::type_index(typeid(*cdata)));
|
||||
if (it == cpp_function_types.end()) {
|
||||
return PyErr_Format(PyExc_TypeError,
|
||||
"Don't know how to create Python object for %s", typeid(*cdata).name());
|
||||
}
|
||||
|
||||
PyTypeObject* type = (PyTypeObject*)it->second.get();
|
||||
THPObjectPtr obj = type->tp_alloc(type, 0);
|
||||
if (!obj) return NULL;
|
||||
THPCppFunction* f = (THPCppFunction*)obj.get();
|
||||
new (&f->cdata) std::shared_ptr<Function>(cdata);
|
||||
if (!f->cdata) {
|
||||
return NULL;
|
||||
}
|
||||
return obj.release();
|
||||
}
|
||||
|
||||
void registerCppFunction(const std::type_info& type, PyTypeObject* pytype)
|
||||
{
|
||||
Py_INCREF((PyObject*)pytype);
|
||||
cpp_function_types[std::type_index(type)] = THPObjectPtr((PyObject*)pytype);
|
||||
}
|
||||
|
||||
}} // namespace torch::autograd
|
||||
45
torch/csrc/autograd/python_cpp_function.h
Normal file
45
torch/csrc/autograd/python_cpp_function.h
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include <Python.h>
|
||||
#include <memory>
|
||||
#include <typeinfo>
|
||||
|
||||
#include "torch/csrc/autograd/function.h"
|
||||
#include "torch/csrc/utils/object_ptr.h"
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
struct THPCppFunction {
|
||||
PyObject_HEAD
|
||||
std::shared_ptr<Function> cdata;
|
||||
};
|
||||
|
||||
template<typename Ctor>
|
||||
PyObject* CppFunction_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
THPObjectPtr obj = type->tp_alloc(type, 0);
|
||||
if (!obj) return NULL;
|
||||
THPCppFunction* f = (THPCppFunction*)obj.get();
|
||||
new (&f->cdata) std::shared_ptr<Function>(Ctor()(args));
|
||||
if (!f->cdata) {
|
||||
return NULL;
|
||||
}
|
||||
return obj.release();
|
||||
}
|
||||
|
||||
PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name);
|
||||
|
||||
template<typename Ctor>
|
||||
PyTypeObject* createForwardFunctionPyTypeObject(PyTypeObject& type, const char* name)
|
||||
{
|
||||
type.tp_new = &CppFunction_pynew<Ctor>;
|
||||
return _initFunctionPyTypeObject(type, name);
|
||||
}
|
||||
|
||||
// conversion utilities for PyArg_ParseTuple
|
||||
int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address);
|
||||
|
||||
void registerCppFunction(const std::type_info& type, PyTypeObject* pytype);
|
||||
PyObject* functionToPyObject(std::shared_ptr<Function> cdata);
|
||||
|
||||
}} // namespace torch::autograd
|
||||
131
torch/csrc/autograd/python_engine.cpp
Normal file
131
torch/csrc/autograd/python_engine.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
#include "torch/csrc/autograd/python_engine.h"
|
||||
|
||||
#include "torch/csrc/autograd/engine.h"
|
||||
#include "torch/csrc/THP.h"
|
||||
#include "torch/csrc/DynamicTypes.h"
|
||||
|
||||
using namespace torch::autograd;
|
||||
|
||||
struct THPEngine {
|
||||
PyObject_HEAD
|
||||
};
|
||||
|
||||
PyObject *THPEngineClass = NULL;
|
||||
|
||||
// Main backward function
|
||||
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *variables = NULL;
|
||||
PyObject *grad_variables = NULL;
|
||||
unsigned char retain_variables = 0;
|
||||
const char *accepted_kwargs[] = {"variables", "grad_variables",
|
||||
"retain_variables", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
|
||||
&variables, &grad_variables, &retain_variables))
|
||||
return NULL;
|
||||
PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;
|
||||
|
||||
THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
|
||||
"retain_variables argument is expected to be a bool, but got %s",
|
||||
THPUtils_typename(retain_variables_obj));
|
||||
THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
|
||||
"be a tuple, but got %s", THPUtils_typename(variables));
|
||||
THPUtils_assert(PyTuple_Check(grad_variables), "variables argument is "
|
||||
"expected to be a tuple, but got %s", THPUtils_typename(grad_variables));
|
||||
|
||||
Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
|
||||
Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
|
||||
THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
|
||||
"gradients", num_variables, num_gradients);
|
||||
|
||||
variable_list vars(num_variables);
|
||||
tensor_list grads(num_variables);
|
||||
for (int i = 0; i < num_variables; i++) {
|
||||
PyObject *variable = PyTuple_GET_ITEM(variables, i);
|
||||
THPUtils_assert(THPVariable_Check(variable), "element %d of variables "
|
||||
"tuple is not a Variable", i);
|
||||
vars[i] = ((THPVariable*)variable)->cdata;
|
||||
|
||||
PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
|
||||
if (THPModule_isTensor(grad)) {
|
||||
grads[i] = torch::createTensor(grad);
|
||||
} else {
|
||||
THPUtils_assert(grad == Py_None,
|
||||
"element %d of gradients tuple is not a Tensor or None", i);
|
||||
THPUtils_assert(!vars[i]->requires_grad,
|
||||
"element %d of gradients tuple is None, but the corresponding Variable requires grad");
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
Engine::backward(vars, grads, retain_variables);
|
||||
} catch (python_error &e) {
|
||||
return nullptr;
|
||||
} catch (std::exception &e) {
|
||||
PyErr_SetString(PyExc_RuntimeError, e.what());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
return type->tp_alloc(type, 0);
|
||||
}
|
||||
|
||||
static struct PyMethodDef THPEngine_methods[] = {
|
||||
{(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
|
||||
PyTypeObject THPEngineType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._EngineBase", /* tp_name */
|
||||
sizeof(THPEngine), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
0, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
|
||||
NULL, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
THPEngine_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
THPEngine_new /* tp_new */
|
||||
};
|
||||
|
||||
bool THPEngine_initModule(PyObject *module)
|
||||
{
|
||||
if (PyType_Ready(&THPEngineType) < 0)
|
||||
return false;
|
||||
Py_INCREF(&THPEngineType);
|
||||
PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
|
||||
return true;
|
||||
}
|
||||
5
torch/csrc/autograd/python_engine.h
Normal file
5
torch/csrc/autograd/python_engine.h
Normal file
@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
bool THPEngine_initModule(PyObject *module);
|
||||
1101
torch/csrc/autograd/python_function.cpp
Normal file
1101
torch/csrc/autograd/python_function.cpp
Normal file
File diff suppressed because it is too large
Load Diff
59
torch/csrc/autograd/python_function.h
Normal file
59
torch/csrc/autograd/python_function.h
Normal file
@ -0,0 +1,59 @@
|
||||
#pragma once
|
||||
|
||||
#include <Python.h>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "torch/csrc/autograd/function.h"
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
#include "torch/csrc/utils/object_ptr.h"
|
||||
|
||||
// (class, gpu id, sizes)
|
||||
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
|
||||
// (tensor, version when saved, version counter)
|
||||
// or
|
||||
// (None, 0, nullptr)
|
||||
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<torch::autograd::VariableVersion>>;
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
struct PyFunction : public Function {
|
||||
PyFunction(PyObject* obj) : obj(obj) {}
|
||||
|
||||
virtual variable_list apply(const variable_list& inputs) override;
|
||||
virtual void releaseVariables() override;
|
||||
|
||||
PyObject* obj;
|
||||
};
|
||||
|
||||
}} // namespace torch::autograd
|
||||
|
||||
struct THPFunction {
|
||||
PyObject_HEAD
|
||||
|
||||
PyObject *needs_input_grad;
|
||||
PyObject *backward_hooks;
|
||||
THPObjectPtr *output_backward_hooks;
|
||||
|
||||
PyObject *to_save;
|
||||
PyObject *shared_pairs;
|
||||
PyObject *non_differentiable;
|
||||
PyObject *dirty_tensors;
|
||||
|
||||
std::vector<output_info_type> *output_info;
|
||||
std::vector<saved_var_info_type> *saved_variables;
|
||||
int num_inputs;
|
||||
char has_freed_buffers;
|
||||
|
||||
torch::autograd::PyFunction cdata;
|
||||
};
|
||||
|
||||
bool THPFunction_initModule(PyObject *module);
|
||||
extern PyObject *THPFunctionClass;
|
||||
extern PyObject *THPStochasticFunctionClass;
|
||||
|
||||
std::shared_ptr<torch::autograd::PyFunction> THPFunction_asFunction(THPFunction* self);
|
||||
|
||||
inline bool THPFunction_Check(PyObject* obj) {
|
||||
return PyObject_IsInstance(obj, THPFunctionClass);
|
||||
}
|
||||
399
torch/csrc/autograd/python_variable.cpp
Normal file
399
torch/csrc/autograd/python_variable.cpp
Normal file
@ -0,0 +1,399 @@
|
||||
#include "torch/csrc/autograd/python_variable.h"
|
||||
|
||||
#include <structmember.h>
|
||||
|
||||
#include "THP.h"
|
||||
#include "torch/csrc/DynamicTypes.h"
|
||||
#include "torch/csrc/Types.h"
|
||||
#include "torch/csrc/autograd/python_cpp_function.h"
|
||||
#include "torch/csrc/cuda/AutoGPU.h"
|
||||
#include "torch/csrc/utils/auto_gil.h"
|
||||
#include "torch/csrc/Exceptions.h"
|
||||
#include <THPP/tensors/THTensor.hpp>
|
||||
|
||||
|
||||
using namespace torch::autograd;
|
||||
|
||||
PyObject *THPVariableClass = NULL;
|
||||
|
||||
static PyObject* THPVariable_NewWithVar(PyTypeObject* type, std::shared_ptr<Variable> var)
|
||||
{
|
||||
PyObject* obj = type->tp_alloc(type, 0);
|
||||
if (obj) {
|
||||
auto v = (THPVariable*) obj;
|
||||
new (&v->cdata) std::shared_ptr<Variable>(std::move(var));
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
PyObject * THPVariable_Wrap(const std::shared_ptr<Variable>& var)
|
||||
{
|
||||
if (var->pyobj) {
|
||||
Py_INCREF(var->pyobj);
|
||||
} else {
|
||||
var->pyobj = THPVariable_NewWithVar((PyTypeObject *)THPVariableClass, var);
|
||||
}
|
||||
return var->pyobj;
|
||||
}
|
||||
|
||||
// This function DOES NOT steal a reference to data and creator
|
||||
// To create a leaf Variable pass NULL as creator.
|
||||
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile)
|
||||
{
|
||||
THPUtils_assert(THPModule_isTensor(data), "data must be a Tensor");
|
||||
THPUtils_assert(!creator || THPFunction_Check(creator), "creator must be a Function");
|
||||
auto v = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
|
||||
PyObject* obj = THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, v);
|
||||
if (obj) {
|
||||
v->pyobj = obj;
|
||||
v->creator = THPFunction_asFunction((THPFunction*)creator);
|
||||
((THPVariable*)obj)->data = data;
|
||||
Py_INCREF(data);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
// This function DOES NOT steal a reference to data
|
||||
PyObject * THPVariable_NewVolatile(PyObject *data)
|
||||
{
|
||||
return THPVariable_New(data, nullptr, false, true);
|
||||
}
|
||||
|
||||
static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(self->data);
|
||||
Py_VISIT(self->backward_hooks);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int THPVariable_clear(THPVariable *self)
|
||||
{
|
||||
Py_CLEAR(self->data);
|
||||
Py_CLEAR(self->backward_hooks);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void THPVariable_dealloc(THPVariable* self)
|
||||
{
|
||||
PyObject_GC_UnTrack(self);
|
||||
Py_XDECREF(self->data);
|
||||
Py_XDECREF(self->backward_hooks);
|
||||
self->cdata->pyobj = nullptr;
|
||||
self->cdata.~shared_ptr<Variable>();
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
PyObject *THPVariable_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
THPObjectPtr _data;
|
||||
PyObject *data = NULL;
|
||||
PyObject *creator = NULL;
|
||||
char is_volatile = 0;
|
||||
char requires_grad = 0;
|
||||
|
||||
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OObb", (char**)accepted_args,
|
||||
&data, &creator, &is_volatile, &requires_grad))
|
||||
return NULL;
|
||||
|
||||
if (creator == Py_None)
|
||||
creator = NULL;
|
||||
|
||||
if (data == NULL || data == Py_None) {
|
||||
// For legacy serialization code, create an empty tensor temporarily.
|
||||
thpp::THTensor<float> tensor;
|
||||
_data = torch::createPyObject(tensor);
|
||||
data = _data.get();
|
||||
}
|
||||
|
||||
THPUtils_assert(!(is_volatile && requires_grad),
|
||||
"Variable can't be volatile and require_grad at the same time!");
|
||||
THPUtils_assert(!creator || THPFunction_Check(creator),
|
||||
"Variable creator has to be a Function object or None, but got %s",
|
||||
THPUtils_typename(creator));
|
||||
THPUtils_assert(THPModule_isTensor(data), "Variable data has to "
|
||||
"be a tensor, but got %s", THPUtils_typename(data));
|
||||
|
||||
auto var = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
|
||||
PyObject* self = THPVariable_NewWithVar(type, var);
|
||||
if (self) {
|
||||
var->pyobj = self;
|
||||
var->creator = THPFunction_asFunction((THPFunction*)creator);
|
||||
((THPVariable*)self)->cdata = var;
|
||||
((THPVariable*)self)->data = data;
|
||||
Py_INCREF(data);
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
int THPVariable_pyinit(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
// Ensures that calls to Variable() and subclasses contain data argument.
|
||||
// The 'data' argument is optional in __new__ to handle legacy serialized
|
||||
// Variables.
|
||||
PyObject *data;
|
||||
PyObject *creator = NULL;
|
||||
char is_volatile = 0;
|
||||
char requires_grad = 0;
|
||||
|
||||
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Obb", (char**)accepted_args,
|
||||
&data, &creator, &is_volatile, &requires_grad))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef PyObject *(*getter)(PyObject *, void *);
|
||||
typedef int (*setter)(PyObject *, PyObject *, void *);
|
||||
|
||||
PyObject *THPVariable_get_version(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
return PyInt_FromLong(**var.version_counter);
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_creator(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
if (!var.creator) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
return functionToPyObject(var.creator);
|
||||
}
|
||||
|
||||
int THPVariable_set_creator(THPVariable *self, PyObject *obj)
|
||||
{
|
||||
THPUtils_assertRet(-1, obj == Py_None, "_creator can be only set to None");
|
||||
self->cdata->creator = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject * THPVariable_get_data(THPVariable *self)
|
||||
{
|
||||
if (!self->data) {
|
||||
auto& var = *self->cdata;
|
||||
PyTypeObject* type = torch::getPyTypeObject(*var.data);
|
||||
self->data = type->tp_alloc(type, 0);
|
||||
if (self->data) {
|
||||
((torch::THPVoidTensor*)self->data)->cdata =
|
||||
(torch::THVoidTensor *)var.data->retain().cdata();
|
||||
}
|
||||
}
|
||||
Py_INCREF(self->data);
|
||||
return self->data;
|
||||
}
|
||||
|
||||
int THPVariable_set_data(THPVariable *self, PyObject *data)
|
||||
{
|
||||
THPUtils_assertRet(-1, THPModule_isTensor(data), "Variable data has to "
|
||||
"be a tensor, but got %s", THPUtils_typename(data));
|
||||
Py_INCREF(data);
|
||||
Py_XDECREF(self->data);
|
||||
self->data = data;
|
||||
auto& var = *self->cdata;
|
||||
auto tensor = torch::createTensor(data);
|
||||
var.data.swap(tensor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_raw_grad(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
if (!var.grad) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
return THPVariable_Wrap(var.grad);
|
||||
}
|
||||
|
||||
int THPVariable_set_raw_grad(THPVariable *self, PyObject *data)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
if (data == Py_None) {
|
||||
var.grad.reset();
|
||||
return 0;
|
||||
}
|
||||
THPUtils_assertRet(-1, THPVariable_Check(data),
|
||||
"expected Variable or None (got %s)", THPUtils_typename(data));
|
||||
var.grad = ((THPVariable*)data)->cdata;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_grad(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
if (!var.grad) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
return THPVariable_Wrap(var.grad);
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_volatile(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
return PyBool_FromLong(var.is_volatile);
|
||||
}
|
||||
|
||||
int THPVariable_set_volatile(THPVariable *self, PyObject *obj)
|
||||
{
|
||||
THPUtils_assertRet(-1, PyBool_Check(obj), "volatile must be a bool");
|
||||
THPUtils_assertRet(-1, !self->cdata->creator,
|
||||
"volatile can only be set on leaf variables");
|
||||
auto& var = *self->cdata;
|
||||
var.is_volatile = (obj == Py_True);
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_output_nr(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
return PyInt_FromLong(var.output_nr);
|
||||
}
|
||||
|
||||
PyObject *THPVariable_get_requires_grad(THPVariable *self)
|
||||
{
|
||||
auto& var = *self->cdata;
|
||||
return PyBool_FromLong(var.requires_grad);
|
||||
}
|
||||
|
||||
int THPVariable_set_requires_grad(THPVariable *self, PyObject *obj)
|
||||
{
|
||||
THPUtils_assertRet(-1, PyBool_Check(obj), "requires_grad must be a bool");
|
||||
auto& var = *self->cdata;
|
||||
if (var.creator) {
|
||||
const char *hint = "";
|
||||
if (obj == Py_False) {
|
||||
hint = " If you want to use a computed variable in a subgraph "
|
||||
"that doesn't require differentiation use "
|
||||
"var_no_grad = var.detach().";
|
||||
}
|
||||
THPUtils_setError("you can only change requires_grad flags of leaf variables.%s", hint);
|
||||
return -1;
|
||||
}
|
||||
var.requires_grad = (obj == Py_True);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct PyVariableHook : public VariableHook {
|
||||
PyVariableHook(PyObject* dict) : dict(dict) {
|
||||
Py_INCREF(dict);
|
||||
}
|
||||
~PyVariableHook() {
|
||||
AutoGIL gil;
|
||||
Py_DECREF(dict);
|
||||
}
|
||||
|
||||
std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& _grad) override {
|
||||
AutoGIL gil;
|
||||
|
||||
THPObjectPtr grad = THPVariable_Wrap(_grad);
|
||||
if (!grad) throw python_error();
|
||||
|
||||
PyObject *key, *value;
|
||||
Py_ssize_t pos = 0;
|
||||
while (PyDict_Next(dict, &pos, &key, &value)) {
|
||||
THPObjectPtr res = PyObject_CallFunctionObjArgs(value, grad.get(), nullptr);
|
||||
if (!res) throw python_error();
|
||||
if (res == Py_None) continue;
|
||||
if (!PyObject_IsInstance(res.get(), THPVariableClass)) {
|
||||
PyErr_Format(PyExc_TypeError, "expected Variable, but hook returned '%s'",
|
||||
THPUtils_typename(res.get()));
|
||||
throw python_error();
|
||||
}
|
||||
grad = std::move(res);
|
||||
}
|
||||
return ((THPVariable*)grad.get())->cdata;
|
||||
}
|
||||
|
||||
PyObject* dict;
|
||||
};
|
||||
|
||||
PyObject *THPVariable_get_backwards_hooks(THPVariable *self)
|
||||
{
|
||||
if (self->backward_hooks) {
|
||||
Py_INCREF(self->backward_hooks);
|
||||
return self->backward_hooks;
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
int THPVariable_set_backwards_hooks(THPVariable *self, PyObject *obj)
|
||||
{
|
||||
if (obj == Py_None) {
|
||||
obj = nullptr;
|
||||
}
|
||||
Py_XINCREF(obj);
|
||||
Py_XDECREF(self->backward_hooks);
|
||||
self->backward_hooks = obj;
|
||||
if (obj) {
|
||||
self->cdata->backward_hook.reset(new PyVariableHook(obj));
|
||||
} else {
|
||||
self->cdata->backward_hook.reset();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct PyGetSetDef THPVariable_properties[] = {
|
||||
{"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
|
||||
{"creator", (getter)THPVariable_get_creator, NULL, NULL, NULL},
|
||||
{"_creator", (getter)THPVariable_get_creator, (setter)THPVariable_set_creator, NULL, NULL},
|
||||
{"data", (getter)THPVariable_get_data, (setter)THPVariable_set_data, NULL, NULL},
|
||||
{"_grad", (getter)THPVariable_get_raw_grad, (setter)THPVariable_set_raw_grad, NULL, NULL},
|
||||
{"grad", (getter)THPVariable_get_grad, NULL, NULL, NULL},
|
||||
{"volatile", (getter)THPVariable_get_volatile, (setter)THPVariable_set_volatile, NULL, NULL},
|
||||
{"output_nr", (getter)THPVariable_get_output_nr, NULL, NULL, NULL},
|
||||
{"requires_grad", (getter)THPVariable_get_requires_grad, (setter)THPVariable_set_requires_grad, NULL, NULL},
|
||||
{"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, NULL, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
PyTypeObject THPVariableType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._VariableBase", /* tp_name */
|
||||
sizeof(THPVariable), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
(destructor)THPVariable_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
||||
NULL, /* tp_doc */
|
||||
(traverseproc)THPVariable_traverse, /* tp_traverse */
|
||||
(inquiry)THPVariable_clear, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
THPVariable_properties, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
THPVariable_pyinit, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
THPVariable_pynew /* tp_new */
|
||||
};
|
||||
|
||||
bool THPVariable_initModule(PyObject *module)
|
||||
{
|
||||
if (PyType_Ready(&THPVariableType) < 0)
|
||||
return false;
|
||||
Py_INCREF(&THPVariableType);
|
||||
PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
|
||||
return true;
|
||||
}
|
||||
25
torch/csrc/autograd/python_variable.h
Normal file
25
torch/csrc/autograd/python_variable.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <Python.h>
|
||||
#include <memory>
|
||||
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
|
||||
struct THPVariable {
|
||||
PyObject_HEAD
|
||||
std::shared_ptr<torch::autograd::Variable> cdata;
|
||||
PyObject* data;
|
||||
PyObject* backward_hooks;
|
||||
};
|
||||
|
||||
bool THPVariable_initModule(PyObject *module);
|
||||
extern PyObject *THPVariableClass;
|
||||
PyObject * THPVariable_NewVolatile(PyObject *data);
|
||||
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile=false);
|
||||
PyObject * THPVariable_Wrap(const std::shared_ptr<torch::autograd::Variable>& var);
|
||||
PyObject * THPVariable_get_data(THPVariable *self);
|
||||
|
||||
inline bool THPVariable_Check(PyObject *obj)
|
||||
{
|
||||
return THPVariableClass && PyObject_IsInstance(obj, THPVariableClass);
|
||||
}
|
||||
31
torch/csrc/autograd/saved_variable.h
Normal file
31
torch/csrc/autograd/saved_variable.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <THPP/THPP.h>
|
||||
#include <memory>
|
||||
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
struct VariableVersion;
|
||||
|
||||
struct SavedVariable {
|
||||
SavedVariable()
|
||||
: data()
|
||||
, expected_version(-1)
|
||||
, version() {}
|
||||
|
||||
SavedVariable(
|
||||
std::unique_ptr<thpp::Tensor> data,
|
||||
int expected_version,
|
||||
std::unique_ptr<VariableVersion> version)
|
||||
: data(std::move(data))
|
||||
, expected_version(expected_version)
|
||||
, version(std::move(version)) {}
|
||||
|
||||
std::unique_ptr<thpp::Tensor> data;
|
||||
int expected_version;
|
||||
std::unique_ptr<VariableVersion> version;
|
||||
|
||||
std::unique_ptr<thpp::Tensor>& unpack();
|
||||
};
|
||||
|
||||
}} // namespace torch::autograd
|
||||
@ -1,276 +1,109 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include "torch/csrc/autograd/variable.h"
|
||||
|
||||
#include "THP.h"
|
||||
#ifdef WITH_CUDA
|
||||
#include "torch/csrc/cuda/AutoGPU.h"
|
||||
#endif
|
||||
|
||||
PyObject *THPVariableClass = NULL;
|
||||
using namespace torch;
|
||||
using namespace thpp;
|
||||
|
||||
constexpr size_t CACHE_SIZE = 100000;
|
||||
static THPVariable *cached_variables[CACHE_SIZE];
|
||||
static size_t num_cached;
|
||||
namespace torch { namespace autograd {
|
||||
|
||||
// This helper steals a reference to data and creator
|
||||
static inline THPVariable * pop_cache(PyObject *data, PyObject *creator, char requires_grad)
|
||||
Variable::Variable(
|
||||
std::unique_ptr<thpp::Tensor> data,
|
||||
bool requires_grad,
|
||||
bool is_volatile)
|
||||
: data(std::move(data))
|
||||
, creator(nullptr)
|
||||
, grad(nullptr)
|
||||
, version_counter(new VariableVersion())
|
||||
, output_nr(0)
|
||||
, backward_hook()
|
||||
, pyobj(nullptr)
|
||||
{
|
||||
THPVariable *self = cached_variables[--num_cached];
|
||||
PyObject_Init((PyObject*)self, Py_TYPE(self));
|
||||
PyObject_GC_Track(self);
|
||||
|
||||
self->is_volatile = 0;
|
||||
self->version_counter = new THPVariableVersion();
|
||||
self->grad = NULL;
|
||||
self->backward_hooks = NULL;
|
||||
self->requires_grad = requires_grad;
|
||||
|
||||
self->data = data;
|
||||
self->creator = creator;
|
||||
return self;
|
||||
if (!this->data) {
|
||||
throw std::runtime_error("Variable data is NULL");
|
||||
}
|
||||
this->is_volatile = is_volatile;
|
||||
this->requires_grad = requires_grad;
|
||||
}
|
||||
|
||||
// This function DOES NOT steal a reference to data
|
||||
PyObject * THPVariable_NewVolatile(PyObject *data)
|
||||
Variable::Variable(
|
||||
std::unique_ptr<thpp::Tensor> data,
|
||||
std::shared_ptr<Function> creator)
|
||||
: data(std::move(data))
|
||||
, creator(creator)
|
||||
, grad(nullptr)
|
||||
, version_counter(new VariableVersion())
|
||||
, output_nr(creator->num_outputs++)
|
||||
, backward_hook()
|
||||
, pyobj(nullptr)
|
||||
{
|
||||
THPVariable *variable;
|
||||
if (num_cached > 0) {
|
||||
Py_INCREF(data);
|
||||
variable = pop_cache(data, NULL, 0);
|
||||
if (!this->data) {
|
||||
throw std::runtime_error("Variable data is NULL");
|
||||
}
|
||||
this->is_volatile = creator->is_volatile;
|
||||
this->requires_grad = creator->requires_grad;
|
||||
previous_functions.resize(1);
|
||||
previous_functions[0] = std::make_pair<>(creator, output_nr);
|
||||
}
|
||||
|
||||
bool Variable::is_cuda()
|
||||
{
|
||||
return data->isCuda();
|
||||
}
|
||||
|
||||
auto Variable::backward(std::shared_ptr<Variable> gradOutput) -> void {
|
||||
if (backward_hook) {
|
||||
gradOutput = (*backward_hook)(gradOutput);
|
||||
}
|
||||
#ifdef WITH_CUDA
|
||||
THCPAutoGPU auto_gpu(gradOutput->data->getDevice());
|
||||
#endif
|
||||
if (!grad) {
|
||||
std::unique_ptr<Tensor> data(gradOutput->data->clone());
|
||||
grad = std::make_shared<Variable>(std::move(data), false, true);
|
||||
} else if (grad->data->isSparse() && !gradOutput->data->isSparse()) {
|
||||
auto* sum = gradOutput->data->clone();
|
||||
sum->cadd(*sum, *grad->data);
|
||||
grad->data.reset(sum);
|
||||
} else {
|
||||
variable = (THPVariable*)PyObject_CallFunctionObjArgs(THPVariableClass, data, NULL);
|
||||
}
|
||||
if (variable) ((THPVariable*)variable)->is_volatile = 1;
|
||||
return (PyObject*)variable;
|
||||
}
|
||||
|
||||
// This function DOES NOT steal a reference to data and creator
|
||||
// To create a leaf Variable pass NULL as creator.
|
||||
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad)
|
||||
{
|
||||
if (num_cached > 0) {
|
||||
Py_INCREF(data);
|
||||
Py_XINCREF(creator);
|
||||
return (PyObject*)pop_cache(data, creator, requires_grad);
|
||||
}
|
||||
// We can't pass a NULL creator to this Python call, because Py_BuildValue
|
||||
// will raise an error (it tries to be overly smart by setting its own error
|
||||
// if there's no flag set at the moment and we're giving NULL to some
|
||||
// function).
|
||||
creator = creator ? creator : Py_None;
|
||||
return PyObject_CallFunction(THPVariableClass, "OObb", data, creator, (char)0, requires_grad);
|
||||
}
|
||||
|
||||
static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(self->creator);
|
||||
Py_VISIT(self->data);
|
||||
Py_VISIT(self->grad);
|
||||
Py_VISIT(self->backward_hooks);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int THPVariable_clear(THPVariable *self)
|
||||
{
|
||||
Py_CLEAR(self->creator);
|
||||
Py_CLEAR(self->data);
|
||||
Py_CLEAR(self->grad);
|
||||
Py_CLEAR(self->backward_hooks);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void THPVariable_dealloc(THPVariable* self)
|
||||
{
|
||||
PyObject_GC_UnTrack(self);
|
||||
Py_XDECREF(self->creator);
|
||||
Py_XDECREF(self->data);
|
||||
Py_XDECREF(self->grad);
|
||||
Py_XDECREF(self->backward_hooks);
|
||||
delete self->version_counter;
|
||||
self->version_counter = nullptr;
|
||||
|
||||
// We don't want to cache any subclasses
|
||||
if ((PyObject*)Py_TYPE(self) == THPVariableClass && num_cached < CACHE_SIZE) {
|
||||
cached_variables[num_cached++] = self;
|
||||
// Variable class is defined in Python code, and as such has a
|
||||
// Py_TPFLAGS_HEAPTYPE flag set, so python DECREFs the class at each
|
||||
// object dealloc.
|
||||
Py_INCREF(Py_TYPE(self));
|
||||
} else {
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
grad->data->cadd(*grad->data, *gradOutput->data);
|
||||
}
|
||||
}
|
||||
|
||||
PyObject *THPVariable_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
THPVariable *self;
|
||||
if ((PyObject*)type != THPVariableClass || num_cached == 0) {
|
||||
self = (THPVariable*)type->tp_alloc(type, 0);
|
||||
if (!self) return NULL;
|
||||
self->version_counter = new THPVariableVersion();
|
||||
} else {
|
||||
self = pop_cache(NULL, NULL, 0);
|
||||
auto Variable::apply(const variable_list& gradOutputs) -> variable_list {
|
||||
if (creator || **version_counter != 0) {
|
||||
throw std::runtime_error("leaf variable was used in an inplace operation");
|
||||
}
|
||||
return (PyObject*)self;
|
||||
if (gradOutputs.size() != 1) {
|
||||
throw std::runtime_error("incorrect number of gradOutputs");
|
||||
}
|
||||
backward(gradOutputs[0]);
|
||||
return variable_list();
|
||||
}
|
||||
|
||||
int THPVariable_init(THPVariable *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Obb", (char**)accepted_args,
|
||||
&self->data, &self->creator, &self->is_volatile,
|
||||
&self->requires_grad))
|
||||
return -1;
|
||||
Py_INCREF(self->data);
|
||||
if (self->creator == Py_None)
|
||||
self->creator = NULL;
|
||||
Py_XINCREF(self->creator);
|
||||
THPUtils_assertRet(-1, !(self->is_volatile && self->requires_grad),
|
||||
"Variable can't be volatile and require_grad at the same time!");
|
||||
THPUtils_assertRet(-1, !self->creator || THPFunction_Check(self->creator),
|
||||
"Variable creator has to be a Function object or None, but got %s",
|
||||
THPUtils_typename(self->creator));
|
||||
THPUtils_assertRet(-1, THPModule_isTensor(self->data), "Variable data has to "
|
||||
"be a tensor, but got %s", THPUtils_typename(self->data));
|
||||
return 0;
|
||||
auto Variable::save() const -> SavedVariable {
|
||||
return SavedVariable(
|
||||
std::unique_ptr<Tensor>(data->clone_shallow()),
|
||||
**version_counter,
|
||||
std::unique_ptr<VariableVersion>(version_counter->new_saved_ref()));
|
||||
}
|
||||
|
||||
PyObject * THPVariable_getstate(THPVariable *self)
|
||||
{
|
||||
THPUtils_assert(!self->creator, "serialization of non-leaf variables is not "
|
||||
"implemented yet");
|
||||
THPObjectPtr state = PyTuple_New(5);
|
||||
if (!state)
|
||||
return NULL;
|
||||
|
||||
Py_INCREF(self->data);
|
||||
PyTuple_SET_ITEM(state.get(), 0, self->data);
|
||||
|
||||
PyObject *grad = self->grad ? self->grad : Py_None;
|
||||
Py_INCREF(grad);
|
||||
PyTuple_SET_ITEM(state.get(), 1, grad);
|
||||
|
||||
PyObject *backward_hooks = self->backward_hooks ? self->backward_hooks : Py_None;
|
||||
Py_INCREF(backward_hooks);
|
||||
PyTuple_SET_ITEM(state.get(), 2, backward_hooks);
|
||||
|
||||
PyTuple_SET_ITEM(state.get(), 3, PyBool_FromLong(self->requires_grad));
|
||||
PyTuple_SET_ITEM(state.get(), 4, PyBool_FromLong(self->is_volatile));
|
||||
|
||||
return state.release();
|
||||
auto Variable::save_opt(Variable* var) -> SavedVariable {
|
||||
return var ? var->save() : SavedVariable();
|
||||
}
|
||||
|
||||
PyObject * THPVariable_setstate(THPVariable *self, PyObject *state)
|
||||
{
|
||||
THPUtils_assert(!self->creator, "__setstate__ can be only called on leaf "
|
||||
"variables");
|
||||
THPUtils_assert(PyTuple_Check(state), "__setstate__ expects state to be a "
|
||||
"tuple");
|
||||
Py_ssize_t size = PyTuple_GET_SIZE(state);
|
||||
THPUtils_assert(size == 5, "__setstate__ expects state tuple to have 5 "
|
||||
"elements, but it has %d", size);
|
||||
|
||||
#define LOAD(NAME, IDX) \
|
||||
Py_XDECREF(self->NAME); \
|
||||
self->NAME = PyTuple_GET_ITEM(state, IDX) == Py_None ? NULL : PyTuple_GET_ITEM(state, IDX); \
|
||||
Py_XINCREF(self->NAME);
|
||||
THPUtils_assert(THPModule_isTensor(PyTuple_GET_ITEM(state, 0)), "first "
|
||||
"element of variable state tuple has to be a tensor");
|
||||
LOAD(data, 0);
|
||||
|
||||
LOAD(grad, 1);
|
||||
LOAD(backward_hooks, 2);
|
||||
#undef LOAD
|
||||
|
||||
PyObject *requires_grad_obj = PyTuple_GET_ITEM(state, 3);
|
||||
PyObject *is_volatile_obj = PyTuple_GET_ITEM(state, 4);
|
||||
THPUtils_assert(PyBool_Check(requires_grad_obj), "requires_grad "
|
||||
"found in state was expected to be a bool, but got %s",
|
||||
THPUtils_typename(requires_grad_obj));
|
||||
THPUtils_assert(PyBool_Check(is_volatile_obj), "is_volatile "
|
||||
"found in state was expected to be a bool, but got %s",
|
||||
THPUtils_typename(is_volatile_obj));
|
||||
self->requires_grad= requires_grad_obj == Py_True ? 1 : 0;
|
||||
self->is_volatile = is_volatile_obj == Py_True ? 1 : 0;
|
||||
|
||||
Py_RETURN_NONE;
|
||||
auto SavedVariable::unpack() -> std::unique_ptr<thpp::Tensor>& {
|
||||
if (data) {
|
||||
int current_version = **version;
|
||||
if (expected_version != current_version) {
|
||||
throw std::runtime_error("one of the variables "
|
||||
"needed for gradient computation has been modified by an "
|
||||
"inplace operation");
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
typedef PyObject *(*getter)(PyObject *, void *);
|
||||
typedef int (*setter)(PyObject *, PyObject *, void *);
|
||||
|
||||
PyObject *THPVariable_get_version(THPVariable *self)
|
||||
{
|
||||
return PyInt_FromLong(**self->version_counter);
|
||||
}
|
||||
|
||||
static struct PyGetSetDef THPVariable_properties[] = {
|
||||
{"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
static struct PyMemberDef THPVariable_members[] = {
|
||||
{(char*)"creator", T_OBJECT, offsetof(THPVariable, creator), 0, NULL},
|
||||
{(char*)"data", T_OBJECT, offsetof(THPVariable, data), 0, NULL},
|
||||
{(char*)"_grad", T_OBJECT, offsetof(THPVariable, grad), 0, NULL},
|
||||
{(char*)"volatile", T_BOOL, offsetof(THPVariable, is_volatile), 0, NULL},
|
||||
{(char*)"output_nr", T_INT, offsetof(THPVariable, output_nr), 0, NULL},
|
||||
{(char*)"_backward_hooks",T_OBJECT, offsetof(THPVariable, backward_hooks), 0, NULL},
|
||||
{(char*)"_requires_grad", T_BOOL, offsetof(THPVariable, requires_grad), 0, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
static struct PyMethodDef THPVariable_methods[] = {
|
||||
{"__getstate__", (PyCFunction)THPVariable_getstate, METH_NOARGS, NULL},
|
||||
{"__setstate__", (PyCFunction)THPVariable_setstate, METH_O, NULL},
|
||||
{NULL}
|
||||
};
|
||||
|
||||
|
||||
PyTypeObject THPVariableType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._VariableBase", /* tp_name */
|
||||
sizeof(THPVariable), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
(destructor)THPVariable_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
||||
NULL, /* tp_doc */
|
||||
(traverseproc)THPVariable_traverse, /* tp_traverse */
|
||||
(inquiry)THPVariable_clear, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
THPVariable_methods, /* tp_methods */
|
||||
THPVariable_members, /* tp_members */
|
||||
THPVariable_properties, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
(initproc)THPVariable_init, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
THPVariable_new /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
bool THPVariable_initModule(PyObject *module)
|
||||
{
|
||||
if (PyType_Ready(&THPVariableType) < 0)
|
||||
return false;
|
||||
Py_INCREF(&THPVariableType);
|
||||
PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
|
||||
return true;
|
||||
}
|
||||
}} // namespace torch::autograd
|
||||
|
||||
@@ -1,8 +1,57 @@
#ifndef THP_VARIABLE_H
#define THP_VARIABLE_H
#pragma once

struct THPVariableVersion {
  THPVariableVersion() {
#include <memory>
#include <functional>
#include <THPP/THPP.h>

#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/saved_variable.h"
#include "torch/csrc/Types.h"

namespace torch { namespace autograd {

struct VariableHook;
struct VariableVersion;

struct Variable : public Function {
  Variable(
      std::unique_ptr<thpp::Tensor> data,
      std::shared_ptr<Function> creator);
  Variable(
      std::unique_ptr<thpp::Tensor> data,
      bool requires_grad,
      bool is_volatile);

  bool is_cuda();
  bool is_sparse();
  void backward(std::shared_ptr<Variable> gradOutput);
  virtual variable_list apply(const variable_list& gradOutputs) override;

  SavedVariable save() const;
  static SavedVariable save_opt(Variable* var);

  static inline std::shared_ptr<Variable> of(std::unique_ptr<thpp::Tensor> data) {
    if (!data) {
      return std::shared_ptr<Variable>();
    }
    return std::make_shared<Variable>(std::move(data), 0, 0);
  }

  std::unique_ptr<thpp::Tensor> data;
  std::shared_ptr<Function> creator;
  std::shared_ptr<Variable> grad;
  std::unique_ptr<VariableVersion> version_counter;
  int output_nr;
  std::unique_ptr<VariableHook> backward_hook;
  PyObject *pyobj;  // weak reference
};

struct VariableHook {
  virtual std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& grad) = 0;
};

struct VariableVersion {
  VariableVersion() {
    saved_ref = false;
    version_block = new int[3];
    version_block[0] = 0; // version
@@ -16,15 +65,15 @@ struct THPVariableVersion {

  int var_refcnt() { return version_block[2]; }

  void join_with(THPVariableVersion &other) {
  void join_with(VariableVersion &other) {
    cleanup();
    version_block = other.version_block;
    version_block[1]++;
    version_block[2]++;
  }

  THPVariableVersion* new_saved_ref() {
    auto new_ver = new THPVariableVersion();
  VariableVersion* new_saved_ref() {
    auto new_ver = new VariableVersion();
    new_ver->cleanup();
    new_ver->version_block = version_block;
    version_block[1]++;
@@ -39,36 +88,10 @@ struct THPVariableVersion {
    version_block = nullptr;
  }

  ~THPVariableVersion() { cleanup(); }
  ~VariableVersion() { cleanup(); }

  int *version_block;
  bool saved_ref;
};

struct THPVariable {
  PyObject_HEAD
  PyObject *creator;
  PyObject *data;
  PyObject *grad;
  PyObject *backward_hooks;
  THPVariableVersion *version_counter;
  int output_nr;
  char is_volatile;
  char requires_grad;
};

bool THPVariable_initModule(PyObject *module);
extern PyObject *THPVariableClass;
PyObject * THPVariable_NewVolatile(PyObject *data);
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad);

#define THPVariable_Check(obj) \
  (THPVariableClass && \
   PyObject_IsInstance(obj, THPVariableClass))

#define THPVariable_CheckType(obj, func) \
  (THPVariableClass && \
   (PyObject_IsInstance(obj, THPVariableClass) && \
    func(((THPVariable*)obj)->data)))

#endif
}} // namespace torch::autograd

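// A minimal, standalone sketch of the shared version-counter idea used by
// VariableVersion above. The exact meaning of version_block[1]/[2] is not
// fully visible in this hunk, so this illustrates only the pattern (several
// owners sharing one heap-allocated counter block), not the real
// implementation; all names below are illustrative.
#include <cstdio>

struct SharedVersion {
  SharedVersion() : block(new int[3]{0, 1, 1}) {}   // [version, block refs, var refs]
  void bump() { ++block[0]; }                        // record an in-place modification
  int version() const { return block[0]; }
  void join_with(SharedVersion& other) {             // start sharing other's block
    release();
    block = other.block;
    ++block[1];
    ++block[2];
  }
  void release() {
    if (block && --block[1] == 0) delete[] block;
    block = nullptr;
  }
  ~SharedVersion() { release(); }
  int* block;
};

int main() {
  SharedVersion a, b;
  b.join_with(a);                                    // a and b now share one counter
  a.bump();
  std::printf("%d %d\n", a.version(), b.version());  // prints "1 1"
}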
@@ -62,6 +62,16 @@ void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order,
  }
}

void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
  for (size_t i = 0; i < len; i++) {
    union { uint16_t x; THHalf f; };
    x = (order == THP_BIG_ENDIAN ? decodeUInt16BE(src) : decodeUInt16LE(src));
    dst[i] = f;
    src += sizeof(uint16_t);
  }
}

void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
  for (size_t i = 0; i < len; i++) {

@@ -3,6 +3,7 @@

#include <stdint.h>
#include <stddef.h>
#include <THHalf.h>

enum THPByteOrder {
  THP_LITTLE_ENDIAN = 0,
@@ -14,6 +15,7 @@ THPByteOrder THP_nativeByteOrder();
void THP_decodeInt16Buffer(int16_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt32Buffer(int32_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order, size_t len);

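// Standalone sketch of the 16-bit decode used by THP_decodeHalfBuffer above.
// decodeUInt16LE/BE are defined elsewhere in the same file (not shown in this
// hunk); the two helpers below reproduce the usual implementation so the
// snippet compiles on its own.
#include <cstdint>
#include <cstdio>

static uint16_t decodeUInt16LE(const uint8_t* d) { return d[0] | (uint16_t(d[1]) << 8); }
static uint16_t decodeUInt16BE(const uint8_t* d) { return d[1] | (uint16_t(d[0]) << 8); }

int main() {
  const uint8_t bytes[2] = {0x34, 0x12};
  std::printf("LE: 0x%04x  BE: 0x%04x\n",
              decodeUInt16LE(bytes),   // 0x1234
              decodeUInt16BE(bytes));  // 0x3412
}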
@@ -3,59 +3,68 @@
#include "THCP.h"
#include <THC/THC.h>

THCPAutoGPU::THCPAutoGPU(int device_id) {
  setDevice(device_id);
}

THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self) {
  if (self && setObjDevice(self))
    return;

  if (!args)
    return;
  for (int i = 0; i < PyTuple_Size(args); i++) {
    PyObject *arg = PyTuple_GET_ITEM(args, i);
    if (setObjDevice(arg)) return;
  }
}

bool THCPAutoGPU::setObjDevice(PyObject *obj) {
  int new_device = -1;
static int getObjDevice(PyObject *obj) {
  PyObject *obj_type = (PyObject*)Py_TYPE(obj);
  if (obj_type == THCPDoubleTensorClass) {
    new_device = THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
    return THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
  } else if (obj_type == THCPFloatTensorClass) {
    new_device = THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
    return THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
  } else if (obj_type == THCPHalfTensorClass) {
    new_device = THCudaHalfTensor_getDevice(LIBRARY_STATE ((THCPHalfTensor*)obj)->cdata);
    return THCudaHalfTensor_getDevice(LIBRARY_STATE ((THCPHalfTensor*)obj)->cdata);
  } else if (obj_type == THCPLongTensorClass) {
    new_device = THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
    return THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
  } else if (obj_type == THCPIntTensorClass) {
    new_device = THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
    return THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
  } else if (obj_type == THCPShortTensorClass) {
    new_device = THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
    return THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
  } else if (obj_type == THCPCharTensorClass) {
    new_device = THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
    return THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
  } else if (obj_type == THCPByteTensorClass) {
    new_device = THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
    return THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
  } else if (obj_type == THCSPDoubleTensorClass) {
    return THCSDoubleTensor_getDevice(LIBRARY_STATE ((THCSPDoubleTensor*)obj)->cdata);
  } else if (obj_type == THCSPFloatTensorClass) {
    return THCSFloatTensor_getDevice(LIBRARY_STATE ((THCSPFloatTensor*)obj)->cdata);
  } else if (obj_type == THCSPHalfTensorClass) {
    return THCSHalfTensor_getDevice(LIBRARY_STATE ((THCSPHalfTensor*)obj)->cdata);
  } else if (obj_type == THCSPLongTensorClass) {
    return THCSLongTensor_getDevice(LIBRARY_STATE ((THCSPLongTensor*)obj)->cdata);
  } else if (obj_type == THCSPIntTensorClass) {
    return THCSIntTensor_getDevice(LIBRARY_STATE ((THCSPIntTensor*)obj)->cdata);
  } else if (obj_type == THCSPShortTensorClass) {
    return THCSShortTensor_getDevice(LIBRARY_STATE ((THCSPShortTensor*)obj)->cdata);
  } else if (obj_type == THCSPCharTensorClass) {
    return THCSCharTensor_getDevice(LIBRARY_STATE ((THCSPCharTensor*)obj)->cdata);
  } else if (obj_type == THCSPByteTensorClass) {
    return THCSByteTensor_getDevice(LIBRARY_STATE ((THCSPByteTensor*)obj)->cdata);
  }
  return setDevice(new_device);
  return -1;
}

bool THCPAutoGPU::setDevice(int new_device) {
  if (new_device == -1)
    return false;

  if (device == -1)
    THCudaCheck(cudaGetDevice(&device));
  if (new_device != device)
    THCPModule_setDevice(new_device);
  return true;
static int getObjDevice(PyObject *args, PyObject *self) {
  if (self) {
    int device = getObjDevice(self);
    if (device != -1) {
      return device;
    }
  }
  if (args) {
    for (int i = 0; i < PyTuple_Size(args); i++) {
      int device = getObjDevice(PyTuple_GET_ITEM(args, i));
      if (device != -1) {
        return device;
      }
    }
  }
  return -1;
}

// This can throw... But if it does I have no idea how to recover.
THCPAutoGPU::~THCPAutoGPU() {
  if (device != -1)
    THCPModule_setDevice(device);
THCPAutoGPU::THCPAutoGPU(int device_id) : AutoGPU(device_id) {}

THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self)
  : AutoGPU(getObjDevice(args, self)) {
}

void THCPAutoGPU::setObjDevice(PyObject *obj) {
  setDevice(getObjDevice(obj));
}

@@ -2,15 +2,13 @@
#define THCP_AUTOGPU_INC

#include <Python.h>
#include "torch/csrc/utils/auto_gpu.h"

class THCPAutoGPU {
class THCPAutoGPU : public AutoGPU {
public:
  THCPAutoGPU(int device_id=-1);
  explicit THCPAutoGPU(int device_id=-1);
  THCPAutoGPU(PyObject *args, PyObject *self=NULL);
  ~THCPAutoGPU();
  bool setObjDevice(PyObject *obj);
  bool setDevice(int new_device);
  int device = -1;
  void setObjDevice(PyObject *obj);
};

#endif

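// The diff above turns THCPAutoGPU into a thin wrapper over an AutoGPU base
// class (declared in torch/csrc/utils/auto_gpu.h, whose contents are not part
// of this diff). The sketch below is an assumption about what such an RAII
// device guard looks like: remember the current device, switch if asked, and
// restore on destruction. Error checking is omitted for brevity.
#include <cuda_runtime.h>

struct DeviceGuardSketch {
  explicit DeviceGuardSketch(int new_device = -1) {
    if (new_device == -1) return;                   // -1 means "leave the device alone"
    cudaGetDevice(&original);                       // remember where we were
    if (new_device != original) cudaSetDevice(new_device);
    else original = -1;                             // nothing to restore later
  }
  ~DeviceGuardSketch() {
    if (original != -1) cudaSetDevice(original);    // restore on scope exit
  }
  int original = -1;
};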
@ -15,26 +15,26 @@ THCState *state;
|
||||
// Class pointer cache
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool THCPModule_loadClasses(PyObject *module_dict)
|
||||
static bool THCPModule_loadClasses(PyObject *torch_module)
|
||||
{
|
||||
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
|
||||
ASSERT_NOT_NULL(THCPDoubleStorageClass = PyMapping_GetItemString(module_dict, (char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THCPFloatStorageClass = PyMapping_GetItemString(module_dict, (char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THCPHalfStorageClass = PyMapping_GetItemString(module_dict, (char*)"HalfStorage"));
|
||||
ASSERT_NOT_NULL(THCPLongStorageClass = PyMapping_GetItemString(module_dict, (char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THCPIntStorageClass = PyMapping_GetItemString(module_dict, (char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THCPShortStorageClass = PyMapping_GetItemString(module_dict, (char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THCPCharStorageClass = PyMapping_GetItemString(module_dict, (char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THCPByteStorageClass = PyMapping_GetItemString(module_dict, (char*)"ByteStorage"));
|
||||
ASSERT_NOT_NULL(THCPDoubleStorageClass = PyObject_GetAttrString(torch_module, (char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THCPFloatStorageClass = PyObject_GetAttrString(torch_module, (char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THCPHalfStorageClass = PyObject_GetAttrString(torch_module, (char*)"HalfStorage"));
|
||||
ASSERT_NOT_NULL(THCPLongStorageClass = PyObject_GetAttrString(torch_module, (char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THCPIntStorageClass = PyObject_GetAttrString(torch_module, (char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THCPShortStorageClass = PyObject_GetAttrString(torch_module, (char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THCPCharStorageClass = PyObject_GetAttrString(torch_module, (char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THCPByteStorageClass = PyObject_GetAttrString(torch_module, (char*)"ByteStorage"));
|
||||
|
||||
ASSERT_NOT_NULL(THCPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THCPHalfTensorClass = PyMapping_GetItemString(module_dict, (char*)"HalfTensor"));
|
||||
ASSERT_NOT_NULL(THCPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THCPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THCPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THCPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THCPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THCPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
|
||||
if (!THCPDoubleTensor_postInit(torch_module)) return false;
|
||||
if (!THCPFloatTensor_postInit(torch_module)) return false;
|
||||
if (!THCPHalfTensor_postInit(torch_module)) return false;
|
||||
if (!THCPLongTensor_postInit(torch_module)) return false;
|
||||
if (!THCPIntTensor_postInit(torch_module)) return false;
|
||||
if (!THCPShortTensor_postInit(torch_module)) return false;
|
||||
if (!THCPCharTensor_postInit(torch_module)) return false;
|
||||
if (!THCPByteTensor_postInit(torch_module)) return false;
|
||||
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
@ -60,6 +60,7 @@ static bool THCPModule_assignStateless()
|
||||
PyObject *stateless;
|
||||
INIT_STATELESS(Double);
|
||||
INIT_STATELESS_DETAIL(Float, Cuda);
|
||||
INIT_STATELESS(Half);
|
||||
INIT_STATELESS(Long);
|
||||
INIT_STATELESS(Int);
|
||||
INIT_STATELESS(Short);
|
||||
@ -238,6 +239,20 @@ PyObject * THCPModule_cudaSleep(PyObject *_unused, PyObject *cycles)
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject * THCPModule_cudaLockMutex(PyObject *module)
|
||||
{
|
||||
auto mutex = THCCachingAllocator_getCudaFreeMutex();
|
||||
mutex->lock();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
PyObject * THCPModule_cudaUnlockMutex(PyObject *module)
|
||||
{
|
||||
auto mutex = THCCachingAllocator_getCudaFreeMutex();
|
||||
mutex->unlock();
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
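// THCPModule_cudaLockMutex/cudaUnlockMutex above expose the caching
// allocator's cudaFree mutex to Python. On the C++ side the same mutex is
// normally taken with a scoped lock, as chooseAlgorithm does further down in
// this diff. Minimal sketch of that pattern; the plain std::mutex here is a
// stand-in for THCCachingAllocator_getCudaFreeMutex(), not the real object.
#include <mutex>

static std::mutex cuda_free_mutex;   // assumption: stand-in for the allocator's mutex

void work_that_may_call_cudaFree() {
  std::lock_guard<std::mutex> lock(cuda_free_mutex);
  // ... benchmark or free device memory while other users are excluded ...
}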
PyObject * THCPModule_getLibPath(PyObject *_unused)
|
||||
{
|
||||
#define _STR(x) #x
|
||||
@ -255,7 +270,8 @@ PyObject * THCPModule_getLibPath(PyObject *_unused)
|
||||
// Cuda module initialization
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool THCPModule_initCuda(PyObject *module_dict) {
|
||||
bool THCPModule_initCuda(PyObject *torch_module) {
|
||||
HANDLE_TH_ERRORS
|
||||
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
|
||||
state = THCState_alloc();
|
||||
THCState_setDeviceAllocator(state, THCCachingAllocator_get());
|
||||
@ -264,25 +280,26 @@ bool THCPModule_initCuda(PyObject *module_dict) {
|
||||
|
||||
#ifdef USE_MAGMA
|
||||
THCMagma_init(state);
|
||||
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(true)) != -1);
|
||||
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(true)) != -1);
|
||||
#else
|
||||
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(false)) != -1);
|
||||
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(false)) != -1);
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(true)) != -1);
|
||||
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(true)) != -1);
|
||||
#else
|
||||
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(false)) != -1);
|
||||
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(false)) != -1);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE(THCPModule_loadClasses(module_dict));
|
||||
ASSERT_TRUE(THCPModule_loadClasses(torch_module));
|
||||
ASSERT_TRUE(THCPModule_assignStateless());
|
||||
|
||||
ASSERT_TRUE(PyDict_SetItemString(module_dict, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);
|
||||
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);
|
||||
|
||||
// TODO: register THCudaShutdown handler at exit
|
||||
return true;
|
||||
#undef ASSERT_TRUE
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
// Callback for python part. Used for additional initialization of python classes
|
||||
@ -293,6 +310,5 @@ PyObject * THCPModule_initExtension(PyObject *self)
|
||||
THPUtils_setError("class loader couldn't access torch module");
|
||||
return NULL;
|
||||
}
|
||||
PyObject* module_dict = PyModule_GetDict(torch_module);
|
||||
return PyBool_FromLong(THCPModule_initCuda(module_dict));
|
||||
return PyBool_FromLong(THCPModule_initCuda(torch_module));
|
||||
}
|
||||
|
||||
@ -1,19 +1,18 @@
|
||||
#include "THCP.h"
|
||||
|
||||
static bool THCSPModule_loadClasses(PyObject *module_dict)
|
||||
static bool THCSPModule_loadClasses(PyObject *sparse_module)
|
||||
{
|
||||
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
|
||||
ASSERT_NOT_NULL(THCSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THCSPHalfTensorClass = PyMapping_GetItemString(module_dict, (char*)"HalfTensor"));
|
||||
ASSERT_NOT_NULL(THCSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THCSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THCSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THCSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THCSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THCSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
|
||||
|
||||
if (!THCSPDoubleTensor_postInit(sparse_module)) return false;
|
||||
if (!THCSPFloatTensor_postInit(sparse_module)) return false;
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
if (!THCSPHalfTensor_postInit(sparse_module)) return false;
|
||||
#endif
|
||||
if (!THCSPLongTensor_postInit(sparse_module)) return false;
|
||||
if (!THCSPIntTensor_postInit(sparse_module)) return false;
|
||||
if (!THCSPShortTensor_postInit(sparse_module)) return false;
|
||||
if (!THCSPCharTensor_postInit(sparse_module)) return false;
|
||||
if (!THCSPByteTensor_postInit(sparse_module)) return false;
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
}
|
||||
|
||||
static bool THCSPModule_assignStateless()
|
||||
@ -31,7 +30,9 @@ static bool THCSPModule_assignStateless()
|
||||
PyObject *stateless;
|
||||
INIT_STATELESS(Double);
|
||||
INIT_STATELESS(Float);
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
INIT_STATELESS(Half);
|
||||
#endif
|
||||
INIT_STATELESS(Long);
|
||||
INIT_STATELESS(Int);
|
||||
INIT_STATELESS(Short);
|
||||
@ -46,9 +47,9 @@ static bool THCSPModule_assignStateless()
|
||||
// Sparse Cuda module initialization
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool THCSPModule_initCudaSparse(PyObject *module_dict) {
|
||||
bool THCSPModule_initCudaSparse(PyObject *module) {
|
||||
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
|
||||
ASSERT_TRUE(THCSPModule_loadClasses(module_dict));
|
||||
ASSERT_TRUE(THCSPModule_loadClasses(module));
|
||||
ASSERT_TRUE(THCSPModule_assignStateless());
|
||||
return true;
|
||||
#undef ASSERT_TRUE
|
||||
@ -56,11 +57,10 @@ bool THCSPModule_initCudaSparse(PyObject *module_dict) {
|
||||
|
||||
PyObject * THCSPModule_initExtension(PyObject *self)
|
||||
{
|
||||
PyObject *torch_module = PyImport_ImportModule("torch.cuda.sparse");
|
||||
if (!torch_module) {
|
||||
PyObject *module = PyImport_ImportModule("torch.cuda.sparse");
|
||||
if (!module) {
|
||||
THPUtils_setError("class loader couldn't access torch.cuda.sparse module");
|
||||
return NULL;
|
||||
}
|
||||
PyObject* module_dict = PyModule_GetDict(torch_module);
|
||||
return PyBool_FromLong(THCSPModule_initCudaSparse(module_dict));
|
||||
return PyBool_FromLong(THCSPModule_initCudaSparse(module));
|
||||
}
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
|
||||
#include "override_macros.h"
|
||||
#include "torch/csrc/copy_utils.h"
|
||||
#include "DynamicTypes.h"
|
||||
|
||||
#define THC_GENERIC_FILE "torch/csrc/generic/Tensor.cpp"
|
||||
#include <THC/THCGenerateAllTypes.h>
|
||||
|
||||
@ -62,6 +62,8 @@ void cudnn_batch_norm_forward(
|
||||
THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
|
||||
double exponential_average_factor, double epsilon)
|
||||
{
|
||||
assertSameGPU(dataType, input, output, weight, bias, running_mean, running_var,
|
||||
save_mean, save_var);
|
||||
cudnnBatchNormMode_t mode;
|
||||
if (input->nDimension == 2) {
|
||||
mode = CUDNN_BATCHNORM_PER_ACTIVATION;
|
||||
@ -120,6 +122,8 @@ void cudnn_batch_norm_backward(
|
||||
THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
|
||||
double epsilon)
|
||||
{
|
||||
assertSameGPU(dataType, input, grad_output, grad_input, grad_weight, grad_bias, weight,
|
||||
running_mean, running_var, save_mean, save_var);
|
||||
cudnnBatchNormMode_t mode;
|
||||
if (input->nDimension == 2) {
|
||||
mode = CUDNN_BATCHNORM_PER_ACTIVATION;
|
||||
@ -143,7 +147,7 @@ void cudnn_batch_norm_backward(
|
||||
THVoidTensor_assertContiguous(save_mean);
|
||||
THVoidTensor_assertContiguous(save_var);
|
||||
CHECK(cudnnBatchNormalizationBackward(
|
||||
handle, mode, &one, &zero, &one, &one,
|
||||
handle, mode, &one, &zero, &one, &zero,
|
||||
idesc.desc, tensorPointer(dataType, input),
|
||||
odesc.desc, tensorPointer(dataType, grad_output),
|
||||
gdesc.desc, tensorPointer(dataType, grad_input),
|
||||
|
||||
@ -198,6 +198,8 @@ Workspace chooseAlgorithm(
|
||||
|
||||
if (!cache.find(conv.params, algo)) {
|
||||
if (benchmark) {
|
||||
// findAlgorithm may call cudaFree()
|
||||
std::lock_guard<std::mutex> lock(*THCCachingAllocator_getCudaFreeMutex());
|
||||
auto perfResults = search::findAlgorithm(handle, conv);
|
||||
if (perfResults.status == CUDNN_STATUS_SUCCESS) {
|
||||
*algo = perfResults.algo;
|
||||
@ -285,6 +287,7 @@ void cudnn_convolution_forward(
|
||||
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* output,
|
||||
Convolution* info, bool benchmark)
|
||||
{
|
||||
assertSameGPU(dataType, input, weight, output);
|
||||
int groups = info->groups;
|
||||
|
||||
cudnnConvolutionFwdAlgo_t fwdAlg;
|
||||
@ -309,6 +312,7 @@ void cudnn_convolution_add_bias(
|
||||
THVoidTensor* bias, THVoidTensor* output,
|
||||
Convolution* info)
|
||||
{
|
||||
assertSameGPU(dataType, bias, output);
|
||||
CHECK_ARG(output->nDimension <= 5);
|
||||
TensorDescriptor& bdesc = info->bdesc;
|
||||
|
||||
@ -329,6 +333,7 @@ void cudnn_convolution_backward_data(
|
||||
THVoidTensor* gradOutput, THVoidTensor* gradInput, THVoidTensor* weight,
|
||||
Convolution* info, bool benchmark)
|
||||
{
|
||||
assertSameGPU(dataType, gradOutput, gradInput, weight);
|
||||
int groups = info->params.groups;
|
||||
|
||||
cudnnConvolutionBwdDataAlgo_t bwdDataAlg;
|
||||
@ -353,6 +358,7 @@ void cudnn_convolution_backward_filter(
|
||||
THVoidTensor* gradOutput, THVoidTensor* input, THVoidTensor* gradWeight,
|
||||
Convolution* info, bool benchmark)
|
||||
{
|
||||
assertSameGPU(dataType, gradOutput, input, gradWeight);
|
||||
int groups = info->params.groups;
|
||||
|
||||
cudnnConvolutionBwdFilterAlgo_t bwdFilterAlg;
|
||||
@ -380,6 +386,7 @@ void cudnn_convolution_backward_bias(
|
||||
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
|
||||
THVoidTensor* gradOutput, THVoidTensor* gradBias, Convolution* info)
|
||||
{
|
||||
assertSameGPU(dataType, gradOutput, gradBias);
|
||||
Constant one(dataType, 1);
|
||||
Constant zero(dataType, 0);
|
||||
void* gradOutput_ptr = tensorPointer(dataType, gradOutput, 0, 1, 0);
|
||||
|
||||
@ -1,17 +1,42 @@
|
||||
#ifndef THP_CUDNN_EXCEPTIONS_INC
|
||||
#define THP_CUDNN_EXCEPTIONS_INC
|
||||
|
||||
#include <THC/THC.h>
|
||||
#include <cudnn.h>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
#define CHECK_ARG(cond) _CHECK_ARG(cond, #cond, __FILE__, __LINE__)
|
||||
|
||||
extern THCState* state;
|
||||
|
||||
namespace torch { namespace cudnn {
|
||||
|
||||
template<typename ...T>
|
||||
void assertSameGPU(cudnnDataType_t dataType, T* ... tensors) {
|
||||
static_assert(std::is_same<THVoidTensor, typename std::common_type<T...>::type>::value,
|
||||
"all arguments to assertSameGPU have to be THVoidTensor*");
|
||||
int is_same;
|
||||
if (dataType == CUDNN_DATA_FLOAT) {
|
||||
is_same = THCudaTensor_checkGPU(state, sizeof...(T),
|
||||
reinterpret_cast<THCudaTensor*>(tensors)...);
|
||||
} else if (dataType == CUDNN_DATA_HALF) {
|
||||
is_same = THCudaHalfTensor_checkGPU(state, sizeof...(T),
|
||||
reinterpret_cast<THCudaHalfTensor*>(tensors)...);
|
||||
} else if (dataType == CUDNN_DATA_DOUBLE) {
|
||||
is_same = THCudaDoubleTensor_checkGPU(state, sizeof...(T),
|
||||
reinterpret_cast<THCudaDoubleTensor*>(tensors)...);
|
||||
} else {
|
||||
throw std::runtime_error("unknown cuDNN data type");
|
||||
}
|
||||
if (!is_same) {
|
||||
throw std::runtime_error("tensors are on different GPUs");
|
||||
}
|
||||
}
|
||||
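// assertSameGPU above uses a variadic template plus a static_assert on
// std::common_type to force every argument to be a THVoidTensor*, and passes
// the argument count on via sizeof...(T). The same trick in isolation, with a
// plain int payload instead of tensors (names are illustrative only):
#include <type_traits>
#include <cstdio>

template <typename... T>
int count_all_ints(T*... xs) {
  static_assert(std::is_same<int, typename std::common_type<T...>::type>::value,
                "all arguments must be int*");
  return sizeof...(T);   // same sizeof...(T) idiom used for the tensor count
}

int main() {
  int a = 1, b = 2, c = 3;
  std::printf("%d\n", count_all_ints(&a, &b, &c));  // prints 3
}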
|
||||
class cudnn_exception : public std::runtime_error {
|
||||
public:
|
||||
cudnnStatus_t status;
|
||||
|
||||
@@ -20,6 +20,20 @@ cudnnDataType_t getCudnnDataType(PyObject *tensorClass)
  throw std::runtime_error(msg);
}

cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor)
{
  if (tensor.type() == thpp::Type::FLOAT) {
    return CUDNN_DATA_FLOAT;
  } else if (tensor.type() == thpp::Type::DOUBLE) {
    return CUDNN_DATA_DOUBLE;
  } else if (tensor.type() == thpp::Type::HALF) {
    return CUDNN_DATA_HALF;
  }
  std::string msg("getCudnnDataType() not supported for ");
  msg += (int)tensor.type();
  throw std::runtime_error(msg);
}

PyObject * getTensorClass(PyObject *args)
{
  for (int i = 0; i < PyTuple_Size(args); i++) {

@ -6,11 +6,13 @@
|
||||
#include <string>
|
||||
#include <cudnn.h>
|
||||
#include "../Types.h"
|
||||
#include <THPP/THPP.h>
|
||||
|
||||
namespace torch { namespace cudnn {
|
||||
|
||||
PyObject * getTensorClass(PyObject *args);
|
||||
cudnnDataType_t getCudnnDataType(PyObject *tensorClass);
|
||||
cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor);
|
||||
void _THVoidTensor_assertContiguous(THVoidTensor *tensor, const std::string& name);
|
||||
|
||||
#define THVoidTensor_assertContiguous(tensor) \
|
||||
|
||||
@ -26,9 +26,14 @@ static void THSPTensor_(dealloc)(THSPTensor* self)
|
||||
static PyObject * THSPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
#ifdef THC_GENERIC_FILE
|
||||
printf("Sparse CUDA Tensors not supported!\n");
|
||||
return NULL;
|
||||
#define THPIndexTensor_Check THCPLongTensor_Check
|
||||
#define THPIndexTensor THCPLongTensor
|
||||
#define THIndexTensor THCudaLongTensor
|
||||
#else
|
||||
#define THPIndexTensor_Check THPLongTensor_Check
|
||||
#define THPIndexTensor THPLongTensor
|
||||
#define THIndexTensor THLongTensor
|
||||
#endif
|
||||
HANDLE_TH_ERRORS
|
||||
Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
|
||||
|
||||
@ -71,24 +76,24 @@ static PyObject * THSPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
|
||||
self->cdata = THSTensor_(newWithSize)(LIBRARY_STATE sizes.get());
|
||||
}
|
||||
// torch.SparseTensor(torch.LongTensor indices, torch.LongTensor values)
|
||||
else if (num_args == 2 && THPLongTensor_Check(first_arg)) {
|
||||
else if (num_args == 2 && THPIndexTensor_Check(first_arg)) {
|
||||
PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
|
||||
if (!THPTensor_(Check)(second_arg)) goto invalid_arguments;
|
||||
|
||||
THLongTensor *indices = ((THPLongTensor*)first_arg)->cdata;
|
||||
THIndexTensor *indices = ((THPIndexTensor*)first_arg)->cdata;
|
||||
THTensor *values = ((THPTensor*)second_arg)->cdata;
|
||||
self->cdata = THSTensor_(newWithTensor)(LIBRARY_STATE indices, values);
|
||||
}
|
||||
// torch.SparseTensor(torch.LongTensor indices,
|
||||
// torch.Tensor values,
|
||||
// torch.Size sizes)
|
||||
else if (num_args > 2 && THPLongTensor_Check(first_arg)) {
|
||||
else if (num_args > 2 && THPIndexTensor_Check(first_arg)) {
|
||||
PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
|
||||
PyObject *third_arg = PyTuple_GET_ITEM(args, 2);
|
||||
if (!THPTensor_(Check)(second_arg)) goto invalid_arguments;
|
||||
if (!THPSize_Check(third_arg)) goto invalid_arguments;
|
||||
|
||||
THLongTensor *indices = ((THPLongTensor*)first_arg)->cdata;
|
||||
THIndexTensor *indices = ((THPIndexTensor*)first_arg)->cdata;
|
||||
THTensor *values = ((THPTensor*)second_arg)->cdata;
|
||||
THLongStoragePtr sizes = THPUtils_unpackSize(third_arg);
|
||||
self->cdata = THSTensor_(newWithTensorAndSize)(
|
||||
@ -107,12 +112,19 @@ invalid_arguments:
|
||||
"no arguments",
|
||||
"(int size)",
|
||||
"(torch.Size sizes)",
|
||||
#ifdef THC_GENERIC_FILE
|
||||
"(torch.cuda.LongTensor indices, " THPTensorStr " values)",
|
||||
"(torch.cuda.LongTensor indices, " THPTensorStr " values, torch.Size sizes)",
|
||||
#else
|
||||
"(torch.LongTensor indices, " THPTensorStr " values)",
|
||||
"(torch.LongTensor indices, " THPTensorStr " values, torch.Size sizes)",
|
||||
#endif
|
||||
"(int ...)");
|
||||
return NULL;
|
||||
END_HANDLE_TH_ERRORS
|
||||
#endif
|
||||
#undef THPIndexTensor_Check
|
||||
#undef THPIndexTensor
|
||||
#undef THIndexTensor
|
||||
}
|
||||
|
||||
// TODO: implement equality
|
||||
@ -227,3 +239,16 @@ bool THSPTensor_(init)(PyObject *module)
|
||||
PyModule_AddObject(module, THSPTensorBaseStr, (PyObject *)&THSPTensorType);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool THSPTensor_(postInit)(PyObject *module)
|
||||
{
|
||||
THSPTensorClass = PyObject_GetAttrString(module, TH_CONCAT_STRING_2(Real,Tensor));
|
||||
if (!THSPTensorClass) return false;
|
||||
bool is_cuda = false;
|
||||
#ifdef THC_GENERIC_FILE
|
||||
is_cuda = true;
|
||||
#endif
|
||||
const char *type_name = TH_CONCAT_STRING_2(Real,);
|
||||
torch::registerPyTypeObject((PyTypeObject*)THSPTensorClass, type_name, is_cuda, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -186,10 +186,15 @@ static PyObject * THPStorage_(get)(THPStorage *self, PyObject *index)
|
||||
return THPUtils_(newReal)(value);
|
||||
/* Slice index */
|
||||
} else if (PySlice_Check(index)) {
|
||||
Py_ssize_t start, stop, slicelength;
|
||||
Py_ssize_t start, stop, slicelength, step;
|
||||
long len = THStorage_(size)(LIBRARY_STATE self->cdata);
|
||||
if (!THPUtils_parseSlice(index, len, &start, &stop, &slicelength))
|
||||
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
|
||||
return NULL;
|
||||
if (step != 1) {
|
||||
THPUtils_setError("Trying to slice with a step of %ld, but only a step of "
|
||||
"1 is supported", (long)step);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
real *data = THStorage_(data)(LIBRARY_STATE self->cdata);
|
||||
THStoragePtr new_storage = THStorage_(newWithData)(LIBRARY_STATE data + start, slicelength);
|
||||
@ -223,10 +228,15 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
|
||||
THStorage_(set)(LIBRARY_STATE self->cdata, nindex, rvalue);
|
||||
return 0;
|
||||
} else if (PySlice_Check(index)) {
|
||||
Py_ssize_t start, stop;
|
||||
Py_ssize_t start, stop, slicelength, step;
|
||||
long len = THStorage_(size)(LIBRARY_STATE self->cdata);
|
||||
if (!THPUtils_parseSlice(index, len, &start, &stop, NULL))
|
||||
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
|
||||
return -1;
|
||||
if (step != 1) {
|
||||
THPUtils_setError("Trying to slice with a step of %ld, but only a step of "
|
||||
"1 is supported", (long)step);
|
||||
return 0;
|
||||
}
|
||||
// TODO: check the bounds only once
|
||||
// TODO: fill?
|
||||
for (;start < stop; start++)
|
||||
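// Both storage hunks above thread a `step` out of THPUtils_parseSlice and
// reject any step other than 1. A sketch of what such a parse looks like on
// top of the CPython slice API; THPUtils_parseSlice itself is assumed to be a
// thin wrapper of roughly this shape, not shown in the diff.
#include <Python.h>

static bool parse_unit_step_slice(PyObject* index, Py_ssize_t len,
                                  Py_ssize_t* start, Py_ssize_t* stop,
                                  Py_ssize_t* slicelength) {
  Py_ssize_t step;
  if (PySlice_GetIndicesEx(index, len, start, stop, &step, slicelength) < 0)
    return false;                      // Python error already set
  if (step != 1) {
    PyErr_SetString(PyExc_ValueError, "only a step of 1 is supported");
    return false;
  }
  return true;
}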
@ -304,6 +314,7 @@ void THPStorage_(initCopyMethods)()
|
||||
THPInsertCopyFunction(h, &THStorage_(copyShort));
|
||||
THPInsertCopyFunction(h, &THStorage_(copyInt));
|
||||
THPInsertCopyFunction(h, &THStorage_(copyLong));
|
||||
THPInsertCopyFunction(h, &THStorage_(copyHalf));
|
||||
THPInsertCopyFunction(h, &THStorage_(copyFloat));
|
||||
THPInsertCopyFunction(h, &THStorage_(copyDouble));
|
||||
#ifdef THC_GENERIC_FILE
|
||||
@ -318,7 +329,6 @@ void THPStorage_(initCopyMethods)()
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
THPInsertCopyFunction(h, &THStorage_(copyCudaHalf));
|
||||
#endif
|
||||
#ifndef THC_REAL_IS_HALF
|
||||
// add CPU <- GPU copies to base type
|
||||
#define THCpuStorage_(name) TH_CONCAT_4(TH, Real, Storage_, name)
|
||||
extern THPCopyList THCpuStorage_(copy_functions);
|
||||
@ -335,7 +345,6 @@ void THPStorage_(initCopyMethods)()
|
||||
#endif
|
||||
#undef THCpuStorage_
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#include "StorageMethods.cpp"
|
||||
|
||||
@ -159,6 +159,8 @@ static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyO
|
||||
#elif defined(TH_REAL_IS_LONG)
|
||||
// TODO: remove the cast
|
||||
THP_decodeInt64Buffer((int64_t*) storage->data, src + offset, byte_order, count);
|
||||
#elif defined(TH_REAL_IS_HALF)
|
||||
THP_decodeHalfBuffer(storage->data, src + offset, byte_order, count);
|
||||
#elif defined(TH_REAL_IS_FLOAT)
|
||||
THP_decodeFloatBuffer(storage->data, src + offset, byte_order, count);
|
||||
#elif defined(TH_REAL_IS_DOUBLE)
|
||||
@ -190,13 +192,36 @@ PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *file)
|
||||
int fd = PyObject_AsFileDescriptor(file);
|
||||
THPUtils_assert(fd != -1, "_new_with_file couldn't retrieve a file "
|
||||
"descriptor from given object");
|
||||
THStoragePtr storage = THPStorage_(readFileRaw)(fd);
|
||||
THStorage *storage = THPStorage_(readFileRaw)(fd, nullptr);
|
||||
if (storage == nullptr)
|
||||
return nullptr;
|
||||
PyObject *result = THPStorage_(New)(storage);
|
||||
storage.release();
|
||||
return result;
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
static PyObject *THPStorage_(setFromFile)(THPStorage *self, PyObject *args)
|
||||
{
|
||||
HANDLE_TH_ERRORS
|
||||
PyObject *file = PyTuple_GET_ITEM(args, 0);
|
||||
int fd = PyObject_AsFileDescriptor(file);
|
||||
|
||||
PyObject *offset = PyTuple_GET_ITEM(args, 1);
|
||||
if (offset != Py_None) {
|
||||
lseek(fd, THPUtils_unpackLong(offset), SEEK_SET);
|
||||
}
|
||||
|
||||
THPUtils_assert(fd != -1, "_set_from_file couldn't retrieve a file "
|
||||
"descriptor from given object");
|
||||
THStorage *storage = THPStorage_(readFileRaw)(fd, self->cdata);
|
||||
if (storage == nullptr)
|
||||
return nullptr;
|
||||
Py_INCREF(self);
|
||||
|
||||
return (PyObject *) self;
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
#ifdef THC_GENERIC_FILE
|
||||
PyObject * THPStorage_(getDevice)(THPStorage *self)
|
||||
{
|
||||
@ -250,6 +275,7 @@ static PyMethodDef THPStorage_(methods)[] = {
|
||||
{"is_pinned", (PyCFunction)THPStorage_(isPinned), METH_NOARGS, NULL},
|
||||
{"_write_file", (PyCFunction)THPStorage_(writeFile), METH_O, NULL},
|
||||
{"_new_with_file", (PyCFunction)THPStorage_(newWithFile), METH_O | METH_STATIC, NULL},
|
||||
{"_set_from_file", (PyCFunction)THPStorage_(setFromFile), METH_VARARGS, NULL},
|
||||
#ifndef THC_GENERIC_FILE
|
||||
{"from_buffer", (PyCFunction)THPStorage_(fromBuffer), METH_VARARGS | METH_KEYWORDS | METH_STATIC, NULL},
|
||||
#endif
|
||||
|
||||
@ -16,6 +16,9 @@
|
||||
#ifdef TH_REAL_IS_INT
|
||||
#define NUMPY_TYPE_ENUM NPY_INT32
|
||||
#endif
|
||||
#ifdef TH_REAL_IS_SHORT
|
||||
#define NUMPY_TYPE_ENUM NPY_INT16
|
||||
#endif
|
||||
#ifdef TH_REAL_IS_BYTE
|
||||
#define NUMPY_TYPE_ENUM NPY_UINT8
|
||||
#endif
|
||||
@ -23,6 +26,7 @@
|
||||
#endif
|
||||
|
||||
PyObject *THPTensorClass = NULL;
|
||||
THPCopyList THTensor_(copy_functions);
|
||||
|
||||
PyObject * THPTensor_(NewEmpty)()
|
||||
{
|
||||
@ -412,32 +416,6 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject
|
||||
#define UNPACK_SCALAR(IDX_VARIABLE) idx = THPUtils_unpackLong(IDX_VARIABLE);
|
||||
#endif
|
||||
|
||||
#define INDEX_SCALAR(DIM, IDX_VARIABLE, TENSOR_VARIABLE, CASE_1D, CASE_MD) \
|
||||
int64_t idx; \
|
||||
UNPACK_SCALAR(IDX_VARIABLE); \
|
||||
long dimsize = THTensor_(size)(LIBRARY_STATE TENSOR_VARIABLE, DIM); \
|
||||
idx = (idx < 0) ? dimsize + idx : idx; \
|
||||
\
|
||||
if (dimsize <= 0) { \
|
||||
PyErr_SetString(PyExc_IndexError, "indexing an empty tensor"); \
|
||||
return false; \
|
||||
} \
|
||||
if (idx < 0 || idx >= dimsize) { \
|
||||
PyErr_Format(PyExc_IndexError, "index %lld is out of range for dimension " \
|
||||
"%lld (of size %lld)", (long long)idx, (long long)DIM, (long long)dimsize); \
|
||||
return false; \
|
||||
} \
|
||||
\
|
||||
if(THTensor_(nDimension)(LIBRARY_STATE TENSOR_VARIABLE) == 1) { \
|
||||
CASE_1D; \
|
||||
} else { \
|
||||
CASE_MD; \
|
||||
}
|
||||
|
||||
#define GET_OFFSET(t, idx) \
|
||||
t->storageOffset + t->stride[0] * idx;
|
||||
|
||||
|
||||
#ifdef THC_GENERIC_FILE
|
||||
#define THIndexTensor THCudaLongTensor
|
||||
#define THIndexTensor_(NAME) TH_CONCAT_2(THCudaLongTensor_,NAME)
|
||||
@ -451,58 +429,89 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject
|
||||
#endif
|
||||
|
||||
|
||||
template<bool allow_index>
|
||||
static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
|
||||
THTensorPtr &tresult, THStorage * &sresult, long &storage_offset)
|
||||
static bool THPTensor_(_indexOnce)(PyObject *index, int &indexed_dim,
|
||||
THTensorPtr &tresult, THStorage* &sresult, long &storage_offset)
|
||||
{
|
||||
#ifdef WITH_NUMPY
|
||||
static PyArray_Descr *NumpyLongArrDescr = PyArray_DescrFromType(NPY_INT64);
|
||||
bool is_long, is_scalar_array;
|
||||
#endif
|
||||
tresult = NULL;
|
||||
sresult = NULL;
|
||||
// Indexing with an integer
|
||||
// Indexing with a scalar
|
||||
if(IS_SCALAR(index)) {
|
||||
THTensor *self_t = self->cdata;
|
||||
INDEX_SCALAR(0, index, self_t,
|
||||
// 1D tensor
|
||||
sresult = self_t->storage;
|
||||
storage_offset = GET_OFFSET(self_t, idx),
|
||||
// >1D tensor
|
||||
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self_t);
|
||||
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, 0, idx)
|
||||
)
|
||||
return true;
|
||||
int64_t idx;
|
||||
UNPACK_SCALAR(index);
|
||||
long dimsize = THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim);
|
||||
idx = (idx < 0) ? dimsize + idx : idx;
|
||||
|
||||
if (dimsize <= 0) {
|
||||
PyErr_SetString(PyExc_IndexError, "indexing an empty tensor");
|
||||
throw python_error();
|
||||
}
|
||||
if (idx < 0 || idx >= dimsize) {
|
||||
PyErr_Format(PyExc_IndexError, "index %lld is out of range for dimension "
|
||||
"%lld (of size %lld)", (long long)idx, (long long)indexed_dim, (long long)dimsize);
|
||||
throw python_error();
|
||||
}
|
||||
|
||||
if(THTensor_(nDimension)(LIBRARY_STATE tresult.get()) == 1) {
|
||||
sresult = tresult.get()->storage;
|
||||
storage_offset = tresult->storageOffset + tresult->stride[0] * idx;
|
||||
tresult = NULL;
|
||||
} else {
|
||||
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, indexed_dim, idx);
|
||||
}
|
||||
} else if (index == Py_None) {
|
||||
// _indexOnce will never be called with tresult == NULL, except for a None index
|
||||
if (!tresult) {
|
||||
tresult = THTensor_(newWithStorage1d)(LIBRARY_STATE sresult, storage_offset, 1, 1);
|
||||
sresult = NULL;
|
||||
} else {
|
||||
THTensor_(unsqueeze1d)(LIBRARY_STATE tresult.get(), NULL, indexed_dim++);
|
||||
}
|
||||
// Indexing with a slice
|
||||
} else if (PySlice_Check(index)) {
|
||||
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
|
||||
Py_ssize_t start, end, length;
|
||||
if (!THPUtils_parseSlice(index, THTensor_(size)(LIBRARY_STATE tresult.get(), 0), &start, &end, &length))
|
||||
return false;
|
||||
THTensor_(narrow)(LIBRARY_STATE tresult.get(), NULL, 0, start, length);
|
||||
return true;
|
||||
} else if (THPIndexTensor_Check(index)) {
|
||||
if (allow_index) {
|
||||
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
|
||||
tresult = THTensor_(new)(LIBRARY_STATE_NOARGS);
|
||||
THTensor_(indexSelect)(LIBRARY_STATE tresult.get(), self->cdata, 0, index_t);
|
||||
return true;
|
||||
} else {
|
||||
THPUtils_setError("assignments using LongTensors as index aren't supported yet");
|
||||
tresult = NULL;
|
||||
return false;
|
||||
Py_ssize_t start, end, length, step;
|
||||
if (!THPUtils_parseSlice(index, THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim), &start, &end, &step, &length))
|
||||
throw python_error();
|
||||
if (step <= 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "slice step has to be greater than 0");
|
||||
throw python_error();
|
||||
}
|
||||
// Indexing multiple dimensions
|
||||
} else if(PyTuple_Check(index)) {
|
||||
if (length == 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "result of slicing is an empty tensor");
|
||||
throw python_error();
|
||||
}
|
||||
tresult->storageOffset += tresult->stride[indexed_dim] * start;
|
||||
tresult->stride[indexed_dim] *= step;
|
||||
tresult->size[indexed_dim] = length;
|
||||
indexed_dim++;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
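// The slice branch of _indexOnce above implements view slicing purely in
// (offset, stride, size) arithmetic: offset += stride*start, stride *= step,
// size = length. A standalone illustration of that arithmetic on a flat
// buffer (no THTensor involved):
#include <cstdio>

int main() {
  int data[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  // view = data[2:9:3]  ->  start=2, step=3, length=3
  long offset = 0, stride = 1, size = 10;
  long start = 2, step = 3, length = 3;
  offset += stride * start;   // 2
  stride *= step;             // 3
  size = length;              // 3
  for (long i = 0; i < size; i++)
    std::printf("%d ", data[offset + i * stride]);  // prints: 2 5 8
  std::printf("\n");
}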
|
||||
static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
|
||||
THTensorPtr &tresult, THStorage * &sresult, long &storage_offset)
|
||||
{
|
||||
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
|
||||
sresult = NULL;
|
||||
int indexed_dim = 0;
|
||||
if(PyTuple_Check(index)) {
|
||||
long num_index_dim = (long)PyTuple_Size(index);
|
||||
long num_effective_index = num_index_dim;
|
||||
long num_tensor_dim = THTensor_(nDimension)(LIBRARY_STATE self->cdata);
|
||||
long ellipsis_idx = num_tensor_dim + 1;
|
||||
long ellipsis_idx = -1;
|
||||
for (int i = 0; i < num_index_dim; i++) {
|
||||
if (PyTuple_GET_ITEM(index, i) == Py_Ellipsis) {
|
||||
PyObject *dimidx = PyTuple_GET_ITEM(index, i);
|
||||
if (dimidx == Py_Ellipsis) {
|
||||
if (ellipsis_idx != -1) throw std::runtime_error("ellipsis can be used at most once");
|
||||
ellipsis_idx = i;
|
||||
num_effective_index--;
|
||||
break;
|
||||
}
|
||||
if (dimidx == Py_None) {
|
||||
num_effective_index--;
|
||||
}
|
||||
}
|
||||
if (num_effective_index > num_tensor_dim) {
|
||||
@ -512,130 +521,52 @@ static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
|
||||
return false;
|
||||
}
|
||||
|
||||
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
|
||||
int t_dim = 0;
|
||||
bool valid = true;
|
||||
for(int dim = 0; dim < num_index_dim; dim++) {
|
||||
for (int dim = 0; dim < num_index_dim; dim++) {
|
||||
if (dim == ellipsis_idx) {
|
||||
t_dim = tresult->nDimension - (num_index_dim - dim - 1);
|
||||
// tresult can be NULL if ellipsis is the last item
|
||||
if (tresult) indexed_dim = tresult->nDimension - (num_index_dim - dim - 1);
|
||||
continue;
|
||||
}
|
||||
PyObject *dimidx = PyTuple_GET_ITEM(index, dim);
|
||||
if(IS_SCALAR(dimidx)) {
|
||||
INDEX_SCALAR(t_dim, dimidx, tresult,
|
||||
// 1D tensor
|
||||
sresult = tresult->storage;
|
||||
storage_offset = GET_OFFSET(tresult, idx);
|
||||
tresult = NULL;
|
||||
return true,
|
||||
// >1D tensor
|
||||
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, t_dim, idx)
|
||||
)
|
||||
} else if (PySlice_Check(dimidx)) {
|
||||
Py_ssize_t start, end, length;
|
||||
long size_dim = THTensor_(size)(LIBRARY_STATE tresult.get(), t_dim);
|
||||
if (!THPUtils_parseSlice(dimidx, size_dim, &start, &end, &length))
|
||||
return false;
|
||||
THTensor_(narrow)(LIBRARY_STATE tresult.get(), NULL, t_dim++, start, length);
|
||||
} else if (THPIndexTensor_Check(dimidx)) {
|
||||
if (allow_index) {
|
||||
THIndexTensor *index_t = ((THPIndexTensor*)dimidx)->cdata;
|
||||
THTensorPtr index_result = THTensor_(new)(LIBRARY_STATE_NOARGS);
|
||||
THTensor_(indexSelect)(LIBRARY_STATE index_result.get(), tresult.get(), t_dim++, index_t);
|
||||
tresult = index_result.release();
|
||||
} else {
|
||||
THPUtils_setError("assignments using LongTensors as index aren't supported yet");
|
||||
tresult = NULL;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
valid = THPTensor_(_indexOnce)(dimidx, indexed_dim, tresult, sresult, storage_offset);
|
||||
if (!valid) {
|
||||
tresult = NULL;
|
||||
valid = false;
|
||||
// overwrite this, so the message mentions the incorrect object
|
||||
index = dimidx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (valid) {
|
||||
if (valid) return true;
|
||||
} else if (index == Py_Ellipsis) {
|
||||
return true;
|
||||
} else {
|
||||
if (THPTensor_(_indexOnce)(index, indexed_dim, tresult, sresult, storage_offset))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
PyErr_Format(PyExc_TypeError, "indexing a tensor with an object of type %s. "
|
||||
"The only supported types are integers, slices"
|
||||
#ifdef WITH_NUMPY
|
||||
", numpy scalars"
|
||||
", numpy scalars and "
|
||||
#endif
|
||||
" and "
|
||||
#ifndef THC_GENERIC_FILE
|
||||
"torch.ByteTensor.",
|
||||
"torch.LongTensor or torch.ByteTensor as the only argument.",
|
||||
#else
|
||||
"torch.cuda.ByteTensor.",
|
||||
"torch.cuda.LongTensor or torch.cuda.ByteTensor as the only argument.",
|
||||
#endif
|
||||
THPUtils_typename(index));
|
||||
return false;
|
||||
}
|
||||
#undef IS_SCALAR
|
||||
#undef INDEX_SCALAR
|
||||
#undef GET_OFFSET
|
||||
#undef THIndexTensor
|
||||
#undef THIndexTensor_
|
||||
#undef THPIndexTensor
|
||||
#undef THPIndexTensor_Check
|
||||
|
||||
extern THPCopyList THTensor_(copy_functions);
|
||||
THPCopyList THTensor_(copy_functions);
|
||||
|
||||
void THPTensor_(initCopyMethods)()
|
||||
{
|
||||
auto& h = THTensor_(copy_functions);
|
||||
// copy from CPU types
|
||||
THPInsertCopyFunction(h, &THTensor_(copyByte));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyChar));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyShort));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyInt));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyLong));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyFloat));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyDouble));
|
||||
#ifdef THC_GENERIC_FILE
|
||||
// copy from GPU types
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaByte));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaChar));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaShort));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaInt));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaLong));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaFloat));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaDouble));
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaHalf));
|
||||
#endif
|
||||
#ifndef THC_REAL_IS_HALF
|
||||
THPInsertCopyFunction(h, &THCTensor_(copyAsyncCPU), true);
|
||||
// add CPU <- GPU copies to base type
|
||||
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
|
||||
extern THPCopyList THCpuTensor_(copy_functions);
|
||||
auto& b = THCpuTensor_(copy_functions);
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaByte));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaChar));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaShort));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaInt));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaLong));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaFloat));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaDouble));
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaHalf));
|
||||
#endif
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyAsyncCuda), true);
|
||||
#undef THCpuTensor_
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#undef UNPACK_SCALAR
|
||||
|
||||
template<bool force_tensor>
|
||||
static PyObject * THPTensor_(getValue)(THPTensor *self, PyObject *index)
|
||||
{
|
||||
HANDLE_TH_ERRORS
|
||||
|
||||
#ifndef TH_REAL_IS_HALF
|
||||
#ifndef THC_GENERIC_FILE
|
||||
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
|
||||
#else
|
||||
@ -647,11 +578,18 @@ static PyObject * THPTensor_(getValue)(THPTensor *self, PyObject *index)
|
||||
THTensor_(maskedSelect)(LIBRARY_STATE t.get(), self->cdata, mask->cdata);
|
||||
return THPTensor_(New)(t.release());
|
||||
}
|
||||
if (THPIndexTensor_Check(index)) {
|
||||
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
|
||||
THTensorPtr index_result = THTensor_(new)(LIBRARY_STATE_NOARGS);
|
||||
THTensor_(indexSelect)(LIBRARY_STATE index_result.get(), self->cdata, 0, index_t);
|
||||
return THPTensor_(New)(index_result.release());
|
||||
}
|
||||
#endif
|
||||
|
||||
THTensorPtr tresult;
|
||||
THStorage *sresult;
|
||||
long storage_offset;
|
||||
if (!THPTensor_(_index)<true>(self, index, tresult, sresult, storage_offset))
|
||||
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
|
||||
return NULL;
|
||||
if (tresult)
|
||||
return THPTensor_(New)(tresult.release());
|
||||
@ -674,6 +612,7 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
|
||||
{
|
||||
HANDLE_TH_ERRORS
|
||||
|
||||
#ifndef TH_REAL_IS_HALF
|
||||
#ifndef THC_GENERIC_FILE
|
||||
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
|
||||
#else
|
||||
@ -693,11 +632,26 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (THPIndexTensor_Check(index)) {
|
||||
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
|
||||
if (THPUtils_(checkReal)(value)) {
|
||||
real v = THPUtils_(unpackReal)(value);
|
||||
THTensor_(indexFill)(LIBRARY_STATE self->cdata, 0, index_t, v);
|
||||
} else if (THPTensor_(Check)(value)) {
|
||||
THTensor_(indexCopy)(LIBRARY_STATE self->cdata, 0, index_t, ((THPTensor*)value)->cdata);
|
||||
} else {
|
||||
THPUtils_setError("can't assign %s to a " THPTensorStr " using a LongTensor "
|
||||
"(only " THPTensorStr " or %s are supported)",
|
||||
THPUtils_typename(value), THPUtils_typeTraits<real>::python_type_str);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
THTensorPtr tresult;
|
||||
THStorage *sresult;
|
||||
long storage_offset;
|
||||
if (!THPTensor_(_index)<false>(self, index, tresult, sresult, storage_offset))
|
||||
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
|
||||
return -1;
|
||||
if (sresult) {
|
||||
if (!force_tensor) {
|
||||
@ -714,7 +668,11 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
|
||||
}
|
||||
if (tresult) {
|
||||
if (THPUtils_(checkReal)(value)) {
|
||||
#ifndef TH_REAL_IS_HALF
|
||||
THTensor_(fill)(LIBRARY_STATE tresult.get(), THPUtils_(unpackReal)(value));
|
||||
#else
|
||||
throw std::runtime_error("torch.HalfTensors don't support scalar assignments");
|
||||
#endif
|
||||
} else {
|
||||
// TODO: try to do this without creating a temporary object
|
||||
THPTensorPtr tmp = (THPTensor*)THPTensor_(New)(tresult.release());
|
||||
@ -732,6 +690,10 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
|
||||
return -1;
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
#undef THIndexTensor
|
||||
#undef THIndexTensor_
|
||||
#undef THPIndexTensor
|
||||
#undef THPIndexTensor_Check
|
||||
|
||||
Py_ssize_t THPTensor_(length)(THPTensor *self)
|
||||
{
|
||||
@ -847,11 +809,57 @@ PyTypeObject THPTensorStatelessType = {
|
||||
0, /* tp_weaklist */
|
||||
};
|
||||
|
||||
#ifndef TH_REAL_IS_HALF
|
||||
#include "SparseTensor.cpp"
|
||||
#endif
|
||||
|
||||
void THPTensor_(initCopyMethods)()
|
||||
{
|
||||
auto& h = THTensor_(copy_functions);
|
||||
// copy from CPU types
|
||||
THPInsertCopyFunction(h, &THTensor_(copyByte));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyChar));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyShort));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyInt));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyLong));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyFloat));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyHalf));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyDouble));
|
||||
#ifdef THC_GENERIC_FILE
|
||||
// copy from GPU types
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaByte));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaChar));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaShort));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaInt));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaLong));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaFloat));
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaDouble));
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
THPInsertCopyFunction(h, &THTensor_(copyCudaHalf));
|
||||
#endif
|
||||
THPInsertCopyFunction(h, &THCTensor_(copyAsyncCPU), true);
|
||||
// add CPU <- GPU copies to base type
|
||||
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
|
||||
extern THPCopyList THCpuTensor_(copy_functions);
|
||||
auto& b = THCpuTensor_(copy_functions);
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaByte));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaChar));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaShort));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaInt));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaLong));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaFloat));
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaDouble));
|
||||
#ifdef CUDA_HALF_TENSOR
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaHalf));
|
||||
#endif
|
||||
THPInsertCopyFunction(b, &THCpuTensor_(copyAsyncCuda), true);
|
||||
#undef THCpuTensor_
|
||||
#endif
|
||||
}
|
||||
|
||||
bool THPTensor_(init)(PyObject *module)
|
||||
{
|
||||
#ifndef THC_GENERIC_FILE
|
||||
#if !defined(THC_GENERIC_FILE) && !defined(TH_REAL_IS_HALF)
|
||||
THVector_(vectorDispatchInit)();
|
||||
#endif
|
||||
THPTensorType.tp_methods = THPTensor_(methods);
|
||||
@ -867,6 +875,20 @@ bool THPTensor_(init)(PyObject *module)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool THPTensor_(postInit)(PyObject *module)
|
||||
{
|
||||
THPTensorClass = PyObject_GetAttrString(module,(char*)TH_CONCAT_STRING_2(Real,Tensor));
|
||||
if (!THPTensorClass) return false;
|
||||
|
||||
bool is_cuda = false;
|
||||
#ifdef THC_GENERIC_FILE
|
||||
is_cuda = true;
|
||||
#endif
|
||||
const char *type_name = TH_CONCAT_STRING_2(Real,);
|
||||
torch::registerPyTypeObject((PyTypeObject*)THPTensorClass, type_name, is_cuda, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef NUMPY_TYPE_ENUM
|
||||
|
||||
#endif
|
||||
|
||||
@ -2,12 +2,18 @@
|
||||
#define TH_GENERIC_FILE "generic/Tensor.h"
|
||||
#else
|
||||
|
||||
#if defined(TH_REAL_IS_HALF) || defined(THD_GENERIC_FILE)
|
||||
#define GENERATE_SPARSE 0
|
||||
#else
|
||||
#define GENERATE_SPARSE 1
|
||||
#endif
|
||||
|
||||
struct THPTensor {
|
||||
PyObject_HEAD
|
||||
THTensor *cdata;
|
||||
};
|
||||
|
||||
#ifndef THD_GENERIC_FILE
|
||||
#if GENERATE_SPARSE
|
||||
struct THSPTensor {
|
||||
PyObject_HEAD
|
||||
THSTensor *cdata;
|
||||
@ -21,7 +27,7 @@ struct THSPTensor {
|
||||
* count is decremented.
|
||||
*/
|
||||
THP_API PyObject * THPTensor_(New)(THTensor *ptr);
|
||||
#ifndef THD_GENERIC_FILE
|
||||
#if GENERATE_SPARSE
|
||||
THP_API PyObject * THSPTensor_(New)(THSTensor *ptr);
|
||||
#endif
|
||||
|
||||
@ -29,12 +35,12 @@ THP_API PyObject * THSPTensor_(New)(THSTensor *ptr);
|
||||
* Creates a new empty Python Tensor object
|
||||
*/
|
||||
THP_API PyObject * THPTensor_(NewEmpty)(void);
|
||||
#ifndef THD_GENERIC_FILE
|
||||
#if GENERATE_SPARSE
|
||||
THP_API PyObject * THSPTensor_(NewEmpty)(void);
|
||||
#endif
|
||||
|
||||
extern PyObject *THPTensorClass;
|
||||
#ifndef THD_GENERIC_FILE
|
||||
#if GENERATE_SPARSE
|
||||
extern PyObject *THSPTensorClass;
|
||||
#endif
|
||||
|
||||
@ -43,12 +49,15 @@ extern PyObject *THSPTensorClass;
|
||||
|
||||
// TODO: init stateless in THPTensor_(init) and remove this
|
||||
extern PyTypeObject THPTensorStatelessType;
|
||||
#ifndef THD_GENERIC_FILE
|
||||
#if GENERATE_SPARSE
|
||||
extern PyTypeObject THSPTensorStatelessType;
|
||||
#endif
|
||||
|
||||
bool THPTensor_(init)(PyObject *module);
|
||||
#ifndef THD_GENERIC_FILE
|
||||
bool THPTensor_(postInit)(PyObject *module);
|
||||
#if GENERATE_SPARSE
|
||||
bool THSPTensor_(init)(PyObject *module);
|
||||
bool THSPTensor_(postInit)(PyObject *module);
|
||||
#endif
|
||||
|
||||
extern PyTypeObject THPTensorType;
|
||||
@ -58,4 +67,6 @@ template <> struct THPTypeInfo<THTensor> {
|
||||
};
|
||||
#endif
|
||||
|
||||
#undef GENERATE_SPARSE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,34 +1,60 @@
// Sparse Tensors not supported for CUDA
#if IS_CUDA || !defined(TH_REAL_IS_HALF)
PyObject * THSPTensor_(size)(PyObject *self, PyObject *args, PyObject *kwargs)
{
  HANDLE_TH_ERRORS
  THSTensor* tensor = ((THSPTensor*)self)->cdata;
  if (PyTuple_Size(args) == 0 && (!kwargs || PyDict_Size(kwargs) == 0)) {
    return THPSize_New(tensor->nDimensionI + tensor->nDimensionV, tensor->size);
  }

  int tuplecount = args ? PyTuple_Size(args) : 0;
  int dictcount = kwargs ? PyDict_Size(kwargs) : 0;

  PyObject* pydim = NULL;
  if (tuplecount == 1 && dictcount == 0) {
    pydim = PyTuple_GET_ITEM(args, 0);
  } else if (dictcount == 1 && tuplecount == 0) {
    pydim = PyDict_GetItemString(kwargs, "dim");
  }

  if (pydim && THPUtils_checkLong(pydim)) {
    int dim = (int)THPUtils_unpackLong(pydim);
    if (dim < 0)
      dim += tensor->nDimensionI + tensor->nDimensionV;
    return PyInt_FromLong(THSTensor_(size)(LIBRARY_STATE tensor, dim));
  }

  THPUtils_invalidArguments(args, kwargs, "size", 2, "(int dim)", "no arguments");
  return NULL;
  END_HANDLE_TH_ERRORS
}
[[
  name: size
  defined_if: "!IS_CUDA"
  name: THSPTensor_(size)
  python_name: size
  method_flags: METH_KEYWORDS
  only_register: True
  sparse: yes
  options:
    - return: long
      cname: size
      arguments:
        - THSTensor* self
        - long dim
    - return: THLongStorage*
      cname: newSizeOf
      arguments:
        - THSTensor* self
]]
#endif

[[
  name: nDimension
  defined_if: "!IS_CUDA"
  sparse: yes
  python_name: ndimension
  return: long
  arguments:
    - THSTensor* self
]]
[[
  name: THPTensor_(nDimension)
  python_name: dim
  only_register: True
  method_flags: METH_KEYWORDS
  sparse: yes
]]

[[
  name: nnz
  defined_if: "!IS_CUDA"
  sparse: yes
  return: long
  arguments:
@@ -37,7 +63,6 @@

[[
  name: isContiguous
  defined_if: "!IS_CUDA"
  sparse: yes
  python_name: is_contiguous
  return: bool
@@ -54,9 +79,18 @@
    - THSTensor* self
]]

[[
  name: indices
  defined_if: "IS_CUDA"
  sparse: yes
  return: THCudaLongTensor*
  arguments:
    - THSTensor* self
]]


[[
  name: values
  defined_if: "!IS_CUDA"
  sparse: yes
  return: THTensor*
  arguments:
@@ -65,16 +99,23 @@

[[
  name: contiguous
  defined_if: "!IS_CUDA"
  sparse: yes
  return: argument 0
  arguments:
    - THSTensor* self
]]

[[
  name: clone
  sparse: yes
  cname: newClone
  return: THSTensor*
  arguments:
    - THSTensor* self
]]

[[
  name: toDense
  defined_if: "!IS_CUDA"
  sparse: yes
  python_name: to_dense
  return: THTensor*
@@ -82,9 +123,19 @@
    - THSTensor* self
]]

[[
  name: resizeAs_
  python_name: resize_as_
  sparse: yes
  cname: resizeAs
  return: self
  arguments:
    - THSTensor* self
    - THSTensor* template
]]

[[
  name: transpose
  defined_if: "!IS_CUDA"
  sparse: yes
  cname: newTranspose
  return: THSTensor*
@@ -96,7 +147,6 @@

[[
  name: transpose_
  defined_if: "!IS_CUDA"
  sparse: yes
  cname: transpose
  return: argument 0
@@ -108,7 +158,6 @@

[[
  name: mm
  defined_if: "!IS_CUDA"
  sparse: yes
  only_stateless: True
  cname: spaddmm
@@ -127,9 +176,29 @@
    - THTensor* mat2
]]

[[
  name: spmm
  only_stateless: True
  sparse: yes
  cname: spaddmm
  return: argument 0
  before_call: |
    long s1 = THSTensor_(size)(LIBRARY_STATE ((THSPTensor*)$arg4)->cdata, 0);
    long s2 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg5)->cdata, 1);
    THTensor_(resize2d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s1, s2);
    THTensor_(zero)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata);
  arguments:
    - arg: THTensor* result
      output: True
    - CONSTANT AS_REAL(0)
    - argument 0
    - CONSTANT AS_REAL(1)
    - THSTensor* mat1
    - THTensor* mat2
]]
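The spmm entry above binds a sparse-by-dense matrix product to spaddmm with beta = 0 and alpha = 1; its before_call hook resizes the destination to (mat1.size(0), mat2.size(1)) and zeroes it via $arg4, $arg5 and $arg0. As a rough illustration of the arithmetic being exposed, not the TH implementation, here is a self-contained C++ sketch that multiplies a COO-format sparse matrix by a row-major dense matrix; Entry and spmm here are illustrative names only:

#include <cstdio>
#include <vector>

// One non-zero of a sparse matrix in COO format: value at (row, col).
struct Entry { long row, col; double val; };

// result = sparse(mat1) * dense(mat2): the contract spmm exposes, i.e.
// spaddmm with the destination zeroed first (beta = 0) and alpha = 1.
std::vector<double> spmm(const std::vector<Entry>& mat1, long s1,
                         const std::vector<double>& mat2, long s2) {
  std::vector<double> result(s1 * s2, 0.0);  // resize to (s1, s2) and zero
  for (const Entry& e : mat1)                // scatter value * row of mat2
    for (long j = 0; j < s2; ++j)
      result[e.row * s2 + j] += e.val * mat2[e.col * s2 + j];
  return result;
}

int main() {
  // 2x3 sparse matrix with two non-zeros, times a 3x2 dense matrix.
  std::vector<Entry> a = {{0, 1, 2.0}, {1, 2, -1.0}};
  std::vector<double> b = {1, 2, 3, 4, 5, 6};  // row-major 3x2
  std::vector<double> c = spmm(a, 2, b, 2);
  for (long i = 0; i < 2; ++i)
    std::printf("%g %g\n", c[2 * i], c[2 * i + 1]);  // prints 6 8 and -5 -6
  return 0;
}

In the actual binding, the shapes come from the before_call block and the accumulation itself is done by the spaddmm kernel.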
[[
  name: sspmm
  defined_if: "!IS_CUDA"
  only_stateless: True
  sparse: yes
  cname: sspaddmm
@@ -150,7 +219,6 @@

[[
  name: sspaddmm
  defined_if: "!IS_CUDA"
  sparse: yes
  with_stateless: True
  return: argument 0
@@ -168,7 +236,6 @@

[[
  name: spadd
  defined_if: "!IS_CUDA"
  sparse: yes
  cname: spcadd
  with_stateless: True
@@ -182,3 +249,139 @@
    - THSTensor* mat2
]]

[[
  name: zero_
  sparse: yes
  cname: zero
  return: self
  arguments:
    - THSTensor* self
]]

[[
  name: add
  sparse: yes
  with_stateless: True
  return: argument 0
  cname: cadd
  arguments:
    - arg: THSTensor* result
      output: True
    - THSTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THSTensor* other
]]

[[
  name: add_
  sparse: yes
  return: argument 0
  cname: cadd
  arguments:
    - THSTensor* self
    - THSTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THSTensor* other
]]

[[
  name: sub
  sparse: yes
  with_stateless: True
  return: argument 0
  cname: csub
  arguments:
    - arg: THSTensor* result
      output: True
    - THSTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THSTensor* other
]]

[[
  name: sub_
  sparse: yes
  return: argument 0
  cname: csub
  arguments:
    - THSTensor* self
    - THSTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THSTensor* other
]]

[[
  name: mul
  sparse: yes
  return: argument 0
  with_stateless: True
  options:
    - cname: mul
      arguments:
        - arg: THSTensor* result
          output: True
        - THSTensor* self
        - real value
    - cname: cmul
      arguments:
        - arg: THSTensor* result
          output: True
        - THSTensor* self
        - THSTensor* other
]]

[[
  name: mul_
  sparse: yes
  return: argument 0
  options:
    - cname: mul
      arguments:
        - THSTensor* self
        - THSTensor* self
        - real value
    - cname: cmul
      arguments:
        - THSTensor* self
        - THSTensor* self
        - THSTensor* other
]]

[[
  name: div
  sparse: yes
  cname: div
  with_stateless: True
  return: argument 0
  arguments:
    - arg: THSTensor* result
      output: True
    - THSTensor* self
    - real value
]]

[[
  name: div_
  sparse: yes
  cname: div
  return: argument 0
  arguments:
    - THSTensor* self
    - THSTensor* self
    - real value
]]

[[
  name: sparse_mask
  cname: sparseMask
  return: argument 0
  arguments:
    - arg: THSTensor* result
      output: True
    - THTensor* self
    - THSTensor* mask
]]
@@ -2,6 +2,7 @@
[[
  name: THPTensor_(elementSize)
  python_name: element_size
  cpu_half: True
  only_register: True
]]
static PyObject * THPTensor_(elementSize)(THPTensor *self, PyObject *args)
@@ -13,6 +14,7 @@ static PyObject * THPTensor_(elementSize)(THPTensor *self, PyObject *args)
[[
  name: THPTensor_(storage)
  python_name: storage
  cpu_half: True
  only_register: True
]]
static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
@@ -31,6 +33,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
  name: storageOffset
  python_name: storage_offset
  cpu_half: True
  return: long
  arguments:
    - THTensor* self
@@ -39,6 +42,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
  name: nDimension
  python_name: ndimension
  cpu_half: True
  return: long
  arguments:
    - THTensor* self
@@ -46,6 +50,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
  name: THPTensor_(nDimension)
  python_name: dim
  cpu_half: True
  only_register: True
  method_flags: METH_KEYWORDS
]]
@@ -75,6 +80,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
  name: resize_
  return: self
  cname: resize
  cpu_half: True
  arguments:
    - THTensor* self
    - arg: THSize* size
@@ -107,6 +113,8 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
  name: numel
  return: long
  cname: nElement
  cpu_half: True
  with_stateless: True
  arguments:
    - THTensor* self
@@ -114,6 +122,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
  name: THPTensor_(numel)
  python_name: nelement
  cpu_half: True
  only_register: True
  method_flags: METH_KEYWORDS
]]
@@ -121,6 +130,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
  name: set_
  cname: set
  cpu_half: True
  return: argument 0
  options:
    - cname: set
@@ -159,6 +169,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
  name: THPTensor_(select)
  python_name: select
  cpu_half: True
  only_register: True
]]
static PyObject * THPTensor_(select)(THPTensor *self, PyObject *args)
@@ -213,6 +224,7 @@ PyObject * THPTensor_(size)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
  name: THPTensor_(size)
  python_name: size
  cpu_half: True
  method_flags: METH_KEYWORDS
  only_register: True
]]
@@ -253,6 +265,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
  name: THPTensor_(stride)
  python_name: stride
  cpu_half: True
  method_flags: METH_KEYWORDS
  only_register: True
]]
@@ -269,6 +282,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
  name: isSameSizeAs
  python_name: is_same_size
  cpu_half: True
  return: bool
  arguments:
    - THTensor* self
@@ -278,6 +292,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
  name: isContiguous
  python_name: is_contiguous
  cpu_half: True
  return: bool
  arguments:
    - THTensor* self
@@ -286,6 +301,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
  name: isSetTo
  python_name: is_set_to
  cpu_half: True
  return: bool
  arguments:
    - THTensor* self
@@ -326,20 +342,42 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
    - THBoolTensor* mask
]]

#if IS_CUDA
THTensor* THTensor_(transpose_neg)(THCState* state, THTensor *self, THTensor *src, int dim0, int dim1)
#else
THTensor* THTensor_(transpose_neg)(THTensor *self, THTensor *src, int dim0, int dim1)
#endif
{
  int ndim = self->nDimension;
  if (dim0 < 0)
    dim0 += ndim;
  if (dim1 < 0)
    dim1 += ndim;
  if (src != NULL) {
    THTensor_(transpose)(LIBRARY_STATE self, src, dim0, dim1);
    return NULL;
  } else {
    return THTensor_(newTranspose)(LIBRARY_STATE self, dim0, dim1);
  }
}
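The transpose_neg helper above exists mainly so that negative dimension indices are accepted before the call is forwarded to THTensor_(transpose) or newTranspose. A minimal standalone C++ sketch of that wrap-around convention follows; normalize_dim is an illustrative name, not a PyTorch function:

#include <cassert>
#include <cstdio>

// Maps a possibly negative dimension index into [0, ndim), mirroring the
// "if (dim < 0) dim += ndim" normalization used by transpose_neg above.
int normalize_dim(int dim, int ndim) {
  if (dim < 0)
    dim += ndim;
  assert(dim >= 0 && dim < ndim);  // out-of-range values are a caller error here
  return dim;
}

int main() {
  std::printf("%d %d %d\n",
              normalize_dim(-1, 4),   // 3: the last dimension
              normalize_dim(-4, 4),   // 0: the first dimension
              normalize_dim(2, 4));   // 2: already non-negative
  return 0;
}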
[[
  name: transpose
  with_stateless: True
  cname: newTranspose
  cname: transpose_neg
  cpu_half: True
  return: THTensor*
  arguments:
    - THTensor* self
    - CONSTANT NULL
    - long dim0
    - long dim1
]]

[[
  name: transpose_
  cname: transpose
  cname: transpose_neg
  cpu_half: True
  return: self
  arguments:
    - THTensor* self
@@ -378,6 +416,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)

[[
  name: squeeze
  cpu_half: True
  with_stateless: True
  return: argument 0
  options:
@@ -395,6 +434,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)

[[
  name: squeeze_
  cpu_half: True
  return: self
  options:
    - cname: squeeze
@@ -408,6 +448,30 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
    - long dim
]]

[[
  name: unsqueeze
  with_stateless: True
  cpu_half: True
  return: argument 0
  cname: unsqueeze1d
  arguments:
    - arg: THTensor* result
      output: True
    - THTensor* self
    - long dim
]]

[[
  name: unsqueeze_
  cpu_half: True
  return: self
  cname: unsqueeze1d
  arguments:
    - THTensor* self
    - THTensor* self
    - long dim
]]

[[
  name: nonzero
  with_stateless: True
@@ -434,6 +498,16 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
    - THTensor* self
]]

[[
  name: view
  cname: newView
  return: THTensor*
  arguments:
    - THTensor* self
    - arg: THSize* size
      long_args: True
]]

[[
  name: resizeAs_
  python_name: resize_as_
@@ -495,6 +569,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)

[[
  name: narrow
  cpu_half: True
  return: argument 0
  arguments:
    - arg: THTensor* result
@@ -507,6 +582,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)

[[
  name: unfold
  cpu_half: True
  return: argument 0
  arguments:
    - arg: THTensor* result
@@ -570,12 +646,13 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
  only_register: True
  only_stateless: True
]]
#ifndef TH_REAL_IS_HALF
static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
{
#if IS_CUDA && THCP_AUTO_GPU
  THCPAutoGPU __autogpu_guard = THCPAutoGPU(args);
#endif
  HANDLE_TH_ERRORS
#if IS_CUDA
  THCPAutoGPU __autogpu_guard(-1);
#endif
  Py_ssize_t _argcount = args ? PyTuple_Size(args) : 0;
  std::vector<THPObjectPtr> items;
  std::vector<THTensor *> item_tensors;
@@ -608,6 +685,10 @@ static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
    dimension = 0;
  }

#if IS_CUDA
  __autogpu_guard.setDevice(THTensor_(getDevice)(LIBRARY_STATE item_tensors[0]));
#endif

  result = (THPTensor *)THPTensor_(NewEmpty)();
  if (!result) return NULL;

@@ -622,10 +703,12 @@ invalid_arguments:
  return NULL;
  END_HANDLE_TH_ERRORS
}
#endif
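The cat() binding above now constructs its THCPAutoGPU guard with -1 and only calls setDevice once the device of the first input tensor is known, while still restoring the previous device when the guard leaves scope. A rough C++ sketch of that RAII pattern with a stand-in device API follows; AutoDeviceGuard and g_current_device are hypothetical, not the THC interface:

#include <cstdio>

// Stand-in for a per-thread "current device" (hypothetical; real code would
// call cudaGetDevice/cudaSetDevice or the THC equivalents).
static int g_current_device = 0;

class AutoDeviceGuard {
 public:
  // Passing -1 defers the switch until the target device is known,
  // mirroring `THCPAutoGPU __autogpu_guard(-1);` in the cat() body above.
  explicit AutoDeviceGuard(int device) : original_(g_current_device) {
    if (device >= 0) setDevice(device);
  }
  void setDevice(int device) { g_current_device = device; }
  ~AutoDeviceGuard() { g_current_device = original_; }  // restore on scope exit

 private:
  int original_;
};

int main() {
  {
    AutoDeviceGuard guard(-1);  // no switch yet
    guard.setDevice(1);         // switch once the inputs' device is known
    std::printf("inside: device %d\n", g_current_device);
  }
  std::printf("after: device %d\n", g_current_device);  // restored to 0
  return 0;
}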
[[
  name: data_ptr
  return: void*
  cpu_half: True
  cname: data
  arguments:
    - THTensor* self
@@ -643,6 +726,7 @@ invalid_arguments:
[[
  python_name: copy_
  name: THPTensor_(copy_)
  cpu_half: True
  method_flags: METH_KEYWORDS
  only_register: True
]]

@@ -9,6 +9,7 @@
  name: THPTensor_(apply)
  python_name: apply_
  defined_if: "!IS_CUDA"
  cpu_half: True
  only_register: True
  override_method_flags: METH_O
]]
@@ -43,6 +44,7 @@ static PyObject * THPTensor_(apply)(THPTensor *self, PyObject *arg)
  name: THPTensor_(map)
  python_name: map_
  defined_if: "!IS_CUDA"
  cpu_half: True
  only_register: True
]]
static PyObject * THPTensor_(map)(THPTensor *self, PyObject *args)
@@ -78,6 +80,7 @@ static PyObject * THPTensor_(map)(THPTensor *self, PyObject *args)
  name: THPTensor_(map2)
  python_name: map2_
  defined_if: "!IS_CUDA"
  cpu_half: True
  only_register: True
]]
static PyObject * THPTensor_(map2)(THPTensor *self, PyObject *args)

@@ -906,10 +906,10 @@
  arguments:
    - arg: THTensor* result
      output: True
    - THTensor* mat1
    - THTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THSTensor* mat2
    - THSTensor* other
]]

[[
@@ -1567,15 +1567,25 @@

[[
  name: addcmul_
  cname: addcmul
  return: argument 0
  arguments:
    - THTensor* self
    - THTensor* self
    - arg: real value
      default: AS_REAL(1)
    - THTensor* tensor1
    - THTensor* tensor2
  options:
    - cname: addcmul
      return: argument 0
      arguments:
        - THTensor* self
        - THTensor* self
        - arg: real value
          default: AS_REAL(1)
        - THTensor* tensor1
        - THTensor* tensor2
    - cname: spaddcmul
      return: argument 0
      arguments:
        - THTensor* self
        - THTensor* self
        - arg: real value
          default: AS_REAL(1)
        - THSTensor* tensor1
        - THSTensor* tensor2
]]

[[
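For reference, addcmul_ performs self += value * tensor1 * tensor2 element-wise, and the new spaddcmul option above adds a variant where tensor1 and tensor2 are sparse. A dense-only C++ sketch of that arithmetic follows (illustrative, not the TH kernel):

#include <cstdio>
#include <vector>

// self[i] += value * tensor1[i] * tensor2[i]: the element-wise contract of addcmul_.
void addcmul_(std::vector<double>& self, double value,
              const std::vector<double>& tensor1,
              const std::vector<double>& tensor2) {
  for (size_t i = 0; i < self.size(); ++i)
    self[i] += value * tensor1[i] * tensor2[i];
}

int main() {
  std::vector<double> self = {1, 1, 1};
  addcmul_(self, 0.5, {2, 4, 6}, {10, 10, 10});
  std::printf("%g %g %g\n", self[0], self[1], self[2]);  // prints 11 21 31
  return 0;
}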
@@ -11,7 +11,7 @@
    - long n
]]

#if !IS_CUDA
#if !defined(TH_REAL_IS_HALF) && !IS_CUDA
static void THTensor_(random2__)(THTensor *self, THGenerator *gen, long a, long b)
{
  THArgCheck(b >= a, 2, "upper bound must be greater or equal than lower bound");

@@ -52,6 +52,10 @@ PyObject * THPTensor_(toNumpy)(THPTensor *self, PyObject *args) {
#if !defined(WITH_NUMPY)
  THPUtils_setError("PyTorch was compiled without numpy support\n");
  return NULL;
#elif defined(THC_GENERIC_FILE)
  THPUtils_setError("can't convert CUDA tensor to numpy (it doesn't support GPU arrays). "
      "Use .cpu() to move the tensor to host memory first.");
  return NULL;
#elif !defined(NUMPY_TYPE_ENUM)
  THPUtils_setError("numpy conversion for %s is not supported\n", THPUtils_typename(self));
  return NULL;

@@ -29,22 +29,35 @@ THTensor * THPTensor_(newWithMetadataFileRaw)(int fd, THStorage *storage)
void THPStorage_(writeFileRaw)(THStorage *self, int fd)
{
  real *data;
  int64_t size = self->size;
#ifndef THC_GENERIC_FILE
  data = self->data;
#else
  std::unique_ptr<char[]> cpu_data(new char[self->size * sizeof(real)]);
  std::unique_ptr<char[]> cpu_data(new char[size * sizeof(real)]);
  data = (real*)cpu_data.get();
  THCudaCheck(cudaMemcpy(data, self->data, self->size * sizeof(real), cudaMemcpyDeviceToHost));
  THCudaCheck(cudaMemcpy(data, self->data, size * sizeof(real), cudaMemcpyDeviceToHost));
#endif
  SYSCHECK(write(fd, &self->size, sizeof(long)));
  ssize_t result = write(fd, &size, sizeof(int64_t));
  if (result != sizeof(int64_t))
    throw std::system_error(result, std::system_category());
  // fast track for bytes and little endian
  if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
    SYSCHECK(write(fd, data, sizeof(real) * self->size));
    char *bytes = (char *) data;
    int64_t remaining = sizeof(real) * size;
    while (remaining > 0) {
      ssize_t result = write(fd, bytes, remaining);
      if (result < 0)
        throw std::system_error(result, std::system_category());
      bytes += result;
      remaining -= result;
    }
    if (remaining != 0)
      throw std::system_error(result, std::system_category());
  } else {
    long buffer_size = std::min(self->size, (long)5000);
    int64_t buffer_size = std::min(size, (int64_t)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
    for (long i = 0; i < self->size; i += buffer_size) {
      size_t to_convert = std::min(self->size - i, buffer_size);
    for (int64_t i = 0; i < size; i += buffer_size) {
      size_t to_convert = std::min(size - i, buffer_size);
      if (sizeof(real) == 2) {
        THP_encodeInt16Buffer((uint8_t*)le_buffer.get(),
                              (const int16_t*)data + i,
@@ -61,17 +74,27 @@ void THPStorage_(writeFileRaw)(THStorage *self, int fd)
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      }
      SYSCHECK(write(fd, data, to_convert * sizeof(real)));
      SYSCHECK(write(fd, le_buffer.get(), to_convert * sizeof(real)));
    }
  }
}
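The rewritten writeFileRaw above replaces the single SYSCHECK(write(...)) with an explicit loop because POSIX write(2) may transfer fewer bytes than requested. A self-contained POSIX C++ sketch of that write-everything pattern follows; write_all is an illustrative helper, error handling is simplified, and an EINTR retry is added here for robustness, whereas the patched code throws std::system_error:

#include <cerrno>
#include <cstring>
#include <stdexcept>
#include <unistd.h>

// Keeps calling write(2) until every byte has been written, since a single
// call may perform a partial write (e.g. on pipes or when interrupted).
void write_all(int fd, const void* data, size_t count) {
  const char* bytes = static_cast<const char*>(data);
  size_t remaining = count;
  while (remaining > 0) {
    ssize_t result = write(fd, bytes, remaining);
    if (result < 0) {
      if (errno == EINTR) continue;  // retry if interrupted by a signal
      throw std::runtime_error(std::strerror(errno));
    }
    bytes += result;
    remaining -= static_cast<size_t>(result);
  }
}

int main() {
  const char msg[] = "partial writes handled\n";
  write_all(STDOUT_FILENO, msg, sizeof(msg) - 1);
  return 0;
}

The readFileRaw path below mirrors this with read(2), additionally treating a return value of 0 (end of file) as an error.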
THStorage * THPStorage_(readFileRaw)(int fd)
THStorage * THPStorage_(readFileRaw)(int fd, THStorage *_storage)
{
  real *data;
  long size;
  SYSCHECK(read(fd, &size, sizeof(long)));
  THStoragePtr storage = THStorage_(newWithSize)(LIBRARY_STATE size);
  int64_t size;
  ssize_t result = read(fd, &size, sizeof(int64_t));
  if (result != sizeof(int64_t))
    throw std::system_error(result, std::system_category());
  THStoragePtr storage;
  if (_storage == nullptr) {
    storage = THStorage_(newWithSize)(LIBRARY_STATE size);
  } else {
    THPUtils_assert(_storage->size == size,
        "storage has wrong size: expected %ld got %ld",
        size, _storage->size);
    storage = _storage;
  }

#ifndef THC_GENERIC_FILE
  data = storage->data;
@@ -82,11 +105,21 @@ THStorage * THPStorage_(readFileRaw)(int fd)

  // fast track for bytes and little endian
  if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
    SYSCHECK(read(fd, data, sizeof(real) * storage->size));
    char *bytes = (char *) data;
    int64_t remaining = sizeof(real) * storage->size;
    while (remaining > 0) {
      ssize_t result = read(fd, bytes, remaining);
      if (result <= 0) // 0 means EOF, which is also an error
        throw std::system_error(result, std::system_category());
      bytes += result;
      remaining -= result;
    }
    if (remaining != 0)
      throw std::system_error(result, std::system_category());
  } else {
    long buffer_size = std::min(size, (long)5000);
    int64_t buffer_size = std::min(size, (int64_t)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
    for (long i = 0; i < size; i += buffer_size) {
    for (int64_t i = 0; i < size; i += buffer_size) {
      size_t to_convert = std::min(size - i, buffer_size);
      SYSCHECK(read(fd, le_buffer.get(), sizeof(real) * to_convert));
      if (sizeof(real) == 2) {