Compare commits


276 Commits

Author SHA1 Message Date
ac9245aeb3 import numpy before setting dlopen flags (#928) 2017-03-05 14:30:13 -05:00
60736bdf99 fix corner case in kwargs for DataParallel (#930) 2017-03-05 14:27:52 -05:00
7d58765cee docs: Fixed example code bug in extending module doc. 2017-03-05 12:09:08 -05:00
76f7d749e4 bump version 2017-03-05 08:49:52 -08:00
0b7374eb44 add THCS to build_all flags 2017-03-05 11:32:43 -05:00
6fff764155 replace old select_compute_arch.cmake with new 2017-03-05 11:32:43 -05:00
8ced72ccb8 link THPP to THCS when CUDA available 2017-03-05 11:32:43 -05:00
b1ae7f90d5 Added functionality for data parallel table (#843) 2017-03-05 02:35:46 +01:00
8b61ee522e Merge commit 'aec182ae72d51dad0f46cdfe7ff9a41380d7da35' 2017-03-04 08:58:21 -08:00
76ca3eb191 Merge commit 'fea50a51ee2d9af15c42f785ab2232469357b557' 2017-03-04 08:58:02 -08:00
fea50a51ee reintroduce USE_AVX* for files which don't have -mavx* set 2017-03-04 08:55:43 -08:00
51e589ed73 fix critical bug in adds SSE implementation 2017-03-04 08:39:19 -08:00
2e87643761 remove fastmath for everything except simd/convolve 2017-03-04 08:16:47 -08:00
ba9a85f271 fix bug introduced in #952 2017-03-03 21:00:05 -08:00
0714d7a3ca set AVX/AVX2 flags only for specific files 2017-03-03 12:17:14 -08:00
c238ee3681 Fix issues with lazy grad initialization (#912) 2017-03-03 14:23:51 -05:00
f5338a1fb8 compile AVX and AVX2 intrinsic code in separate files. Cleanup use of USE_AVX and USE_AVX2 macros in favor of __AVX__ and __AVX2__ 2017-03-03 10:30:18 -08:00
d96ad41191 cleanup TH CMakeLists and THGeneral.h of unused flags 2017-03-03 09:48:26 -08:00
f17cfe4293 sparse tensor operations (#735) 2017-03-03 18:37:03 +01:00
aec182ae72 Support half precision in baddbmm 2017-03-03 16:15:39 +01:00
c93c884ee2 Add negative dimension to transpose and tests (#792) 2017-03-03 09:31:22 -05:00
c42a2d4d24 Fix dimension check for cat (#959)
* Use TH_INDEX_BASE when verifying dimension for cat

* Adding tests for cat when no dimension is specified.

- Also renamed ldimension to cat_dimension to be more specific.
2017-03-03 09:05:06 -05:00
f89252c336 Merge pull request #719 from twitter-forks/cat-fix
Fixes to cat
2017-03-03 09:04:06 -05:00
490c15fae9 Fix slicing with step (#905) 2017-03-03 09:00:14 -05:00
f2d72ba10f Revert "make handles to be thread-local"
This reverts commit 0720ba53b344809ce3d0bdfb1ea561afa5fe0646.
2017-03-02 17:48:24 -08:00
2108b42b92 Fix bug in cat when dimension is not specified.
- Code was using dimension specified which was negative
- Changed the cat_dimension variable to be more explicit
- Fixed code to use the cat_dimension variable
2017-03-02 16:14:09 -08:00
bae8df62d3 Add missing THCudaCheck around cudaMemcpy 2017-03-02 16:13:39 -08:00
98775b6bb4 Merge pull request #718 from killeent/templatize-scan
genericize PrefixSum --> PrefixScan via binary operator template parameter
2017-03-02 17:50:56 -05:00
b7cc2a501f genericize PrefixSum --> prefixScan 2017-03-02 14:31:27 -08:00
0720ba53b3 make handles to be thread-local 2017-03-02 11:10:49 -08:00
ff5fa11129 make mkl link to threaded version with GCC (#958) 2017-03-02 13:37:25 -05:00
5e7f5db332 add subset samplers (#888) 2017-03-02 09:26:10 -05:00
b5f7592140 boolean mode in module.train 2017-03-02 09:18:05 -05:00
f366e5fc81 Support int16 numpy conversions
issue #891
2017-03-02 09:15:57 -05:00
48f087f6ce C99 cleanup broke MSVC (#952)
* __pragma for MSVC.
2017-03-02 08:57:28 -05:00
7ad948ffa9 fix tests to not sys.exit(), also fix fatal error on THC initialization 2017-03-01 17:37:04 -05:00
3277d83648 Add Nesterov Momentum (#887) 2017-03-01 20:49:59 +01:00
1487278fdf Allow backprop through cuDNN RNN in eval mode
Handling of dropout descriptors has been improved too.
2017-03-01 19:42:39 +01:00
977630bc15 Handle duplicate backward roots in autograd 2017-03-01 19:42:39 +01:00
12efd53dba ConstantPad2d and F.pad (#856) 2017-03-01 19:39:44 +01:00
37e05485d9 added initialization schemes in torch.nn.init (#833) 2017-03-01 19:34:13 +01:00
c76770f40e Merge commit 'dfca8dfdc5988813ed5673589ffa4fdd1c4f3d2d' 2017-03-01 09:29:51 -08:00
da725830c2 Add support for variable length sequences in RNNs (#873) 2017-03-01 17:36:32 +01:00
fc6fcf23f7 Lock the cudaFree mutex. (#880)
Prevents NCCL calls from overlapping with cudaFree() which can lead to
deadlocks.
2017-03-01 11:29:25 -05:00
b190f1b5bc Add another pinned memory test.
Checks that pinned memory freed on a different GPU from which it was
allocated isn't re-used too soon.
2017-03-01 12:22:31 +01:00
dfca8dfdc5 ensure valid index in multinomial 2017-02-28 14:48:48 -08:00
b46d5e0b04 Fix NN bindings 2017-02-28 14:35:38 -08:00
f19a11a306 Merge commit '8e8022b7351401911e10b94aeb5ae35d32907705' 2017-02-28 14:35:20 -08:00
cfcf69703f Merge commit '80429ad9f7c4775f7f88344a2cf037e499f060b8' 2017-02-28 14:35:00 -08:00
e22b8e0d17 Merge commit '3cc89afde68a831434f3abe9e3af2ac0b134215e' 2017-02-28 14:34:44 -08:00
fbfba6bdca Merge commit '6ff77503645da59eeca5be473a1902e523c4adb3' 2017-02-28 14:34:29 -08:00
3cc89afde6 Merge pull request #713 from killeent/multinomial-indexing-fix
fix indexing bug in sampleMultinomialOnce
2017-02-28 17:13:44 -05:00
1e4aee057c Merge pull request #712 from killeent/multinomial-fixes
Fix sampleMultinomialOnce to better handle large distribution values
2017-02-28 17:12:48 -05:00
8dfcf7e35a Merge pull request #709 from colesbury/pinned_memory
Fix bug where pinned memory event could be recorded on incorrect device
2017-02-28 16:56:21 -05:00
76de151ddd Fix bug where pinned memory event could be recorded on incorrect device 2017-02-28 13:48:56 -08:00
2676cc46c2 fix indexing bug in sampleMultinomialOnce 2017-02-28 13:40:15 -08:00
1bf7bc9768 refactor sampleMultinomialOnce to use <real, accreal>, assertion for sum overflow 2017-02-28 12:46:12 -08:00
3c41c9fe46 Add AutoGPU RAII that doesn't depend on Python API (#875)
Separates out non-Python part of AutoGPU. This also compiles without
CUDA which is useful for generic tensor code.

Also fixes a bug where THCPAutoGPU may not always switch the device:

  THCPAutoGPU guard(-1);
  guard.setDevice(0);
  guard.setDevice(1);
  guard.setDevice(0);  // would not switch back to 0
2017-02-28 14:39:20 -05:00
6ff7750364 add TH_TENSOR_APPLY variants for optimized redux (+refactor) 2017-02-28 10:30:31 -08:00
4d25c3d048 address comments and add tests 2017-02-28 10:23:36 -08:00
267b7ade50 Speed up reductions on non-contiguous dimensions 2017-02-28 10:23:36 -08:00
80429ad9f7 THVector_(add) -> THVector_(adds) 2017-02-28 12:20:44 -05:00
5ca6516ecb THVector_(add),(mul),(div) -> (adds),(muls),(divs) 2017-02-28 12:10:47 -05:00
67f94557ff Expose torch.HalfTensor 2017-02-27 19:35:47 -05:00
61bd5a0643 [Lint] Address F811 2017-02-27 19:33:00 -05:00
748d011c8b [Lint] Address F812 2017-02-27 19:33:00 -05:00
5d5cfe2e57 [Lint] Address E731 2017-02-27 19:33:00 -05:00
7cbe255296 [Lint] Use flake8 instead of pep8 2017-02-27 19:33:00 -05:00
4ef303698c Merge pull request #711 from gchanan/getDeviceAllocator
Add getter for cuda device allocator.
2017-02-27 19:29:39 -05:00
83e8b3f6c3 Add getter for cuda device allocator. 2017-02-27 15:44:44 -08:00
502ebed796 Fix one more reference cycle and ensure correct flag propagation (#868) 2017-02-27 18:38:29 -05:00
68ff58d771 Expose a mutex that is held around cudaFree() calls.
NCCL can deadlock if cudaFree() is called while it's launching kernels.
This exposes a mutex that can be held to prevent cudaFree() calls in the
caching allocator.
2017-02-27 15:08:30 -08:00
969c1602e6 Add Tensor::copy() to THPP
For now, this only supports copying from the same type. We can add
polymorphic copying in the future.
2017-02-27 21:33:40 +01:00
5e1d6a3691 Update functional.py (#862)
Fixed documentation error in conv3d
2017-02-27 10:42:02 -05:00
533cfc0381 Minor fix of docs of ModuleList and ParameterList (#861) 2017-02-27 10:09:54 +01:00
2b23712dc3 Improve autograd memory usage (#859) 2017-02-26 22:37:26 -05:00
88275da5e8 CUDA documentation tweaks (#858) 2017-02-26 20:37:43 +01:00
bd7a5ad6f0 Make Optimizer.load_state_dict use __setstate__ 2017-02-26 20:02:42 +01:00
1f6f82dbcf Fall back to indexing compatible with numpy 2017-02-26 20:02:42 +01:00
1f8939937a Allow using expand to broadcast tensors 2017-02-26 20:02:42 +01:00
b3d41a5f96 Add docs for ModuleList and ParameterList 2017-02-26 20:02:42 +01:00
fec2d493a9 Reshape grad_output in basic ops 2017-02-26 20:02:42 +01:00
86ee75f63f Fix for Long and Byte tensor indexing of Variables 2017-02-26 20:02:42 +01:00
31941918cf Prevent creation of reference cycles with leaf Variables that don't require grad
Also, raise an error immediately if a leaf that requires_grad is
modified in-place. Some comments were updated too.
2017-02-26 20:02:42 +01:00
19a65d2bea Expose stateless methods for torch.cuda.HalfTensor 2017-02-26 20:02:42 +01:00
819d4b2b83 Add finite differences gradcheck (#851) 2017-02-26 08:35:24 -05:00
b87c113cf4 CUDA documentation enhancement and docs versioning (#848)
* Add more detail to CUDA documentation

Also adds better cross-linking to the pages that discuss relevant topics.

* Adds recommendation to torch.save docs

* Make the version numbers for the docs dynamic

Might need tweaks for beta, 1.0, etc.
2017-02-26 08:33:26 -05:00
b25182971f readme change for getting clarity on binaries 2017-02-26 07:52:13 -05:00
1ee2c47e37 Correcting the description of LSTM attributes (#854) 2017-02-26 13:30:55 +01:00
2dc563f1f1 Fix indexing when passing only an Ellipsis 2017-02-25 23:34:09 +01:00
15ba71a275 Rebase fixes 2017-02-25 17:14:52 +01:00
e5b3fc49d6 Implementation of the 3rd set of tensor functions 2017-02-25 17:14:52 +01:00
ae1766951d Link TH and THPP to THD (#57)
* Fix THD library build

* THPP dependency added

* Minor cleanup; Fix build on OSX
2017-02-25 17:14:52 +01:00
02d08dafd9 Add support for IPv6 in Data Channel TCP (#53) 2017-02-25 17:14:52 +01:00
13a5090695 Added a size change in MaxPool1d module and improved tests (#771) (#832)
Backend is SpatialDilatedMaxPooling, so change 3D input (N*C*L)
to 4D size (N*C*1*L). Then output indices will range from 0 to L.
This range will not cause UnMaxPool1D error.

Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-25 08:53:30 -05:00
8e32e4c04c make wrap_generic_function importable 2017-02-24 14:27:54 -08:00
cf991310c3 c++ virtual function fix 2017-02-24 13:22:44 -08:00
938706099e adding environment flags to disable SIMD codepaths 2017-02-24 07:35:11 -05:00
3330287dc7 Update dataloader.py (#837) 2017-02-23 14:38:41 -05:00
38c8520adf adding unsqueeze to docs 2017-02-23 12:13:25 -05:00
492e1746af Fix THFree in THTensorApply 2017-02-23 06:01:13 -05:00
91a8109cfd Use C99 for openmp cleanup 2017-02-23 06:01:13 -05:00
161490d34a Add memcpy copy 2017-02-23 06:01:13 -05:00
9c302852eb comments fix 2017-02-23 06:01:13 -05:00
8654fcfd60 THVectorDefault style fix 2017-02-23 06:01:13 -05:00
b3d527d9a0 Tab style fix 2017-02-23 06:01:13 -05:00
4d495218c9 THTensorApply3 contiguous optimizations 2017-02-23 06:01:13 -05:00
13a041284c THTensorApply2 copy optimization 2017-02-23 06:01:13 -05:00
c60c1a003d TH_TENSOR_APPLY2 contiguous optimization 2017-02-23 06:01:13 -05:00
97add1a5ea comment fix 2017-02-23 06:01:13 -05:00
ca02930e47 Fill bug fix 2017-02-23 06:01:13 -05:00
20d5e95077 THTensorApply3 compress counter 2017-02-23 06:01:13 -05:00
eb4a7dc11d THTensorApply change dims to sizes 2017-02-23 06:01:13 -05:00
f722498b72 THTensorApply2 counter compress 2017-02-23 06:01:13 -05:00
aadfb6fe83 THTensorApply reduce memory overhead 2017-02-23 06:01:13 -05:00
6c273594c9 THTensorApply Counter compress 2017-02-23 06:01:13 -05:00
e475c82fa1 Add isTransposed judge and enable multithread of fill functions 2017-02-23 06:01:09 -05:00
0c2e6665df Add AVX copy 2017-02-23 05:50:34 -05:00
6295e6e94b Rebase master 2017-02-23 05:50:34 -05:00
670a4aa708 Fix AVX2 bugs 2017-02-23 05:50:34 -05:00
1bdc2e64ed Add fma cadd 2017-02-23 05:50:34 -05:00
c587be1e50 Add THVector Fill 2017-02-23 05:50:34 -05:00
bd481596f5 optimize THVector add mul div 2017-02-23 05:50:34 -05:00
a504d56b43 Fix THVector cmul AVX bug 2017-02-23 05:50:30 -05:00
91c4dfccea Use THVector cadd AVX 2017-02-23 05:46:44 -05:00
27f618c44d Add THVector Fill AVX 2017-02-23 05:46:44 -05:00
a14482a1df Add THVector cadd AVX 2017-02-23 05:46:40 -05:00
aa50c5734b Add THVector AVX cmul 2017-02-23 05:46:07 -05:00
293001a4fe Add THVector SSE div cdiv 2017-02-23 05:46:07 -05:00
638cfdf150 Add SSE add 2017-02-23 05:46:07 -05:00
5f80a14525 Separate SSE and AVX 2017-02-23 05:46:07 -05:00
1342fd3975 Remove THTensorMathSIMD THTensorMathDispatch 2017-02-23 05:46:07 -05:00
8d4af38489 Add THVector div cdiv 2017-02-23 05:46:07 -05:00
575a064e66 Remove THVector diff 2017-02-23 05:46:07 -05:00
3ab21a3c4f Merge THVector mul AVX 2017-02-23 05:46:07 -05:00
2f592e6c7d Remove THVector scale 2017-02-23 05:46:07 -05:00
5661ffb766 Merge THVector mul 2017-02-23 05:46:03 -05:00
9b74503daa Merge THVector cmul 2017-02-23 05:40:33 -05:00
24848f1cd8 Change THVector mul to cmul 2017-02-23 05:40:33 -05:00
a31a07ede9 Merge THVector add 2017-02-23 05:40:33 -05:00
c8c4c9b23d Change THVector add to cadd and fix NEON 2017-02-23 05:40:33 -05:00
e1ed9303f0 Add multi-thread add 2017-02-23 05:40:33 -05:00
a43aab13c2 Fix THTensorMath.c style 2017-02-23 05:40:33 -05:00
c698b4a45e Add Dispatches for div and mul 2017-02-23 05:40:29 -05:00
c6a0ffab50 Add AVX single float and double float add 2017-02-23 05:40:24 -05:00
8ba7cc30d1 Add THTensorMathSIMD.c 2017-02-23 05:32:34 -05:00
61bf08ca24 Fix compilation for simd tensor add 2017-02-23 05:32:28 -05:00
6ada3c0c16 Fast floating point add kernel in intrinsics (11x speedup over default for 10k elements) 2017-02-23 05:11:44 -05:00
60061fbe79 Fixed up CPU dispatch and tested. Can begin implementing kernels 2017-02-23 05:11:44 -05:00
46e7042add SIMD helper header, modified add in THTensorMath to check dispatch 2017-02-23 05:11:44 -05:00
d0c182773b First commit for dynamic CPU dispatch: general framework in place (need to create dispatch tables and stubs for all functions and make impls have hidden linkage) 2017-02-23 05:11:44 -05:00
b6f60585b5 fix AVX2 detection bugs 2017-02-23 05:00:55 -05:00
4b0e3ee219 Merge pull request #699 from twitter-forks/bitops
Bitwise operations
2017-02-23 04:15:35 -05:00
838842d4b2 fix documentation error. [issue #790](https://github.com/pytorch/pytorch/issues/790) (#831) 2017-02-23 08:59:29 +01:00
e71cf20192 improved serialization (no tar copy) (#713) 2017-02-22 22:24:20 +01:00
adb4cb2b5b contiguous view backward (#816) 2017-02-21 19:09:36 -05:00
6073f9b46c update table in README.md
it removes the empty top row
2017-02-21 12:58:04 -05:00
8e8022b735 Merge pull request #418 from ruotianluo/adaptiveAverage
Add SpatialAdaptiveAveragePooling.
2017-02-21 09:15:12 -05:00
da82d2dd70 Merge pull request #434 from bottler/master
VolumetricFractionalMaxPooling like spatial
2017-02-21 09:13:59 -05:00
82176473a5 Merge pull request #442 from twitter-forks/half-fixes
Convert real to accreal in libTHCUNN
2017-02-21 09:12:56 -05:00
2d269a9a72 Merge pull request #1137 from twitter-forks/half-fixes
Using accreal instead of real in the API
2017-02-21 09:12:32 -05:00
240372a991 Fixed topk documentation for largest=True 2017-02-21 04:38:24 -05:00
5b10411c8c Fixed some mistakes in examples
Fixed mistakes in LSTMCell and GRUCell examples.
2017-02-21 04:17:28 -05:00
4c474a9939 Improve prodall CUDA test 2017-02-20 23:28:31 -08:00
7ea6ae57c8 Support numpy arrays in default_collate 2017-02-20 23:28:31 -08:00
42633f8986 Fix misspelling and add support for weights in NLLLoss2d 2017-02-20 23:28:31 -08:00
84248690a9 Add support for indexing with None and slices with positive steps 2017-02-20 23:28:31 -08:00
53409ca0fb Fix a warning in THPP 2017-02-20 23:28:31 -08:00
c2c1710047 Add clip_grad_norm 2017-02-20 23:28:31 -08:00
876202503f Support multiple inputs in data parallel 2017-02-20 23:28:31 -08:00
946a7d9bc3 Make input contiguous only once in backward of cuDNN RNN 2017-02-20 23:28:31 -08:00
608bcd3b15 Return correct number of gradients from cuDNN RNN 2017-02-20 23:28:31 -08:00
632b02a477 Add checks for reward type and size in StochasticFunction 2017-02-20 23:28:31 -08:00
0db9c63300 Use library_dirs in setup.py 2017-02-20 23:28:31 -08:00
873ed4e6b6 Add better error message for conversion of CUDA tensors to numpy 2017-02-20 23:28:31 -08:00
01bd43037d add docs to torch/cuda/random 2017-02-20 20:43:47 -05:00
68c9e3f232 Fixed typo in GRUCell example 2017-02-21 01:37:04 +01:00
a25c8555eb Fixed paper references 2017-02-21 00:27:18 +01:00
dfd1dff383 Merge commit '4ca26fbc1b7be4e369f84e95df16431bb2f1dcb7' 2017-02-20 08:05:19 -08:00
8f391d4d51 Merge commit 'ee43cd7adca3b24a2071ce6c55dcd3a95a2b6ff6' 2017-02-20 07:55:46 -08:00
2a6b7685ae Merge commit 'f6c1bbfa483ad19c500dc94838baaa69f02d240b' 2017-02-20 07:55:19 -08:00
eb9573107d Merge commit '34b7fed802db1fda6322a70b648dcc4947858719' 2017-02-20 07:54:51 -08:00
ee43cd7adc Do SpatialClassNLLCriterion sizeAverage in a separate kernel 2017-02-20 06:54:23 -08:00
4ca26fbc1b Remove averaging from prodall 2017-02-20 11:37:53 +01:00
c165226325 Print a readable error message when arguments are on different GPUs 2017-02-20 11:35:50 +01:00
49295ebe54 Add sequential to documentation 2017-02-18 08:42:43 +05:30
455038e470 Use a more stable formula for spatial LogSoftMax 2017-02-17 13:05:45 -08:00
ca7f02ea0c Add shape checks for SpatialClassNLLCriterion 2017-02-17 13:01:56 -08:00
04aba1caec Fix cuDNN dropout desc for multi-gpu (#772) 2017-02-17 19:16:12 +01:00
f6c1bbfa48 Merge pull request #1105 from ruotianluo/adaptiveAvg
Add SpatialAdaptiveAveragePooling
2017-02-17 10:52:33 -05:00
4e2c8c6db5 Merge pull request #1123 from bottler/master
VolumetricFractionalMaxPooling like Spatial...
2017-02-17 10:42:21 -05:00
c26b9c0a5e Update rnn.py
Per https://github.com/pytorch/pytorch/blob/master/torch/backends/cudnn/rnn.py#L302, the output is returned with dimensions (0, 1) transposed if the batch_first argument is set to True.
2017-02-17 14:37:14 +01:00
aaf41c61a6 Fix Engine::compute_dependencies 2017-02-17 18:28:51 +05:30
dd844f741b Fix previous_functions when it contains Variables 2017-02-17 11:03:46 +05:30
7117a9012e Fix flaky non-contig test 2017-02-17 10:40:08 +05:30
1bdc28161a Add torch.__version__ 2017-02-17 10:40:08 +05:30
5e150caf38 Fix a bug in Engine::compute_dependencies 2017-02-17 10:40:08 +05:30
c0c62d099a Make detach() actually remove the creator 2017-02-17 10:40:08 +05:30
b9ece39685 Make torch.Size methods return torch.Size, not tuple 2017-02-17 10:40:08 +05:30
15ef008877 Using accreal instead of real in the API
- This reverts commit 7a07afe545b4deae5919d9dc268bfac3d37398c7.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:34:11 -08:00
b14d6318f8 Convert real to accreal in libTHCUNN
- This reverts commit 0d85922d116879448485ef88ae21e83a9255a0b0.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:33:03 -08:00
7c44506441 allow DataParallel to have tuple inputs on a single GPU 2017-02-16 19:07:17 +01:00
937ba581d7 Improve nn.legacy compatibility with Torch7 (#738) 2017-02-16 21:17:12 +05:30
2ae54f1194 setup.cfg -> tox.ini (#761) 2017-02-16 21:13:13 +05:30
a217fefee1 Update rnn.py
Fixed a problem with outputting the RuntimeError if arguments are incorrect in cudnn/rnn.py
2017-02-15 21:49:42 +01:00
34b7fed802 Fix gcc 4.4.7 build. 2017-02-15 09:06:25 -08:00
5221745c21 add test for bias=False for 3d convolution 2017-02-15 04:26:44 -08:00
000ca44b16 Merge commit '797544c47a4e9bdff02137a127f883a6df9b3dfe' 2017-02-15 04:24:14 -08:00
8f3d44033b Merge commit '0426f2f3ec2b932cb83d64101081244c2a1451b1' 2017-02-15 04:23:50 -08:00
7cc14c595a Merge commit '07f5b21ef1bd29d1451c616062dcbfc3f8fd7c6a' 2017-02-15 04:23:18 -08:00
797544c47a implementation of bias=False for VolConv.cu 2017-02-15 04:18:17 -08:00
0426f2f3ec implementation of bias=False for VolConv.c
Used .c file changes from 7318e2de13 as a starting point. All changes to .c files (except for whitespace details) are present here.
However, the required .h files were not present in that PR.
2017-02-15 04:16:09 -08:00
336eeee895 kernel_size as the default stride for avg_pool1d (#744)
Following the documentation, let stride default to kernel_size if stride is not provided.
2017-02-15 13:12:18 +05:30
593f867e3e Fixed a simple compile error on Mac OS #745. (#746)
Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-15 12:19:03 +05:30
385913be1c Fix class torch.nn.ConvTransposeNd documentation (#739)
There is no `dilation`
`output_padding` doc was missing
2017-02-15 10:37:20 +05:30
6aaa14f5fe Fix LSTMCell Doc Typo (#743) 2017-02-15 08:29:17 +05:30
07f5b21ef1 Merge pull request #702 from gchanan/conservativeAllocator
Improve THCCachingHostAllocator performance by making it reclaim less aggressively
2017-02-15 08:26:48 +05:30
e454870396 Free set of stored streams and handle NULL streams. 2017-02-14 15:41:47 -08:00
2822013437 Fix flaky tests 2017-02-14 21:28:50 +01:00
72c1982734 Add some more asserts to cuDNN RNN 2017-02-14 21:28:50 +01:00
0de2ea305a Support retain_variables in cuDNN RNN 2017-02-14 21:28:50 +01:00
d899385a3d Raise error when too small input is given to conv 2017-02-14 21:28:50 +01:00
c6d6cbe8a6 Check that all tensors are on the same GPU in cuDNN bindings 2017-02-14 21:28:50 +01:00
85e82e85d8 Fix bug in zero_grad, when some parameters didn't require grad 2017-02-14 21:28:50 +01:00
a1534cc37d Fix auto-gpu in cat 2017-02-14 21:28:50 +01:00
8c8dc791ef Load half and double THCUNN backends 2017-02-14 21:28:50 +01:00
63edca44f2 Add tests for non-contiguous inputs and gradients 2017-02-14 21:28:50 +01:00
8d90ab2d9b compile with cudart (#737) 2017-02-14 06:40:35 +05:30
bd5303010d Refactor autograd package to separate Python dependencies. (#662)
The core autograd Variable, Function, and Engine no longer depend on the
Python API. This lets us implement functions in C++. In the future, we
can also multithread the engine and release the GIL for most of the
non-Python backwards.
2017-02-13 16:00:16 -08:00
16d2c3d7b3 make networks converted with loadcaffe loadable 2017-02-13 23:53:46 +01:00
407a92dc26 std::min() requires same type (#732)
* std::min() requires same type

* cast buffer instead

* declare buffer_size as int64_t
2017-02-13 18:06:05 +01:00
0a893abc7b fix serialization bug for large files 2017-02-12 19:13:02 +01:00
34fa5e0dc7 Update docstrings for testing object type
Add docstring for `is_storage()` and `is_tensor()`
2017-02-12 09:21:01 +05:30
712686ce91 Add cat, contiguous, squeeze, and unsqueeze to THPP
Use unsqueeze and view from TH/THC
2017-02-11 17:49:31 +01:00
518864a7e0 Fix bug in legacy NN updateGradParameters (#714) 2017-02-11 11:04:18 +05:30
750fb5cc73 Fixes to support short and char tensors for bitwise operations 2017-02-09 18:52:59 -08:00
0f4749907a Adding bitwise operations
- lshift, rshift, bitand, bitor, bitxor
2017-02-09 18:11:58 -08:00
bd2dc63ef6 Adding bitand, bitor and bitxor 2017-02-09 17:06:04 -08:00
19a8795450 Changes to shift operations
- renaming lsh -> lshift, rsh -> rshift
- adding componentwise functions
2017-02-09 15:41:07 -08:00
d9dccfdd71 Fix for non-contiguous grad_output in cuDNN conv 2017-02-10 00:25:59 +01:00
7547a06c4f Avoiding duplicated unsigned as it causes error on gcc. 2017-02-09 13:29:05 -08:00
8929b75795 Added shift operations. 2017-02-09 13:28:36 -08:00
4d37ef878c Remove view on data and target tensors of dim 1 in TensorDataset (#609) 2017-02-09 22:06:39 +01:00
126e77d5c6 Merge commit 'e9b05c71b4acf210fad719f4da8bb58a425dd00b' 2017-02-09 12:31:58 -08:00
53eec78bea Merge commit 'ac9312e9f8002227b267a82e224a5a99c7a7e734' 2017-02-09 12:31:40 -08:00
a4edaec81a Merge commit 'aeb7a72620be47c0e6a8928a9cb6df49c06902a0' 2017-02-09 12:31:16 -08:00
92481b59d3 Merge commit '73d232ee454ca25de5552d347a2b06820f30d193' 2017-02-09 12:30:39 -08:00
6c77fa9121 Changes in RNNBase and Embedding for compatibility with DataParallel (#660) 2017-02-09 22:36:26 +05:30
aeb7a72620 Merge pull request #693 from colesbury/view
Add code for 'view' to THC
2017-02-09 12:09:28 +05:30
73d232ee45 Merge pull request #926 from colesbury/view
Add code for 'view' to TH
2017-02-09 12:08:57 +05:30
c0c65bf915 Merge pull request #696 from colesbury/unsqueeze
Add unsqueeze to THC
2017-02-09 11:08:20 +05:30
f6cee952af Merge pull request #929 from colesbury/unsqueeze
Add unsqueeze1d to TH
2017-02-09 11:07:47 +05:30
e74184f679 Make THCCachingHostAllocator less aggressive.
In cases where copyAsync is a large percentage of the work,
processing events in recordEvent can cause a large bottleneck.

Here, we relax the constraint that we reclaim blocks as fast as possible
(i.e. in copyAsync); instead, we only check that a block can be re-allocated
in malloc and free.
2017-02-08 14:44:24 -08:00
3884d36176 Add unsqueeze to THC 2017-02-08 13:49:32 -08:00
e7c6886a00 Add unsqueeze1d to TH
Unsqueeze inserts a singleton dimension. Unlike view, it doesn't require
the tensor to be contiguous.
2017-02-08 09:52:50 -08:00
ed8e92f63d Expose rawSet and rawResize as resizeNd and setStorageNd 2017-02-08 09:00:22 -08:00
fb97df5d65 Expose rawSet and rawResize as resizeNd and setStorageNd
These methods are useful from C because they don't require constructing
THLongStorages to wrap the sizes and strides, which can lead to leaked
memory in case of an error. Instead the sizes and strides can be
represented on the stack using standard C long arrays.
2017-02-08 08:56:04 -08:00
e9b05c71b4 Use THCTensor rather than THCudaTensor in THCUNN.h definition of
GatedLinearUnit.
2017-02-08 07:54:10 -08:00
7926324385 Corrected parameter typo in Adam docstring (#697) 2017-02-07 19:00:10 +01:00
1527b37c26 Fixed typo and rendering of some equations (#693)
* Fixed typo and rendering of some equations

* Few more fixes to MSELoss docs

* Cleaning up whitespace to make pep8 happy
2017-02-07 18:59:27 +01:00
de4659659b The RNNCell's example can not run correctly 2017-02-07 18:58:19 +01:00
a96a8c8336 Static build support + Query CUDA driver, runtime versions (#695) 2017-02-07 08:34:20 +05:30
691aa19b88 Add code for 'view' to THC 2017-02-06 14:04:04 -08:00
6b07dc9e22 Add code for 'view' to TH 2017-02-06 14:00:48 -08:00
8aa259b52b review comments from gchanan 2017-02-06 11:08:23 +00:00
ac9312e9f8 Bugfix/rowconv (#1126) 2017-02-04 20:37:45 +05:30
91a17b702b half<->float conversion cleanup (#901)
* half<->float conversion cleanup
2017-02-04 07:30:13 +05:30
a9785bba44 cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-02-02 21:38:25 -08:00
fc354a0d6e Revert "cuda implementation of Gated Linear Unit, fixed issues with genericization" 2017-02-02 10:50:47 +05:30
262611fcd3 Merge pull request #430 from huihuifan/newCudaGLU
cuda implementation of Gated Linear Unit, fixed issues with genericization
2017-02-02 08:16:35 +05:30
b8a34f3033 Small fixups:
1) Add return after THError for completeness.
2) Fix brace formatting
2017-02-01 15:46:19 -08:00
41ddc2a786 VolumetricFractionalMaxPooling like Spatial... 2017-02-01 12:01:09 +00:00
e4886f6589 VolumetricFractionalMaxPooling like spatial 2017-02-01 11:52:49 +00:00
6328981fcf cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-01-26 22:56:33 -08:00
2b948c42cd Add SpatialAdaptiveAveragePooling. 2017-01-14 19:44:07 -06:00
b2ae054410 Add SpatialAdaptiveAveragePooling. 2017-01-14 15:27:52 -06:00
390 changed files with 17860 additions and 7535 deletions

.gitignore

@ -2,6 +2,7 @@ build/
dist/
torch.egg-info/
*/**/__pycache__
torch/version.py
torch/csrc/generic/TensorMethods.cpp
torch/lib/*.so*
torch/lib/*.dylib*


@ -18,7 +18,8 @@ install:
- export CC="ccache gcc-4.8"
- export CXX="ccache g++-4.8"
- ccache --show-stats
- travis_retry pip install -r requirements.txt
- travis_retry pip install --upgrade pip setuptools wheel
- travis_retry pip install -r requirements.txt --only-binary=scipy
- python setup.py install
script:
@ -43,5 +44,5 @@ matrix:
env: LINT_CHECK
python: "2.7"
addons: true
install: pip install pep8
script: pep8
install: pip install flake8
script: flake8


@ -30,15 +30,32 @@ We are in an early-release Beta. Expect some adventures and rough edges.
At a granular level, PyTorch is a library that consists of the following components:
| \_ | \_ |
| ------------------------ | --- |
| torch | a Tensor library like NumPy, with strong GPU support |
| torch.autograd | a tape based automatic differentiation library that supports all differentiable Tensor operations in torch |
| torch.nn | a neural networks library deeply integrated with autograd designed for maximum flexibility |
| torch.optim | an optimization package to be used with torch.nn with standard optimization methods such as SGD, RMSProp, LBFGS, Adam etc. |
| torch.multiprocessing | python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. |
| torch.utils | DataLoader, Trainer and other utility functions for convenience |
| torch.legacy(.nn/.optim) | legacy code that has been ported over from torch for backward compatibility reasons |
<table>
<tr>
<td><b> torch </b></td>
<td> a Tensor library like NumPy, with strong GPU support </td>
</tr>
<tr>
<td><b> torch.autograd </b></td>
<td> a tape based automatic differentiation library that supports all differentiable Tensor operations in torch </td>
</tr>
<tr>
<td><b> torch.nn </b></td>
<td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td>
</tr>
<tr>
<td><b> torch.multiprocessing </b></td>
<td> python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. </td>
</tr>
<tr>
<td><b> torch.utils </b></td>
<td> DataLoader, Trainer and other utility functions for convenience </td>
</tr>
<tr>
<td><b> torch.legacy(.nn/.optim) </b></td>
<td> legacy code that has been ported over from torch for backward compatibility reasons </td>
</tr>
</table>
Usually one uses PyTorch either as:
@ -128,10 +145,9 @@ There is no wrapper code that needs to be written. [You can see an example here]
## Installation
### Binaries
- Anaconda
```bash
conda install pytorch torchvision -c soumith
```
Commands to install from binaries via Conda or pip wheels are on our website:
[http://pytorch.org](http://pytorch.org)
### From source


@ -63,11 +63,16 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
"}\n")
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
"-ccbin" ${CMAKE_CXX_COMPILER}
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(nvcc_res EQUAL 0)
# only keep the last line of nvcc_out
STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
list(GET nvcc_out -1 nvcc_out)
string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
endif()
@ -116,13 +121,13 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
set(add_ptx TRUE)
set(arch_name ${CMAKE_MATCH_1})
endif()
if(arch_name MATCHES "([0-9]\\.[0-9])$")
if(arch_name MATCHES "(^[0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
set(arch_bin ${CMAKE_MATCH_1})
set(arch_ptx ${arch_bin})
else()
# Look for it in our list of known architectures
if(${arch_name} STREQUAL "Fermi")
set(arch_bin 2.0 "2.1(2.0)")
set(arch_bin "2.0 2.1(2.0)")
elseif(${arch_name} STREQUAL "Kepler+Tegra")
set(arch_bin 3.2)
elseif(${arch_name} STREQUAL "Kepler+Tesla")
@ -173,11 +178,11 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
# Tell NVCC to add binaries for the specified GPUs
foreach(arch ${cuda_arch_bin})
if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
# User explicitly specified PTX for the concrete BIN
# User explicitly specified ARCH for the concrete CODE
list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
else()
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
# User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
list(APPEND nvcc_archs_readable sm_${arch})
endif()


@ -74,9 +74,11 @@ author = 'Torch Contributors'
# built documents.
#
# The short X.Y version.
version = '0.1.6'
# TODO: change to [:2] at v1.0
version = '.'.join(torch.__version__.split('+')[0].split('.')[:3])
# The full version, including alpha/beta/rc tags.
release = '0.1.6'
# TODO: verify this works as expected
release = torch.__version__.split('+')[0]
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.


@ -22,6 +22,24 @@ Containers
.. autoclass:: Module
:members:
:hidden:`Sequential`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Sequential
:members:
:hidden:`ModuleList`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ModuleList
:members:
:hidden:`ParameterList`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ParameterList
:members:
Convolution Layers
----------------------------------
@ -445,13 +463,13 @@ Vision layers
:members:
:hidden:`UpsamplingNearest2d`
~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingNearest2d
:members:
:hidden:`UpsamplingBilinear2d`
~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingBilinear2d
:members:
@ -466,6 +484,36 @@ Multi-GPU layers
.. autoclass:: DataParallel
:members:
Utilities
---------
:hidden:`clip_grad_norm`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.clip_grad_norm
.. currentmodule:: torch.nn.utils.rnn
:hidden:`PackedSequence`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.PackedSequence
:hidden:`pack_padded_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pack_padded_sequence
:hidden:`pad_packed_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pad_packed_sequence
torch.nn.functional
===================
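The utilities documented in the hunk above (clip_grad_norm and the packed-sequence helpers) are typically combined as in the following sketch. This is illustrative only: the LSTM, its sizes, and the example lengths are assumptions, not part of the diff, and on builds of this era packed sequences may require the cuDNN backend::

    import torch
    import torch.nn as nn
    from torch.autograd import Variable
    from torch.nn.utils import clip_grad_norm
    from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

    # Three padded sequences of feature size 5; lengths must be sorted in
    # decreasing order before packing.
    rnn = nn.LSTM(input_size=5, hidden_size=8, batch_first=True)
    x = Variable(torch.randn(3, 4, 5))
    lengths = [4, 3, 2]

    packed = pack_padded_sequence(x, lengths, batch_first=True)
    packed_out, (h_n, c_n) = rnn(packed)
    out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)

    out.sum().backward()
    # Rescale all gradients so that their combined norm is at most 1.
    clip_grad_norm(rnn.parameters(), max_norm=1.0)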


@ -1,3 +1,5 @@
.. _cuda-semantics:
CUDA semantics
==============
@ -61,3 +63,21 @@ call. This can be used to overlap data transfers with computation.
You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
pinned memory by passing ``pinned=True`` to its constructor.
.. _cuda-nn-dataparallel-instead:
Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,
test your use case.
There are significant caveats to using CUDA models with
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
requirements exactly, it is likely that your program will have incorrect or
undefined behavior.
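A minimal sketch of the pattern recommended above; the toy model and batch size are assumptions chosen for illustration::

    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    # DataParallel replicates the module on the available GPUs, splits the
    # batch along dimension 0, and gathers the outputs on the default GPU.
    model = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5))
    inputs = Variable(torch.randn(16, 10))

    if torch.cuda.is_available():
        model = nn.DataParallel(model).cuda()
        inputs = inputs.cuda()

    outputs = model(inputs)  # a single Python process drives all GPUs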


@ -132,7 +132,7 @@ This is how a ``Linear`` module can be implemented::
# nn.Parameters can never be volatile and, different than Variables,
# they require gradients by default.
self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
if bias is not None:
if bias:
self.bias = nn.Parameter(torch.Tensor(output_features))
else:
# You should always register all possible parameters, but the


@ -33,6 +33,8 @@ by the CUDA runtime.
kinds of data should be done with care. Note that this restriction doesn't
apply to shared CPU memory.
See also: :ref:`cuda-nn-dataparallel-instead`
Best practices and tips
-----------------------


@ -0,0 +1,34 @@
Serialization semantics
=======================
Best practices
--------------
.. _recommend-saving-models:
Recommended approach for saving a model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are two main approaches for serializing and restoring a model.
The first (recommended) saves and loads only the model parameters::
torch.save(the_model.state_dict(), PATH)
Then later::
the_model = TheModelClass(*args, **kwargs)
the_model.load_state_dict(torch.load(PATH))
The second saves and loads the entire model::
torch.save(the_model, PATH)
Then later::
the_model = torch.load(PATH)
However in this case, the serialized data is bound to the specific classes
and the exact directory structure used, so it can break in various ways when
used in other projects, or after some serious refactors.
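Expanding the recommended approach into a runnable sketch; the TwoLayerNet class and the file name are assumptions used only for illustration::

    import torch
    import torch.nn as nn

    class TwoLayerNet(nn.Module):
        def __init__(self):
            super(TwoLayerNet, self).__init__()
            self.fc1 = nn.Linear(4, 8)
            self.fc2 = nn.Linear(8, 2)

        def forward(self, x):
            return self.fc2(self.fc1(x))

    model = TwoLayerNet()
    torch.save(model.state_dict(), 'model_params.pth')  # parameters only

    # Later: rebuild the module, then restore its parameters.
    restored = TwoLayerNet()
    restored.load_state_dict(torch.load('model_params.pth'))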


@ -38,6 +38,7 @@ Indexing, Slicing, Joining, Mutating Ops
.. autofunction:: t
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze
Random sampling


@ -1,8 +0,0 @@
[pep8]
max-line-length = 120
ignore = E402,E721,E731,W503
exclude = docs/src
[flake8]
max-line-length = 120
ignore = E305,E402,E721,E731,F401,F403,F405,F811,F812,F821,F841


@ -1,6 +1,8 @@
from setuptools import setup, Extension, distutils, Command, find_packages
import setuptools.command.build_ext
import setuptools.command.install
import setuptools.command.develop
import setuptools.command.build_py
import distutils.unixccompiler
import distutils.command.build
import distutils.command.clean
@ -94,6 +96,28 @@ class build_module(Command):
self.run_command('build_ext')
class build_py(setuptools.command.build_py.build_py):
def run(self):
self.create_version_file()
setuptools.command.build_py.build_py.run(self)
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'torch', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
class develop(setuptools.command.develop.develop):
def run(self):
build_py.create_version_file()
setuptools.command.develop.develop.run(self)
class build_ext(setuptools.command.build_ext.build_ext):
def run(self):
@ -168,6 +192,7 @@ class clean(distutils.command.clean.clean):
################################################################################
include_dirs = []
library_dirs = []
extra_link_args = []
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
@ -188,7 +213,7 @@ include_dirs += [
tmp_install_path + "/include/THNN",
]
extra_link_args.append('-L' + lib_path)
library_dirs.append(lib_path)
# we specify exact lib names to avoid conflict with lua-torch installs
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
@ -220,14 +245,23 @@ main_sources = [
"torch/csrc/Exceptions.cpp",
"torch/csrc/Tensor.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/DynamicTypes.cpp",
"torch/csrc/byte_order.cpp",
"torch/csrc/utils.cpp",
"torch/csrc/utils/object_ptr.cpp",
"torch/csrc/allocators.cpp",
"torch/csrc/serialization.cpp",
"torch/csrc/autograd/init.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/engine.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/grad_buffer.cpp",
"torch/csrc/autograd/python_function.cpp",
"torch/csrc/autograd/python_cpp_function.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_engine.cpp",
"torch/csrc/autograd/functions/batch_normalization.cpp",
"torch/csrc/autograd/functions/init.cpp",
"torch/csrc/nn/THNN_generic.cpp",
]
@ -262,10 +296,11 @@ if WITH_CUDA:
break
include_dirs.append(cuda_include_path)
include_dirs.append(tmp_install_path + "/include/THCUNN")
extra_link_args.append('-L' + cuda_lib_path)
library_dirs.append(cuda_lib_path)
extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
extra_compile_args += ['-DWITH_CUDA']
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
main_libraries += ['cudart']
main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
main_sources += [
"torch/csrc/cuda/Module.cpp",
@ -280,7 +315,7 @@ if WITH_CUDA:
if WITH_CUDNN:
main_libraries += ['cudnn']
include_dirs.append(CUDNN_INCLUDE_DIR)
extra_link_args.append('-L' + CUDNN_LIB_DIR)
library_dirs.append(CUDNN_LIB_DIR)
main_sources += [
"torch/csrc/cudnn/BatchNorm.cpp",
"torch/csrc/cudnn/Conv.cpp",
@ -314,6 +349,7 @@ C = Extension("torch._C",
language='c++',
extra_compile_args=main_compile_args + extra_compile_args,
include_dirs=include_dirs,
library_dirs=library_dirs,
extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
)
extensions.append(C)
@ -352,18 +388,28 @@ if WITH_CUDA:
)
extensions.append(THCUNN)
version = "0.1"
version = '0.1.10'
if os.getenv('PYTORCH_BUILD_VERSION'):
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
version = os.getenv('PYTORCH_BUILD_VERSION') \
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
else:
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except subprocess.CalledProcessError:
pass
setup(name="torch", version=version,
ext_modules=extensions,
cmdclass={
'build': build,
'build_py': build_py,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'develop': develop,
'install': install,
'clean': clean,
},


@ -1,13 +1,15 @@
import sys
import os
import argparse
import unittest
import contextlib
from functools import wraps
from itertools import product
from copy import deepcopy
import torch
import torch.cuda
from torch.autograd import Variable, Function
from torch.autograd import Variable
torch.set_default_tensor_type('torch.DoubleTensor')
@ -30,6 +32,24 @@ try:
except ImportError:
TEST_NUMPY = False
TEST_SCIPY = True
try:
import scipy
except ImportError:
TEST_SCIPY = False
def skipIfNoLapack(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as e:
if 'Lapack library not found' in e.args[0]:
raise unittest.SkipTest('Compiled without Lapack')
raise
return wrapper
def get_cpu_type(t):
assert t.__module__ == 'torch.cuda'
@ -98,11 +118,18 @@ class TestCase(unittest.TestCase):
y = y.data
if torch.is_tensor(x) and torch.is_tensor(y):
max_err = 0
super(TestCase, self).assertEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertLessEqual(max_err, prec, message)
def assertTensorsEqual(a, b):
max_err = 0
super(TestCase, self).assertEqual(a.size(), b.size())
for index in iter_indices(a):
max_err = max(max_err, abs(a[index] - b[index]))
self.assertLessEqual(max_err, prec, message)
self.assertEqual(x.is_sparse, y.is_sparse, message)
if x.is_sparse:
assertTensorsEqual(x.indices(), y.indices())
assertTensorsEqual(x.values(), y.values())
else:
assertTensorsEqual(x, y)
elif type(x) == str and type(y) == str:
super(TestCase, self).assertEqual(x, y)
elif is_iterable(x) and is_iterable(y):
@ -150,65 +177,23 @@ class TestCase(unittest.TestCase):
raise AssertionError("object not found in iterable")
def make_jacobian(input, num_out):
if isinstance(input, Variable) and not input.requires_grad:
return None
if torch.is_tensor(input) or isinstance(input, Variable):
return torch.zeros(input.nelement(), num_out)
def download_file(url, path, binary=True):
if sys.version_info < (3,):
import urllib2
request = urllib2
error = urllib2
else:
return type(input)(filter(lambda x: x is not None,
(make_jacobian(elem, num_out) for elem in input)))
import urllib.request
import urllib.error
request = urllib.request
error = urllib.error
def iter_tensors(x, only_requiring_grad=False):
if torch.is_tensor(x):
yield x
elif isinstance(x, Variable):
if x.requires_grad or not only_requiring_grad:
yield x.data
else:
for elem in x:
for result in iter_tensors(elem, only_requiring_grad):
yield result
def contiguous(input):
if torch.is_tensor(input):
return input.contiguous()
elif isinstance(input, Variable):
return input.contiguous()
else:
return type(input)(contiguous(e) for e in input)
def get_numerical_jacobian(fn, input, target):
perturbation = 1e-6
# To be able to use .view(-1) input must be contiguous
input = contiguous(input)
output_size = fn(input).numel()
jacobian = make_jacobian(target, output_size)
# It's much easier to iterate over flattened lists of tensors.
# These are reference to the same objects in jacobian, so any changes
# will be reflected in it as well.
x_tensors = [t for t in iter_tensors(target, True)]
j_tensors = [t for t in iter_tensors(jacobian)]
outa = torch.DoubleTensor(output_size)
outb = torch.DoubleTensor(output_size)
# TODO: compare structure
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
flat_tensor = x_tensor.view(-1)
for i in range(flat_tensor.nelement()):
orig = flat_tensor[i]
flat_tensor[i] = orig - perturbation
outa.copy_(fn(input))
flat_tensor[i] = orig + perturbation
outb.copy_(fn(input))
flat_tensor[i] = orig
outb.add_(-1, outa).div_(2 * perturbation)
d_tensor[i] = outb
return jacobian
if os.path.exists(path):
return True
try:
data = request.urlopen(url, timeout=15).read()
with open(path, 'wb' if binary else 'w') as f:
f.write(data)
return True
except error.URLError as e:
return False


@ -2,11 +2,13 @@ import sys
import tempfile
import unittest
from copy import deepcopy
from itertools import product
import torch
import torch.cuda
from torch.autograd import Variable
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
from common import TestCase, to_gpu, freeze_rng_state
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors, contiguous
import torch.backends.cudnn
# tarfile module tries to obtain a file object name in python 3.3
@ -245,6 +247,13 @@ criterion_tests = [
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long()
),
dict(
module_name='NLLLoss2d',
constructor_args=(torch.rand(3),),
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long(),
desc='weights'
),
dict(
module_name='HingeEmbeddingLoss',
input=torch.rand(10),
@ -328,15 +337,19 @@ class NNTestCase(TestCase):
def _flatten_tensors(self, x):
if torch.is_tensor(x):
return x.view(-1)
if x.is_sparse:
return x.to_dense().view(-1)
else:
return x.view(-1)
elif isinstance(x, Variable):
return x.data.view(-1)
return self._flatten_tensors(x.data)
else:
return tuple(self._flatten_tensors(a) for a in x)
def _zero_grad_input(self, input):
if isinstance(input, Variable):
input.grad.data.zero_()
if input.requires_grad and input.grad is not None:
input.grad.data.zero_()
elif torch.is_tensor(input):
return
else:
@ -400,9 +413,9 @@ class NNTestCase(TestCase):
# TODO: enable non-contig tests
input = contiguous(input)
if jacobian_input:
res += get_numerical_jacobian(fw, input, input),
res += get_numerical_jacobian(fw, input, input, eps=1e-6),
if jacobian_parameters:
res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
res += torch.cat(list(get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param), 0),
return res
def check_jacobian(self, module, input, jacobian_input=True):
@ -516,6 +529,8 @@ class ModuleTest(TestBase):
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
test_case.assertEqual(out, expected_out)
self.test_noncontig(test_case, module, input)
# TODO: do this with in-memory files as soon as torch.save will support it
with TemporaryFile() as f:
test_case._forward(module, input)
@ -526,6 +541,51 @@ class ModuleTest(TestBase):
self._do_test(test_case, module, input)
def noncontiguize(self, obj):
if isinstance(obj, list):
return [self.noncontiguize(o) for o in obj]
tensor = obj.data if isinstance(obj, Variable) else obj
ndim = tensor.dim()
noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
assert noncontig.numel() == 1 or not noncontig.is_contiguous()
if isinstance(obj, Variable):
return Variable(noncontig, requires_grad=obj.requires_grad)
return noncontig
def test_noncontig(self, test_case, module, input):
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(input)
with freeze_rng_state():
output = test_case._forward(module, input)
grad_output = output
if isinstance(grad_output, Variable):
grad_output = grad_output.data.clone()
else:
grad_output = grad_output.clone()
output = output.clone()
grad_output.normal_()
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
d_param = deepcopy(test_case._get_parameters(module)[1])
nc_input = self.noncontiguize(input)
nc_grad_output = self.noncontiguize(grad_output)
for contig_i, contig_g in product((True, False), repeat=2):
i = input if contig_i else nc_input
go = grad_output if contig_g else nc_grad_output
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(i)
with freeze_rng_state():
try:
out = test_case._forward(module, i)
except Exception:
# Some modules will fail because of non contiguous inputs and we're ok with that
continue
grad = test_case._backward(module, i, out, go)
test_case.assertEqual(out, output)
test_case.assertEqual(grad, d_input, 1e-4)
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
def test_cuda(self, test_case):
if not TEST_CUDA or not self.should_test_cuda:
raise unittest.SkipTest('Excluded from CUDA tests')
@ -536,8 +596,6 @@ class ModuleTest(TestBase):
cpu_module = self.constructor(*self.constructor_args)
gpu_module = self.constructor(*self.constructor_args).float().cuda()
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
cpu_param = test_case._get_parameters(cpu_module)
gpu_param = test_case._get_parameters(gpu_module)
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
@ -547,6 +605,10 @@ class ModuleTest(TestBase):
gpu_p = gpu_p.data
gpu_p.copy_(cpu_p)
test_case._zero_grad_input(cpu_input)
test_case._zero_grad_input(gpu_input)
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
cpu_output = test_case._forward(cpu_module, cpu_input)
gpu_output = test_case._forward(gpu_module, gpu_input)
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
@ -560,6 +622,8 @@ class ModuleTest(TestBase):
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
self.test_noncontig(test_case, gpu_module, gpu_input)
except NotImplementedError:
pass
# TODO: remove this after CUDA scatter_ is implemented


@ -6,9 +6,9 @@ import torch
import unittest
from copy import deepcopy
from collections import OrderedDict
from torch.autograd import gradcheck
from common import make_jacobian, TestCase, iter_tensors, \
get_numerical_jacobian, run_tests
from common import TestCase, run_tests
from torch.autograd._functions import *
from torch.autograd import Variable, Function
@ -20,37 +20,6 @@ else:
PRECISION = 1e-4
def iter_gradients(x):
if isinstance(x, Variable):
if x.requires_grad:
yield x.grad.data
else:
for elem in x:
for result in iter_gradients(elem):
yield result
def zero_gradients(i):
for t in iter_gradients(i):
t.zero_()
def get_analytical_jacobian(input, output):
jacobian = make_jacobian(input, output.numel())
grad_output = output.data.clone().zero_()
flat_grad_output = grad_output.view(-1)
for i in range(flat_grad_output.numel()):
flat_grad_output.zero_()
flat_grad_output[i] = 1
zero_gradients(input)
output.backward(grad_output, retain_variables=True)
for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
jacobian_x[:, i] = d_x
return jacobian
@contextlib.contextmanager
def backward_engine(engine):
_prev_engine = Variable._execution_engine
@ -74,6 +43,7 @@ class TestAutograd(TestCase):
counter[0] += inc
z = x ** 2 + x * 2 + x * y + y
x.register_hook(lambda *args: bw_hook(0, *args))
test = z.register_hook(lambda *args: bw_hook(1, *args))
z.backward(torch.ones(5, 5), retain_variables=True)
self.assertEqual(counter[0], 1)
@ -158,6 +128,49 @@ class TestAutograd(TestCase):
def test_backward(self):
self._test_backward()
def test_sparse_backward(self):
class FixedGradientFunction(Function):
def __init__(self, grad):
self.grad = grad
def forward(self, x):
return x
def backward(self, grad_x):
return self.grad
size = torch.Size([6, 3, 2])
i1 = torch.LongTensor([
[0, 3, 4],
[0, 2, 2],
])
v1 = torch.DoubleTensor([[1, 2], [4, 5], [7, 8]])
sparse_grad1 = torch.sparse.DoubleTensor(i1, v1, size)
i2 = torch.LongTensor([
[0, 1, 3, 4],
[0, 1, 2, 2],
])
v2 = torch.DoubleTensor([[1, 2], [4, 3], [4, 5], [7, 8]])
sparse_grad2 = torch.sparse.DoubleTensor(i2, v2, size)
dense_grad = torch.rand(size).double()
sparse_fn1 = FixedGradientFunction(sparse_grad1)
sparse_fn2 = FixedGradientFunction(sparse_grad2)
dense_fn = FixedGradientFunction(dense_grad)
# sparse first
x = Variable(torch.randn(5, 5), requires_grad=True)
(sparse_fn1(x) + dense_fn(x) + sparse_fn2(x)).sum().backward()
self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
# dense first
x = Variable(torch.randn(5, 5), requires_grad=True)
(dense_fn(x) + sparse_fn1(x) + sparse_fn2(x)).sum().backward()
self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
# sparse only
x = Variable(torch.randn(5, 5), requires_grad=True)
(sparse_fn1(x) + sparse_fn2(x)).sum().backward()
self.assertEqual(x.grad.data, sparse_grad1 + sparse_grad2)
@unittest.skip("BasicEngine is out of date")
def test_backward_basic_engine(self):
with backward_engine(torch.autograd.engine.BasicEngine):
@ -224,14 +237,50 @@ class TestAutograd(TestCase):
def test_indexing(self):
x = torch.range(1, 16).resize_(4, 4)
y = Variable(x)
self.assertEqual(x[1], y[1].data)
self.assertEqual(x[1, 1], y[1, 1].data[0])
self.assertEqual(x[1:], y[1:].data)
self.assertEqual(x[:2], y[:2].data)
self.assertEqual(x[:2, 2], y[:2, 2].data)
self.assertEqual(x[1:2, 2], y[1:2, 2].data)
self.assertEqual(x[1, 2:], y[1, 2:].data)
y = Variable(x, requires_grad=True)
def check_index(idx):
if y.grad is not None:
y.grad.data.zero_()
indexed_tensor = x[idx]
indexed_var = y[idx]
indexed_var_t = indexed_var.data
if not torch.is_tensor(indexed_tensor):
indexed_var_t = indexed_var_t[0]
self.assertEqual(indexed_tensor, indexed_var)
indexed_var.sum().backward()
expected_grad = torch.zeros(4, 4)
expected_grad[idx] = 1
self.assertEqual(y.grad.data, expected_grad)
check_index(1)
check_index((1, 1))
check_index(slice(1, None))
check_index(slice(None, 2))
check_index((slice(None, 2), 2))
check_index((slice(1, 2), 2))
check_index((1, slice(2, None)))
check_index((slice(None, None), slice(2, None)))
check_index(torch.LongTensor([0, 2]))
check_index(torch.rand(4, 4).bernoulli().byte())
check_index((Ellipsis, slice(2, None)))
def test_basic_op_grad(self):
"""Grad output might need to be reshaped to match the second argument."""
x = Variable(torch.randn(4, 6), requires_grad=True)
b = Variable(torch.rand(12, 1) + 1e-2, requires_grad=True)
def y():
# .mm() depends on the grad_output being of correct size
return b.mm(Variable(torch.rand(1, 2) + 1e-2))
(x + y()).sum().backward()
(x - y()).sum().backward()
(x * y()).sum().backward()
(x / y()).sum().backward()
(x.abs() ** y()).sum().backward()
def test_requires_grad(self):
x = Variable(torch.randn(5, 5))
@ -253,6 +302,53 @@ class TestAutograd(TestCase):
y._backward_hooks['test'] = error
b.backward(torch.ones(5, 5))
def test_requires_grad_inplace(self):
a = Variable(torch.randn(5, 5))
b = Variable(torch.randn(5, 5), requires_grad=True)
a += b
self.assertTrue(a.requires_grad)
# non-leaf Variable
a = Variable(torch.randn(5, 5)) + 0
b = Variable(torch.randn(5, 5), requires_grad=True)
a += b
self.assertTrue(a.requires_grad)
def test_duplicate_backward_root(self):
a = Variable(torch.randn(5, 5), requires_grad=True)
b = Variable(torch.randn(5, 5), requires_grad=True)
x = a * b
grad_output = x.data.clone().normal_()
torch.autograd.backward([x, x], [grad_output, grad_output])
self.assertEqual(a.grad.data, b.data * grad_output * 2)
self.assertEqual(b.grad.data, a.data * grad_output * 2)
def test_backward_no_grad(self):
a = Variable(torch.randn(5, 5), requires_grad=True)
b = a + 2
with self.assertRaises(RuntimeError):
torch.autograd.backward([b], [None])
def test_previous_functions(self):
x = Variable(torch.randn(5, 5), requires_grad=True)
y = Variable(torch.randn(5, 5), requires_grad=True)
a = x + y
self.assertIsNotNone(a.creator)
previous_functions = a.creator.previous_functions
self.assertEqual(len(previous_functions), 2)
self.assertIs(previous_functions[0][0], x)
self.assertEqual(previous_functions[0][1], 0)
self.assertIs(previous_functions[1][0], y)
self.assertEqual(previous_functions[1][1], 0)
b = a + 5
previous_functions = b.creator.previous_functions
self.assertEqual(len(previous_functions), 1)
self.assertIs(previous_functions[0][0], a.creator)
def test_inplace(self):
x = Variable(torch.ones(5, 5), requires_grad=True)
y = Variable(torch.ones(5, 5) * 4, requires_grad=True)
@ -408,15 +504,31 @@ class TestAutograd(TestCase):
y = x * 2
y = y.detach()
self.assertFalse(y.requires_grad)
self.assertFalse(y.creator.requires_grad)
self.assertIsNone(y.creator)
z = x + y
z.sum().backward()
# This is an incorrect gradient, but we assume that's what the user
# wanted. detach() is an advanced option.
self.assertEqual(x.grad.data, torch.ones(10, 10))
# detach() should preserve volatile flag
x = Variable(torch.randn(10, 10), volatile=True)
y = x * 2
y = y.detach()
self.assertTrue(y.volatile)
# in-place detach
x = Variable(torch.randn(10, 10), requires_grad=True)
y = Variable(torch.randn(10, 10), requires_grad=True)
a = x * 2
(y + a).sum().backward(retain_variables=True)
a.detach_()
self.assertFalse(a.requires_grad)
(y + a).sum().backward() # this won't backprop to x
self.assertEqual(x.grad.data, torch.ones(10, 10) * 2)
self.assertEqual(y.grad.data, torch.ones(10, 10) * 2)
def test_type_conversions(self):
import torch.cuda
x = Variable(torch.randn(5, 5))
self.assertIs(type(x.float().data), torch.FloatTensor)
self.assertIs(type(x.int().data), torch.IntTensor)
@ -435,6 +547,15 @@ class TestAutograd(TestCase):
self.assertIs(type(x2.data), torch.cuda.FloatTensor)
self.assertIs(x2.get_device(), 1)
def test_isolated_node(self):
x = Variable(torch.randn(5, 5), requires_grad=True)
y = Variable(torch.randn(5, 5), requires_grad=True)
a = x + y
b = torch.max(a, 1)[1].repeat(1, 5).double()
o = (b + a).sum()
o.backward()
def test_return_leaf(self):
class Identity(Function):
@ -609,6 +730,31 @@ class TestAutograd(TestCase):
y.sum().backward()
self.assertEqual(x.grad.data, x.data.clone().fill_(1))
def test_reinforce_check(self):
x = Variable(torch.randn(5, 5), requires_grad=True)
# these should be ok
y = torch.normal(x)
y.reinforce(torch.randn(5, 5))
y = torch.normal(x)
y.reinforce(2)
# can't call reinforce on non-stochastic variables
self.assertRaises(RuntimeError, lambda: x.reinforce(2))
# can't call reinforce twice
y = torch.normal(x)
y.reinforce(2)
self.assertRaises(RuntimeError, lambda: y.reinforce(2))
# check type of reward
y = torch.normal(x)
self.assertRaises(TypeError, lambda: y.reinforce(torch.randn(5, 5).long()))
# check size of reward
y = torch.normal(x)
self.assertRaises(ValueError, lambda: y.reinforce(torch.randn(4, 5)))
def test_stochastic(self):
x = Variable(torch.rand(2, 10), requires_grad=True)
stddevs = Variable(torch.rand(2, 10) * 5, requires_grad=True)
@ -646,6 +792,18 @@ class TestAutograd(TestCase):
self.assertGreater(x.grad.data.abs().sum(), 0)
def test_stochastic_require_grad(self):
# This tests a DSD function sequence (D=deterministic, S=stochastic),
# where all functions require grad.
x = Variable(torch.randn(2, 10), requires_grad=True)
y = Variable(torch.randn(2, 10), requires_grad=True)
z = torch.normal(x + 2, 2)
o = z + y
z.reinforce(torch.randn(2, 10))
o.sum().backward()
self.assertEqual(y.grad.data, torch.ones(2, 10))
self.assertGreater(x.grad.data.abs().sum(), 0)
def test_stochastic_sequence(self):
x = Variable(torch.rand(10).clamp_(0, 1), requires_grad=True)
b = x.bernoulli()
@ -754,7 +912,10 @@ function_tests = [
(Index, (slice(0, 3),), (torch.rand(S, S, S),), 'slice'),
(Index, ((slice(0, 3), 1),), (torch.rand(S, S, S),), 'slice_index'),
(View, (S * S, S), (torch.rand(S, S, S),)),
(Expand, ((S, 5, S, 5),), ((S, 1, S, 1),)),
(Expand, ((5, S, 5, S, 5),), ((1, S, 1, S, 1),)),
(Expand, ((S, S, S),), ((S, 1),), 'new_dim'),
(Expand, ((S, S, S),), ((1, S),), 'new_dim_front'),
(Expand, ((S, S, S),), ((1,),), 'scalar'),
(Exp, (), (torch.rand(S, S, S),)),
(Log, (), (torch.rand(S, S, S) + 1e-2,)),
(Log1p, (), (torch.rand(S, S, S),)),
@ -804,7 +965,7 @@ function_tests = [
(Addr, (0.1, 0.4), ((S, M), (S,), (M,)), 'coef'),
(Dot, (), ((L,), (L,)),),
(Max, (), ((S, S, S),),),
(Repeat, (torch.Size([2, 3, 1, 4]),), ((S, S, S, S),)),
(Repeat, (torch.Size([2, 3, 1, 2]),), ((S, S, S, S),)),
(Min, (), ((S, S, S),),),
(Max, (0,), ((S, S, S),), 'dim'),
(Min, (0,), ((S, S, S),), 'dim'),
@ -819,8 +980,8 @@ function_tests = [
(Norm, (3, 0), ((S, S, S),), '3_dim'),
(Addcmul, (), ((S, S), (S, S), (S, S))),
(Addcmul, (0.6,), ((S, S), (S, S), (S, S)), 'scale'),
(Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 1e-2)),
(Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 1e-2), 'scale'),
(Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 5e-2)),
(Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 5e-2), 'scale'),
(IndexAdd, (0,), ((S, S), index_variable(2, S), (2, S))),
# (IndexCopy, (0,), ((S, S), index_variable(2, S), (2, S)) ),
(IndexFill, (0, 2), ((S, S), index_variable(2, S))),
@ -870,8 +1031,10 @@ method_tests = [
('t', (1, 2), ()),
('view', (S, S, S), (S * S, S),),
('view_as', (S, S, S), ((S * S, S),)),
('expand', (S, 1, S), (S, S, S)),
('expand', (S, 1, 1), (S, S, S)),
('expand', (torch.Size([S, 1, S]),), (S, S, S), 'size'),
('expand', (S, 1), (S, S, S), 'new_dim'),
('expand', (1,), (S, S, S), 'scalar'),
('exp', (S, S, S), ()),
('log', (S, S, S), ()),
('log1p', (S, S, S), ()),
@ -973,18 +1136,18 @@ method_tests = [
# TODO: clamp with min/max
def create_input(call_args):
def create_input(call_args, requires_grad=True):
if not isinstance(call_args, tuple):
call_args = (call_args,)
def map_arg(arg):
if isinstance(arg, tuple) and not isinstance(arg[0], Variable):
return Variable(torch.randn(*arg).double(), requires_grad=True)
return Variable(torch.randn(*arg).double(), requires_grad=requires_grad)
elif torch.is_tensor(arg):
if isinstance(arg, torch.FloatTensor):
return Variable(arg.double(), requires_grad=True)
return Variable(arg.double(), requires_grad=requires_grad)
else:
return Variable(arg, requires_grad=True)
return Variable(arg, requires_grad=requires_grad)
else:
return arg
return tuple(map_arg(arg) for arg in call_args)
@ -1011,26 +1174,12 @@ for test in function_tests:
def do_test(self, cls=cls, constructor_args=constructor_args,
call_args=call_args, test_name=test_name):
input = create_input(call_args)
output = cls(*constructor_args)(*input)
if not isinstance(output, tuple):
output = (output,)
for i, o in enumerate(output):
if not o.requires_grad:
continue
analytical = get_analytical_jacobian(input, o)
def fn(input):
tmp = cls(*constructor_args)(*input)
if not isinstance(tmp, tuple):
tmp = (tmp,)
return tmp[i].data
numerical = get_numerical_jacobian(fn, input, input)
self.assertLessEqual(
max(a.add(-1, n).abs().max() for a, n in zip(analytical, numerical)),
PRECISION
)
self.assertEqual(gradcheck(cls(*constructor_args), input, eps=1e-6, atol=PRECISION), True)
if test_name not in ignore_inplace and issubclass(cls, InplaceFunction):
output = cls(*constructor_args)(*input)
if not isinstance(output, tuple):
output = (output,)
inplace_input = deepcopy(input)
inplace_input_copy = tuple(i + 0 for i in inplace_input)
fn = cls(*constructor_args, inplace=True)
@ -1068,8 +1217,8 @@ for test in method_tests:
def do_test(self, name=name, self_size=self_size, args=args, test_name=test_name):
def check(name):
self_variable = create_input((self_size,))[0]
args_variable = create_input(args)
self_variable = create_input((self_size,), requires_grad=False)[0]
args_variable = create_input(args, requires_grad=False)
self_tensor = deepcopy(self_variable.data)
args_tensor = deepcopy(unpack_variables(args_variable))
output_variable = getattr(self_variable, name)(*args_variable)

View File

@ -9,10 +9,11 @@ import torch.cuda.comm as comm
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests
HAS_CUDA = True
if not torch.cuda.is_available():
print('CUDA not available, skipping tests')
import sys
sys.exit()
TestCase = object # noqa: F811
HAS_CUDA = False
def is_floating(t):
@ -59,6 +60,13 @@ def small_2d_scaled(t, scale=10):
return make_tensor(t, S, S).mul(scale)
def small_2d_oneish(t):
if is_floating(t):
return make_tensor(t, S, S).clamp(min=0.99, max=1.01)
else:
return t(S, S).fill_(1)
def small_3d(t):
return make_tensor(t, S, S, S)
@ -206,7 +214,7 @@ tests = [
('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
('prod', small_3d, lambda t: [],),
('prod', small_2d_oneish, lambda t: [],),
('prod', small_3d, lambda t: [1], 'dim'),
('sum', small_2d, lambda t: [],),
('sum', small_3d, lambda t: [1], 'dim'),
@ -233,6 +241,7 @@ tests = [
('triu', medium_2d, lambda t: [],),
('triu', medium_2d, lambda t: [2], 'positive'),
('triu', medium_2d, lambda t: [-2], 'negative'),
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
('view', small_3d, lambda t: [100, 10],),
('view_as', small_3d, lambda t: [t(100, 10)],),
('zero', small_3d, lambda t: [],),
@ -338,21 +347,21 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
class TestCuda(TestCase):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_autogpu(self):
if torch.cuda.device_count() > 1:
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.get_device(), 0)
self.assertEqual(x.get_device(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.get_device(), 1)
q = x.add(y)
self.assertEqual(q.get_device(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.get_device(), 1)
z = z.cuda()
self.assertEqual(z.get_device(), 0)
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.get_device(), 0)
self.assertEqual(x.get_device(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.get_device(), 1)
q = x.add(y)
self.assertEqual(q.get_device(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.get_device(), 1)
z = z.cuda()
self.assertEqual(z.get_device(), 0)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_copy_device(self):
@ -374,7 +383,7 @@ class TestCuda(TestCase):
self.assertEqual(z.get_device(), 0)
self.assertIs(z.cuda(0), z)
def test_serialization(self):
def test_serialization_array_with_storage(self):
x = torch.randn(5, 5).cuda()
y = torch.IntTensor(2, 5).fill_(0).cuda()
q = [x, y, x, y.storage()]
@ -512,6 +521,13 @@ class TestCuda(TestCase):
self.assertEqual(x, y)
self.assertEqual(torch.cuda.initial_seed(), 2)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_cat_autogpu(self):
x = torch.randn(4, 4).cuda(1)
y = torch.randn(4, 4).cuda(1)
z = torch.cat([x, y], 0)
self.assertEqual(z.get_device(), x.get_device())
def test_serialization(self):
x = torch.randn(4, 4).cuda()
with tempfile.NamedTemporaryFile() as f:
@ -522,7 +538,7 @@ class TestCuda(TestCase):
self.assertIs(type(x_copy), type(x))
self.assertEqual(x_copy.get_device(), x.get_device())
def test_serialization_empty(self):
def test_serialization_array_with_empty(self):
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
@ -665,40 +681,67 @@ class TestCuda(TestCase):
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
self.assertEqual(list(gpu_tensor), [1])
    @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
    def test_caching_pinned_memory_multi_gpu(self):
        # checks that the events preventing pinned memory from being re-used
        # too early are recorded on the correct GPU
        cycles_per_ms = get_cycles_per_ms()

        t = torch.FloatTensor([1]).pin_memory()
        ptr = t.data_ptr()
        gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
        gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)

        with torch.cuda.device(1):
            torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
            gpu_tensor1.copy_(t, async=True)

        del t
        t = torch.FloatTensor([2]).pin_memory()
        self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')

        with torch.cuda.device(0):
            gpu_tensor0.copy_(t, async=True)

        self.assertEqual(gpu_tensor1[0], 1)
        self.assertEqual(gpu_tensor0[0], 2)

if HAS_CUDA:
    for decl in tests:
        for t in types:
            tensor = t()
            gpu_tensor = get_gpu_type(t)()
            if len(decl) == 3:
                name, constr, arg_constr = decl
                desc = ''
            elif len(decl) == 4:
                name, constr, arg_constr, desc = decl
            elif len(decl) == 5:
                name, constr, arg_constr, desc, type_subset = decl
                if t not in type_subset:
                    continue

            precision = custom_precision.get(name, TestCuda.precision)
            for inplace in (True, False):
                if inplace:
                    name_inner = name + '_'
                else:
                    name_inner = name
                if not hasattr(tensor, name_inner):
                    continue
                if not hasattr(gpu_tensor, name_inner):
                    print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
                        name_inner, gpu_tensor.__class__.__name__))
                    continue

                test_name = 'test_' + t.__name__ + '_' + name_inner
                if desc:
                    test_name += '_' + desc

                assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
                setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
if __name__ == '__main__':
run_tests()

View File

@ -4,7 +4,7 @@ import torch
import traceback
import unittest
from torch.utils.data import Dataset, TensorDataset, DataLoader
from common import TestCase, run_tests
from common import TestCase, run_tests, TEST_NUMPY
from common_nn import TEST_CUDA
@ -27,8 +27,8 @@ class TestTensorDataset(TestCase):
l = torch.randn(15)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i:i + 1], source[i][0])
self.assertEqual(l[i:i + 1], source[i][1])
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
class ErrorDataset(Dataset):
@ -52,7 +52,7 @@ class TestDataLoader(TestCase):
for i, (sample, target) in enumerate(loader):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx + batch_size])
self.assertEqual(target, self.labels[idx:idx + batch_size].view(-1, 1))
self.assertEqual(target, self.labels[idx:idx + batch_size])
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
def _test_shuffle(self, loader):
@ -66,7 +66,7 @@ class TestDataLoader(TestCase):
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
self.assertEqual(target, self.labels[data_point_idx])
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
@ -123,6 +123,22 @@ class TestDataLoader(TestCase):
self.assertTrue(input.is_pinned())
self.assertTrue(target.is_pinned())
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_numpy(self):
import numpy as np
class TestDataset(torch.utils.data.Dataset):
def __getitem__(self, i):
return np.ones((2, 3, 4)) * i
def __len__(self):
return 1000
loader = DataLoader(TestDataset(), batch_size=12)
batch = next(iter(loader))
self.assertIsInstance(batch, torch.DoubleTensor)
self.assertEqual(batch.size(), torch.Size([12, 2, 3, 4]))
def test_error(self):
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))

View File

@ -1154,6 +1154,15 @@ class TestNN(NNTestCase):
module.__repr__()
str(module)
def test_accUpdateGradParameters(self):
module = nn.LookupTable(5, 3)
module.weight.fill_(2)
input = torch.LongTensor([1, 3])
output = module.updateOutput(input)
module.backwardUpdate(input, output, 0.1)
self.assertEqual(module.weight[0, 0], 2)
self.assertEqual(module.weight[3, 0], 1.8)
def _build_net(self):
return (nn.Sequential()
.add(nn.Concat(0)

View File

@ -19,6 +19,7 @@ HAS_SHM_FILES = os.path.isdir('/dev/shm')
TEST_CUDA_IPC = torch.cuda.is_available() and \
sys.version_info[0] == 3 and \
sys.platform != 'darwin'
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
def simple_fill(queue, event):
@ -79,9 +80,8 @@ def autograd_sharing(queue, ready, master_modified):
is_ok = var.data.equal(expected_var)
var.data[:] = torch.ones(5, 5)
if var.grad is not None:
is_ok &= var.grad.data.equal(torch.ones(5, 5) * 4)
var.grad.data[:] = torch.ones(5, 5)
is_ok &= var.grad is None
var._grad = Variable(torch.ones(5, 5), requires_grad=False)
queue.put(is_ok)
@ -289,6 +289,7 @@ class TestMultiprocessing(TestCase):
self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
@unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
def test_cuda_small_tensors(self):
# Check multiple small tensors which will likely use the same
# underlying cached allocation
@ -357,20 +358,19 @@ class TestMultiprocessing(TestCase):
queue = mp.Queue()
p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
p.start()
var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
queue.put(var)
ready.wait()
var.data[0, 0] = 1000
if var.grad is not None:
var.grad.data[:] = torch.ones(5, 5) * 4
var.grad.data[:] = torch.ones(5, 5) * 4
master_modified.set()
worker_ok = queue.get()
self.assertTrue(worker_ok)
self.assertEqual(var.data, torch.ones(5, 5))
if var.grad is not None:
self.assertEqual(var.grad.data, torch.ones(5, 5))
self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
p.join()
def test_variable_sharing(self):

View File

@ -6,12 +6,10 @@ import torch.cuda
from common import TestCase, run_tests
if not torch.cuda.is_available():
print('CUDA not available, skipping tests')
import sys
sys.exit()
nGPUs = torch.cuda.device_count()
if nGPUs == 0:
print('CUDA not available, skipping tests')
TestCase = object # noqa: F811
class TestNCCL(TestCase):

File diff suppressed because it is too large

View File

@ -14,13 +14,15 @@ class TestSparse(TestCase):
@staticmethod
def _gen_sparse(d, nnz, with_size):
v = torch.randn(nnz)
if isinstance(with_size, Number):
v = torch.randn(nnz)
i = (torch.rand(d, nnz) * with_size).type(torch.LongTensor)
x = SparseTensor(i, v)
else:
v_size = [nnz] + list(with_size[d:])
v = torch.randn(*v_size)
i = torch.rand(d, nnz) * \
torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1)
torch.Tensor(with_size[:d]).repeat(nnz, 1).transpose(0, 1)
i = i.type(torch.LongTensor)
x = SparseTensor(i, v, torch.Size(with_size))
@ -73,6 +75,33 @@ class TestSparse(TestCase):
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_to_dense_hybrid(self):
i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
])
v = torch.Tensor([[2, 3], [1, 2], [3, 4], [4, 5]])
x = SparseTensor(i, v, torch.Size([3, 4, 2]))
res = torch.Tensor([
[[2, 3],
[0, 0],
[0, 0],
[0, 0]],
[[1, 2],
[0, 0],
[0, 0],
[0, 0]],
[[3, 4],
[0, 0],
[0, 0],
[4, 5]],
])
x.to_dense() # Tests double to_dense for memory corruption
x.to_dense()
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_contig(self):
i = torch.LongTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
@ -126,6 +155,65 @@ class TestSparse(TestCase):
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
def test_contig_hybrid(self):
i = torch.LongTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
])
v = torch.Tensor([
[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
[6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
])
x = SparseTensor(i, v, torch.Size([100, 100, 2]))
exp_i = torch.LongTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
])
exp_v = torch.Tensor([
[2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
[3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
i = torch.LongTensor([
[2, 0, 2, 1],
[0, 0, 3, 0],
[1, 0, 4, 0],
])
v = torch.Tensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
exp_v = torch.Tensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
# Duplicate indices
i = torch.LongTensor([
[0, 0, 2, 0],
[0, 0, 3, 0],
[0, 0, 4, 0],
])
v = torch.Tensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = torch.LongTensor([
[0, 2],
[0, 3],
[0, 4],
])
exp_v = torch.Tensor([[6, 4, 5], [4, 3, 4]])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
def test_transpose(self):
x = self._gen_sparse(4, 20, 5)[0]
y = x.to_dense()
@ -187,33 +275,97 @@ class TestSparse(TestCase):
test_shape(1000, 100, 100)
test_shape(3000, 64, 300)
def _test_spadd_shape(self, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
y = torch.randn(*shape)
r = random.random()
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
self.assertEqual(res, expected)
# Non contiguous dense tensor
s = list(shape)
s[0] = shape[-1]
s[-1] = shape[0]
y = torch.randn(*s).transpose_(0, len(s) - 1)
r = random.random()
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
self.assertEqual(res, expected)
def test_spadd(self):
def test_shape(*shape):
x, _, _ = self._gen_sparse(len(shape), 10, shape)
y = torch.randn(*shape)
r = random.random()
self._test_spadd_shape([5, 6])
self._test_spadd_shape([10, 10, 10])
self._test_spadd_shape([50, 30, 20])
self._test_spadd_shape([5, 5, 5, 5, 5, 5])
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
def test_spadd_hybrid(self):
self._test_spadd_shape([5, 6], [2, 3])
self._test_spadd_shape([10, 10, 10], [3])
self._test_spadd_shape([50, 30, 20], [2])
self._test_spadd_shape([5, 5, 5, 5, 5, 5], [2])
self.assertEqual(res, expected)
def _test_basic_ops_shape(self, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape)
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape)
# Non contiguous dense tensor
s = list(shape)
s[0] = shape[-1]
s[-1] = shape[0]
y = torch.randn(*s).transpose_(0, len(s) - 1)
r = random.random()
y1 = x1 + x2
y2 = x1.clone()
y2.add_(x2)
expected = x1.to_dense() + x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
y1 = x1 - x2
y2 = x1.clone()
y2.sub_(x2)
expected = x1.to_dense() - x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
self.assertEqual(res, expected)
y1 = x1 * x2
y2 = x1.clone()
y2.mul_(x2)
expected = x1.to_dense() * x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
test_shape(5, 6)
test_shape(10, 10, 10)
test_shape(50, 30, 20)
test_shape(5, 5, 5, 5, 5, 5)
y1 = x1 * 37.5
y2 = x1.clone()
y2.mul_(37.5)
expected = x1.to_dense() * 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 / 37.5
y2 = x1.clone()
y2.div_(37.5)
expected = x1.to_dense() / 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y = x1.clone()
y.zero_()
expected = torch.zeros(x1.size())
self.assertEqual(y.to_dense(), expected)
def test_basic_ops(self):
self._test_basic_ops_shape([5, 6])
self._test_basic_ops_shape([10, 10, 10])
self._test_basic_ops_shape([50, 30, 20])
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5])
def test_basic_ops_hybrid(self):
self._test_basic_ops_shape([5, 6], [2, 3])
self._test_basic_ops_shape([10, 10, 10], [3])
self._test_basic_ops_shape([50, 30, 20], [2])
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5], [2])
if __name__ == '__main__':

View File

@ -1,4 +1,5 @@
import sys
import os
import math
import random
import torch
@ -6,9 +7,8 @@ import torch.cuda
import tempfile
import unittest
import warnings
from itertools import product, chain
from functools import wraps
from common import TestCase, iter_indices, TEST_NUMPY, run_tests
from itertools import product, combinations
from common import TestCase, iter_indices, TEST_NUMPY, run_tests, download_file, skipIfNoLapack
if TEST_NUMPY:
import numpy as np
@ -16,18 +16,6 @@ if TEST_NUMPY:
SIZE = 100
def skipIfNoLapack(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as e:
if 'Lapack library not found' in e.args[0]:
raise unittest.SkipTest('Compiled without Lapack')
raise
return wrapper
class TestTorch(TestCase):
def test_dot(self):
@ -797,9 +785,11 @@ class TestTorch(TestCase):
def assertIsOrdered(self, order, x, mxx, ixx, task):
SIZE = 4
if order == 'descending':
check_order = lambda a, b: a >= b
def check_order(a, b):
return a >= b
elif order == 'ascending':
check_order = lambda a, b: a <= b
def check_order(a, b):
return a <= b
else:
error('unknown order "{}", must be "ascending" or "descending"'.format(order))
@ -1652,7 +1642,7 @@ class TestTorch(TestCase):
self._test_conv_corr_eq(lambda x, k: torch.xcorr3(x, k), reference)
@unittest.skip("Not implemented yet")
def test_xcorr3_xcorr2_eq(self):
def test_xcorr3_xcorr2_eq_full(self):
def reference(x, k, o3, o32):
for i in range(x.size(1)):
for j in range(k.size(1)):
@ -1660,7 +1650,7 @@ class TestTorch(TestCase):
self._test_conv_corr_eq(lambda x, k: torch.xcorr3(x, k, 'F'), reference)
@unittest.skip("Not implemented yet")
def test_conv3_conv2_eq(self):
def test_conv3_conv2_eq_valid(self):
def reference(x, k, o3, o32):
for i in range(o3.size(1)):
for j in range(k.size(1)):
@ -1867,7 +1857,7 @@ class TestTorch(TestCase):
self.assertEqual(reference[2, 2, 2], 27, 0)
self.assertEqual(reference[:], self._consecutive((3, 3, 3)), 0)
# Check Ellipsis
# indexing with Ellipsis
self.assertEqual(reference[..., 2], torch.Tensor([[3, 6, 9],
[12, 15, 18],
[21, 24, 27]]), 0)
@ -1879,18 +1869,61 @@ class TestTorch(TestCase):
self.assertEqual(reference[2, ..., 2, 2], 27, 0)
self.assertEqual(reference[2, 2, ..., 2], 27, 0)
self.assertEqual(reference[2, 2, 2, ...], 27, 0)
# LongTensor indexing
reference = self._consecutive((5, 5, 5))
idx = torch.LongTensor([2, 4])
self.assertEqual(reference[idx], torch.stack([reference[2], reference[4]]))
self.assertEqual(reference[2, idx], torch.stack([reference[2, 2], reference[2, 4]]))
self.assertEqual(reference[3, idx, 1], torch.stack([reference[3, 2], reference[3, 4]])[:, 1])
self.assertEqual(reference[...], reference, 0)
reference_5d = self._consecutive((3, 3, 3, 3, 3))
self.assertEqual(reference_5d[..., 1, 0], reference_5d[:, :, :, 1, 0], 0)
self.assertEqual(reference_5d[2, ..., 1, 0], reference_5d[2, :, :, 1, 0], 0)
self.assertEqual(reference_5d[2, 1, 0, ..., 1], reference_5d[2, 1, 0, :, 1], 0)
self.assertEqual(reference_5d[...], reference_5d, 0)
# LongTensor indexing
reference = self._consecutive((5, 5, 5))
idx = torch.LongTensor([2, 4])
self.assertEqual(reference[idx], torch.stack([reference[2], reference[4]]))
# TODO: enable once indexing is implemented like in numpy
# self.assertEqual(reference[2, idx], torch.stack([reference[2, 2], reference[2, 4]]))
# self.assertEqual(reference[3, idx, 1], torch.stack([reference[3, 2], reference[3, 4]])[:, 1])
# None indexing
self.assertEqual(reference[2, None], reference[2].unsqueeze(0))
self.assertEqual(reference[2, None, None], reference[2].unsqueeze(0).unsqueeze(0))
self.assertEqual(reference[2:4, None], reference[2:4].unsqueeze(1))
self.assertEqual(reference[None, 2, None, None], reference.unsqueeze(0)[:, 2].unsqueeze(0).unsqueeze(0))
self.assertEqual(reference[None, 2:5, None, None], reference.unsqueeze(0)[:, 2:5].unsqueeze(2).unsqueeze(2))
# indexing with step
reference = self._consecutive((10, 10, 10))
self.assertEqual(reference[1:5:2], torch.stack([reference[1], reference[3]], 0))
self.assertEqual(reference[1:6:2], torch.stack([reference[1], reference[3], reference[5]], 0))
self.assertEqual(reference[1:9:4], torch.stack([reference[1], reference[5]], 0))
self.assertEqual(reference[2:4, 1:5:2], torch.stack([reference[2:4, 1], reference[2:4, 3]], 1))
self.assertEqual(reference[3, 1:6:2], torch.stack([reference[3, 1], reference[3, 3], reference[3, 5]], 0))
self.assertEqual(reference[None, 2, 1:9:4], torch.stack([reference[2, 1], reference[2, 5]], 0).unsqueeze(0))
self.assertEqual(reference[:, 2, 1:6:2],
torch.stack([reference[:, 2, 1], reference[:, 2, 3], reference[:, 2, 5]], 1))
lst = [list(range(i, i + 10)) for i in range(0, 100, 10)]
tensor = torch.DoubleTensor(lst)
for i in range(100):
idx1_start = random.randrange(10)
idx1_end = idx1_start + random.randrange(1, 10 - idx1_start + 1)
idx1_step = random.randrange(1, 8)
idx1 = slice(idx1_start, idx1_end, idx1_step)
if random.randrange(2) == 0:
idx2_start = random.randrange(10)
idx2_end = idx2_start + random.randrange(1, 10 - idx2_start + 1)
idx2_step = random.randrange(1, 8)
idx2 = slice(idx2_start, idx2_end, idx2_step)
lst_indexed = list(map(lambda l: l[idx2], lst[idx1]))
tensor_indexed = tensor[idx1, idx2]
else:
lst_indexed = lst[idx1]
tensor_indexed = tensor[idx1]
self.assertEqual(torch.DoubleTensor(lst_indexed), tensor_indexed)
self.assertRaises(ValueError, lambda: reference[1:9:0])
self.assertRaises(ValueError, lambda: reference[1:9:-1])
self.assertRaises(IndexError, lambda: reference[1, 1, 1, 1])
self.assertRaises(IndexError, lambda: reference[1, 1, 1, 1:1])
@ -1920,6 +1953,7 @@ class TestTorch(TestCase):
checkPartialAssign((0, 1))
checkPartialAssign((1, 2))
checkPartialAssign((0, 2))
checkPartialAssign(torch.LongTensor((0, 2)))
with self.assertRaises(IndexError):
reference[1, 1, 1, 1] = 1
@ -1940,10 +1974,8 @@ class TestTorch(TestCase):
with self.assertRaises(TypeError):
reference[0.0, :, 0.0] = 1
# LongTensor assignments are not supported yet
with self.assertRaises(RuntimeError):
reference[torch.LongTensor([2, 4])] = 1
with self.assertRaises(RuntimeError):
# LongTensor assignments are not fully supported yet
with self.assertRaises(TypeError):
reference[0, torch.LongTensor([2, 4])] = 1
def test_index_copy(self):
@ -2152,15 +2184,35 @@ class TestTorch(TestCase):
self.assertEqual((tensor_view - tensor).abs().max(), 0)
self.assertEqual(empty.view_as(empty), empty)
self.assertEqual(empty.view(0), empty)
self.assertRaises(RuntimeError, lambda: tensor.view(15, 0))
self.assertRaises(RuntimeError, lambda: tensor.view(7, -1))
self.assertRaises(RuntimeError, lambda: tensor.view(15, -1, -1))
def test_expand(self):
result = torch.Tensor()
tensor = torch.rand(8, 1)
template = torch.rand(8, 5)
tensor = torch.rand(1, 8, 1)
tensor2 = torch.rand(5)
template = torch.rand(4, 8, 5)
target = template.size()
self.assertEqual(tensor.expand_as(template).size(), target)
self.assertEqual(tensor.expand(8, 5).size(), target)
self.assertEqual(tensor.expand(torch.Size([8, 5])).size(), target)
self.assertEqual(tensor.expand(4, 8, 5).size(), target)
self.assertEqual(tensor.expand(target).size(), target)
self.assertEqual(tensor2.expand_as(template).size(), target)
self.assertEqual(tensor2.expand(4, 8, 5).size(), target)
self.assertEqual(tensor2.expand(target).size(), target)
# test double expand
self.assertEqual(tensor2.expand(1, 5).expand(2, 2, 5), tensor2.repeat(2, 2, 1))
# test non-contiguous
noncontig = torch.randn(5, 2, 1, 3)[:, 0]
assert not noncontig.is_contiguous()
self.assertEqual(noncontig.expand(2, 5, 4, 3), noncontig.contiguous().repeat(2, 1, 4, 1))
# make sure it's compatible with unsqueeze
expanded = tensor2.expand(1, 1, 5)
unsqueezed = tensor2.unsqueeze(0).unsqueeze(1)
self.assertEqual(expanded, unsqueezed)
self.assertEqual(expanded.stride(), unsqueezed.stride())
def test_repeat(self):
result = torch.Tensor()
@ -2425,7 +2477,9 @@ class TestTorch(TestCase):
a_clone = a.clone()
b = copy(a)
b.fill_(1)
self.assertEqual(a, a_clone)
# copy is a shallow copy, only copies the tensor view,
# not the data
self.assertEqual(a, b)
def test_pickle(self):
if sys.version_info[0] == 2:
@ -2497,6 +2551,11 @@ class TestTorch(TestCase):
b = [a[i % 2] for i in range(4)]
b += [a[0].storage()]
b += [a[0].storage()[1:4]]
b += [torch.range(1, 10).int()]
t1 = torch.FloatTensor().set_(a[0].storage()[1:4], 0, (3,), (1,))
t2 = torch.FloatTensor().set_(a[0].storage()[1:4], 0, (3,), (1,))
b += [(t1.storage(), t1.storage(), t2.storage())]
b += [a[0].storage()[0:2]]
for use_name in (False, True):
with tempfile.NamedTemporaryFile() as f:
handle = f if not use_name else f.name
@ -2516,6 +2575,89 @@ class TestTorch(TestCase):
self.assertEqual(c[1], c[3], 0)
self.assertEqual(c[4], c[5][1:4], 0)
# check that serializing the same storage view object unpickles
# it as one object not two (and vice versa)
views = c[7]
self.assertEqual(views[0]._cdata, views[1]._cdata)
self.assertEqual(views[0], views[2])
self.assertNotEqual(views[0]._cdata, views[2]._cdata)
rootview = c[8]
self.assertEqual(rootview.data_ptr(), c[0].data_ptr())
def test_half_tensor(self):
x = torch.randn(5, 5).float()
y = torch.randn(5, 5).float()
xh, yh = x.half(), y.half()
self.assertEqual(x.half().float(), x, 1e-3)
z = torch.Tensor(5, 5)
self.assertEqual(z.copy_(xh), x, 1e-3)
with tempfile.NamedTemporaryFile() as f:
torch.save(xh, f)
f.seek(0)
xh2 = torch.load(f)
self.assertEqual(xh, xh2)
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_half_tensor_cuda(self):
x = torch.randn(5, 5).half()
self.assertEqual(x.cuda().cpu(), x)
xc = x.cuda()
with tempfile.NamedTemporaryFile() as f:
torch.save(xc, f)
f.seek(0)
xc2 = torch.load(f)
self.assertIsInstance(xc2, type(xc))
self.assertEqual(xc, xc2)
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_serialization_cuda(self):
device_count = torch.cuda.device_count()
t0 = torch.cuda.FloatTensor(5).fill_(1)
torch.cuda.set_device(device_count - 1)
tn = torch.cuda.FloatTensor(3).fill_(2)
torch.cuda.set_device(0)
b = (t0, tn)
with tempfile.NamedTemporaryFile() as f:
torch.save(b, f)
f.seek(0)
c = torch.load(f)
self.assertEqual(b, c, 0)
u0, un = c
self.assertEqual(u0.get_device(), 0)
self.assertEqual(un.get_device(), device_count - 1)
def test_serialization_backwards_compat(self):
a = [torch.range(1 + i, 25 + i).view(5, 5).float() for i in range(2)]
b = [a[i % 2] for i in range(4)]
b += [a[0].storage()]
b += [a[0].storage()[1:4]]
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_serialized.pt'
data_dir = os.path.join(os.path.dirname(__file__), 'data')
test_file_path = os.path.join(data_dir, 'legacy_serialized.pt')
succ = download_file(DATA_URL, test_file_path)
if not succ:
warnings.warn(("Couldn't download the test file for backwards compatibility! "
"Tests will be incomplete!"), RuntimeWarning)
return
c = torch.load(test_file_path)
self.assertEqual(b, c, 0)
self.assertTrue(isinstance(c[0], torch.FloatTensor))
self.assertTrue(isinstance(c[1], torch.FloatTensor))
self.assertTrue(isinstance(c[2], torch.FloatTensor))
self.assertTrue(isinstance(c[3], torch.FloatTensor))
self.assertTrue(isinstance(c[4], torch.FloatStorage))
c[0].fill_(10)
self.assertEqual(c[0], c[2], 0)
self.assertEqual(c[4], torch.FloatStorage(25).fill_(10), 0)
c[1].fill_(20)
self.assertEqual(c[1], c[3], 0)
self.assertEqual(c[4], c[5][1:4], 0)
def test_serialization_container(self):
def import_module(name, filename):
if sys.version_info >= (3, 5):
@ -2600,6 +2742,8 @@ class TestTorch(TestCase):
y = x.clone().unsqueeze_(2)
self.assertEqual(y, x.contiguous().view(2, 4, 1))
self.assertRaises(RuntimeError, lambda: torch.Tensor().unsqueeze(0))
def test_iter(self):
x = torch.randn(5, 5)
for i, sub in enumerate(x):
@ -2724,6 +2868,7 @@ class TestTorch(TestCase):
np.float,
np.int64,
np.int32,
np.int16,
np.uint8
]
for dtype in dtypes:
@ -2835,8 +2980,30 @@ class TestTorch(TestCase):
self.assertEqual(x[0], 1)
self.assertEqual(x[1], 2)
self.assertEqual(x[2], 3)
self.assertEqual(len(x), 3)
self.assertRaises(TypeError, lambda: torch.Size(torch.ones(3)))
self.assertIsInstance(x * 2, torch.Size)
self.assertIsInstance(x[:-1], torch.Size)
self.assertIsInstance(x + x, torch.Size)
def test_transpose_neg(self):
x = torch.randn(10, 20, 30)
ndim = 3
for i, j in combinations(range(ndim), 2):
a = x.transpose(i, j)
b = x.transpose(i - ndim, j - ndim)
self.assertEqual(a, b)
a = torch.transpose(x, i, j)
b = torch.transpose(x, i - ndim, j - ndim)
self.assertEqual(a, b)
a = x.clone()
x.transpose_(i, j)
x.transpose_(i - ndim, j - ndim)
self.assertEqual(a, x)
if __name__ == '__main__':
run_tests()

View File

@ -6,7 +6,6 @@ import shutil
import random
import tempfile
import unittest
import sys
import traceback
import torch
import torch.cuda
@ -19,7 +18,7 @@ from torch.utils.serialization import load_lua
HAS_CUDA = torch.cuda.is_available()
from common import TestCase, run_tests
from common import TestCase, run_tests, download_file
try:
import cffi
@ -296,35 +295,13 @@ class TestLuaReader(TestCase):
self.assertEqual(grad_input, test['grad_input'])
return do_test
@classmethod
def _download_data(cls, test_file_path):
if os.path.exists(test_file_path):
return
print('Downloading test file for TestLuaReader.')
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
urllib = cls._get_urllib('request')
data = urllib.urlopen(DATA_URL, timeout=15).read()
with open(test_file_path, 'wb') as f:
f.write(data)
@staticmethod
def _get_urllib(submodule):
if sys.version_info < (3,):
import urllib2
return urllib2
else:
import urllib.error
import urllib.request
return getattr(urllib, submodule)
@classmethod
def init(cls):
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
data_dir = os.path.join(os.path.dirname(__file__), 'data')
test_file_path = os.path.join(data_dir, 'legacy_modules.t7')
urllib = cls._get_urllib('error')
try:
cls._download_data(test_file_path)
except urllib.URLError as e:
succ = download_file(DATA_URL, test_file_path)
if not succ:
warnings.warn(("Couldn't download the test file for TestLuaReader! "
"Tests will be incomplete!"), RuntimeWarning)
return

View File

@ -65,7 +65,7 @@ void $name($args)
'THCTensor*': 'thpp::Tensor*',
'THIndexTensor*': 'thpp::Tensor*',
'THIndex_t': 'long',
'real': 'double',
'accreal': 'double',
}
def __init__(self, header=False):

View File

@ -53,9 +53,9 @@ class KwargsPlugin(CWrapPlugin):
name not in seen_args):
seen_args.add(name)
args.append(name)
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args])
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(a) for a in args])
lookups = '\n '.join(
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args])
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=a) for a in args])
start_idx = code.find('{') + 1
new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups)
return code[:start_idx] + new_code + code[start_idx:]

View File

@ -18,6 +18,7 @@ class THPPlugin(CWrapPlugin):
'THCudaTensor*': Template('((THCPFloatTensor*)$arg)->cdata'),
'THCudaDoubleTensor*': Template('((THCPDoubleTensor*)$arg)->cdata'),
'THCudaLongTensor*': Template('((THCPLongTensor*)$arg)->cdata'),
'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'),
'THSDoubleTensor*': Template('((THSPDoubleTensor*)$arg)->cdata'),
@ -53,6 +54,7 @@ class THPPlugin(CWrapPlugin):
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
'THCudaDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPDoubleTensorClass'),
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
'THSDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPDoubleTensorClass'),
'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'),
@ -84,6 +86,7 @@ class THPPlugin(CWrapPlugin):
'THSTensor*': Template('return THSPTensor_(New)($result);'),
'THLongTensor*': Template('return THPLongTensor_New($result);'),
'THLongStorage*': Template('return THPLongStorage_New($result);'),
'THCudaLongTensor*': Template('return THCPLongTensor_New($result);'),
# TODO: make it smarter - it should return python long if result doesn't fit into an int
'long': Template('return PyInt_FromLong($result);'),
'accreal': Template('return THPUtils_(newAccreal)($result);'),
@ -167,6 +170,7 @@ ${cpu}
'THDoubleTensor*': '" THPModuleStr "DoubleTensor',
'THCudaTensor*': 'torch.cuda.FloatTensor',
'THCudaDoubleTensor*': 'torch.cuda.DoubleTensor',
'THCudaLongTensor*': 'torch.cuda.LongTensor',
'THSize*': 'torch.Size',
'THStride*': 'tuple',
'long': 'int',
@ -303,8 +307,6 @@ ${cpu}
def process_declarations(self, declarations):
new_declarations = []
register_only = [d for d in declarations if d.get('only_register', False)]
declarations = [d for d in declarations if not d.get('only_register', False)]
def has_arg_type(declaration, type_name):
return any(arg['type'] == type_name
@ -322,8 +324,16 @@ ${cpu}
for arg in option['arguments'])
for declaration in declarations:
# Disable all methods for THHalfTensor, unless cpu_half is True
if not declaration.get('cpu_half', False):
defined_if = '!defined(TH_REAL_IS_HALF)'
if 'defined_if' in declaration:
defined_if += ' && (' + declaration['defined_if'] + ')'
declaration['defined_if'] = defined_if
if declaration.get('only_register', False):
continue
declaration.setdefault('python_name', declaration['name'])
declaration.setdefault('variables', [])
if has_arg_type(declaration, 'THSize*'):
@ -353,7 +363,9 @@ ${cpu}
if arg['name'] == 'self':
arg['ignore_check'] = True
declarations = [d for d in declarations if not d.get('only_stateless', False)]
register_only = [d for d in declarations if d.get('only_register', False)]
declarations = [d for d in declarations
if (not d.get('only_stateless', False)) and (not d.get('only_register', False))]
self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only))
self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only))
@ -390,11 +402,14 @@ ${cpu}
if 'defined_if' in declaration:
entry = self.preprocessor_guard(entry, declaration['defined_if'])
tensor_methods += entry
return self.TENSOR_METHODS_DECLARATION.substitute(
generated = self.TENSOR_METHODS_DECLARATION.substitute(
methods=tensor_methods,
stateless=('' if not stateless else 'stateless_'),
sparse=('' if not sparse else 'S'),
)
if sparse:
generated = '#ifndef TH_REAL_IS_HALF\n' + generated + '\n#endif\n\n'
return generated
def process_full_file(self, code):
# We have to find a place before all undefs

View File

@ -1 +1,2 @@
from .generate_wrappers import generate_wrappers, wrap_function, import_module
from .generate_wrappers import generate_wrappers, wrap_function, \
import_module, wrap_generic_function

View File

@ -52,22 +52,27 @@ TYPE_TRANSFORMS = {
'Float': {
'THTensor*': 'THFloatTensor*',
'real': 'float',
'accreal': 'double',
},
'Double': {
'THTensor*': 'THDoubleTensor*',
'real': 'double',
'accreal': 'double',
},
'CudaHalf': {
'THCTensor*': 'THCudaHalfTensor*',
'real': 'half',
'accreal': 'float',
},
'Cuda': {
'THCTensor*': 'THCudaTensor*',
'real': 'float',
'accreal': 'float',
},
'CudaDouble': {
'THCTensor*': 'THCudaDoubleTensor*',
'real': 'double',
'accreal': 'double',
},
}
for t, transforms in TYPE_TRANSFORMS.items():

View File

@ -10,6 +10,7 @@ on an NVIDIA GPU with compute capability >= 2.0.
import sys
from ._utils import _import_dotted_name
from .version import __version__
__all__ = [
'typename', 'is_tensor', 'is_storage', 'set_default_tensor_type',
@ -30,6 +31,13 @@ __all__ = [
# automatically filled by the dynamic loader.
import os as _dl_flags
# if we have numpy, it *must* be imported before the call to setdlopenflags()
# or there is a risk that later C modules will segfault when importing numpy
try:
import numpy as np
except:
pass
# first check if the os package has the required flags
if not hasattr(_dl_flags, 'RTLD_GLOBAL') or not hasattr(_dl_flags, 'RTLD_NOW'):
try:
@ -75,10 +83,20 @@ def typename(o):
def is_tensor(obj):
r"""Returns True if `obj` is a pytorch tensor.
Args:
obj (Object): Object to test
"""
return obj.__class__ in _tensor_classes
def is_storage(obj):
r"""Returns True if `obj` is a pytorch storage object.
Args:
obj (Object): Object to test
"""
return obj.__class__ in _storage_classes
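# Illustrative check of the two predicates documented above (example only, not
# part of the original file; it assumes torch has been imported as usual):
assert torch.is_tensor(torch.FloatTensor(2, 2))
assert not torch.is_tensor(torch.FloatStorage(5))
assert torch.is_storage(torch.FloatStorage(5))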
@ -140,6 +158,10 @@ class FloatStorage(_C.FloatStorageBase, _StorageBase):
pass
class HalfStorage(_C.HalfStorageBase, _StorageBase):
pass
class LongStorage(_C.LongStorageBase, _StorageBase):
pass
@ -180,6 +202,16 @@ class FloatTensor(_C.FloatTensorBase, _TensorBase):
return FloatStorage
class HalfTensor(_C.HalfTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return HalfStorage
class LongTensor(_C.LongTensorBase, _TensorBase):
def is_signed(self):

View File

@ -1632,6 +1632,20 @@ Fills this tensor with numbers sampled from the uniform distribution:
P(x) = \dfrac{1}{to - from}
""")
add_docstr(torch._C.FloatTensorBase.unsqueeze,
"""
unsqueeze(dim)
See :func:`torch.unsqueeze`
""")
add_docstr(torch._C.FloatTensorBase.unsqueeze_,
"""
unsqueeze_(dim)
In-place version of :meth:`~Tensor.unsqueeze`
""")
add_docstr(torch._C.FloatTensorBase.var,
"""
var() -> float
@ -1639,6 +1653,31 @@ var() -> float
See :func:`torch.var`
""")
add_docstr(torch._C.FloatTensorBase.view,
"""
view(*args) -> Tensor
Returns a new tensor with the same data but different size.
The returned tensor shares the same data and must have the same number
of elements, but may have a different size. A tensor must be
:func:`contiguous` to be viewed.
Args:
args (torch.Size or int...): Desired size
Example:
>>> x = torch.randn(4, 4)
>>> x.size()
torch.Size([4, 4])
>>> y = x.view(16)
>>> y.size()
torch.Size([16])
>>> z = x.view(-1, 8) # the size -1 is inferred from other dimensions
>>> z.size()
torch.Size([2, 8])
""")
add_docstr(torch._C.FloatTensorBase.zero_,
"""
zero_()

View File

@ -58,7 +58,10 @@ for t in ['Float', 'Double']:
type2backend.backends['torch.{}Tensor'.format(t)] = backend
type2backend.backends[getattr(torch, '{}Tensor'.format(t))] = backend
backend = Backend('Cuda', 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
type2backend.backends['THNNCudaBackend'] = backend
type2backend.backends['torch.cuda.FloatTensor'] = backend
type2backend.backends[torch.cuda.FloatTensor] = backend
for t in ['Half', '', 'Double']:
backend = Backend('Cuda' + t, 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
type2backend.backends['THNNCuda{}Backend'.format(t)] = backend
py_name = 'Float' if t == '' else t
type2backend.backends['torch.cuda.{}Tensor'.format(py_name)] = backend
type2backend.backends[getattr(torch.cuda, '{}Tensor'.format(py_name))] = backend

View File

@ -3621,7 +3621,6 @@ Example::
>>> y = torch.squeeze(x, 1)
>>> y.size()
(2L, 2L, 1L, 2L)
""")
add_docstr(torch._C.std,
@ -3992,13 +3991,13 @@ Example::
>>> torch.topk(x, 3)
(
2
1
5
4
3
[torch.FloatTensor of size 3]
,
1
0
4
3
2
[torch.LongTensor of size 3]
)
@ -4214,6 +4213,33 @@ Example::
""")
add_docstr(torch._C.unsqueeze,
"""
unsqueeze(input, dim, out=None)
Returns a new tensor with a dimension of size one inserted at the
specified position.
The returned tensor shares the same underlying data with this tensor.
Args:
input (Tensor): the input `Tensor`
dim (int): The index at which to insert the singleton dimension
out (Tensor, optional): The result `Tensor`
Example:
>>> x = torch.Tensor([1, 2, 3, 4])
>>> torch.unsqueeze(x, 0)
1 2 3 4
[torch.FloatTensor of size 1x4]
>>> torch.unsqueeze(x, 1)
1
2
3
4
[torch.FloatTensor of size 4x1]
""")
add_docstr(torch._C.var,
"""
.. function:: var(input) -> float

View File

@ -21,6 +21,15 @@ def _type(self, new_type=None, async=False):
new_type = _import_dotted_name(new_type)
if new_type == type(self):
return self
if self.is_sparse:
if not new_type.is_sparse:
raise RuntimeError("Cannot cast sparse tensor to dense tensor")
new_type_name = new_type.__module__ + '.' + new_type.__name__
new_values_type_name = new_type_name.replace('.sparse', '')
new_values = self.values().type(new_values_type_name, async)
return new_type(self.indices(), new_values, self.size())
if new_type.is_sparse:
raise RuntimeError("Cannot cast dense tensor to sparse tensor")
return new_type(self.size()).copy_(self, async)
@ -39,16 +48,20 @@ def _cuda(self, device=None, async=False):
if self.is_cuda:
if device is None:
device = torch.cuda.current_device()
if self.get_device() != device:
with torch.cuda.device(device):
return type(self)(self.size()).copy_(self, async)
else:
if self.get_device() == device:
return self
else:
if device is None:
device = -1
with torch.cuda.device(device):
return self.type(getattr(torch.cuda, self.__class__.__name__), async)
with torch.cuda.device(device):
if self.is_sparse:
new_type = getattr(torch.cuda.sparse, self.__class__.__name__)
indices = self.indices().cuda(device, async)
values = self.values().cuda(device, async)
return new_type(indices, values, self.size())
else:
new_type = getattr(torch.cuda, self.__class__.__name__)
return new_type(self.size()).copy_(self, async)
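# Hedged sketch of the sparse branches above (class names assume the
# torch.sparse module introduced in this change set; this is an illustration,
# not part of the original file):
import torch

indices = torch.LongTensor([[0, 1], [2, 0]])
values = torch.FloatTensor([3, 4])
sp = torch.sparse.FloatTensor(indices, values, torch.Size([2, 3]))
sp_double = sp.type('torch.sparse.DoubleTensor')  # indices kept, values cast to double
# sp.type('torch.FloatTensor') raises RuntimeError: sparse tensors cannot be cast
# to dense ones, and _cuda() likewise rebuilds indices and values on the GPU.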
def _range(*args, **kwargs):

View File

@ -9,6 +9,7 @@ import torch
from .variable import Variable
from .function import Function, NestedIOFunction
from .stochastic_function import StochasticFunction
from .gradcheck import gradcheck
__all__ = ['Variable', 'Function', 'StochasticFunction', 'backward']

View File

@ -3,9 +3,16 @@ from ..function import Function, InplaceFunction
import math
def maybe_view(tensor, size):
if tensor.size() == size:
return tensor
return tensor.contiguous().view(size)
class Add(InplaceFunction):
def forward(self, a, b):
self.b_size = b.size()
if self.inplace:
self.mark_dirty(a)
return a.add_(b)
@ -13,12 +20,13 @@ class Add(InplaceFunction):
return a.add(b)
def backward(self, grad_output):
return grad_output, grad_output
return grad_output, maybe_view(grad_output, self.b_size)
class Sub(InplaceFunction):
def forward(self, a, b):
self.b_size = b.size()
if self.inplace:
self.mark_dirty(a)
return a.sub_(b)
@ -26,40 +34,43 @@ class Sub(InplaceFunction):
return a.sub(b)
def backward(self, grad_output):
return grad_output, grad_output.neg()
return grad_output, maybe_view(grad_output.neg(), self.b_size)
class Mul(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.mul(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.mul(b), grad_output.mul(a)
return grad_output.mul(b), maybe_view(grad_output.mul(a), self.b_size)
class Div(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.div(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.div(b), grad_output.neg().mul(a).div_(b).div_(b)
return grad_output.div(b), maybe_view(grad_output.neg().mul(a).div_(b).div_(b), self.b_size)
class Pow(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.pow(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.mul(b).mul_(a.pow(b - 1)), grad_output.mul(a.pow(b)).mul_(a.log())
return grad_output.mul(b).mul_(a.pow(b - 1)), maybe_view(grad_output.mul(a.pow(b)).mul_(a.log()), self.b_size)
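# Hedged sketch of what maybe_view compensates for (mirrors test_basic_op_grad
# in test_autograd.py): in this version of torch, elementwise ops accept
# same-numel operands of different shapes, so the gradient arriving for `b`
# has `a`'s shape and must be viewed back to `b`'s size.
import torch
from torch.autograd import Variable

a = Variable(torch.randn(4, 6), requires_grad=True)
b = Variable(torch.randn(12, 2), requires_grad=True)   # same numel, different shape
(a + b).sum().backward()
assert a.grad.data.size() == (4, 6)
assert b.grad.data.size() == (12, 2)   # reshaped by maybe_view in Add.backward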
class AddConstant(InplaceFunction):

View File

@ -168,7 +168,7 @@ class Addr(_BlasBase):
if self.needs_input_grad[2]:
# TODO: maybe it's better to do transpose + mv + transpose
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output)
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
if self.beta != 1:
grad_vector2 *= self.beta

View File

@ -18,9 +18,8 @@ class Index(Function):
return result
def backward(self, grad_output):
# TODO: this won't have to be zeroed
grad_input = grad_output.new(self.input_size).zero_()
grad_input.index(self.index).copy_(grad_output)
grad_input._set_index(self.index, grad_output)
return grad_input
@ -99,7 +98,7 @@ class View(Function):
def backward(self, grad_output):
# TODO: not sure if this clone is necessary
return grad_output.clone().view(self.input_size)
return grad_output.contiguous().view(self.input_size)
class Expand(Function):
@ -110,10 +109,11 @@ class Expand(Function):
self.expanded_dims = []
def forward(self, i):
self.expanded_dims = [dim for dim, (expanded, original)
in enumerate(zip(self.sizes, i.size()))
if expanded != original]
result = i.expand(*self.sizes)
unsqueezed = (1,) * (len(self.sizes) - len(i.size()))
self.expanded_dims = [dim for dim, (expanded, original)
in enumerate(zip(self.sizes, unsqueezed + i.size()))
if expanded != original]
self.mark_shared_storage((i, result))
return result
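# Hedged sketch of the new-dimension handling above (mirrors the 'new_dim'
# entries in the autograd function/method tests): expand may now prepend
# singleton dimensions, and backward sums the gradient over every expanded dim.
import torch
from torch.autograd import Variable

x = Variable(torch.randn(5, 1), requires_grad=True)
y = x.expand(3, 5, 4)          # treated as (1, 5, 1) -> (3, 5, 4)
y.sum().backward()
assert x.grad.data.size() == (5, 1)
# each entry of x.grad equals 12, i.e. the 3 * 4 expanded copies summed up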

View File

@ -2,7 +2,6 @@ import torch
import torch._C as _C
import torch.utils.hooks as hooks
from collections import OrderedDict
from itertools import chain
class Function(_C._FunctionBase):
@ -98,9 +97,9 @@ class Function(_C._FunctionBase):
**This should be called at most once, only from inside the**
:func:`forward` **method, and all arguments should be outputs.**
This will mark outputs as non requiring gradient, increasing the
This will mark outputs as not requiring gradients, increasing the
efficiency of backward computation. You still need to accept a gradient
for this output in :meth:`~Function.backward`, but it's always going to
for each output in :meth:`~Function.backward`, but it's always going to
be ``None``.
This is used e.g. for indices returned from a max :class:`Function`.
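# Hedged sketch (class name and shapes invented for illustration) of a Function
# that marks an integer output as non-differentiable, as described above:
import torch
from torch.autograd import Function, Variable

class MaxAlongDim0(Function):

    def forward(self, input):
        value, index = input.max(0)
        self.mark_non_differentiable(index)   # index receives no gradient
        self.save_for_backward(input, index)
        return value, index

    def backward(self, grad_value, grad_index):
        # grad_index is always None for the non-differentiable output
        input, index = self.saved_tensors
        grad_input = input.new(input.size()).zero_()
        grad_input.scatter_(0, index, grad_value)
        return grad_input

x = Variable(torch.randn(4, 3), requires_grad=True)
value, index = MaxAlongDim0()(x)
value.sum().backward()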
@ -204,11 +203,17 @@ class NestedIOFunction(Function):
nested_variables = _unflatten(flat_output, self._nested_output)
return nested_variables
def _do_backward(self, gradients, retain_variables):
self.retain_variables = retain_variables
result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
if not retain_variables:
del self._nested_output
del self._to_save_nested
return result
def backward(self, *gradients):
nested_gradients = _unflatten(gradients, self._nested_output)
del self._nested_output
result = self.backward_extended(*nested_gradients)
del self._to_save_nested
return tuple(_iter_None_tensors(result))
__call__ = _do_forward

torch/autograd/gradcheck.py (new file, 149 lines)
View File

@ -0,0 +1,149 @@
import torch
from torch.autograd import Variable
def iter_gradients(x):
if isinstance(x, Variable):
if x.requires_grad:
yield x.grad.data if x.grad is not None else None
else:
for elem in x:
for result in iter_gradients(elem):
yield result
def zero_gradients(i):
for t in iter_gradients(i):
if t is not None:
t.zero_()
def make_jacobian(input, num_out):
if isinstance(input, Variable) and not input.requires_grad:
return None
if torch.is_tensor(input) or isinstance(input, Variable):
return torch.zeros(input.nelement(), num_out)
else:
return type(input)(filter(lambda x: x is not None,
(make_jacobian(elem, num_out) for elem in input)))
def iter_tensors(x, only_requiring_grad=False):
if torch.is_tensor(x):
yield x
elif isinstance(x, Variable):
if x.requires_grad or not only_requiring_grad:
yield x.data
else:
for elem in x:
for result in iter_tensors(elem, only_requiring_grad):
yield result
def contiguous(input):
if torch.is_tensor(input):
return input.contiguous()
elif isinstance(input, Variable):
return input.contiguous()
else:
return type(input)(contiguous(e) for e in input)
def get_numerical_jacobian(fn, input, target, eps=1e-3):
# To be able to use .view(-1) input must be contiguous
input = contiguous(input)
output_size = fn(input).numel()
jacobian = make_jacobian(target, output_size)
# It's much easier to iterate over flattened lists of tensors.
# These are references to the same objects in jacobian, so any changes
# will be reflected in it as well.
x_tensors = [t for t in iter_tensors(target, True)]
j_tensors = [t for t in iter_tensors(jacobian)]
outa = torch.DoubleTensor(output_size)
outb = torch.DoubleTensor(output_size)
# TODO: compare structure
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
flat_tensor = x_tensor.view(-1)
for i in range(flat_tensor.nelement()):
orig = flat_tensor[i]
flat_tensor[i] = orig - eps
outa.copy_(fn(input))
flat_tensor[i] = orig + eps
outb.copy_(fn(input))
flat_tensor[i] = orig
outb.add_(-1, outa).div_(2 * eps)
d_tensor[i] = outb
return jacobian
def get_analytical_jacobian(input, output):
jacobian = make_jacobian(input, output.numel())
grad_output = output.data.clone().zero_()
flat_grad_output = grad_output.view(-1)
for i in range(flat_grad_output.numel()):
flat_grad_output.zero_()
flat_grad_output[i] = 1
zero_gradients(input)
output.backward(grad_output, retain_variables=True)
for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
if d_x is None:
jacobian_x[:, i].zero_()
else:
jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x
return jacobian
def _as_tuple(x):
if isinstance(x, tuple):
return x
elif isinstance(x, list):
return tuple(x)
else:
return x,
def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3):
"""Check gradients computed via small finite differences
against analytical gradients
The check between the numerical and analytical gradients has the same behaviour as
numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
meaning it checks that
absolute(a - n) <= (atol + rtol * absolute(n))
is true for all elements of the analytical jacobian a and the numerical jacobian n.
Args:
func: Python function that takes Variable inputs and returns
a tuple of Variables
inputs: tuple of Variables
eps: perturbation for finite differences
atol: absolute tolerance
rtol: relative tolerance
Returns:
True if all differences satisfy allclose condition
"""
output = func(*inputs)
output = _as_tuple(output)
for i, o in enumerate(output):
if not o.requires_grad:
continue
def fn(input):
return _as_tuple(func(*input))[i].data
numerical = get_numerical_jacobian(fn, inputs, inputs, eps)
analytical = get_analytical_jacobian(_as_tuple(inputs), o)
for a, n in zip(analytical, numerical):
if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
return False
return True
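As a usage sketch (assuming this module is importable as torch.autograd.gradcheck; the toy function is illustrative), checking a simple affine op looks like this:
import torch
from torch.autograd import Variable
from torch.autograd.gradcheck import gradcheck
# Double precision keeps the finite-difference error well under the tolerances.
inputs = (Variable(torch.randn(4, 3).double(), requires_grad=True),)
ok = gradcheck(lambda x: (x * 2 + 1,), inputs, eps=1e-6)
print(ok)  # True when the analytical and numerical Jacobians agree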

View File

@ -1,3 +1,5 @@
import torch
from numbers import Number
from .function import Function
_NOT_PROVIDED = object()
@ -17,5 +19,26 @@ class StochasticFunction(Function):
self.reward = None
return result
def _do_forward(self, *inputs):
result = super(StochasticFunction, self)._do_forward(*inputs)
# save output type and size, to check the type of reward
assert isinstance(result, torch.autograd.Variable), \
"stochastic functions support only a single output at the moment"
self.reward_info = (type(inputs[0].data), result.size())
return result
__call__ = _do_forward
def _reinforce(self, reward):
is_number = isinstance(reward, Number)
if not is_number and type(reward) != self.reward_info[0]:
raise TypeError("mismatch between reward and output type: got {}, "
"but expected {}".format(torch.typename(reward),
torch.typename(self.reward_info[0])))
if not is_number and reward.size() != self.reward_info[1]:
raise ValueError("got reward of size {}, but expected a tensor of size {}".format(
'x'.join(map(str, reward.size())),
'x'.join(map(str, self.reward_info[1]))))
if self.reward is not _NOT_PROVIDED:
raise RuntimeError("you can only reinforce a stochastic Function once")
self.reward = reward
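User code reaches this through Variable.reinforce, which forwards to creator._reinforce (see the variable.py hunk below). A hedged sketch of the intended REINFORCE pattern, assuming the stochastic multinomial sampling available in this version:
import torch
from torch.autograd import Variable
probs = Variable(torch.Tensor([0.3, 0.7]), requires_grad=True)
action = probs.multinomial()          # stochastic node: samples an index from probs
# ... run the environment here and observe a scalar reward ...
action.reinforce(1.0)                 # a plain number is accepted; a tensor must match the output type and size
torch.autograd.backward([action], [None])   # REINFORCE gradients flow back into probs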

View File

@ -1,6 +1,7 @@
import sys
import torch._C as _C
from collections import OrderedDict
import torch.sparse as sparse
import torch.utils.hooks as hooks
from ._functions import *
@ -56,30 +57,6 @@ class Variable(_C._VariableBase):
'is_cuda',
}
@property
def grad(self):
if self.requires_grad and self._grad is None:
# TODO: this won't have to be zeroed in the future
self._grad = Variable(self.data.new(self.data.size()).zero_())
return self._grad
@property
def requires_grad(self):
return self._requires_grad
@requires_grad.setter
def requires_grad(self, value):
if self.creator is not None:
if value is False:
hint = (" If you want to use a computed variable in a subgraph "
"that doesn't require differentiation use "
"var_no_grad = var.detach().")
else:
hint = ''
raise RuntimeError("you can only change requires_grad flags of "
"leaf variables." + hint)
self._requires_grad = value
def __getattr__(self, name):
if name in self._fallthrough_methods:
return getattr(self.data, name)
@ -108,19 +85,30 @@ class Variable(_C._VariableBase):
if self.creator is not None:
raise RuntimeError("Only Variables created explicitly by the user "
"(graph leaves) support the deepcopy protocol at the moment")
result = type(self)(self.data.clone(), requires_grad=self.requires_grad,
volatile=self.volatile)
result = type(self)(self.data.clone())
result.requires_grad = self.requires_grad
result.volatile = self.volatile
memo[id(self)] = result
return result
def __reduce_ex__(self, proto):
state = (self.requires_grad, self.volatile, self._backward_hooks)
if proto > 1:
return super(Variable, self).__reduce_ex__(proto)
return type(self), (self.data,), state
if sys.version_info[0] == 2:
from copy_reg import __newobj__
else:
from copyreg import __newobj__
return __newobj__, (type(self),), self.__getstate__()
return __newobj__, (type(self), self.data), state
def __setstate__(self, state):
if len(state) == 5:
# legacy serialization of Variable
self.data = state[0]
state = (state[3], state[4], state[2])
if self.creator is not None:
raise RuntimeError('__setstate__ can be only called on leaf variables')
self.requires_grad, self.volatile, self._backward_hooks = state
def __repr__(self):
return 'Variable containing:' + self.data.__repr__()
@ -225,8 +213,25 @@ class Variable(_C._VariableBase):
self.creator._reinforce(reward)
def detach(self):
"""Detaches the Variable from the graph that created it."""
return NoGrad()(self)
"""Returns a new Variable, detached from the current graph.
Result will never require gradient. If the input is volatile, the output
will be volatile too.
.. note::
Returned Variable uses the same data tensor as the original one, and
in-place modifications on either of them will be seen, and may trigger
errors in correctness checks.
"""
result = NoGrad()(self) # this is needed, because it merges version counters
result._creator = None
return result
def detach_(self):
"""Detaches the Variable from the graph that created it, making it a leaf."""
self._creator = None
self.requires_grad = False
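A small illustration of the note above (names are arbitrary): the detached Variable drops requires_grad and its creator but keeps the very same data tensor, so in-place edits are visible on both sides.
import torch
from torch.autograd import Variable
x = Variable(torch.ones(3), requires_grad=True)
h = x * 2                 # intermediate node of the graph
y = h.detach()            # same data tensor, requires_grad=False, no creator
y.data.add_(1)            # shared storage: the change is visible through h as well
print(h.data[0], y.data[0])   # both print 3.0
print(y.requires_grad)        # False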
def contiguous(self):
self.data = self.data.contiguous()
@ -426,12 +431,6 @@ class Variable(_C._VariableBase):
def trunc(self):
return Trunc()(self)
def floor(self):
return Floor()(self)
def ceil(self):
return Ceil()(self)
def fmod(self, value):
return Fmod(value)(self)
@ -487,9 +486,6 @@ class Variable(_C._VariableBase):
def split(self, split_size, dim=0):
return torch.split(self, split_size, dim)
def chunk(self, n_chunks, dim=0):
return torch.chunk(self, n_chunks, dim)
def repeat(self, *repeats):
if len(repeats) == 1 and isinstance(repeats[0], torch.Size):
repeats = repeats[0]

View File

@ -179,17 +179,19 @@ class TensorDescriptorArray(object):
def __getitem__(self, key):
return ctypes.c_void_p(self.ptrs[key])
def set(self, tensor):
self._type = tensor.type()
self._size = tensor.size()
self._stride = tensor.stride()
def set_all(self, tensor):
_type = _typemap[tensor.type()]
_ndim = tensor.dim()
_size = int_array(tensor.size())
_stride = int_array(tensor.stride())
for ptr in self.ptrs:
check_error(lib.cudnnSetTensorNdDescriptor(
ctypes.c_void_p(ptr), _typemap[tensor.type()], tensor.dim(),
int_array(tensor.size()), int_array(tensor.stride())))
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
def as_tuple(self):
return (self._type, tuple(self._size), tuple(self._stride))
def set_raw(self, i, _type, _ndim, _size, _stride):
ptr = self.ptrs[i]
check_error(lib.cudnnSetTensorNdDescriptor(
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
class ConvolutionDescriptor(object):
@ -241,24 +243,42 @@ class DropoutDescriptor(object):
def __init__(self, handle, dropout, seed):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
self.state = None
self.dropout = dropout
self.handle = handle
dropout_states_size = ctypes.c_long()
check_error(lib.cudnnDropoutGetStatesSize(
handle,
ctypes.byref(dropout_states_size)))
self._set(dropout, seed)
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
def set_dropout(self, dropout, seed):
if dropout != self.dropout:
self._set(dropout, seed)
def _set(self, dropout, seed):
if self.state is None and dropout > 0:
dropout_states_size = ctypes.c_long()
check_error(lib.cudnnDropoutGetStatesSize(
self.handle,
ctypes.byref(dropout_states_size)))
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
state_ptr = self.state.data_ptr()
state_size = self.state.size(0)
else:
state_ptr = None
state_size = 0
check_error(lib.cudnnSetDropoutDescriptor(
self,
handle,
self.handle,
ctypes.c_float(dropout),
ctypes.c_void_p(self.state.data_ptr()),
ctypes.c_size_t(self.state.size(0)),
ctypes.c_void_p(state_ptr),
ctypes.c_size_t(state_size),
ctypes.c_ulonglong(seed),
))
self.dropout = dropout
def __del__(self):
check_error(lib.cudnnDestroyDropoutDescriptor(self))
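With this change the dropout RNG state is only allocated once a non-zero probability is requested, so descriptors built for inference (dropout 0) stay cheap and can be upgraded later through set_dropout. A sketch of the call pattern, assuming a CUDA build with cuDNN loaded (the seed value is arbitrary):
import torch.backends.cudnn as cudnn
handle = cudnn.get_handle()
desc = cudnn.DropoutDescriptor(handle, 0.0, 42)  # dropout == 0: no state ByteTensor is allocated
desc.set_dropout(0.5, 42)   # first non-zero dropout: allocates the state and re-sets the descriptor
desc.set_dropout(0.5, 42)   # same probability again: no-op, the existing descriptor is reused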
@ -368,17 +388,30 @@ def int_array(itr):
def descriptor(tensor, N=None):
padded_size = tensor.size() + ((1,) * (5 - tensor.dim()))
tensor = tensor.view(padded_size)
if N is not None:
descriptor = TensorDescriptorArray(N)
descriptor.set_all(tensor)
else:
descriptor = TensorDescriptor()
if tensor.dim() == 2:
tensor = tensor.view(tensor.size(0), tensor.size(1), 1, 1)
elif tensor.dim() == 3:
tensor = tensor.view(tensor.size(0), tensor.size(1), tensor.size(2), 1)
descriptor.set(tensor)
descriptor.set(tensor)
return descriptor
def descriptor_sequence(tensor, batch_sizes):
descriptors = TensorDescriptorArray(len(batch_sizes))
_type = _typemap[tensor.type()]
_ndim = 5
dim_pad = (1,) * (5 - tensor.dim())
_size = int_array(tensor.size() + dim_pad)
_stride = int_array(tensor.stride() + dim_pad)
for i, batch_size in enumerate(batch_sizes):
_size[0] = batch_size
descriptors.set_raw(i, _type, _ndim, _size, _stride)
return descriptors
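descriptor_sequence builds one descriptor per time step of a packed batch, where batch_sizes[t] is how many sequences are still active at step t. A tiny illustration of the layout those sizes describe (hypothetical lengths, not taken from any test in this diff):
# Three sequences of lengths 3, 2 and 1, sorted by decreasing length.
# The packed input concatenates the rows that are active at each step:
#   t=0 -> 3 rows (all sequences)      batch_sizes[0] == 3
#   t=1 -> 2 rows (first two)          batch_sizes[1] == 2
#   t=2 -> 1 row  (longest one only)   batch_sizes[2] == 1
batch_sizes = [3, 2, 1]
total_rows = sum(batch_sizes)   # the packed 2-D input has shape (total_rows, input_size)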
_autotuner_forward = {}
_autotuner_backward_data = {}
_autotuner_backward_filter = {}

View File

@ -34,20 +34,20 @@ class Unserializable(object):
self.inner = None
def init_dropout_descriptor(fn, handle):
return cudnn.DropoutDescriptor(
handle,
fn.dropout,
fn.dropout_seed
)
def init_rnn_descriptor(fn, handle):
dropout_desc_name = 'desc_' + str(torch.cuda.current_device())
dropout_p = fn.dropout if fn.train else 0
if (dropout_desc_name not in fn.dropout_state) or (fn.dropout_state[dropout_desc_name].get() is None):
fn.dropout_state[dropout_desc_name] = Unserializable(
cudnn.DropoutDescriptor(handle, dropout_p, fn.dropout_seed)
)
dropout_desc = fn.dropout_state[dropout_desc_name].get()
dropout_desc.set_dropout(dropout_p, fn.dropout_seed)
return cudnn.RNNDescriptor(
handle,
fn.hidden_size,
fn.num_layers,
fn.dropout_state['desc'].get(),
dropout_desc,
fn.input_mode,
fn.bidirectional,
fn.mode,
@ -62,16 +62,22 @@ def init_weight_descriptor(fn, weight):
return w_desc
def _input_size(fn):
return (fn.seq_length, fn.mini_batch, fn.input_size)
def _input_size(fn, input):
if fn.batch_sizes is not None:
return (input.size(0), fn.input_size)
else:
return (fn.seq_length, fn.mini_batch, fn.input_size)
def _hidden_size(fn):
return (fn.num_layers * fn.num_directions, fn.mini_batch, fn.hidden_size)
def _output_size(fn):
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
def _output_size(fn, input):
if fn.batch_sizes is not None:
return (input.size(0), fn.hidden_size * fn.num_directions)
else:
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
def get_num_weights(handle, rnn_desc, x_desc, datatype):
@ -183,6 +189,7 @@ def forward(fn, input, hx, weight, output, hy):
lib = cudnn.lib
handle = cudnn.get_handle()
fn.datatype = cudnn._typemap[input.type()]
is_input_packed = fn.batch_sizes is not None
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
@ -190,22 +197,30 @@ def forward(fn, input, hx, weight, output, hy):
else:
cx, cy = None, None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
if input.dim() != 3:
if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
raise RuntimeError(
'input must have 3 dimensions, got {}'.format(input.dim()))
if fn.input_size != input.size(2):
raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
fn.input_size
if fn.input_size != input.size(-1):
raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
fn.input_size, input.size(-1)
))
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v5.1 and above')
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
if is_input_packed:
fn.seq_length = len(fn.batch_sizes)
fn.mini_batch = fn.batch_sizes[0]
fn.input_size = input.size(-1)
else:
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
output_size = _output_size(fn, input)
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
output.resize_(*output_size)
hy.resize_(*hidden_size)
@ -214,13 +229,13 @@ def forward(fn, input, hx, weight, output, hy):
y = output
# init descriptors
if ('desc' not in fn.dropout_state) or (fn.dropout_state['desc'].get() is None):
fn.dropout_state['desc'] = Unserializable(
init_dropout_descriptor(fn, handle)
)
fn.rnn_desc = init_rnn_descriptor(fn, handle)
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
if is_input_packed:
fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
else:
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
fn.hx_desc = cudnn.descriptor(hx)
fn.hy_desc = cudnn.descriptor(hx)
fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
@ -229,7 +244,7 @@ def forward(fn, input, hx, weight, output, hy):
# create the weight buffer and copy the weights into it
num_weights = get_num_weights(
handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
fn.weight_buf = input.new(num_weights)
fn.weight_buf = x.new(num_weights)
fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
w = fn.weight_buf
# this zero might not seem necessary, but it is in the case
@ -255,7 +270,7 @@ def forward(fn, input, hx, weight, output, hy):
ctypes.byref(workspace_size)
))
fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
if fn.train:
if fn.requires_grad:
reserve_size = ctypes.c_long()
check_error(lib.cudnnGetRNNTrainingReserveSize(
handle,
@ -295,12 +310,13 @@ def forward(fn, input, hx, weight, output, hy):
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0)
))
if fn.batch_first:
output = output.transpose_(0, 1)
if fn.batch_first and not is_input_packed:
output.transpose_(0, 1)
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
with torch.cuda.device_of(input):
is_input_packed = fn.batch_sizes is not None
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
@ -310,15 +326,17 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
else:
cx, grad_cx, grad_cy = None, None, None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
grad_output = grad_output.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
input_size = _input_size(fn, input)
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
output_size = _output_size(fn, input)
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
dy = grad_output.contiguous()
y = output
@ -331,12 +349,12 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if not fn.train:
raise RuntimeError('backward_grad can only be called when training!')
if not fn.requires_grad:
raise RuntimeError('backward_grad can only be called when the function requires grad!')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if tuple(output.size()) != _output_size(fn):
if tuple(output.size()) != output_size:
raise RuntimeError('Expected output size {}, got {}'.format(
output_size, output.size()))
if hx is not None and tuple(hx.size()) != hidden_size:
@ -351,6 +369,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
if dcy is not None and tuple(dcy.size()) != hidden_size:
raise RuntimeError('Expected d_cell size {}, got {}'.format(
hidden_size, dcy.size()))
if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
raise RuntimeError('Gradients aren\'t CUDA tensors')
check_error(cudnn.lib.cudnnRNNBackwardData(
handle,
@ -370,7 +390,7 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
))
if fn.batch_first:
if fn.batch_first and not is_input_packed:
grad_input = grad_input.transpose_(0, 1)
@ -389,30 +409,32 @@ def _num_linear_layers(fn):
def backward_weight(fn, input, hx, output, weight, grad_weight):
with torch.cuda.device_of(input):
is_input_packed = fn.batch_sizes is not None
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
else:
cx = None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
input_size = _input_size(fn, input)
hidden_size = _hidden_size(fn)
if not fn.train:
raise RuntimeError('backward_weight can only be called when training!')
if not fn.requires_grad:
raise RuntimeError('backward_weight can only be called when the function requires grad!')
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if not fn.train:
raise RuntimeError('backward_weight can only be called when training!')
if tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected input size {}, got {}'.format(
hidden_size, hx.size()))
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
y = output
dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

181
torch/csrc/DynamicTypes.cpp Normal file
View File

@ -0,0 +1,181 @@
#include "DynamicTypes.h"
#include "THP.h"
#include <vector>
#include <unordered_map>
#include <THPP/tensors/THTensor.hpp>
#include <THPP/tensors/THSTensor.hpp>
#ifdef WITH_CUDA
#include <THC/THC.h>
#include <THCS/THCS.h>
#include <THPP/tensors/THCTensor.hpp>
#include <THPP/tensors/THCSTensor.hpp>
extern THCState* state;
#endif
using namespace thpp;
namespace torch {
struct TensorType {
Type data_type;
bool is_cuda;
bool is_sparse;
friend bool operator==(const TensorType &t1, const TensorType &t2)
{
return (t1.data_type == t2.data_type &&
t1.is_cuda == t2.is_cuda &&
t1.is_sparse == t2.is_sparse);
}
friend bool operator!=(const TensorType &t1, const TensorType &t2)
{
return !(t1 == t2);
}
};
struct TensorTypeHasher
{
std::size_t operator()(const TensorType& k) const
{
size_t hash = static_cast<size_t>(k.data_type);
hash = (hash << 8) + k.is_cuda;
hash = (hash << 1) + k.is_sparse;
return hash;
}
};
static std::unordered_map<std::string, Type> type_names = {
{"Float", Type::FLOAT},
{"Double", Type::DOUBLE},
{"Half", Type::HALF},
{"Byte", Type::UCHAR},
{"Char", Type::CHAR},
{"Short", Type::SHORT},
{"Int", Type::INT},
{"Long", Type::LONG},
};
static std::unordered_map<PyTypeObject*, TensorType> pytype_to_tensortype;
static std::unordered_map<TensorType, PyTypeObject*, TensorTypeHasher> tensortype_to_pytype;
void registerPyTypeObject(PyTypeObject *pytype, const std::string& name, bool is_cuda, bool is_sparse)
{
TensorType type;
type.data_type = type_names.at(name);
type.is_cuda = is_cuda;
type.is_sparse = is_sparse;
pytype_to_tensortype[pytype] = type;
tensortype_to_pytype[type] = pytype;
}
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor)
{
TensorType type;
type.data_type = tensor.type();
type.is_cuda = tensor.isCuda();
type.is_sparse = tensor.isSparse();
return tensortype_to_pytype.at(type);
}
static std::unique_ptr<Tensor> createTensor(void *tensor, Type type, bool is_cuda, bool is_sparse)
{
if (is_cuda) {
#ifdef WITH_CUDA
if (is_sparse) {
if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THCSTensor<unsigned char>(state, (THCSByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THCSTensor<char>(state, (THCSCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THCSTensor<short>(state, (THCSShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THCSTensor<int>(state, (THCSIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THCSTensor<long>(state, (THCSLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THCSTensor<float>(state, (THCSFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THCSTensor<double>(state, (THCSDoubleTensor*)tensor));
} else if (type == Type::HALF) {
return std::unique_ptr<Tensor>(new THCSTensor<half>(state, (THCSHalfTensor*)tensor));
}
} else if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THCTensor<unsigned char>(state, (THCudaByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THCTensor<char>(state, (THCudaCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THCTensor<short>(state, (THCudaShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THCTensor<int>(state, (THCudaIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THCTensor<long>(state, (THCudaLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THCTensor<float>(state, (THCudaTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THCTensor<double>(state, (THCudaDoubleTensor*)tensor));
} else if (type == Type::HALF) {
return std::unique_ptr<Tensor>(new THCTensor<half>(state, (THCudaHalfTensor*)tensor));
}
#else
throw std::runtime_error("Compiled without CUDA support");
#endif
} else if (is_sparse) {
if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THSTensor<unsigned char>((THSByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THSTensor<char>((THSCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THSTensor<short>((THSShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THSTensor<int>((THSIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THSTensor<long>((THSLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THSTensor<float>((THSFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THSTensor<double>((THSDoubleTensor*)tensor));
}
} else if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THTensor<unsigned char>((THByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THTensor<char>((THCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THTensor<short>((THShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THTensor<int>((THIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THTensor<long>((THLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THTensor<float>((THFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THTensor<double>((THDoubleTensor*)tensor));
}
throw std::invalid_argument("Unsupported tensor type");
}
std::unique_ptr<Tensor> createTensor(PyObject *data)
{
auto tensor_type = pytype_to_tensortype.at(Py_TYPE(data));
auto type = tensor_type.data_type;
auto tensor = ((THPVoidTensor *)data)->cdata;
auto wrapper = createTensor(tensor, type, tensor_type.is_cuda, tensor_type.is_sparse);
wrapper->retain();
return wrapper;
}
PyObject* createPyObject(const thpp::Tensor& tensor)
{
auto type = getPyTypeObject(tensor);
PyObject *obj = type->tp_alloc(type, 0);
if (obj) {
((THPVoidTensor*)obj)->cdata = (THVoidTensor *)const_cast<thpp::Tensor&>(tensor).retain().cdata();
}
return obj;
}
} // namespace

25
torch/csrc/DynamicTypes.h Normal file
View File

@ -0,0 +1,25 @@
#pragma once
// Provides conversions between Python tensor objects and thpp::Tensors.
#include <memory>
#include <Python.h>
#include <THPP/THPP.h>
namespace torch {
// Register a PyTypeObject* with the given attributes
void registerPyTypeObject(
PyTypeObject *pytype, const std::string& name,
bool is_cuda, bool is_sparse);
// Gets the PyTypeObject* corresponding to the Tensor
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor);
// Creates a Tensor from a Python tensor object
std::unique_ptr<thpp::Tensor> createTensor(PyObject *data);
// Creates Python tensor object from a Tensor
PyObject* createPyObject(const thpp::Tensor& tensor);
} // namespace torch

View File

@ -5,12 +5,16 @@
#include <stdexcept>
#include <string>
#include "THP.h"
// Throwing this exception means that the Python error flags have already been
// set and control should be immediately returned to the interpreter.
class python_error : public std::exception {};
#define HANDLE_TH_ERRORS \
try {
#define END_HANDLE_TH_ERRORS_RET(retval) \
} catch (python_error &e) { \
return retval; \
} catch (std::exception &e) { \
PyErr_SetString(PyExc_RuntimeError, e.what()); \
return retval; \
@ -21,6 +25,7 @@
extern PyObject *THPException_FatalError;
#ifdef _THP_CORE
struct THException: public std::exception {
THException(const char* msg): msg(msg) {};

View File

@ -33,25 +33,25 @@ static bool THPModule_loadClasses(PyObject *self)
THPUtils_setError("class loader couldn't access torch module");
return false;
}
PyObject* module_dict = PyModule_GetDict(torch_module);
ASSERT_NOT_NULL(tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_tensor_classes"));
ASSERT_NOT_NULL(tensor_classes = PyObject_GetAttrString(torch_module, (char*)"_tensor_classes"));
if (!THPDoubleTensor_postInit(torch_module)) return false;
if (!THPFloatTensor_postInit(torch_module)) return false;
if (!THPHalfTensor_postInit(torch_module)) return false;
if (!THPLongTensor_postInit(torch_module)) return false;
if (!THPIntTensor_postInit(torch_module)) return false;
if (!THPShortTensor_postInit(torch_module)) return false;
if (!THPCharTensor_postInit(torch_module)) return false;
if (!THPByteTensor_postInit(torch_module)) return false;
ASSERT_NOT_NULL(THPDoubleStorageClass = PyMapping_GetItemString(module_dict,(char*)"DoubleStorage"));
ASSERT_NOT_NULL(THPFloatStorageClass = PyMapping_GetItemString(module_dict,(char*)"FloatStorage"));
ASSERT_NOT_NULL(THPLongStorageClass = PyMapping_GetItemString(module_dict,(char*)"LongStorage"));
ASSERT_NOT_NULL(THPIntStorageClass = PyMapping_GetItemString(module_dict,(char*)"IntStorage"));
ASSERT_NOT_NULL(THPShortStorageClass = PyMapping_GetItemString(module_dict,(char*)"ShortStorage"));
ASSERT_NOT_NULL(THPCharStorageClass = PyMapping_GetItemString(module_dict,(char*)"CharStorage"));
ASSERT_NOT_NULL(THPByteStorageClass = PyMapping_GetItemString(module_dict,(char*)"ByteStorage"));
ASSERT_NOT_NULL(THPDoubleTensorClass = PyMapping_GetItemString(module_dict,(char*)"DoubleTensor"));
ASSERT_NOT_NULL(THPFloatTensorClass = PyMapping_GetItemString(module_dict,(char*)"FloatTensor"));
ASSERT_NOT_NULL(THPLongTensorClass = PyMapping_GetItemString(module_dict,(char*)"LongTensor"));
ASSERT_NOT_NULL(THPIntTensorClass = PyMapping_GetItemString(module_dict,(char*)"IntTensor"));
ASSERT_NOT_NULL(THPShortTensorClass = PyMapping_GetItemString(module_dict,(char*)"ShortTensor"));
ASSERT_NOT_NULL(THPCharTensorClass = PyMapping_GetItemString(module_dict,(char*)"CharTensor"));
ASSERT_NOT_NULL(THPByteTensorClass = PyMapping_GetItemString(module_dict,(char*)"ByteTensor"));
ASSERT_NOT_NULL(THPDoubleStorageClass = PyObject_GetAttrString(torch_module,(char*)"DoubleStorage"));
ASSERT_NOT_NULL(THPFloatStorageClass = PyObject_GetAttrString(torch_module,(char*)"FloatStorage"));
ASSERT_NOT_NULL(THPHalfStorageClass = PyObject_GetAttrString(torch_module,(char*)"HalfStorage"));
ASSERT_NOT_NULL(THPLongStorageClass = PyObject_GetAttrString(torch_module,(char*)"LongStorage"));
ASSERT_NOT_NULL(THPIntStorageClass = PyObject_GetAttrString(torch_module,(char*)"IntStorage"));
ASSERT_NOT_NULL(THPShortStorageClass = PyObject_GetAttrString(torch_module,(char*)"ShortStorage"));
ASSERT_NOT_NULL(THPCharStorageClass = PyObject_GetAttrString(torch_module,(char*)"CharStorage"));
ASSERT_NOT_NULL(THPByteStorageClass = PyObject_GetAttrString(torch_module,(char*)"ByteStorage"));
return true;
#undef ASSERT_NOT_NULL
@ -72,6 +72,7 @@ static bool THPModule_assignStateless(PyObject *self)
PyObject *stateless;
INIT_STATELESS(Double);
INIT_STATELESS(Float);
INIT_STATELESS(Half);
INIT_STATELESS(Long);
INIT_STATELESS(Int);
INIT_STATELESS(Short);
@ -92,6 +93,7 @@ static PyObject * THPModule_initExtension(PyObject *self, PyObject *shm_manager_
libshm_init(THPUtils_bytesAsString(shm_manager_path));
if (!THPModule_loadClasses(self)) return NULL;
if (!THPModule_assignStateless(self)) return NULL;
if (!THPAutograd_initFunctions(self)) return NULL;
return PyBool_FromLong(true);
}
@ -139,6 +141,8 @@ PyObject * THPModule_fromNumpy(PyObject *_unused, PyObject *array)
return PyObject_CallFunctionObjArgs(THPLongTensorClass, array, NULL);
} else if (type == NPY_INT32) {
return PyObject_CallFunctionObjArgs(THPIntTensorClass, array, NULL);
} else if (type == NPY_INT16) {
return PyObject_CallFunctionObjArgs(THPShortTensorClass, array, NULL);
} else if (type == NPY_UINT8) {
return PyObject_CallFunctionObjArgs(THPByteTensorClass, array, NULL);
}
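The NPY_INT16 branch makes 16-bit integer arrays convertible as well. A quick sketch of the user-visible effect, assuming a build with numpy support:
import numpy as np
import torch
a = np.arange(6, dtype=np.int16)
t = torch.from_numpy(a)     # with the NPY_INT16 branch this yields a torch.ShortTensor
print(type(t))              # torch.ShortTensor
print(t.size())             # torch.Size([6])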
@ -243,6 +247,7 @@ IMPLEMENT_STATELESS(topk)
IMPLEMENT_STATELESS(t)
IMPLEMENT_STATELESS(transpose)
IMPLEMENT_STATELESS(squeeze)
IMPLEMENT_STATELESS(unsqueeze)
IMPLEMENT_STATELESS(renorm)
IMPLEMENT_STATELESS(dist)
IMPLEMENT_STATELESS(linspace)
@ -492,6 +497,8 @@ extern PyObject * THCPModule_cudaHostAllocator(PyObject *_unused);
extern PyObject * THCPModule_cudaSynchronize(PyObject *_unused);
extern PyObject * THCPModule_getLibPath(PyObject *_unused);
extern PyObject * THCPModule_cudaSleep(PyObject *_unused, PyObject *cycles);
extern PyObject * THCPModule_cudaLockMutex(PyObject *module);
extern PyObject * THCPModule_cudaUnlockMutex(PyObject *module);
extern PyObject * THCSPModule_initExtension(PyObject *self);
#endif
@ -522,6 +529,8 @@ static PyMethodDef TorchMethods[] = {
{"_cuda_getLibPath", (PyCFunction)THCPModule_getLibPath, METH_NOARGS, NULL},
{"_cuda_sleep", (PyCFunction)THCPModule_cudaSleep, METH_O, NULL},
{"_cuda_sparse_init", (PyCFunction)THCSPModule_initExtension, METH_NOARGS, NULL},
{"_cuda_lock_mutex", (PyCFunction)THCPModule_cudaLockMutex, METH_NOARGS, NULL},
{"_cuda_unlock_mutex", (PyCFunction)THCPModule_cudaUnlockMutex, METH_NOARGS, NULL},
#endif
{"_safe_call", (PyCFunction)THPModule_safeCall, METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_default_tensor_type", (PyCFunction)THPModule_setDefaultTensorType, METH_O, NULL},
@ -593,6 +602,7 @@ static PyMethodDef TorchMethods[] = {
{"t", (PyCFunction)THPModule_t, METH_VARARGS | METH_KEYWORDS, NULL},
{"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS | METH_KEYWORDS, NULL},
{"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS | METH_KEYWORDS, NULL},
{"unsqueeze", (PyCFunction)THPModule_unsqueeze, METH_VARARGS | METH_KEYWORDS, NULL},
{"nonzero", (PyCFunction)THPModule_nonzero, METH_VARARGS | METH_KEYWORDS, NULL},
{"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS | METH_KEYWORDS, NULL},
{"dist", (PyCFunction)THPModule_dist, METH_VARARGS | METH_KEYWORDS, NULL},
@ -649,6 +659,7 @@ static PyMethodDef TorchMethods[] = {
// Sparse functions
{"smm", (PyCFunction)THSPModule_sspmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"saddmm", (PyCFunction)THSPModule_sspaddmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"dsmm", (PyCFunction)THSPModule_spmm, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}
};
@ -764,6 +775,7 @@ PyMODINIT_FUNC PyInit__C()
ASSERT_TRUE(THPDoubleStorage_init(module));
ASSERT_TRUE(THPFloatStorage_init(module));
ASSERT_TRUE(THPHalfStorage_init(module));
ASSERT_TRUE(THPLongStorage_init(module));
ASSERT_TRUE(THPIntStorage_init(module));
ASSERT_TRUE(THPShortStorage_init(module));
@ -772,6 +784,7 @@ PyMODINIT_FUNC PyInit__C()
ASSERT_TRUE(THPDoubleTensor_init(module));
ASSERT_TRUE(THPFloatTensor_init(module));
ASSERT_TRUE(THPHalfTensor_init(module));
ASSERT_TRUE(THPLongTensor_init(module));
ASSERT_TRUE(THPIntTensor_init(module));
ASSERT_TRUE(THPShortTensor_init(module));

View File

@ -6,20 +6,16 @@ PyObject* sparse_tensor_classes;
// SPARSE MODULE INITIALIZATION
////////////////////////////////////////////////////////////////////////////////
static bool THSPModule_loadClasses(PyObject *module_dict)
static bool THSPModule_loadClasses(PyObject *sparse_module)
{
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
ASSERT_NOT_NULL(sparse_tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_sparse_tensor_classes"));
ASSERT_NOT_NULL(THSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
ASSERT_NOT_NULL(THSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
ASSERT_NOT_NULL(THSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
ASSERT_NOT_NULL(THSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
ASSERT_NOT_NULL(THSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
ASSERT_NOT_NULL(THSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
ASSERT_NOT_NULL(THSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
if (!THSPDoubleTensor_postInit(sparse_module)) return false;
if (!THSPFloatTensor_postInit(sparse_module)) return false;
if (!THSPLongTensor_postInit(sparse_module)) return false;
if (!THSPIntTensor_postInit(sparse_module)) return false;
if (!THSPShortTensor_postInit(sparse_module)) return false;
if (!THSPCharTensor_postInit(sparse_module)) return false;
if (!THSPByteTensor_postInit(sparse_module)) return false;
return true;
#undef ASSERT_NOT_NULL
}
static bool THSPModule_assignStateless()
@ -50,18 +46,11 @@ static bool THSPModule_assignStateless()
// Callback for python part. Used for additional initialization of python classes
PyObject *THSPModule_initExtension(PyObject *self)
{
#define ASSERT_TRUE(cond) if (!(cond)) { Py_RETURN_FALSE; }
PyObject *module = PyImport_ImportModule("torch.sparse");
if (!module) {
THPUtils_setError("class loader couldn't access torch.sparse module");
return NULL;
}
PyObject* module_dict = PyModule_GetDict(module);
ASSERT_TRUE(THSPModule_loadClasses(module_dict));
ASSERT_TRUE(THSPModule_assignStateless());
Py_RETURN_TRUE;
#undef ASSERT_TRUE
if (!module) return NULL;
if (!THSPModule_loadClasses(module)) return NULL;
if (!THSPModule_assignStateless()) return NULL;
Py_RETURN_NONE;
}
////////////////////////////////////////////////////////////////////////////////
@ -80,19 +69,19 @@ bool THPModule_isSparseTensor(PyObject *obj)
#define IMPLEMENT_SPARSE_STATELESS(name) \
static PyObject * TH_CONCAT_2(THSPModule_, name)(PyObject *_unused, PyObject *args, PyObject *kwargs) \
{ \
PyObject *tensor = THSPFloatTensorClass; \
PyObject *tensor = THSPFloatTensorClass; \
PyObject *key, *value; \
Py_ssize_t pos = 0; \
for (int i = 0; i < PyTuple_Size(args); i++) { \
PyObject *item = PyTuple_GET_ITEM(args, i); \
if (THPModule_isTensor(item) || THPVariable_CheckType(item, THPModule_isSparseTensor)) { \
if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
tensor = item; \
goto dispatch; \
} \
} \
if (kwargs) { \
while (PyDict_Next(kwargs, &pos, &key, &value)) { \
if (THPModule_isTensor(value) || THPVariable_CheckType(value, THPModule_isSparseTensor)) { \
if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
tensor = value; \
goto dispatch; \
} \
@ -109,6 +98,7 @@ dispatch: \
return PyObject_Call(method, args, kwargs); \
}
IMPLEMENT_SPARSE_STATELESS(spmm);
IMPLEMENT_SPARSE_STATELESS(sspmm);
IMPLEMENT_SPARSE_STATELESS(sspaddmm);
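These stateless wrappers back the torch-level sparse entry points registered in Module.cpp above (smm, saddmm, dsmm). A hedged usage sketch, assuming the sparse tensor constructors added elsewhere in this changeset:
import torch
i = torch.LongTensor([[0, 1], [1, 0]])            # 2 x nnz indices
v = torch.FloatTensor([3, 4])                     # nnz values
s = torch.sparse.FloatTensor(i, v, torch.Size([2, 2]))
d = torch.randn(2, 2).float()
out = torch.dsmm(s, d)    # sparse x dense -> dense, dispatched through THSPModule_spmm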

View File

@ -54,6 +54,50 @@ static PyObject * THPSize_repr(THPSize *self)
#endif
}
extern PyTypeObject THPSizeType;
template<typename FnType, FnType fn, typename ...Args>
static PyObject* wrap_tuple_fn(Args ... args)
{
PyObject *result = (*fn)(std::forward<Args>(args)...);
if (!result) return NULL;
if (PyTuple_Check(result)) {
return PyObject_CallFunctionObjArgs((PyObject*)&THPSizeType, result, NULL);
}
Py_INCREF(result);
return result;
}
static auto sq_concat = PyTuple_Type.tp_as_sequence->sq_concat;
static auto sq_repeat = PyTuple_Type.tp_as_sequence->sq_repeat;
#if PY_MAJOR_VERSION == 2
static auto sq_slice = PyTuple_Type.tp_as_sequence->sq_slice;
#endif
static auto mp_subscript = PyTuple_Type.tp_as_mapping->mp_subscript;
static PySequenceMethods THPSize_as_sequence = {
PyTuple_Type.tp_as_sequence->sq_length,
wrap_tuple_fn<decltype(&sq_concat), &sq_concat>,
wrap_tuple_fn<decltype(&sq_repeat), &sq_repeat>,
PyTuple_Type.tp_as_sequence->sq_item,
#if PY_MAJOR_VERSION == 2
wrap_tuple_fn<decltype(&sq_slice), &sq_slice>,
#else
0, /* sq_slice */
#endif
0, /* sq_ass_item */
0, /* sq_ass_slice */
PyTuple_Type.tp_as_sequence->sq_contains
};
static PyMappingMethods THPSize_as_mapping = {
PyTuple_Type.tp_as_mapping->mp_length,
wrap_tuple_fn<decltype(&mp_subscript), &mp_subscript>,
0
};
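Wiring these sequence and mapping slots means tuple operations that used to decay torch.Size into a plain tuple now return torch.Size again. A hedged sketch of the observable behaviour from Python with this change in place:
import torch
s = torch.Size([2, 3, 4])
print(type(s[1:]))     # torch.Size, because mp_subscript is wrapped
print(type(s + s))     # torch.Size, because sq_concat is wrapped
print(s[0])            # 2: single-element indexing still returns a plain int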
PyTypeObject THPSizeType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch.Size", /* tp_name */
@ -66,8 +110,8 @@ PyTypeObject THPSizeType = {
0, /* tp_reserved */
(reprfunc)THPSize_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
&THPSize_as_sequence, /* tp_as_sequence */
&THPSize_as_mapping, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */

View File

@ -1,6 +1,8 @@
#include <Python.h>
#include <structmember.h>
#define THP_HOST_HALF
#include <stdbool.h>
#include <TH/TH.h>
#include <libshm.h>
@ -9,3 +11,6 @@
#include "generic/Storage.cpp"
#include <TH/THGenerateAllTypes.h>
#include "generic/Storage.cpp"
#include <TH/THGenerateHalfType.h>

View File

@ -10,6 +10,8 @@
PyObject_IsInstance(obj, THPDoubleStorageClass)
#define THPFloatStorage_Check(obj) \
PyObject_IsInstance(obj, THPFloatStorageClass)
#define THPHalfStorage_Check(obj) \
PyObject_IsInstance(obj, THPHalfStorageClass)
#define THPLongStorage_Check(obj) \
PyObject_IsInstance(obj, THPLongStorageClass)
#define THPIntStorage_Check(obj) \
@ -23,6 +25,7 @@
#define THPDoubleStorage_CData(obj) (obj)->cdata
#define THPFloatStorage_CData(obj) (obj)->cdata
#define THPHalfStorage_CData(obj) (obj)->cdata
#define THPLongStorage_CData(obj) (obj)->cdata
#define THPIntStorage_CData(obj) (obj)->cdata
#define THPShortStorage_CData(obj) (obj)->cdata
@ -37,4 +40,7 @@
#include "generic/Storage.h"
#include <TH/THGenerateAllTypes.h>
#include "generic/Storage.h"
#include <TH/THGenerateHalfType.h>
#endif

View File

@ -1,6 +1,8 @@
#include <Python.h>
#include <structmember.h>
#define THP_HOST_HALF
#include <stdbool.h>
#include <vector>
#include <stack>
@ -9,6 +11,10 @@
#include "THP.h"
#include "copy_utils.h"
#include "DynamicTypes.h"
#include "generic/Tensor.cpp"
#include <TH/THGenerateAllTypes.h>
#include "generic/Tensor.cpp"
#include <TH/THGenerateHalfType.h>

View File

@ -8,6 +8,7 @@
#define THPDoubleTensor_Check(obj) PyObject_IsInstance(obj, THPDoubleTensorClass)
#define THPFloatTensor_Check(obj) PyObject_IsInstance(obj, THPFloatTensorClass)
#define THPHalfTensor_Check(obj) PyObject_IsInstance(obj, THPHalfTensorClass)
#define THPLongTensor_Check(obj) PyObject_IsInstance(obj, THPLongTensorClass)
#define THPIntTensor_Check(obj) PyObject_IsInstance(obj, THPIntTensorClass)
#define THPShortTensor_Check(obj) PyObject_IsInstance(obj, THPShortTensorClass)
@ -16,6 +17,7 @@
#define THPDoubleTensor_CData(obj) (obj)->cdata
#define THPFloatTensor_CData(obj) (obj)->cdata
#define THPHalfTensor_CData(obj) (obj)->cdata
#define THPLongTensor_CData(obj) (obj)->cdata
#define THPIntTensor_CData(obj) (obj)->cdata
#define THPShortTensor_CData(obj) (obj)->cdata
@ -63,4 +65,7 @@
#include "generic/Tensor.h"
#include <TH/THGenerateAllTypes.h>
#include "generic/Tensor.h"
#include <TH/THGenerateHalfType.h>
#endif

View File

@ -2,9 +2,10 @@
#define THP_AUTOGRAD_H
PyObject * THPAutograd_initExtension(PyObject *_unused);
bool THPAutograd_initFunctions(PyObject* module);
#include "variable.h"
#include "function.h"
#include "engine.h"
#include "torch/csrc/autograd/python_function.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/autograd/python_engine.h"
#endif

View File

@ -1,342 +1,187 @@
#include <Python.h>
#include <structmember.h>
#include "torch/csrc/autograd/engine.h"
#include <vector>
#include <unordered_map>
#include <deque>
#include <set>
#include <unordered_set>
#include <string>
#include <THPP/THPP.h>
#include "THP.h"
using thpp::Tensor;
PyObject *THPEngineClass = NULL;
namespace torch { namespace autograd {
// used for topological sort
using dependencies_type = std::unordered_map<THPFunction *, int>;
// stores gradient buffers
using grad_list_type = std::vector<THPObjectPtr>;
// used for need_copy set (to ensure correct gradient buffering)
using buffer_set_type = std::set<std::pair<size_t, int>>;
// gradient buffer - a list of gradient tensors + id
struct grad_buffer_type: public grad_list_type {
template<typename... Args>
grad_buffer_type(size_t buffer_id, Args&&... args):
grad_list_type(std::forward<Args>(args)...),
buffer_id(buffer_id) {};
grad_buffer_type(grad_buffer_type &&other):
grad_list_type(std::move(other)),
buffer_id(other.buffer_id) {};
grad_buffer_type& operator=(grad_buffer_type &&other) {
grad_list_type::operator=(std::move(other));
buffer_id = other.buffer_id;
return *this;
};
size_t buffer_id;
};
// used for the queue of nodes ready for processing
using ready_queue_type = std::deque<std::pair<THPFunction *, grad_buffer_type>>;
// Computes graph dependencies (using a super simple topological sort)
void THPEngine_compute_dependencies(std::vector<THPFunction*> queue,
dependencies_type& dependencies, ready_queue_type& ready)
{
std::set<THPFunction *> seen;
while (queue.size() > 0) {
THPFunction *fn = queue.back(); queue.pop_back();
for (int i = 0; i < fn->num_inputs; i++) {
THPFunction *prev_fn = (THPFunction*)fn->previous_functions[i].get();
// We can ignore variables (their backprop is called every time we have
// gradient ready).
if (THPVariable_Check((PyObject*)prev_fn))
continue;
// Stochastic functions are ready for backward immediately
if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass) &&
prev_fn->requires_grad &&
seen.count(prev_fn) == 0) {
ready.emplace_back(prev_fn, grad_buffer_type(0));
} else if (fn->requires_grad && prev_fn->requires_grad) {
dependencies[prev_fn] += 1;
auto Engine::compute_dependencies(function_queue queue, ready_queue_type& ready) -> dependencies_type {
// First, search the graph and find all stochastic functions. Append them to the queue.
std::unordered_set<Function*> seen;
function_queue search_queue(queue);
while (search_queue.size() > 0) {
auto fn = search_queue.back(); search_queue.pop_back();
for (auto& prev_fn_pair : fn->previous_functions) {
auto& prev_fn = prev_fn_pair.first;
Function* prev_ptr = prev_fn.get();
if (!prev_ptr) continue;
if (prev_ptr->is_stochastic && prev_ptr->requires_grad && seen.count(prev_ptr) == 0) {
ready.emplace_back(prev_fn, GradBuffer(0));
queue.push_back(prev_ptr);
}
if (seen.count(prev_fn) == 0) {
seen.insert(prev_fn);
queue.push_back(prev_fn);
if (seen.count(prev_ptr) == 0) {
seen.insert(prev_ptr);
search_queue.push_back(prev_ptr);
}
}
}
}
// Frees backward dependency and returns true if prev_fn is ready for backward
bool THPEngine_free_backward_dependency(dependencies_type &dependencies,
THPFunction *prev_fn)
{
int deps = --dependencies[prev_fn];
if (deps < 0) {
std::string msg = "dependencies is negative: ";
msg += Py_TYPE((PyObject*)prev_fn)->tp_name;
throw std::runtime_error(msg);
}
if (deps == 0) {
dependencies.erase(prev_fn);
return true;
}
return false;
}
// Accumulates d_prev_fn gradient tensor into output_idx position of prev_grad buffer
bool THPEngine_add_grad(buffer_set_type &need_copy, grad_buffer_type &prev_grad,
int output_nr, PyObject *d_prev_fn)
{
// TODO: we should probably clean up need_copy, because most tensors will
// probably never hit the else clause
auto set_key = std::make_pair(prev_grad.buffer_id, output_nr);
if (!prev_grad[output_nr]) {
Py_INCREF(d_prev_fn);
prev_grad[output_nr] = d_prev_fn;
need_copy.insert(set_key);
} else {
PyObject *grad_tensor = prev_grad[output_nr];
if (need_copy.count(set_key) != 0) {
grad_tensor = PyObject_CallMethod(grad_tensor, "clone", "");
if (!grad_tensor)
return false;
need_copy.erase(set_key);
prev_grad[output_nr] = grad_tensor;
}
THPObjectPtr result = PyObject_CallMethod(grad_tensor, "add_", "O", d_prev_fn);
if (!result)
return false;
}
return true;
}
// Main backward function
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
{
PyObject *variables = NULL;
PyObject *grad_variables = NULL;
unsigned char retain_variables = 0;
size_t next_buf_id = 0;
const char *accepted_kwargs[] = {"variables", "grad_variables",
"retain_variables", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
&variables, &grad_variables, &retain_variables))
return NULL;
PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;
THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
"retain_variables argument is expected to be a bool, but got %s",
THPUtils_typename(retain_variables_obj));
THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
"be a tuple, but got %s", THPUtils_typename(variables));
THPUtils_assert(PyTuple_Check(grad_variables), "variables argument is "
"expected to be a tuple, but got %s", THPUtils_typename(grad_variables));
Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
"gradients", num_variables, num_gradients);
ready_queue_type ready;
std::unordered_map<THPFunction *, grad_buffer_type> not_ready;
// Now, queue contains all nodes that will start propagating gradients. We no longer have
// to expand functions that don't require grad.
dependencies_type dependencies;
buffer_set_type need_copy;
seen.clear();
// Just to make sure that they will never be added to the queue again
seen.insert(queue.begin(), queue.end());
while (queue.size() > 0) {
auto fn = std::move(queue.back()); queue.pop_back();
// This is needed only to filter out backward roots that don't require grad
if (!fn->requires_grad) continue;
for (auto& prev_fn_pair : fn->previous_functions) {
Function* prev_ptr = prev_fn_pair.first.get();
if (!prev_ptr) continue;
if (dynamic_cast<Variable*>(prev_ptr)) continue;
if (!prev_ptr->requires_grad) continue;
if (prev_ptr->is_stochastic) continue; // Stochastic nodes were in the queue already
dependencies[prev_ptr] += 1;
if (seen.count(prev_ptr) == 0) {
seen.insert(prev_ptr);
queue.push_back(prev_ptr);
}
}
}
return dependencies;
}
auto Engine::backward(const variable_list& variables,
tensor_list& grad_variables,
bool retain_variables) -> void {
function_queue creators;
std::unordered_map<std::shared_ptr<Function>, std::unique_ptr<GradBuffer>> creator_grad;
ready_queue_type ready;
bool did_leaf_backward = false;
std::vector<THPFunction*> creators;
for (int i = 0; i < num_variables; i++) {
THPVariable *variable = (THPVariable*)PyTuple_GET_ITEM(variables, i);
PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
THPUtils_assert(THPVariable_Check((PyObject*)variable), "element %d of variables "
"tuple is not a Variable", i);
// If someone calls .backward() on a leaf, it's simple...
if (variable->creator == NULL) {
if (variable->requires_grad) {
THPObjectPtr result = PyObject_CallMethod((PyObject*)variable,
"_do_backward", "(O)O", grad, retain_variables_obj);
if (!result) return NULL;
int size = variables.size();
for (int i = 0; i < size; ++i) {
auto& var = variables[i];
auto& grad = grad_variables[i];
if (!var->creator) {
// If someone calls .backward() on a leaf, it's simple...
if (var->requires_grad) {
var->backward(std::make_shared<Variable>(std::move(grad), false, true));
did_leaf_backward = true;
}
continue;
}
THPFunction *creator = (THPFunction*)variable->creator;
creators.push_back(creator);
// Initialize the queue
if (creator->requires_grad) {
grad_buffer_type buf(next_buf_id++, creator->num_outputs);
Py_INCREF(grad);
buf[variable->output_nr] = grad;
ready.emplace_front(creator, std::move(buf));
} else {
auto& creator = var->creator;
auto& buf = creator_grad[creator];
if (creator->requires_grad) {
if (!buf) buf.reset(new GradBuffer(creator->num_outputs));
buf->addGrad(var->output_nr, Variable::of(std::move(grad)));
}
}
}
for (auto& entry: creator_grad) {
const auto& creator = entry.first;
auto& buf = entry.second; // WARNING: this is nullptr if !creator->requires_grad
creators.push_back(creator.get());
if (creator->requires_grad) {
ready.emplace_back(creator, std::move(*buf));
}
}
creator_grad.clear(); // Clear the shared pointers
THPEngine_compute_dependencies(std::move(creators), dependencies, ready);
auto dependencies = compute_dependencies(std::move(creators), ready);
THPUtils_assert(did_leaf_backward || ready.size() > 0, "there are no graph "
"nodes that require computing gradients");
if (!did_leaf_backward && ready.size() == 0) {
throw std::runtime_error(
"there are no graph nodes that require computing gradients");
}
std::unordered_map<Function*, GradBuffer> not_ready;
while (ready.size() > 0) {
std::pair<THPFunction *, grad_buffer_type> ready_pair =
std::move(ready.back()); ready.pop_back();
THPFunction *fn = ready_pair.first;
grad_buffer_type &fn_grad_buffer = ready_pair.second;
auto ready_pair = std::move(ready.back()); ready.pop_back();
auto& fn = ready_pair.first;
// Prepare a tuple for a call to _do_backward
THPObjectPtr grad_tuple = PyTuple_New(fn_grad_buffer.size());
if (!grad_tuple) return NULL;
for (unsigned int i = 0; i < fn_grad_buffer.size(); i++) {
PyObject *_grad;
if (fn_grad_buffer[i]) {
_grad = fn_grad_buffer[i].release();
} else {
_grad = Py_None;
Py_INCREF(_grad);
}
PyTuple_SET_ITEM(grad_tuple.get(), i, _grad);
auto grad_inputs = fn->apply(GradBuffer::variables(std::move(ready_pair.second)));
if (!retain_variables) {
fn->releaseVariables();
}
// Call _do_backward and make sure grad_input is sound
THPObjectPtr grad_input = PyObject_CallMethod((PyObject*)fn, "_do_backward",
"OO", grad_tuple.get(), retain_variables_obj);
if (!grad_input)
return NULL;
THPUtils_assert(PyTuple_Check(grad_input), "error, _do_backward should "
"return a tuple, but got %s", THPUtils_typename(grad_input));
int num_grads = PyTuple_GET_SIZE(grad_input.get());
if (grad_inputs.size() != fn->previous_functions.size()) {
std::string msg("Function returned an invalid number of gradients - expected ");
msg += std::to_string(fn->previous_functions.size());
msg += ", but got ";
msg += std::to_string(grad_inputs.size());
throw std::runtime_error(msg);
}
// Process tensors inside grad_input
for (int i = 0; i < num_grads; i++) {
PyObject *prev_obj = fn->previous_functions[i].get();
PyObject *grad_prev = PyTuple_GET_ITEM(grad_input.get(), i);
int size = grad_inputs.size();
for (int i = 0; i < size; ++i) {
auto& grad_input = grad_inputs[i];
auto& prev_fn = fn->previous_functions[i].first;
int output_nr = fn->previous_functions[i].second;
// A shortcut for variables - there's no need to buffer gradients for them
// as their _do_backward is super fast (and we can save memory).
// FIXME: this might call leaf variable hooks multiple times
if (THPVariable_Check(prev_obj)) {
THPVariable *prev_var = (THPVariable*)prev_obj;
if (prev_var->requires_grad) {
THPObjectPtr ret = PyObject_CallMethod(prev_obj, "_do_backward",
"(O)O", grad_prev, retain_variables_obj);
if (!ret) return NULL;
// null inputs have no previous_function and we skip them here
if (!prev_fn) {
continue;
}
if (auto var = dynamic_cast<Variable*>(prev_fn.get())) {
if (var->requires_grad) {
var->backward(grad_input);
}
continue;
}
// No need to do any work for functions that don't require gradients
THPFunction *prev_fn = (THPFunction*)prev_obj;
if (!prev_fn->requires_grad)
continue;
// Stochastic functions are immediately ready
if (PyObject_IsInstance((PyObject*)prev_fn, THPStochasticFunctionClass))
// Stochastic functions are placed in the ready queue by
// compute_dependencies, so we can skip them here.
if (prev_fn->is_stochastic || !prev_fn->requires_grad) {
continue;
}
// Check if the function is ready for backward and see if it has any
// buffers allocated
int output_idx = fn->previous_functions[i].output_nr;
bool is_ready = THPEngine_free_backward_dependency(dependencies, prev_fn);
auto not_ready_it = not_ready.find(prev_fn);
// Check if the function is ready for backward
bool is_ready = false;
auto it = dependencies.find(prev_fn.get());
if (it == dependencies.end()) {
throw std::runtime_error("dependency not found");
} else if (--it->second == 0) {
dependencies.erase(it);
is_ready = true;
}
auto not_ready_it = not_ready.find(prev_fn.get());
if (is_ready) {
// this is only a temporary, so no need for a correct id
grad_buffer_type prev_buffer(-1);
if (not_ready_it == not_ready.end()) {
// The function is ready and no buffers have been allocated for it.
prev_buffer = grad_buffer_type(next_buf_id++, prev_fn->num_outputs);
Py_INCREF(grad_prev);
prev_buffer[output_idx] = grad_prev;
// The function is ready and no buffers have been allocated for it
GradBuffer prev_buffer(prev_fn->num_outputs);
prev_buffer.addGrad(output_nr, std::move(grad_input));
ready.emplace_front(prev_fn, std::move(prev_buffer));
} else {
// The function is ready and it already has a buffer allocated.
prev_buffer = std::move(not_ready_it->second);
auto prev_buffer = std::move(not_ready_it->second);
not_ready.erase(not_ready_it);
if (!THPEngine_add_grad(need_copy, prev_buffer, output_idx, grad_prev))
return NULL;
prev_buffer.addGrad(output_nr, std::move(grad_input));
ready.emplace_front(prev_fn, std::move(prev_buffer));
}
// Put the function into the ready queue.
ready.emplace_front(prev_fn, std::move(prev_buffer));
} else {
// Allocate a buffer if necessary
// Allocate a buffer if necessary and accumulate gradient
if (not_ready_it == not_ready.end()) {
int num_prev_fn_outputs = prev_fn->num_outputs;
std::tie(not_ready_it, std::ignore) =
not_ready.emplace(prev_fn, grad_buffer_type(next_buf_id++, num_prev_fn_outputs));
GradBuffer prev_buffer(prev_fn->num_outputs);
prev_buffer.addGrad(output_nr, std::move(grad_input));
not_ready.emplace(prev_fn.get(), std::move(prev_buffer));
} else {
auto &prev_buffer = not_ready_it->second;
prev_buffer.addGrad(output_nr, std::move(grad_input));
}
// Accumulate the gradient into the buffer
grad_buffer_type &grad_buffer = not_ready_it->second;
if (!THPEngine_add_grad(need_copy, grad_buffer, output_idx, grad_prev))
return NULL;
}
}
}
if (!not_ready.empty()) {
std::string names;
for (auto &it : not_ready) {
if (!names.empty()) names += ", ";
names += Py_TYPE((PyObject *)it.first)->tp_name;
}
THPUtils_assert(not_ready.empty(),
"could not compute gradients for some functions (%s)", names.c_str());
throw std::runtime_error("could not compute gradients for some functions");
}
Py_RETURN_NONE;
}
PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
return type->tp_alloc(type, 0);
}
static struct PyMethodDef THPEngine_methods[] = {
{(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL}
};
PyTypeObject THPEngineType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._EngineBase", /* tp_name */
sizeof(THPEngine), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPEngine_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPEngine_new /* tp_new */
};
bool THPEngine_initModule(PyObject *module)
{
if (PyType_Ready(&THPEngineType) < 0)
return false;
Py_INCREF(&THPEngineType);
PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
return true;
}
}} // namespace torch::autograd

View File

@ -1,10 +1,35 @@
#ifndef THP_ENGINE_H
#define THP_ENGINE_H
#pragma once
struct THPEngine {
PyObject_HEAD
// Engine implements backpropagation from output variables and their gradients
// to "root" variables (variables created by the user with requires_grad=True).
#include <deque>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/grad_buffer.h"
namespace torch { namespace autograd {
struct Engine {
using ready_queue_type = std::deque<std::pair<std::shared_ptr<Function>, GradBuffer>>;
using function_queue = std::vector<Function*>;
using dependencies_type = std::unordered_map<Function*, int>;
// Given a list of output variables and their gradients, computes the
// gradients of "root" variables by backpropagation.
static void backward(
const variable_list& variables,
tensor_list& grad_variables,
bool retain_variables);
private:
static dependencies_type compute_dependencies(
function_queue queue,
ready_queue_type& ready);
};
bool THPEngine_initModule(PyObject *module);
#endif
}} // namespace torch::autograd
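
A minimal caller-side sketch of the interface declared above. It assumes a `loss` Variable produced by some Function graph and a pre-built gradient tensor; `run_backward_example` is a hypothetical helper, not part of this diff.

#include <memory>
#include "torch/csrc/autograd/engine.h"

using namespace torch::autograd;

void run_backward_example(const std::shared_ptr<Variable>& loss,
                          std::unique_ptr<thpp::Tensor> grad_of_loss) {
  variable_list outputs{loss};
  tensor_list grads;
  grads.emplace_back(std::move(grad_of_loss));
  // Walks creator/previous_functions edges from `loss`, accumulating into
  // GradBuffers; saved state is released unless retain_variables is true.
  Engine::backward(outputs, grads, /*retain_variables=*/false);
}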

View File

@ -1,976 +1,31 @@
#include <Python.h>
#include <structmember.h>
#include "function.h"
#include <unordered_map>
#include <unordered_set>
#include <exception>
#include <THPP/THPP.h>
#include "THP.h"
#include "variable.h"
#ifdef WITH_CUDA
#include "cuda/AutoGPU.h"
#endif
namespace torch { namespace autograd {
// Throwing this exception means that the python error flags have been already
// set and control should be immediately returned to the interpreter.
class python_error : public std::exception {};
#define THPFunction_assert(condition, ...) \
if (!(condition)) { THPUtils_setError(__VA_ARGS__); throw python_error(); }
PyObject *THPFunctionClass = NULL;
PyObject *THPStochasticFunctionClass = NULL;
// Traverse and clear are required for supporting Python's GC cycle handling.
static int THPFunction_traverse(THPFunction *self, visitproc visit, void *arg)
{
Py_VISIT(self->needs_input_grad);
Py_VISIT(self->backward_hooks);
for (int i = 0; i < self->num_inputs; i++)
Py_VISIT(self->previous_functions[i].get());
if (self->saved_variables) {
for (unsigned int i = 0; i < self->saved_variables->size(); i++)
Py_VISIT(std::get<0>(self->saved_variables->at(i)));
}
if (self->output_backward_hooks) {
for (int i = 0; i < self->num_inputs; i++)
Py_VISIT(self->output_backward_hooks[i].get());
}
Py_VISIT(self->to_save);
Py_VISIT(self->shared_pairs);
Py_VISIT(self->non_differentiable);
Py_VISIT(self->dirty_tensors);
return 0;
}
static int THPFunction_clear(THPFunction *self)
{
self->num_inputs = 0;
self->num_outputs = 0;
Py_CLEAR(self->needs_input_grad);
Py_CLEAR(self->backward_hooks);
Py_CLEAR(self->to_save);
Py_CLEAR(self->shared_pairs);
Py_CLEAR(self->non_differentiable);
Py_CLEAR(self->dirty_tensors);
THPFunctionPtr *previous_functions = self->previous_functions;
self->previous_functions = NULL;
delete[] previous_functions;
auto saved_variables = self->saved_variables;
self->saved_variables = NULL;
delete saved_variables;
auto output_backward_hooks = self->output_backward_hooks;
self->output_backward_hooks = NULL;
delete[] output_backward_hooks;
auto output_info = self->output_info;
self->output_info = NULL;
delete output_info;
return 0;
}
static void THPFunction_dealloc(THPFunction* self)
{
PyObject_GC_UnTrack(self);
THPFunction_clear(self);
Py_TYPE(self)->tp_free((PyObject*)self);
}
PyObject *THPFunction_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
THPFunction *self = (THPFunction*)type->tp_alloc(type, 0);
if (!self)
return NULL;
// Python zero-initializes the object memory, so there's no need to initialize
// most fields
self->num_outputs = -1;
return (PyObject*)self;
}
////////////////////////////////////////////////////////////////////////////////
// Forward
////////////////////////////////////////////////////////////////////////////////
using t2var_type = std::unordered_map<PyObject *, THPVariable *>;
static void _mark_dirty(THPFunction *self, t2var_type &t2var,
std::unordered_set<PyObject *> &dirty_inputs)
{
// Increase versions of modified tensors
if (!self->dirty_tensors) return;
THPFunction_assert(PyTuple_Check(self->dirty_tensors), "autograd "
"internal error: dirty_tensors attribute is expected to be a tuple "
"but is %s", THPUtils_typename(self->dirty_tensors));
Py_ssize_t num_dirty = PyTuple_GET_SIZE(self->dirty_tensors);
for (int i = 0; i < num_dirty; i++) {
PyObject *tensor = PyTuple_GET_ITEM(self->dirty_tensors, i);
dirty_inputs.insert(tensor);
THPVariable *variable;
try {
variable = t2var.at(tensor);
} catch (std::out_of_range &e) {
THPFunction_assert(THPModule_isTensor(tensor), "mark_dirty can "
"only accept tensors, but argument %d is of type %s", i,
THPUtils_typename(tensor));
THPFunction_assert(false, "mark_dirty only accepts input tensors, but "
"argument %d isn't one", i);
}
auto &v_counter = *variable->version_counter;
THPFunction_assert(v_counter.var_refcnt() == 1, "in-place operations can be "
"only used on variables that don't share storage with any other "
"variables, but detected that there are %d objects sharing it",
v_counter.var_refcnt());
v_counter++;
}
// We're never going to need this again, so let's remove the references now
Py_DECREF(self->dirty_tensors);
self->dirty_tensors = NULL;
}
static void _wrap_outputs(THPFunction *self, t2var_type &t2var,
std::unordered_set<PyObject *> &dirty_inputs, PyObject *raw_output,
PyObject *outputs)
{
// Wrap outputs in Variables
Py_ssize_t num_outputs = PyTuple_GET_SIZE(raw_output);
self->output_info = new std::vector<output_info_type>(num_outputs);
auto &output_info = *self->output_info;
for (int i = 0; i < num_outputs; i++) {
PyObject *output = PyTuple_GET_ITEM(raw_output, i);
THPVariable *output_var;
auto it = t2var.find(output);
if (it == t2var.end()) {
// A completely new tensor - just wrap it and continue
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self, self->requires_grad);
} else {
// If one of the outputs was also an input tensor it's a bit more complicated.
THPVariable *input_var = it->second;
if (input_var->creator) {
// If it's not a leaf we want to move it in the graph so backprop
// will be computed correctly:
// creator <- variable <- self ==> creator <- self <- variable
Py_INCREF(input_var);
output_var = input_var;
Py_DECREF(input_var->creator);
Py_INCREF(self);
input_var->creator = (PyObject*)self;
auto Function::flags(const variable_list& inputs) -> FunctionFlags {
int num_inputs = inputs.size();
FunctionFlags f;
f.requires_grad = false;
f.is_volatile = false;
f.previous_functions.resize(num_inputs);
for (int i = 0; i != num_inputs; ++i) {
auto& var = inputs[i];
if (var) {
f.requires_grad |= var->requires_grad;
f.is_volatile |= var->is_volatile;
if (var->creator) {
f.previous_functions[i] = std::make_pair<>(var->creator, var->output_nr);
} else {
f.previous_functions[i] = std::make_pair<>(var, 0);
}
}
}
f.requires_grad &= !f.is_volatile;
return f;
}
} else {
// If the Variable has been changed, we have to move it after the
// current function to ensure the gradient is computed correctly.
// There are two cases now:
// 1. If it requires grad, it is an error, and this will be caught
// when its _do_backward is called, because it won't be a leaf anymore.
// Also we'll change its version.
// 2. If it doesn't require grad, we can safely move it in the graph,
// because its _do_backward will never be called.
if (dirty_inputs.count(output) > 0) {
Py_INCREF(input_var);
output_var = input_var;
Py_INCREF(self);
output_var->creator = (PyObject*)self;
if (!output_var->requires_grad && self->requires_grad) {
// Now, there's another subtlety. We move the input in the graph
// and we change its requires_grad to True. However, remember
// that we're still holding a reference to it as a previous
// function. The backward engine will think that it was really a
// leaf that initially did require grad and call its _do_backward,
// and that will throw. Because of this, we need to allocate
// a dummy leaf that doesn't require grad and put it as our
// previous function.
output_var->requires_grad = self->requires_grad;
PyObject* dummy_prev_fn = THPVariable_New(output, NULL, false);
if (!dummy_prev_fn) throw python_error();
self->previous_functions[i] = THPFunctionPtr(dummy_prev_fn, 0);
}
} else {
// An input has been returned, but it wasn't modified. It's better
// not to move the Variable, because there are some legitimate cases
// where making it non-leaf would break stuff (e.g. broadcast). Also,
// returning the input Variable is not a good option either,
// because if someone registers hooks on it, they will fire with grads
// from all usages, not only from usages of this output. This is why
// we'll return a copy and join their version counters. This has
// a side-effect of making in-place ops on any of these Variables an
// immediate error, but it would be raised anyway once someone
// calls backward.
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self,
self->requires_grad);
if (!output_var) throw python_error();
output_var->version_counter->join_with(*input_var->version_counter);
}
}
}
if (!output_var) throw python_error();
torch::THPVoidTensor *output_obj = (torch::THPVoidTensor*)output_var->data;
torch::THVoidTensor *output_tensor = output_obj->cdata;
long ndim = output_tensor->nDimension;
int device_id = -1;
THPObjectPtr is_cuda = PyObject_GetAttrString(output_var->data, "is_cuda");
if (is_cuda.get() == Py_True) {
THPObjectPtr device_id_obj = PyObject_CallMethod(output_var->data,
"get_device", "");
THPFunction_assert(THPUtils_checkLong(device_id_obj), "get_device "
"should return an int, but got %s", THPUtils_typename(device_id_obj));
device_id = THPUtils_unpackLong(device_id_obj);
}
output_info[i] = std::make_tuple(
(PyObject*)Py_TYPE(output_var->data),
device_id,
std::vector<long>(output_tensor->size, output_tensor->size + ndim)
);
t2var[output] = output_var;
output_var->output_nr = i;
PyTuple_SET_ITEM(outputs, i, (PyObject*)output_var);
}
}
static void _save_variables(THPFunction *self, t2var_type &t2var)
{
if (!self->to_save) return;
THPFunction_assert(PyTuple_Check(self->to_save), "autograd internal "
"error: to_save attribute is expected to be a tuple but is %s",
THPUtils_typename(self->to_save));
Py_ssize_t num_saved = PyTuple_GET_SIZE(self->to_save);
self->saved_variables = new std::vector<saved_var_info_type>();
self->saved_variables->reserve(num_saved);
for (int i = 0; i < num_saved; i++) {
PyObject *tensor = PyTuple_GET_ITEM(self->to_save, i);
if (tensor == Py_None) {
Py_INCREF(tensor);
self->saved_variables->emplace_back(tensor, 0, nullptr);
continue;
}
THPVariable *variable;
try {
variable = t2var.at(tensor);
} catch(std::out_of_range &e) {
THPFunction_assert(THPModule_isTensor(tensor),
"save_for_backward can only save tensors, but argument %d is of "
"type %s", i, THPUtils_typename(tensor));
THPFunction_assert(false, "save_for_backward can only save input or output "
"tensors, but argument %d doesn't satisfy this condition", i);
}
Py_INCREF(tensor);
self->saved_variables->emplace_back(
tensor,
**variable->version_counter,
std::unique_ptr<THPVariableVersion>(variable->version_counter->new_saved_ref())
);
}
// Free .to_save
Py_DECREF(self->to_save);
self->to_save = NULL;
}
static void _join_version_counters(THPFunction *self, t2var_type &t2var)
{
if (!self->shared_pairs) return;
THPFunction_assert(PyTuple_Check(self->shared_pairs), "autograd internal "
"error: shared_pairs attribute is expected to be a tuple but is %s",
THPUtils_typename(self->shared_pairs));
Py_ssize_t num_shared = PyTuple_GET_SIZE(self->shared_pairs);
for (int i = 0; i < num_shared; i++) {
PyObject *shared_tuple = PyTuple_GET_ITEM(self->shared_pairs, i);
THPFunction_assert(PyTuple_Check(shared_tuple), "mark_shared_storages "
"accepts a number of pairs, but one of the arguments is of type %s",
THPUtils_typename(shared_tuple));
THPFunction_assert(PyTuple_GET_SIZE(shared_tuple) == 2,
"mark_shared_storages accepts pairs, but argument %d is a tuple of "
"%d elements", i, PyTuple_GET_SIZE(shared_tuple));
// Now we're sure it's really a pair!
THPVariable *v1, *v2;
try {
v1 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 0));
v2 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 1));
} catch(std::out_of_range &e) {
// One of the tuple items wasn't present in t2var, so there are two cases:
// 1. it's not a tensor
// 2. it's neither an input nor an output
PyObject *t1 = PyTuple_GET_ITEM(shared_tuple, 0);
PyObject *t2 = PyTuple_GET_ITEM(shared_tuple, 1);
THPFunction_assert(THPModule_isTensor(t1) && THPModule_isTensor(t2),
"mark_shared_storages accepts pairs of tensors, but one of them "
"contains %s and %s", THPUtils_typename(t1), THPUtils_typename(t2));
THPFunction_assert(false, "mark_shared_storages only accepts pairs of input "
"and output tensors, but argument %d doesn't satify this "
"condition", i);
}
v2->version_counter->join_with(*v1->version_counter);
}
// Free .shared_pairs
Py_DECREF(self->shared_pairs);
self->shared_pairs = NULL;
}
static void _mark_non_differentiable(THPFunction *self, t2var_type &t2var)
{
if (!self->non_differentiable) return;
THPFunction_assert(PyTuple_Check(self->non_differentiable), "autograd "
"internal error: non_differentiable attribute is expected to be a "
"tuple but is %s", THPUtils_typename(self->non_differentiable));
Py_ssize_t num_nondiff = PyTuple_GET_SIZE(self->non_differentiable);
for (int i = 0; i < num_nondiff; i++) {
PyObject *t = PyTuple_GET_ITEM(self->non_differentiable, i);
THPVariable *var;
try {
var = t2var.at(t);
THPFunction_assert(var->creator == (PyObject*)self,
"mark_non_differentiable only accepts output tensors, but "
"argument %d isn't an output", i);
} catch (std::out_of_range &e) {
THPFunction_assert(THPModule_isTensor(t), "mark_non_differentiable "
"only accepts tensor arguments, but got %s", THPUtils_typename(t));
THPFunction_assert(false, "mark_non_differentiable only accepts function "
"outputs");
}
var->requires_grad = 0;
}
Py_DECREF(self->non_differentiable);
self->non_differentiable = NULL;
}
static bool _ensure_tuple(THPObjectPtr& obj)
{
if (PyTuple_Check(obj.get()))
return false;
PyObject *tuple = PyTuple_New(1);
if (!tuple) throw python_error();
PyTuple_SET_ITEM(tuple, 0, obj.release());
obj = tuple;
return true;
}
PyObject *THPFunction_do_forward(THPFunction *self, PyObject *inputs)
{
try {
Py_ssize_t num_inputs = inputs ? PyTuple_GET_SIZE(inputs) : 0;
// Unpack inputs and check if they require gradients or are volatile
THPObjectPtr unpacked_inputs = PyTuple_New(num_inputs);
self->needs_input_grad = PyTuple_New(num_inputs);
self->requires_grad = false;
bool is_volatile = false;
for (int i = 0; i < num_inputs; i++) {
PyObject *input = PyTuple_GET_ITEM(inputs, i);
THPUtils_assert(THPVariable_Check(input), "expected a Variable argument, "
"but got %s", THPUtils_typename(input));
THPVariable *variable = (THPVariable*)input;
// Unpack the variable - SET_ITEM steals a reference so INCREF it
Py_INCREF(variable->data);
PyTuple_SET_ITEM(unpacked_inputs.get(), i, variable->data);
// We can't move this to C, because it's going to be accessed from user code.
PyTuple_SET_ITEM(self->needs_input_grad, i, PyBool_FromLong(variable->requires_grad));
is_volatile = is_volatile || variable->is_volatile;
self->requires_grad = self->requires_grad || variable->requires_grad;
}
// Now we're ready to call a forward (implemented in Python)
THPObjectPtr forward_fn = PyObject_GetAttrString((PyObject*)self, "forward");
THPUtils_assert(forward_fn.get(), "function %s doesn't implement a required "
"'forward' method", THPUtils_typename((PyObject*)self));
THPObjectPtr raw_output = PyObject_CallObject(forward_fn, unpacked_inputs);
if (!raw_output) return NULL;
// Wrap output in a tuple, if it's not one already
bool unpack_output = _ensure_tuple(raw_output);
int num_outputs = PyTuple_GET_SIZE(raw_output.get());
THPObjectPtr outputs = PyTuple_New(num_outputs);
if (!outputs) return NULL;
if (is_volatile) {
// If one of the inputs is volatile, let's take a fast path - we want to
// minimize the overhead of inference
for (int i = 0; i < num_outputs; i++) {
PyObject *output = PyTuple_GET_ITEM(raw_output.get(), i);
THPVariable *output_var = (THPVariable*)THPVariable_NewVolatile(output);
if (!output_var) return NULL;
output_var->output_nr = i;
PyTuple_SET_ITEM(outputs.get(), i, (PyObject*)output_var);
}
} else {
// We're not volatile, so there's a lot of bookkeeping to do...
self->num_inputs = num_inputs;
self->num_outputs = num_outputs;
t2var_type t2var;
// Save previous functions and initialize t2var map
self->previous_functions = new THPFunctionPtr[num_inputs];
for (int i = 0; i < num_inputs; i++) {
THPVariable *input_var = (THPVariable*)PyTuple_GET_ITEM(inputs, i);
t2var.emplace(input_var->data, input_var);
// Save previous function in a helper class (that has a smart pointer to
// the object and remembers which output we used).
PyObject *prev_fn = input_var->creator ? input_var->creator : (PyObject*)input_var;
Py_INCREF(prev_fn);
self->previous_functions[i] = THPFunctionPtr(prev_fn, input_var->output_nr);
}
std::unordered_set<PyObject *> dirty_inputs;
_mark_dirty(self, t2var, dirty_inputs);
_wrap_outputs(self, t2var, dirty_inputs, raw_output, outputs);
_join_version_counters(self, t2var);
if (self->requires_grad ||
PyObject_IsInstance((PyObject*)self, THPStochasticFunctionClass)) {
_save_variables(self, t2var);
_mark_non_differentiable(self, t2var);
}
}
// Unpack the output, unless .forward() returned a tuple
if (unpack_output) {
PyObject *output = PyTuple_GET_ITEM(outputs.get(), 0);
Py_INCREF(output);
return output;
}
return outputs.release();
} catch (python_error& e) {
return NULL;
} catch (std::exception& e) {
THPUtils_setError(e.what());
return NULL;
}
}
////////////////////////////////////////////////////////////////////////////////
// Backward
////////////////////////////////////////////////////////////////////////////////
// We need a reference to a smart pointer that will outlive the duration of
// a function call, so that the char* pointer is valid even after it returns
static char* _try_get_name(PyObject *hook, THPObjectPtr& tmp) {
tmp = PyObject_GetAttrString(hook, "__name__");
#if PY_MAJOR_VERSION == 2
if (tmp && PyString_Check(tmp.get())) {
return PyString_AS_STRING(tmp.get());
}
#else
if (tmp && PyUnicode_Check(tmp.get())) {
tmp = PyUnicode_AsASCIIString(tmp.get());
return PyBytes_AS_STRING(tmp.get());
}
#endif
return NULL;
}
#define OPTIONAL_HOOK_NAME \
hook_name ? "'" : "", \
hook_name ? hook_name : "", \
hook_name ? "' " : ""
static void _ensure_correct_hook_result_single(PyObject *original,
PyObject *returned, PyObject *hook)
{
#if PY_MAJOR_VERSION == 2
static PyObject *IS_SAME_SIZE_NAME = PyString_FromString("is_same_size");
#else
static PyObject *IS_SAME_SIZE_NAME = PyUnicode_FromString("is_same_size");
#endif
THPObjectPtr tmp;
// Check that the type matches
if(Py_TYPE(original) != Py_TYPE(returned)) {
char *hook_name = _try_get_name(hook, tmp);
THPUtils_setError("backward hook %s%s%shas changed the type of "
"grad_input (was %s, but got %s)",
OPTIONAL_HOOK_NAME,
THPUtils_typename(original),
THPUtils_typename(returned)
);
throw python_error();
}
// Special case - None gradient. The type matches, so that's all we
// had to check.
if (original == Py_None) return;
THPVariable *original_var = (THPVariable*)original;
THPVariable *returned_var = (THPVariable*)returned;
// Check that data types match
if (Py_TYPE(original_var->data) != Py_TYPE(returned_var->data)) {
char *hook_name = _try_get_name(hook, tmp);
THPUtils_setError("backward hook %s%s%shas changed the type of "
"grad_input data (was %s, but got %s)",
OPTIONAL_HOOK_NAME,
THPUtils_typename(original_var->data),
THPUtils_typename(returned_var->data)
);
throw python_error();
}
// Check that the size matches
THPObjectPtr is_same_size = PyObject_CallMethodObjArgs(original,
IS_SAME_SIZE_NAME, returned, NULL);
if(is_same_size.get() != Py_True) {
char *hook_name = _try_get_name(hook, tmp);
THPUtils_setError("backward hook %s%s%shas changed the size of "
"grad_input",
OPTIONAL_HOOK_NAME
);
throw python_error();
}
}
static void _ensure_correct_hook_result(THPObjectPtr& grad_input,
THPObjectPtr& result, PyObject *hook)
{
THPObjectPtr tmp;
// Check that the tuple sizes match
if (PyTuple_GET_SIZE(result.get()) != PyTuple_GET_SIZE(grad_input.get())) {
char *hook_name = _try_get_name(hook, tmp);
THPUtils_setError("backward hook %s%s%sreturned an incorrect number "
"of gradients (got %ld, but expected %ld)",
OPTIONAL_HOOK_NAME,
PyTuple_GET_SIZE(result.get()),
PyTuple_GET_SIZE(grad_input.get())
);
throw python_error();
}
Py_ssize_t size = PyTuple_GET_SIZE(grad_input.get());
for (int i = 0; i < size; i++) {
PyObject *original = PyTuple_GET_ITEM(grad_input.get(), i);
PyObject *returned = PyTuple_GET_ITEM(result.get(), i);
_ensure_correct_hook_result_single(original, returned, hook);
}
}
static void _call_output_hooks(THPFunction *self, THPObjectPtr& grad_output)
{
if (!self->output_backward_hooks) return;
PyObject *key, *value;
Py_ssize_t pos = 0;
// We can't reuse the tuple we got, so allocate a new one.
THPObjectPtr new_grad_output = PyTuple_New(self->num_outputs);
if (!new_grad_output) throw python_error();
// FIXME: until multiple backward only
bool updated_gradient = false;
for (int i = 0; i < self->num_outputs; i++) {
// Copy grad to a new tuple
PyObject *old_grad = PyTuple_GET_ITEM(grad_output.get(), i);
// FIXME: no need to pack them again after changing grads to Variables
PyObject *old_grad_var;
if (old_grad == Py_None) {
old_grad_var = Py_None;
Py_INCREF(Py_None);
} else {
old_grad_var = THPVariable_NewVolatile(old_grad);
if (!old_grad_var) throw python_error();
}
PyTuple_SET_ITEM(new_grad_output.get(), i, old_grad_var);
// Make sure that we're really going to operate on a dict
PyObject *hook_dict = self->output_backward_hooks[i];
if (!hook_dict) continue;
THPFunction_assert(PyDict_Check(hook_dict), "backward_hooks "
"attribute has to be a dictionary");
while (PyDict_Next(hook_dict, &pos, &key, &value)) {
THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
old_grad_var, NULL);
if (!result) throw python_error();
// If the hook returns something other than None, we treat that as a sign
// to replace this grad with the return value.
if (result.get() != Py_None) {
updated_gradient = true;
// Check all possible inconsistencies of the output that we can detect
// (sizes, types, etc.)
_ensure_correct_hook_result_single(old_grad_var, result, value);
// Replace the old gradient
PyTuple_SET_ITEM(new_grad_output.get(), i, result.release());
Py_XDECREF(old_grad_var);
old_grad_var = PyTuple_GET_ITEM(new_grad_output.get(), i);
}
}
}
// FIXME: no need to do this after multiple backward
if (updated_gradient) {
THPObjectPtr unpacked_grad_output = PyTuple_New(self->num_outputs);
if (!unpacked_grad_output) throw python_error();
for (int i = 0; i < self->num_outputs; i++) {
PyObject *grad = PyTuple_GET_ITEM(new_grad_output.get(), i);
if (grad == Py_None) {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(unpacked_grad_output.get(), i, Py_None);
} else {
THPVariable *var = (THPVariable*)grad;
Py_INCREF(var->data);
PyTuple_SET_ITEM(unpacked_grad_output.get(), i, var->data);
}
}
grad_output = unpacked_grad_output.release();
}
f.requires_grad &= !f.is_volatile;
return f;
}
static void _call_function_hooks(THPFunction *self, THPObjectPtr& grad_input, THPObjectPtr& grad_output)
{
if (!self->backward_hooks) return;
PyObject *key, *value;
Py_ssize_t pos = 0;
THPFunction_assert(PyDict_Check(self->backward_hooks), "backward_hooks "
"attribute has to be a dictionary");
// FIXME: until multiple backward only
bool updated_gradient = false;
THPObjectPtr packed_grad_input = PyTuple_New(self->num_inputs);
if (!packed_grad_input.get()) throw python_error();
for (int i = 0; i < self->num_inputs; i++) {
PyObject *tensor = PyTuple_GET_ITEM(grad_input.get(), i);
PyObject *var;
if (tensor == Py_None) {
var = Py_None;
Py_INCREF(Py_None);
} else {
var = THPVariable_NewVolatile(tensor);
}
if (!var) throw python_error();
PyTuple_SET_ITEM(packed_grad_input.get(), i, var);
}
THPObjectPtr packed_grad_output = PyTuple_New(self->num_outputs);
if (!packed_grad_output.get()) throw python_error();
for (int i = 0; i < self->num_outputs; i++) {
PyObject *tensor = PyTuple_GET_ITEM(grad_output.get(), i);
PyObject *var;
if (tensor == Py_None) {
var = Py_None;
Py_INCREF(Py_None);
} else {
var = THPVariable_NewVolatile(tensor);
}
if (!var) throw python_error();
PyTuple_SET_ITEM(packed_grad_output.get(), i, var);
}
while (PyDict_Next(self->backward_hooks, &pos, &key, &value)) {
THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
packed_grad_input.get(), packed_grad_output.get(), NULL);
if (!result) throw python_error();
// If the hook returns something other than None, we treat that as a sign
// to replace grad_input with its return value.
if (result.get() != Py_None) {
updated_gradient = true;
// Make sure we're working with a tuple
_ensure_tuple(result);
// Check all possible inconsistencies of the output that we can detect
// (sizes, types, etc.)
_ensure_correct_hook_result(packed_grad_input, result, value);
packed_grad_input = result.release();
}
}
// FIXME: until multiple backward only
if (updated_gradient) {
THPObjectPtr unpacked_grad_input = PyTuple_New(self->num_inputs);
if (!unpacked_grad_input) throw python_error();
for (int i = 0; i < self->num_inputs; i++) {
PyObject *grad = PyTuple_GET_ITEM(packed_grad_input.get(), i);
if (grad == Py_None) {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(unpacked_grad_input.get(), i, Py_None);
} else {
THPVariable *var = (THPVariable*)grad;
Py_INCREF(var->data);
PyTuple_SET_ITEM(unpacked_grad_input.get(), i, var->data);
}
}
grad_input = unpacked_grad_input.release();
}
}
static void _prepare_grad_output(THPFunction *self, THPObjectPtr& raw_grad_output)
{
#ifdef WITH_CUDA
THCPAutoGPU gpu_guard(-1);
#endif
int num_grad_output = PyTuple_GET_SIZE(raw_grad_output.get());
// First, check if any of grad_outputs is None. If not, there's nothing to do
bool has_none = false;
for (int i = 0; i < num_grad_output; i++) {
if (PyTuple_GET_ITEM(raw_grad_output.get(), i) == Py_None) {
has_none = true;
break;
}
}
if (!has_none)
return;
THPObjectPtr grad_output;
grad_output = PyTuple_New(num_grad_output);
if (!grad_output) throw python_error();
// Look for Nones and replace them with new buffers
for (int i = 0; i < num_grad_output; i++) {
PyObject *grad = PyTuple_GET_ITEM(raw_grad_output.get(), i);
if (grad == Py_None) {
auto &info = (*self->output_info)[i];
PyObject *tensor_cls = std::get<0>(info);
#ifdef WITH_CUDA
gpu_guard.setDevice(std::get<1>(info));
#endif
std::vector<long> &sizes = std::get<2>(info);
THPObjectPtr grad_size = THPSize_New(sizes.size(), sizes.data());
THPObjectPtr new_grad = PyObject_CallFunctionObjArgs(tensor_cls, grad_size.get(), NULL);
if (!new_grad) throw python_error();
THPObjectPtr result = PyObject_CallMethod(new_grad.get(), "zero_", "");
if (!result) throw python_error();
grad = new_grad.release();
} else {
Py_INCREF(grad);
}
PyTuple_SET_ITEM(grad_output.get(), i, grad);
}
raw_grad_output = grad_output.release();
}
static void _trim_grad_input(THPFunction *self, THPObjectPtr& grad_input)
{
int num_grads = PyTuple_GET_SIZE(grad_input.get());
int num_prev_fns = self->num_inputs;
if (num_grads > num_prev_fns) {
// Check that all extra grads are none
bool all_none = true;
for (int i = num_prev_fns; i < num_grads; i++) {
all_none = (PyTuple_GET_ITEM(grad_input.get(), i) == Py_None);
if (!all_none) break;
}
// If yes, slice the tuple
if (all_none) {
num_grads = num_prev_fns;
grad_input = PyTuple_GetSlice(grad_input.get(), 0, num_grads);
if (!grad_input) throw python_error();
}
}
}
PyObject * THPFunction_do_backward(THPFunction *self, PyObject *args)
{
try {
Py_ssize_t num_args = args ? PyTuple_GET_SIZE(args) : 0;
THPUtils_assert(num_args == 2, "_do_backward expects exactly two arguments");
PyObject *raw_grad_output = PyTuple_GET_ITEM(args, 0);
PyObject *retain_variables = PyTuple_GET_ITEM(args, 1);
if (!PyTuple_Check(raw_grad_output) || !PyBool_Check(retain_variables)) {
THPUtils_invalidArguments(args, NULL, "_do_backward", 1, "(tuple, bool)");
return NULL;
}
// Some of the outputs might have been unused, so we have to allocate
// zero-filled buffers instead
Py_INCREF(raw_grad_output);
THPObjectPtr grad_output = raw_grad_output;
_prepare_grad_output(self, grad_output);
// Call output hooks (this can modify grad_output!)
_call_output_hooks(self, grad_output);
// self.backward(*grad_output)
THPObjectPtr backward_fn = PyObject_GetAttrString((PyObject*)self, "backward");
THPUtils_assert(backward_fn.get(), "function %s doesn't implement a required "
"'backward' method", THPUtils_typename((PyObject*)self));
THPObjectPtr grad_input = PyObject_CallObject(backward_fn, grad_output.get());
if (!grad_input) return NULL;
_ensure_tuple(grad_input);
// We allow functions to return more gradients than there were outputs,
// if and only if the additional ones are all None
_trim_grad_input(self, grad_input);
int num_grads = PyTuple_GET_SIZE(grad_input.get());
int num_prev_fns = self->num_inputs;
THPUtils_assert(num_grads == num_prev_fns, "%s returned an invalid number of "
"gradient tensors (expected %d, but got %d)", THPUtils_typename(self),
num_prev_fns, num_grads);
// Call function hooks (this can modify grad_input!)
_call_function_hooks(self, grad_input, grad_output);
// Free buffers only if they're not going to be ever used again
if (retain_variables == Py_False) {
delete self->saved_variables;
self->saved_variables = nullptr;
self->has_freed_buffers = 1;
}
return grad_input.release();
} catch (python_error& e) {
return NULL;
} catch (std::exception& e) {
THPUtils_setError(e.what());
return NULL;
}
}
////////////////////////////////////////////////////////////////////////////////
// Other methods / attributes
////////////////////////////////////////////////////////////////////////////////
PyObject* THPFunction__register_hook_dict(THPFunction *self, PyObject *_var)
{
THPUtils_assert(THPVariable_Check(_var), "_register_hook_dict expected a variable");
THPVariable *var = (THPVariable*)_var;
if (!self->output_backward_hooks)
self->output_backward_hooks = new THPObjectPtr[self->num_inputs];
Py_INCREF(var->backward_hooks);
self->output_backward_hooks[var->output_nr] = var->backward_hooks;
Py_RETURN_NONE;
}
PyObject *THPFunction_saved_tensors(THPFunction *self, void *_unused)
{
THPUtils_assert(!self->has_freed_buffers, "Trying to backward through the "
"graph second time, but the buffers have already been freed. Please "
"specify retain_variables=True when calling backward for the first time.");
if (!self->saved_variables)
return PyTuple_New(0);
int num_saved = self->saved_variables->size();
THPObjectPtr saved_tensors = PyTuple_New(num_saved);
if (!saved_tensors)
return NULL;
for (int i = 0; i < num_saved; i++) {
saved_var_info_type &tuple = (*self->saved_variables)[i];
PyObject *tensor = std::get<0>(tuple);
if (tensor != Py_None) {
int expected_version = std::get<1>(tuple);
int current_version = **(std::get<2>(tuple));
THPUtils_assert(expected_version == current_version, "one of the variables "
"needed for gradient computation has been modified by an "
"inplace operation");
}
Py_INCREF(tensor);
PyTuple_SET_ITEM(saved_tensors.get(), i, tensor);
}
return saved_tensors.release();
}
PyObject *THPFunction_previous_functions(THPFunction *self, void *_unused)
{
THPObjectPtr previous_functions = PyTuple_New(self->num_inputs);
if (!previous_functions)
return NULL;
for (int i = 0; i < self->num_inputs; i++) {
THPObjectPtr fn_tuple = PyTuple_New(2);
if (!fn_tuple)
return NULL;
Py_INCREF(self->previous_functions[i].get());
PyTuple_SET_ITEM(fn_tuple.get(), 0, self->previous_functions[i].get());
PyTuple_SET_ITEM(fn_tuple.get(), 1, PyInt_FromLong(self->previous_functions[i].output_nr));
PyTuple_SET_ITEM(previous_functions.get(), i, fn_tuple.release());
}
return previous_functions.release();
}
typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);
static struct PyGetSetDef THPFunction_properties[] = {
{"saved_tensors", (getter)THPFunction_saved_tensors, NULL, NULL, NULL},
{"previous_functions", (getter)THPFunction_previous_functions, NULL, NULL, NULL},
{NULL}
};
static struct PyMemberDef THPFunction_members[] = {
{(char*)"_backward_hooks", T_OBJECT, offsetof(THPFunction, backward_hooks), 0, NULL},
{(char*)"to_save", T_OBJECT, offsetof(THPFunction, to_save), 0, NULL},
{(char*)"shared_pairs", T_OBJECT, offsetof(THPFunction, shared_pairs), 0, NULL},
{(char*)"non_differentiable", T_OBJECT, offsetof(THPFunction, non_differentiable), 0, NULL},
{(char*)"dirty_tensors", T_OBJECT, offsetof(THPFunction, dirty_tensors), 0, NULL},
{(char*)"needs_input_grad", T_OBJECT, offsetof(THPFunction, needs_input_grad), 0, NULL},
{(char*)"requires_grad", T_BOOL, offsetof(THPFunction, requires_grad), 0, NULL},
{(char*)"num_inputs", T_INT, offsetof(THPFunction, num_inputs), 0, NULL},
{(char*)"num_outputs", T_INT, offsetof(THPFunction, num_outputs), 0, NULL},
{NULL}
};
static struct PyMethodDef THPFunction_methods[] = {
{(char*)"_do_forward", (PyCFunction)THPFunction_do_forward, METH_VARARGS, NULL},
{(char*)"_do_backward", (PyCFunction)THPFunction_do_backward, METH_VARARGS, NULL},
{(char*)"_register_hook_dict", (PyCFunction)THPFunction__register_hook_dict, METH_O, NULL},
{NULL}
};
PyTypeObject THPFunctionType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._FunctionBase", /* tp_name */
sizeof(THPFunction), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPFunction_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
NULL, /* tp_doc */
(traverseproc)THPFunction_traverse, /* tp_traverse */
(inquiry)THPFunction_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPFunction_methods, /* tp_methods */
THPFunction_members, /* tp_members */
THPFunction_properties, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPFunction_new /* tp_new */
};
bool THPFunction_initModule(PyObject *module)
{
if (PyType_Ready(&THPFunctionType) < 0)
return false;
Py_INCREF(&THPFunctionType);
PyModule_AddObject(module, "_FunctionBase", (PyObject *)&THPFunctionType);
return true;
}
}} // namespace torch::autograd

View File

@ -1,61 +1,73 @@
#ifndef THP_FUNCTION_H
#define THP_FUNCTION_H
#pragma once
struct THPFunction;
// Function is an abstract class that represents a single operation from one or
// more variables to one or more variables.
//
// Subclasses may represent "forward" or "backward" operations (i.e. functions
// and their derivatives). Some functions may be used as both.
struct THPFunctionPtr: public THPObjectPtr {
THPFunctionPtr(): THPObjectPtr(nullptr), output_nr(-1) {};
#include <memory>
#include <THPP/THPP.h>
#include <vector>
THPFunctionPtr(PyObject *fn, int output_nr):
THPObjectPtr(fn), output_nr(output_nr) {};
#include "torch/csrc/autograd/saved_variable.h"
THPFunctionPtr(THPFunction *fn, int output_nr):
THPObjectPtr((PyObject*)fn), output_nr(output_nr) {};
namespace torch { namespace autograd {
THPFunctionPtr(THPFunctionPtr &&other):
THPObjectPtr(std::move(other)), output_nr(other.output_nr) {}
struct Function;
struct Variable;
THPPointer& operator =(THPFunctionPtr &&other) {
output_nr = other.output_nr;
THPObjectPtr::operator=(std::move(other));
return *this;
}
using tensor_list = std::vector<std::unique_ptr<thpp::Tensor>>;
using variable_list = std::vector<std::shared_ptr<Variable>>;
using function_list = std::vector<std::pair<std::shared_ptr<Function>, int>>;
int output_nr;
// State used to create "backward" functions
struct FunctionFlags {
bool requires_grad;
bool is_volatile;
function_list previous_functions;
};
// (class, gpu id, sizes)
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
// (tensor, version when saved, version counter)
// or
// (None, 0, nullptr)
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<THPVariableVersion>>;
struct Function {
Function()
: num_outputs(0)
, previous_functions()
, requires_grad(false)
, is_volatile(false)
, is_stochastic(false)
{}
struct THPFunction {
PyObject_HEAD
Function(FunctionFlags flags)
: num_outputs(0)
, previous_functions(std::move(flags.previous_functions))
, requires_grad(flags.requires_grad)
, is_volatile(flags.is_volatile)
, is_stochastic(false)
{}
PyObject *needs_input_grad;
PyObject *backward_hooks;
THPObjectPtr *output_backward_hooks;
Function(const Function& other) = delete;
Function(Function&& other) = delete;
virtual ~Function() {}
PyObject *to_save;
PyObject *shared_pairs;
PyObject *non_differentiable;
PyObject *dirty_tensors;
// Implements the operation
virtual variable_list apply(const variable_list& inputs) = 0;
THPFunctionPtr *previous_functions;
std::vector<output_info_type> *output_info;
std::vector<saved_var_info_type> *saved_variables;
int num_inputs;
int num_outputs;
char requires_grad;
char has_freed_buffers;
// Computes requires_grad, is_volatile, and previous_functions from a list
// of input variables
static FunctionFlags flags(const variable_list& inputs);
// Releases saved variables if the operation won't be reused
virtual inline void releaseVariables() {}
// These variables are usually only meaningful for "backward" functions.
// num_outputs is the number of outputs of the corresponding "forward" function;
// it's actually the number of inputs of this function.
int num_outputs;
function_list previous_functions;
bool requires_grad;
bool is_volatile;
bool is_stochastic;
};
bool THPFunction_initModule(PyObject *module);
extern PyObject *THPFunctionClass;
extern PyObject *THPStochasticFunctionClass;
#define THPFunction_Check(obj) PyObject_IsInstance(obj, THPFunctionClass)
#endif
}} // namespace torch::autograd
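
To make the Function/FunctionFlags contract above concrete, here is a hedged sketch of a hypothetical pair of functions (`AddForward`/`AddBackward`, not present in this diff). It follows the same pattern as BatchNormForward later in the diff: compute the output tensor, call flags() on the inputs, and attach a backward function as the creator of the result. The cadd() call assumes the same thpp::Tensor interface used in grad_buffer.cpp.

#include <memory>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"

namespace torch { namespace autograd {

// Hypothetical backward function: the gradient of a + b w.r.t. each input is
// just the incoming gradient, so it is passed through twice.
struct AddBackward : public Function {
  AddBackward(FunctionFlags flags) : Function(std::move(flags)) {}
  virtual variable_list apply(const variable_list& grad_outputs) override {
    return variable_list{grad_outputs[0], grad_outputs[0]};
  }
};

// Hypothetical forward function: computes a + b and wires up the graph.
struct AddForward : public Function {
  virtual variable_list apply(const variable_list& inputs) override {
    auto output = inputs[0]->data->newTensor();
    output->resizeAs(*inputs[0]->data);
    output->cadd(*inputs[0]->data, *inputs[1]->data);
    // flags() records requires_grad, is_volatile and previous_functions so
    // the engine can later walk backwards from the result.
    auto creator = std::make_shared<AddBackward>(flags(inputs));
    return variable_list{std::make_shared<Variable>(std::move(output), creator)};
  }
};

}} // namespace torch::autograd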

View File

@ -0,0 +1,166 @@
#include "batch_normalization.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/nn/THNN_generic.h"
#ifdef WITH_CUDNN
#include "torch/csrc/cudnn/BatchNorm.h"
#include "torch/csrc/cudnn/Handles.h"
#include "torch/csrc/cudnn/Types.h"
extern THCState* state;
#endif
namespace torch { namespace autograd {
using thpp::Tensor;
auto BatchNormForward::apply(const variable_list& inputs) -> variable_list {
if (inputs.size() != 3) throw std::runtime_error("expected three inputs");
auto& input = inputs[0];
auto& weight = inputs[1];
auto& bias = inputs[2];
bool use_cudnn = false;
#ifdef WITH_CUDNN
use_cudnn = (input->data->isCuda()
&& input->data->type() != thpp::Type::HALF
&& weight && bias);
#endif
auto output = input->data->newTensor();
output->resizeAs(*input->data);
std::unique_ptr<Tensor> save_mean(output->newTensor());
save_mean->resizeAs(*running_mean);
std::unique_ptr<Tensor> save_std(output->newTensor());
save_std->resizeAs(*running_var);
if (use_cudnn) {
#ifdef WITH_CUDNN
torch::cudnn::cudnn_batch_norm_forward(
state,
torch::cudnn::getCudnnHandle(),
torch::cudnn::getCudnnDataType(*input->data),
(THVoidTensor*)input->data->cdata(),
(THVoidTensor*)output->cdata(),
(THVoidTensor*)weight->data->cdata(),
(THVoidTensor*)bias->data->cdata(),
(THVoidTensor*)running_mean->cdata(),
(THVoidTensor*)running_var->cdata(),
(THVoidTensor*)save_mean->cdata(),
(THVoidTensor*)save_std->cdata(),
training,
momentum,
eps);
#endif
} else {
torch::nn::BatchNormalization_updateOutput(
input->data.get(),
output.get(),
weight ? weight->data.get() : nullptr,
bias ? bias->data.get() : nullptr,
running_mean.get(),
running_var.get(),
save_mean.get(),
save_std.get(),
training,
momentum,
eps);
}
auto creator = std::make_shared<BatchNormBackward>(
flags(inputs),
std::unique_ptr<thpp::Tensor>(running_mean->clone_shallow()),
std::unique_ptr<thpp::Tensor>(running_var->clone_shallow()),
std::move(save_mean),
std::move(save_std),
input->save(),
Variable::save_opt(weight.get()),
Variable::save_opt(bias.get()),
training,
momentum,
eps);
variable_list results(1);
results[0] = std::make_shared<Variable>(std::move(output), creator);
return results;
};
auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_list {
auto& input = this->input.unpack();
auto& weight = this->weight.unpack();
auto& bias = this->bias.unpack();
bool use_cudnn = false;
#ifdef WITH_CUDNN
use_cudnn = (input->isCuda()
&& input->type() != thpp::Type::HALF
&& weight && bias && training);
#endif
std::unique_ptr<Tensor> grad_input = input->newTensor();
grad_input->resizeAs(*input);
std::unique_ptr<Tensor> grad_weight;
if (weight) {
grad_weight = weight->newTensor();
grad_weight->resizeAs(*weight);
if (!use_cudnn) {
grad_weight->zero();
}
}
std::unique_ptr<Tensor> grad_bias;
if (bias) {
grad_bias = bias->newTensor();
grad_bias->resizeAs(*bias);
if (!use_cudnn) {
grad_bias->zero();
}
}
if (use_cudnn) {
#ifdef WITH_CUDNN
torch::cudnn::cudnn_batch_norm_backward(
state,
torch::cudnn::getCudnnHandle(),
torch::cudnn::getCudnnDataType(*input),
(THVoidTensor*)input->cdata(),
(THVoidTensor*)grad_outputs[0]->data->cdata(),
(THVoidTensor*)grad_input->cdata(),
(THVoidTensor*)grad_weight->cdata(),
(THVoidTensor*)grad_bias->cdata(),
(THVoidTensor*)weight->cdata(),
(THVoidTensor*)running_mean->cdata(),
(THVoidTensor*)running_var->cdata(),
(THVoidTensor*)save_mean->cdata(),
(THVoidTensor*)save_std->cdata(),
training,
eps);
#endif
} else {
torch::nn::BatchNormalization_backward(
input.get(),
grad_outputs[0]->data.get(),
grad_input.get(),
grad_weight.get(),
grad_bias.get(),
weight.get(),
running_mean.get(),
running_var.get(),
save_mean.get(),
save_std.get(),
training,
1.0,
eps);
}
variable_list results(3);
results[0] = Variable::of(std::move(grad_input));
results[1] = Variable::of(std::move(grad_weight));
results[2] = Variable::of(std::move(grad_bias));
return results;
};
}} // namespace torch::autograd

View File

@ -0,0 +1,72 @@
#pragma once
#include <memory>
#include <THPP/THPP.h>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"
namespace torch { namespace autograd {
struct BatchNormForward : public Function {
BatchNormForward(
std::unique_ptr<thpp::Tensor> running_mean,
std::unique_ptr<thpp::Tensor> running_var,
bool training,
double momentum,
double eps)
: running_mean(std::move(running_mean))
, running_var(std::move(running_var))
, training(training)
, momentum(momentum)
, eps(eps) {}
virtual variable_list apply(const variable_list& inputs) override;
std::unique_ptr<thpp::Tensor> running_mean;
std::unique_ptr<thpp::Tensor> running_var;
bool training;
double momentum;
double eps;
};
struct BatchNormBackward : public Function {
BatchNormBackward(
FunctionFlags flags,
std::unique_ptr<thpp::Tensor> running_mean,
std::unique_ptr<thpp::Tensor> running_var,
std::unique_ptr<thpp::Tensor> save_mean,
std::unique_ptr<thpp::Tensor> save_std,
SavedVariable input,
SavedVariable weight,
SavedVariable bias,
bool training,
double momentum,
double eps)
: Function(std::move(flags))
, running_mean(std::move(running_mean))
, running_var(std::move(running_var))
, save_mean(std::move(save_mean))
, save_std(std::move(save_std))
, input(std::move(input))
, weight(std::move(weight))
, bias(std::move(bias))
, training(training)
, momentum(momentum)
, eps(eps) {}
virtual variable_list apply(const variable_list& gradOutputs) override;
std::unique_ptr<thpp::Tensor> running_mean;
std::unique_ptr<thpp::Tensor> running_var;
std::unique_ptr<thpp::Tensor> save_mean;
std::unique_ptr<thpp::Tensor> save_std;
SavedVariable input;
SavedVariable weight;
SavedVariable bias;
bool training;
double momentum;
double eps;
};
}}
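
A hedged usage sketch for the two classes above; the helper name and the concrete momentum/eps values are illustrative, and the input/weight/bias Variables and running statistics are assumed to exist already.

#include <memory>
#include "batch_normalization.h"   // same relative include used by the .cpp above

using namespace torch::autograd;

std::shared_ptr<Variable> batch_norm_example(
    std::shared_ptr<Variable> input,
    std::shared_ptr<Variable> weight,
    std::shared_ptr<Variable> bias,
    std::unique_ptr<thpp::Tensor> running_mean,
    std::unique_ptr<thpp::Tensor> running_var) {
  auto fn = std::make_shared<BatchNormForward>(
      std::move(running_mean), std::move(running_var),
      /*training=*/true, /*momentum=*/0.1, /*eps=*/1e-5);
  // apply() returns a single output Variable whose creator is a
  // BatchNormBackward carrying the saved statistics.
  auto outputs = fn->apply({input, weight, bias});
  return outputs[0];
}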

View File

@ -0,0 +1,56 @@
#include <Python.h>
#include "batch_normalization.h"
#include "torch/csrc/autograd/python_cpp_function.h"
using namespace torch::autograd;
static PyTypeObject BatchNormClass;
static PyTypeObject BatchNormBackwardClass;
struct BatchNormCtor {
BatchNormForward* operator()(PyObject* args) {
std::unique_ptr<thpp::Tensor> running_mean;
std::unique_ptr<thpp::Tensor> running_var;
char training;
double momentum;
double eps;
if (!PyArg_ParseTuple(args, "O&O&Bdd:BatchNorm",
TensorConverter, &running_mean,
TensorConverter, &running_var,
&training, &momentum, &eps)) {
return NULL;
}
return new BatchNormForward(
std::move(running_mean),
std::move(running_var),
(bool)training,
momentum,
eps);
}
};
struct NoCtor {
Function* operator()(PyObject* args) {
throw std::runtime_error("Cannot construct");
}
};
template<typename C, typename T>
static void addClass(PyObject* module, PyTypeObject& type, const char* name)
{
createForwardFunctionPyTypeObject<T>(type, name);
Py_INCREF(&type);
PyModule_AddObject(module, name, (PyObject*)&type);
registerCppFunction(typeid(C), &type);
}
bool THPAutograd_initFunctions(PyObject* _unused)
{
THPObjectPtr module = PyImport_ImportModule("torch.nn._functions.thnn");
if (!module) return false;
addClass<BatchNormForward, BatchNormCtor>(module, BatchNormClass, "BatchNorm");
addClass<BatchNormBackward, NoCtor>(module, BatchNormBackwardClass, "BatchNormBackward");
return true;
}

View File

@ -0,0 +1,52 @@
#include "torch/csrc/autograd/grad_buffer.h"
#ifdef WITH_CUDA
#include "torch/csrc/cuda/AutoGPU.h"
#endif
namespace torch { namespace autograd {
GradBuffer::GradBuffer(size_t size)
: buffer(size)
{}
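// Note: each slot in `buffer` pairs a tensor with a flag that is true while
// the slot still holds the tensor passed in through addGrad; such a tensor is
// cloned before anything is accumulated into it, so callers' gradients are
// never modified in place.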
auto GradBuffer::addGrad(size_t pos, std::shared_ptr<Variable>&& var) -> void {
auto& item = buffer[pos];
if (!var) {
return;
}
auto& tensor = var->data;
if (!item.first) {
buffer[pos] = std::make_pair<>(std::move(tensor), true);
} else {
#ifdef WITH_CUDA
THCPAutoGPU auto_gpu(tensor->getDevice());
#endif
if (item.first->isSparse() && !tensor->isSparse()) {
auto* sum = tensor->clone();
sum->cadd(*sum, *item.first);
item.first.reset(sum);
} else {
if (item.second) {
item.first.reset(item.first->clone());
}
item.first->cadd(*item.first, *tensor);
}
item.second = false;
}
}
auto GradBuffer::variables(GradBuffer&& g) -> std::vector<std::shared_ptr<Variable>> {
auto buffer = std::move(g.buffer);
int size = buffer.size();
std::vector<std::shared_ptr<Variable>> result(size);
for (int i = 0; i != size; ++i) {
if (buffer[i].first) {
result[i] = std::make_shared<Variable>(
std::move(buffer[i].first), false, true);
}
}
return result;
}
}} // namespace torch::autograd

View File

@ -0,0 +1,31 @@
#pragma once
// The GradBuffer class accumulates a list of gradients for use by a
// "backward" function. It implements logic to avoid modiyfing the passed
// gradients in-place
#include <vector>
#include <utility>
#include <memory>
#include <THPP/THPP.h>
#include "torch/csrc/autograd/variable.h"
namespace torch { namespace autograd {
struct GradBuffer {
explicit GradBuffer(size_t size);
GradBuffer(const GradBuffer& other) = delete;
GradBuffer(GradBuffer&& other) = default;
// Accumulates the gradient "var" at the specified index
void addGrad(size_t idx, std::shared_ptr<Variable>&& var);
// Returns the gradients as a list of variables. Destroys this GradBuffer.
static std::vector<std::shared_ptr<Variable>> variables(GradBuffer&& buffer);
private:
std::vector<std::pair<std::unique_ptr<thpp::Tensor>, bool>> buffer;
};
}} // namespace torch::autograd
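
A short sketch of the accumulation contract described above (the function name and variables are illustrative, not from this diff).

#include <memory>
#include "torch/csrc/autograd/grad_buffer.h"

using namespace torch::autograd;

std::vector<std::shared_ptr<Variable>> grad_buffer_example(
    std::shared_ptr<Variable> a,
    std::shared_ptr<Variable> b,
    std::shared_ptr<Variable> c) {
  GradBuffer buffer(2);             // one slot per output of the "backward" function
  buffer.addGrad(0, std::move(a));  // first gradient for slot 0: stored without a copy
  buffer.addGrad(0, std::move(b));  // second gradient for slot 0: cloned, then accumulated
  buffer.addGrad(1, std::move(c));
  // Consume the buffer: each filled slot becomes a (volatile) Variable,
  // empty slots stay null.
  return GradBuffer::variables(std::move(buffer));
}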

View File

@ -0,0 +1,133 @@
#include "torch/csrc/autograd/python_cpp_function.h"
#include <Python.h>
#include <memory>
#include <stdio.h>
#include <THPP/THPP.h>
#include <typeindex>
#include <unordered_map>
#include "torch/csrc/autograd/python_function.h"
#include "torch/csrc/autograd/python_variable.h"
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Exceptions.h"
using namespace torch::autograd;
namespace torch { namespace autograd {
namespace {
PyObject* THPCppFunction_call(PyObject* self, PyObject* args, PyObject *kwargs)
{
if (kwargs && PyDict_Size(kwargs) != 0) {
return PyErr_Format(PyExc_TypeError, "keyword arguments are not supported");
}
int num_inputs = PyTuple_GET_SIZE(args);
variable_list vars(num_inputs);
for (int i = 0; i != num_inputs; ++i) {
PyObject* arg = PyTuple_GET_ITEM(args, i);
if (arg == Py_None) {
continue;
}
if (!THPVariable_Check(arg)) {
return PyErr_Format(PyExc_TypeError, "argument %d is not a Variable", i);
}
vars[i] = ((THPVariable*)arg)->cdata;
}
variable_list output;
HANDLE_TH_ERRORS {
AutoNoGIL nogil;
output = ((THPCppFunction*)self)->cdata->apply(vars);
}
END_HANDLE_TH_ERRORS
int num_outputs = output.size();
if (num_outputs == 1) {
// assume we want to unpack one element tuples for now
return THPVariable_Wrap(output[0]);
}
THPObjectPtr tuple = PyTuple_New(num_outputs);
for (int i = 0; i != num_outputs; ++i) {
PyTuple_SET_ITEM(tuple.get(), i, THPVariable_Wrap(output[i]));
}
return tuple.release();
}
void THPCppFunction_dealloc(PyObject* self)
{
((THPCppFunction*)self)->cdata.~shared_ptr();
Py_TYPE(self)->tp_free(self);
}
} // namespace
int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address)
{
try {
*address = createTensor(obj);
} catch (std::exception& e) {
PyErr_Format(PyExc_TypeError,
"expected a tensor, got %s", Py_TYPE(obj)->tp_name);
return 0;
}
return 1;
}
PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name)
{
type.tp_flags = Py_TPFLAGS_DEFAULT;
type.tp_name = name;
type.tp_basicsize = sizeof(THPCppFunction);
type.tp_call = THPCppFunction_call;
type.tp_dealloc = THPCppFunction_dealloc;
if (PyType_Ready(&type) < 0) {
auto msg = std::string("Unable to instantiate PyTypeObject for ") + name;
throw std::runtime_error(msg);
}
return &type;
}
static std::unordered_map<std::type_index, THPObjectPtr> cpp_function_types;
PyObject* functionToPyObject(std::shared_ptr<Function> cdata)
{
if (auto pfw = dynamic_cast<PyFunction*>(cdata.get())) {
PyObject* obj = pfw->obj;
Py_INCREF(obj);
return obj;
}
if (auto var = std::dynamic_pointer_cast<Variable>(cdata)) {
return THPVariable_Wrap(var);
}
auto it = cpp_function_types.find(std::type_index(typeid(*cdata)));
if (it == cpp_function_types.end()) {
return PyErr_Format(PyExc_TypeError,
"Don't know how to create Python object for %s", typeid(*cdata).name());
}
PyTypeObject* type = (PyTypeObject*)it->second.get();
THPObjectPtr obj = type->tp_alloc(type, 0);
if (!obj) return NULL;
THPCppFunction* f = (THPCppFunction*)obj.get();
new (&f->cdata) std::shared_ptr<Function>(cdata);
if (!f->cdata) {
return NULL;
}
return obj.release();
}
void registerCppFunction(const std::type_info& type, PyTypeObject* pytype)
{
Py_INCREF((PyObject*)pytype);
cpp_function_types[std::type_index(type)] = THPObjectPtr((PyObject*)pytype);
}
}} // namespace torch::autograd

View File

@ -0,0 +1,45 @@
#pragma once
#include <Python.h>
#include <memory>
#include <typeinfo>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/utils/object_ptr.h"
namespace torch { namespace autograd {
struct THPCppFunction {
PyObject_HEAD
std::shared_ptr<Function> cdata;
};
template<typename Ctor>
PyObject* CppFunction_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
THPObjectPtr obj = type->tp_alloc(type, 0);
if (!obj) return NULL;
THPCppFunction* f = (THPCppFunction*)obj.get();
new (&f->cdata) std::shared_ptr<Function>(Ctor()(args));
if (!f->cdata) {
return NULL;
}
return obj.release();
}
PyTypeObject* _initFunctionPyTypeObject(PyTypeObject& type, const char* name);
template<typename Ctor>
PyTypeObject* createForwardFunctionPyTypeObject(PyTypeObject& type, const char* name)
{
type.tp_new = &CppFunction_pynew<Ctor>;
return _initFunctionPyTypeObject(type, name);
}
// conversion utilities for PyArg_ParseTuple
int TensorConverter(PyObject* obj, std::unique_ptr<thpp::Tensor>* address);
void registerCppFunction(const std::type_info& type, PyTypeObject* pytype);
PyObject* functionToPyObject(std::shared_ptr<Function> cdata);
}} // namespace torch::autograd
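
Sketch of how a new C++ function might be exposed to Python with the helpers above, mirroring the BatchNorm wiring earlier in this diff. `MyForward`, `MyForwardCtor`, and `registerMyForward` are hypothetical names.

#include <Python.h>
#include "torch/csrc/autograd/python_cpp_function.h"

using namespace torch::autograd;

// A trivial Function subclass used only for this sketch.
struct MyForward : public Function {
  virtual variable_list apply(const variable_list& inputs) override {
    return inputs;  // identity; a real function would compute an output here
  }
};

// Parses the Python-side constructor arguments (none in this case).
struct MyForwardCtor {
  MyForward* operator()(PyObject* args) {
    if (!PyArg_ParseTuple(args, ":MyForward")) return NULL;
    return new MyForward();
  }
};

static PyTypeObject MyForwardClass;

bool registerMyForward(PyObject* module)
{
  createForwardFunctionPyTypeObject<MyForwardCtor>(MyForwardClass, "MyForward");
  Py_INCREF(&MyForwardClass);
  PyModule_AddObject(module, "MyForward", (PyObject*)&MyForwardClass);
  // Lets functionToPyObject() map this C++ type back to the Python class when
  // it appears as a Variable's creator.
  registerCppFunction(typeid(MyForward), &MyForwardClass);
  return true;
}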

View File

@ -0,0 +1,131 @@
#include "torch/csrc/autograd/python_engine.h"
#include "torch/csrc/autograd/engine.h"
#include "torch/csrc/THP.h"
#include "torch/csrc/DynamicTypes.h"
using namespace torch::autograd;
struct THPEngine {
PyObject_HEAD
};
PyObject *THPEngineClass = NULL;
// Main backward function
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
{
PyObject *variables = NULL;
PyObject *grad_variables = NULL;
unsigned char retain_variables = 0;
const char *accepted_kwargs[] = {"variables", "grad_variables",
"retain_variables", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
&variables, &grad_variables, &retain_variables))
return NULL;
PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;
THPUtils_assert(retain_variables_obj == Py_True || retain_variables_obj == Py_False,
"retain_variables argument is expected to be a bool, but got %s",
THPUtils_typename(retain_variables_obj));
THPUtils_assert(PyTuple_Check(variables), "variables argument is expected to "
"be a tuple, but got %s", THPUtils_typename(variables));
THPUtils_assert(PyTuple_Check(grad_variables), "variables argument is "
"expected to be a tuple, but got %s", THPUtils_typename(grad_variables));
Py_ssize_t num_variables = PyTuple_GET_SIZE(variables);
Py_ssize_t num_gradients = PyTuple_GET_SIZE(grad_variables);
THPUtils_assert(num_variables == num_gradients, "got %ld variables and %ld "
"gradients", num_variables, num_gradients);
variable_list vars(num_variables);
tensor_list grads(num_variables);
for (int i = 0; i < num_variables; i++) {
PyObject *variable = PyTuple_GET_ITEM(variables, i);
THPUtils_assert(THPVariable_Check(variable), "element %d of variables "
"tuple is not a Variable", i);
vars[i] = ((THPVariable*)variable)->cdata;
PyObject *grad = PyTuple_GET_ITEM(grad_variables, i);
if (THPModule_isTensor(grad)) {
grads[i] = torch::createTensor(grad);
} else {
THPUtils_assert(grad == Py_None,
"element %d of gradients tuple is not a Tensor or None", i);
THPUtils_assert(!vars[i]->requires_grad,
"element %d of gradients tuple is None, but the corresponding Variable requires grad");
}
}
try {
Engine::backward(vars, grads, retain_variables);
} catch (python_error &e) {
return nullptr;
} catch (std::exception &e) {
PyErr_SetString(PyExc_RuntimeError, e.what());
return nullptr;
}
Py_RETURN_NONE;
}
PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
return type->tp_alloc(type, 0);
}
static struct PyMethodDef THPEngine_methods[] = {
{(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL}
};
PyTypeObject THPEngineType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._EngineBase", /* tp_name */
sizeof(THPEngine), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPEngine_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPEngine_new /* tp_new */
};
bool THPEngine_initModule(PyObject *module)
{
if (PyType_Ready(&THPEngineType) < 0)
return false;
Py_INCREF(&THPEngineType);
PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
return true;
}

View File

@ -0,0 +1,5 @@
#pragma once
#include <Python.h>
bool THPEngine_initModule(PyObject *module);

File diff suppressed because it is too large

View File

@ -0,0 +1,59 @@
#pragma once
#include <Python.h>
#include <vector>
#include <utility>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/utils/object_ptr.h"
// (class, gpu id, sizes)
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
// (tensor, version when saved, version counter)
// or
// (None, 0, nullptr)
using saved_var_info_type = std::tuple<THPObjectPtr, int, std::unique_ptr<torch::autograd::VariableVersion>>;
namespace torch { namespace autograd {
struct PyFunction : public Function {
PyFunction(PyObject* obj) : obj(obj) {}
virtual variable_list apply(const variable_list& inputs) override;
virtual void releaseVariables() override;
PyObject* obj;
};
}} // namespace torch::autograd
struct THPFunction {
PyObject_HEAD
PyObject *needs_input_grad;
PyObject *backward_hooks;
THPObjectPtr *output_backward_hooks;
PyObject *to_save;
PyObject *shared_pairs;
PyObject *non_differentiable;
PyObject *dirty_tensors;
std::vector<output_info_type> *output_info;
std::vector<saved_var_info_type> *saved_variables;
int num_inputs;
char has_freed_buffers;
torch::autograd::PyFunction cdata;
};
bool THPFunction_initModule(PyObject *module);
extern PyObject *THPFunctionClass;
extern PyObject *THPStochasticFunctionClass;
std::shared_ptr<torch::autograd::PyFunction> THPFunction_asFunction(THPFunction* self);
inline bool THPFunction_Check(PyObject* obj) {
return PyObject_IsInstance(obj, THPFunctionClass);
}

View File

@ -0,0 +1,399 @@
#include "torch/csrc/autograd/python_variable.h"
#include <structmember.h>
#include "THP.h"
#include "torch/csrc/DynamicTypes.h"
#include "torch/csrc/Types.h"
#include "torch/csrc/autograd/python_cpp_function.h"
#include "torch/csrc/cuda/AutoGPU.h"
#include "torch/csrc/utils/auto_gil.h"
#include "torch/csrc/Exceptions.h"
#include <THPP/tensors/THTensor.hpp>
using namespace torch::autograd;
PyObject *THPVariableClass = NULL;
static PyObject* THPVariable_NewWithVar(PyTypeObject* type, std::shared_ptr<Variable> var)
{
PyObject* obj = type->tp_alloc(type, 0);
if (obj) {
auto v = (THPVariable*) obj;
new (&v->cdata) std::shared_ptr<Variable>(std::move(var));
}
return obj;
}
PyObject * THPVariable_Wrap(const std::shared_ptr<Variable>& var)
{
if (var->pyobj) {
Py_INCREF(var->pyobj);
} else {
var->pyobj = THPVariable_NewWithVar((PyTypeObject *)THPVariableClass, var);
}
return var->pyobj;
}
// This function DOES NOT steal a reference to data and creator
// To create a leaf Variable pass NULL as creator.
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile)
{
THPUtils_assert(THPModule_isTensor(data), "data must be a Tensor");
THPUtils_assert(!creator || THPFunction_Check(creator), "creator must be a Function");
auto v = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
PyObject* obj = THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, v);
if (obj) {
v->pyobj = obj;
v->creator = THPFunction_asFunction((THPFunction*)creator);
((THPVariable*)obj)->data = data;
Py_INCREF(data);
}
return obj;
}
// This function DOES NOT steal a reference to data
PyObject * THPVariable_NewVolatile(PyObject *data)
{
return THPVariable_New(data, nullptr, false, true);
}
static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
{
Py_VISIT(self->data);
Py_VISIT(self->backward_hooks);
return 0;
}
static int THPVariable_clear(THPVariable *self)
{
Py_CLEAR(self->data);
Py_CLEAR(self->backward_hooks);
return 0;
}
static void THPVariable_dealloc(THPVariable* self)
{
PyObject_GC_UnTrack(self);
Py_XDECREF(self->data);
Py_XDECREF(self->backward_hooks);
self->cdata->pyobj = nullptr;
self->cdata.~shared_ptr<Variable>();
Py_TYPE(self)->tp_free((PyObject*)self);
}
PyObject *THPVariable_pynew(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
THPObjectPtr _data;
PyObject *data = NULL;
PyObject *creator = NULL;
char is_volatile = 0;
char requires_grad = 0;
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OObb", (char**)accepted_args,
&data, &creator, &is_volatile, &requires_grad))
return NULL;
if (creator == Py_None)
creator = NULL;
if (data == NULL || data == Py_None) {
// For legacy serialization code, create an empty tensor temporarily.
thpp::THTensor<float> tensor;
_data = torch::createPyObject(tensor);
data = _data.get();
}
THPUtils_assert(!(is_volatile && requires_grad),
"Variable can't be volatile and require_grad at the same time!");
THPUtils_assert(!creator || THPFunction_Check(creator),
"Variable creator has to be a Function object or None, but got %s",
THPUtils_typename(creator));
THPUtils_assert(THPModule_isTensor(data), "Variable data has to "
"be a tensor, but got %s", THPUtils_typename(data));
auto var = std::make_shared<Variable>(torch::createTensor(data), requires_grad, is_volatile);
PyObject* self = THPVariable_NewWithVar(type, var);
if (self) {
var->pyobj = self;
var->creator = THPFunction_asFunction((THPFunction*)creator);
((THPVariable*)self)->cdata = var;
((THPVariable*)self)->data = data;
Py_INCREF(data);
}
return self;
}
int THPVariable_pyinit(PyObject *self, PyObject *args, PyObject *kwds)
{
// Ensures that calls to Variable() and subclasses contain data argument.
// The 'data' argument is optional in __new__ to handle legacy serialized
// Variables.
PyObject *data;
PyObject *creator = NULL;
char is_volatile = 0;
char requires_grad = 0;
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Obb", (char**)accepted_args,
&data, &creator, &is_volatile, &requires_grad))
return -1;
return 0;
}
typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);
PyObject *THPVariable_get_version(THPVariable *self)
{
auto& var = *self->cdata;
return PyInt_FromLong(**var.version_counter);
}
PyObject *THPVariable_get_creator(THPVariable *self)
{
auto& var = *self->cdata;
if (!var.creator) {
Py_RETURN_NONE;
}
return functionToPyObject(var.creator);
}
int THPVariable_set_creator(THPVariable *self, PyObject *obj)
{
THPUtils_assertRet(-1, obj == Py_None, "_creator can be only set to None");
self->cdata->creator = nullptr;
return 0;
}
PyObject * THPVariable_get_data(THPVariable *self)
{
if (!self->data) {
auto& var = *self->cdata;
PyTypeObject* type = torch::getPyTypeObject(*var.data);
self->data = type->tp_alloc(type, 0);
if (self->data) {
((torch::THPVoidTensor*)self->data)->cdata =
(torch::THVoidTensor *)var.data->retain().cdata();
}
}
Py_INCREF(self->data);
return self->data;
}
int THPVariable_set_data(THPVariable *self, PyObject *data)
{
THPUtils_assertRet(-1, THPModule_isTensor(data), "Variable data has to "
"be a tensor, but got %s", THPUtils_typename(data));
Py_INCREF(data);
Py_XDECREF(self->data);
self->data = data;
auto& var = *self->cdata;
auto tensor = torch::createTensor(data);
var.data.swap(tensor);
return 0;
}
PyObject *THPVariable_get_raw_grad(THPVariable *self)
{
auto& var = *self->cdata;
if (!var.grad) {
Py_RETURN_NONE;
}
return THPVariable_Wrap(var.grad);
}
int THPVariable_set_raw_grad(THPVariable *self, PyObject *data)
{
auto& var = *self->cdata;
if (data == Py_None) {
var.grad.reset();
return 0;
}
THPUtils_assertRet(-1, THPVariable_Check(data),
"expected Variable or None (got %s)", THPUtils_typename(data));
var.grad = ((THPVariable*)data)->cdata;
return 0;
}
PyObject *THPVariable_get_grad(THPVariable *self)
{
auto& var = *self->cdata;
if (!var.grad) {
Py_RETURN_NONE;
}
return THPVariable_Wrap(var.grad);
}
PyObject *THPVariable_get_volatile(THPVariable *self)
{
auto& var = *self->cdata;
return PyBool_FromLong(var.is_volatile);
}
int THPVariable_set_volatile(THPVariable *self, PyObject *obj)
{
THPUtils_assertRet(-1, PyBool_Check(obj), "volatile must be a bool");
THPUtils_assertRet(-1, !self->cdata->creator,
"volatile can only be set on leaf variables");
auto& var = *self->cdata;
var.is_volatile = (obj == Py_True);
return 0;
}
PyObject *THPVariable_get_output_nr(THPVariable *self)
{
auto& var = *self->cdata;
return PyInt_FromLong(var.output_nr);
}
PyObject *THPVariable_get_requires_grad(THPVariable *self)
{
auto& var = *self->cdata;
return PyBool_FromLong(var.requires_grad);
}
int THPVariable_set_requires_grad(THPVariable *self, PyObject *obj)
{
THPUtils_assertRet(-1, PyBool_Check(obj), "requires_grad must be a bool");
auto& var = *self->cdata;
if (var.creator) {
const char *hint = "";
if (obj == Py_False) {
hint = " If you want to use a computed variable in a subgraph "
"that doesn't require differentiation use "
"var_no_grad = var.detach().";
}
THPUtils_setError("you can only change requires_grad flags of leaf variables.%s", hint);
return -1;
}
var.requires_grad = (obj == Py_True);
return 0;
}
struct PyVariableHook : public VariableHook {
PyVariableHook(PyObject* dict) : dict(dict) {
Py_INCREF(dict);
}
~PyVariableHook() {
AutoGIL gil;
Py_DECREF(dict);
}
std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& _grad) override {
AutoGIL gil;
THPObjectPtr grad = THPVariable_Wrap(_grad);
if (!grad) throw python_error();
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(dict, &pos, &key, &value)) {
THPObjectPtr res = PyObject_CallFunctionObjArgs(value, grad.get(), nullptr);
if (!res) throw python_error();
if (res == Py_None) continue;
if (!PyObject_IsInstance(res.get(), THPVariableClass)) {
PyErr_Format(PyExc_TypeError, "expected Variable, but hook returned '%s'",
THPUtils_typename(res.get()));
throw python_error();
}
grad = std::move(res);
}
return ((THPVariable*)grad.get())->cdata;
}
PyObject* dict;
};
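PyVariableHook::operator() above threads the gradient through every registered callable: a hook may hand back a replacement Variable, or None to leave the gradient untouched. A self-contained analogue of that chaining with std::function; Grad, Hook and apply_hooks are illustrative names, not part of the tree:

#include <functional>
#include <memory>
#include <vector>

using Grad = std::shared_ptr<double>;
// A hook either returns a replacement gradient or nullptr (the Python None case)
// to keep the current one.
using Hook = std::function<Grad(const Grad&)>;

static Grad apply_hooks(const std::vector<Hook>& hooks, Grad grad) {
  for (const auto& hook : hooks) {
    Grad replaced = hook(grad);
    if (replaced) grad = std::move(replaced);  // otherwise keep the previous value
  }
  return grad;
}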
PyObject *THPVariable_get_backwards_hooks(THPVariable *self)
{
if (self->backward_hooks) {
Py_INCREF(self->backward_hooks);
return self->backward_hooks;
}
Py_RETURN_NONE;
}
int THPVariable_set_backwards_hooks(THPVariable *self, PyObject *obj)
{
if (obj == Py_None) {
obj = nullptr;
}
Py_XINCREF(obj);
Py_XDECREF(self->backward_hooks);
self->backward_hooks = obj;
if (obj) {
self->cdata->backward_hook.reset(new PyVariableHook(obj));
} else {
self->cdata->backward_hook.reset();
}
return 0;
}
static struct PyGetSetDef THPVariable_properties[] = {
{"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
{"creator", (getter)THPVariable_get_creator, NULL, NULL, NULL},
{"_creator", (getter)THPVariable_get_creator, (setter)THPVariable_set_creator, NULL, NULL},
{"data", (getter)THPVariable_get_data, (setter)THPVariable_set_data, NULL, NULL},
{"_grad", (getter)THPVariable_get_raw_grad, (setter)THPVariable_set_raw_grad, NULL, NULL},
{"grad", (getter)THPVariable_get_grad, NULL, NULL, NULL},
{"volatile", (getter)THPVariable_get_volatile, (setter)THPVariable_set_volatile, NULL, NULL},
{"output_nr", (getter)THPVariable_get_output_nr, NULL, NULL, NULL},
{"requires_grad", (getter)THPVariable_get_requires_grad, (setter)THPVariable_set_requires_grad, NULL, NULL},
{"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, NULL, NULL},
{NULL}
};
PyTypeObject THPVariableType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._VariableBase", /* tp_name */
sizeof(THPVariable), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPVariable_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
NULL, /* tp_doc */
(traverseproc)THPVariable_traverse, /* tp_traverse */
(inquiry)THPVariable_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
THPVariable_properties, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
THPVariable_pyinit, /* tp_init */
0, /* tp_alloc */
THPVariable_pynew /* tp_new */
};
bool THPVariable_initModule(PyObject *module)
{
if (PyType_Ready(&THPVariableType) < 0)
return false;
Py_INCREF(&THPVariableType);
PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
return true;
}

View File

@ -0,0 +1,25 @@
#pragma once
#include <Python.h>
#include <memory>
#include "torch/csrc/autograd/variable.h"
struct THPVariable {
PyObject_HEAD
std::shared_ptr<torch::autograd::Variable> cdata;
PyObject* data;
PyObject* backward_hooks;
};
bool THPVariable_initModule(PyObject *module);
extern PyObject *THPVariableClass;
PyObject * THPVariable_NewVolatile(PyObject *data);
PyObject * THPVariable_New(PyObject *data, PyObject *creator, bool requires_grad, bool is_volatile=false);
PyObject * THPVariable_Wrap(const std::shared_ptr<torch::autograd::Variable>& var);
PyObject * THPVariable_get_data(THPVariable *self);
inline bool THPVariable_Check(PyObject *obj)
{
return THPVariableClass && PyObject_IsInstance(obj, THPVariableClass);
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <THPP/THPP.h>
#include <memory>
namespace torch { namespace autograd {
struct VariableVersion;
struct SavedVariable {
SavedVariable()
: data()
, expected_version(-1)
, version() {}
SavedVariable(
std::unique_ptr<thpp::Tensor> data,
int expected_version,
std::unique_ptr<VariableVersion> version)
: data(std::move(data))
, expected_version(expected_version)
, version(std::move(version)) {}
std::unique_ptr<thpp::Tensor> data;
int expected_version;
std::unique_ptr<VariableVersion> version;
std::unique_ptr<thpp::Tensor>& unpack();
};
}} // namespace torch::autograd
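The expected_version / version pair stored above is what lets backward detect that a saved tensor was modified in place after it was captured. A rough self-contained analogue of that contract, assuming illustrative types (SimpleVersion, SavedValue) rather than the ones in this header:

#include <memory>
#include <stdexcept>

// Shared, mutable version counter: bumped on every in-place modification.
struct SimpleVersion { int value = 0; };

struct SavedValue {
  double data;
  int expected_version;
  std::shared_ptr<SimpleVersion> version;

  double unpack() const {
    if (version->value != expected_version)
      throw std::runtime_error(
          "one of the variables needed for gradient computation "
          "has been modified by an inplace operation");
    return data;
  }
};

// Usage sketch:
//   auto v = std::make_shared<SimpleVersion>();
//   SavedValue saved{1.0, v->value, v};
//   v->value++;        // simulates an in-place op on the original tensor
//   saved.unpack();    // throws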

View File

@ -1,276 +1,109 @@
#include <Python.h>
#include <structmember.h>
#include "torch/csrc/autograd/variable.h"
#include "THP.h"
#ifdef WITH_CUDA
#include "torch/csrc/cuda/AutoGPU.h"
#endif
PyObject *THPVariableClass = NULL;
using namespace torch;
using namespace thpp;
constexpr size_t CACHE_SIZE = 100000;
static THPVariable *cached_variables[CACHE_SIZE];
static size_t num_cached;
namespace torch { namespace autograd {
// This helper steals a reference to data and creator
static inline THPVariable * pop_cache(PyObject *data, PyObject *creator, char requires_grad)
Variable::Variable(
std::unique_ptr<thpp::Tensor> data,
bool requires_grad,
bool is_volatile)
: data(std::move(data))
, creator(nullptr)
, grad(nullptr)
, version_counter(new VariableVersion())
, output_nr(0)
, backward_hook()
, pyobj(nullptr)
{
THPVariable *self = cached_variables[--num_cached];
PyObject_Init((PyObject*)self, Py_TYPE(self));
PyObject_GC_Track(self);
self->is_volatile = 0;
self->version_counter = new THPVariableVersion();
self->grad = NULL;
self->backward_hooks = NULL;
self->requires_grad = requires_grad;
self->data = data;
self->creator = creator;
return self;
if (!this->data) {
throw std::runtime_error("Variable data is NULL");
}
this->is_volatile = is_volatile;
this->requires_grad = requires_grad;
}
// This function DOES NOT steal a reference to data
PyObject * THPVariable_NewVolatile(PyObject *data)
Variable::Variable(
std::unique_ptr<thpp::Tensor> data,
std::shared_ptr<Function> creator)
: data(std::move(data))
, creator(creator)
, grad(nullptr)
, version_counter(new VariableVersion())
, output_nr(creator->num_outputs++)
, backward_hook()
, pyobj(nullptr)
{
THPVariable *variable;
if (num_cached > 0) {
Py_INCREF(data);
variable = pop_cache(data, NULL, 0);
if (!this->data) {
throw std::runtime_error("Variable data is NULL");
}
this->is_volatile = creator->is_volatile;
this->requires_grad = creator->requires_grad;
previous_functions.resize(1);
previous_functions[0] = std::make_pair<>(creator, output_nr);
}
bool Variable::is_cuda()
{
return data->isCuda();
}
auto Variable::backward(std::shared_ptr<Variable> gradOutput) -> void {
if (backward_hook) {
gradOutput = (*backward_hook)(gradOutput);
}
#ifdef WITH_CUDA
THCPAutoGPU auto_gpu(gradOutput->data->getDevice());
#endif
if (!grad) {
std::unique_ptr<Tensor> data(gradOutput->data->clone());
grad = std::make_shared<Variable>(std::move(data), false, true);
} else if (grad->data->isSparse() && !gradOutput->data->isSparse()) {
auto* sum = gradOutput->data->clone();
sum->cadd(*sum, *grad->data);
grad->data.reset(sum);
} else {
variable = (THPVariable*)PyObject_CallFunctionObjArgs(THPVariableClass, data, NULL);
}
if (variable) ((THPVariable*)variable)->is_volatile = 1;
return (PyObject*)variable;
}
// This function DOES NOT steal a reference to data and creator
// To create a leaf Variable pass NULL as creator.
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad)
{
if (num_cached > 0) {
Py_INCREF(data);
Py_XINCREF(creator);
return (PyObject*)pop_cache(data, creator, requires_grad);
}
// We can't pass a NULL creator to this Python call, because Py_BuildValue
// will raise an error (it tries to be overly smart by setting its own error
// if there's no flag set at the moment and we're giving NULL to some
// function).
creator = creator ? creator : Py_None;
return PyObject_CallFunction(THPVariableClass, "OObb", data, creator, (char)0, requires_grad);
}
static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
{
Py_VISIT(self->creator);
Py_VISIT(self->data);
Py_VISIT(self->grad);
Py_VISIT(self->backward_hooks);
return 0;
}
static int THPVariable_clear(THPVariable *self)
{
Py_CLEAR(self->creator);
Py_CLEAR(self->data);
Py_CLEAR(self->grad);
Py_CLEAR(self->backward_hooks);
return 0;
}
static void THPVariable_dealloc(THPVariable* self)
{
PyObject_GC_UnTrack(self);
Py_XDECREF(self->creator);
Py_XDECREF(self->data);
Py_XDECREF(self->grad);
Py_XDECREF(self->backward_hooks);
delete self->version_counter;
self->version_counter = nullptr;
// We don't want to cache any subclasses
if ((PyObject*)Py_TYPE(self) == THPVariableClass && num_cached < CACHE_SIZE) {
cached_variables[num_cached++] = self;
// Variable class is defined in Python code, and as such has a
// Py_TPFLAGS_HEAPTYPE flag set, so python DECREFs the class at each
// object dealloc.
Py_INCREF(Py_TYPE(self));
} else {
Py_TYPE(self)->tp_free((PyObject*)self);
grad->data->cadd(*grad->data, *gradOutput->data);
}
}
PyObject *THPVariable_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
THPVariable *self;
if ((PyObject*)type != THPVariableClass || num_cached == 0) {
self = (THPVariable*)type->tp_alloc(type, 0);
if (!self) return NULL;
self->version_counter = new THPVariableVersion();
} else {
self = pop_cache(NULL, NULL, 0);
auto Variable::apply(const variable_list& gradOutputs) -> variable_list {
if (creator || **version_counter != 0) {
throw std::runtime_error("leaf variable was used in an inplace operation");
}
return (PyObject*)self;
if (gradOutputs.size() != 1) {
throw std::runtime_error("incorrect number of gradOutputs");
}
backward(gradOutputs[0]);
return variable_list();
}
int THPVariable_init(THPVariable *self, PyObject *args, PyObject *kwargs)
{
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Obb", (char**)accepted_args,
&self->data, &self->creator, &self->is_volatile,
&self->requires_grad))
return -1;
Py_INCREF(self->data);
if (self->creator == Py_None)
self->creator = NULL;
Py_XINCREF(self->creator);
THPUtils_assertRet(-1, !(self->is_volatile && self->requires_grad),
"Variable can't be volatile and require_grad at the same time!");
THPUtils_assertRet(-1, !self->creator || THPFunction_Check(self->creator),
"Variable creator has to be a Function object or None, but got %s",
THPUtils_typename(self->creator));
THPUtils_assertRet(-1, THPModule_isTensor(self->data), "Variable data has to "
"be a tensor, but got %s", THPUtils_typename(self->data));
return 0;
auto Variable::save() const -> SavedVariable {
return SavedVariable(
std::unique_ptr<Tensor>(data->clone_shallow()),
**version_counter,
std::unique_ptr<VariableVersion>(version_counter->new_saved_ref()));
}
PyObject * THPVariable_getstate(THPVariable *self)
{
THPUtils_assert(!self->creator, "serialization of non-leaf variables is not "
"implemented yet");
THPObjectPtr state = PyTuple_New(5);
if (!state)
return NULL;
Py_INCREF(self->data);
PyTuple_SET_ITEM(state.get(), 0, self->data);
PyObject *grad = self->grad ? self->grad : Py_None;
Py_INCREF(grad);
PyTuple_SET_ITEM(state.get(), 1, grad);
PyObject *backward_hooks = self->backward_hooks ? self->backward_hooks : Py_None;
Py_INCREF(backward_hooks);
PyTuple_SET_ITEM(state.get(), 2, backward_hooks);
PyTuple_SET_ITEM(state.get(), 3, PyBool_FromLong(self->requires_grad));
PyTuple_SET_ITEM(state.get(), 4, PyBool_FromLong(self->is_volatile));
return state.release();
auto Variable::save_opt(Variable* var) -> SavedVariable {
return var ? var->save() : SavedVariable();
}
PyObject * THPVariable_setstate(THPVariable *self, PyObject *state)
{
THPUtils_assert(!self->creator, "__setstate__ can be only called on leaf "
"variables");
THPUtils_assert(PyTuple_Check(state), "__setstate__ expects state to be a "
"tuple");
Py_ssize_t size = PyTuple_GET_SIZE(state);
THPUtils_assert(size == 5, "__setstate__ expects state tuple to have 5 "
"elements, but it has %d", size);
#define LOAD(NAME, IDX) \
Py_XDECREF(self->NAME); \
self->NAME = PyTuple_GET_ITEM(state, IDX) == Py_None ? NULL : PyTuple_GET_ITEM(state, IDX); \
Py_XINCREF(self->NAME);
THPUtils_assert(THPModule_isTensor(PyTuple_GET_ITEM(state, 0)), "first "
"element of variable state tuple has to be a tensor");
LOAD(data, 0);
LOAD(grad, 1);
LOAD(backward_hooks, 2);
#undef LOAD
PyObject *requires_grad_obj = PyTuple_GET_ITEM(state, 3);
PyObject *is_volatile_obj = PyTuple_GET_ITEM(state, 4);
THPUtils_assert(PyBool_Check(requires_grad_obj), "requires_grad "
"found in state was expected to be a bool, but got %s",
THPUtils_typename(requires_grad_obj));
THPUtils_assert(PyBool_Check(is_volatile_obj), "is_volatile "
"found in state was expected to be a bool, but got %s",
THPUtils_typename(is_volatile_obj));
self->requires_grad = requires_grad_obj == Py_True ? 1 : 0;
self->is_volatile = is_volatile_obj == Py_True ? 1 : 0;
Py_RETURN_NONE;
auto SavedVariable::unpack() -> std::unique_ptr<thpp::Tensor>& {
if (data) {
int current_version = **version;
if (expected_version != current_version) {
throw std::runtime_error("one of the variables "
"needed for gradient computation has been modified by an "
"inplace operation");
}
}
return data;
}
typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);
PyObject *THPVariable_get_version(THPVariable *self)
{
return PyInt_FromLong(**self->version_counter);
}
static struct PyGetSetDef THPVariable_properties[] = {
{"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
{NULL}
};
static struct PyMemberDef THPVariable_members[] = {
{(char*)"creator", T_OBJECT, offsetof(THPVariable, creator), 0, NULL},
{(char*)"data", T_OBJECT, offsetof(THPVariable, data), 0, NULL},
{(char*)"_grad", T_OBJECT, offsetof(THPVariable, grad), 0, NULL},
{(char*)"volatile", T_BOOL, offsetof(THPVariable, is_volatile), 0, NULL},
{(char*)"output_nr", T_INT, offsetof(THPVariable, output_nr), 0, NULL},
{(char*)"_backward_hooks",T_OBJECT, offsetof(THPVariable, backward_hooks), 0, NULL},
{(char*)"_requires_grad", T_BOOL, offsetof(THPVariable, requires_grad), 0, NULL},
{NULL}
};
static struct PyMethodDef THPVariable_methods[] = {
{"__getstate__", (PyCFunction)THPVariable_getstate, METH_NOARGS, NULL},
{"__setstate__", (PyCFunction)THPVariable_setstate, METH_O, NULL},
{NULL}
};
PyTypeObject THPVariableType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._VariableBase", /* tp_name */
sizeof(THPVariable), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPVariable_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
NULL, /* tp_doc */
(traverseproc)THPVariable_traverse, /* tp_traverse */
(inquiry)THPVariable_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPVariable_methods, /* tp_methods */
THPVariable_members, /* tp_members */
THPVariable_properties, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)THPVariable_init, /* tp_init */
0, /* tp_alloc */
THPVariable_new /* tp_new */
};
bool THPVariable_initModule(PyObject *module)
{
if (PyType_Ready(&THPVariableType) < 0)
return false;
Py_INCREF(&THPVariableType);
PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
return true;
}
}} // namespace torch::autograd
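Variable::backward above follows a clone-then-accumulate policy: the first incoming gradient is cloned into grad, later ones are added into it (with a densifying special case when the existing gradient is sparse). A self-contained sketch of that policy on plain buffers; accumulate_grad is an illustrative helper, not an API from the tree:

#include <memory>
#include <vector>

using Buf = std::vector<double>;

// grad starts out empty; the first call clones, later calls accumulate element-wise.
// Buffers are assumed to have the same length, as gradients of one variable do.
static void accumulate_grad(std::unique_ptr<Buf> &grad, const Buf &grad_output) {
  if (!grad) {
    grad.reset(new Buf(grad_output));   // first gradient: take a copy
  } else {
    for (size_t i = 0; i < grad->size(); ++i)
      (*grad)[i] += grad_output[i];     // later gradients: add in place
  }
}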

View File

@ -1,8 +1,57 @@
#ifndef THP_VARIABLE_H
#define THP_VARIABLE_H
#pragma once
struct THPVariableVersion {
THPVariableVersion() {
#include <memory>
#include <functional>
#include <THPP/THPP.h>
#include "torch/csrc/autograd/function.h"
#include "torch/csrc/autograd/saved_variable.h"
#include "torch/csrc/Types.h"
namespace torch { namespace autograd {
struct VariableHook;
struct VariableVersion;
struct Variable : public Function {
Variable(
std::unique_ptr<thpp::Tensor> data,
std::shared_ptr<Function> creator);
Variable(
std::unique_ptr<thpp::Tensor> data,
bool requires_grad,
bool is_volatile);
bool is_cuda();
bool is_sparse();
void backward(std::shared_ptr<Variable> gradOutput);
virtual variable_list apply(const variable_list& gradOutputs) override;
SavedVariable save() const;
static SavedVariable save_opt(Variable* var);
static inline std::shared_ptr<Variable> of(std::unique_ptr<thpp::Tensor> data) {
if (!data) {
return std::shared_ptr<Variable>();
}
return std::make_shared<Variable>(std::move(data), 0, 0);
}
std::unique_ptr<thpp::Tensor> data;
std::shared_ptr<Function> creator;
std::shared_ptr<Variable> grad;
std::unique_ptr<VariableVersion> version_counter;
int output_nr;
std::unique_ptr<VariableHook> backward_hook;
PyObject *pyobj; // weak reference
};
struct VariableHook {
virtual std::shared_ptr<Variable> operator()(const std::shared_ptr<Variable>& grad) = 0;
};
struct VariableVersion {
VariableVersion() {
saved_ref = false;
version_block = new int[3];
version_block[0] = 0; // version
@ -16,15 +65,15 @@ struct THPVariableVersion {
int var_refcnt() { return version_block[2]; }
void join_with(THPVariableVersion &other) {
void join_with(VariableVersion &other) {
cleanup();
version_block = other.version_block;
version_block[1]++;
version_block[2]++;
}
THPVariableVersion* new_saved_ref() {
auto new_ver = new THPVariableVersion();
VariableVersion* new_saved_ref() {
auto new_ver = new VariableVersion();
new_ver->cleanup();
new_ver->version_block = version_block;
version_block[1]++;
@ -39,36 +88,10 @@ struct THPVariableVersion {
version_block = nullptr;
}
~THPVariableVersion() { cleanup(); }
~VariableVersion() { cleanup(); }
int *version_block;
bool saved_ref;
};
struct THPVariable {
PyObject_HEAD
PyObject *creator;
PyObject *data;
PyObject *grad;
PyObject *backward_hooks;
THPVariableVersion *version_counter;
int output_nr;
char is_volatile;
char requires_grad;
};
bool THPVariable_initModule(PyObject *module);
extern PyObject *THPVariableClass;
PyObject * THPVariable_NewVolatile(PyObject *data);
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad);
#define THPVariable_Check(obj) \
(THPVariableClass && \
PyObject_IsInstance(obj, THPVariableClass))
#define THPVariable_CheckType(obj, func) \
(THPVariableClass && \
(PyObject_IsInstance(obj, THPVariableClass) && \
func(((THPVariable*)obj)->data)))
#endif
}} // namespace torch::autograd
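join_with and new_saved_ref above both work by pointing several counters at one heap-allocated int block and tracking how many owners it has. A compact self-contained analogue of that sharing, assuming an illustrative SharedCounter type that keeps only [version, refcount] instead of the header's three-slot block:

struct SharedCounter {
  int *block;
  SharedCounter() : block(new int[2]{0 /*version*/, 1 /*refcount*/}) {}
  SharedCounter(const SharedCounter&) = delete;            // sharing is explicit
  SharedCounter& operator=(const SharedCounter&) = delete;
  void join_with(SharedCounter &other) {                   // adopt other's block
    release();
    block = other.block;
    block[1]++;
  }
  int current() const { return block[0]; }
  void bump() { block[0]++; }                              // an in-place op happened
  ~SharedCounter() { release(); }
 private:
  void release() {
    if (block && --block[1] == 0) delete[] block;
    block = nullptr;
  }
};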

View File

@ -62,6 +62,16 @@ void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order,
}
}
void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
union { uint16_t x; THHalf f; };
x = (order == THP_BIG_ENDIAN ? decodeUInt16BE(src) : decodeUInt16LE(src));
dst[i] = f;
src += sizeof(uint16_t);
}
}
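THP_decodeHalfBuffer above reads two bytes per element, fixes the byte order, and reinterprets the bits as a half through an anonymous union. A self-contained sketch of the same idea for one little-endian element, using memcpy for the bit reinterpretation; decode_u16_le, half_bits and load_half_le are illustrative names:

#include <cstdint>
#include <cstring>

// Read a little-endian uint16 from a byte stream.
static uint16_t decode_u16_le(const uint8_t *src) {
  return (uint16_t)(src[0] | (src[1] << 8));
}

// 16-bit container standing in for THHalf; memcpy avoids aliasing issues.
struct half_bits { uint16_t x; };

static half_bits load_half_le(const uint8_t *src) {
  uint16_t raw = decode_u16_le(src);
  half_bits h;
  std::memcpy(&h, &raw, sizeof(h));
  return h;
}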
void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {

View File

@ -3,6 +3,7 @@
#include <stdint.h>
#include <stddef.h>
#include <THHalf.h>
enum THPByteOrder {
THP_LITTLE_ENDIAN = 0,
@ -14,6 +15,7 @@ THPByteOrder THP_nativeByteOrder();
void THP_decodeInt16Buffer(int16_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt32Buffer(int32_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order, size_t len);

View File

@ -3,59 +3,68 @@
#include "THCP.h"
#include <THC/THC.h>
THCPAutoGPU::THCPAutoGPU(int device_id) {
setDevice(device_id);
}
THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self) {
if (self && setObjDevice(self))
return;
if (!args)
return;
for (int i = 0; i < PyTuple_Size(args); i++) {
PyObject *arg = PyTuple_GET_ITEM(args, i);
if (setObjDevice(arg)) return;
}
}
bool THCPAutoGPU::setObjDevice(PyObject *obj) {
int new_device = -1;
static int getObjDevice(PyObject *obj) {
PyObject *obj_type = (PyObject*)Py_TYPE(obj);
if (obj_type == THCPDoubleTensorClass) {
new_device = THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
return THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
} else if (obj_type == THCPFloatTensorClass) {
new_device = THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
return THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
} else if (obj_type == THCPHalfTensorClass) {
new_device = THCudaHalfTensor_getDevice(LIBRARY_STATE ((THCPHalfTensor*)obj)->cdata);
return THCudaHalfTensor_getDevice(LIBRARY_STATE ((THCPHalfTensor*)obj)->cdata);
} else if (obj_type == THCPLongTensorClass) {
new_device = THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
return THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
} else if (obj_type == THCPIntTensorClass) {
new_device = THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
return THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
} else if (obj_type == THCPShortTensorClass) {
new_device = THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
return THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
} else if (obj_type == THCPCharTensorClass) {
new_device = THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
return THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
} else if (obj_type == THCPByteTensorClass) {
new_device = THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
return THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
} else if (obj_type == THCSPDoubleTensorClass) {
return THCSDoubleTensor_getDevice(LIBRARY_STATE ((THCSPDoubleTensor*)obj)->cdata);
} else if (obj_type == THCSPFloatTensorClass) {
return THCSFloatTensor_getDevice(LIBRARY_STATE ((THCSPFloatTensor*)obj)->cdata);
} else if (obj_type == THCSPHalfTensorClass) {
return THCSHalfTensor_getDevice(LIBRARY_STATE ((THCSPHalfTensor*)obj)->cdata);
} else if (obj_type == THCSPLongTensorClass) {
return THCSLongTensor_getDevice(LIBRARY_STATE ((THCSPLongTensor*)obj)->cdata);
} else if (obj_type == THCSPIntTensorClass) {
return THCSIntTensor_getDevice(LIBRARY_STATE ((THCSPIntTensor*)obj)->cdata);
} else if (obj_type == THCSPShortTensorClass) {
return THCSShortTensor_getDevice(LIBRARY_STATE ((THCSPShortTensor*)obj)->cdata);
} else if (obj_type == THCSPCharTensorClass) {
return THCSCharTensor_getDevice(LIBRARY_STATE ((THCSPCharTensor*)obj)->cdata);
} else if (obj_type == THCSPByteTensorClass) {
return THCSByteTensor_getDevice(LIBRARY_STATE ((THCSPByteTensor*)obj)->cdata);
}
return setDevice(new_device);
return -1;
}
bool THCPAutoGPU::setDevice(int new_device) {
if (new_device == -1)
return false;
if (device == -1)
THCudaCheck(cudaGetDevice(&device));
if (new_device != device)
THCPModule_setDevice(new_device);
return true;
static int getObjDevice(PyObject *args, PyObject *self) {
if (self) {
int device = getObjDevice(self);
if (device != -1) {
return device;
}
}
if (args) {
for (int i = 0; i < PyTuple_Size(args); i++) {
int device = getObjDevice(PyTuple_GET_ITEM(args, i));
if (device != -1) {
return device;
}
}
}
return -1;
}
// This can throw... But if it does I have no idea how to recover.
THCPAutoGPU::~THCPAutoGPU() {
if (device != -1)
THCPModule_setDevice(device);
THCPAutoGPU::THCPAutoGPU(int device_id) : AutoGPU(device_id) {}
THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self)
: AutoGPU(getObjDevice(args, self)) {
}
void THCPAutoGPU::setObjDevice(PyObject *obj) {
setDevice(getObjDevice(obj));
}
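After this refactor THCPAutoGPU only resolves a device id from the Python arguments and leaves the save/switch/restore to the AutoGPU base class. A self-contained sketch of that RAII guard idea, assuming stand-in get_device/set_device functions instead of the CUDA runtime:

// Illustrative globals standing in for cudaGetDevice/cudaSetDevice.
static int g_current_device = 0;
static int get_device() { return g_current_device; }
static void set_device(int d) { g_current_device = d; }

// Switch to `device` for the lifetime of the guard, then switch back.
class DeviceGuard {
 public:
  explicit DeviceGuard(int device) : original_(-1) {
    if (device == -1) return;            // -1 means "leave the device alone"
    original_ = get_device();
    if (device != original_) set_device(device);
    else original_ = -1;                 // nothing to restore
  }
  ~DeviceGuard() {
    if (original_ != -1) set_device(original_);
  }
 private:
  int original_;
};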

View File

@ -2,15 +2,13 @@
#define THCP_AUTOGPU_INC
#include <Python.h>
#include "torch/csrc/utils/auto_gpu.h"
class THCPAutoGPU {
class THCPAutoGPU : public AutoGPU {
public:
THCPAutoGPU(int device_id=-1);
explicit THCPAutoGPU(int device_id=-1);
THCPAutoGPU(PyObject *args, PyObject *self=NULL);
~THCPAutoGPU();
bool setObjDevice(PyObject *obj);
bool setDevice(int new_device);
int device = -1;
void setObjDevice(PyObject *obj);
};
#endif

View File

@ -15,26 +15,26 @@ THCState *state;
// Class pointer cache
////////////////////////////////////////////////////////////////////////////////
static bool THCPModule_loadClasses(PyObject *module_dict)
static bool THCPModule_loadClasses(PyObject *torch_module)
{
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
ASSERT_NOT_NULL(THCPDoubleStorageClass = PyMapping_GetItemString(module_dict, (char*)"DoubleStorage"));
ASSERT_NOT_NULL(THCPFloatStorageClass = PyMapping_GetItemString(module_dict, (char*)"FloatStorage"));
ASSERT_NOT_NULL(THCPHalfStorageClass = PyMapping_GetItemString(module_dict, (char*)"HalfStorage"));
ASSERT_NOT_NULL(THCPLongStorageClass = PyMapping_GetItemString(module_dict, (char*)"LongStorage"));
ASSERT_NOT_NULL(THCPIntStorageClass = PyMapping_GetItemString(module_dict, (char*)"IntStorage"));
ASSERT_NOT_NULL(THCPShortStorageClass = PyMapping_GetItemString(module_dict, (char*)"ShortStorage"));
ASSERT_NOT_NULL(THCPCharStorageClass = PyMapping_GetItemString(module_dict, (char*)"CharStorage"));
ASSERT_NOT_NULL(THCPByteStorageClass = PyMapping_GetItemString(module_dict, (char*)"ByteStorage"));
ASSERT_NOT_NULL(THCPDoubleStorageClass = PyObject_GetAttrString(torch_module, (char*)"DoubleStorage"));
ASSERT_NOT_NULL(THCPFloatStorageClass = PyObject_GetAttrString(torch_module, (char*)"FloatStorage"));
ASSERT_NOT_NULL(THCPHalfStorageClass = PyObject_GetAttrString(torch_module, (char*)"HalfStorage"));
ASSERT_NOT_NULL(THCPLongStorageClass = PyObject_GetAttrString(torch_module, (char*)"LongStorage"));
ASSERT_NOT_NULL(THCPIntStorageClass = PyObject_GetAttrString(torch_module, (char*)"IntStorage"));
ASSERT_NOT_NULL(THCPShortStorageClass = PyObject_GetAttrString(torch_module, (char*)"ShortStorage"));
ASSERT_NOT_NULL(THCPCharStorageClass = PyObject_GetAttrString(torch_module, (char*)"CharStorage"));
ASSERT_NOT_NULL(THCPByteStorageClass = PyObject_GetAttrString(torch_module, (char*)"ByteStorage"));
ASSERT_NOT_NULL(THCPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
ASSERT_NOT_NULL(THCPHalfTensorClass = PyMapping_GetItemString(module_dict, (char*)"HalfTensor"));
ASSERT_NOT_NULL(THCPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
ASSERT_NOT_NULL(THCPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
ASSERT_NOT_NULL(THCPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
ASSERT_NOT_NULL(THCPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
ASSERT_NOT_NULL(THCPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
ASSERT_NOT_NULL(THCPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
if (!THCPDoubleTensor_postInit(torch_module)) return false;
if (!THCPFloatTensor_postInit(torch_module)) return false;
if (!THCPHalfTensor_postInit(torch_module)) return false;
if (!THCPLongTensor_postInit(torch_module)) return false;
if (!THCPIntTensor_postInit(torch_module)) return false;
if (!THCPShortTensor_postInit(torch_module)) return false;
if (!THCPCharTensor_postInit(torch_module)) return false;
if (!THCPByteTensor_postInit(torch_module)) return false;
return true;
#undef ASSERT_NOT_NULL
@ -60,6 +60,7 @@ static bool THCPModule_assignStateless()
PyObject *stateless;
INIT_STATELESS(Double);
INIT_STATELESS_DETAIL(Float, Cuda);
INIT_STATELESS(Half);
INIT_STATELESS(Long);
INIT_STATELESS(Int);
INIT_STATELESS(Short);
@ -238,6 +239,20 @@ PyObject * THCPModule_cudaSleep(PyObject *_unused, PyObject *cycles)
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_cudaLockMutex(PyObject *module)
{
auto mutex = THCCachingAllocator_getCudaFreeMutex();
mutex->lock();
Py_RETURN_NONE;
}
PyObject * THCPModule_cudaUnlockMutex(PyObject *module)
{
auto mutex = THCCachingAllocator_getCudaFreeMutex();
mutex->unlock();
Py_RETURN_NONE;
}
PyObject * THCPModule_getLibPath(PyObject *_unused)
{
#define _STR(x) #x
@ -255,7 +270,8 @@ PyObject * THCPModule_getLibPath(PyObject *_unused)
// Cuda module initialization
////////////////////////////////////////////////////////////////////////////////
bool THCPModule_initCuda(PyObject *module_dict) {
bool THCPModule_initCuda(PyObject *torch_module) {
HANDLE_TH_ERRORS
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
state = THCState_alloc();
THCState_setDeviceAllocator(state, THCCachingAllocator_get());
@ -264,25 +280,26 @@ bool THCPModule_initCuda(PyObject *module_dict) {
#ifdef USE_MAGMA
THCMagma_init(state);
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(true)) != -1);
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(true)) != -1);
#else
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_magma", PyBool_FromLong(false)) != -1);
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_magma", PyBool_FromLong(false)) != -1);
#endif
#ifdef CUDA_HALF_TENSOR
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(true)) != -1);
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(true)) != -1);
#else
ASSERT_TRUE(PyDict_SetItemString(module_dict, "has_half", PyBool_FromLong(false)) != -1);
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "has_half", PyBool_FromLong(false)) != -1);
#endif
ASSERT_TRUE(THCPModule_loadClasses(module_dict));
ASSERT_TRUE(THCPModule_loadClasses(torch_module));
ASSERT_TRUE(THCPModule_assignStateless());
ASSERT_TRUE(PyDict_SetItemString(module_dict, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);
ASSERT_TRUE(PyObject_SetAttrString(torch_module, "_state_cdata", PyLong_FromVoidPtr(state)) != -1);
// TODO: register THCudaShutdown handler at exit
return true;
#undef ASSERT_TRUE
END_HANDLE_TH_ERRORS
}
// Callback for python part. Used for additional initialization of python classes
@ -293,6 +310,5 @@ PyObject * THCPModule_initExtension(PyObject *self)
THPUtils_setError("class loader couldn't access torch module");
return NULL;
}
PyObject* module_dict = PyModule_GetDict(torch_module);
return PyBool_FromLong(THCPModule_initCuda(module_dict));
return PyBool_FromLong(THCPModule_initCuda(torch_module));
}

View File

@ -1,19 +1,18 @@
#include "THCP.h"
static bool THCSPModule_loadClasses(PyObject *module_dict)
static bool THCSPModule_loadClasses(PyObject *sparse_module)
{
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
ASSERT_NOT_NULL(THCSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
ASSERT_NOT_NULL(THCSPHalfTensorClass = PyMapping_GetItemString(module_dict, (char*)"HalfTensor"));
ASSERT_NOT_NULL(THCSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
ASSERT_NOT_NULL(THCSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
ASSERT_NOT_NULL(THCSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
ASSERT_NOT_NULL(THCSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
ASSERT_NOT_NULL(THCSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
ASSERT_NOT_NULL(THCSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
if (!THCSPDoubleTensor_postInit(sparse_module)) return false;
if (!THCSPFloatTensor_postInit(sparse_module)) return false;
#ifdef CUDA_HALF_TENSOR
if (!THCSPHalfTensor_postInit(sparse_module)) return false;
#endif
if (!THCSPLongTensor_postInit(sparse_module)) return false;
if (!THCSPIntTensor_postInit(sparse_module)) return false;
if (!THCSPShortTensor_postInit(sparse_module)) return false;
if (!THCSPCharTensor_postInit(sparse_module)) return false;
if (!THCSPByteTensor_postInit(sparse_module)) return false;
return true;
#undef ASSERT_NOT_NULL
}
static bool THCSPModule_assignStateless()
@ -31,7 +30,9 @@ static bool THCSPModule_assignStateless()
PyObject *stateless;
INIT_STATELESS(Double);
INIT_STATELESS(Float);
#ifdef CUDA_HALF_TENSOR
INIT_STATELESS(Half);
#endif
INIT_STATELESS(Long);
INIT_STATELESS(Int);
INIT_STATELESS(Short);
@ -46,9 +47,9 @@ static bool THCSPModule_assignStateless()
// Sparse Cuda module initialization
////////////////////////////////////////////////////////////////////////////////
bool THCSPModule_initCudaSparse(PyObject *module_dict) {
bool THCSPModule_initCudaSparse(PyObject *module) {
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
ASSERT_TRUE(THCSPModule_loadClasses(module_dict));
ASSERT_TRUE(THCSPModule_loadClasses(module));
ASSERT_TRUE(THCSPModule_assignStateless());
return true;
#undef ASSERT_TRUE
@ -56,11 +57,10 @@ bool THCSPModule_initCudaSparse(PyObject *module_dict) {
PyObject * THCSPModule_initExtension(PyObject *self)
{
PyObject *torch_module = PyImport_ImportModule("torch.cuda.sparse");
if (!torch_module) {
PyObject *module = PyImport_ImportModule("torch.cuda.sparse");
if (!module) {
THPUtils_setError("class loader couldn't access torch.cuda.sparse module");
return NULL;
}
PyObject* module_dict = PyModule_GetDict(torch_module);
return PyBool_FromLong(THCSPModule_initCudaSparse(module_dict));
return PyBool_FromLong(THCSPModule_initCudaSparse(module));
}

View File

@ -10,6 +10,7 @@
#include "override_macros.h"
#include "torch/csrc/copy_utils.h"
#include "DynamicTypes.h"
#define THC_GENERIC_FILE "torch/csrc/generic/Tensor.cpp"
#include <THC/THCGenerateAllTypes.h>

View File

@ -62,6 +62,8 @@ void cudnn_batch_norm_forward(
THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
double exponential_average_factor, double epsilon)
{
assertSameGPU(dataType, input, output, weight, bias, running_mean, running_var,
save_mean, save_var);
cudnnBatchNormMode_t mode;
if (input->nDimension == 2) {
mode = CUDNN_BATCHNORM_PER_ACTIVATION;
@ -120,6 +122,8 @@ void cudnn_batch_norm_backward(
THVoidTensor* save_mean, THVoidTensor* save_var, bool training,
double epsilon)
{
assertSameGPU(dataType, input, grad_output, grad_input, grad_weight, grad_bias, weight,
running_mean, running_var, save_mean, save_var);
cudnnBatchNormMode_t mode;
if (input->nDimension == 2) {
mode = CUDNN_BATCHNORM_PER_ACTIVATION;
@ -143,7 +147,7 @@ void cudnn_batch_norm_backward(
THVoidTensor_assertContiguous(save_mean);
THVoidTensor_assertContiguous(save_var);
CHECK(cudnnBatchNormalizationBackward(
handle, mode, &one, &zero, &one, &one,
handle, mode, &one, &zero, &one, &zero,
idesc.desc, tensorPointer(dataType, input),
odesc.desc, tensorPointer(dataType, grad_output),
gdesc.desc, tensorPointer(dataType, grad_input),

View File

@ -198,6 +198,8 @@ Workspace chooseAlgorithm(
if (!cache.find(conv.params, algo)) {
if (benchmark) {
// findAlgorithm may call cudaFree()
std::lock_guard<std::mutex> lock(*THCCachingAllocator_getCudaFreeMutex());
auto perfResults = search::findAlgorithm(handle, conv);
if (perfResults.status == CUDNN_STATUS_SUCCESS) {
*algo = perfResults.algo;
@ -285,6 +287,7 @@ void cudnn_convolution_forward(
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* output,
Convolution* info, bool benchmark)
{
assertSameGPU(dataType, input, weight, output);
int groups = info->groups;
cudnnConvolutionFwdAlgo_t fwdAlg;
@ -309,6 +312,7 @@ void cudnn_convolution_add_bias(
THVoidTensor* bias, THVoidTensor* output,
Convolution* info)
{
assertSameGPU(dataType, bias, output);
CHECK_ARG(output->nDimension <= 5);
TensorDescriptor& bdesc = info->bdesc;
@ -329,6 +333,7 @@ void cudnn_convolution_backward_data(
THVoidTensor* gradOutput, THVoidTensor* gradInput, THVoidTensor* weight,
Convolution* info, bool benchmark)
{
assertSameGPU(dataType, gradOutput, gradInput, weight);
int groups = info->params.groups;
cudnnConvolutionBwdDataAlgo_t bwdDataAlg;
@ -353,6 +358,7 @@ void cudnn_convolution_backward_filter(
THVoidTensor* gradOutput, THVoidTensor* input, THVoidTensor* gradWeight,
Convolution* info, bool benchmark)
{
assertSameGPU(dataType, gradOutput, input, gradWeight);
int groups = info->params.groups;
cudnnConvolutionBwdFilterAlgo_t bwdFilterAlg;
@ -380,6 +386,7 @@ void cudnn_convolution_backward_bias(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* gradBias, Convolution* info)
{
assertSameGPU(dataType, gradOutput, gradBias);
Constant one(dataType, 1);
Constant zero(dataType, 0);
void* gradOutput_ptr = tensorPointer(dataType, gradOutput, 0, 1, 0);

View File

@ -1,17 +1,42 @@
#ifndef THP_CUDNN_EXCEPTIONS_INC
#define THP_CUDNN_EXCEPTIONS_INC
#include <THC/THC.h>
#include <cudnn.h>
#include <string>
#include <stdexcept>
#include <sstream>
#include "Types.h"
#define CHECK_ARG(cond) _CHECK_ARG(cond, #cond, __FILE__, __LINE__)
extern THCState* state;
namespace torch { namespace cudnn {
template<typename ...T>
void assertSameGPU(cudnnDataType_t dataType, T* ... tensors) {
static_assert(std::is_same<THVoidTensor, typename std::common_type<T...>::type>::value,
"all arguments to assertSameGPU have to be THVoidTensor*");
int is_same;
if (dataType == CUDNN_DATA_FLOAT) {
is_same = THCudaTensor_checkGPU(state, sizeof...(T),
reinterpret_cast<THCudaTensor*>(tensors)...);
} else if (dataType == CUDNN_DATA_HALF) {
is_same = THCudaHalfTensor_checkGPU(state, sizeof...(T),
reinterpret_cast<THCudaHalfTensor*>(tensors)...);
} else if (dataType == CUDNN_DATA_DOUBLE) {
is_same = THCudaDoubleTensor_checkGPU(state, sizeof...(T),
reinterpret_cast<THCudaDoubleTensor*>(tensors)...);
} else {
throw std::runtime_error("unknown cuDNN data type");
}
if (!is_same) {
throw std::runtime_error("tensors are on different GPUs");
}
}
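assertSameGPU above leans on three variadic-template tricks: the static_assert pins every argument to THVoidTensor*, sizeof...(T) supplies the tensor count, and the pack is expanded directly into the varargs checkGPU call. A self-contained sketch of the same shape, with all names (Item, check_same_device, all_on_same_device) purely illustrative:

#include <cstdarg>
#include <type_traits>

struct Item { int device; };

// varargs-style checker, analogous to THCudaTensor_checkGPU(state, n, ...).
static bool check_same_device(int n, ...) {
  va_list args;
  va_start(args, n);
  int first = -1;
  bool same = true;
  for (int i = 0; i < n; ++i) {
    Item *item = va_arg(args, Item *);
    if (i == 0) first = item->device;
    else same = same && (item->device == first);
  }
  va_end(args);
  return same;
}

template <typename... T>
bool all_on_same_device(T *... items) {
  static_assert(std::is_same<Item, typename std::common_type<T...>::type>::value,
                "all arguments must be Item*");
  return check_same_device(sizeof...(T), items...);
}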
class cudnn_exception : public std::runtime_error {
public:
cudnnStatus_t status;

View File

@ -20,6 +20,20 @@ cudnnDataType_t getCudnnDataType(PyObject *tensorClass)
throw std::runtime_error(msg);
}
cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor)
{
if (tensor.type() == thpp::Type::FLOAT) {
return CUDNN_DATA_FLOAT;
} else if (tensor.type() == thpp::Type::DOUBLE) {
return CUDNN_DATA_DOUBLE;
} else if (tensor.type() == thpp::Type::HALF) {
return CUDNN_DATA_HALF;
}
std::string msg("getCudnnDataType() not supported for ");
msg += std::to_string((int)tensor.type());
throw std::runtime_error(msg);
}
PyObject * getTensorClass(PyObject *args)
{
for (int i = 0; i < PyTuple_Size(args); i++) {

View File

@ -6,11 +6,13 @@
#include <string>
#include <cudnn.h>
#include "../Types.h"
#include <THPP/THPP.h>
namespace torch { namespace cudnn {
PyObject * getTensorClass(PyObject *args);
cudnnDataType_t getCudnnDataType(PyObject *tensorClass);
cudnnDataType_t getCudnnDataType(const thpp::Tensor& tensor);
void _THVoidTensor_assertContiguous(THVoidTensor *tensor, const std::string& name);
#define THVoidTensor_assertContiguous(tensor) \

View File

@ -26,9 +26,14 @@ static void THSPTensor_(dealloc)(THSPTensor* self)
static PyObject * THSPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
#ifdef THC_GENERIC_FILE
printf("Sparse CUDA Tensors not supported!\n");
return NULL;
#define THPIndexTensor_Check THCPLongTensor_Check
#define THPIndexTensor THCPLongTensor
#define THIndexTensor THCudaLongTensor
#else
#define THPIndexTensor_Check THPLongTensor_Check
#define THPIndexTensor THPLongTensor
#define THIndexTensor THLongTensor
#endif
HANDLE_TH_ERRORS
Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
@ -71,24 +76,24 @@ static PyObject * THSPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
self->cdata = THSTensor_(newWithSize)(LIBRARY_STATE sizes.get());
}
// torch.SparseTensor(torch.LongTensor indices, torch.LongTensor values)
else if (num_args == 2 && THPLongTensor_Check(first_arg)) {
else if (num_args == 2 && THPIndexTensor_Check(first_arg)) {
PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
if (!THPTensor_(Check)(second_arg)) goto invalid_arguments;
THLongTensor *indices = ((THPLongTensor*)first_arg)->cdata;
THIndexTensor *indices = ((THPIndexTensor*)first_arg)->cdata;
THTensor *values = ((THPTensor*)second_arg)->cdata;
self->cdata = THSTensor_(newWithTensor)(LIBRARY_STATE indices, values);
}
// torch.SparseTensor(torch.LongTensor indices,
// torch.Tensor values,
// torch.Size sizes)
else if (num_args > 2 && THPLongTensor_Check(first_arg)) {
else if (num_args > 2 && THPIndexTensor_Check(first_arg)) {
PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
PyObject *third_arg = PyTuple_GET_ITEM(args, 2);
if (!THPTensor_(Check)(second_arg)) goto invalid_arguments;
if (!THPSize_Check(third_arg)) goto invalid_arguments;
THLongTensor *indices = ((THPLongTensor*)first_arg)->cdata;
THIndexTensor *indices = ((THPIndexTensor*)first_arg)->cdata;
THTensor *values = ((THPTensor*)second_arg)->cdata;
THLongStoragePtr sizes = THPUtils_unpackSize(third_arg);
self->cdata = THSTensor_(newWithTensorAndSize)(
@ -107,12 +112,19 @@ invalid_arguments:
"no arguments",
"(int size)",
"(torch.Size sizes)",
#ifdef THC_GENERIC_FILE
"(torch.cuda.LongTensor indices, " THPTensorStr " values)",
"(torch.cuda.LongTensor indices, " THPTensorStr " values, torch.Size sizes)",
#else
"(torch.LongTensor indices, " THPTensorStr " values)",
"(torch.LongTensor indices, " THPTensorStr " values, torch.Size sizes)",
#endif
"(int ...)");
return NULL;
END_HANDLE_TH_ERRORS
#endif
#undef THPIndexTensor_Check
#undef THPIndexTensor
#undef THIndexTensor
}
// TODO: implement equality
@ -227,3 +239,16 @@ bool THSPTensor_(init)(PyObject *module)
PyModule_AddObject(module, THSPTensorBaseStr, (PyObject *)&THSPTensorType);
return true;
}
bool THSPTensor_(postInit)(PyObject *module)
{
THSPTensorClass = PyObject_GetAttrString(module, TH_CONCAT_STRING_2(Real,Tensor));
if (!THSPTensorClass) return false;
bool is_cuda = false;
#ifdef THC_GENERIC_FILE
is_cuda = true;
#endif
const char *type_name = TH_CONCAT_STRING_2(Real,);
torch::registerPyTypeObject((PyTypeObject*)THSPTensorClass, type_name, is_cuda, true);
return true;
}

View File

@ -186,10 +186,15 @@ static PyObject * THPStorage_(get)(THPStorage *self, PyObject *index)
return THPUtils_(newReal)(value);
/* Slice index */
} else if (PySlice_Check(index)) {
Py_ssize_t start, stop, slicelength;
Py_ssize_t start, stop, slicelength, step;
long len = THStorage_(size)(LIBRARY_STATE self->cdata);
if (!THPUtils_parseSlice(index, len, &start, &stop, &slicelength))
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
return NULL;
if (step != 1) {
THPUtils_setError("Trying to slice with a step of %ld, but only a step of "
"1 is supported", (long)step);
return NULL;
}
real *data = THStorage_(data)(LIBRARY_STATE self->cdata);
THStoragePtr new_storage = THStorage_(newWithData)(LIBRARY_STATE data + start, slicelength);
@ -223,10 +228,15 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
THStorage_(set)(LIBRARY_STATE self->cdata, nindex, rvalue);
return 0;
} else if (PySlice_Check(index)) {
Py_ssize_t start, stop;
Py_ssize_t start, stop, slicelength, step;
long len = THStorage_(size)(LIBRARY_STATE self->cdata);
if (!THPUtils_parseSlice(index, len, &start, &stop, NULL))
if (!THPUtils_parseSlice(index, len, &start, &stop, &step, &slicelength))
return -1;
if (step != 1) {
THPUtils_setError("Trying to slice with a step of %ld, but only a step of "
"1 is supported", (long)step);
return 0;
}
// TODO: check the bounds only once
// TODO: fill?
for (;start < stop; start++)
@ -304,6 +314,7 @@ void THPStorage_(initCopyMethods)()
THPInsertCopyFunction(h, &THStorage_(copyShort));
THPInsertCopyFunction(h, &THStorage_(copyInt));
THPInsertCopyFunction(h, &THStorage_(copyLong));
THPInsertCopyFunction(h, &THStorage_(copyHalf));
THPInsertCopyFunction(h, &THStorage_(copyFloat));
THPInsertCopyFunction(h, &THStorage_(copyDouble));
#ifdef THC_GENERIC_FILE
@ -318,7 +329,6 @@ void THPStorage_(initCopyMethods)()
#ifdef CUDA_HALF_TENSOR
THPInsertCopyFunction(h, &THStorage_(copyCudaHalf));
#endif
#ifndef THC_REAL_IS_HALF
// add CPU <- GPU copies to base type
#define THCpuStorage_(name) TH_CONCAT_4(TH, Real, Storage_, name)
extern THPCopyList THCpuStorage_(copy_functions);
@ -335,7 +345,6 @@ void THPStorage_(initCopyMethods)()
#endif
#undef THCpuStorage_
#endif
#endif
}
#include "StorageMethods.cpp"

View File

@ -159,6 +159,8 @@ static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyO
#elif defined(TH_REAL_IS_LONG)
// TODO: remove the cast
THP_decodeInt64Buffer((int64_t*) storage->data, src + offset, byte_order, count);
#elif defined(TH_REAL_IS_HALF)
THP_decodeHalfBuffer(storage->data, src + offset, byte_order, count);
#elif defined(TH_REAL_IS_FLOAT)
THP_decodeFloatBuffer(storage->data, src + offset, byte_order, count);
#elif defined(TH_REAL_IS_DOUBLE)
@ -190,13 +192,36 @@ PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *file)
int fd = PyObject_AsFileDescriptor(file);
THPUtils_assert(fd != -1, "_new_with_file couldn't retrieve a file "
"descriptor from given object");
THStoragePtr storage = THPStorage_(readFileRaw)(fd);
THStorage *storage = THPStorage_(readFileRaw)(fd, nullptr);
if (storage == nullptr)
return nullptr;
PyObject *result = THPStorage_(New)(storage);
storage.release();
return result;
END_HANDLE_TH_ERRORS
}
static PyObject *THPStorage_(setFromFile)(THPStorage *self, PyObject *args)
{
HANDLE_TH_ERRORS
PyObject *file = PyTuple_GET_ITEM(args, 0);
int fd = PyObject_AsFileDescriptor(file);
THPUtils_assert(fd != -1, "_set_from_file couldn't retrieve a file "
"descriptor from given object");
PyObject *offset = PyTuple_GET_ITEM(args, 1);
if (offset != Py_None) {
lseek(fd, THPUtils_unpackLong(offset), SEEK_SET);
}
THStorage *storage = THPStorage_(readFileRaw)(fd, self->cdata);
if (storage == nullptr)
return nullptr;
Py_INCREF(self);
return (PyObject *) self;
END_HANDLE_TH_ERRORS
}
#ifdef THC_GENERIC_FILE
PyObject * THPStorage_(getDevice)(THPStorage *self)
{
@ -250,6 +275,7 @@ static PyMethodDef THPStorage_(methods)[] = {
{"is_pinned", (PyCFunction)THPStorage_(isPinned), METH_NOARGS, NULL},
{"_write_file", (PyCFunction)THPStorage_(writeFile), METH_O, NULL},
{"_new_with_file", (PyCFunction)THPStorage_(newWithFile), METH_O | METH_STATIC, NULL},
{"_set_from_file", (PyCFunction)THPStorage_(setFromFile), METH_VARARGS, NULL},
#ifndef THC_GENERIC_FILE
{"from_buffer", (PyCFunction)THPStorage_(fromBuffer), METH_VARARGS | METH_KEYWORDS | METH_STATIC, NULL},
#endif

View File

@ -16,6 +16,9 @@
#ifdef TH_REAL_IS_INT
#define NUMPY_TYPE_ENUM NPY_INT32
#endif
#ifdef TH_REAL_IS_SHORT
#define NUMPY_TYPE_ENUM NPY_INT16
#endif
#ifdef TH_REAL_IS_BYTE
#define NUMPY_TYPE_ENUM NPY_UINT8
#endif
@ -23,6 +26,7 @@
#endif
PyObject *THPTensorClass = NULL;
THPCopyList THTensor_(copy_functions);
PyObject * THPTensor_(NewEmpty)()
{
@ -412,32 +416,6 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject
#define UNPACK_SCALAR(IDX_VARIABLE) idx = THPUtils_unpackLong(IDX_VARIABLE);
#endif
#define INDEX_SCALAR(DIM, IDX_VARIABLE, TENSOR_VARIABLE, CASE_1D, CASE_MD) \
int64_t idx; \
UNPACK_SCALAR(IDX_VARIABLE); \
long dimsize = THTensor_(size)(LIBRARY_STATE TENSOR_VARIABLE, DIM); \
idx = (idx < 0) ? dimsize + idx : idx; \
\
if (dimsize <= 0) { \
PyErr_SetString(PyExc_IndexError, "indexing an empty tensor"); \
return false; \
} \
if (idx < 0 || idx >= dimsize) { \
PyErr_Format(PyExc_IndexError, "index %lld is out of range for dimension " \
"%lld (of size %lld)", (long long)idx, (long long)DIM, (long long)dimsize); \
return false; \
} \
\
if(THTensor_(nDimension)(LIBRARY_STATE TENSOR_VARIABLE) == 1) { \
CASE_1D; \
} else { \
CASE_MD; \
}
#define GET_OFFSET(t, idx) \
t->storageOffset + t->stride[0] * idx;
#ifdef THC_GENERIC_FILE
#define THIndexTensor THCudaLongTensor
#define THIndexTensor_(NAME) TH_CONCAT_2(THCudaLongTensor_,NAME)
@ -451,58 +429,89 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject
#endif
template<bool allow_index>
static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
THTensorPtr &tresult, THStorage * &sresult, long &storage_offset)
static bool THPTensor_(_indexOnce)(PyObject *index, int &indexed_dim,
THTensorPtr &tresult, THStorage* &sresult, long &storage_offset)
{
#ifdef WITH_NUMPY
static PyArray_Descr *NumpyLongArrDescr = PyArray_DescrFromType(NPY_INT64);
bool is_long, is_scalar_array;
#endif
tresult = NULL;
sresult = NULL;
// Indexing with an integer
// Indexing with a scalar
if(IS_SCALAR(index)) {
THTensor *self_t = self->cdata;
INDEX_SCALAR(0, index, self_t,
// 1D tensor
sresult = self_t->storage;
storage_offset = GET_OFFSET(self_t, idx),
// >1D tensor
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self_t);
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, 0, idx)
)
return true;
int64_t idx;
UNPACK_SCALAR(index);
long dimsize = THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim);
idx = (idx < 0) ? dimsize + idx : idx;
if (dimsize <= 0) {
PyErr_SetString(PyExc_IndexError, "indexing an empty tensor");
throw python_error();
}
if (idx < 0 || idx >= dimsize) {
PyErr_Format(PyExc_IndexError, "index %lld is out of range for dimension "
"%lld (of size %lld)", (long long)idx, (long long)indexed_dim, (long long)dimsize);
throw python_error();
}
if(THTensor_(nDimension)(LIBRARY_STATE tresult.get()) == 1) {
sresult = tresult.get()->storage;
storage_offset = tresult->storageOffset + tresult->stride[0] * idx;
tresult = NULL;
} else {
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, indexed_dim, idx);
}
} else if (index == Py_None) {
// _indexOnce will never be called with tresult == NULL, except for a None index
if (!tresult) {
tresult = THTensor_(newWithStorage1d)(LIBRARY_STATE sresult, storage_offset, 1, 1);
sresult = NULL;
} else {
THTensor_(unsqueeze1d)(LIBRARY_STATE tresult.get(), NULL, indexed_dim++);
}
// Indexing with a slice
} else if (PySlice_Check(index)) {
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
Py_ssize_t start, end, length;
if (!THPUtils_parseSlice(index, THTensor_(size)(LIBRARY_STATE tresult.get(), 0), &start, &end, &length))
return false;
THTensor_(narrow)(LIBRARY_STATE tresult.get(), NULL, 0, start, length);
return true;
} else if (THPIndexTensor_Check(index)) {
if (allow_index) {
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
tresult = THTensor_(new)(LIBRARY_STATE_NOARGS);
THTensor_(indexSelect)(LIBRARY_STATE tresult.get(), self->cdata, 0, index_t);
return true;
} else {
THPUtils_setError("assignments using LongTensors as index aren't supported yet");
tresult = NULL;
return false;
Py_ssize_t start, end, length, step;
if (!THPUtils_parseSlice(index, THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim), &start, &end, &step, &length))
throw python_error();
if (step <= 0) {
PyErr_SetString(PyExc_ValueError, "slice step has to be greater than 0");
throw python_error();
}
// Indexing multiple dimensions
} else if(PyTuple_Check(index)) {
if (length == 0) {
PyErr_SetString(PyExc_ValueError, "result of slicing is an empty tensor");
throw python_error();
}
tresult->storageOffset += tresult->stride[indexed_dim] * start;
tresult->stride[indexed_dim] *= step;
tresult->size[indexed_dim] = length;
indexed_dim++;
} else {
return false;
}
return true;
}
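A minimal Python sketch of the three index kinds _indexOnce handles (a scalar with negative wrap-around, None inserting a dimension, and slices with a positive step); it assumes a build that includes these changes.

import torch

x = torch.randn(3, 4)
row = x[-1]            # scalar index: negative values wrap, selects the last row
lifted = x[None]       # None index: unsqueezes a new leading dimension -> (1, 3, 4)
strided = x[:, 0:4:2]  # slice with step 2 -> (3, 2)
# x[:, 0:0] now raises ValueError: result of slicing is an empty tensor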
static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
THTensorPtr &tresult, THStorage * &sresult, long &storage_offset)
{
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
sresult = NULL;
int indexed_dim = 0;
if(PyTuple_Check(index)) {
long num_index_dim = (long)PyTuple_Size(index);
long num_effective_index = num_index_dim;
long num_tensor_dim = THTensor_(nDimension)(LIBRARY_STATE self->cdata);
long ellipsis_idx = num_tensor_dim + 1;
long ellipsis_idx = -1;
for (int i = 0; i < num_index_dim; i++) {
if (PyTuple_GET_ITEM(index, i) == Py_Ellipsis) {
PyObject *dimidx = PyTuple_GET_ITEM(index, i);
if (dimidx == Py_Ellipsis) {
if (ellipsis_idx != -1) throw std::runtime_error("ellipsis can be used at most once");
ellipsis_idx = i;
num_effective_index--;
break;
}
if (dimidx == Py_None) {
num_effective_index--;
}
}
if (num_effective_index > num_tensor_dim) {
@ -512,130 +521,52 @@ static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
return false;
}
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
int t_dim = 0;
bool valid = true;
for(int dim = 0; dim < num_index_dim; dim++) {
for (int dim = 0; dim < num_index_dim; dim++) {
if (dim == ellipsis_idx) {
t_dim = tresult->nDimension - (num_index_dim - dim - 1);
// tresult can be NULL if ellipsis is the last item
if (tresult) indexed_dim = tresult->nDimension - (num_index_dim - dim - 1);
continue;
}
PyObject *dimidx = PyTuple_GET_ITEM(index, dim);
if(IS_SCALAR(dimidx)) {
INDEX_SCALAR(t_dim, dimidx, tresult,
// 1D tensor
sresult = tresult->storage;
storage_offset = GET_OFFSET(tresult, idx);
tresult = NULL;
return true,
// >1D tensor
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, t_dim, idx)
)
} else if (PySlice_Check(dimidx)) {
Py_ssize_t start, end, length;
long size_dim = THTensor_(size)(LIBRARY_STATE tresult.get(), t_dim);
if (!THPUtils_parseSlice(dimidx, size_dim, &start, &end, &length))
return false;
THTensor_(narrow)(LIBRARY_STATE tresult.get(), NULL, t_dim++, start, length);
} else if (THPIndexTensor_Check(dimidx)) {
if (allow_index) {
THIndexTensor *index_t = ((THPIndexTensor*)dimidx)->cdata;
THTensorPtr index_result = THTensor_(new)(LIBRARY_STATE_NOARGS);
THTensor_(indexSelect)(LIBRARY_STATE index_result.get(), tresult.get(), t_dim++, index_t);
tresult = index_result.release();
} else {
THPUtils_setError("assignments using LongTensors as index aren't supported yet");
tresult = NULL;
return false;
}
} else {
valid = THPTensor_(_indexOnce)(dimidx, indexed_dim, tresult, sresult, storage_offset);
if (!valid) {
tresult = NULL;
valid = false;
// overwrite this, so the message mentions the incorrect object
index = dimidx;
break;
}
}
if (valid) {
if (valid) return true;
} else if (index == Py_Ellipsis) {
return true;
} else {
if (THPTensor_(_indexOnce)(index, indexed_dim, tresult, sresult, storage_offset))
return true;
}
}
PyErr_Format(PyExc_TypeError, "indexing a tensor with an object of type %s. "
"The only supported types are integers, slices"
#ifdef WITH_NUMPY
", numpy scalars"
", numpy scalars and "
#endif
" and "
#ifndef THC_GENERIC_FILE
"torch.ByteTensor.",
"torch.LongTensor or torch.ByteTensor as the only argument.",
#else
"torch.cuda.ByteTensor.",
"torch.cuda.LongTensor or torch.cuda.ByteTensor as the only argument.",
#endif
THPUtils_typename(index));
return false;
}
#undef IS_SCALAR
#undef INDEX_SCALAR
#undef GET_OFFSET
#undef THIndexTensor
#undef THIndexTensor_
#undef THPIndexTensor
#undef THPIndexTensor_Check
extern THPCopyList THTensor_(copy_functions);
THPCopyList THTensor_(copy_functions);
void THPTensor_(initCopyMethods)()
{
auto& h = THTensor_(copy_functions);
// copy from CPU types
THPInsertCopyFunction(h, &THTensor_(copyByte));
THPInsertCopyFunction(h, &THTensor_(copyChar));
THPInsertCopyFunction(h, &THTensor_(copyShort));
THPInsertCopyFunction(h, &THTensor_(copyInt));
THPInsertCopyFunction(h, &THTensor_(copyLong));
THPInsertCopyFunction(h, &THTensor_(copyFloat));
THPInsertCopyFunction(h, &THTensor_(copyDouble));
#ifdef THC_GENERIC_FILE
// copy from GPU types
THPInsertCopyFunction(h, &THTensor_(copyCudaByte));
THPInsertCopyFunction(h, &THTensor_(copyCudaChar));
THPInsertCopyFunction(h, &THTensor_(copyCudaShort));
THPInsertCopyFunction(h, &THTensor_(copyCudaInt));
THPInsertCopyFunction(h, &THTensor_(copyCudaLong));
THPInsertCopyFunction(h, &THTensor_(copyCudaFloat));
THPInsertCopyFunction(h, &THTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertCopyFunction(h, &THTensor_(copyCudaHalf));
#endif
#ifndef THC_REAL_IS_HALF
THPInsertCopyFunction(h, &THCTensor_(copyAsyncCPU), true);
// add CPU <- GPU copies to base type
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
extern THPCopyList THCpuTensor_(copy_functions);
auto& b = THCpuTensor_(copy_functions);
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaByte));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaChar));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaShort));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaInt));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaLong));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaFloat));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaHalf));
#endif
THPInsertCopyFunction(b, &THCpuTensor_(copyAsyncCuda), true);
#undef THCpuTensor_
#endif
#endif
}
#undef UNPACK_SCALAR
template<bool force_tensor>
static PyObject * THPTensor_(getValue)(THPTensor *self, PyObject *index)
{
HANDLE_TH_ERRORS
#ifndef TH_REAL_IS_HALF
#ifndef THC_GENERIC_FILE
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
#else
@ -647,11 +578,18 @@ static PyObject * THPTensor_(getValue)(THPTensor *self, PyObject *index)
THTensor_(maskedSelect)(LIBRARY_STATE t.get(), self->cdata, mask->cdata);
return THPTensor_(New)(t.release());
}
if (THPIndexTensor_Check(index)) {
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
THTensorPtr index_result = THTensor_(new)(LIBRARY_STATE_NOARGS);
THTensor_(indexSelect)(LIBRARY_STATE index_result.get(), self->cdata, 0, index_t);
return THPTensor_(New)(index_result.release());
}
#endif
THTensorPtr tresult;
THStorage *sresult;
long storage_offset;
if (!THPTensor_(_index)<true>(self, index, tresult, sresult, storage_offset))
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
return NULL;
if (tresult)
return THPTensor_(New)(tresult.release());
@ -674,6 +612,7 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
{
HANDLE_TH_ERRORS
#ifndef TH_REAL_IS_HALF
#ifndef THC_GENERIC_FILE
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
#else
@ -693,11 +632,26 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
}
return 0;
}
if (THPIndexTensor_Check(index)) {
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
if (THPUtils_(checkReal)(value)) {
real v = THPUtils_(unpackReal)(value);
THTensor_(indexFill)(LIBRARY_STATE self->cdata, 0, index_t, v);
} else if (THPTensor_(Check)(value)) {
THTensor_(indexCopy)(LIBRARY_STATE self->cdata, 0, index_t, ((THPTensor*)value)->cdata);
} else {
THPUtils_setError("can't assign %s to a " THPTensorStr " using a LongTensor "
"(only " THPTensorStr " or %s are supported)",
THPUtils_typename(value), THPUtils_typeTraits<real>::python_type_str);
}
return 0;
}
#endif
THTensorPtr tresult;
THStorage *sresult;
long storage_offset;
if (!THPTensor_(_index)<false>(self, index, tresult, sresult, storage_offset))
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
return -1;
if (sresult) {
if (!force_tensor) {
@ -714,7 +668,11 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
}
if (tresult) {
if (THPUtils_(checkReal)(value)) {
#ifndef TH_REAL_IS_HALF
THTensor_(fill)(LIBRARY_STATE tresult.get(), THPUtils_(unpackReal)(value));
#else
throw std::runtime_error("torch.HalfTensors don't support scalar assignments");
#endif
} else {
// TODO: try to do this without creating a temporary object
THPTensorPtr tmp = (THPTensor*)THPTensor_(New)(tresult.release());
@ -732,6 +690,10 @@ static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *valu
return -1;
END_HANDLE_TH_ERRORS_RET(-1)
}
#undef THIndexTensor
#undef THIndexTensor_
#undef THPIndexTensor
#undef THPIndexTensor_Check
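A short Python sketch of the LongTensor indexing paths added above: reads go through indexSelect, scalar writes through indexFill, and tensor writes through indexCopy. Shapes and names are illustrative.

import torch

x = torch.randn(5, 3)
idx = torch.LongTensor([0, 2, 4])

rows = x[idx]               # getValue: indexSelect along dim 0 -> 3x3 tensor
x[idx] = 0                  # setValue with a scalar: indexFill on the selected rows
x[idx] = torch.zeros(3, 3)  # setValue with a tensor: indexCopy row by row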
Py_ssize_t THPTensor_(length)(THPTensor *self)
{
@ -847,11 +809,57 @@ PyTypeObject THPTensorStatelessType = {
0, /* tp_weaklist */
};
#ifndef TH_REAL_IS_HALF
#include "SparseTensor.cpp"
#endif
void THPTensor_(initCopyMethods)()
{
auto& h = THTensor_(copy_functions);
// copy from CPU types
THPInsertCopyFunction(h, &THTensor_(copyByte));
THPInsertCopyFunction(h, &THTensor_(copyChar));
THPInsertCopyFunction(h, &THTensor_(copyShort));
THPInsertCopyFunction(h, &THTensor_(copyInt));
THPInsertCopyFunction(h, &THTensor_(copyLong));
THPInsertCopyFunction(h, &THTensor_(copyFloat));
THPInsertCopyFunction(h, &THTensor_(copyHalf));
THPInsertCopyFunction(h, &THTensor_(copyDouble));
#ifdef THC_GENERIC_FILE
// copy from GPU types
THPInsertCopyFunction(h, &THTensor_(copyCudaByte));
THPInsertCopyFunction(h, &THTensor_(copyCudaChar));
THPInsertCopyFunction(h, &THTensor_(copyCudaShort));
THPInsertCopyFunction(h, &THTensor_(copyCudaInt));
THPInsertCopyFunction(h, &THTensor_(copyCudaLong));
THPInsertCopyFunction(h, &THTensor_(copyCudaFloat));
THPInsertCopyFunction(h, &THTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertCopyFunction(h, &THTensor_(copyCudaHalf));
#endif
THPInsertCopyFunction(h, &THCTensor_(copyAsyncCPU), true);
// add CPU <- GPU copies to base type
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
extern THPCopyList THCpuTensor_(copy_functions);
auto& b = THCpuTensor_(copy_functions);
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaByte));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaChar));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaShort));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaInt));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaLong));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaFloat));
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertCopyFunction(b, &THCpuTensor_(copyCudaHalf));
#endif
THPInsertCopyFunction(b, &THCpuTensor_(copyAsyncCuda), true);
#undef THCpuTensor_
#endif
}
bool THPTensor_(init)(PyObject *module)
{
#ifndef THC_GENERIC_FILE
#if !defined(THC_GENERIC_FILE) && !defined(TH_REAL_IS_HALF)
THVector_(vectorDispatchInit)();
#endif
THPTensorType.tp_methods = THPTensor_(methods);
@ -867,6 +875,20 @@ bool THPTensor_(init)(PyObject *module)
return true;
}
bool THPTensor_(postInit)(PyObject *module)
{
THPTensorClass = PyObject_GetAttrString(module,(char*)TH_CONCAT_STRING_2(Real,Tensor));
if (!THPTensorClass) return false;
bool is_cuda = false;
#ifdef THC_GENERIC_FILE
is_cuda = true;
#endif
const char *type_name = TH_CONCAT_STRING_2(Real,);
torch::registerPyTypeObject((PyTypeObject*)THPTensorClass, type_name, is_cuda, false);
return true;
}
#undef NUMPY_TYPE_ENUM
#endif

View File

@ -2,12 +2,18 @@
#define TH_GENERIC_FILE "generic/Tensor.h"
#else
#if defined(TH_REAL_IS_HALF) || defined(THD_GENERIC_FILE)
#define GENERATE_SPARSE 0
#else
#define GENERATE_SPARSE 1
#endif
struct THPTensor {
PyObject_HEAD
THTensor *cdata;
};
#ifndef THD_GENERIC_FILE
#if GENERATE_SPARSE
struct THSPTensor {
PyObject_HEAD
THSTensor *cdata;
@ -21,7 +27,7 @@ struct THSPTensor {
* count is decremented.
*/
THP_API PyObject * THPTensor_(New)(THTensor *ptr);
#ifndef THD_GENERIC_FILE
#if GENERATE_SPARSE
THP_API PyObject * THSPTensor_(New)(THSTensor *ptr);
#endif
@ -29,12 +35,12 @@ THP_API PyObject * THSPTensor_(New)(THSTensor *ptr);
* Creates a new empty Python Tensor object
*/
THP_API PyObject * THPTensor_(NewEmpty)(void);
#ifndef THD_GENERIC_FILE
#if GENERATE_SPARSE
THP_API PyObject * THSPTensor_(NewEmpty)(void);
#endif
extern PyObject *THPTensorClass;
#ifndef THD_GENERIC_FILE
#if GENERATE_SPARSE
extern PyObject *THSPTensorClass;
#endif
@ -43,12 +49,15 @@ extern PyObject *THSPTensorClass;
// TODO: init stateless in THPTensor_(init) and remove this
extern PyTypeObject THPTensorStatelessType;
#ifndef THD_GENERIC_FILE
#if GENERATE_SPARSE
extern PyTypeObject THSPTensorStatelessType;
#endif
bool THPTensor_(init)(PyObject *module);
#ifndef THD_GENERIC_FILE
bool THPTensor_(postInit)(PyObject *module);
#if GENERATE_SPARSE
bool THSPTensor_(init)(PyObject *module);
bool THSPTensor_(postInit)(PyObject *module);
#endif
extern PyTypeObject THPTensorType;
@ -58,4 +67,6 @@ template <> struct THPTypeInfo<THTensor> {
};
#endif
#undef GENERATE_SPARSE
#endif

View File

@ -1,34 +1,60 @@
// Sparse Tensors not supported for CUDA
#if IS_CUDA || !defined(TH_REAL_IS_HALF)
PyObject * THSPTensor_(size)(PyObject *self, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
THSTensor* tensor = ((THSPTensor*)self)->cdata;
if (PyTuple_Size(args) == 0 && (!kwargs || PyDict_Size(kwargs) == 0)) {
return THPSize_New(tensor->nDimensionI + tensor->nDimensionV, tensor->size);
}
int tuplecount = args ? PyTuple_Size(args) : 0;
int dictcount = kwargs ? PyDict_Size(kwargs) : 0;
PyObject* pydim = NULL;
if (tuplecount == 1 && dictcount == 0) {
pydim = PyTuple_GET_ITEM(args, 0);
} else if (dictcount == 1 && tuplecount == 0) {
pydim = PyDict_GetItemString(kwargs, "dim");
}
if (pydim && THPUtils_checkLong(pydim)) {
int dim = (int)THPUtils_unpackLong(pydim);
if (dim < 0)
dim += tensor->nDimensionI + tensor->nDimensionV;
return PyInt_FromLong(THSTensor_(size)(LIBRARY_STATE tensor, dim));
}
THPUtils_invalidArguments(args, kwargs, "size", 2, "(int dim)", "no arguments");
return NULL;
END_HANDLE_TH_ERRORS
}
[[
name: size
defined_if: "!IS_CUDA"
name: THSPTensor_(size)
python_name: size
method_flags: METH_KEYWORDS
only_register: True
sparse: yes
options:
- return: long
cname: size
arguments:
- THSTensor* self
- long dim
- return: THLongStorage*
cname: newSizeOf
arguments:
- THSTensor* self
]]
#endif
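A hedged sketch of the two size() overloads implemented above for sparse tensors; the torch.sparse constructor used here is an assumption based on the sparse-tensor support introduced in this range and is not shown in this diff.

import torch

i = torch.LongTensor([[0, 1, 1], [2, 0, 2]])      # 2 x nnz index matrix
v = torch.FloatTensor([3, 4, 5])                  # nnz values
s = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))

s.size()     # no-argument form -> torch.Size([2, 3])
s.size(1)    # per-dimension form -> 3 (negative dims wrap around)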
[[
name: nDimension
defined_if: "!IS_CUDA"
sparse: yes
python_name: ndimension
return: long
arguments:
- THSTensor* self
]]
[[
name: THPTensor_(nDimension)
python_name: dim
only_register: True
method_flags: METH_KEYWORDS
sparse: yes
]]
[[
name: nnz
defined_if: "!IS_CUDA"
sparse: yes
return: long
arguments:
@ -37,7 +63,6 @@
[[
name: isContiguous
defined_if: "!IS_CUDA"
sparse: yes
python_name: is_contiguous
return: bool
@ -54,9 +79,18 @@
- THSTensor* self
]]
[[
name: indices
defined_if: "IS_CUDA"
sparse: yes
return: THCudaLongTensor*
arguments:
- THSTensor* self
]]
[[
name: values
defined_if: "!IS_CUDA"
sparse: yes
return: THTensor*
arguments:
@ -65,16 +99,23 @@
[[
name: contiguous
defined_if: "!IS_CUDA"
sparse: yes
return: argument 0
arguments:
- THSTensor* self
]]
[[
name: clone
sparse: yes
cname: newClone
return: THSTensor*
arguments:
- THSTensor* self
]]
[[
name: toDense
defined_if: "!IS_CUDA"
sparse: yes
python_name: to_dense
return: THTensor*
@ -82,9 +123,19 @@
- THSTensor* self
]]
[[
name: resizeAs_
python_name: resize_as_
sparse: yes
cname: resizeAs
return: self
arguments:
- THSTensor* self
- THSTensor* template
]]
[[
name: transpose
defined_if: "!IS_CUDA"
sparse: yes
cname: newTranspose
return: THSTensor*
@ -96,7 +147,6 @@
[[
name: transpose_
defined_if: "!IS_CUDA"
sparse: yes
cname: transpose
return: argument 0
@ -108,7 +158,6 @@
[[
name: mm
defined_if: "!IS_CUDA"
sparse: yes
only_stateless: True
cname: spaddmm
@ -127,9 +176,29 @@
- THTensor* mat2
]]
[[
name: spmm
only_stateless: True
sparse: yes
cname: spaddmm
return: argument 0
before_call: |
long s1 = THSTensor_(size)(LIBRARY_STATE ((THSPTensor*)$arg4)->cdata, 0);
long s2 = THTensor_(size)(LIBRARY_STATE ((THPTensor*)$arg5)->cdata, 1);
THTensor_(resize2d)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata, s1, s2);
THTensor_(zero)(LIBRARY_STATE ((THPTensor*)$arg0)->cdata);
arguments:
- arg: THTensor* result
output: True
- CONSTANT AS_REAL(0)
- argument 0
- CONSTANT AS_REAL(1)
- THSTensor* mat1
- THTensor* mat2
]]
[[
name: sspmm
defined_if: "!IS_CUDA"
only_stateless: True
sparse: yes
cname: sspaddmm
@ -150,7 +219,6 @@
[[
name: sspaddmm
defined_if: "!IS_CUDA"
sparse: yes
with_stateless: True
return: argument 0
@ -168,7 +236,6 @@
[[
name: spadd
defined_if: "!IS_CUDA"
sparse: yes
cname: spcadd
with_stateless: True
@ -182,3 +249,139 @@
- THSTensor* mat2
]]
[[
name: zero_
sparse: yes
cname: zero
return: self
arguments:
- THSTensor* self
]]
[[
name: add
sparse: yes
with_stateless: True
return: argument 0
cname: cadd
arguments:
- arg: THSTensor* result
output: True
- THSTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* other
]]
[[
name: add_
sparse: yes
return: argument 0
cname: cadd
arguments:
- THSTensor* self
- THSTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* other
]]
[[
name: sub
sparse: yes
with_stateless: True
return: argument 0
cname: csub
arguments:
- arg: THSTensor* result
output: True
- THSTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* other
]]
[[
name: sub_
sparse: yes
return: argument 0
cname: csub
arguments:
- THSTensor* self
- THSTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* other
]]
[[
name: mul
sparse: yes
return: argument 0
with_stateless: True
options:
- cname: mul
arguments:
- arg: THSTensor* result
output: True
- THSTensor* self
- real value
- cname: cmul
arguments:
- arg: THSTensor* result
output: True
- THSTensor* self
- THSTensor* other
]]
[[
name: mul_
sparse: yes
return: argument 0
options:
- cname: mul
arguments:
- THSTensor* self
- THSTensor* self
- real value
- cname: cmul
arguments:
- THSTensor* self
- THSTensor* self
- THSTensor* other
]]
[[
name: div
sparse: yes
cname: div
with_stateless: True
return: argument 0
arguments:
- arg: THSTensor* result
output: True
- THSTensor* self
- real value
]]
[[
name: div_
sparse: yes
cname: div
return: argument 0
arguments:
- THSTensor* self
- THSTensor* self
- real value
]]
[[
name: sparse_mask
cname: sparseMask
return: argument 0
arguments:
- arg: THSTensor* result
output: True
- THTensor* self
- THSTensor* mask
]]

View File

@ -2,6 +2,7 @@
[[
name: THPTensor_(elementSize)
python_name: element_size
cpu_half: True
only_register: True
]]
static PyObject * THPTensor_(elementSize)(THPTensor *self, PyObject *args)
@ -13,6 +14,7 @@ static PyObject * THPTensor_(elementSize)(THPTensor *self, PyObject *args)
[[
name: THPTensor_(storage)
python_name: storage
cpu_half: True
only_register: True
]]
static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
@ -31,6 +33,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
name: storageOffset
python_name: storage_offset
cpu_half: True
return: long
arguments:
- THTensor* self
@ -39,6 +42,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
name: nDimension
python_name: ndimension
cpu_half: True
return: long
arguments:
- THTensor* self
@ -46,6 +50,7 @@ static PyObject * THPTensor_(storage)(THPTensor *self, PyObject *args)
[[
name: THPTensor_(nDimension)
python_name: dim
cpu_half: True
only_register: True
method_flags: METH_KEYWORDS
]]
@ -75,6 +80,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
name: resize_
return: self
cname: resize
cpu_half: True
arguments:
- THTensor* self
- arg: THSize* size
@ -107,6 +113,8 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
name: numel
return: long
cname: nElement
cpu_half: True
with_stateless: True
arguments:
- THTensor* self
@ -114,6 +122,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
name: THPTensor_(numel)
python_name: nelement
cpu_half: True
only_register: True
method_flags: METH_KEYWORDS
]]
@ -121,6 +130,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
name: set_
cname: set
cpu_half: True
return: argument 0
options:
- cname: set
@ -159,6 +169,7 @@ PyObject * THPTensor_(setIndex)(THPTensor *self, PyObject *args)
[[
name: THPTensor_(select)
python_name: select
cpu_half: True
only_register: True
]]
static PyObject * THPTensor_(select)(THPTensor *self, PyObject *args)
@ -213,6 +224,7 @@ PyObject * THPTensor_(size)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: THPTensor_(size)
python_name: size
cpu_half: True
method_flags: METH_KEYWORDS
only_register: True
]]
@ -253,6 +265,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: THPTensor_(stride)
python_name: stride
cpu_half: True
method_flags: METH_KEYWORDS
only_register: True
]]
@ -269,6 +282,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: isSameSizeAs
python_name: is_same_size
cpu_half: True
return: bool
arguments:
- THTensor* self
@ -278,6 +292,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: isContiguous
python_name: is_contiguous
cpu_half: True
return: bool
arguments:
- THTensor* self
@ -286,6 +301,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: isSetTo
python_name: is_set_to
cpu_half: True
return: bool
arguments:
- THTensor* self
@ -326,20 +342,42 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
- THBoolTensor* mask
]]
#if IS_CUDA
THTensor* THTensor_(transpose_neg)(THCState* state, THTensor *self, THTensor *src, int dim0, int dim1)
#else
THTensor* THTensor_(transpose_neg)(THTensor *self, THTensor *src, int dim0, int dim1)
#endif
{
int ndim = self->nDimension;
if (dim0 < 0)
dim0 += ndim;
if (dim1 < 0)
dim1 += ndim;
if (src != NULL) {
THTensor_(transpose)(LIBRARY_STATE self, src, dim0, dim1);
return NULL;
} else {
return THTensor_(newTranspose)(LIBRARY_STATE self, dim0, dim1);
}
}
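The transpose_neg shim above exists so both dimension arguments may be negative before being forwarded to transpose/newTranspose; a brief usage sketch.

import torch

x = torch.randn(2, 3, 4)
y = x.transpose(-1, -2)   # same as x.transpose(2, 1) -> size (2, 4, 3)
x.transpose_(0, -1)       # in-place variant, x now has size (4, 3, 2)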
[[
name: transpose
with_stateless: True
cname: newTranspose
cname: transpose_neg
cpu_half: True
return: THTensor*
arguments:
- THTensor* self
- CONSTANT NULL
- long dim0
- long dim1
]]
[[
name: transpose_
cname: transpose
cname: transpose_neg
cpu_half: True
return: self
arguments:
- THTensor* self
@ -378,6 +416,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: squeeze
cpu_half: True
with_stateless: True
return: argument 0
options:
@ -395,6 +434,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: squeeze_
cpu_half: True
return: self
options:
- cname: squeeze
@ -408,6 +448,30 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
- long dim
]]
[[
name: unsqueeze
with_stateless: True
cpu_half: True
return: argument 0
cname: unsqueeze1d
arguments:
- arg: THTensor* result
output: True
- THTensor* self
- long dim
]]
[[
name: unsqueeze_
cpu_half: True
return: self
cname: unsqueeze1d
arguments:
- THTensor* self
- THTensor* self
- long dim
]]
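The new unsqueeze/unsqueeze_ bindings map onto THTensor_(unsqueeze1d); a minimal sketch of the Python-level effect.

import torch

x = torch.randn(3, 4)
a = x.unsqueeze(0)    # size (1, 3, 4)
b = x.unsqueeze(2)    # size (3, 4, 1)
x.unsqueeze_(1)       # in-place, x now has size (3, 1, 4)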
[[
name: nonzero
with_stateless: True
@ -434,6 +498,16 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
- THTensor* self
]]
[[
name: view
cname: newView
return: THTensor*
arguments:
- THTensor* self
- arg: THSize* size
long_args: True
]]
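view is wired to THTensor_(newView) with long_args, so the target sizes are passed as plain integers; a small sketch (the tensor must be contiguous and the element count must match).

import torch

x = torch.randn(4, 3)
flat = x.view(12)     # same storage, flattened view
grid = x.view(2, 6)   # dimensions passed directly thanks to long_args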
[[
name: resizeAs_
python_name: resize_as_
@ -495,6 +569,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: narrow
cpu_half: True
return: argument 0
arguments:
- arg: THTensor* result
@ -507,6 +582,7 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
[[
name: unfold
cpu_half: True
return: argument 0
arguments:
- arg: THTensor* result
@ -570,12 +646,13 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs)
only_register: True
only_stateless: True
]]
#ifndef TH_REAL_IS_HALF
static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
{
#if IS_CUDA && THCP_AUTO_GPU
THCPAutoGPU __autogpu_guard = THCPAutoGPU(args);
#endif
HANDLE_TH_ERRORS
#if IS_CUDA
THCPAutoGPU __autogpu_guard(-1);
#endif
Py_ssize_t _argcount = args ? PyTuple_Size(args) : 0;
std::vector<THPObjectPtr> items;
std::vector<THTensor *> item_tensors;
@ -608,6 +685,10 @@ static PyObject * THPTensor_stateless_(cat)(THPTensor *_unused, PyObject *args)
dimension = 0;
}
#if IS_CUDA
__autogpu_guard.setDevice(THTensor_(getDevice)(LIBRARY_STATE item_tensors[0]));
#endif
result = (THPTensor *)THPTensor_(NewEmpty)();
if (!result) return NULL;
@ -622,10 +703,12 @@ invalid_arguments:
return NULL;
END_HANDLE_TH_ERRORS
}
#endif
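The stateless cat wrapper above defaults the dimension to 0 when it is omitted and, on CUDA, pins the device of the first input before allocating the result; a short usage sketch.

import torch

a = torch.randn(2, 3)
b = torch.randn(3, 3)
c = torch.cat([a, b])      # dimension defaults to 0 -> size (5, 3)
d = torch.cat([a, a], 1)   # concatenate along dim 1 -> size (2, 6)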
[[
name: data_ptr
return: void*
cpu_half: True
cname: data
arguments:
- THTensor* self
@ -643,6 +726,7 @@ invalid_arguments:
[[
python_name: copy_
name: THPTensor_(copy_)
cpu_half: True
method_flags: METH_KEYWORDS
only_register: True
]]

View File

@ -9,6 +9,7 @@
name: THPTensor_(apply)
python_name: apply_
defined_if: "!IS_CUDA"
cpu_half: True
only_register: True
override_method_flags: METH_O
]]
@ -43,6 +44,7 @@ static PyObject * THPTensor_(apply)(THPTensor *self, PyObject *arg)
name: THPTensor_(map)
python_name: map_
defined_if: "!IS_CUDA"
cpu_half: True
only_register: True
]]
static PyObject * THPTensor_(map)(THPTensor *self, PyObject *args)
@ -78,6 +80,7 @@ static PyObject * THPTensor_(map)(THPTensor *self, PyObject *args)
name: THPTensor_(map2)
python_name: map2_
defined_if: "!IS_CUDA"
cpu_half: True
only_register: True
]]
static PyObject * THPTensor_(map2)(THPTensor *self, PyObject *args)

View File

@ -906,10 +906,10 @@
arguments:
- arg: THTensor* result
output: True
- THTensor* mat1
- THTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* mat2
- THSTensor* other
]]
[[
@ -1567,15 +1567,25 @@
[[
name: addcmul_
cname: addcmul
return: argument 0
arguments:
- THTensor* self
- THTensor* self
- arg: real value
default: AS_REAL(1)
- THTensor* tensor1
- THTensor* tensor2
options:
- cname: addcmul
return: argument 0
arguments:
- THTensor* self
- THTensor* self
- arg: real value
default: AS_REAL(1)
- THTensor* tensor1
- THTensor* tensor2
- cname: spaddcmul
return: argument 0
arguments:
- THTensor* self
- THTensor* self
- arg: real value
default: AS_REAL(1)
- THSTensor* tensor1
- THSTensor* tensor2
]]
[[

View File

@ -11,7 +11,7 @@
- long n
]]
#if !IS_CUDA
#if !defined(TH_REAL_IS_HALF) && !IS_CUDA
static void THTensor_(random2__)(THTensor *self, THGenerator *gen, long a, long b)
{
THArgCheck(b >= a, 2, "upper bound must be greater or equal than lower bound");

View File

@ -52,6 +52,10 @@ PyObject * THPTensor_(toNumpy)(THPTensor *self, PyObject *args) {
#if !defined(WITH_NUMPY)
THPUtils_setError("PyTorch was compiled without numpy support\n");
return NULL;
#elif defined(THC_GENERIC_FILE)
THPUtils_setError("can't convert CUDA tensor to numpy (it doesn't support GPU arrays). "
"Use .cpu() to move the tensor to host memory first.");
return NULL;
#elif !defined(NUMPY_TYPE_ENUM)
THPUtils_setError("numpy conversion for %s is not supported\n", THPUtils_typename(self));
return NULL;
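With the new check, calling .numpy() on a CUDA tensor reports a clear error instead of attempting an unsupported conversion; the workaround is to copy to host first. A hedged sketch (requires a CUDA build):

import torch

t = torch.cuda.FloatTensor(3).zero_()
# t.numpy() now fails with the message added above
arr = t.cpu().numpy()   # move the data to host memory, then convert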

View File

@ -29,22 +29,35 @@ THTensor * THPTensor_(newWithMetadataFileRaw)(int fd, THStorage *storage)
void THPStorage_(writeFileRaw)(THStorage *self, int fd)
{
real *data;
int64_t size = self->size;
#ifndef THC_GENERIC_FILE
data = self->data;
#else
std::unique_ptr<char[]> cpu_data(new char[self->size * sizeof(real)]);
std::unique_ptr<char[]> cpu_data(new char[size * sizeof(real)]);
data = (real*)cpu_data.get();
THCudaCheck(cudaMemcpy(data, self->data, self->size * sizeof(real), cudaMemcpyDeviceToHost));
THCudaCheck(cudaMemcpy(data, self->data, size * sizeof(real), cudaMemcpyDeviceToHost));
#endif
SYSCHECK(write(fd, &self->size, sizeof(long)));
ssize_t result = write(fd, &size, sizeof(int64_t));
if (result != sizeof(int64_t))
throw std::system_error(result, std::system_category());
// fast track for bytes and little endian
if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
SYSCHECK(write(fd, data, sizeof(real) * self->size));
char *bytes = (char *) data;
int64_t remaining = sizeof(real) * size;
while (remaining > 0) {
ssize_t result = write(fd, bytes, remaining);
if (result < 0)
throw std::system_error(result, std::system_category());
bytes += result;
remaining -= result;
}
if (remaining != 0)
throw std::system_error(result, std::system_category());
} else {
long buffer_size = std::min(self->size, (long)5000);
int64_t buffer_size = std::min(size, (int64_t)5000);
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
for (long i = 0; i < self->size; i += buffer_size) {
size_t to_convert = std::min(self->size - i, buffer_size);
for (int64_t i = 0; i < size; i += buffer_size) {
size_t to_convert = std::min(size - i, buffer_size);
if (sizeof(real) == 2) {
THP_encodeInt16Buffer((uint8_t*)le_buffer.get(),
(const int16_t*)data + i,
@ -61,17 +74,27 @@ void THPStorage_(writeFileRaw)(THStorage *self, int fd)
THPByteOrder::THP_LITTLE_ENDIAN,
to_convert);
}
SYSCHECK(write(fd, data, to_convert * sizeof(real)));
SYSCHECK(write(fd, le_buffer.get(), to_convert * sizeof(real)));
}
}
}
THStorage * THPStorage_(readFileRaw)(int fd)
THStorage * THPStorage_(readFileRaw)(int fd, THStorage *_storage)
{
real *data;
long size;
SYSCHECK(read(fd, &size, sizeof(long)));
THStoragePtr storage = THStorage_(newWithSize)(LIBRARY_STATE size);
int64_t size;
ssize_t result = read(fd, &size, sizeof(int64_t));
if (result != sizeof(int64_t))
throw std::system_error(result, std::system_category());
THStoragePtr storage;
if (_storage == nullptr) {
storage = THStorage_(newWithSize)(LIBRARY_STATE size);
} else {
THPUtils_assert(_storage->size == size,
"storage has wrong size: expected %ld got %ld",
size, _storage->size);
storage = _storage;
}
#ifndef THC_GENERIC_FILE
data = storage->data;
@ -82,11 +105,21 @@ THStorage * THPStorage_(readFileRaw)(int fd)
// fast track for bytes and little endian
if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
SYSCHECK(read(fd, data, sizeof(real) * storage->size));
char *bytes = (char *) data;
int64_t remaining = sizeof(real) * storage->size;
while (remaining > 0) {
ssize_t result = read(fd, bytes, remaining);
if (result <= 0) // 0 means EOF, which is also an error
throw std::system_error(result, std::system_category());
bytes += result;
remaining -= result;
}
if (remaining != 0)
throw std::system_error(result, std::system_category());
} else {
long buffer_size = std::min(size, (long)5000);
int64_t buffer_size = std::min(size, (int64_t)5000);
std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]);
for (long i = 0; i < size; i += buffer_size) {
for (int64_t i = 0; i < size; i += buffer_size) {
size_t to_convert = std::min(size - i, buffer_size);
SYSCHECK(read(fd, le_buffer.get(), sizeof(real) * to_convert));
if (sizeof(real) == 2) {

Some files were not shown because too many files have changed in this diff.