Compare commits


800 Commits

SHA1 Message Date
ccd5f4dbfc version bump 2017-05-01 15:55:29 -04:00
3cc21b5a46 fix OSX build 2017-04-29 09:29:21 -04:00
27fb8750ad fix NCCL makefile for CUDA 7.5 2017-04-28 20:08:07 -04:00
45020a74cd remove inplace pow and fix contiguous -> coalesce (#1398) 2017-04-28 18:26:29 -04:00
9c01f5d6b2 Document hybrid sparse tensors.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-28 23:53:01 +02:00
cbb9f08b71 Add new init methods gain, eye and dirac (#1172) 2017-04-28 17:16:40 -04:00
f75ab857b8 Add safeCoalesce() to tests 2017-04-28 17:11:05 -04:00
f2903332c7 Make coalesce() out of place 2017-04-28 17:11:05 -04:00
9643be76f9 speed up accumulation 2017-04-28 17:11:05 -04:00
4f09461d24 Rename sparse tensor contiguous() to coalesce() 2017-04-28 17:11:05 -04:00
bafb2e5cc2 Implement sparse pow. (#1387) 2017-04-28 23:06:09 +02:00
28a7fbbdf5 Documentation fix for torch.gather 2017-04-28 22:45:14 +02:00
4c1cdb6148 Refactor Python string utility function 2017-04-28 21:25:26 +02:00
775481ed56 re-enable dilated convolutions on Kepler (#1394) 2017-04-28 14:42:19 -04:00
5b2aac7c73 Merge commit '224f5eabf5cfb3a19abc1819f7dac230500b6bdb' 2017-04-28 13:48:06 -04:00
224f5eabf5 half<->float conversion cleanup (#680) 2017-04-28 19:46:42 +02:00
fd490c6490 Merge commit 'd6a31c68a0f39656257322a55c9e04dd579de828' 2017-04-28 13:42:23 -04:00
d6a31c68a0 Add option to disable ppc64le's VSX support
Set environment variable TH_NO_VSX=1 to disable VSX.
2017-04-28 13:41:03 -04:00
96a281dfab Add one more missing self.dilation parameter. (#1392)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-28 19:16:32 +02:00
94b147fd41 Allow dict batches in DataLoader. (#1354)
* Allow dicts in Dataloader

* use collections.Sequence instead of collections.Iterable in dataloader
2017-04-28 19:14:52 +02:00
c26f6877a0 guard topk for half (#759) 2017-04-28 11:57:15 -04:00
8908000262 function -> lambda in test 2017-04-28 10:31:40 -04:00
8b1d5727d8 fix minor docs 2017-04-28 10:13:52 -04:00
75f1989bec Add nn.Bilinear and tests 2017-04-28 10:11:30 -04:00
e221536ad8 Merge commit 'a44317fea88adddded91e068088415de1e66fd4b' 2017-04-28 08:04:39 -04:00
a44317fea8 Change magma_sgesvd to magma_sgesdd which is significantly faster 2017-04-28 08:03:39 -04:00
24e5a9057e Revert "Parallelize TensorMethods.cpp builds (#1364)" (#1390)
This reverts commit 060048bcd808893ba3113d09273a42642904078a.
2017-04-28 07:59:40 -04:00
060048bcd8 Parallelize TensorMethods.cpp builds (#1364) 2017-04-28 07:45:21 -04:00
77035d151e make topk test unique 2017-04-28 07:30:25 -04:00
50c9c23525 enable topk for all cuda 2017-04-28 07:14:21 -04:00
3f81803b09 Merge commit '69574a6dc4036b0113c512a1b2d74e23682c8a3b' 2017-04-28 07:08:43 -04:00
d421c473a9 Merge commit '928f6516c16ff91c0a789d0a653551041d1bafd0' 2017-04-28 07:07:24 -04:00
48f9e526ea implement expand/expandAs in CPU/GPU code 2017-04-28 07:06:25 -04:00
69574a6dc4 implement expand/expandAs in CPU/GPU code 2017-04-28 07:04:08 -04:00
928f6516c1 implement expand/expandAs in CPU/GPU code 2017-04-28 07:03:51 -04:00
b93b525a1c Enable specifying of margin in HingeEmbeddingLoss (#1378)
Previously it was not possible to set a value for the margin of HingeEmbeddingLoss in the constructor. This patch fixes the issue and makes the loss behave as described in the docs.

A discussion of this issue can be viewed here:
https://discuss.pytorch.org/t/issue-with-setting-margin-for-hingeembeddingloss/2088
2017-04-28 06:58:48 -04:00
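A minimal usage sketch of the fixed constructor (written against the current tensor API for brevity; the margin value and shapes here are illustrative):

    import torch
    import torch.nn as nn

    # margin is now configurable instead of silently staying at its default
    loss_fn = nn.HingeEmbeddingLoss(margin=0.5)
    x = torch.randn(4)                    # distances/scores
    y = torch.tensor([1., -1., 1., -1.])  # +1/-1 targets
    loss = loss_fn(x, y)
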
8db2cf6182 temp fix for transposed dilated convolution (#1388) 2017-04-28 02:53:37 +02:00
7e8ef0e22a Actually pass dilation to the underlying operators. (#1386)
No tests for now; we'll need some sort of shape DSL to concisely
represent them.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-27 23:38:01 +02:00
27990fee54 Use fully qualified name as tp_name for tensors and storages (#1379) 2017-04-27 16:26:44 -04:00
2ef7331007 Update sparse.py 2017-04-27 02:25:00 +02:00
c2cfa4cf5b Add THGenerate*Type.h for all types (#1014) 2017-04-27 01:11:56 +02:00
b39a2f2cbb Documentation for sparse tensors. (#1366) 2017-04-26 21:43:05 +02:00
d9f01397b3 s/NOCUDA/NO_CUDA/
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-26 21:42:09 +02:00
8ca7bf2ab3 Check argument types in 'checkTypes' (#1363)
Fixes #1357
2017-04-26 15:00:41 -04:00
41705ce7d5 Add zero padding module (#1326) 2017-04-25 16:58:51 +02:00
88fc1d39ff Generic TopK implementation (#744)
* move TopK to generic

* partial genericization of kernel code

* introduce TopKTypeConfig, specialize radix type and conversion for floats

* implement topk for byte tensor

* implement for char tensor

* implement for int tensor, extend test to check indices as well

* works for longs too

* make bitfield set/get a struct, add support for 64-bit types

* extend to double tensor

* implement for half tensor

* asserts; test fix
2017-04-25 16:39:20 +02:00
3ab074b3c5 Fix torch.stack() with Variable inputs (#1345) 2017-04-24 12:20:51 -04:00
6a69f7007b Revert "add keyword out for autograd function Concat to match torch.cat (#1336)" (#1340)
This reverts commit 71b9dea6ecc2278511ba6c2531437d27d9a2b8c8.
2017-04-23 19:19:27 +02:00
71b9dea6ec add keyword out for autograd function Concat to match torch.cat (#1336) 2017-04-23 15:36:24 +02:00
fa4f363b93 Instance norm (#1283)
* instance norm

* fix whitespaces

* whitespaces

* docs

* "C" letter was cyrillic in docs, fixed

* remove force_eval, fix non contiguous case
2017-04-23 14:49:15 +02:00
aab30d4ea2 Fix errors when no CUDA devices are available (#1334)
Fixes #1267

This fixes a number of issues when PyTorch was compiled with CUDA
support but run on a machine without any GPUs. Now, we treat all errors
from cudaGetDeviceCount() as if the machine has no devices.
2017-04-23 14:45:27 +02:00
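The user-visible effect, sketched with the Python API (the fix itself lives in the C/CUDA error handling):

    import torch

    # on a CUDA build running on a GPU-less machine this now reports zero
    # devices instead of surfacing the cudaGetDeviceCount() error
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    device = 'cuda' if n_gpus > 0 else 'cpu'
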
2b56711c24 Indexing fix for fused GRU/LSTM kernels when all tensors are not contiguous. (#1325) 2017-04-22 04:22:32 -04:00
2fa3365f94 Merge commit '5224fc56b03b6468cb85ccf39034b8ab0d76d04e' 2017-04-22 01:14:34 -07:00
5224fc56b0 fix typo 2017-04-22 10:14:09 +02:00
4373580e6b Merge commit 'e80a3a7f7b8d0e179c1481e0744f08e9385b31f3' 2017-04-22 01:11:10 -07:00
d9406a8a1a Merge commit '10387a3f35573462e18219c321ff550757ce9b09' 2017-04-22 01:10:53 -07:00
e80a3a7f7b Indexing fix for fused GRU/LSTM kernels when all tensors are not contiguous. 2017-04-22 01:09:46 -07:00
5b83fe6781 add contiguous checks 2017-04-22 09:57:36 +02:00
24d92b5d9f Concatenate directly into shared memory when constructing batches (#1323)
This saves an extra memory copy, which speeds up data loading a bit
(5-10% with accimage).

As part of this change:

 * torch.cat accepts keyword argument out
 * specifying out=None is treated like not specifying out
2017-04-22 03:40:30 -04:00
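A sketch of the new keyword (buffer name and shapes are illustrative; share_memory_() stands in for the data loader's shared-memory allocation):

    import torch

    parts = [torch.randn(2, 3) for _ in range(4)]
    buf = torch.Tensor(8, 3).share_memory_()  # preallocated shared buffer
    torch.cat(parts, 0, out=buf)               # cat writes straight into buf
    # per the note above, out=None behaves as if out were omitted
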
1375694853 Document torchvision members 2017-04-21 12:50:36 -07:00
be5e399d46 Add a simple README for torch/lib. (#1322)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-21 15:06:12 -04:00
10387a3f35 fix gradBias checks 2017-04-20 19:21:50 -04:00
a782a6231f Merge commit 'e788ea40de0f7ef393f1b602098a6775a95d8976' 2017-04-20 19:00:45 -04:00
e788ea40de fix typo in TH_APPLY for _dimOffset 2017-04-20 18:59:12 -04:00
81345306c8 Merge commit '8236d38e81396ac48697ac289c0476cff18a8e08' 2017-04-20 15:03:48 -07:00
f0a19e2617 Merge commit '331219c5506b26bf0906b7acdafb4823e07a924e' 2017-04-20 15:01:22 -07:00
8236d38e81 add cusparse link dependency 2017-04-20 14:31:30 -07:00
8adf8fe2ed create and expose handles for cusparse 2017-04-20 14:30:14 -07:00
d2472d1ab5 Disable cudnn dilated convolutions for kepler. (#1308) 2017-04-20 15:31:45 -04:00
331219c550 define abs for short too 2017-04-20 09:55:17 -07:00
5f65ee9ca0 Add more newContiguous calls and checks 2017-04-19 14:01:31 -07:00
a8e6610e3d Fix argument typo in pad_packed_sequence docstring (#1300) 2017-04-19 13:50:59 -04:00
9e8b4ef075 Include THCNumerics.cuh in THCAtomics.cuh. (#752) 2017-04-19 12:08:22 -04:00
a35f507532 Update functional.py (#1298) 2017-04-19 11:07:12 -04:00
6aa22beb86 Fix loss.py docs (#1296) 2017-04-19 11:03:15 -04:00
c7d83a16f6 Update README.md 2017-04-18 19:05:18 -04:00
934816c01c Change the default algo for cuDNN conv forward to PRECOMP_GEMM (#1290) 2017-04-18 19:01:47 -04:00
5a0510934f Merge commit 'fcf4deac7d215f134ea25cd3def8b564b58b033c' 2017-04-18 15:21:20 -07:00
fc19473501 Corrections in legacy modules. (#1286) 2017-04-18 17:13:53 -04:00
34546f022a Expose dilated convolutions.
Fixes #1225.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-18 17:13:02 -04:00
ab77742f6e Add some missing documentation for arguments.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2017-04-18 17:13:02 -04:00
701e63107f speed improvements, fix tests 2017-04-18 12:46:54 -07:00
655c22569e CPU hspmm + more efficient reorder 2017-04-18 12:46:54 -07:00
cd3bbc9dfd more operations and optimizations (hspmm, reorder, ...) 2017-04-18 12:46:54 -07:00
1018b238ac make gradients contiguous in adagrad 2017-04-18 12:46:54 -07:00
e27bd4ce7a faster cadd 2017-04-18 12:46:54 -07:00
b2acc33c73 contiguousValues method 2017-04-18 12:46:54 -07:00
40804830b8 mark_contiguous operation 2017-04-18 12:46:54 -07:00
01d84c5f9d revert sparse cuda index type change 2017-04-18 12:46:54 -07:00
88b42324e7 spcadd, sparseMask, cadd, csub, cmul + tests 2017-04-18 12:46:54 -07:00
ec260fe8e9 add test for dsmm 2017-04-18 12:46:54 -07:00
328b416068 THCS contiguous + to_dense 2017-04-18 12:46:54 -07:00
4bde9efbd7 Update CONTRIBUTING.md 2017-04-18 15:39:58 -04:00
ff781ed059 Update CONTRIBUTING.md 2017-04-18 15:39:26 -04:00
8f9a1af253 Merge commit 'fcf4deac7d215f134ea25cd3def8b564b58b033c' 2017-04-18 12:22:44 -07:00
31900b6bae Merge commit '1feb120d938d47c01900f656322f16bc41d08af3' 2017-04-18 12:22:27 -07:00
46cf6ff5fb fix batchnorm docs (#1284) 2017-04-18 15:12:38 -04:00
fcf4deac7d Fused RNN kernel remove explicit instantiation, isn't needed. 2017-04-18 11:07:58 -07:00
1feb120d93 Mark input as optional for gradInput in Tanh and Sigmoid 2017-04-18 10:33:33 -07:00
2ca071d730 Remove double precision math from LogSigmoid too 2017-04-18 10:28:13 -07:00
8a901c510d Update ops for Sigmoid and Tanh 2017-04-18 09:55:11 -07:00
9150e33765 Add support for creating docsets. (#1276)
Docsets are an offline documentation format introduced by Dash.app and
supported by Zeal and some other open-source clones.
2017-04-17 16:35:02 -04:00
e4478804ce Fix patched_make_field for newer Sphinx versions. (#1275)
Not sure since which version that change is needed, but using v1.5.5 here.
2017-04-17 16:17:58 -04:00
a220f2c3aa Fix group-convolution w/o biases on CPU. (#1273)
* Fix group-convolution w/o biases on CPU.

Not having this guard will cause a crash further down in the `cat`
function when it uses the first element in the passed list to create a
new tensor. (And even after that, cat doesn't handle nulls well.)

* Added test for groupconv w/o bias on CPU.
2017-04-17 14:53:28 -04:00
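The previously crashing case, as a minimal sketch (current tensor API; shapes are illustrative):

    import torch
    import torch.nn as nn

    # grouped convolution with bias disabled on CPU used to crash in cat
    conv = nn.Conv2d(4, 8, kernel_size=3, groups=2, bias=False)
    out = conv(torch.randn(1, 4, 8, 8))
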
15267ac009 fix typo 2017-04-15 13:08:58 -04:00
cb66e9cf78 torch.diag bug fix (#1251) 2017-04-12 20:59:12 -07:00
c852883086 add named_parameters that yield name and value of parameters (#1242) 2017-04-12 16:32:36 -07:00
ab77e4c3d7 Merge commit '62c584ba7972dbba404766aa06d1a558282b4169' 2017-04-12 15:06:58 -07:00
2444278b8b Merge commit '4336e9ea6641b8ac2814eaef2adef64e4106459c' 2017-04-12 15:06:10 -07:00
62c584ba79 Fix abs with char and short cuda types. (#747) 2017-04-12 15:04:59 -07:00
fbd53d87bf block wide reduction with multiple values to reduce at once (#745) 2017-04-12 15:04:43 -07:00
71303b8af4 Autograd deadlock for recent glibc fix (#1243) 2017-04-12 22:24:31 +02:00
4336e9ea66 Revert "make it compile on Windows + use ilp64 MKL" (#1002) 2017-04-12 12:07:16 -07:00
d48afd41f9 Add print string for MaxPool3d, change for MaxPool2d (#1115) 2017-04-12 15:58:28 +02:00
e21e4bf3e8 add pyyaml to conda note here as well 2017-04-11 21:21:18 -07:00
8e36339911 Merge commit '0925c91e80cc1b3a86fcbc54570f5bb204c9cb77' 2017-04-11 18:00:44 -07:00
5391fe8953 addr zeroes output buffer when beta=0 2017-04-11 18:00:11 -07:00
0925c91e80 addr zeroes output buffer when beta=0 2017-04-11 17:59:42 -07:00
253c854da5 update Dockerfile not to use requirements.txt 2017-04-11 15:42:05 -07:00
7c59754d24 update source build instructions 2017-04-11 15:24:31 -07:00
2bf7dc643f Merge commit 'aec658f8708a6f4448329da006d14ff2e13dc821' 2017-04-11 15:02:36 -07:00
ce30c76823 Merge commit '2b37ecfccf810a8e21c2c9ac9a943ce2f7c01015' 2017-04-11 15:02:16 -07:00
a8d60ad3ac fix THNN headers 2017-04-11 15:00:30 -07:00
aec658f870 fix THNN headers 2017-04-11 14:57:11 -07:00
2b37ecfccf fix THNN headers 2017-04-11 14:56:53 -07:00
01a35dcace Fix coalesced CUDA collectives for nonhomogeneous lists 2017-04-11 14:48:54 -07:00
afeeb81e79 Add support for keyword arguments in torch.cat 2017-04-11 14:48:54 -07:00
6002f94232 Fix is_tensor and is_storage for old-style classes 2017-04-11 14:48:54 -07:00
a5c7d98611 Import TripletMarginLoss 2017-04-11 14:48:54 -07:00
605b3c86ce Retain the type of numpy scalars in collate_fn 2017-04-11 14:48:54 -07:00
2087b1157a Improve serialization error messages 2017-04-11 14:48:54 -07:00
81e972031d Handle all errors if Module's sources can't be retrieved 2017-04-11 14:48:54 -07:00
e9ff57176b Fused pointwise kernels for GRU/LSTM 2017-04-11 13:42:06 -07:00
a739960515 Merge commit 'cfa504691c2ce5e10010ffb6cd43001c59109aea' 2017-04-11 13:41:54 -07:00
f43320dbf2 Merge commit '0dc52abe9a673547caf79ac64c73e8e16fb37b33' 2017-04-11 13:41:42 -07:00
cfa504691c Fused pointwise kernels for GRU/LSTM 2017-04-11 13:36:38 -07:00
0dc52abe9a Fused pointwise kernels for GRU/LSTM 2017-04-11 13:36:02 -07:00
0b50f794e9 Use thnn version of Tanh/Sigmoid instead of autograd. (#1234) 2017-04-11 12:49:57 -07:00
2abbb5133c Fixing function signatures: long -> ptrdiff_t (#1232) 2017-04-11 11:37:21 -07:00
ade105fb7c update README to install pyyaml from conda (#1231) 2017-04-11 10:23:45 -07:00
4e693d12ab Merge commit '79c4cb96b16dac603247ffd88c473e84565915a9' 2017-04-10 14:35:54 -07:00
79c4cb96b1 fix memory leak in btrisolve and getri 2017-04-10 14:35:07 -07:00
f6fef3718e fix typo in autograd.rst (#1219) 2017-04-10 01:16:59 -04:00
bc0ed9298d remove incorrect version in readme 2017-04-09 14:44:44 -04:00
040cf42643 Merge pull request #455 from twitter-forks/indexlinear
Adding IndexLinear
2017-04-09 13:52:56 -04:00
6d9ad1d66a Adding IndexLinear (#1181)
* Add IndexLinear

* Fixes to IndexLinear

- Fix IndexLinear test
- make it better for multithreaded case
- fix a glitch in the C code
- improve the reset() method
- fix the weight allocation.
- remove "fakeBatch" possibility as it's not used
- clamp normalized values at evaluation time instead of just dividing by max.
- add assert on the keys/values dimensions in IndexLinear.
- invert order of weightDecay in the case of output dim > 1.

* Changes required to support IndexLinear in CUDA

* Adding support for flattened inputs for IndexLinear

* Doc for IndexLinear + fix for when the input format changes from one batch to another.

* Cleaning up IndexLinear documentation

* Changes required to build with latest torch

* Adding benchmark script for IndexLinear

* Bugfixes and cleanup of IndexLinear.lua

- Fixed bug that occurs when performing multiple accGradParams +
  updateParams

- All the data required for the updates is put in a single table

- Added :parameters method
2017-04-09 13:51:45 -04:00
64ee4056d7 updated docker image inside the docs (#1216) 2017-04-08 10:29:03 -04:00
55d69b5ade Merge commit '88bcfc15316e3c878237a8f95aeb6e72402c90ff' 2017-04-07 17:20:52 -07:00
0d7d6e1f0d Merge commit '662163bef68a9d64f3cb13a903638c870c0b4aa6' 2017-04-07 17:20:15 -07:00
b16a352a3b Fix remainder and cremainder for integer types 2017-04-07 17:17:44 -07:00
88bcfc1531 Fix remainder and cremainder for integer types 2017-04-07 17:16:59 -07:00
662163bef6 Fix remainder and cremainder for integer types 2017-04-07 17:16:31 -07:00
4026593240 check for beta=0 and avoid multiply in sparse mm (#1211)
* check for beta=0 and avoid multiply in sparse mm
2017-04-07 20:14:32 -04:00
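An illustrative Python restatement of the check (the actual change lives in the C sparse-mm kernel; addmm_like and its arguments are hypothetical names):

    import torch

    def addmm_like(beta, t, alpha, a, b):
        # when beta == 0 the result buffer is never read, so the
        # beta * t multiply can be skipped entirely
        if beta == 0:
            return alpha * torch.mm(a, b)
        return beta * t + alpha * torch.mm(a, b)
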
a931064a52 Merge commit '441d75ce569f89bad3e2f1f2a2075e68ae3bc76b' 2017-04-07 16:57:05 -07:00
441d75ce56 Adapts basic operations to new THXVector interface 2017-04-07 16:56:12 -07:00
3de56785fa fix conv1d test and add for padding 2017-04-07 13:56:02 -07:00
5ee8536a02 Merge commit 'a89317a9d407241c97fe4486b3c88de8578445d7' 2017-04-07 13:49:18 -07:00
f00a5d2f54 Merge commit '66a20e5c328836c1eb720cf4e2eb916366aae487' 2017-04-07 13:47:25 -07:00
a89317a9d4 fix types in unfold.c 2017-04-07 13:32:04 -07:00
e48db02e10 remove unused python-level BatchNorm.py 2017-04-07 16:27:16 -04:00
7f2553bc6f dont use cudnn batchnorm for cudnn < 5.1.10 2017-04-07 16:27:16 -04:00
66a20e5c32 Support TORCH_NVCC_FLAGS environment variable
This is already supported in cutorch since August 2016, and is used in
pytorch integration (to reduce the binary size).
2017-04-07 18:23:22 +02:00
37d95687c4 Merge commit 'ae1c365dbdbf667ae24c57eec9f2e6b9debf16bd' 2017-04-06 16:37:31 -07:00
f0c7124420 Allow support for negative dimension argument for all functions 2017-04-06 16:37:00 -07:00
ae1c365dbd Add TH_INDEX_BASE to nDimension and stride functions 2017-04-06 16:30:11 -07:00
e7f5220dfa device_ids can be None again in data_parallel (#1187) 2017-04-06 10:30:53 -04:00
a7ae04a657 fix precedence problem when building with debug python (#1201) 2017-04-06 10:30:16 -04:00
7f03182bfa sizeAverage -> size_average in docs 2017-04-06 01:31:02 -04:00
9f2a5d804d Add a flag to fix when dataset size is not divisible by batch size. (#1133) 2017-04-06 00:18:43 -04:00
aa506fa4d7 fix docs typo 2017-04-05 23:42:02 -04:00
d82cad3019 implement nn.Module.__dir__ (#1142) 2017-04-05 22:18:34 -04:00
9504246c32 add triplet margin loss (#1165) 2017-04-05 22:17:58 -04:00
81cf3dbf79 Merge commit '6bd4ecd15390517c68d598d236ffb0929ade277c' 2017-04-05 19:07:01 -07:00
12f1b4f76c Merge commit '84bdbe5ab4b602b021ff494487c8ad57457052d3' 2017-04-05 19:06:14 -07:00
84bdbe5ab4 btrisolve: Add sz checks, correct B's ordering, support nrhs>1. 2017-04-05 19:05:20 -07:00
85954032d9 fix doc formatting 2017-04-05 22:02:29 -04:00
1a04b92226 add note regarding SGD momentum 2017-04-05 20:45:41 -04:00
6bd4ecd153 Use thrust::inclusive_scan for 1D cumsum/cumprod (#742)
For large 1D tensors thrust::inclusive_scan is much faster than our
current implementation.
2017-04-04 21:05:10 -04:00
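What got faster, user-side: cumulative ops on large 1-D CUDA tensors. A rough sketch (assumes a CUDA device; the size is arbitrary):

    import torch

    x = torch.randn(10000000).cuda()  # large 1-D tensor
    y = torch.cumsum(x, 0)            # now backed by thrust::inclusive_scan
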
04f5b5ea83 Merge commit '5b40e4245d573ae0a6c2da70a0b712528aab2bce' 2017-04-04 15:39:35 -07:00
5b40e4245d Fix typo and make btrisolve work for doubles on the CPU. 2017-04-04 18:29:30 -04:00
d122b4e4ec Update btrisolve docs to the newest interface. 2017-04-04 15:21:16 -04:00
ccfc4567dc Merge pull request #78 from ilya-biryukov/master
Fix compilation error when compiling with 'clang -x cuda'.
2017-04-04 09:47:52 -07:00
8ce1382e99 make it compile on Windows + use ilp64 MKL (#981) 2017-04-03 18:02:15 -04:00
91c4ba7980 Add torch.arange and deprecate torch.range 2017-04-03 10:38:58 -04:00
03f1cab801 Unify argument names in norm and renorm 2017-04-03 10:38:58 -04:00
fa2c566353 Add Variable.type_as 2017-04-03 10:38:58 -04:00
2d1122739c Raise AttributeError in Module.__getattr__ 2017-04-03 10:38:58 -04:00
7861f585fe Reshape grad in dot 2017-04-03 10:38:58 -04:00
3abf2ef225 Merge pull request #991 from BTNC/win
add /arch:AVX /arch:AVX2 explicitly for msvc so it compiles on windows
2017-04-02 13:32:57 -04:00
70c4b82eba add /arch:AVX /arch:AVX2 explicitly for msvc 2017-04-02 20:47:29 +08:00
274b5c9003 Allow unhashable inputs to parallel_apply 2017-04-01 20:11:20 +02:00
dfa2d26830 make random_ range correct when both lower and upper are specified 2017-03-31 15:37:24 -04:00
559ae078b8 Fix Option constructor in invalid argument error printing code (#1160) 2017-03-31 15:35:35 -04:00
030ff4928a Merge commit 'a216e377b3844ac9c7882bd391a00f4e0ae718e7' 2017-03-31 11:45:37 -07:00
0829bffdec Merge commit '403cad46dc91a2bc2f6889754055decd6f3d53c7' 2017-03-31 11:45:24 -07:00
ffc7911bec Merge commit 'd8ae7893e056ebf4e7a5e96bab2c3b69f196ddfd' 2017-03-31 11:45:06 -07:00
ff1fde6151 Merge commit 'a3bfb9f376a57fb63e89ddf70f57353f19ed9d69' 2017-03-31 11:44:48 -07:00
a216e377b3 Merge pull request #456 from twitter-forks/addmm-fixes
Using temporary variables when performing transpose + addmm
2017-03-31 14:44:07 -04:00
b13b7010b9 check for nvidia driver's sufficiency before checking for number of CUDA devices (#1156) 2017-03-31 12:19:59 -04:00
a3bfb9f376 THVector_(add),(mul) -> (adds),(mul) for VSX.
This was previously completed for other architectures.
2017-03-31 08:50:23 -07:00
5c79046d39 Use persistent tensor to store exp_inf (part of optimizer's state) (#1152) 2017-03-31 10:30:31 -04:00
30fd222b80 implement autograd function cross (#1138) 2017-03-31 01:45:51 -04:00
761eef1f19 Minor typo fix in backward function in torch/autograd/variable.py (#1143) 2017-03-30 11:23:28 -04:00
d8ae7893e0 Get rid of warp-synchronous code (#739)
Time to get rid of warp-synchronous code. It will break!
2017-03-30 01:20:43 -04:00
a95ce9e98f Using temporary variables when performing transpose + addmm 2017-03-29 16:56:39 -07:00
403cad46dc Using temporary variables when performing transpose + addmm 2017-03-29 16:14:13 -07:00
8aa1cefed8 Fix deadlock in autograd (#1140) 2017-03-29 16:19:40 -04:00
0d908d813b Implements Cumsum function for autograd (#1122) 2017-03-29 17:45:57 +02:00
1c391f6f93 bump version 2017-03-29 10:08:34 -04:00
be146fd721 Add btriunpack and update the btrifact test. 2017-03-29 13:42:13 +02:00
2979f4b989 add more functions to docs 2017-03-29 01:29:17 -04:00
22b3600f19 add samplers to documentation 2017-03-29 00:33:07 -04:00
215813d7ac Change dockerfile to support for cudnn v6 (#1135) 2017-03-28 20:05:04 -04:00
dc7695a47a Update links for tutorials in README (#1123) 2017-03-28 14:21:40 +02:00
032a65edff modify pip uninstall command in CONTRIBUTING.md 2017-03-28 14:20:49 +02:00
e4b4e515cd add mode to cwrap 2017-03-27 13:29:14 -07:00
4b1f5f4bd6 Merge commit 'afd576ec0e389db3e47efe44652c488b1706f168' 2017-03-27 13:26:50 -07:00
afd576ec0e Add mode kernel 2017-03-27 15:58:47 -04:00
95aa2af377 btrisolve: Make a Tensor method and update argument order
Also update docs for btrifact and btrisolve to the newest interface.
2017-03-27 15:46:49 -04:00
6774d39c96 Merge commit '5d274cd4991022d63b014cc8917e00c15441d3f4' 2017-03-27 11:54:08 -07:00
567faedc59 Merge commit '8051dec608368fed3569c7513292785083adc53c' 2017-03-27 11:53:41 -07:00
3eab8a71e2 Added docstring to add_module (#1116) 2017-03-27 11:09:24 -04:00
2fd4d088ff add Adaptive pooling methods to docs 2017-03-26 22:43:46 -04:00
5d274cd499 Update btrisolve argument order. 2017-03-26 13:07:24 -04:00
8051dec608 Update btrisolve argument order. 2017-03-26 13:06:34 -04:00
f2c1071c33 Adaptive max and average pooling (1D & 2D) (#1084) 2017-03-26 17:09:28 +02:00
bb71117ecc Cwrap arg assign (#1102) 2017-03-26 13:53:28 +02:00
d25433a099 Fix docker build commands (#1103) 2017-03-25 16:18:33 -04:00
7dd45490f8 don't use inplace backward, remove unnecessary zero for grad_input (#1079) 2017-03-25 20:04:48 +01:00
bf632544e6 Pass NULL rinfo_ to btrifact by default (#1089) 2017-03-24 19:49:40 -04:00
282402d4f3 Revert "Add back zero fill for ger" (#1093)
This reverts commit 5a761dbe65d2221e9c200b3f8ea0590b5d9b923f.
2017-03-24 19:49:31 -04:00
1461709ea0 Improving the performance of IndexLinear:updateOutput
- Removes separate kernel for updateOutputTrain
2017-03-24 16:34:31 -07:00
cce03074f5 Merge commit '3acbbb30f2bdc6ccf4ffb6f7d568e7916d4e384d' 2017-03-24 16:19:44 -07:00
f2f63773d8 Merge commit '52911f9e47f679045a238eb9dfdc5db55bf98cc9' 2017-03-24 16:19:19 -07:00
84aa41824c Merge commit 'b4fe5ad641181f30bdcc4749c949206a3ebb04b4' 2017-03-24 16:19:05 -07:00
25c8a117af Merge commit 'e8196f990db4ba368010f0d950bebf1fb13c2888' 2017-03-24 16:18:52 -07:00
ae122707b5 Don't do extra resize in linear bias 2017-03-24 23:41:15 +01:00
b4fe5ad641 Use zero instead of mul when beta == 0 in addr 2017-03-24 13:09:00 -07:00
5a761dbe65 Add back zero fill for ger
Ger does not have beta argument, so has to be zero-filled.
2017-03-24 21:03:02 +01:00
dd893391d5 Add argument to children to yield the name of the modules (#941) 2017-03-24 20:02:05 +01:00
649f04d077 Added Pascal nvcc flags, bumped version 2017-03-24 11:58:14 -07:00
e8196f990d Make rinfo_ argument optional in btrifact 2017-03-24 09:01:36 -07:00
269b77a1b2 Make rinfo_ optional in btrifact 2017-03-24 09:00:39 -07:00
476d85dd3f DataLoader: Fix batch data type for numpy array (#1074) 2017-03-24 11:34:24 -04:00
63f6c0d692 add Pairwise distance (#835) 2017-03-24 11:29:40 -04:00
b546fa3fcd add assertTrue to padding tests 2017-03-24 15:27:51 +01:00
1d656b6769 Ensure displayed progress in ProgressMonitor is between 0 and 100%.
Fixes #1086
2017-03-24 15:21:52 +01:00
3acbbb30f2 Fix inconsistent in-place and out-of-place for HardTanh
in-place and out-of-place updateGradOutput results are different where input=min_val or input=max_val
2017-03-23 17:27:29 -07:00
52911f9e47 Fix inconsistent in-place and out-of-place implementations
Currently, in-place and out-of-place updateGradOutput produce different results for input=max_val or input=min_val: in-place won't backprop the gradient at those points, while out-of-place will.
2017-03-23 17:22:55 -07:00
a65e0f488c Remove zero fill where not needed (#1077) 2017-03-23 19:44:00 -04:00
8dc5d2a22e export current_blas_handle 2017-03-23 23:32:45 +01:00
ed97f3f854 Adding support for flattened inputs for IndexLinear
- Adding relevant tests
2017-03-23 14:18:41 -07:00
a231fe8fc5 IndexLinear support for cunn 2017-03-23 14:18:01 -07:00
bb353ccc17 Add batch triangular factorization and solves, add IntegerTensor to cwrap (#903) 2017-03-23 15:06:00 -04:00
ced0054a9e Fix formula for stddevs grad in Normal function (#1076) 2017-03-23 14:32:34 -04:00
68ee5ede29 make inplace tests compare input grads 2017-03-23 18:54:00 +01:00
4df98e2927 Merge commit '3865606299b1fbcd0a94cef4a66c1bc007246da8' 2017-03-23 08:39:43 -07:00
6ccac5ce28 Merge commit 'd3334db6274d7a3cd07f20d583056e453dc8134d' 2017-03-23 08:39:30 -07:00
3865606299 adding batch triangular factorization and solves, add IntegerTensor to cwrap 2017-03-23 11:37:00 -04:00
d3334db627 adding batch triangular factorization and solves, add IntegerTensor to cwrap 2017-03-23 11:35:35 -04:00
50f5a4dd18 fix BCE loss formula visualization (#1072) 2017-03-23 11:27:21 -04:00
b60936b9ae fix NLLLoss2d documentation 2017-03-23 10:06:40 -04:00
2d750b9da5 fix typo 2017-03-23 09:40:06 -04:00
ca376d4584 implement autograd function trace 2017-03-23 10:37:52 +01:00
ef183a1d23 Merge commit '5cd313ed23a3b11ddd739bcfedaee6e310e4e438' 2017-03-22 19:25:46 -07:00
f4d8944973 fix OSX fread bug (#1068) 2017-03-22 22:06:14 -04:00
6b7aef63ac Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 20:36:52 -04:00
b3ab4b1094 Check torch.backends.cudnn.enabled, padding, and output_padding (#996)
* Check torch.backends.cudnn.enabled
* Don't allow negative padding and output_padding values
2017-03-22 19:42:11 -04:00
1e8cb82a2d Break only after the update in L-BFGS 2017-03-22 18:58:42 -04:00
dd399a8d68 Return total param norm from clip_grad_norm 2017-03-22 18:58:42 -04:00
faac0f5c25 Fix torch.cat bugs
Always use the PySequence API and disallow catting along nonexistent
dimensions.
2017-03-22 18:58:42 -04:00
c36f47bd1e Make random_ exclusive and make generator kwarg only in all random
functions
2017-03-22 18:58:42 -04:00
3d1888cd95 Fix size mismatch in CosineEmbeddingLoss backward 2017-03-22 18:58:42 -04:00
97a82a3018 fix formatting in upsampling docs (#1067) 2017-03-22 18:06:31 -04:00
5cd313ed23 Fix TH_TENSOR_APPLYX_D in the case where the dimension of interest is the inner dimension 2017-03-22 13:15:01 -07:00
b414494035 Merge commit '714b2b8bf657afe41cc8503998b6d919339b8075' 2017-03-22 12:49:29 -07:00
c10efc646e Merge commit 'e17d84d38edf6094175deead555abbc96321b69f' 2017-03-22 12:49:11 -07:00
348531ad8d Merge commit '0056b0883426e38ffbd646c040b6c281d12673f2' 2017-03-22 12:48:57 -07:00
714b2b8bf6 Merge pull request #453 from apaszke/lookup_renorm
Cast accumulator in LookupTable renorm to accreal
2017-03-22 11:53:41 -04:00
fe4bd5066b Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 11:45:02 -04:00
e17d84d38e Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 11:44:28 -04:00
b9aef6bc03 Fixing default values for LR and Epsilon (#895)
It seems that the default values for LR and Epsilon (previously, 1E-2 and 1E-38 respectively) were different from the ones recommended by the authors (2E-3 and 1E-8, respectively). Other packages such as Keras (https://github.com/fchollet/keras/blob/master/keras/optimizers.py#L474) and Lasagne (https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py#L612) use the suggested values as well.
2017-03-22 11:34:39 -04:00
0056b08834 Narrow V when returning only some right singular vectors 2017-03-22 08:33:03 -07:00
bd0df61bb5 Cast accumulator in LookupTable renorm to accreal 2017-03-22 08:29:39 -07:00
d9678c2e34 Correct typo in batchnorm documentation 2017-03-22 13:55:45 +01:00
b3c0aa3b7d fix a typo in ffi doc (#1055) 2017-03-21 15:37:48 -05:00
77fbc12f23 Fix some deadlocks when torch_shm_manager is not found (#1030)
- Add additional timeouts to test_multiprocessing to reduce chances of
   hanging indefinitely on failure
 - Add missing header guards
 - Fix typo
 - Check that torch_shm_manager exists in torch/__init__.py
2017-03-17 18:28:39 -04:00
7e46eb1613 Fixes for Prod and Expand functions (#1026)
Thanks to @ChangYong-Oh for the original implementation.
2017-03-17 18:24:44 -04:00
821656d2d8 add CONTRIBUTING document 2017-03-17 07:59:37 -04:00
86e40ed875 Fix a typo in docs about pinned memory buffers (#1023)
* remove misleading guide for BCELoss

* fix docs about pinned memory buffers
2017-03-17 05:08:03 -04:00
b9379cfab7 Use cuDNN and NCCL symbols from _C library (#1017)
This ensures that we use the same library at the C++ level and with
Python ctypes. It moves the searching for the correct library from
run-time to compile-time.
2017-03-16 16:10:17 -04:00
f0b75c4aa4 Merge pull request #729 from shenxiul/cuda_linspace
linspace and logspace for CUDA Tensors
2017-03-16 14:03:00 -04:00
7654b3f49e Add function to compute cross_entropy for 2D image (#802) 2017-03-16 17:34:04 +01:00
37ebbc2809 the length of any item in padded_sequence should be greater than 0 (#1013) 2017-03-16 17:32:43 +01:00
8241cd7b6e Fix compilation error when compiling with 'clang -x cuda'.
Functions vFetch and vStore are not found by ADL with clang,
so they need to be declared before usage in ReduceCopy.
2017-03-16 12:01:11 +01:00
29ddbc3e37 implement linspace, logspace and range in CUDA 2017-03-15 20:50:30 -07:00
16a133ed9a Fixes for testing on FB infra (#1009)
- make each test in test_autograd have a unique name ignoring case
 - assemble all tests when test_legacy_nn is imported
 - import Python.h in PtrWrapper.h
2017-03-15 18:37:11 -04:00
c4d1318662 Fix map_location in torch.load (#1006) 2017-03-15 16:54:19 -04:00
379ae6d865 Refactor out dispatchStateless (#1007)
Some of the error messages were incorrect due to erroneous
'tensor == THPDefaultTensorClass' checks
2017-03-15 16:24:55 -04:00
24376ff9d3 Merge pull request #723 from killeent/scan-primitive
add implementation of inclusive scan via upsweep-downsweep
2017-03-15 14:37:21 -04:00
be6322e4b5 Update nn.init docstrings to correctly reference the module (#1001) 2017-03-15 11:17:59 -04:00
62063b2f62 Fix docs for pointwise ops (#845) (#985)
* add torch.nn.init docs to the source folder
2017-03-15 11:08:05 -04:00
13b1580613 add F.pad to docs 2017-03-15 00:09:14 -04:00
e50a1f19b3 Use streams in scatter to overlap copy with compute 2017-03-14 22:46:07 +01:00
e86db387ba Fix conv1d backward segfault (#999) 2017-03-14 16:15:53 -04:00
704ee3ca68 Use cudart symbols from the main program.
Our extension library links against cudart and pulls in the symbols. Use
LoadLibrary(None) to use the same symbols as the _C extension.

This fixes the PyTorch wheel when you don't have system CUDA installed.
2017-03-13 19:45:34 -04:00
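A sketch of the LoadLibrary(None) technique described above (the symbol lookup shown is illustrative):

    import ctypes

    # dlopen(NULL): resolve against symbols already loaded into the process,
    # i.e. the cudart that the _C extension was linked against
    libcudart = ctypes.CDLL(None)
    count = ctypes.c_int()
    libcudart.cudaGetDeviceCount(ctypes.byref(count))
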
9004652c7b updated the documentation to remove the unnecessary copy grads when using multiprocessing 2017-03-13 19:04:17 -04:00
aca6ce984c change lookup table sort 2017-03-13 13:55:16 -07:00
ed8773f7bd add legacy_serialized.pt to gitignore 2017-03-13 16:37:35 -04:00
48f48b6ff2 fix more flaky VolumetricMaxPooling tests 2017-03-13 14:38:27 -04:00
615b27eadf fix corner case in SetItem of Variable 2017-03-13 14:38:27 -04:00
170d790b66 fix doc of conv3d in conv.py (#989)
the second dimension should be height.
2017-03-13 11:30:13 -04:00
e216f557fd Fixes issue returning strings from a Dataloader with pin_memory=True (#908) 2017-03-13 10:11:07 +01:00
997312c233 Add WeightedRandomSampler (#980)
Samples elements from `[0,..,len(weights)-1]` with given probabilities (weights). So far there is no means either to introduce sample weights in loss functions or to use them while sampling from a dataset. This is an attempt to add the functionality for the latter.
2017-03-13 00:27:05 -04:00
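A minimal usage sketch, assuming the (weights, num_samples) constructor this PR describes (the weight values are illustrative):

    from torch.utils.data.sampler import WeightedRandomSampler

    # unnormalized per-element weights; the last element is drawn most often
    weights = [0.1, 0.2, 0.3, 0.4]
    sampler = WeightedRandomSampler(weights, 100)
    # pass as DataLoader(dataset, sampler=sampler, ...) to weight a dataset
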
d602b3a834 Allow submodules and parameters to shadow attrs on assignment 2017-03-12 13:31:32 -04:00
f531d98341 Fix memory leak in torch.from_numpy 2017-03-12 13:31:32 -04:00
6bdd5ecaf5 Remove some unnecessary AutoGPU calls 2017-03-12 13:31:32 -04:00
bfbde9d6eb Fix Embedding bug when max_norm was used 2017-03-12 13:31:32 -04:00
b9c816a796 Fix run_test.sh --coverage option. (#983) 2017-03-11 19:26:02 -05:00
2f5c215d34 Update setup.py (#981)
Adding `description` to `setup.py`
2017-03-11 12:14:07 -05:00
01650ac9de add torch.nn.init docs to the source folder (#979) 2017-03-11 10:11:30 -05:00
ce536aa355 fix example in docs for NLLLoss 2017-03-10 16:48:08 -05:00
fc0af33a18 key only block-wide bitonic sort 2017-03-10 11:50:43 -08:00
c7c4778af6 modify docs of broadcast to fix issuse #940 (#970) 2017-03-10 09:54:43 -05:00
73a65cd29f simple ordering fix to avoid gcc warning 2017-03-09 17:10:59 -08:00
b785ed0ac0 Fix Embedding and CosineEmbeddingLoss on non-float CUDA (#965) 2017-03-09 18:04:40 -05:00
b2d077d81d Update _tensor_docs.py (#966) 2017-03-09 18:04:19 -05:00
b1c2714ad5 Add momentum and centered options to RMSProp (#810)
* add momentum and centered options

Add two options:
 - Momentum (like SGD's momentum)
 - Centered RMSprop, as in Graves 2013 ( https://arxiv.org/abs/1308.0850 ): the gradient is normalized by a running estimate of its variance

* some PEP8

* bug in default

* bug2

* sign mistake

* alloc of momentum & centered only if needed

* add link to docstring

* some pep8 on docstring

* implement __setstate__() for backward compatibility

* correct grammar mistake

* multiply by lr when adding delta to params

* rename momentum variables

* change __init__ params order
2017-03-09 10:04:32 +01:00
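Usage sketch of the two new flags (parameter names per the bullets above; model is a placeholder module):

    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(10, 2)
    # centered RMSprop normalizes the gradient by a running estimate of its
    # variance (Graves 2013); momentum works like SGD's
    opt = optim.RMSprop(model.parameters(), lr=1e-2,
                        momentum=0.9, centered=True)
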
a462edd0f6 Docs(RNN|GRU|LSTM): Note dropout applies to all layers *except* the last layer (#961)
This is an important clarification to make: without it, users are misled as to where they may need to add dropout, and clarifying the situation requires delving into the backend implementation.
4647f753bc/torch/nn/_functions/rnn.py (L73)
2017-03-08 18:09:11 -05:00
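The clarified behavior, as a sketch: dropout sits between stacked layers, so a single-layer RNN ignores it entirely (sizes are illustrative).

    import torch.nn as nn

    # dropout applies to the outputs of layers 1..num_layers-1, not the last
    rnn = nn.LSTM(input_size=32, hidden_size=64, num_layers=2, dropout=0.5)
    # with num_layers=1 the dropout argument would have no effect
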
c2425fc9a1 Fix build warning for C file 2017-03-08 21:28:57 +01:00
fbcedf2da2 Merge commit '3d95e13b332e1b31d706b59c3b67f886958ece79' 2017-03-08 09:09:46 -08:00
3d95e13b33 Check event_count before merging blocks 2017-03-08 08:49:04 -08:00
228e1a8696 Add CUDA caching allocator accessor 2017-03-08 08:29:50 -08:00
3fa8a3ff46 add implementation of inclusive scan via upsweep-downsweep 2017-03-08 07:34:14 -08:00
4647f753bc Merge commit '0f872ed02fbaf5b326f235b3f18724171b061416' 2017-03-07 14:45:01 -08:00
7ba5e7cea1 fix VolumetricMaxPooling test instability (#952) 2017-03-07 10:55:46 -05:00
9b626a8047 Fix documentation - replace 'matrix' with 'vector' (#951) 2017-03-07 10:40:18 -05:00
bd0e9a73c7 Fix some simple build error on MacOS (#949)
Issue #948

Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-03-07 09:47:49 -05:00
2b1cd919ce Update extending.rst (#933) 2017-03-06 23:23:14 -05:00
8e46a15605 add docs for set_printoptions to sphinx (#945) 2017-03-06 21:52:37 -05:00
15a9fbdedb Merge pull request #881 from colesbury/parallelize_backwards
Parallelize autograd backwards
2017-03-06 16:57:19 -05:00
6336300880 Fix bug where adding a hook could replace an existing hook.
We were keying hooks by RemovableHandle id. However, we don't hold onto
handles and ids of dead objects can be reused. This replaces id(handle)
with a global counter.
2017-03-06 12:47:53 -08:00
5073132837 Implement 'pre' and 'post' hooks at the C++ autograd level 2017-03-06 12:47:53 -08:00
65b66264d4 Improve broadcast/reduce performance by coalescing tensors 2017-03-06 12:47:53 -08:00
0f872ed02f Add THCCachingAllocator_recordStream()
This is similar to THCCachingHostAllocator_recordEvent() but on CUDA
allocations. It's useful for overlapping copies with computation. The
workflow is approximately:

  0. allocate dst tensor on copy stream
  1. copy from CPU to GPU on copy stream
  2. synchronize the main stream with the copy stream via
     cudaStreamWaitEvent
  3. THCCachingAllocator_recordStream(dst, main_stream)

The recordStream() call is necessary to prevent the dst tensor from
being reused on the copy stream before the main stream finishes work.

Previously, you would need to insert a second cudaStreamWaitEvent before
dst is freed to force the copy stream to wait on the main stream.
2017-03-06 10:50:19 -08:00
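A Python-level mirror of the four-step workflow above, assuming the later Tensor.record_stream() API that exposes this hook (the C call itself has no Python binding in this commit):

    import torch

    src = torch.randn(1024).pin_memory()
    copy_stream = torch.cuda.Stream()
    with torch.cuda.stream(copy_stream):
        dst = src.cuda(non_blocking=True)                  # steps 0-1
    torch.cuda.current_stream().wait_stream(copy_stream)   # step 2
    dst.record_stream(torch.cuda.current_stream())         # step 3
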
761d6799be code syntax error in document (serialization.rst) (#937) 2017-03-06 10:06:04 -05:00
0d179aa8db Updated datasets.rst, combined all commits (#931)
Added MNIST in the docs

Updated incomplete cifar doc

Updated the datasets.rst to include all datasets
2017-03-05 17:38:28 -05:00
5b171ad7c2 remove misleading guide for BCELoss (#924) 2017-03-05 14:31:01 -05:00
ac9245aeb3 import numpy before setting dlopen flags (#928) 2017-03-05 14:30:13 -05:00
60736bdf99 fix corner case in kwargs for DataParallel (#930) 2017-03-05 14:27:52 -05:00
7d58765cee docs: Fixed example code bug in extending module doc. 2017-03-05 12:09:08 -05:00
76f7d749e4 bump version 2017-03-05 08:49:52 -08:00
0b7374eb44 add THCS to build_all flags 2017-03-05 11:32:43 -05:00
6fff764155 replace old select_compute_arch.cmake with new 2017-03-05 11:32:43 -05:00
8ced72ccb8 link THPP to THCS when CUDA available 2017-03-05 11:32:43 -05:00
b1ae7f90d5 Added functionality for data parallel table (#843) 2017-03-05 02:35:46 +01:00
8b61ee522e Merge commit 'aec182ae72d51dad0f46cdfe7ff9a41380d7da35' 2017-03-04 08:58:21 -08:00
76ca3eb191 Merge commit 'fea50a51ee2d9af15c42f785ab2232469357b557' 2017-03-04 08:58:02 -08:00
fea50a51ee reintroduce USE_AVX* for files which dont have -mavx* set 2017-03-04 08:55:43 -08:00
51e589ed73 fix critical bug in adds SSE implementation 2017-03-04 08:39:19 -08:00
2e87643761 remove fastmath for everything except simd/convolve 2017-03-04 08:16:47 -08:00
ba9a85f271 fix bug introduced in #952 2017-03-03 21:00:05 -08:00
0714d7a3ca set AVX/AVX2 flags only for specific files 2017-03-03 12:17:14 -08:00
34ce58c909 Parallelize backwards 2017-03-03 11:26:00 -08:00
c238ee3681 Fix issues with lazy grad initialization (#912) 2017-03-03 14:23:51 -05:00
f5338a1fb8 compile AVX and AVX2 intrinsic code in separate files. Cleanup use of USE_AVX and USE_AVX2 macros in favor of __AVX__ and __AVX2__ 2017-03-03 10:30:18 -08:00
d96ad41191 cleanup TH CMakeLists and THGeneral.h of unused flags 2017-03-03 09:48:26 -08:00
f17cfe4293 sparse tensor operations (#735) 2017-03-03 18:37:03 +01:00
aec182ae72 Support half precision in baddbmm 2017-03-03 16:15:39 +01:00
c93c884ee2 Add negative dimension to transpose and tests (#792) 2017-03-03 09:31:22 -05:00
c42a2d4d24 Fix dimension check for cat (#959)
* Use TH_INDEX_BASE when verifying dimension for cat

* Adding tests for cat when no dimension is specified.

- Also renamed ldimension to cat_dimension to be more specific.
2017-03-03 09:05:06 -05:00
f89252c336 Merge pull request #719 from twitter-forks/cat-fix
Fixes to cat
2017-03-03 09:04:06 -05:00
490c15fae9 Fix slicing with step (#905) 2017-03-03 09:00:14 -05:00
f2d72ba10f Revert "make handles to be thread-local"
This reverts commit 0720ba53b344809ce3d0bdfb1ea561afa5fe0646.
2017-03-02 17:48:24 -08:00
2108b42b92 Fix bug in cat when dimension is not specified.
- Code was using dimension specified which was negative
- Changed the cat_dimension variable to be more explicit
- Fixed code to use the cat_dimension variable
2017-03-02 16:14:09 -08:00
bae8df62d3 Add missing THCudaCheck around cudaMemcpy 2017-03-02 16:13:39 -08:00
98775b6bb4 Merge pull request #718 from killeent/templatize-scan
genericize PrefixSum --> PrefixScan via binary operator template parameter
2017-03-02 17:50:56 -05:00
b7cc2a501f genericize PrefixSum --> prefixScan 2017-03-02 14:31:27 -08:00
0720ba53b3 make handles to be thread-local 2017-03-02 11:10:49 -08:00
ff5fa11129 make mkl link to threaded version with GCC (#958) 2017-03-02 13:37:25 -05:00
5e7f5db332 add subset samplers (#888) 2017-03-02 09:26:10 -05:00
b5f7592140 boolean mode in module.train 2017-03-02 09:18:05 -05:00
f366e5fc81 Support int16 numpy conversions
issue #891
2017-03-02 09:15:57 -05:00
48f087f6ce C99 cleanup broke MSVC (#952)
* __pragma for MSVC.
2017-03-02 08:57:28 -05:00
7fef264bfa Bumping version to 1.3.3 2017-03-01 16:44:27 -08:00
8996811936 Only enable peer access for ring neighbors.
This enables support for systems with more than 9 GPUs attached to a single PCIe root complex.
2017-03-01 16:42:38 -08:00
c219a183d0 Fix copy/paste typo in error message 2017-03-01 16:42:38 -08:00
8e1d6f9b60 Fix crash in Reduce when non-root ranks have invalid recvbuff 2017-03-01 16:42:38 -08:00
7ad948ffa9 fix tests to not sys.exit(), also fix fatal error on THC initialization 2017-03-01 17:37:04 -05:00
3277d83648 Add Nesterov Momentum (#887) 2017-03-01 20:49:59 +01:00
1487278fdf Allow backprop through cuDNN RNN in eval mode
Handling of dropout descriptors has been improved too.
2017-03-01 19:42:39 +01:00
977630bc15 Handle duplicate backward roots in autograd 2017-03-01 19:42:39 +01:00
12efd53dba ConstantPad2d and F.pad (#856) 2017-03-01 19:39:44 +01:00
37e05485d9 added initialization schemes in torch.nn.init (#833) 2017-03-01 19:34:13 +01:00
c76770f40e Merge commit 'dfca8dfdc5988813ed5673589ffa4fdd1c4f3d2d' 2017-03-01 09:29:51 -08:00
da725830c2 Add support for variable length sequences in RNNs (#873) 2017-03-01 17:36:32 +01:00
fc6fcf23f7 Lock the cudaFree mutex. (#880)
Prevents NCCL calls from overlapping with cudaFree() which can lead to
deadlocks.
2017-03-01 11:29:25 -05:00
b190f1b5bc Add another pinned memory test.
Checks that pinned memory freed on a different GPU from which it was
allocated isn't re-used too soon.
2017-03-01 12:22:31 +01:00
dfca8dfdc5 ensure valid index in multinomial 2017-02-28 14:48:48 -08:00
b46d5e0b04 Fix NN bindings 2017-02-28 14:35:38 -08:00
f19a11a306 Merge commit '8e8022b7351401911e10b94aeb5ae35d32907705' 2017-02-28 14:35:20 -08:00
cfcf69703f Merge commit '80429ad9f7c4775f7f88344a2cf037e499f060b8' 2017-02-28 14:35:00 -08:00
e22b8e0d17 Merge commit '3cc89afde68a831434f3abe9e3af2ac0b134215e' 2017-02-28 14:34:44 -08:00
fbfba6bdca Merge commit '6ff77503645da59eeca5be473a1902e523c4adb3' 2017-02-28 14:34:29 -08:00
3cc89afde6 Merge pull request #713 from killeent/multinomial-indexing-fix
fix indexing bug in sampleMultinomialOnce
2017-02-28 17:13:44 -05:00
1e4aee057c Merge pull request #712 from killeent/multinomial-fixes
Fix sampleMultinomialOnce to better handle large distribution values
2017-02-28 17:12:48 -05:00
8dfcf7e35a Merge pull request #709 from colesbury/pinned_memory
Fix bug where pinned memory event could be recorded on incorrect device
2017-02-28 16:56:21 -05:00
76de151ddd Fix bug where pinned memory event could be recorded on incorrect device 2017-02-28 13:48:56 -08:00
2676cc46c2 fix indexing bug in sampleMultinomialOnce 2017-02-28 13:40:15 -08:00
1bf7bc9768 refactor sampleMultinomialOnce to use <real, accreal>, assertion for sum overflow 2017-02-28 12:46:12 -08:00
3c41c9fe46 Add AutoGPU RAII that doesn't depend on Python API (#875)
Separates out non-Python part of AutoGPU. This also compiles without
CUDA which is useful for generic tensor code.

Also fixes a bug where THCPAutoGPU may not always switch the device:

  THCPAutoGPU guard(-1);
  guard.setDevice(0);
  guard.setDevice(1);
  guard.setDevice(0);  // would not switch back to 0
2017-02-28 14:39:20 -05:00
6ff7750364 add TH_TENSOR_APPLY variants for optimized redux (+refactor) 2017-02-28 10:30:31 -08:00
4d25c3d048 address comments and add tests 2017-02-28 10:23:36 -08:00
267b7ade50 Speed up reductions on non-contiguous dimensions 2017-02-28 10:23:36 -08:00
80429ad9f7 THVector_(add) -> THVector_(adds) 2017-02-28 12:20:44 -05:00
5ca6516ecb THVector_(add),(mul),(div) -> (adds),(muls),(divs) 2017-02-28 12:10:47 -05:00
67f94557ff Expose torch.HalfTensor 2017-02-27 19:35:47 -05:00
61bd5a0643 [Lint] Address F811 2017-02-27 19:33:00 -05:00
748d011c8b [Lint] Address F812 2017-02-27 19:33:00 -05:00
5d5cfe2e57 [Lint] Address E731 2017-02-27 19:33:00 -05:00
7cbe255296 [Lint] Use flake8 instead of pep8 2017-02-27 19:33:00 -05:00
4ef303698c Merge pull request #711 from gchanan/getDeviceAllocator
Add getter for cuda device allocator.
2017-02-27 19:29:39 -05:00
83e8b3f6c3 Add getter for cuda device allocator. 2017-02-27 15:44:44 -08:00
502ebed796 Fix one more reference cycle and ensure correct flag propagation (#868) 2017-02-27 18:38:29 -05:00
68ff58d771 Expose a mutex that is held around cudaFree() calls.
NCCL can deadlock if cudaFree() is called while it's launching kernels.
This exposes a mutex that can be held to prevent cudaFree() calls in the
caching allocator.
2017-02-27 15:08:30 -08:00
969c1602e6 Add Tensor::copy() to THPP
For now, this only supports copying from the same type. We can add
polymorphic copying in the future.
2017-02-27 21:33:40 +01:00
5e1d6a3691 Update functional.py (#862)
Fixed documentation error in conv3d
2017-02-27 10:42:02 -05:00
533cfc0381 Minor fix of docs of ModuleList and ParameterList (#861) 2017-02-27 10:09:54 +01:00
2b23712dc3 Improve autograd memory usage (#859) 2017-02-26 22:37:26 -05:00
88275da5e8 CUDA documentation tweaks (#858) 2017-02-26 20:37:43 +01:00
bd7a5ad6f0 Make Optimizer.load_state_dict use __setstate__ 2017-02-26 20:02:42 +01:00
1f6f82dbcf Fall back to indexing compatible with numpy 2017-02-26 20:02:42 +01:00
1f8939937a Allow using expand to broadcast tensors 2017-02-26 20:02:42 +01:00
b3d41a5f96 Add docs for ModuleList and ParameterList 2017-02-26 20:02:42 +01:00
fec2d493a9 Reshape grad_output in basic ops 2017-02-26 20:02:42 +01:00
86ee75f63f Fix for Long and Byte tensor indexing of Variables 2017-02-26 20:02:42 +01:00
31941918cf Prevent creation of reference cycles with leaf Variables that don't require grad
Also, raise an error immediately if a leaf that requires_grad is
modified in-place. Some comments were updated too.
2017-02-26 20:02:42 +01:00
19a65d2bea Expose stateless methods for torch.cuda.HalfTensor 2017-02-26 20:02:42 +01:00
819d4b2b83 Add finite differences gradcheck (#851) 2017-02-26 08:35:24 -05:00
b87c113cf4 CUDA documentation enhancement and docs versioning (#848)
* Add more detail to CUDA documentation

Also adds better cross-linking to the pages that discuss relevant topics.

* Adds recommendation to torch.save docs

* Make the version numbers for the docs dynamic

Might need tweaks for beta, 1.0, etc.
2017-02-26 08:33:26 -05:00
b25182971f readme change for getting clarity on binaries 2017-02-26 07:52:13 -05:00
1ee2c47e37 Correcting the description of LSTM attributes (#854) 2017-02-26 13:30:55 +01:00
2dc563f1f1 Fix indexing when passing only an Ellipsis 2017-02-25 23:34:09 +01:00
15ba71a275 Rebase fixes 2017-02-25 17:14:52 +01:00
e5b3fc49d6 Implementation of the 3rd set of tensor functions 2017-02-25 17:14:52 +01:00
ae1766951d Link TH and THPP to THD (#57)
* Fix THD library build

* THPP dependency added

* Minor cleanup; Fix build on OSX
2017-02-25 17:14:52 +01:00
02d08dafd9 Add support for IPv6 in Data Channel TCP (#53) 2017-02-25 17:14:52 +01:00
13a5090695 Added a size change in MaxPool1d module and improved tests (#771) (#832)
The backend is SpatialDilatedMaxPooling, so the 3D input (N*C*L) is
changed to 4D size (N*C*1*L). Output indices will then range from 0 to L,
so this range will not cause the UnMaxPool1D error.

Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-25 08:53:30 -05:00
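An illustrative restatement of the size change in Python (the real reshape happens inside the module; shapes are arbitrary):

    import torch

    x = torch.randn(8, 3, 100)  # (N, C, L) input to MaxPool1d
    x4d = x.unsqueeze(2)        # (N, C, 1, L) for SpatialDilatedMaxPooling
    # the pooled output is squeezed back to (N, C, L_out), with indices in [0, L)
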
8e32e4c04c make wrap_generic_function importable 2017-02-24 14:27:54 -08:00
cf991310c3 c++ virtual function fix 2017-02-24 13:22:44 -08:00
938706099e adding environment flags to disable SIMD codepaths 2017-02-24 07:35:11 -05:00
3330287dc7 Update dataloader.py (#837) 2017-02-23 14:38:41 -05:00
38c8520adf adding unsqueeze to docs 2017-02-23 12:13:25 -05:00
492e1746af Fix THFree in THTensorApply 2017-02-23 06:01:13 -05:00
91a8109cfd Use C99 for openmp cleanup 2017-02-23 06:01:13 -05:00
161490d34a Add memcpy copy 2017-02-23 06:01:13 -05:00
9c302852eb comments fix 2017-02-23 06:01:13 -05:00
8654fcfd60 THVectorDefault style fix 2017-02-23 06:01:13 -05:00
b3d527d9a0 Tab style fix 2017-02-23 06:01:13 -05:00
4d495218c9 THTensorApply3 contiguous optimizations 2017-02-23 06:01:13 -05:00
13a041284c THTensorApply2 copy optimization 2017-02-23 06:01:13 -05:00
c60c1a003d TH_TENSOR_APPLY2 contiguous optimization 2017-02-23 06:01:13 -05:00
97add1a5ea comment fix 2017-02-23 06:01:13 -05:00
ca02930e47 Fill bug fix 2017-02-23 06:01:13 -05:00
20d5e95077 THTensorApply3 compress counter 2017-02-23 06:01:13 -05:00
eb4a7dc11d THTensorApply change dims to sizes 2017-02-23 06:01:13 -05:00
f722498b72 THTensorApply2 counter compress 2017-02-23 06:01:13 -05:00
aadfb6fe83 THTensorApply reduce memory overhead 2017-02-23 06:01:13 -05:00
6c273594c9 THTensorApply Counter compress 2017-02-23 06:01:13 -05:00
e475c82fa1 Add isTransposed judge and enable multithread of fill functions 2017-02-23 06:01:09 -05:00
0c2e6665df Add AVX copy 2017-02-23 05:50:34 -05:00
6295e6e94b Rebase master 2017-02-23 05:50:34 -05:00
670a4aa708 Fix AVX2 bugs 2017-02-23 05:50:34 -05:00
1bdc2e64ed Add fma cadd 2017-02-23 05:50:34 -05:00
c587be1e50 Add THVector Fill 2017-02-23 05:50:34 -05:00
bd481596f5 optimize THVector add mul div 2017-02-23 05:50:34 -05:00
a504d56b43 Fix THVector cmul AVX bug 2017-02-23 05:50:30 -05:00
91c4dfccea Use THVector cadd AVX 2017-02-23 05:46:44 -05:00
27f618c44d Add THVector Fill AVX 2017-02-23 05:46:44 -05:00
a14482a1df Add THVector cadd AVX 2017-02-23 05:46:40 -05:00
aa50c5734b Add THVector AVX cmul 2017-02-23 05:46:07 -05:00
293001a4fe Add THVector SSE div cdiv 2017-02-23 05:46:07 -05:00
638cfdf150 Add SSE add 2017-02-23 05:46:07 -05:00
5f80a14525 Separate SSE and AVX 2017-02-23 05:46:07 -05:00
1342fd3975 Remove THTensorMathSIMD THTensorMathDispatch 2017-02-23 05:46:07 -05:00
8d4af38489 Add THVector div cdiv 2017-02-23 05:46:07 -05:00
575a064e66 Remove THVector diff 2017-02-23 05:46:07 -05:00
3ab21a3c4f Merge THVector mul AVX 2017-02-23 05:46:07 -05:00
2f592e6c7d Remove THVector scale 2017-02-23 05:46:07 -05:00
5661ffb766 Merge THVector mul 2017-02-23 05:46:03 -05:00
9b74503daa Merge THVector cmul 2017-02-23 05:40:33 -05:00
24848f1cd8 Change THVector mul to cmul 2017-02-23 05:40:33 -05:00
a31a07ede9 Merge THVector add 2017-02-23 05:40:33 -05:00
c8c4c9b23d Change THVector add to cadd and fix NEON 2017-02-23 05:40:33 -05:00
e1ed9303f0 Add multi-thread add 2017-02-23 05:40:33 -05:00
a43aab13c2 Fix THTensorMath.c style 2017-02-23 05:40:33 -05:00
c698b4a45e Add Dispaches for div and mul 2017-02-23 05:40:29 -05:00
c6a0ffab50 Add AVX single float and double float add 2017-02-23 05:40:24 -05:00
8ba7cc30d1 Add THTensorMathSIMD.c 2017-02-23 05:32:34 -05:00
61bf08ca24 Fix compilation for simd tensor add 2017-02-23 05:32:28 -05:00
6ada3c0c16 Fast floating point add kernel in intrinsics (11x speedup over default for 10k elements) 2017-02-23 05:11:44 -05:00
60061fbe79 Fixed up CPU dispatch and tested. Can begin implementing kernels 2017-02-23 05:11:44 -05:00
46e7042add SIMD helper header, modified add in THTensorMath to check dispatch 2017-02-23 05:11:44 -05:00
d0c182773b First commit for dynamic CPU dispatch: general framework in place (need to create dispatch tables and stubs for all functions and make impls have hidden linkage) 2017-02-23 05:11:44 -05:00
b6f60585b5 fix AVX2 detection bugs 2017-02-23 05:00:55 -05:00
4b0e3ee219 Merge pull request #699 from twitter-forks/bitops
Bitwise operations
2017-02-23 04:15:35 -05:00
838842d4b2 fix documentation error. [issue #790](https://github.com/pytorch/pytorch/issues/790) (#831) 2017-02-23 08:59:29 +01:00
e71cf20192 improved serialization (no tar copy) (#713) 2017-02-22 22:24:20 +01:00
adb4cb2b5b contiguous view backward (#816) 2017-02-21 19:09:36 -05:00
6073f9b46c update table in README.md
it removes the empty top row
2017-02-21 12:58:04 -05:00
8e8022b735 Merge pull request #418 from ruotianluo/adaptiveAverage
Add SpatialAdaptiveAveragePooling.
2017-02-21 09:15:12 -05:00
da82d2dd70 Merge pull request #434 from bottler/master
VolumetricFractionalMaxPooling like spatial
2017-02-21 09:13:59 -05:00
82176473a5 Merge pull request #442 from twitter-forks/half-fixes
Convert real to accreal in libTHCUNN
2017-02-21 09:12:56 -05:00
2d269a9a72 Merge pull request #1137 from twitter-forks/half-fixes
Using accreal instead of real in the API
2017-02-21 09:12:32 -05:00
240372a991 Fixed topk documentation for largest=True 2017-02-21 04:38:24 -05:00
5b10411c8c Fixed some mistakes in examples
Fixed mistakes in LSTMCell and GRUCell examples.
2017-02-21 04:17:28 -05:00
4c474a9939 Improve prodall CUDA test 2017-02-20 23:28:31 -08:00
7ea6ae57c8 Support numpy arrays in default_collate 2017-02-20 23:28:31 -08:00
42633f8986 Fix misspelling and add support for weights in NLLLoss2d 2017-02-20 23:28:31 -08:00
84248690a9 Add support for indexing with None and slices with positive steps 2017-02-20 23:28:31 -08:00
53409ca0fb Fix a warning in THPP 2017-02-20 23:28:31 -08:00
c2c1710047 Add clip_grad_norm 2017-02-20 23:28:31 -08:00
876202503f Support multiple inputs in data parallel 2017-02-20 23:28:31 -08:00
946a7d9bc3 Make input contiguous only once in backward of cuDNN RNN 2017-02-20 23:28:31 -08:00
608bcd3b15 Return correct number of gradients from cuDNN RNN 2017-02-20 23:28:31 -08:00
632b02a477 Add checks for reward type and size in StochasticFunction 2017-02-20 23:28:31 -08:00
0db9c63300 Use library_dirs in setup.py 2017-02-20 23:28:31 -08:00
873ed4e6b6 Add better error message for conversion of CUDA tensors to numpy 2017-02-20 23:28:31 -08:00
01bd43037d add docs to torch/cuda/random 2017-02-20 20:43:47 -05:00
68c9e3f232 Fixed typo in GRUCell example 2017-02-21 01:37:04 +01:00
a25c8555eb Fixed paper references 2017-02-21 00:27:18 +01:00
dfd1dff383 Merge commit '4ca26fbc1b7be4e369f84e95df16431bb2f1dcb7' 2017-02-20 08:05:19 -08:00
8f391d4d51 Merge commit 'ee43cd7adca3b24a2071ce6c55dcd3a95a2b6ff6' 2017-02-20 07:55:46 -08:00
2a6b7685ae Merge commit 'f6c1bbfa483ad19c500dc94838baaa69f02d240b' 2017-02-20 07:55:19 -08:00
eb9573107d Merge commit '34b7fed802db1fda6322a70b648dcc4947858719' 2017-02-20 07:54:51 -08:00
ee43cd7adc Do SpatialClassNLLCriterion sizeAverage in a separate kernel 2017-02-20 06:54:23 -08:00
4ca26fbc1b Remove averaging from prodall 2017-02-20 11:37:53 +01:00
c165226325 Print a readable error message when arguments are on different GPUs 2017-02-20 11:35:50 +01:00
49295ebe54 Add sequential to documentation 2017-02-18 08:42:43 +05:30
455038e470 Use a more stable formula for spatial LogSoftMax 2017-02-17 13:05:45 -08:00
ca7f02ea0c Add shape checks for SpatialClassNLLCriterion 2017-02-17 13:01:56 -08:00
04aba1caec Fix cuDNN dropout desc for multi-gpu (#772) 2017-02-17 19:16:12 +01:00
f6c1bbfa48 Merge pull request #1105 from ruotianluo/adaptiveAvg
Add SpatialAdaptiveAveragePooling
2017-02-17 10:52:33 -05:00
4e2c8c6db5 Merge pull request #1123 from bottler/master
VolumetricFractionalMaxPooling like Spatial...
2017-02-17 10:42:21 -05:00
c26b9c0a5e Update rnn.py
Based on the https://github.com/pytorch/pytorch/blob/master/torch/backends/cudnn/rnn.py#L302 line, the output is returned (0,1)-transposed if the batch_first argument is set to true.
2017-02-17 14:37:14 +01:00
aaf41c61a6 Fix Engine::compute_dependencies 2017-02-17 18:28:51 +05:30
dd844f741b Fix previous_functions when it contains Variables 2017-02-17 11:03:46 +05:30
7117a9012e Fix flaky non-contig test 2017-02-17 10:40:08 +05:30
1bdc28161a Add torch.__version__ 2017-02-17 10:40:08 +05:30
5e150caf38 Fix a bug in Engine::compute_dependencies 2017-02-17 10:40:08 +05:30
c0c62d099a Make detach() actually remove the creator 2017-02-17 10:40:08 +05:30
b9ece39685 Make torch.Size methods return torch.Size, not tuple 2017-02-17 10:40:08 +05:30
15ef008877 Using accreal instead of real in the API
- This reverts commit 7a07afe545b4deae5919d9dc268bfac3d37398c7.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:34:11 -08:00
b14d6318f8 Convert real to accreal in libTHCUNN
- This reverts commit 0d85922d116879448485ef88ae21e83a9255a0b0.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:33:03 -08:00
7c44506441 allow DataParallel to have tuple inputs on a single GPU 2017-02-16 19:07:17 +01:00
937ba581d7 Improve nn.legacy compatibility with Torch7 (#738) 2017-02-16 21:17:12 +05:30
2ae54f1194 setup.cfg -> tox.ini (#761) 2017-02-16 21:13:13 +05:30
a217fefee1 Update rnn.py
Fixed a problem with how the RuntimeError is raised when arguments are incorrect in cudnn/rnn.py
2017-02-15 21:49:42 +01:00
34b7fed802 Fix gcc 4.4.7 build. 2017-02-15 09:06:25 -08:00
5221745c21 add test for bias=False for 3d convolution 2017-02-15 04:26:44 -08:00
000ca44b16 Merge commit '797544c47a4e9bdff02137a127f883a6df9b3dfe' 2017-02-15 04:24:14 -08:00
8f3d44033b Merge commit '0426f2f3ec2b932cb83d64101081244c2a1451b1' 2017-02-15 04:23:50 -08:00
7cc14c595a Merge commit '07f5b21ef1bd29d1451c616062dcbfc3f8fd7c6a' 2017-02-15 04:23:18 -08:00
797544c47a implementation of bias=False for VolConv.cu 2017-02-15 04:18:17 -08:00
0426f2f3ec implementation of bias=False for VolConv.c
Used .c file changes from 7318e2de13 as a starting point. All changes to .c files (except for whitespace details) are present here.
However, the required .h files were not present in that PR.
2017-02-15 04:16:09 -08:00
336eeee895 kernel_size as the default stride for avg_pool1d (#744)
Following the documentation, stride defaults to kernel_size when it is not provided.
2017-02-15 13:12:18 +05:30
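A minimal sketch of the defaulting behavior (assuming torch.nn.functional.avg_pool1d from this PR):
```python
import torch
import torch.nn.functional as F
from torch.autograd import Variable

x = Variable(torch.ones(1, 1, 8))
print(F.avg_pool1d(x, kernel_size=2).size())            # (1, 1, 4)
print(F.avg_pool1d(x, kernel_size=2, stride=2).size())  # (1, 1, 4): same, stride defaulted to kernel_size
```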
593f867e3e Fixed a simple compile error on macOS #745. (#746)
Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-15 12:19:03 +05:30
385913be1c Fix class torch.nn.ConvTransposeNd documentation (#739)
There is no `dilation` parameter
The `output_padding` doc was missing
2017-02-15 10:37:20 +05:30
6aaa14f5fe Fix LSTMCell Doc Typo (#743) 2017-02-15 08:29:17 +05:30
07f5b21ef1 Merge pull request #702 from gchanan/conservativeAllocator
Improve THCCachingHostAllocator performance by making it reclaim less aggressively
2017-02-15 08:26:48 +05:30
e454870396 Free set of stored streams and handle NULL streams. 2017-02-14 15:41:47 -08:00
2822013437 Fix flaky tests 2017-02-14 21:28:50 +01:00
72c1982734 Add some more asserts to cuDNN RNN 2017-02-14 21:28:50 +01:00
0de2ea305a Support retain_variables in cuDNN RNN 2017-02-14 21:28:50 +01:00
d899385a3d Raise error when too small input is given to conv 2017-02-14 21:28:50 +01:00
c6d6cbe8a6 Check that all tensors are on the same GPU in cuDNN bindings 2017-02-14 21:28:50 +01:00
85e82e85d8 Fix bug in zero_grad, when some parameters didn't require grad 2017-02-14 21:28:50 +01:00
a1534cc37d Fix auto-gpu in cat 2017-02-14 21:28:50 +01:00
8c8dc791ef Load half and double THCUNN backends 2017-02-14 21:28:50 +01:00
63edca44f2 Add tests for non-contiguous inputs and gradients 2017-02-14 21:28:50 +01:00
8d90ab2d9b compile with cudart (#737) 2017-02-14 06:40:35 +05:30
bd5303010d Refactor autograd package to separate Python dependencies. (#662)
The core autograd Variable, Function, and Engine no longer depend on the
Python API. This lets us implement functions in C++. In the future, we
can also multithread the engine and release the GIL for most of the
non-Python backward passes.
2017-02-13 16:00:16 -08:00
16d2c3d7b3 make networks converted with loadcaffe loadable 2017-02-13 23:53:46 +01:00
407a92dc26 std::min() requires same type (#732)
* std::min() requires same type

* cast buffer instead

* declare buffer_size as int64_t
2017-02-13 18:06:05 +01:00
0a893abc7b fix serialization bug for large files 2017-02-12 19:13:02 +01:00
34fa5e0dc7 Update docstrings for testing object type
Add docstring for `is_storage()` and `is_tensor()`
2017-02-12 09:21:01 +05:30
712686ce91 Add cat, contiguous, squeeze, and unsqueeze to THPP
Use unsqueeze and view from TH/THC
2017-02-11 17:49:31 +01:00
518864a7e0 Fix bug in legacy NN updateGradParameters (#714) 2017-02-11 11:04:18 +05:30
750fb5cc73 Fixes to support short and char tensors for bitwise operations 2017-02-09 18:52:59 -08:00
0f4749907a Adding bitwise operations
- lshift, rshift, bitand, bitor, bitxor
2017-02-09 18:11:58 -08:00
bd2dc63ef6 Adding bitand, bitor and bitxor 2017-02-09 17:06:04 -08:00
19a8795450 Changes to shift operations
- renaming lsh -> lshift, rsh -> rshift
- adding componentwise functions
2017-02-09 15:41:07 -08:00
d9dccfdd71 Fix for non-contiguous grad_output in cuDNN conv 2017-02-10 00:25:59 +01:00
7547a06c4f Avoiding duplicated unsigned as it causes error on gcc. 2017-02-09 13:29:05 -08:00
8929b75795 Added shift operations. 2017-02-09 13:28:36 -08:00
4d37ef878c Remove view on data and target tensors of dim 1 in TensorDataset (#609) 2017-02-09 22:06:39 +01:00
126e77d5c6 Merge commit 'e9b05c71b4acf210fad719f4da8bb58a425dd00b' 2017-02-09 12:31:58 -08:00
53eec78bea Merge commit 'ac9312e9f8002227b267a82e224a5a99c7a7e734' 2017-02-09 12:31:40 -08:00
a4edaec81a Merge commit 'aeb7a72620be47c0e6a8928a9cb6df49c06902a0' 2017-02-09 12:31:16 -08:00
92481b59d3 Merge commit '73d232ee454ca25de5552d347a2b06820f30d193' 2017-02-09 12:30:39 -08:00
6c77fa9121 Changes in RNNBase and Embedding for compatibility with DataParallel (#660) 2017-02-09 22:36:26 +05:30
aeb7a72620 Merge pull request #693 from colesbury/view
Add code for 'view' to THC
2017-02-09 12:09:28 +05:30
73d232ee45 Merge pull request #926 from colesbury/view
Add code for 'view' to TH
2017-02-09 12:08:57 +05:30
c0c65bf915 Merge pull request #696 from colesbury/unsqueeze
Add unsqueeze to THC
2017-02-09 11:08:20 +05:30
f6cee952af Merge pull request #929 from colesbury/unsqueeze
Add unsqueeze1d to TH
2017-02-09 11:07:47 +05:30
e74184f679 Make THCCachingHostAllocator less aggressive.
In cases where copyAsync is a large percentage of the work,
processing events in recordEvent can cause a large bottleneck.

Here, we relax the constraint that we reclaim blocks as fast as possible
(i.e. in copyAsync); instead, we only check that a block can be re-allocated
in malloc and free.
2017-02-08 14:44:24 -08:00
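The scheduling idea can be sketched as a toy Python model (not the actual THC code): free() only queues a completion event, and reclamation happens lazily when malloc() polls the queue:
```python
from collections import deque

class Event:
    """Stand-in for a CUDA event; query() reports completion (cf. cudaEventQuery)."""
    def __init__(self, done=True):
        self.done = done

    def query(self):
        return self.done

class Block:
    def __init__(self, size):
        self.size = size

class ToyCachingHostAllocator:
    """Toy model of the lazy-reclamation idea, not the actual THC code."""
    def __init__(self):
        self.pending = deque()  # (event, block) pairs awaiting completion
        self.free_blocks = []

    def free(self, block, event):
        # Hot path stays cheap: enqueue only, never poll events here.
        self.pending.append((event, block))

    def malloc(self, size):
        # Reclaim lazily: poll completed events only when memory is requested.
        while self.pending and self.pending[0][0].query():
            _, block = self.pending.popleft()
            self.free_blocks.append(block)
        for block in self.free_blocks:
            if block.size >= size:
                self.free_blocks.remove(block)
                return block
        return Block(size)  # nothing cached fits; allocate a fresh block
```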
3884d36176 Add unsqueeze to THC 2017-02-08 13:49:32 -08:00
e7c6886a00 Add unsqueeze1d to TH
Unsqueeze inserts a singleton dimension. Unlike view, it doesn't require
the tensor to be contiguous.
2017-02-08 09:52:50 -08:00
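A short illustration of that contiguity point (assuming the then-current torch.unsqueeze binding):
```python
import torch

x = torch.randn(4, 3).t()   # the transpose makes x non-contiguous
print(x.is_contiguous())     # False
y = x.unsqueeze(0)           # works anyway; view would require a contiguous tensor
print(y.size())              # torch.Size([1, 3, 4])
```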
024d1e2678 Merge pull request #69 from cwhipkey/master
Qualify nullptr_t with std::
2017-02-08 09:17:50 -08:00
ed8e92f63d Expose rawSet and rawResize as resizeNd and setStorageNd 2017-02-08 09:00:22 -08:00
fb97df5d65 Expose rawSet and rawResize as resizeNd and setStorageNd
These methods are useful from C because they don't require constructing
THLongStorages to wrap the sizes and strides, which can lead to leaked
memory in case of an error. Instead the sizes and strides can be
represented on the stack using standard C long arrays.
2017-02-08 08:56:04 -08:00
e9b05c71b4 Use THCTensor rather than THCudaTensor in THCUNN.h definition of
GatedLinearUnit.
2017-02-08 07:54:10 -08:00
5eab428294 Qualify nullptr_t with std::. 2017-02-08 07:06:31 -08:00
7926324385 Corrected parameter typo in Adam docstring (#697) 2017-02-07 19:00:10 +01:00
1527b37c26 Fixed typo and rendering of some equations (#693)
* Fixed typo and rendering of some equations

* Few more fixes to MSELoss docs

* Cleaning up whitespace to make pep8 happy
2017-02-07 18:59:27 +01:00
de4659659b The RNNCell example could not run correctly 2017-02-07 18:58:19 +01:00
a96a8c8336 Static build support + Query CUDA driver, runtime versions (#695) 2017-02-07 08:34:20 +05:30
691aa19b88 Add code for 'view' to THC 2017-02-06 14:04:04 -08:00
6b07dc9e22 Add code for 'view' to TH 2017-02-06 14:00:48 -08:00
8aa259b52b review comments from gchanan 2017-02-06 11:08:23 +00:00
ac9312e9f8 Bugfix/rowconv (#1126) 2017-02-04 20:37:45 +05:30
91a17b702b half<->float conversion cleanup (#901)
* half<->float conversion cleanup
2017-02-04 07:30:13 +05:30
c54597e0b2 std::move fixes 2017-02-03 21:31:03 +01:00
a9785bba44 cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-02-02 21:38:25 -08:00
833b8cbc7a Remove unused code from module 2017-02-02 17:20:11 +01:00
75aeb16e05 Merge commit '72089c9c36c6b880c695baf732cd04329d72c098' 2017-02-01 22:00:42 -08:00
fc354a0d6e Revert "cuda implementation of Gated Linear Unit, fixed issues with genericization" 2017-02-02 10:50:47 +05:30
262611fcd3 Merge pull request #430 from huihuifan/newCudaGLU
cuda implementation of Gated Linear Unit, fixed issues with genericization
2017-02-02 08:16:35 +05:30
b8a34f3033 Small fixups:
1) Add return after THError for completeness.
2) Fix brace formatting
2017-02-01 15:46:19 -08:00
10bb6bb9b8 Fix function names in error messages 2017-02-01 15:21:57 -08:00
3c9ef69c37 Fix THCTensor::isSparse 2017-02-01 14:51:06 -08:00
dee987d6ee use pseudo-fp16 2017-02-01 23:48:09 +01:00
138f254ec1 Support sparse tensors in THPP (#667) 2017-02-01 17:34:50 -05:00
c7c8aaa7f0 Add ModuleList and ParameterList to nn 2017-02-01 23:26:31 +01:00
d0db624e02 Add W503 to PEP8 ignore list (#646) 2017-02-01 15:57:09 -05:00
e3e7b76310 Rename all normal and log_normal args to std 2017-02-01 21:48:11 +01:00
dad02bceb9 Remove duplicated line in cwrap 2017-02-01 21:48:11 +01:00
b195285879 Improve CUDA detection in THPP 2017-02-01 21:48:11 +01:00
8f3da5b51d set_index -> _set_index 2017-02-01 21:48:11 +01:00
825e919eb8 Add torch.unbind 2017-02-01 21:48:11 +01:00
acb0ce8885 Add LongTensor indexing support 2017-02-01 21:48:11 +01:00
72089c9c36 Update THHalf.c 2017-02-01 11:53:29 -08:00
cf2f158fec Remove erroneous proprietary license header
This change was approved by NVIDIA Legal, and I am authorized to make the change on behalf of the company.
2017-02-01 11:43:44 -08:00
41ddc2a786 VolumetricFractionalMaxPooling like Spatial... 2017-02-01 12:01:09 +00:00
e4886f6589 VolumetricFractionalMaxPooling like spatial 2017-02-01 11:52:49 +00:00
6470b5bd21 Add test for Embedding with sparse=True (#663) 2017-02-01 09:54:42 +05:30
44196955e2 ByteTensor should be unsigned (#664)
ByteTensor should be unsigned
2017-01-31 21:43:39 -05:00
f08ec1394d Fix bug with inplace TH(CU)NN
Also, remove unnecessary zero_() calls
2017-01-31 21:00:49 +01:00
f8fb25e0a2 Add generic bindings to THNN and THCUNN (#645)
Adds bindings using thpp::Tensor to THNN and THCUNN. This allows calling
into those APIs without knowing the concrete types of the tensor
arguments.
2017-01-31 13:23:02 -05:00
6a0c66752f Fix documentation and argument name for Tensor.normal_(mean, stddev) (#652) 2017-01-31 11:55:39 -05:00
a1bd4efb08 readme: add guidance on disabling CUDA (#655) 2017-01-31 14:05:51 +05:30
b43ce05268 Refactor parts of utils.h (#648)
Moves THPObjectPtr into a separate header, so that it can be included
independently. Currently, utils.h requires all of THP.h. Also adds RAII
structs for acquiring and releasing the GIL.
2017-01-30 21:16:28 -05:00
80e56cfda9 Merge commit 'dc9a5b7d2fbcf21268b524b9da5ae38a74214a59' 2017-01-30 17:58:05 -08:00
24701fc5a7 Merge commit '03dcf8a83bb009ecfdd8f27c4d9a6db40829b690' 2017-01-30 17:57:20 -08:00
f78a266d99 Merge commit '368cbe615d0a7bdaadddcb3bd390abcd4cc17b91' 2017-01-30 17:56:37 -08:00
f096fb6859 adding cudnn V6 support (#515) 2017-01-31 02:01:37 +01:00
a3e11d606b Fix linter errors 2017-01-31 01:58:09 +01:00
79232c24e2 Fixes after rebase 2017-01-31 01:58:09 +01:00
15d9d499ab Remove ZMQ dependency from compilation files 2017-01-31 01:58:09 +01:00
962084c8e8 Add Data Channel receive from any source (#52) 2017-01-31 01:58:09 +01:00
7518b1eefb Introduce Scalar for easier send/receive types through DataChannel 2017-01-31 01:58:09 +01:00
8215d7a4ba Implement TH_API functions from the set 2 (#49) 2017-01-31 01:58:09 +01:00
5aaa220d84 Thd functions v3 (#46) 2017-01-31 01:58:09 +01:00
12c16ab9bc Remaining storage functions implemented 2017-01-31 01:58:09 +01:00
76520512e7 DataChannel tests rewrite (#42); DataChannel isend and irecv implementation (#44) 2017-01-31 01:58:09 +01:00
66de965882 Replace ZeroMQ (#41) 2017-01-31 01:58:09 +01:00
10d32fb0b7 Fix DataChannel tests failure (#43)
Tests failed due to accessing a reference that could be invalid.
2017-01-31 01:58:09 +01:00
e72c9b6e4a Storage constructors implemented (#40) 2017-01-31 01:58:09 +01:00
ac1f68127a Add barrier, scatter, gather and allGather implementations + groups (#34) 2017-01-31 01:58:09 +01:00
60d1852c7b Major improvements to master-worker mode
* Fixed all undefined symbol errors
* Implemented storage interface and THStorage class
* RPC improvements
* Code refactor
2017-01-31 01:58:09 +01:00
d53eb521fc Add missing headers. 2017-01-31 01:58:09 +01:00
9808932f10 Refactor RPC and change TensorType to Type 2017-01-31 01:58:09 +01:00
ea876eb6d5 Add initial bindings for master-worker mode 2017-01-31 01:58:09 +01:00
0a45864866 Add THDStorage and improve master-worker mode implementation 2017-01-31 01:58:09 +01:00
2560b39796 Merge TensorTypeTraits.hpp with TensorTraits.hpp 2017-01-31 01:58:09 +01:00
21afa4c88b Worker handling for constructors + destructor 2017-01-31 01:58:09 +01:00
9fc3c5e4d2 THDTensor constructors implemented + some minor fixes 2017-01-31 01:58:09 +01:00
3e3501c98d Integration tests of the THD Python interface (#28) 2017-01-31 01:58:09 +01:00
5e6fcd02b5 Implement data channel groups (#25) 2017-01-31 01:58:09 +01:00
d46ebcfadf Fix broadcast and reduce implementations
Due to bad rank mapping, broadcast and reduce were connecting the
wrong processes, which resulted in errors or in tensors not being received/sent.

 * Introduced a new mapping method to solve this problem.
 * Added and improved tests for these cases.
2017-01-31 01:58:09 +01:00
41480c8cf2 Data channel maintenance 2017-01-31 01:58:09 +01:00
236890d902 Fix transitive library dependencies in CMake 2017-01-31 01:58:09 +01:00
55632d81d2 Add Python wrappers for process group mode 2017-01-31 01:58:09 +01:00
0b276d622e Add reduce and allReduce implementations (#15) 2017-01-31 01:58:09 +01:00
c81491b37d Preserve directory structure when installing headers 2017-01-31 01:58:09 +01:00
42e189425f Detect ZMQ libs and headers in CMake 2017-01-31 01:58:09 +01:00
3cfa0d7199 Expose C API for process group mode 2017-01-31 01:58:09 +01:00
7c9e088661 Reorganize THD directory structure 2017-01-31 01:58:09 +01:00
e78aa4bb84 Implement CommandChannel with ZMQ. 2017-01-31 01:58:09 +01:00
f8e94d0d8b Implement DataChannel (MPI and TCP) (#8) 2017-01-31 01:58:09 +01:00
ebe6f40fce RPC message packing and unpacking implemented 2017-01-31 01:58:09 +01:00
5fb37efb46 Use #pragma once instead of defines 2017-01-31 01:58:09 +01:00
4f47855873 Style improvements 2017-01-31 01:58:09 +01:00
52ae6f682f Add initial version of tensor wrappers 2017-01-31 01:58:09 +01:00
c35f58f97b Template for THD implementation 2017-01-31 01:58:09 +01:00
659b2f3154 Add more autograd functions 2017-01-31 00:39:34 +01:00
5ea05cfb96 Return indices from Variable sort and topk 2017-01-31 00:39:34 +01:00
dc9a5b7d2f Fix memory leak in SpatialMaxUnpooling 2017-01-30 23:23:07 +01:00
f7ab5a128a Delete extra bracket in RNNCellBase.__repr__. (#637)
This extra bracket causes a ValueError when trying to print a Module that uses RNNCellBase or any of its subclasses.
2017-01-29 23:21:24 -05:00
368cbe615d Add Ubuntu 16.04 lib paths in CMake 2017-01-30 01:16:02 +01:00
d4c9a3782b billinear -> bilinear, docs for upsampling, improved docs for Unpooling, pep8 tests fix (#617)
* billinear -> bilinear, docs for upsampling, improved docs for Unpooling, pep8 tests fix
2017-01-30 05:08:48 +05:30
172dca5e8b Fix bug in cat (non-contiguous first input) 2017-01-29 21:25:53 +01:00
818bf0c408 Compile with asserts by default 2017-01-29 21:21:59 +01:00
03dcf8a83b Compile with asserts on by default 2017-01-29 21:18:54 +01:00
604f607fd1 Add asserts in index* functions 2017-01-29 21:18:43 +01:00
956d946c25 Default initial hidden states for recurrent layers (#605)
Fixes #434
2017-01-29 12:38:56 +01:00
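A sketch of what #605 enables (hypothetical sizes; 0.1.x-era Variable API):
```python
import torch
import torch.nn as nn
from torch.autograd import Variable

rnn = nn.GRU(input_size=8, hidden_size=16)
x = Variable(torch.randn(5, 2, 8))   # (seq, batch, feature)
out, h = rnn(x)                      # no hx passed: the initial hidden state defaults to zeros
print(h.size())                      # (1, 2, 16)
```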
970caaa621 Exclude sphinx_rtd_theme from pep8 2017-01-28 23:37:39 -05:00
00a5980cdf Improve RNN doc formatting 2017-01-28 23:37:39 -05:00
e24eee04f0 Link THC to THPP 2017-01-28 23:37:39 -05:00
f1b3af4ee2 Add more bernoulli options in cwrap 2017-01-28 23:37:39 -05:00
fb2d28f477 remove circular references in NestedIOFunction 2017-01-28 23:30:06 +01:00
3a704ff725 Fix legacy load_lua for SpatialConvolution (#608)
* fix legacy load_lua for conv2d

* fix pep8
2017-01-28 20:19:18 +01:00
0180e638e5 Remove unnecessary zero_() calls in cuDNN RNN 2017-01-28 14:36:57 +01:00
95c6ae04fb Fix non-contiguous grad handling in cuDNN RNN 2017-01-28 14:36:57 +01:00
27c4c6e0af Merge commit '6ee77b4edd1552d3a9a2e5389ffc351e513a8089' 2017-01-27 17:29:07 -08:00
da17414b3f Merge commit '343d65db91c2419843d36aed5467c2d1374108bc' 2017-01-27 17:16:08 -08:00
be2b27a747 Merge commit '4461ae809043390d5223905cb82b17035c7f9f31' 2017-01-27 17:15:21 -08:00
aec2c8f752 Merge commit 'c45ff2efe64d0face3889194ba6f885fe9cc4d48' 2017-01-27 17:12:13 -08:00
13e34b4679 Fix multiprocessing tests 2017-01-28 01:18:42 +01:00
57373c7c29 Fix docs 2017-01-28 01:16:04 +01:00
79f5bf84e5 [pep8] Potentially breaking docstring changes 2017-01-28 01:15:51 +01:00
3ed720079e [pep8] Fix most remaining lint manually 2017-01-28 01:15:51 +01:00
e7c1e6a8e3 [pep8] Fix most lint automatically with autopep8
Here's the command I used to invoke autopep8 (in parallel!):

    git ls-files | grep '\.py$' | xargs -n1 -P`nproc` autopep8 -i

Several rules are ignored in setup.cfg. The goal is to let autopep8
handle everything which it can handle safely, and to disable any rules
which are tricky or controversial to address. We may want to come back
and re-enable some of these rules later, but I'm trying to make this
patch as safe as possible.

Also configures flake8 to match pep8's behavior.

Also configures TravisCI to check the whole project for lint.
2017-01-28 01:15:51 +01:00
f1d0d73ed7 Fix flaky Sqrt test 2017-01-28 00:45:49 +01:00
9c411513bf Patch distutils crash when linking with ccache 2017-01-28 00:28:33 +01:00
ce78bc898b Fix travis builds and add ccache 2017-01-28 00:28:33 +01:00
887002e932 Add bindings to CUDA tensors and storages in THPP (#615) 2017-01-27 18:15:56 -05:00
31dea5ff23 Small typo in README (#613) 2017-01-27 20:18:36 +01:00
ec4602a973 Fix bad code alignment (#612)
forward *is* a method of the Linear class
2017-01-27 20:16:49 +01:00
a38749d15f Fix cuda notes
Target GPU *is* consistent with source GPU
2017-01-27 19:30:49 +01:00
6ee77b4edd Added cunn support for TemporalRowConvolutionMM (#415)
* Added cunn TemporalRowConvolutionMM support
2017-01-27 13:30:25 -05:00
343d65db91 Rowconv repull (#1120)
* Added TemporalRowConvolutionMM layer, tests, and documentation
2017-01-27 13:29:05 -05:00
6328981fcf cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-01-26 22:56:33 -08:00
a90913105c add make-contiguous in batchnorm backward (#602) 2017-01-26 16:17:39 -05:00
9368596059 legacy.nn Attributes: Add '_gradOutput' to SpatialConvolution. (#600) 2017-01-26 15:00:41 -05:00
80ed795ff1 Minor ffi utils fix 2017-01-26 11:55:49 +01:00
a2938e3d11 add cc 3.0 to nccl (#594) 2017-01-25 22:47:23 -05:00
2ad967dbe4 Fix pep8 in setup.py with "autopep8 -i setup.py" 2017-01-25 22:23:22 -05:00
7415c090ac Check setup.py for pep8 lint on TravisCI 2017-01-25 22:23:22 -05:00
a1fa995044 Fixes and improvements (#593)
* Fix error in ELU backward

* Add --seed flag for tests

* Add test for BatchNorm eval

* Fix autograd.backward docs

* Support cc flags in cuDNN search

* Fix IndexSelect backward formula
2017-01-25 22:21:49 -05:00
3c2ecc6b15 add dockerfiles (#583)
* add dockerfiles
2017-01-25 17:30:29 -05:00
fa1516d319 Install THCUNN.h and generic/THCUNN.h
The THCApply.cuh include is moved into the .cu files so that THCUNN.h can be
compiled by a standard C compiler.
2017-01-25 14:13:17 -08:00
5e26f49db4 Install THNN.h and generic/THNN.h 2017-01-25 14:09:09 -08:00
7694f65120 Revert "Using accreal instead of real in the API" 2017-01-25 16:26:42 -05:00
b5ebf68df1 Revert "Convert real to accreal in libTHCUNN" 2017-01-25 16:13:20 -05:00
aa46055274 Update CI links in README (#579) 2017-01-25 13:58:05 -05:00
2cad802b68 Revert "cuda implementation of Gated Linear Unit" 2017-01-25 13:15:22 -05:00
2d01f384f1 fallback to nn batchnorm on backward-evaluate (#589) 2017-01-25 12:38:57 -05:00
f8d4f980b3 Add upsampling modules and functions 2017-01-24 17:30:50 -05:00
4f5a6c366e Make Variables non-comparable 2017-01-24 17:30:50 -05:00
ecfcf39f30 Improve optimizer serialization
Also, add optimizer.load_state_dict
2017-01-24 17:30:50 -05:00
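A usage sketch of the pattern this enables (the file name is illustrative):
```python
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)
opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

torch.save(opt.state_dict(), 'opt.pth')       # persist only the optimizer state
opt2 = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
opt2.load_state_dict(torch.load('opt.pth'))   # restore momentum buffers and per-param state
```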
3975a2676e Fix invalid DECREF in torch.Size constructor 2017-01-24 17:30:50 -05:00
138ee75a3b Fix for target_link_libraries on CMake 2.8 (#581) 2017-01-24 17:26:24 -05:00
0048f228cb Add spatial test for LogSoftmax 2017-01-24 23:24:25 +01:00
2748b920ab make adam have the same lr as lua torch (#576) 2017-01-24 16:35:28 -05:00
a92a2312d4 Add missing fields to read_lua_file for BatchNorm and Linear layers. 2017-01-24 22:09:47 +01:00
945ce5cdb0 Fix math block of GRUCell in docs (#572)
Added a blank line after the `.. math::` directive; otherwise the content is displayed as a code block.
2017-01-24 14:28:56 -05:00
b39de2cbbe Merge pull request #416 from pavanky/half-fixes
Convert real to accreal in libTHCUNN
2017-01-24 12:17:49 -05:00
49a555e0f5 Merge pull request #1109 from pavanky/api
Using accreal instead of real in the API
2017-01-24 12:17:17 -05:00
ce13900148 update From Source instructions 2017-01-24 10:48:25 -05:00
4c77ad6ee4 step_rate -> lr in adadelta (#569) 2017-01-24 10:05:59 -05:00
0bc4246425 adding NLLLoss2d to docs 2017-01-24 09:22:51 -05:00
c45ff2efe6 Merge pull request #915 from pavanky/convert
Macros to convert between real and accreal
2017-01-24 09:14:33 -05:00
99b520cc5d Merge pull request #421 from huihuifan/cudaGLU
cuda implementation of Gated Linear Unit
2017-01-24 09:13:34 -05:00
e05607aee1 Add fall back to implicit GEMM and friends. (#558)
If we can't allocate the workspace for the desired algorithm, we fall
back to a default algorithm which does not require a workspace.
2017-01-24 09:10:39 -05:00
a360ba1734 Add a hint about CUDNN_STATUS_NOT_SUPPORTED 2017-01-24 09:09:30 -05:00
c661b963b9 Add more contiguity checks to cuDNN 2017-01-24 09:09:30 -05:00
e374dc1696 add step rate to adadelta (#568)
Scales `delta` before it is applied to the parameters in order to control the learning rate of the optimizer (inspired by the climin optim lib for Theano).
Also changed the link to the Adadelta paper to point to the right location.
2017-01-24 08:48:19 -05:00
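After this change and the follow-up rename (step_rate -> lr, commit 4c77ad6ee4 above), usage looks roughly like:
```python
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
# lr scales the computed delta before it is applied; lr=1.0 matches the original paper.
opt = optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-6)
```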
116e0c7f38 Merge commit '45596d52897fb187701943cb77456ff1e7249989' 2017-01-23 14:37:44 -08:00
45596d5289 Add contiguity checks to THCUNN 2017-01-23 14:17:51 -08:00
342e7b873d fixing THPP cmake for cmake < 3.1 (#559) 2017-01-23 14:47:06 -05:00
00410c4496 Fix broken THNN groups in conv functions 2017-01-22 18:32:51 -05:00
8b9276bbee Fix view bug in Conv1d 2017-01-22 18:32:51 -05:00
3238786ea1 Improve optimizer error messages 2017-01-22 18:32:51 -05:00
07ebbcbcb3 Add Parameter docs 2017-01-22 18:32:51 -05:00
ca555abcf9 fix comments 2017-01-22 18:02:40 -05:00
63893c3fa2 Fix auto-gpu semantics for indexing 2017-01-22 18:02:40 -05:00
f8ae34706e Port L-BFGS from Lua optim 2017-01-22 18:02:40 -05:00
7179002bfb cuda implementation of Gated Linear Unit 2017-01-19 23:01:30 -08:00
43b5be1d78 added c implementation of GatedLinearUnit 2017-01-19 22:18:08 -08:00
b5f6fdb814 Using accreal instead of real in the API
This is done to be consistent with the changes made to cunn
2017-01-17 16:58:19 -08:00
a69d819901 Converting all instances of real to accreal in libTHCUNN
This is because the current version of luaffifb fails to pass
custom structs (i.e. half) as arguments or accept them as return
values.

The accreal parameters are immediately converted to real internally.
This is done to ensure none of the internal code needs to be changed.

This change also removes transform_reals_to_half which is no longer
necessary.

Change-Id: I978151d001de5492576fb0eddfa0608cd4e99149
2017-01-17 16:06:42 -08:00
fef2b1526d Adding macros to convert between real and accreal 2017-01-17 15:14:45 -08:00
3719994c96 Remove redundant code in THGenerateAllTypes.h 2017-01-17 15:12:43 -08:00
4461ae8090 include cstddef for msvc 2017-01-15 23:45:48 +08:00
2b948c42cd Add SpatialAdaptiveAveragePooling. 2017-01-14 19:44:07 -06:00
b2ae054410 Add SpatialAdaptiveAveragePooling. 2017-01-14 15:27:52 -06:00
2a974f5ca2 Fix 1.3.2 compilation 2016-12-08 09:11:43 -08:00
648e9fbb58 Adding missing file 2016-12-05 18:06:24 -08:00
34d27771c6 1.3.2 release
Broadcast tuning
Better checking of inputs
Copy/reduce code simplification
2016-12-01 15:17:50 -08:00
1093821c33 Replace min BW by average BW in tests 2016-12-01 15:16:35 -08:00
ddddfba1c0 Merge pull request #54 from peterhj/peterhj-staticlib
Add a static library target "staticlib" to the Makefile.
2016-11-28 09:15:39 -08:00
5765d608cc Add a static library target "staticlib" to the Makefile.
Rename the static library "libnccl_static.a" to disambiguate from the
dynamic libraries.
2016-11-24 11:31:03 -08:00
c2c515516b Remove irrelevant output from ncclReduce Fortran tests 2016-11-21 10:18:04 -08:00
9c18468fe2 Add Copyright header to Fortran bindings source files 2016-11-21 10:17:58 -08:00
5f2b32e45b Add Fortran bindings 2016-11-17 15:33:34 -08:00
534b9a1697 Bump to 1.3.1 2016-10-13 10:33:05 -07:00
b2781d0501 Fix primitives function prototype 2016-10-13 10:32:42 -07:00
bf7d1514f7 NVML (libwrap) : import the needed definitions 2016-10-13 10:28:59 -07:00
8bb06c94be Improved allreduce segmentation for small sizes 2016-10-07 12:42:23 -07:00
828 changed files with 53834 additions and 18301 deletions

.gitignore (6 lines changed)

@ -2,6 +2,7 @@ build/
dist/
torch.egg-info/
*/**/__pycache__
torch/version.py
torch/csrc/generic/TensorMethods.cpp
torch/lib/*.so*
torch/lib/*.dylib*
@ -15,8 +16,12 @@ torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp
torch/csrc/nn/THCUNN.cwrap
torch/csrc/nn/THCUNN.cpp
torch/csrc/nn/THNN_generic.cwrap
torch/csrc/nn/THNN_generic.cpp
torch/csrc/nn/THNN_generic.h
docs/src/**/*
test/data/legacy_modules.t7
test/data/gpu_tensors.pt
test/htmlcov
test/.coverage
*/*.pyc
@ -27,3 +32,4 @@ test/.coverage
*/*.so*
*/**/*.so*
*/**/*.dylib*
test/data/legacy_serialized.pt

.travis.yml

@ -4,16 +4,26 @@ python:
- 2.7.8
- 2.7
- 3.5
- 3.6
- nightly
cache:
- ccache
- directories:
- $HOME/.ccache
install:
- export CC="gcc-4.8"
- export CXX="g++-4.8"
- travis_retry pip install -r requirements.txt
- travis_retry pip install .
- unset CCACHE_DISABLE
- export CCACHE_DIR=$HOME/.ccache
- export CC="ccache gcc-4.8"
- export CXX="ccache g++-4.8"
- ccache --show-stats
- travis_retry pip install --upgrade pip setuptools wheel
- travis_retry pip install -r requirements.txt --only-binary=scipy
- python setup.py install
script:
- ./test/run_test.sh
- OMP_NUM_THREADS=2 ./test/run_test.sh
addons:
apt:
@ -30,3 +40,9 @@ sudo: false
matrix:
fast_finish: true
include:
env: LINT_CHECK
python: "2.7"
addons: true
install: pip install flake8
script: flake8

CONTRIBUTING.md (new file, 120 lines)

@ -0,0 +1,120 @@
## Contributing to PyTorch
If you are interested in contributing to PyTorch, your contributions will fall
into two categories:
1. You want to propose a new Feature and implement it
- post about your intended feature, and we shall discuss the design and
implementation. Once we agree that the plan looks good, go ahead and implement it.
2. You want to implement a feature or bug-fix for an outstanding issue
- Look at the outstanding issues here: https://github.com/pytorch/pytorch/issues
- Especially look at the Low Priority and Medium Priority issues
- Pick an issue and comment that you would like to work on it
- If you need more context on a particular issue, please ask and we shall provide.
Once you finish implementing a feature or bugfix, please send a Pull Request to
https://github.com/pytorch/pytorch
If you are not familiar with creating a Pull Request, here are some guides:
- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request
- https://help.github.com/articles/creating-a-pull-request/
## Developing locally with PyTorch
To locally develop with PyTorch, here are some tips:
1. Uninstall all existing pytorch installs
```
conda uninstall pytorch
pip uninstall torch
pip uninstall torch # run this command twice
```
2. Locally clone a copy of PyTorch from source:
```
git clone https://github.com/pytorch/pytorch
cd pytorch
```
3. Install PyTorch in `build develop` mode:
A full set of instructions on installing PyTorch from Source are here:
https://github.com/pytorch/pytorch#from-source
The change you have to make is to replace
`python setup.py install`
with
```
python setup.py build develop
```
This is especially useful if you are only changing Python files.
This mode will symlink the python files from the current local source tree into the
python install.
Hence, if you modify a python file, you do not need to reinstall pytorch again and again.
For example:
- Install local pytorch in `build develop` mode
- modify your python file torch/__init__.py (for example)
- test functionality
- modify your python file torch/__init__.py
- test functionality
- modify your python file torch/__init__.py
- test functionality
You do not need to repeatedly install after modifying python files.
#### C++ Development tips
When you are developing on the C++ side of things, the environment variables `DEBUG` and `NO_CUDA` are helpful.
- `DEBUG=1` will enable debug builds (-g -O0)
- `NO_CUDA=1` will disable compiling CUDA (in case you are developing on something not CUDA related), to save compile time.
For example:
```
NO_CUDA=1 DEBUG=1 python setup.py build develop
```
Also, if you are developing a lot, using ccache is a real time-saver. By default, ccache does not properly support CUDA stuff, so here are the instructions for installing a custom `ccache` fork that has CUDA support:
```
# install and export ccache
if ! ls ~/ccache/bin/ccache
then
sudo apt-get update
sudo apt-get install -y automake autoconf
sudo apt-get install -y asciidoc
mkdir -p ~/ccache
pushd /tmp
rm -rf ccache
git clone https://github.com/colesbury/ccache -b ccbin
pushd ccache
./autogen.sh
./configure
make install prefix=~/ccache
popd
popd
mkdir -p ~/ccache/lib
mkdir -p ~/ccache/cuda
ln -s ~/ccache/bin/ccache ~/ccache/lib/cc
ln -s ~/ccache/bin/ccache ~/ccache/lib/c++
ln -s ~/ccache/bin/ccache ~/ccache/lib/gcc
ln -s ~/ccache/bin/ccache ~/ccache/lib/g++
ln -s ~/ccache/bin/ccache ~/ccache/cuda/nvcc
~/ccache/bin/ccache -M 25Gi
fi
export PATH=~/ccache/lib:$PATH
export CUDA_NVCC_EXECUTABLE=~/ccache/cuda/nvcc
```
Hope this helps, and thanks for considering contributing.

Dockerfile (new file, 36 lines)

@ -0,0 +1,36 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
ENV CUDNN_VERSION 6.0.20
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cmake \
git \
curl \
ca-certificates \
libjpeg-dev \
libpng-dev \
libcudnn6=$CUDNN_VERSION-1+cuda8.0 \
libcudnn6-dev=$CUDNN_VERSION-1+cuda8.0 && \
rm -rf /var/lib/apt/lists/*
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda install conda-build && \
/opt/conda/bin/conda create -y --name pytorch-py35 python=3.5.2 numpy pyyaml scipy ipython mkl && \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/pytorch-py35/bin:$PATH
RUN conda install --name pytorch-py35 -c soumith magma-cuda80
# This must be done before pip so that requirements.txt is available
WORKDIR /opt/pytorch
COPY . .
RUN TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
python setup.py install
WORKDIR /workspace
RUN chmod -R a+w /workspace

README.md (111 lines changed)

@ -14,31 +14,48 @@ We are in an early-release Beta. Expect some adventures and rough edges.
- [Installation](#installation)
- [Binaries](#binaries)
- [From source](#from-source)
- [Docker image](#docker-image)
- [Getting Started](#getting-started)
- [Communication](#communication)
- [Releases and Contributing](#releases-and-contributing)
- [The Team](#the-team)
| Python | **`Linux CPU`** | **`Linux GPU`** |
|--------|--------------------|------------------|
| 2.7.8 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
| 2.7 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py2)](https://build.pytorch.org/job/pytorch-master-py2) |
| 3.5 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py3)](https://build.pytorch.org/job/pytorch-master-py3) |
| Nightly| [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
| System | Python | Status |
| --- | --- | --- |
| Linux CPU | 2.7.8, 2.7, 3.5, nightly | [![Build Status](https://travis-ci.org/pytorch/pytorch.svg?branch=master)](https://travis-ci.org/pytorch/pytorch) |
| Linux GPU | 2.7 | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py2)](https://build.pytorch.org/job/pytorch-master-py2) |
| Linux GPU | 3.5 | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py3)](https://build.pytorch.org/job/pytorch-master-py3) |
## More about PyTorch
At a granular level, PyTorch is a library that consists of the following components:
| \_ | \_ |
| ------------------------ | --- |
| torch | a Tensor library like NumPy, with strong GPU support |
| torch.autograd | a tape based automatic differentiation library that supports all differentiable Tensor operations in torch |
| torch.nn | a neural networks library deeply integrated with autograd designed for maximum flexibility |
| torch.optim | an optimization package to be used with torch.nn with standard optimization methods such as SGD, RMSProp, LBFGS, Adam etc. |
| torch.multiprocessing | python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. |
| torch.utils | DataLoader, Trainer and other utility functions for convenience |
| torch.legacy(.nn/.optim) | legacy code that has been ported over from torch for backward compatibility reasons |
<table>
<tr>
<td><b> torch </b></td>
<td> a Tensor library like NumPy, with strong GPU support </td>
</tr>
<tr>
<td><b> torch.autograd </b></td>
<td> a tape based automatic differentiation library that supports all differentiable Tensor operations in torch </td>
</tr>
<tr>
<td><b> torch.nn </b></td>
<td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td>
</tr>
<tr>
<td><b> torch.multiprocessing </b></td>
<td> python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. </td>
</tr>
<tr>
<td><b> torch.utils </b></td>
<td> DataLoader, Trainer and other utility functions for convenience </td>
</tr>
<tr>
<td><b> torch.legacy(.nn/.optim) </b></td>
<td> legacy code that has been ported over from torch for backward compatibility reasons </td>
</tr>
</table>
Usually one uses PyTorch either as:
@ -101,7 +118,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
PyTorch has minimal framework overhead. We integrate acceleration libraries
such as Intel MKL and NVIDIA (CuDNN, NCCL) to maximize speed.
At the core, it's CPU and GPU Tensor and Neural Network backends
At the core, its CPU and GPU Tensor and Neural Network backends
(TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API.
They are mature and have been tested for years.
@ -118,52 +135,82 @@ Writing new neural network modules, or interfacing with PyTorch's Tensor API was
and with minimal abstractions.
You can write new neural network layers in Python using the torch API
[or your favorite numpy based libraries such as SciPy](https://github.com/pytorch/tutorials/blob/master/Creating%20extensions%20using%20numpy%20and%20scipy.ipynb).
[or your favorite numpy based libraries such as SciPy](http://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html).
If you want to write your layers in C/C++, we provide an extension API based on
[cffi](http://cffi.readthedocs.io/en/latest/) that is efficient and with minimal boilerplate.
There is no wrapper code that needs to be written. [You can see an example here](https://github.com/pytorch/extension-ffi).
[cffi](http://cffi.readthedocs.io/en/latest/) that is efficient and with minimal boilerplate.
There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/c_extension.html) and [an example here](https://github.com/pytorch/extension-ffi).
## Installation
### Binaries
- Anaconda
```bash
conda install pytorch torchvision -c soumith
```
Commands to install from binaries via Conda or pip wheels are on our website:
[http://pytorch.org](http://pytorch.org)
### From source
Instructions for an Anaconda environment.
If you are installing from source, we highly recommend installing an [Anaconda](https://www.continuum.io/downloads) environment.
You will get a high-quality BLAS library (MKL) and you get a controlled compiler version regardless of your Linux distro.
Once you have [anaconda](https://www.continuum.io/downloads) installed, here are the instructions.
If you want to compile with CUDA support, install
- [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) 7.5 or above
- [NVIDIA CuDNN](https://developer.nvidia.com/cudnn) v5.x
- [NVIDIA CuDNN](https://developer.nvidia.com/cudnn) v5.x or above
If you want to disable CUDA support, export environment variable `NO_CUDA=1`.
#### Install optional dependencies
On Linux
```bash
export CMAKE_PREFIX_PATH=[anaconda root directory]
# Install basic dependencies
conda install numpy mkl setuptools cmake gcc cffi
conda install numpy pyyaml mkl setuptools cmake gcc cffi
# On Linux, add LAPACK support for the GPU
conda install -c soumith magma-cuda75 # or magma-cuda80 if CUDA 8.0
# Add LAPACK support for the GPU
conda install -c soumith magma-cuda80 # or magma-cuda75 if CUDA 7.5
```
On OSX
```bash
export CMAKE_PREFIX_PATH=[anaconda root directory]
conda install numpy pyyaml setuptools cmake cffi
```
#### Install PyTorch
On Linux
```bash
export MACOSX_DEPLOYMENT_TARGET=10.9 # if OSX
pip install -r requirements.txt
python setup.py install
```
On OSX
```bash
MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
```
### Docker image
Dockerfile is supplied to build images with cuda support and cudnn v6. Build as usual
```
docker build -t pytorch-cudnnv6 .
```
and run with nvidia-docker:
```
nvidia-docker run --rm -ti --ipc=host pytorch-cudnnv6
```
Please note that PyTorch uses shared memory to share data between processes, so if torch multiprocessing is used (e.g.
for multithreaded data loaders), the default shared memory segment size that the container runs with is not enough; you
should increase the shared memory size with either the --ipc=host or --shm-size command line options to nvidia-docker run.
## Getting Started
Three pointers to get you started:
- [Tutorials: notebooks to get you started with understanding and using PyTorch](https://github.com/pytorch/tutorials)
- [Tutorials: get you started with understanding and using PyTorch](http://pytorch.org/tutorials/)
- [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples)
- The API Reference: [http://pytorch.org/docs/](http://pytorch.org/docs/)
@ -176,7 +223,7 @@ Three pointers to get you started:
## Releases and Contributing
PyTorch has a 90 day release cycle (major releases).
It's current state is Beta (v0.1.6), we expect no obvious bugs. Please let us know if you encounter a bug by [filing an issue](https://github.com/pytorch/pytorch/issues).
It's current state is Beta, we expect no obvious bugs. Please let us know if you encounter a bug by [filing an issue](https://github.com/pytorch/pytorch/issues).
We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion.

select_compute_arch.cmake (CUDA architecture detection helper)

@ -63,11 +63,16 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
"}\n")
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
"-ccbin" ${CMAKE_CXX_COMPILER}
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(nvcc_res EQUAL 0)
# only keep the last line of nvcc_out
STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
list(GET nvcc_out -1 nvcc_out)
string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
endif()
@ -116,13 +121,13 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
set(add_ptx TRUE)
set(arch_name ${CMAKE_MATCH_1})
endif()
if(arch_name MATCHES "([0-9]\\.[0-9])$")
if(arch_name MATCHES "(^[0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
set(arch_bin ${CMAKE_MATCH_1})
set(arch_ptx ${arch_bin})
else()
# Look for it in our list of known architectures
if(${arch_name} STREQUAL "Fermi")
set(arch_bin 2.0 "2.1(2.0)")
set(arch_bin "2.0 2.1(2.0)")
elseif(${arch_name} STREQUAL "Kepler+Tegra")
set(arch_bin 3.2)
elseif(${arch_name} STREQUAL "Kepler+Tesla")
@ -173,11 +178,11 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
# Tell NVCC to add binaries for the specified GPUs
foreach(arch ${cuda_arch_bin})
if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
# User explicitly specified PTX for the concrete BIN
# User explicitly specified ARCH for the concrete CODE
list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
else()
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
# User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
list(APPEND nvcc_archs_readable sm_${arch})
endif()

docs/Makefile

@ -12,7 +12,14 @@ BUILDDIR = build
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
docset: html
doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/pytorch-logo-flame.png --enable-js --online-redirect-url http://pytorch.org/docs/ --force $(BUILDDIR)/html/
# Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution.
cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png
convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png
.PHONY: help Makefile docset
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).

(new binary image file, 1010 B — not shown)
(new SVG file: the PyTorch flame logo)

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
height="40.200001"
width="40.200001"
xml:space="preserve"
viewBox="0 0 40.200002 40.2"
y="0px"
x="0px"
id="Layer_1"
version="1.1"><metadata
id="metadata4717"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title></dc:title></cc:Work></rdf:RDF></metadata><defs
id="defs4715" /><style
id="style4694"
type="text/css">
.st0{fill:#F05732;}
.st1{fill:#9E529F;}
.st2{fill:#333333;}
</style><path
style="fill:#f05732"
id="path4696"
d="m 26.975479,12.199999 c -1.3,-1 -1.8,3.9 -4.4,3.9 -3,0 -4,-12.9999998 -6.3,-12.9999998 -0.7,0 -0.8,-0.4 -7.9000003,21.2999998 -2.9000001,9 4.4000003,15.8 11.8000003,15.8 4.6,0 12.3,-3 12.3,-12.6 0,-7.1 -3.5,-13.9 -5.5,-15.4 z m -6.9,23.1 c -3.7,0 -6.7,-3.1 -6.7,-7 0,-3.9 3,-7 6.7,-7 3.7,0 6.7,3.1 6.7,7 0,3.8 -3,7 -6.7,7 z"
class="st0" /><path
style="fill:#9e529f"
id="path4698"
d="m 24.075479,-7.6293945e-7 c -0.5,0 -1.8,2.49999996293945 -1.8,3.59999996293945 0,1.5 1,2 1.8,2 0.8,0 1.8,-0.5 1.8,-2 -0.1,-1.1 -1.4,-3.59999996293945 -1.8,-3.59999996293945 z"
class="st1" /></svg>


docs/source/notes/autograd.rst

@ -20,7 +20,7 @@ of a couple in-place methods, that would overwrite inputs required for
gradient computation). In most cases Tensors can be safely replaced with
Variables and the code will keep working just fine. Because of this,
we're not documenting all the operations on variables, and you should
refere to :class:`torch.Tensor` docs for this purpose.
refer to :class:`torch.Tensor` docs for this purpose.
In-place operations on Variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

docs/source/conf.py

@ -74,9 +74,11 @@ author = 'Torch Contributors'
# built documents.
#
# The short X.Y version.
version = '0.1.6'
# TODO: change to [:2] at v1.0
version = '.'.join(torch.__version__.split('+')[0].split('.')[:3])
# The full version, including alpha/beta/rc tags.
release = '0.1.6'
# TODO: verify this works as expected
release = torch.__version__.split('+')[0]
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -201,12 +203,16 @@ from docutils import nodes
from sphinx.util.docfields import TypedField
from sphinx import addnodes
def patched_make_field(self, types, domain, items):
def patched_make_field(self, types, domain, items, **kw):
# `kw` catches `env=None` needed for newer sphinx while maintaining
# backwards compatibility when passed along further down!
# type: (List, unicode, Tuple) -> nodes.field
def handle_item(fieldarg, content):
par = nodes.paragraph()
par += addnodes.literal_strong('', fieldarg) # Patch: this line added
#par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
# par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
# addnodes.literal_strong))
if fieldarg in types:
par += nodes.Text(' (')
@ -221,7 +227,7 @@ def patched_make_field(self, types, domain, items):
typename = typename.replace('float', 'python:float')
typename = typename.replace('type', 'python:type')
par.extend(self.make_xrefs(self.typerolename, domain, typename,
addnodes.literal_emphasis))
addnodes.literal_emphasis, **kw))
else:
par += fieldtype
par += nodes.Text(')')

docs/source/data.rst

@ -5,3 +5,8 @@ torch.utils.data
.. autoclass:: Dataset
.. autoclass:: TensorDataset
.. autoclass:: DataLoader
.. autoclass:: torch.utils.data.sampler.Sampler
.. autoclass:: torch.utils.data.sampler.SequentialSampler
.. autoclass:: torch.utils.data.sampler.RandomSampler
.. autoclass:: torch.utils.data.sampler.SubsetRandomSampler
.. autoclass:: torch.utils.data.sampler.WeightedRandomSampler

docs/source/index.rst

@ -24,6 +24,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
torch
tensors
sparse
storage
nn
optim

docs/source/nn.rst

@ -7,6 +7,12 @@ torch.nn
.. automodule:: torch.nn
.. currentmodule:: torch.nn
Parameters
----------
.. autoclass:: Parameter
:members:
Containers
----------------------------------
@ -16,6 +22,24 @@ Containers
.. autoclass:: Module
:members:
:hidden:`Sequential`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Sequential
:members:
:hidden:`ModuleList`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ModuleList
:members:
:hidden:`ParameterList`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ParameterList
:members:
Convolution Layers
----------------------------------
@ -126,6 +150,31 @@ Pooling Layers
.. autoclass:: LPPool2d
:members:
:hidden:`AdaptiveMaxPool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveMaxPool1d
:members:
:hidden:`AdaptiveMaxPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveMaxPool2d
:members:
:hidden:`AdaptiveAvgPool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool1d
:members:
:hidden:`AdaptiveAvgPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool2d
:members:
Non-linear Activations
----------------------------------
@ -253,6 +302,23 @@ Normalization layers
.. autoclass:: BatchNorm3d
:members:
:hidden:`InstanceNorm1d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: InstanceNorm1d
:members:
:hidden:`InstanceNorm2d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: InstanceNorm2d
:members:
:hidden:`InstanceNorm3d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: InstanceNorm3d
:members:
Recurrent layers
----------------------------------
@ -334,6 +400,15 @@ Sparse layers
.. autoclass:: Embedding
:members:
Distance functions
----------------------------------
:hidden:`PairwiseDistance`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: PairwiseDistance
:members:
Loss functions
----------------------------------
@ -362,6 +437,12 @@ Loss functions
.. autoclass:: NLLLoss
:members:
:hidden:`NLLLoss2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: NLLLoss2d
:members:
:hidden:`KLDivLoss`
~~~~~~~~~~~~~~~~~~~
@ -432,6 +513,19 @@ Vision layers
.. autoclass:: PixelShuffle
:members:
:hidden:`UpsamplingNearest2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingNearest2d
:members:
:hidden:`UpsamplingBilinear2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingBilinear2d
:members:
Multi-GPU layers
----------------
@ -441,6 +535,36 @@ Multi-GPU layers
.. autoclass:: DataParallel
:members:
Utilities
---------
:hidden:`clip_grad_norm`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.clip_grad_norm
.. currentmodule:: torch.nn.utils.rnn
:hidden:`PackedSequence`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.PackedSequence
:hidden:`pack_padded_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pack_padded_sequence
:hidden:`pad_packed_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pad_packed_sequence
torch.nn.functional
===================
@ -532,6 +656,27 @@ Pooling functions
.. autofunction:: lp_pool2d
:hidden:`adaptive_max_pool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_max_pool1d
:hidden:`adaptive_max_pool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_max_pool2d
:hidden:`adaptive_avg_pool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_avg_pool1d
:hidden:`adaptive_avg_pool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_avg_pool2d
Non-linear activation functions
-------------------------------
@ -655,6 +800,15 @@ Dropout functions
.. autofunction:: dropout
Distance functions
----------------------------------
:hidden:`pairwise_distance`
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pairwise_distance
Loss functions
--------------
@ -691,3 +845,25 @@ Vision functions
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pixel_shuffle
:hidden:`pad`
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pad
torch.nn.init
=============
.. currentmodule:: torch.nn.init
.. autofunction:: calculate_gain
.. autofunction:: uniform
.. autofunction:: normal
.. autofunction:: constant
.. autofunction:: eye
.. autofunction:: dirac
.. autofunction:: xavier_uniform
.. autofunction:: xavier_normal
.. autofunction:: kaiming_uniform
.. autofunction:: kaiming_normal
.. autofunction:: orthogonal
.. autofunction:: sparse

docs/source/notes/cuda.rst

@ -1,3 +1,5 @@
.. _cuda-semantics:
CUDA semantics
==============
@ -29,12 +31,15 @@ Below you can find a small example showcasing this::
b = torch.FloatTensor(1).cuda()
# a.get_device() == b.get_device() == 1
c = a + b
# c.get_device() == 1
z = x + y
# z.get_device() == 1
# z.get_device() == 0
# even within a context, you can give a GPU id to the .cuda call
c = torch.randn(2).cuda(2)
# c.get_device() == 2
d = torch.randn(2).cuda(2)
# d.get_device() == 2
Best practices
--------------
@ -57,4 +62,22 @@ Just pass an additional ``async=True`` argument to a :meth:`~torch.Tensor.cuda`
call. This can be used to overlap data transfers with computation.
You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
pinned memory by passing ``pinned=True`` to its constructor.
pinned memory by passing ``pin_memory=True`` to its constructor.
.. _cuda-nn-dataparallel-instead:
Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,
test your use case.
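A minimal sketch of the recommended pattern (the module and device ids are illustrative)::

    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    model = nn.Linear(10, 5)
    net = nn.DataParallel(model, device_ids=[0, 1])  # replicate across GPUs 0 and 1
    output = net(Variable(torch.randn(8, 10)))       # batch dim 0 is scattered, outputs gathered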
There are significant caveats to using CUDA models with
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
requirements exactly, it is likely that your program will have incorrect or
undefined behavior.

docs/source/notes/extending.rst

@ -86,6 +86,19 @@ small helper functions::
# return it.
return Linear()(input, weight, bias)
You probably want to check whether the backward method you implemented actually
computes the derivatives of your function. You can do so by comparing against
numerical approximations computed with small finite differences::
from torch.autograd import gradcheck
# gradcheck takes a tuple of tensors as input and checks whether the gradient
# evaluated with these tensors is close enough to the numerical
# approximations, returning True if they all verify this condition.
input = (Variable(torch.randn(20,20).double(), requires_grad=True),)
test = gradcheck(Linear(), input, eps=1e-6, atol=1e-4)
print(test)
Extending :mod:`torch.nn`
-------------------------
@ -132,7 +145,7 @@ This is how a ``Linear`` module can be implemented::
# nn.Parameters can never be volatile and, different than Variables,
# they require gradients by default.
self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
if bias is not None:
if bias:
self.bias = nn.Parameter(torch.Tensor(output_features))
else:
# You should always register all possible parameters, but the
@ -144,9 +157,9 @@ This is how a ``Linear`` module can be implemented::
if bias is not None:
self.bias.data.uniform_(-0.1, 0.1)
def forward(self, input):
# See the autograd section for explanation of what happens here.
return Linear()(input, self.weight, self.bias)
def forward(self, input):
# See the autograd section for explanation of what happens here.
return Linear()(input, self.weight, self.bias)
Writing custom C extensions

docs/source/notes/multiprocessing.rst

@ -33,6 +33,8 @@ by the CUDA runtime.
kinds of data should be done with care. Note that this restriction doesn't
apply to shared CPU memory.
See also: :ref:`cuda-nn-dataparallel-instead`
Best practices and tips
-----------------------
@ -100,11 +102,6 @@ example below as well::
from model import MyModel
def train(model):
# This for loop will break sharing of gradient buffers. It's not
# necessary but it reduces the contention, and has a small memory cost
# (equal to the total size of parameters).
for param in model.parameters():
param.grad.data = param.grad.data.clone()
# Construct data_loader, optimizer, etc.
for data, labels in data_loader:
optimizer.zero_grad()

docs/source/notes/serialization.rst (new file, 34 lines)

@ -0,0 +1,34 @@
Serialization semantics
=======================
Best practices
--------------
.. _recommend-saving-models:
Recommended approach for saving a model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are two main approaches for serializing and restoring a model.
The first (recommended) saves and loads only the model parameters::
torch.save(the_model.state_dict(), PATH)
Then later::
the_model = TheModelClass(*args, **kwargs)
the_model.load_state_dict(torch.load(PATH))
The second saves and loads the entire model::
torch.save(the_model, PATH)
Then later::
the_model = torch.load(PATH)
However in this case, the serialized data is bound to the specific classes
and the exact directory structure used, so it can break in various ways when
used in other projects, or after some serious refactors.

View File

@ -106,6 +106,8 @@ Algorithms
:members:
.. autoclass:: ASGD
:members:
.. autoclass:: LBFGS
:members:
.. autoclass:: RMSprop
:members:
.. autoclass:: Rprop

docs/source/sparse.rst (new file, 89 lines)

@ -0,0 +1,89 @@
.. currentmodule:: torch.sparse
Sparse tensors
==============
.. warning::
This API is currently experimental and may change in the near future.
Torch supports sparse tensors in COO(rdinate) format, which can
efficiently store and process tensors for which the majority of elements
are zeros.
A sparse tensor is represented as a pair of dense tensors: a tensor
which contains the actual values :class:`torch.sparse.values`, and a
tensor which contains the coordinates of those values
:class:`torch.sparse.indices`. A sparse tensor can be constructed
by providing these two tensors, as well as the size of the sparse tensor
(which cannot be inferred from these tensors!)
>>> i = torch.LongTensor([[0, 1], [2, 0]])
>>> v = torch.FloatTensor([3, 4])
>>> torch.sparse.FloatTensor(i, v, torch.Size([2,3])).to_dense()
0 0 3
4 0 0
[torch.FloatTensor of size 2x3]
You can also construct hybrid sparse tensors, where only the first n
dimensions are sparse, and the rest of the dimensions are dense.
>>> i = torch.LongTensor([[2, 4]])
>>> v = torch.FloatTensor([[1, 3], [5, 7]])
>>> torch.sparse.FloatTensor(i, v).to_dense()
0 0
0 0
1 3
0 0
5 7
[torch.FloatTensor of size 5x2]
An empty sparse tensor can be constructed by specifying its size:
>>> torch.sparse.FloatTensor(2, 3)
SparseFloatTensor of size 2x3 with indices:
[torch.LongTensor with no dimension]
and values:
[torch.FloatTensor with no dimension]
Sparse tensors can have duplicate entries for an index; such a tensor is
called non-coalesced. Duplicate entries are summed together when
coalescing (or converting to another representation). Some operations
(for example, :func:`torch.FloatTensor.add`) produce duplicate entries;
if you repeatedly perform these operations, you should coalesce your
sparse tensors to prevent them from growing too large.
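For instance, two values written at the same coordinate are summed when the
tensor is converted (output formatting is illustrative):

>>> i = torch.LongTensor([[0, 0], [2, 2]])
>>> v = torch.FloatTensor([3, 4])
>>> torch.sparse.FloatTensor(i, v, torch.Size([2, 3])).to_dense()
0 0 7
0 0 0
[torch.FloatTensor of size 2x3]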
.. class:: FloatTensor()
.. automethod:: add
.. automethod:: add_
.. automethod:: clone
.. automethod:: contiguous
.. automethod:: dim
.. automethod:: div
.. automethod:: div_
.. automethod:: get_device
.. automethod:: hspmm
.. automethod:: indices
.. automethod:: is_contiguous
.. automethod:: mm
.. automethod:: mul
.. automethod:: mul_
.. automethod:: nnz
.. automethod:: resizeAs_
.. automethod:: size
.. automethod:: spadd
.. automethod:: sparse_mask
.. automethod:: spmm
.. automethod:: sspaddmm
.. automethod:: sspmm
.. automethod:: sub
.. automethod:: sub_
.. automethod:: t_
.. automethod:: toDense
.. automethod:: transpose
.. automethod:: transpose_
.. automethod:: values
.. automethod:: zero_


@ -14,8 +14,8 @@ Data type CPU tensor GPU tensor
32-bit floating point :class:`torch.FloatTensor` :class:`torch.cuda.FloatTensor`
64-bit floating point :class:`torch.DoubleTensor` :class:`torch.cuda.DoubleTensor`
16-bit floating point N/A :class:`torch.cuda.HalfTensor`
8-bit integer (signed) :class:`torch.ByteTensor` :class:`torch.cuda.ByteTensor`
8-bit integer (unsigned) :class:`torch.CharTensor` :class:`torch.cuda.CharTensor`
8-bit integer (unsigned) :class:`torch.ByteTensor` :class:`torch.cuda.ByteTensor`
8-bit integer (signed) :class:`torch.CharTensor` :class:`torch.cuda.CharTensor`
16-bit integer (signed) :class:`torch.ShortTensor` :class:`torch.cuda.ShortTensor`
32-bit integer (signed) :class:`torch.IntTensor` :class:`torch.cuda.IntTensor`
64-bit integer (signed) :class:`torch.LongTensor` :class:`torch.cuda.LongTensor`
@ -251,7 +251,6 @@ view of a storage and defines numeric operations on it.
.. automethod:: scatter_
.. automethod:: select
.. automethod:: set_
.. automethod:: set_index
.. automethod:: share_memory_
.. automethod:: short
.. automethod:: sigmoid


@ -8,6 +8,7 @@ Tensors
.. autofunction:: is_storage
.. autofunction:: set_default_tensor_type
.. autofunction:: numel
.. autofunction:: set_printoptions
Creation Ops
@ -20,6 +21,7 @@ Creation Ops
.. autofunction:: rand
.. autofunction:: randn
.. autofunction:: randperm
.. autofunction:: arange
.. autofunction:: range
.. autofunction:: zeros
@ -37,6 +39,8 @@ Indexing, Slicing, Joining, Mutating Ops
.. autofunction:: stack
.. autofunction:: t
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze
Random sampling
@ -157,6 +161,8 @@ BLAS and LAPACK Operations
.. autofunction:: addr
.. autofunction:: baddbmm
.. autofunction:: bmm
.. autofunction:: btrifact
.. autofunction:: btrisolve
.. autofunction:: dot
.. autofunction:: eig
.. autofunction:: gels


@ -3,11 +3,13 @@ torchvision.datasets
The following dataset loaders are available:
- `MNIST`_
- `COCO (Captioning and Detection)`_
- `LSUN Classification`_
- `ImageFolder`_
- `Imagenet-12`_
- `CIFAR10 and CIFAR100`_
- `STL10`_
Datasets have the API:
@ -33,6 +35,15 @@ but they all take the keyword args:
transforms it. For example, take in the caption string and return a
tensor of word indices.
MNIST
~~~~~
``dset.MNIST(root, train=True, transform=None, target_transform=None, download=False)``
- ``root`` : root directory of the dataset, where ``processed/training.pt`` and ``processed/test.pt`` exist.
- ``train`` : ``True`` = Training set, ``False`` = Test set
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in the root directory. If the dataset is already downloaded, place the processed dataset (a processing function is available in mnist.py) in the ``processed`` folder (see the example below).
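For example (path and transform are illustrative)::

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    train_set = dset.MNIST(root='./data', train=True,
                           transform=transforms.ToTensor(), download=True)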
COCO
~~~~
@ -82,11 +93,42 @@ LSUN
``dset.LSUN(db_path, classes='train', [transform, target_transform])``
- db\_path = root directory for the database files
- classes =
- train - all categories, training set
- val - all categories, validation set
- test - all categories, test set
- [bedroom\_train, church\_train, …] : a list of categories to load
- ``classes`` = ``train`` (all categories, training set), ``val`` (all categories, validation set), ``test`` (all categories, test set)
- [``bedroom_train``, ``church_train``, …] : a list of categories to load
ImageFolder
~~~~~~~~~~~
A generic data loader where the images are arranged in this way:
::
root/dog/xxx.png
root/dog/xxy.png
root/dog/xxz.png
root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
``dset.ImageFolder(root="root folder path", [transform, target_transform])``
It has the members:
- ``self.classes`` - The class names as a list
- ``self.class_to_idx`` - Corresponding class indices
- ``self.imgs`` - The list of (image path, class-index) tuples (see the example below)
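For example (root path illustrative)::

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    data = dset.ImageFolder(root='path/to/root',
                            transform=transforms.ToTensor())
    print(data.classes)        # e.g. ['cat', 'dog']
    img, class_idx = data[0]   # (transformed image, class index)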
Imagenet-12
~~~~~~~~~~~
This is simply implemented with an ImageFolder dataset.
The data is preprocessed `as described
here <https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset>`__.
`Here is an
example <https://github.com/pytorch/examples/blob/27e2a46c1d1505324032b1d94fc6ce24d5b67e97/imagenet/main.py#L48-L62>`__.
CIFAR
~~~~~
@ -99,11 +141,22 @@ CIFAR
``cifar-10-batches-py``
- ``train`` : ``True`` = Training set, ``False`` = Test set
- ``download`` : ``True`` = downloads the dataset from the internet and
puts it in root directory. If dataset already downloaded, do
puts it in root directory. If dataset already downloaded, doesn't do anything.
STL10
~~~~~
``dset.STL10(root, split='train', transform=None, target_transform=None, download=False)``
- ``root`` : root directory of dataset where there is folder ``stl10_binary``
- ``split`` : ``'train'`` = Training set, ``'test'`` = Test set, ``'unlabeled'`` = Unlabeled set, ``'train+unlabeled'`` = Training + Unlabeled set (missing label marked as ``-1``)
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in root directory. If dataset already downloaded, doesn't do anything.
.. _MNIST: #mnist
.. _COCO (Captioning and Detection): #coco
.. _LSUN Classification: #lsun
.. _ImageFolder: #imagefolder
.. _Imagenet-12: #imagenet-12
.. _CIFAR10 and CIFAR100: #cifar
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI
.. _STL10: #stl10
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI


@ -3,3 +3,6 @@ torchvision
The :mod:`torchvision` package consists of popular datasets, model
architectures, and common image transformations for computer vision.
.. automodule:: torchvision
:members:


@ -33,7 +33,7 @@ Conversion Transforms
.. autoclass:: ToPILImage
Generic Transofrms
Generic Transforms
------------------
.. autoclass:: Lambda

setup.py

@ -1,6 +1,9 @@
from setuptools import setup, Extension, distutils, Command, find_packages
import setuptools.command.build_ext
import setuptools.command.install
import setuptools.command.develop
import setuptools.command.build_py
import distutils.unixccompiler
import distutils.command.build
import distutils.command.clean
import platform
@ -13,18 +16,28 @@ from tools.setup_helpers.env import check_env_flag
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
DEBUG = check_env_flag('DEBUG')
WITH_DISTRIBUTED = check_env_flag('WITH_DISTRIBUTED')
WITH_DISTRIBUTED_MW = WITH_DISTRIBUTED and check_env_flag('WITH_DISTRIBUTED_MW')
WITH_NCCL = WITH_CUDA and platform.system() != 'Darwin'
SYSTEM_NCCL = False
################################################################################
# Monkey-patch setuptools to compile in parallel
################################################################################
original_link = distutils.unixccompiler.UnixCCompiler.link
def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None):
def parallelCCompile(self, sources, output_dir=None, macros=None,
include_dirs=None, debug=0, extra_preargs=None,
extra_postargs=None, depends=None):
# those lines are copied from distutils.ccompiler.CCompiler directly
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs)
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
output_dir, macros, include_dirs, sources, depends, extra_postargs)
cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
# compile using a thread pool
import multiprocessing.pool
def _single_compile(obj):
src, ext = build[obj]
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
@ -33,12 +46,23 @@ def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=N
return objects
def patched_link(self, *args, **kwargs):
_cxx = self.compiler_cxx
self.compiler_cxx = None
result = original_link(self, *args, **kwargs)
self.compiler_cxx = _cxx
return result
distutils.ccompiler.CCompiler.compile = parallelCCompile
distutils.unixccompiler.UnixCCompiler.link = patched_link
################################################################################
# Custom build commands
################################################################################
class build_deps(Command):
user_options = []
@ -53,6 +77,10 @@ class build_deps(Command):
build_all_cmd = ['bash', 'torch/lib/build_all.sh']
if WITH_CUDA:
build_all_cmd += ['--with-cuda']
if WITH_NCCL and not SYSTEM_NCCL:
build_all_cmd += ['--with-nccl']
if WITH_DISTRIBUTED:
build_all_cmd += ['--with-distributed']
if subprocess.call(build_all_cmd) != 0:
sys.exit(1)
generate_nn_wrappers()
@ -72,7 +100,30 @@ class build_module(Command):
self.run_command('build_ext')
class build_py(setuptools.command.build_py.build_py):
def run(self):
self.create_version_file()
setuptools.command.build_py.build_py.run(self)
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'torch', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
class develop(setuptools.command.develop.develop):
def run(self):
build_py.create_version_file()
setuptools.command.develop.develop.run(self)
class build_ext(setuptools.command.build_ext.build_ext):
def run(self):
# Print build options
if WITH_NUMPY:
@ -87,6 +138,12 @@ class build_ext(setuptools.command.build_ext.build_ext):
print('-- Detected CUDA at ' + CUDA_HOME)
else:
print('-- Not using CUDA')
if WITH_NCCL and SYSTEM_NCCL:
print('-- Using system provided NCCL library')
elif WITH_NCCL:
print('-- Building NCCL library')
else:
print('-- Not using NCCL')
# cwrap depends on pyyaml, so we can't import it earlier
from tools.cwrap import cwrap
@ -97,10 +154,11 @@ class build_ext(setuptools.command.build_ext.build_ext):
from tools.cwrap.plugins.KwargsPlugin import KwargsPlugin
from tools.cwrap.plugins.NullableArguments import NullableArguments
from tools.cwrap.plugins.CuDNNPlugin import CuDNNPlugin
from tools.cwrap.plugins.WrapDim import WrapDim
thp_plugin = THPPlugin()
cwrap('torch/csrc/generic/TensorMethods.cwrap', plugins=[
BoolOption(), thp_plugin, AutoGPU(condition='IS_CUDA'),
ArgcountSortPlugin(), KwargsPlugin()
ArgcountSortPlugin(), KwargsPlugin(), WrapDim()
])
cwrap('torch/csrc/cudnn/cuDNN.cwrap', plugins=[
CuDNNPlugin(), NullableArguments()
@ -116,6 +174,7 @@ class build(distutils.command.build.build):
class install(setuptools.command.install.install):
def run(self):
if not self.skip_build:
self.run_command('build_deps')
@ -123,6 +182,7 @@ class install(setuptools.command.install.install):
class clean(distutils.command.clean.clean):
def run(self):
import glob
with open('.gitignore', 'r') as f:
@ -138,12 +198,12 @@ class clean(distutils.command.clean.clean):
distutils.command.clean.clean.run(self)
################################################################################
# Configure compile flags
################################################################################
include_dirs = []
library_dirs = []
extra_link_args = []
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
@ -161,45 +221,67 @@ include_dirs += [
tmp_install_path + "/include",
tmp_install_path + "/include/TH",
tmp_install_path + "/include/THPP",
tmp_install_path + "/include/THNN",
]
extra_link_args.append('-L' + lib_path)
library_dirs.append(lib_path)
# we specify exact lib names to avoid conflict with lua-torch installs
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
THS_LIB = os.path.join(lib_path, 'libTHS.so.1')
THC_LIB = os.path.join(lib_path, 'libTHC.so.1')
THCS_LIB = os.path.join(lib_path, 'libTHCS.so.1')
THNN_LIB = os.path.join(lib_path, 'libTHNN.so.1')
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
THS_LIB = os.path.join(lib_path, 'libTHS.so.1')
THC_LIB = os.path.join(lib_path, 'libTHC.so.1')
THCS_LIB = os.path.join(lib_path, 'libTHCS.so.1')
THNN_LIB = os.path.join(lib_path, 'libTHNN.so.1')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.so.1')
THPP_LIB = os.path.join(lib_path, 'libTHPP.so.1')
THPP_LIB = os.path.join(lib_path, 'libTHPP.so.1')
THD_LIB = os.path.join(lib_path, 'libTHD.so.1')
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')
if platform.system() == 'Darwin':
TH_LIB = os.path.join(lib_path, 'libTH.1.dylib')
THS_LIB = os.path.join(lib_path, 'libTHS.1.dylib')
THC_LIB = os.path.join(lib_path, 'libTHC.1.dylib')
THCS_LIB = os.path.join(lib_path, 'libTHCS.1.dylib')
THNN_LIB = os.path.join(lib_path, 'libTHNN.1.dylib')
TH_LIB = os.path.join(lib_path, 'libTH.1.dylib')
THS_LIB = os.path.join(lib_path, 'libTHS.1.dylib')
THC_LIB = os.path.join(lib_path, 'libTHC.1.dylib')
THCS_LIB = os.path.join(lib_path, 'libTHCS.1.dylib')
THNN_LIB = os.path.join(lib_path, 'libTHNN.1.dylib')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.1.dylib')
THPP_LIB = os.path.join(lib_path, 'libTHPP.1.dylib')
THPP_LIB = os.path.join(lib_path, 'libTHPP.1.dylib')
THD_LIB = os.path.join(lib_path, 'libTHD.1.dylib')
NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')
if WITH_NCCL and subprocess.call('ldconfig -p | grep libnccl >/dev/null', shell=True) == 0:
SYSTEM_NCCL = True
main_compile_args = ['-D_THP_CORE']
main_libraries = ['shm']
main_link_args = [TH_LIB, THS_LIB, THPP_LIB]
main_link_args = [TH_LIB, THS_LIB, THPP_LIB, THNN_LIB]
main_sources = [
"torch/csrc/PtrWrapper.cpp",
"torch/csrc/Module.cpp",
"torch/csrc/Generator.cpp",
"torch/csrc/Size.cpp",
"torch/csrc/Exceptions.cpp",
"torch/csrc/Tensor.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/DynamicTypes.cpp",
"torch/csrc/byte_order.cpp",
"torch/csrc/utils.cpp",
"torch/csrc/utils/object_ptr.cpp",
"torch/csrc/utils/tuple_parser.cpp",
"torch/csrc/allocators.cpp",
"torch/csrc/serialization.cpp",
"torch/csrc/autograd/init.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/engine.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/grad_buffer.cpp",
"torch/csrc/autograd/python_function.cpp",
"torch/csrc/autograd/python_cpp_function.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_engine.cpp",
"torch/csrc/autograd/python_hook.cpp",
"torch/csrc/autograd/functions/batch_normalization.cpp",
"torch/csrc/autograd/functions/convolution.cpp",
"torch/csrc/autograd/functions/init.cpp",
"torch/csrc/nn/THNN_generic.cpp",
]
try:
@ -210,6 +292,20 @@ try:
except ImportError:
WITH_NUMPY = False
if WITH_DISTRIBUTED:
extra_compile_args += ['-DWITH_DISTRIBUTED']
main_sources += [
"torch/csrc/distributed/Module.cpp",
"torch/csrc/distributed/utils.cpp",
]
if WITH_DISTRIBUTED_MW:
main_sources += [
"torch/csrc/distributed/Tensor.cpp",
"torch/csrc/distributed/Storage.cpp",
]
include_dirs += [tmp_install_path + "/include/THD"]
main_link_args += [THD_LIB]
if WITH_CUDA:
cuda_lib_dirs = ['lib64', 'lib']
cuda_include_path = os.path.join(CUDA_HOME, 'include')
@ -218,11 +314,13 @@ if WITH_CUDA:
if os.path.exists(cuda_lib_path):
break
include_dirs.append(cuda_include_path)
extra_link_args.append('-L' + cuda_lib_path)
include_dirs.append(tmp_install_path + "/include/THCUNN")
library_dirs.append(cuda_lib_path)
extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
extra_compile_args += ['-DWITH_CUDA']
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
main_link_args += [THC_LIB, THCS_LIB]
main_libraries += ['cudart']
main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
main_sources += [
"torch/csrc/cuda/Module.cpp",
"torch/csrc/cuda/Storage.cpp",
@ -233,18 +331,23 @@ if WITH_CUDA:
"torch/csrc/cuda/serialization.cpp",
]
if WITH_NCCL:
if SYSTEM_NCCL:
main_libraries += ['nccl']
else:
main_link_args += [NCCL_LIB]
extra_compile_args += ['-DWITH_NCCL']
if WITH_CUDNN:
main_libraries += ['cudnn']
include_dirs.append(CUDNN_INCLUDE_DIR)
extra_link_args.append('-L' + CUDNN_LIB_DIR)
library_dirs.append(CUDNN_LIB_DIR)
main_sources += [
"torch/csrc/cudnn/Module.cpp",
"torch/csrc/cudnn/BatchNorm.cpp",
"torch/csrc/cudnn/Conv.cpp",
"torch/csrc/cudnn/cuDNN.cpp",
"torch/csrc/cudnn/Types.cpp",
"torch/csrc/cudnn/Handles.cpp",
"torch/csrc/cudnn/CppWrapper.cpp",
]
extra_compile_args += ['-DWITH_CUDNN']
@ -267,70 +370,82 @@ extensions = []
packages = find_packages(exclude=('tools.*',))
C = Extension("torch._C",
libraries=main_libraries,
sources=main_sources,
language='c++',
extra_compile_args=main_compile_args + extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
)
libraries=main_libraries,
sources=main_sources,
language='c++',
extra_compile_args=main_compile_args + extra_compile_args,
include_dirs=include_dirs,
library_dirs=library_dirs,
extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
)
extensions.append(C)
DL = Extension("torch._dl",
sources=["torch/csrc/dl.c"],
language='c',
)
sources=["torch/csrc/dl.c"],
language='c',
)
extensions.append(DL)
THNN = Extension("torch._thnn._THNN",
sources=['torch/csrc/nn/THNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THNN_LIB,
make_relative_rpath('../lib'),
]
)
sources=['torch/csrc/nn/THNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THNN_LIB,
make_relative_rpath('../lib'),
]
)
extensions.append(THNN)
if WITH_CUDA:
THCUNN = Extension("torch._thnn._THCUNN",
sources=['torch/csrc/nn/THCUNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THC_LIB,
THCUNN_LIB,
make_relative_rpath('../lib'),
]
)
sources=['torch/csrc/nn/THCUNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THC_LIB,
THCUNN_LIB,
make_relative_rpath('../lib'),
]
)
extensions.append(THCUNN)
version="0.1"
version = '0.1.12'
if os.getenv('PYTORCH_BUILD_VERSION'):
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
version = os.getenv('PYTORCH_BUILD_VERSION') \
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
else:
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except subprocess.CalledProcessError:
pass
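# Note: a plain source checkout therefore builds with a version string like
# '0.1.12+abc1234' (sha shown here is illustrative), while packaged builds
# override it via PYTORCH_BUILD_VERSION and PYTORCH_BUILD_NUMBER.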
setup(name="torch", version=version,
ext_modules=extensions,
cmdclass = {
'build': build,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'install': install,
'clean': clean,
},
packages=packages,
package_data={'torch': [
'lib/*.so*', 'lib/*.dylib*',
'lib/torch_shm_manager',
'lib/*.h',
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
install_requires=['pyyaml'],
)
description="Tensors and Dynamic neural networks in Python with strong GPU acceleration",
ext_modules=extensions,
cmdclass={
'build': build,
'build_py': build_py,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'develop': develop,
'install': install,
'clean': clean,
},
packages=packages,
package_data={'torch': [
'lib/*.so*', 'lib/*.dylib*',
'lib/torch_shm_manager',
'lib/*.h',
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
install_requires=['pyyaml'],
)


@ -1,17 +1,30 @@
import sys
import os
import argparse
import unittest
import warnings
import contextlib
from functools import wraps
from itertools import product
from copy import deepcopy
import torch
import torch.cuda
from torch.autograd import Variable, Function
from torch.autograd import Variable
torch.set_default_tensor_type('torch.DoubleTensor')
torch.manual_seed(123)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(123)
def run_tests():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('--seed', type=int, default=123)
args, remaining = parser.parse_known_args()
torch.manual_seed(args.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(args.seed)
remaining = [sys.argv[0]] + remaining
unittest.main(argv=remaining)
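# Usage note: any suite that calls run_tests() accepts a seed override on the
# command line, e.g. `python test_torch.py --seed 42` (invocation illustrative);
# all remaining arguments are forwarded to unittest.main().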
TEST_NUMPY = True
@ -20,6 +33,33 @@ try:
except ImportError:
TEST_NUMPY = False
TEST_SCIPY = True
try:
import scipy
except ImportError:
TEST_SCIPY = False
def skipIfNoLapack(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as e:
if 'Lapack library not found' in e.args[0]:
raise unittest.SkipTest('Compiled without Lapack')
raise
return wrapper
def suppress_warnings(fn):
def wrapper(*args, **kwargs):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
fn(*args, **kwargs)
return wrapper
def get_cpu_type(t):
assert t.__module__ == 'torch.cuda'
return getattr(torch, t.__class__.__name__)
@ -78,6 +118,39 @@ def is_iterable(obj):
class TestCase(unittest.TestCase):
precision = 1e-5
def assertTensorsSlowEqual(self, x, y, prec=None, message=''):
max_err = 0
self.assertEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertLessEqual(max_err, prec, message)
def safeCoalesce(self, t):
tc = t.coalesce()
value_map = {}
for idx, val in zip(t.indices().t(), t.values()):
idx_tup = tuple(idx)
if idx_tup in value_map:
value_map[idx_tup] += val
else:
value_map[idx_tup] = val.clone() if torch.is_tensor(val) else val
new_indices = sorted(list(value_map.keys()))
new_values = [value_map[idx] for idx in new_indices]
if t.values().ndimension() < 2:
new_values = t.values().new(new_values)
else:
new_values = torch.stack(new_values)
new_indices = t.indices().new(new_indices).t()
tg = t.new(new_indices, new_values, t.size())
self.assertEqual(tc.indices(), tg.indices())
self.assertEqual(tc.values(), tg.values())
return tg
def assertEqual(self, x, y, prec=None, message=''):
if prec is None:
prec = self.precision
@ -87,11 +160,28 @@ class TestCase(unittest.TestCase):
y = y.data
if torch.is_tensor(x) and torch.is_tensor(y):
max_err = 0
super(TestCase, self).assertEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertLessEqual(max_err, prec, message)
def assertTensorsEqual(a, b):
super(TestCase, self).assertEqual(a.size(), b.size())
if a.numel() > 0:
b = b.type_as(a)
b = b.cuda(device=a.get_device()) if a.is_cuda else b.cpu()
# check that NaNs are in the same locations
nan_mask = a != a
self.assertTrue(torch.equal(nan_mask, b != b))
diff = a - b
diff[nan_mask] = 0
if diff.is_signed():
diff = diff.abs()
max_err = diff.max()
self.assertLessEqual(max_err, prec, message)
self.assertEqual(x.is_sparse, y.is_sparse, message)
if x.is_sparse:
x = self.safeCoalesce(x)
y = self.safeCoalesce(y)
assertTensorsEqual(x.indices(), y.indices())
assertTensorsEqual(x.values(), y.values())
else:
assertTensorsEqual(x, y)
elif type(x) == str and type(y) == str:
super(TestCase, self).assertEqual(x, y)
elif is_iterable(x) and is_iterable(y):
@ -114,12 +204,19 @@ class TestCase(unittest.TestCase):
y = y.data
if torch.is_tensor(x) and torch.is_tensor(y):
max_err = 0
if x.size() != y.size():
super(TestCase, self).assertNotEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertGreaterEqual(max_err, prec, message)
self.assertGreater(x.numel(), 0)
y = y.type_as(x)
y = y.cuda(device=x.get_device()) if x.is_cuda else y.cpu()
nan_mask = x != x
if torch.equal(nan_mask, y != y):
diff = x - y
if diff.is_signed():
diff = diff.abs()
diff[nan_mask] = 0
max_err = diff.max()
self.assertGreaterEqual(max_err, prec, message)
elif type(x) == str and type(y) == str:
super(TestCase, self).assertNotEqual(x, y)
elif is_iterable(x) and is_iterable(y):
@ -139,65 +236,23 @@ class TestCase(unittest.TestCase):
raise AssertionError("object not found in iterable")
def make_jacobian(input, num_out):
    if isinstance(input, Variable) and not input.requires_grad:
        return None
    if torch.is_tensor(input) or isinstance(input, Variable):
        return torch.zeros(input.nelement(), num_out)
    else:
        return type(input)(filter(lambda x: x is not None,
                                  (make_jacobian(elem, num_out) for elem in input)))

def iter_tensors(x, only_requiring_grad=False):
    if torch.is_tensor(x):
        yield x
    elif isinstance(x, Variable):
        if x.requires_grad or not only_requiring_grad:
            yield x.data
    else:
        for elem in x:
            for result in iter_tensors(elem, only_requiring_grad):
                yield result

def contiguous(input):
    if torch.is_tensor(input):
        return input.contiguous()
    elif isinstance(input, Variable):
        return input.contiguous()
    else:
        return type(input)(contiguous(e) for e in input)

def get_numerical_jacobian(fn, input, target):
    perturbation = 1e-6
    # To be able to use .view(-1) input must be contiguous
    input = contiguous(input)
    output_size = fn(input).numel()
    jacobian = make_jacobian(target, output_size)
    # It's much easier to iterate over flattened lists of tensors.
    # These are reference to the same objects in jacobian, so any changes
    # will be reflected in it as well.
    x_tensors = [t for t in iter_tensors(target, True)]
    j_tensors = [t for t in iter_tensors(jacobian)]
    outa = torch.DoubleTensor(output_size)
    outb = torch.DoubleTensor(output_size)
    # TODO: compare structure
    for x_tensor, d_tensor in zip(x_tensors, j_tensors):
        flat_tensor = x_tensor.view(-1)
        for i in range(flat_tensor.nelement()):
            orig = flat_tensor[i]
            flat_tensor[i] = orig - perturbation
            outa.copy_(fn(input))
            flat_tensor[i] = orig + perturbation
            outb.copy_(fn(input))
            flat_tensor[i] = orig
            outb.add_(-1, outa).div_(2 * perturbation)
            d_tensor[i] = outb
    return jacobian

def download_file(url, path, binary=True):
    if sys.version_info < (3,):
        import urllib2
        request = urllib2
        error = urllib2
    else:
        import urllib.request
        import urllib.error
        request = urllib.request
        error = urllib.error
    if os.path.exists(path):
        return True
    try:
        data = request.urlopen(url, timeout=15).read()
        with open(path, 'wb' if binary else 'w') as f:
            f.write(data)
        return True
    except error.URLError as e:
        return False
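# For reference: the perturbation loop in get_numerical_jacobian above forms a
# central finite difference, (f(x + eps) - f(x - eps)) / (2 * eps) with
# eps = 1e-6, one input element at a time; this is the numerical estimate that
# gradcheck compares analytical gradients against.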


@ -2,11 +2,13 @@ import sys
import tempfile
import unittest
from copy import deepcopy
from itertools import product
import torch
import torch.cuda
from torch.autograd import Variable
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
from common import TestCase, to_gpu, freeze_rng_state
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors, contiguous
import torch.backends.cudnn
# tarfile module tries to obtain a file object name in python 3.3
@ -18,6 +20,7 @@ else:
TEST_CUDA = torch.cuda.is_available()
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
TEST_CUDNN_VERSION = TEST_CUDNN and torch.backends.cudnn.version()
PRECISION = 1e-5
module_tests = [
@ -25,14 +28,14 @@ module_tests = [
module_name='Linear',
constructor_args=(10, 8),
input_size=(4, 10),
reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
),
dict(
module_name='Linear',
constructor_args=(10, 8, False),
input_size=(4, 10),
desc='no_bias',
reference_fn=lambda i,p: torch.mm(i, p[0].t())
reference_fn=lambda i, p: torch.mm(i, p[0].t())
),
dict(
module_name='Threshold',
@ -72,7 +75,7 @@ module_tests = [
dict(
module_name='Hardtanh',
input_size=(3, 2, 5),
reference_fn=lambda i,_: i.clamp(-1, 1)
reference_fn=lambda i, _: i.clamp(-1, 1)
),
dict(
module_name='Sigmoid',
@ -85,17 +88,23 @@ module_tests = [
dict(
module_name='Softmax',
input_size=(10, 20),
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
),
dict(
module_name='Softmax2d',
input_size=(1, 3, 10, 20),
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i))
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i))
),
dict(
module_name='LogSoftmax',
input_size=(10, 20),
reference_fn=lambda i,_: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
),
dict(
module_name='LogSoftmax',
input_size=(1, 3, 10, 20),
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand_as(i)).log_(),
desc='multiparam'
),
dict(
module_name='ELU',
@ -124,18 +133,18 @@ module_tests = [
dict(
module_name='LogSigmoid',
input_size=(2, 3, 4),
reference_fn=lambda i,_: i.sigmoid().log()
reference_fn=lambda i, _: i.sigmoid().log()
),
dict(
module_name='Softplus',
input_size=(10, 20),
reference_fn=lambda i,_: torch.log(1 + torch.exp(i))
reference_fn=lambda i, _: torch.log(1 + torch.exp(i))
),
dict(
module_name='Softplus',
constructor_args=(2,),
input_size=(10, 20),
reference_fn=lambda i,_: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
desc='beta'
),
dict(
@ -155,18 +164,47 @@ module_tests = [
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5)
input_size=(2, 3, 4),
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
desc='1d',
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4),
desc='1d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5),
desc='2d',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4, 5),
desc='multiparam'
desc='2d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5, 6),
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
desc='3d',
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4, 5, 6),
desc='3d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='Softsign',
input_size=(3, 2, 5),
reference_fn=lambda i,_: i.div(1 + torch.abs(i))
reference_fn=lambda i, _: i.div(1 + torch.abs(i))
),
dict(
module_name='Softmin',
@ -181,11 +219,11 @@ module_tests = [
criterion_tests = [
dict(module_name='L1Loss',
input_size=(2, 3, 4),
target=torch.randn(2, 3, 4),
reference_fn=lambda i,t,_: 1./i.numel() * \
sum((a-b).abs().sum() for a,b in zip(i, t))
),
input_size=(2, 3, 4),
target=torch.randn(2, 3, 4),
reference_fn=lambda i, t, _: 1. / i.numel() *
sum((a - b).abs().sum() for a, b in zip(i, t))
),
dict(
module_name='NLLLoss',
input=torch.rand(15, 10).log(),
@ -207,7 +245,7 @@ criterion_tests = [
module_name='MSELoss',
input=torch.randn(2, 3, 4, 5),
target=torch.randn(2, 3, 4, 5),
reference_fn=lambda i,t,_: (i-t).abs().pow(2).sum() / i.numel()
reference_fn=lambda i, t, _: (i - t).abs().pow(2).sum() / i.numel()
),
dict(
module_name='BCELoss',
@ -238,6 +276,13 @@ criterion_tests = [
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long()
),
dict(
module_name='NLLLoss2d',
constructor_args=(torch.rand(3),),
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long(),
desc='weights'
),
dict(
module_name='HingeEmbeddingLoss',
input=torch.rand(10),
@ -321,15 +366,19 @@ class NNTestCase(TestCase):
def _flatten_tensors(self, x):
if torch.is_tensor(x):
return x.view(-1)
if x.is_sparse:
return x.to_dense().view(-1)
else:
return x.view(-1)
elif isinstance(x, Variable):
return x.data.view(-1)
return self._flatten_tensors(x.data)
else:
return tuple(self._flatten_tensors(a) for a in x)
def _zero_grad_input(self, input):
if isinstance(input, Variable):
input.grad.data.zero_()
if input.requires_grad and input.grad is not None:
input.grad.data.zero_()
elif torch.is_tensor(input):
return
else:
@ -364,9 +413,9 @@ class NNTestCase(TestCase):
if jacobian_input:
for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)):
jacobian_x[:,i] = d_x
jacobian_x[:, i] = d_x
if jacobian_parameters:
jacobian_param[:,i] = torch.cat(self._flatten_tensors(d_param), 0)
jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
res = tuple()
if jacobian_input:
@ -393,9 +442,9 @@ class NNTestCase(TestCase):
# TODO: enable non-contig tests
input = contiguous(input)
if jacobian_input:
res += get_numerical_jacobian(fw, input, input),
res += get_numerical_jacobian(fw, input, input, eps=1e-6),
if jacobian_parameters:
res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
res += torch.cat(list(get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param), 0),
return res
def check_jacobian(self, module, input, jacobian_input=True):
@ -427,7 +476,7 @@ class NNTestCase(TestCase):
fx1 = self._forward_criterion(criterion, input, target)
x[i] = original - eps
fx2 = self._forward_criterion(criterion, input, target)
deriv = (fx1 - fx2) / (2.*eps)
deriv = (fx1 - fx2) / (2. * eps)
d_x[i] = deriv
x[i] = original
@ -441,8 +490,9 @@ class NNTestCase(TestCase):
class TestBase(object):
def __init__(self, constructor, constructor_args=tuple(), input_size=None,
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
if input_size is None and input is None:
raise RuntimeError("Specify either an input tensor, or it's size!")
self.constructor = constructor
@ -490,6 +540,7 @@ class TestBase(object):
class ModuleTest(TestBase):
def __init__(self, *args, **kwargs):
super(ModuleTest, self).__init__(*args, **kwargs)
self.jacobian_input = kwargs.get('jacobian_input', True)
@ -507,6 +558,8 @@ class ModuleTest(TestBase):
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
test_case.assertEqual(out, expected_out)
self.test_noncontig(test_case, module, input)
# TODO: do this with in-memory files as soon as torch.save will support it
with TemporaryFile() as f:
test_case._forward(module, input)
@ -517,6 +570,51 @@ class ModuleTest(TestBase):
self._do_test(test_case, module, input)
def noncontiguize(self, obj):
if isinstance(obj, list):
return [self.noncontiguize(o) for o in obj]
tensor = obj.data if isinstance(obj, Variable) else obj
ndim = tensor.dim()
noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
assert noncontig.numel() == 1 or not noncontig.is_contiguous()
if isinstance(obj, Variable):
return Variable(noncontig, requires_grad=obj.requires_grad)
return noncontig
def test_noncontig(self, test_case, module, input):
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(input)
with freeze_rng_state():
output = test_case._forward(module, input)
grad_output = output
if isinstance(grad_output, Variable):
grad_output = grad_output.data.clone()
else:
grad_output = grad_output.clone()
output = output.clone()
grad_output.normal_()
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
d_param = deepcopy(test_case._get_parameters(module)[1])
nc_input = self.noncontiguize(input)
nc_grad_output = self.noncontiguize(grad_output)
for contig_i, contig_g in product((True, False), repeat=2):
i = input if contig_i else nc_input
go = grad_output if contig_g else nc_grad_output
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(i)
with freeze_rng_state():
try:
out = test_case._forward(module, i)
except Exception:
# Some modules will fail because of non contiguous inputs and we're ok with that
continue
grad = test_case._backward(module, i, out, go)
test_case.assertEqual(out, output)
test_case.assertEqual(grad, d_input, 1e-4)
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
def test_cuda(self, test_case):
if not TEST_CUDA or not self.should_test_cuda:
raise unittest.SkipTest('Excluded from CUDA tests')
@ -527,8 +625,6 @@ class ModuleTest(TestBase):
cpu_module = self.constructor(*self.constructor_args)
gpu_module = self.constructor(*self.constructor_args).float().cuda()
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
cpu_param = test_case._get_parameters(cpu_module)
gpu_param = test_case._get_parameters(gpu_module)
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
@ -538,6 +634,10 @@ class ModuleTest(TestBase):
gpu_p = gpu_p.data
gpu_p.copy_(cpu_p)
test_case._zero_grad_input(cpu_input)
test_case._zero_grad_input(gpu_input)
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
cpu_output = test_case._forward(cpu_module, cpu_input)
gpu_output = test_case._forward(gpu_module, gpu_input)
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
@ -551,6 +651,8 @@ class ModuleTest(TestBase):
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
self.test_noncontig(test_case, gpu_module, gpu_input)
except NotImplementedError:
pass
# TODO: remove this after CUDA scatter_ is implemented
@ -562,6 +664,7 @@ class ModuleTest(TestBase):
class CriterionTest(TestBase):
def __init__(self, *args, **kwargs):
super(CriterionTest, self).__init__(*args, **kwargs)
self.target = self._get_target(kwargs['target'])
@ -584,7 +687,7 @@ class CriterionTest(TestBase):
if isinstance(target, Variable):
target = target.data
expected_out = self.reference_fn(deepcopy(self._unpack_input(input)),
deepcopy(target), module)
deepcopy(target), module)
test_case.assertEqual(out, expected_out)
test_case.check_criterion_jacobian(module, input, self.target)
@ -607,10 +710,10 @@ class CriterionTest(TestBase):
cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target)
gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target)
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
test_case.assertEqual(cpu_output, gpu_output, 4e-4)
cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target)
gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 4e-4)
except NotImplementedError:
pass


@ -2,6 +2,7 @@ import torch.nn as nn
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)


@ -2,6 +2,7 @@ import torch.nn as nn
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)


@ -1,5 +1,6 @@
import torch
def check_error(desc, fn, *required_substrings):
try:
fn()
@ -16,54 +17,55 @@ def check_error(desc, fn, *required_substrings):
assert False, "given function ({}) didn't raise an error".format(desc)
check_error(
'Wrong argument types',
lambda: torch.FloatStorage(object()),
'object')
'Wrong argument types',
lambda: torch.FloatStorage(object()),
'object')
check_error('Unknown keyword argument',
lambda: torch.FloatStorage(content=1234.),
'keyword')
lambda: torch.FloatStorage(content=1234.),
'keyword')
check_error('Invalid types inside a sequence',
lambda: torch.FloatStorage(['a', 'b']),
'list', 'str')
lambda: torch.FloatStorage(['a', 'b']),
'list', 'str')
check_error('Invalid size type',
lambda: torch.FloatStorage(1.5),
'float')
lambda: torch.FloatStorage(1.5),
'float')
check_error('Invalid offset',
lambda: torch.FloatStorage(torch.FloatStorage(2), 4),
'2', '4')
lambda: torch.FloatStorage(torch.FloatStorage(2), 4),
'2', '4')
check_error('Negative offset',
lambda: torch.FloatStorage(torch.FloatStorage(2), -1),
'2', '-1')
lambda: torch.FloatStorage(torch.FloatStorage(2), -1),
'2', '-1')
check_error('Invalid size',
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, 5),
'2', '1', '5')
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, 5),
'2', '1', '5')
check_error('Negative size',
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, -5),
'2', '1', '-5')
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, -5),
'2', '1', '-5')
check_error('Invalid index type',
lambda: torch.FloatStorage(10)['first item'],
'str')
lambda: torch.FloatStorage(10)['first item'],
'str')
def assign():
torch.FloatStorage(10)[1:-1] = '1'
check_error('Invalid value type',
assign,
'str')
assign,
'str')
check_error('resize_ with invalid type',
lambda: torch.FloatStorage(10).resize_(1.5),
'float')
lambda: torch.FloatStorage(10).resize_(1.5),
'float')
check_error('fill_ with invalid type',
lambda: torch.IntStorage(10).fill_('asdf'),
'str')
lambda: torch.IntStorage(10).fill_('asdf'),
'str')
# TODO: frombuffer


@ -1,5 +1,5 @@
# th test.lua > lua.out
th test.lua > lua.out
python3 test.py > python.out
diff lua.out python.out >/dev/null 2>&1

File diff suppressed because it is too large


@ -1,39 +0,0 @@
assert(arg[1])
funcs = {
'resizeAs', 'add', 'zero', 'mul', 'div', 'abs',
'addcmul', 'addcdiv', 'copy', 'sqrt', 'fill',
{'cmul', 'mul'},
{'cdiv', 'div'},
}
for _, val in pairs(funcs) do
local name, newname
if type(val) == 'table' then
name = val[1]
newname = val[2]
else
name = val
newname = val .. '_'
end
command = "sed -i -r "
.. "'/torch\\." .. name .. "\\(/b; " -- short-circuits
.. "s/([a-zA-Z]*)\\." .. name .. "\\(" -- substitution
.. "/"
.. "\\1\\." .. newname .. "\\(/g' " .. arg[1]
print(command)
os.execute(command)
command = "sed -i 's/math\\." .. newname
.. "/math\\." .. name .. "/' " .. arg[1]
print(command)
os.execute(command)
end
funcs = {
{'torch\.cmul', 'torch\.mul'},
{'torch\.cdiv', 'torch\.div'},
}
for _, val in pairs(funcs) do
command = "sed -i 's/" .. val[1] .. "/" .. val[2] .. "/' " .. arg[1]
print(command)
os.execute(command)
end

test/optim/test.lua Normal file

@ -0,0 +1,33 @@
local cjson = require 'cjson'
require 'optim'
function rosenbrock(t)
x, y = t[1], t[2]
return (1 - x) ^ 2 + 100 * (y - x^2)^2
end
function drosenbrock(t)
x, y = t[1], t[2]
return torch.DoubleTensor({-400 * x * (y - x^2) - 2 * (1 - x), 200 * x * (y - x^2)})
end
local fd = io.open('tests.json', 'r')
local tests = cjson.decode(fd:read('*a'))
fd:close()
for i, test in ipairs(tests) do
print(test.algorithm)
algorithm = optim[test.algorithm]
for i, config in ipairs(test.config) do
print('================================================================================')
params = torch.DoubleTensor({1.5, 1.5})
for i = 1, 100 do
function closure(x)
return rosenbrock(x), drosenbrock(x)
end
algorithm(closure, params, config)
print(string.format('%.8f\t%.8f', params[1], params[2]))
end
end
end


@ -3,13 +3,15 @@ import torch
import torch.legacy.optim as optim
from pprint import pprint
def rosenbrock(tensor):
x, y = tensor
return (1 - x)**2 + 100 * (y - x**2)**2
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
def drosenbrock(tensor):
x, y = tensor
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * x * (y - x**2)))
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * x * (y - x ** 2)))
algorithms = {
'adadelta': optim.adadelta,
@ -22,6 +24,7 @@ algorithms = {
'rmsprop': optim.rmsprop,
'rprop': optim.rprop,
'sgd': optim.sgd,
'lbfgs': optim.lbfgs,
}
with open('tests.json', 'r') as f:
@ -35,4 +38,4 @@ for test in tests:
params = torch.DoubleTensor((1.5, 1.5))
for i in range(100):
algorithm(lambda x: (rosenbrock(x), drosenbrock(x)), params, config)
print('{:.12f}\t{:.12f}\t'.format(params[0], params[1]))
print('{:.8f}\t{:.8f}\t'.format(params[0], params[1]))


@ -98,5 +98,12 @@
{"learningRate": 1e-4, "nesterov": true, "momentum": 0.95, "dampening": 0},
{"weightDecay": 0.2}
]
},
{
"algorithm": "lbfgs",
"config": [
{},
{"learningRate": 1e-1}
]
}
]


@ -2,8 +2,17 @@
set -e
PYCMD=${PYCMD:="python"}
if [ "$1" == "coverage" ];
then
COVERAGE=0
while [[ "$#" -gt 0 ]]; do
case "$1" in
-p|--python) PYCMD=$2; shift 2 ;;
-c|--coverage) COVERAGE=1; shift 1;;
--) shift; break ;;
*) echo "Invalid argument: $1!" ; exit 1 ;;
esac
done
if [[ $COVERAGE -eq 1 ]]; then
coverage erase
PYCMD="coverage run --parallel-mode --source torch "
echo "coverage flag found. Setting python command to: \"$PYCMD\""
@ -12,42 +21,68 @@ fi
pushd "$(dirname "$0")"
echo "Running torch tests"
$PYCMD test_torch.py
$PYCMD test_torch.py $@
echo "Running autograd tests"
$PYCMD test_autograd.py
$PYCMD test_autograd.py $@
echo "Running sparse tests"
$PYCMD test_sparse.py
$PYCMD test_sparse.py $@
echo "Running nn tests"
$PYCMD test_nn.py
$PYCMD test_nn.py $@
echo "Running legacy nn tests"
$PYCMD test_legacy_nn.py
$PYCMD test_legacy_nn.py $@
echo "Running optim tests"
$PYCMD test_optim.py
$PYCMD test_optim.py $@
echo "Running multiprocessing tests"
$PYCMD test_multiprocessing.py
MULTIPROCESSING_METHOD=spawn $PYCMD test_multiprocessing.py
MULTIPROCESSING_METHOD=forkserver $PYCMD test_multiprocessing.py
$PYCMD test_multiprocessing.py $@
MULTIPROCESSING_METHOD=spawn $PYCMD test_multiprocessing.py $@
MULTIPROCESSING_METHOD=forkserver $PYCMD test_multiprocessing.py $@
echo "Running util tests"
$PYCMD test_utils.py
$PYCMD test_utils.py $@
echo "Running dataloader tests"
$PYCMD test_dataloader.py
$PYCMD test_dataloader.py $@
echo "Running cuda tests"
$PYCMD test_cuda.py
$PYCMD test_cuda.py $@
echo "Running NCCL tests"
$PYCMD test_nccl.py
$PYCMD test_nccl.py $@
if [ "$1" == "coverage" ];
then
################################################################################
if [[ "$TEST_DISTRIBUTED" -eq 1 ]]; then
distributed_set_up() {
export TEMP_DIR="$(mktemp -d)"
rm -rf "$TEMP_DIR/"*
mkdir "$TEMP_DIR/barrier"
mkdir "$TEMP_DIR/test_dir"
}
distributed_tear_down() {
rm -rf "$TEMP_DIR"
}
trap distributed_tear_down EXIT SIGHUP SIGINT SIGTERM
echo "Running distributed tests for the TCP backend"
distributed_set_up
BACKEND=tcp WORLD_SIZE=3 $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the MPI backend"
distributed_set_up
BACKEND=mpi mpiexec -n 3 $PYCMD ./test_distributed.py
distributed_tear_down
fi
################################################################################
if [[ $COVERAGE -eq 1 ]]; then
coverage combine
coverage html
fi
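# Typical invocations (script name illustrative): ./run_test.sh --coverage
# or ./run_test.sh -p python3; set TEST_DISTRIBUTED=1 to also run the
# distributed suites.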

File diff suppressed because it is too large


@ -7,12 +7,15 @@ import torch
import torch.cuda
import torch.cuda.comm as comm
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state
from test_torch import TestTorch
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests
HAS_CUDA = True
if not torch.cuda.is_available():
print('CUDA not available, skipping tests')
import sys
sys.exit()
TestCase = object # noqa: F811
HAS_CUDA = False
def is_floating(t):
return type(t) in [torch.FloatTensor, torch.DoubleTensor,
@ -31,7 +34,8 @@ types = [
float_types = [
torch.FloatTensor,
torch.DoubleTensor
] # TODO: add half...
] # TODO: add half...
def number(floating, integer, t):
name = type(t).__name__
@ -44,48 +48,70 @@ def number(floating, integer, t):
S = 10
M = 50
def make_tensor(t, *sizes):
return t(*sizes).copy_(torch.randn(*sizes))
def small_2d(t):
return make_tensor(t, S, S)
def small_2d_scaled(t, scale=10):
return make_tensor(t, S, S).mul(scale)
def small_2d_oneish(t):
if is_floating(t):
return make_tensor(t, S, S).clamp(min=0.99, max=1.01)
else:
return t(S, S).fill_(1)
def small_3d(t):
return make_tensor(t, S, S, S)
def medium_1d(t):
return make_tensor(t, M)
def medium_2d(t):
return make_tensor(t, M, M)
def medium_2d_scaled(t, scale=10):
return make_tensor(t, M, M).mul(scale)
def small_3d_ones(t):
return t(S, S, S).copy_(torch.ones(S, S, S))
def small_3d_positive(t):
min_val = 1e-3 if is_floating(t) else 2
return make_tensor(t, S, S, S).clamp_(min_val, 120)
def small_3d_unique(t):
return t(S, S, S).copy_(torch.range(1, S*S*S))
return t(S, S, S).copy_(torch.arange(1, S * S * S + 1))
def small_1d_lapack(t):
return t(1, 3).copy_(torch.range(1, 3).view(3))
return t(1, 3).copy_(torch.arange(1, 4).view(3))
def small_2d_lapack(t):
return t(3, 3).copy_(torch.range(1, 9).view(3, 3))
return t(3, 3).copy_(torch.arange(1, 10).view(3, 3))
def small_2d_lapack_skinny(t):
return t(3, 4).copy_(torch.range(1, 12).view(3, 4))
return t(3, 4).copy_(torch.arange(1, 13).view(3, 4))
def small_2d_lapack_fat(t):
return t(4, 3).copy_(torch.range(1, 12).view(4, 3))
return t(4, 3).copy_(torch.arange(1, 13).view(4, 3))
def new_t(*sizes):
def tmp(t):
@ -93,139 +119,167 @@ def new_t(*sizes):
return tmp
tests = [
('add', small_3d, lambda t: [number(3.14, 3, t)] ),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor' ),
('sub', small_3d, lambda t: [number(3.14, 3, t)], ),
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('mul', small_3d, lambda t: [number(3.14, 3, t)], ),
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('div', small_3d, lambda t: [number(3.14, 3, t)], ),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)], ),
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar' ),
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], ),
('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar' ),
('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars' ),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar' ),
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars' ),
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], ),
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar' ),
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars' ),
('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types),
('fmod', small_3d, lambda t: [3], 'value' ),
('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('chunk', medium_2d, lambda t: [4], ),
('chunk', medium_2d, lambda t: [4, 1], 'dim' ),
('clamp', medium_2d_scaled, lambda t: [-1, 5], ),
('clone', medium_2d, lambda t: [], ),
('contiguous', medium_2d, lambda t: [], ),
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)], ),
('cumprod', small_3d, lambda t: [1], ),
('cumsum', small_3d, lambda t: [1], ),
('dim', small_3d, lambda t: [], ),
('dist', small_2d, lambda t: [small_2d(t)], ),
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm' ),
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm' ),
('dot', medium_1d, lambda t: [medium_1d(t)], ),
('element_size', medium_1d, lambda t: [], ),
('eq', small_3d_ones, lambda t: [small_3d(t)], ),
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
('ne', small_3d_ones, lambda t: [small_3d(t)], ),
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
('equal', small_3d_ones, lambda t: [small_3d(t)], ),
('expand', new_t(M, 1, M), lambda t: [M, 4, M], ),
('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)], ),
('fill', medium_2d, lambda t: [number(3.14, 3, t)], ),
('ge', medium_2d, lambda t: [medium_2d(t)], ),
('le', medium_2d, lambda t: [medium_2d(t)], ),
('gt', medium_2d, lambda t: [medium_2d(t)], ),
('lt', medium_2d, lambda t: [medium_2d(t)], ),
('is_contiguous', medium_2d, lambda t: [], ),
('add', small_3d, lambda t: [number(3.14, 3, t)]),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'),
('sub', small_3d, lambda t: [number(3.14, 3, t)],),
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('mul', small_3d, lambda t: [number(3.14, 3, t)],),
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('div', small_3d, lambda t: [number(3.14, 3, t)],),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)],),
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)],),
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t),
small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar'),
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)],),
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)],),
('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'),
('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)],),
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'),
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'),
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)],),
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'),
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'),
('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types),
('fmod', small_3d, lambda t: [3], 'value'),
('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('chunk', medium_2d, lambda t: [4],),
('chunk', medium_2d, lambda t: [4, 1], 'dim'),
('chunk', medium_2d, lambda t: [4, -2], 'neg_dim'),
('clamp', medium_2d_scaled, lambda t: [-1, 5],),
('clone', medium_2d, lambda t: [],),
('contiguous', medium_2d, lambda t: [],),
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],),
('cumprod', small_3d, lambda t: [1],),
('cumprod', small_3d, lambda t: [-1], 'neg_dim'),
('cumsum', small_3d, lambda t: [1],),
('cumsum', small_3d, lambda t: [-1], 'neg_dim'),
('dim', small_3d, lambda t: [],),
('dist', small_2d, lambda t: [small_2d(t)],),
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'),
('dot', medium_1d, lambda t: [medium_1d(t)],),
('element_size', medium_1d, lambda t: [],),
('eq', small_3d_ones, lambda t: [small_3d(t)],),
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('ne', small_3d_ones, lambda t: [small_3d(t)],),
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('equal', small_3d_ones, lambda t: [small_3d(t)],),
('expand', new_t(M, 1, M), lambda t: [M, 4, M],),
('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)],),
('fill', medium_2d, lambda t: [number(3.14, 3, t)],),
('ge', medium_2d, lambda t: [medium_2d(t)],),
('le', medium_2d, lambda t: [medium_2d(t)],),
('gt', medium_2d, lambda t: [medium_2d(t)],),
('lt', medium_2d, lambda t: [medium_2d(t)],),
('is_contiguous', medium_2d, lambda t: [],),
# TODO: can't check negative case - GPU copy will be contiguous
('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative' ),
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive' ),
('is_set_to', medium_2d, lambda t: [medium_2d(t)], ),
('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative'),
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive'),
('is_set_to', medium_2d, lambda t: [medium_2d(t)],),
# TODO: positive case
('kthvalue', small_3d_unique, lambda t: [3], ),
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim' ),
('lerp', small_3d, lambda t: [small_3d(t), 0.3], ),
('max', small_3d_unique, lambda t: [], ),
('max', small_3d_unique, lambda t: [1], 'dim' ),
('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ),
('min', small_3d_unique, lambda t: [], ),
('min', small_3d_unique, lambda t: [1], 'dim' ),
('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ),
('mean', small_3d, lambda t: [], ),
('mean', small_3d, lambda t: [1], 'dim' ),
('mode', small_3d, lambda t: [], ),
('mode', small_3d, lambda t: [1], 'dim' ),
('remainder', small_3d, lambda t: [3], 'value' ),
('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('std', small_3d, lambda t: [], ),
('std', small_3d, lambda t: [1], 'dim' ),
('var', small_3d, lambda t: [], ),
('var', small_3d, lambda t: [1], 'dim' ),
('ndimension', small_3d, lambda t: [], ),
('nelement', small_3d, lambda t: [], ),
('numel', small_3d, lambda t: [], ),
('narrow', small_3d, lambda t: [1, 3, 2], ),
('nonzero', small_3d, lambda t: [], ),
('norm', small_3d, lambda t: [], ),
('norm', small_3d, lambda t: [3], '3_norm' ),
('norm', small_3d, lambda t: [3, 0], '3_norm_dim' ),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5], ),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0], ),
('prod', small_3d, lambda t: [], ),
('prod', small_3d, lambda t: [1], 'dim' ),
('sum', small_2d, lambda t: [], ),
('sum', small_3d, lambda t: [1], 'dim' ),
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm' ),
('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm' ),
('repeat', small_2d, lambda t: [2, 2, 2], ),
('size', new_t(1, 2, 3, 4), lambda t: [], ),
('sort', small_3d_unique, lambda t: [], ),
('sort', small_3d_unique, lambda t: [1], 'dim' ),
('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
('split', small_3d, lambda t: [2], ),
('split', small_3d, lambda t: [2, 1], 'dim' ),
('squeeze', new_t(1, 2, 1, 4), lambda t: [], ),
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim' ),
('t', new_t(1, 2), lambda t: [], ),
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2], ),
('to_list', small_3d, lambda t: [], ),
('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort' ),
('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort' ),
('trace', medium_2d, lambda t: [], ),
('tril', medium_2d, lambda t: [], ),
('tril', medium_2d, lambda t: [2], 'positive' ),
('tril', medium_2d, lambda t: [-2], 'negative' ),
('triu', medium_2d, lambda t: [], ),
('triu', medium_2d, lambda t: [2], 'positive' ),
('triu', medium_2d, lambda t: [-2], 'negative' ),
('view', small_3d, lambda t: [100, 10], ),
('view_as', small_3d, lambda t: [t(100, 10)], ),
('zero', small_3d, lambda t: [], ),
('zeros', small_3d, lambda t: [1, 2, 3, 4], ),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('kthvalue', small_3d_unique, lambda t: [3],),
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
('kthvalue', small_3d_unique, lambda t: [3, -1], 'neg_dim'),
('lerp', small_3d, lambda t: [small_3d(t), 0.3],),
('max', small_3d_unique, lambda t: [],),
('max', small_3d_unique, lambda t: [1], 'dim'),
('max', small_3d_unique, lambda t: [-1], 'neg_dim'),
('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
('min', small_3d_unique, lambda t: [],),
('min', small_3d_unique, lambda t: [1], 'dim'),
('min', small_3d_unique, lambda t: [-1], 'neg_dim'),
('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
('mean', small_3d, lambda t: [],),
('mean', small_3d, lambda t: [-1], 'neg_dim'),
('mean', small_3d, lambda t: [1], 'dim'),
('mode', small_3d, lambda t: [],),
('mode', small_3d, lambda t: [1], 'dim'),
('mode', small_3d, lambda t: [-1], 'neg_dim'),
('remainder', small_3d, lambda t: [3], 'value'),
('remainder', small_3d, lambda t: [-3], 'negative_value'),
('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('remainder', small_3d, lambda t: [0 - small_3d_positive(t)], 'negative_tensor'),
('std', small_3d, lambda t: [],),
('std', small_3d, lambda t: [1], 'dim'),
('std', small_3d, lambda t: [-1], 'neg_dim'),
('var', small_3d, lambda t: [],),
('var', small_3d, lambda t: [1], 'dim'),
('var', small_3d, lambda t: [-1], 'neg_dim'),
('ndimension', small_3d, lambda t: [],),
('nelement', small_3d, lambda t: [],),
('numel', small_3d, lambda t: [],),
('narrow', small_3d, lambda t: [1, 3, 2],),
('narrow', small_3d, lambda t: [-1, 3, 2], 'neg_dim'),
('nonzero', small_3d, lambda t: [],),
('norm', small_3d, lambda t: [],),
('norm', small_3d, lambda t: [3], '3_norm'),
('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim'),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
('prod', small_2d_oneish, lambda t: [],),
('prod', small_3d, lambda t: [1], 'dim'),
('prod', small_3d, lambda t: [-1], 'neg_dim'),
('sum', small_2d, lambda t: [],),
('sum', small_3d, lambda t: [1], 'dim'),
('sum', small_3d, lambda t: [-1], 'neg_dim'),
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
('renorm', small_3d, lambda t: [2, -1, 1], '2_norm_neg_dim'),
('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
('repeat', small_2d, lambda t: [2, 2, 2],),
('size', new_t(1, 2, 3, 4), lambda t: [],),
('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'),
('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'),
('sort', small_3d_unique, lambda t: [],),
('sort', small_3d_unique, lambda t: [1], 'dim'),
('sort', small_3d_unique, lambda t: [-1], 'neg_dim'),
('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'),
('split', small_3d, lambda t: [2],),
('split', small_3d, lambda t: [2, 1], 'dim'),
('split', small_3d, lambda t: [2, -3], 'neg_dim'),
('squeeze', new_t(1, 2, 1, 4), lambda t: [],),
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
('squeeze', new_t(1, 2, 1, 4), lambda t: [-2], 'neg_dim'),
('t', new_t(1, 2), lambda t: [],),
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'),
('to_list', small_3d, lambda t: [],),
('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort'),
('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort'),
('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
('trace', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [2], 'positive'),
('tril', medium_2d, lambda t: [-2], 'negative'),
('triu', medium_2d, lambda t: [],),
('triu', medium_2d, lambda t: [2], 'positive'),
('triu', medium_2d, lambda t: [-2], 'negative'),
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
('unsqueeze', new_t(2, 3, 4), lambda t: [-2], 'neg_dim'),
('view', small_3d, lambda t: [100, 10],),
('view_as', small_3d, lambda t: [t(100, 10)],),
('zero', small_3d, lambda t: [],),
('zeros', small_3d, lambda t: [1, 2, 3, 4],),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
# lapack tests
('qr', small_2d_lapack, lambda t: [], 'square', float_types),
('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types),
('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types),
]
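Each entry above is a decl tuple of the form (name, tensor_constructor, arg_constructor[, desc[, type_subset]]); the generation loop at the bottom of this file expands every decl into one TestCuda method per tensor type via compare_cpu_gpu. A standalone sketch of just the naming scheme (the constructor value here is a stand-in; only the expansion logic mirrors the loop below):

    decl = ('sort', 'small_3d_unique', lambda t: [1], 'dim')
    name, _, _, desc = decl
    for type_name in ('FloatTensor', 'DoubleTensor'):
        for inplace in (False, True):
            inner = name + ('_' if inplace else '')
            print('test_' + type_name + '_' + inner + ('_' + desc if desc else ''))
    # -> test_FloatTensor_sort_dim, test_FloatTensor_sort__dim, ...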
@@ -275,6 +329,8 @@ for fn in simple_pointwise_float:
tests.append((fn, small_3d, lambda t: [], None, float_types))
_cycles_per_ms = None
def get_cycles_per_ms():
"""Approximate number of cycles per millisecond for torch.cuda._sleep"""
global _cycles_per_ms
@@ -288,6 +344,7 @@ def get_cycles_per_ms():
_cycles_per_ms = 1000000 / start.elapsed_time(end)
return _cycles_per_ms
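get_cycles_per_ms calibrates torch.cuda._sleep, a kernel that busy-waits for a given number of GPU clock cycles, so the tests below can queue a GPU-side delay of roughly known wall time:

    torch.cuda._sleep(int(50 * get_cycles_per_ms()))  # ~50 ms of GPU busy-waiting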
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
def tmp(self):
cpu_tensor = tensor_constructor(t)
@@ -314,23 +371,24 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
self.assertEqual(cpu_result, gpu_result, precision)
return tmp
class TestCuda(TestCase):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_autogpu(self):
if torch.cuda.device_count() > 1:
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.get_device(), 0)
self.assertEqual(y.get_device(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.get_device(), 1)
q = x.add(y)
self.assertEqual(q.get_device(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.get_device(), 1)
z = z.cuda()
self.assertEqual(z.get_device(), 0)
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.get_device(), 0)
self.assertEqual(y.get_device(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.get_device(), 1)
q = x.add(y)
self.assertEqual(q.get_device(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.get_device(), 1)
z = z.cuda()
self.assertEqual(z.get_device(), 0)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_copy_device(self):
@@ -352,7 +410,7 @@ class TestCuda(TestCase):
self.assertEqual(z.get_device(), 0)
self.assertIs(z.cuda(0), z)
def test_serialization(self):
def test_serialization_array_with_storage(self):
x = torch.randn(5, 5).cuda()
y = torch.IntTensor(2, 5).fill_(0).cuda()
q = [x, y, x, y.storage()]
@@ -404,6 +462,32 @@ class TestCuda(TestCase):
def test_broadcast_gpu(self):
self._test_broadcast(torch.randn(5, 5))
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_broadcast_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
b_tensors = [comm.broadcast(t, (0, 1)) for t in tensors]
for (_, bt), t in zip(b_tensors, tensors):
self.assertEqual(bt.get_device(), 1)
self.assertEqual(bt, t)
self.assertIsInstance(bt, type(t))
bc_tensors = comm.broadcast_coalesced(tensors, (0, 1), buffer_size=num_bytes * 5 // 2)
bc_tensors_t = list(zip(*bc_tensors))
self.assertEqual(b_tensors, bc_tensors_t)
for (_, bt), (_, bct) in zip(b_tensors, bc_tensors_t):
self.assertEqual(bt.get_device(), bct.get_device())
self.assertIsInstance(bct, type(bt))
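The invariant exercised here is that broadcast_coalesced produces exactly the same tensors as broadcasting each one individually; buffer_size only bounds how many same-typed tensors get flattened into a single transfer. A rough sketch of the packing idea (an illustration only, not the actual torch.cuda.comm implementation):

    def pack_for_broadcast(tensors, buffer_size):
        # Greedily pack runs of same-typed tensors into groups whose
        # total byte size stays under buffer_size (sketch only).
        groups, current, current_bytes = [], [], 0
        for t in tensors:
            nbytes = t.numel() * t.element_size()
            if current and (type(t) is not type(current[0]) or
                            current_bytes + nbytes > buffer_size):
                groups.append(current)
                current, current_bytes = [], 0
            current.append(t)
            current_bytes += nbytes
        if current:
            groups.append(current)
        return groups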
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add(self):
x = torch.randn(5, 5)
@@ -412,7 +496,33 @@ class TestCuda(TestCase):
y_cuda = y.cuda(1)
result = comm.reduce_add((x_cuda, y_cuda))
self.assertEqual(result.get_device(), 0)
self.assertEqual(result.cpu(), x+y)
self.assertEqual(result.cpu(), x + y)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add_coalesced(self):
numel = 5
num_bytes = numel * 8
tensors = [
torch.randn(numel).long().cuda(),
torch.randn(numel).cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel).long().cuda(),
torch.randn(numel * 2).int().cuda(), # int is 2x shorter
torch.randn(numel).cuda(),
]
dup_tensors = [tensors, list(map(lambda t: t.cuda(1), tensors))]
r_tensors = list(map(comm.reduce_add, zip(*dup_tensors)))
for r, t in zip(r_tensors, tensors):
self.assertEqual(r.get_device(), t.get_device())
self.assertEqual(r, t * 2)
self.assertIsInstance(r, type(t))
rc_tensors = comm.reduce_add_coalesced(dup_tensors, buffer_size=num_bytes * 5 // 2)
self.assertEqual(r_tensors, rc_tensors)
for r, rc in zip(r_tensors, rc_tensors):
self.assertEqual(rc.get_device(), r.get_device())
self.assertIsInstance(rc, type(r))
def _test_scatter(self, input, chunk_sizes=None, dim=0):
if torch.cuda.device_count() < 2:
@@ -435,6 +545,9 @@ class TestCuda(TestCase):
def test_scatter_cpu_dim(self):
self._test_scatter(torch.randn(4, 4), dim=1)
def test_scatter_cpu_neg_dim(self):
self._test_scatter(torch.randn(4, 4), dim=-2)
def test_scatter_cpu_sizes(self):
self._test_scatter(torch.randn(6, 4), chunk_sizes=(2, 4))
@@ -444,6 +557,9 @@ class TestCuda(TestCase):
def test_scatter_gpu_dim(self):
self._test_scatter(torch.randn(4, 4).cuda(), dim=1)
def test_scatter_gpu_neg_dim(self):
self._test_scatter(torch.randn(4, 4).cuda(), dim=-2)
def test_scatter_gpu_sizes(self):
self._test_scatter(torch.randn(6, 4).cuda(), chunk_sizes=(2, 4))
@@ -473,8 +589,8 @@ class TestCuda(TestCase):
self._test_gather(1)
def test_from_sequence(self):
seq = [list(range(i*4,i*4+4)) for i in range(5)]
reference = torch.range(0, 19).resize_(5, 4)
seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)]
reference = torch.arange(0, 20).resize_(5, 4)
for t in types:
cuda_type = get_gpu_type(t)
self.assertEqual(cuda_type(seq), reference)
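The torch.range -> torch.arange switch in this test (and throughout this diff) is a semantic fix, not cosmetics: torch.range(0, 19) includes both endpoints, while torch.arange(0, 20) follows the half-open Python convention, so both produce the same 20 elements:

    a = torch.range(0, 19)   # deprecated: inclusive of both endpoints
    b = torch.arange(0, 20)  # half-open [0, 20)
    assert a.numel() == b.numel() == 20 and (a == b).all()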
@ -490,6 +606,13 @@ class TestCuda(TestCase):
self.assertEqual(x, y)
self.assertEqual(torch.cuda.initial_seed(), 2)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_cat_autogpu(self):
x = torch.randn(4, 4).cuda(1)
y = torch.randn(4, 4).cuda(1)
z = torch.cat([x, y], 0)
self.assertEqual(z.get_device(), x.get_device())
def test_serialization(self):
x = torch.randn(4, 4).cuda()
with tempfile.NamedTemporaryFile() as f:
@@ -500,7 +623,7 @@ class TestCuda(TestCase):
self.assertIs(type(x_copy), type(x))
self.assertEqual(x_copy.get_device(), x.get_device())
def test_serialization_empty(self):
def test_serialization_array_with_empty(self):
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
@@ -526,6 +649,7 @@ class TestCuda(TestCase):
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization_remap(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
def gpu_remap(storage, location):
if location == 'cuda:1':
return storage.cuda(0)
@@ -623,6 +747,38 @@ class TestCuda(TestCase):
self.assertTrue(event.query())
self.assertGreater(start_event.elapsed_time(event), 0)
def test_record_stream(self):
cycles_per_ms = get_cycles_per_ms()
t = torch.FloatTensor([1, 2, 3, 4]).pin_memory()
result = torch.cuda.FloatTensor(t.size())
stream = torch.cuda.Stream()
ptr = [None]
# Performs the CPU->GPU copy in a background stream
def perform_copy():
with torch.cuda.stream(stream):
tmp = t.cuda(async=True)
ptr[0] = tmp.data_ptr()
torch.cuda.current_stream().wait_stream(stream)
tmp.record_stream(torch.cuda.current_stream())
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
result.copy_(tmp)
perform_copy()
with torch.cuda.stream(stream):
tmp2 = torch.cuda.FloatTensor(t.size())
tmp2.zero_()
self.assertNotEqual(tmp2.data_ptr(), ptr[0], 'allocation re-used too soon')
self.assertEqual(result.tolist(), [1, 2, 3, 4])
# Check that the block will be re-used after the main stream finishes
torch.cuda.current_stream().synchronize()
with torch.cuda.stream(stream):
tmp3 = torch.cuda.FloatTensor(t.size())
self.assertEqual(tmp3.data_ptr(), ptr[0], 'allocation not re-used')
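For context on what this test pins down: tensor.record_stream(stream) tells the caching allocator that the tensor is still consumed by work queued on stream, so its block must not be recycled until that work completes. The core pattern, reduced to a sketch:

    copy_stream = torch.cuda.Stream()
    pinned = torch.FloatTensor([1, 2, 3, 4]).pin_memory()
    with torch.cuda.stream(copy_stream):
        gpu = pinned.cuda(async=True)      # async copy on the side stream
    torch.cuda.current_stream().wait_stream(copy_stream)
    gpu.record_stream(torch.cuda.current_stream())  # guard against early reuse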
def test_caching_pinned_memory(self):
cycles_per_ms = get_cycles_per_ms()
@@ -642,39 +798,73 @@ class TestCuda(TestCase):
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
self.assertEqual(list(gpu_tensor), [1])
    @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
    def test_caching_pinned_memory_multi_gpu(self):
        # checks that the events preventing pinned memory from being re-used
        # too early are recorded on the correct GPU
        cycles_per_ms = get_cycles_per_ms()
        t = torch.FloatTensor([1]).pin_memory()
        ptr = t.data_ptr()
        gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
        gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)
        with torch.cuda.device(1):
            torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
            gpu_tensor1.copy_(t, async=True)
        del t
        t = torch.FloatTensor([2]).pin_memory()
        self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
        with torch.cuda.device(0):
            gpu_tensor0.copy_(t, async=True)
        self.assertEqual(gpu_tensor1[0], 1)
        self.assertEqual(gpu_tensor0[0], 2)
    def test_btrifact(self):
        TestTorch._test_btrifact(self, lambda t: t.cuda())
    def test_btrisolve(self):
        TestTorch._test_btrisolve(self, lambda t: t.cuda())
if HAS_CUDA:
    for decl in tests:
        for t in types:
            tensor = t()
            gpu_tensor = get_gpu_type(t)()
            if len(decl) == 3:
                name, constr, arg_constr = decl
                desc = ''
            elif len(decl) == 4:
                name, constr, arg_constr, desc = decl
            elif len(decl) == 5:
                name, constr, arg_constr, desc, type_subset = decl
                if t not in type_subset:
                    continue
            precision = custom_precision.get(name, TestCuda.precision)
            for inplace in (True, False):
                if inplace:
                    name_inner = name + '_'
                else:
                    name_inner = name
                if not hasattr(tensor, name_inner):
                    continue
                if not hasattr(gpu_tensor, name_inner):
                    print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
                        name_inner, gpu_tensor.__class__.__name__))
                    continue
                test_name = 'test_' + t.__name__ + '_' + name_inner
                if desc:
                    test_name += '_' + desc
                assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
                setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
if __name__ == '__main__':
unittest.main()
run_tests()


@@ -4,7 +4,7 @@ import torch
import traceback
import unittest
from torch.utils.data import Dataset, TensorDataset, DataLoader
from common import TestCase
from common import TestCase, run_tests, TEST_NUMPY
from common_nn import TEST_CUDA
@@ -27,11 +27,12 @@ class TestTensorDataset(TestCase):
l = torch.randn(15)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i:i+1], source[i][0])
self.assertEqual(l[i:i+1], source[i][1])
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
class ErrorDataset(Dataset):
def __init__(self, size):
self.size = size
@@ -50,9 +51,9 @@ class TestDataLoader(TestCase):
batch_size = loader.batch_size
for i, (sample, target) in enumerate(loader):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx+batch_size])
self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1))
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
self.assertEqual(sample, self.data[idx:idx + batch_size])
self.assertEqual(target, self.labels[idx:idx + batch_size])
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
def _test_shuffle(self, loader):
found_data = {i: 0 for i in range(self.data.size(0))}
@@ -65,11 +66,11 @@ class TestDataLoader(TestCase):
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
self.assertEqual(target, self.labels[data_point_idx])
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i+1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i+1) * batch_size)
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
def _test_error(self, loader):
it = iter(loader)
@@ -81,10 +82,9 @@ class TestDataLoader(TestCase):
errors += 1
except StopIteration:
self.assertEqual(errors,
math.ceil(float(len(loader.dataset))/loader.batch_size))
math.ceil(float(len(loader.dataset)) / loader.batch_size))
return
def test_sequential(self):
self._test_sequential(DataLoader(self.dataset))
@@ -123,6 +123,22 @@ class TestDataLoader(TestCase):
self.assertTrue(input.is_pinned())
self.assertTrue(target.is_pinned())
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_numpy(self):
import numpy as np
class TestDataset(torch.utils.data.Dataset):
def __getitem__(self, i):
return np.ones((2, 3, 4)) * i
def __len__(self):
return 1000
loader = DataLoader(TestDataset(), batch_size=12)
batch = next(iter(loader))
self.assertIsInstance(batch, torch.DoubleTensor)
self.assertEqual(batch.size(), torch.Size([12, 2, 3, 4]))
def test_error(self):
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
@@ -157,6 +173,102 @@ class TestDataLoader(TestCase):
check_len(DataLoader(self.dataset, batch_size=2), 50)
check_len(DataLoader(self.dataset, batch_size=3), 34)
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_numpy_scalars(self):
import numpy as np
class ScalarDataset(torch.utils.data.Dataset):
def __init__(self, dtype):
self.dtype = dtype
def __getitem__(self, i):
return self.dtype()
def __len__(self):
return 4
dtypes = {
np.float64: torch.DoubleTensor,
np.float32: torch.FloatTensor,
np.float16: torch.HalfTensor,
np.int64: torch.LongTensor,
np.int32: torch.IntTensor,
np.int16: torch.ShortTensor,
np.int8: torch.CharTensor,
np.uint8: torch.ByteTensor,
}
for dt, tt in dtypes.items():
dset = ScalarDataset(dt)
loader = DataLoader(dset, batch_size=2)
batch = next(iter(loader))
self.assertIsInstance(batch, tt)
class StringDataset(Dataset):
def __init__(self):
self.s = '12345'
def __len__(self):
return len(self.s)
def __getitem__(self, ndx):
return (self.s[ndx], ndx)
class TestStringDataLoader(TestCase):
def setUp(self):
self.dataset = StringDataset()
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_shuffle_pin_memory(self):
loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
for batch_ndx, (s, n) in enumerate(loader):
self.assertIsInstance(s[0], str)
self.assertTrue(n.is_pinned())
class DictDataset(Dataset):
def __len__(self):
return 4
def __getitem__(self, ndx):
return {
'a_tensor': torch.Tensor(4, 2).fill_(ndx),
'another_dict': {
'a_number': ndx,
},
}
class TestDictDataLoader(TestCase):
def setUp(self):
self.dataset = DictDataset()
def test_sequential_batch(self):
loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
batch_size = loader.batch_size
for i, sample in enumerate(loader):
idx = i * batch_size
self.assertEqual(set(sample.keys()), {'a_tensor', 'another_dict'})
self.assertEqual(set(sample['another_dict'].keys()), {'a_number'})
t = sample['a_tensor']
self.assertEqual(t.size(), torch.Size([batch_size, 4, 2]))
self.assertTrue((t[0] == idx).all())
self.assertTrue((t[1] == idx + 1).all())
n = sample['another_dict']['a_number']
self.assertEqual(n.size(), torch.Size([batch_size]))
self.assertEqual(n[0], idx)
self.assertEqual(n[1], idx + 1)
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_pin_memory(self):
loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
for batch_ndx, sample in enumerate(loader):
self.assertTrue(sample['a_tensor'].is_pinned())
self.assertTrue(sample['another_dict']['a_number'].is_pinned())
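These dict tests depend on the default collate function recursing into mappings: each key is collated independently, tensors are stacked along a new leading batch dimension, and plain numbers become a 1-D tensor. A simplified sketch of that recursion (the real default_collate in torch.utils.data.dataloader handles more element types):

    def collate(batch):
        elem = batch[0]
        if torch.is_tensor(elem):
            return torch.stack(batch, 0)
        if isinstance(elem, dict):
            return {key: collate([d[key] for d in batch]) for key in elem}
        if isinstance(elem, (int, float)):
            return torch.DoubleTensor(batch)
        raise TypeError('unsupported batch element')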
if __name__ == '__main__':
unittest.main()
run_tests()

test/test_distributed.py (new file, +508 lines)

@@ -0,0 +1,508 @@
import fcntl
import multiprocessing
import os
import sys
import time
import unittest
from functools import wraps, reduce
from contextlib import contextmanager
import torch
import torch.distributed as dist
from common import TestCase
BACKEND = os.environ['BACKEND']
TEMP_DIR = os.environ['TEMP_DIR']
MASTER_PORT = '29500'
MASTER_ADDR = '127.0.0.1:' + MASTER_PORT
@contextmanager
def _lock():
lockfile = os.path.join(TEMP_DIR, 'lockfile')
with open(lockfile, 'w') as lf:
try:
fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
yield
finally:
fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
lf.close()
def _build_tensor(size, value=None):
if value is None:
value = size
return torch.FloatTensor(size, size, size).fill_(value)
class Barrier(object):
barrier_id = 0
@classmethod
def init(cls):
cls.barrier_id = 0
barrier_dir = os.path.join(TEMP_DIR, 'barrier')
for f_name in os.listdir(barrier_dir):
os.unlink(os.path.join(barrier_dir, f_name))
@classmethod
def sync(cls, timeout=5):
cls.barrier_id += 1
barrier_dir = os.path.join(TEMP_DIR, 'barrier')
pid = str(os.getpid())
barrier_file = os.path.join(barrier_dir, pid)
with _lock():
with open(barrier_file, 'w') as f:
f.write(str(cls.barrier_id))
start_time = time.time()
while True:
arrived = 0
with _lock():
for f_name in os.listdir(barrier_dir):
with open(os.path.join(barrier_dir, f_name), 'r') as f:
data = f.read()
if int(data) >= cls.barrier_id:
arrived += 1
if arrived == dist.get_num_processes():
break
if time.time() - start_time > timeout:
raise RuntimeError("barrier timeout")
time.sleep(0.1)
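Usage note: the barrier is purely file-based so it works with any backend. Barrier.init() runs once in the parent (see setUp below) to clear stale barrier files, and every spawned process then calls Barrier.sync() at each synchronization point:

    Barrier.init()           # parent: reset barrier files before spawning workers
    Barrier.sync(timeout=5)  # worker: block until all ranks reach this barrier_id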
class _DistTestBase(object):
def _barrier(self, *args, **kwargs):
Barrier.sync(*args, **kwargs)
def _init_group_test(self):
group = [1, 2]
group_id = dist.new_group(group)
rank = dist.get_rank()
if rank not in group:
return ([], None, rank)
return (group, group_id, rank)
def _init_global_test(self):
group = [i for i in range(0, dist.get_num_processes())]
group_id = dist.group.WORLD
rank = dist.get_rank()
return (group, group_id, rank)
# GET RANK
def test_get_rank(self):
test_dir = os.path.join(TEMP_DIR, 'test_dir')
pid = str(os.getpid())
num_processes = dist.get_num_processes()
with open(os.path.join(test_dir, pid), 'w') as f:
f.write(str(dist.get_rank()))
self._barrier()
all_ranks = set()
for f_name in os.listdir(test_dir):
with open(os.path.join(test_dir, f_name), 'r') as f:
all_ranks.add(int(f.read()))
self.assertEqual(len(all_ranks), num_processes)
self._barrier()
if dist.get_rank() == 0:
for f_name in os.listdir(test_dir):
os.unlink(os.path.join(test_dir, f_name))
self._barrier()
# SEND RECV
def test_send_recv(self):
rank = dist.get_rank()
tensor = _build_tensor(rank + 1)
for dest in range(0, dist.get_num_processes()):
if dest == rank:
continue
dist.send(tensor, dest)
for src in range(0, dist.get_num_processes()):
if src == rank:
continue
tensor = _build_tensor(src + 1, value=-1)
expected_tensor = _build_tensor(src + 1)
dist.recv(tensor, src)
self.assertEqual(tensor, expected_tensor)
self._barrier()
# SEND RECV ANY SOURCE
def test_send_recv_any_source(self):
rank = dist.get_rank()
tensor = _build_tensor(10, rank)
for dest in range(0, dist.get_num_processes()):
if dest == rank:
continue
dist.send(tensor, dest)
recv_ranks = set()
for src in range(0, dist.get_num_processes()):
if src == rank:
continue
tensor = _build_tensor(10, value=-1)
dist.recv(tensor)
recv_ranks.add(tensor.resize_(1)[0])
self.assertEqual(len(recv_ranks), dist.get_num_processes() - 1)
self._barrier()
# ISEND
def test_isend(self):
rank = dist.get_rank()
world_size = dist.get_num_processes()
if rank == 0:
requests = [
dist.isend(_build_tensor(dest, 10), dest) for dest in range(1, world_size)
]
for request in requests:
request.wait()
self.assertTrue(request.is_completed())
else:
tensor = _build_tensor(rank, -1)
dist.recv(tensor, 0)
self.assertEqual(tensor, _build_tensor(rank, 10))
self._barrier()
# IRECV
def test_irecv(self):
rank = dist.get_rank()
world_size = dist.get_num_processes()
if rank == 0:
expected_tensors = [_build_tensor(src, -1) for src in range(1, world_size)]
requests = [
dist.irecv(expected_tensors[src - 1], src) for src in range(1, world_size)
]
for src in range(1, world_size):
requests[src - 1].wait()
self.assertTrue(requests[src - 1].is_completed())
self.assertEqual(expected_tensors[src - 1], _build_tensor(src, 10))
else:
tensor = _build_tensor(rank, 10)
dist.send(tensor, 0)
self._barrier()
# BROADCAST
def _test_broadcast_helper(self, group, group_id, rank):
for src in group:
expected_tensor = _build_tensor(src + 1)
if rank == src:
dist.broadcast(expected_tensor, src, group_id)
else:
tensor = _build_tensor(src + 1, -1)
dist.broadcast(tensor, src, group_id)
self.assertEqual(tensor, expected_tensor)
self._barrier()
def test_broadcast(self):
group, group_id, rank = self._init_global_test()
self._test_broadcast_helper(group, group_id, rank)
def test_broadcast_group(self):
group, group_id, rank = self._init_group_test()
self._test_broadcast_helper(group, group_id, rank)
# REDUCE
def _test_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
for src in group:
if rank == src:
tensor = _build_tensor(src + 1).fill_(master_value)
dist.reduce(tensor, src, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
else:
tensor = _build_tensor(src + 1).fill_(worker_value)
dist.reduce(tensor, src, op, group_id)
self._barrier()
def test_reduce_sum(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_reduce_product(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_reduce_min(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_reduce_max(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
def test_reduce_group_sum(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_reduce_group_product(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_reduce_group_min(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_reduce_group_max(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
# ALL REDUCE
def _test_all_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
for src in group:
if rank == src:
tensor = _build_tensor(src + 1).fill_(master_value)
dist.all_reduce(tensor, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
else:
tensor = _build_tensor(src + 1).fill_(worker_value)
dist.all_reduce(tensor, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
self._barrier()
def test_all_reduce_sum(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_all_reduce_product(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_all_reduce_min(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_all_reduce_max(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
def test_all_reduce_group_sum(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_all_reduce_group_product(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_all_reduce_group_min(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_all_reduce_group_max(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
# SCATTER
def _test_scatter_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, -1)
expected_tensor = _build_tensor(dest + 1, rank)
if rank == dest:
tensors = [_build_tensor(dest + 1, i) for i in group]
dist.scatter_send(tensors, tensor, group_id)
self.assertEqual(tensor, expected_tensor)
else:
dist.scatter_recv(tensor, dest, group_id)
self.assertEqual(tensor, expected_tensor)
self._barrier()
def test_scatter(self):
group, group_id, rank = self._init_global_test()
self._test_scatter_helper(group, group_id, rank)
def test_scatter_group(self):
group, group_id, rank = self._init_group_test()
self._test_scatter_helper(group, group_id, rank)
# GATHER
def _test_gather_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, rank)
if rank == dest:
tensors = [_build_tensor(dest + 1, -1) for i in group]
dist.gather_recv(tensors, tensor, group_id)
expected_tensors = [_build_tensor(dest + 1, i) for i in group]
for t1, t2 in zip(tensors, expected_tensors):
self.assertEqual(t1, t2)
else:
dist.gather_send(tensor, dest, group_id)
self._barrier()
def test_gather(self):
group, group_id, rank = self._init_global_test()
self._test_gather_helper(group, group_id, rank)
def test_gather_group(self):
group, group_id, rank = self._init_group_test()
self._test_gather_helper(group, group_id, rank)
# ALL GATHER
def _test_all_gather_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, rank)
tensors = [_build_tensor(dest + 1, -1) for i in group]
dist.all_gather(tensors, tensor, group_id)
expected_tensors = [_build_tensor(dest + 1, i) for i in group]
for t1, t2 in zip(tensors, expected_tensors):
self.assertEqual(t1, t2)
self._barrier()
def test_all_gather(self):
group, group_id, rank = self._init_global_test()
self._test_all_gather_helper(group, group_id, rank)
def test_all_gather_group(self):
group, group_id, rank = self._init_group_test()
self._test_all_gather_helper(group, group_id, rank)
# BARRIER
def _test_barrier_helper(self, group, group_id, rank):
WAIT_TIME = 0.3 # seconds
for dest in group:
expected_time = torch.DoubleTensor(1).fill_(0.0)
if dest == rank:
expected_time.fill_(time.time() + WAIT_TIME)
dist.broadcast(expected_time, dest, group_id)
time.sleep(WAIT_TIME + 0.1) # sleep a little bit longer
dist.barrier(group_id)
else:
dist.broadcast(expected_time, dest, group_id)
dist.barrier(group_id)
self.assertGreaterEqual(time.time(), expected_time[0])
self._barrier()
def test_barrier(self):
group, group_id, rank = self._init_global_test()
self._test_barrier_helper(group, group_id, rank)
def test_barrier_group(self):
group, group_id, rank = self._init_group_test()
self._test_barrier_helper(group, group_id, rank)
if BACKEND == 'tcp':
WORLD_SIZE = os.environ['WORLD_SIZE']
class TestTCP(TestCase, _DistTestBase):
MANAGER_PROCESS_RANK = -1
JOIN_TIMEOUT = 5
@staticmethod
def manager_join(fn):
@wraps(fn)
def wrapper(self):
if self.rank == self.MANAGER_PROCESS_RANK:
self._join_and_reduce()
else:
fn(self)
return wrapper
@classmethod
def setUpClass(cls):
os.environ['MASTER_ADDR'] = MASTER_ADDR
os.environ['MASTER_PORT'] = MASTER_PORT
os.environ['WORLD_SIZE'] = WORLD_SIZE
for attr in dir(cls):
if attr.startswith('test'):
fn = getattr(cls, attr)
setattr(cls, attr, cls.manager_join(fn))
def setUp(self):
self.processes = []
self.rank = self.MANAGER_PROCESS_RANK
Barrier.init()
for rank in range(int(WORLD_SIZE)):
self.processes.append(self._spawn_process(rank))
def tearDown(self):
for p in self.processes:
p.terminate()
def _spawn_process(self, rank):
os.environ['RANK'] = str(rank)
name = 'process ' + str(rank)
process = multiprocessing.Process(target=self._run, name=name,
args=(rank,))
process.start()
return process
def _run(self, rank):
self.rank = rank
dist.init_process_group(backend=BACKEND)
# self.id() == e.g. '__main__.TestDistributed.test_get_rank'
# We're retrieving the corresponding test and executing it.
getattr(self, self.id().split(".")[2])()
sys.exit(0)
def _join_and_reduce(self):
for p in self.processes:
p.join(self.JOIN_TIMEOUT)
self.assertEqual(p.exitcode, 0)
elif BACKEND == 'mpi':
dist.init_process_group(backend='mpi')
class TestMPI(TestCase, _DistTestBase):
pass
if __name__ == '__main__':
unittest.main()

File diff suppressed because it is too large

@@ -11,13 +11,15 @@ import torch.cuda
import torch.multiprocessing as mp
from torch.autograd import Variable
from torch.nn import Parameter
from common import TestCase
from common import TestCase, run_tests
TEST_REPEATS = 30
HAS_SHM_FILES = os.path.isdir('/dev/shm')
TEST_CUDA_IPC = torch.cuda.is_available() and \
sys.version_info[0] == 3 and \
sys.platform != 'darwin'
sys.version_info[0] == 3 and \
sys.platform != 'darwin'
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
def simple_fill(queue, event):
@@ -73,14 +75,13 @@ def autograd_sharing(queue, ready, master_modified):
ready.set()
master_modified.wait()
expected_var = torch.range(1, 25).view(5, 5)
expected_var[0,0] = 1000
expected_var = torch.arange(1, 26).view(5, 5)
expected_var[0, 0] = 1000
is_ok = var.data.equal(expected_var)
var.data[:] = torch.ones(5, 5)
if var.grad is not None:
is_ok &= var.grad.data.equal(torch.ones(5, 5) * 4)
var.grad.data[:] = torch.ones(5, 5)
is_ok &= var.grad is None
var._grad = Variable(torch.ones(5, 5), requires_grad=False)
queue.put(is_ok)
@@ -113,7 +114,7 @@ class leak_checker(object):
# one-off initialization that may use up a file descriptor
available_fds = self._get_next_fds(10)
self.test_case.assertLessEqual(
available_fds[-1] - self.next_fds[-1], 4)
available_fds[-1] - self.next_fds[-1], 5)
self.test_case.assertFalse(self.has_shm_files())
return False
@@ -148,9 +149,6 @@ class leak_checker(object):
class TestMultiprocessing(TestCase):
def __init__(self, *args, **kwargs):
super(TestMultiprocessing, self).__init__(*args, **kwargs)
def _test_sharing(self, ctx=mp, type=torch.FloatTensor, repeat=1):
def test_fill():
x = torch.zeros(5, 5).type(type)
@@ -159,9 +157,11 @@ class TestMultiprocessing(TestCase):
data = [x, x[:, 1]]
q.put(data)
p = ctx.Process(target=simple_fill, args=(q, e))
p.daemon = True
lc.check_pid(p.pid)
p.start()
e.wait()
e.wait(10)
self.assertTrue(e.is_set())
self.assertTrue(data[0].eq(4).all())
self.assertTrue(data[1].eq(4).all())
p.join(1)
@@ -171,6 +171,7 @@ class TestMultiprocessing(TestCase):
q = ctx.Queue()
e = ctx.Event()
p = ctx.Process(target=send_tensor, args=(q, e, type))
p.daemon = True
lc.check_pid(p.pid)
p.start()
t1 = q.get()
@@ -182,17 +183,17 @@ class TestMultiprocessing(TestCase):
self.assertFalse(p.is_alive())
with leak_checker(self) as lc:
for i in range(repeat):
for _ in range(repeat):
test_fill()
test_receive()
def _test_preserve_sharing(self, ctx=mp, repeat=1):
def do_test():
x = torch.randn(5, 5)
data = [x.storage(), x.storage()[1:4], x, x[2], x[:,1]]
data = [x.storage(), x.storage()[1:4], x, x[2], x[:, 1]]
q = ctx.Queue()
q.put(data)
new_data = q.get()
new_data = q.get(timeout=1)
self.assertEqual(new_data, data, 0)
storage_cdata = data[0]._cdata
self.assertEqual(new_data[0]._cdata, storage_cdata)
@@ -229,27 +230,27 @@ class TestMultiprocessing(TestCase):
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_sharing(self):
self._test_sharing(repeat=20)
self._test_sharing(repeat=TEST_REPEATS)
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_preserve_sharing(self):
self._test_preserve_sharing(repeat=20)
self._test_preserve_sharing(repeat=TEST_REPEATS)
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_pool(self):
self._test_pool(repeat=20)
self._test_pool(repeat=TEST_REPEATS)
def test_fs_sharing(self):
with fs_sharing():
self._test_sharing(repeat=20)
self._test_sharing(repeat=TEST_REPEATS)
def test_fs_preserve_sharing(self):
with fs_sharing():
self._test_preserve_sharing(repeat=20)
self._test_preserve_sharing(repeat=TEST_REPEATS)
def test_fs_pool(self):
with fs_sharing():
self._test_pool(repeat=20)
self._test_pool(repeat=TEST_REPEATS)
@unittest.skipIf(not HAS_SHM_FILES, "don't know how to check if shm files exist")
def test_fs(self):
@@ -263,7 +264,7 @@ class TestMultiprocessing(TestCase):
q.get()
with fs_sharing(), leak_checker(self) as lc:
for i in range(20):
for _ in range(TEST_REPEATS):
queue_put()
def test_inherit_tensor(self):
@@ -271,6 +272,7 @@ class TestMultiprocessing(TestCase):
def __init__(self, tensor):
super(SubProcess, self).__init__()
self.tensor = tensor
self.daemon = True
def run(self):
self.tensor.add_(3)
@@ -278,7 +280,7 @@ class TestMultiprocessing(TestCase):
t = torch.zeros(5, 5)
p = SubProcess(t.share_memory_())
p.start()
p.join()
p.join(1)
self.assertEqual(t, torch.ones(5, 5) * 3, 0)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
@@ -286,15 +288,15 @@ class TestMultiprocessing(TestCase):
torch.cuda.FloatTensor([1]) # initialize CUDA outside of leak checker
self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
@unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
def test_cuda_small_tensors(self):
# Check multiple small tensors which will likely use the same
# underlying cached allocation
ctx = mp.get_context('spawn')
tensors = []
for i in range(5):
tensors += [torch.range(i * 5, (i * 5) + 4).cuda()]
tensors += [torch.arange(i * 5, (i + 1) * 5).cuda()]
inq = ctx.Queue()
outq = ctx.Queue()
@@ -309,7 +311,7 @@ class TestMultiprocessing(TestCase):
for i, tensor in enumerate(tensors):
v, device, tensor_size, storage_size = results[i]
self.assertEqual(v, torch.range(i * 5, (i * 5) + 4).sum())
self.assertEqual(v, torch.arange(i * 5, (i + 1) * 5).sum())
self.assertEqual(device, 0)
self.assertEqual(tensor_size, 5)
self.assertEqual(storage_size, 5)
@@ -355,22 +357,23 @@ class TestMultiprocessing(TestCase):
master_modified = mp.Event()
queue = mp.Queue()
p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
p.daemon = True
p.start()
var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
queue.put(var)
ready.wait()
var.data[0,0] = 1000
if var.grad is not None:
var.grad.data[:] = torch.ones(5, 5) * 4
var.data[0, 0] = 1000
var.grad.data[:] = torch.ones(5, 5) * 4
master_modified.set()
worker_ok = queue.get()
self.assertTrue(worker_ok)
self.assertEqual(var.data, torch.ones(5, 5))
if var.grad is not None:
self.assertEqual(var.grad.data, torch.ones(5, 5))
p.join()
self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
p.join(1)
self.assertFalse(p.is_alive())
def test_variable_sharing(self):
configs = [
@@ -379,13 +382,13 @@ class TestMultiprocessing(TestCase):
(False, True),
]
for requires_grad, volatile in configs:
var = Variable(torch.range(1, 25).view(5, 5),
requires_grad=requires_grad,
volatile=volatile)
var = Variable(torch.arange(1, 26).view(5, 5),
requires_grad=requires_grad,
volatile=volatile)
self._test_autograd_sharing(var)
def test_parameter_sharing(self):
param = Parameter(torch.range(1, 25).view(5, 5))
param = Parameter(torch.arange(1, 26).view(5, 5))
self._test_autograd_sharing(param)
def _test_is_shared(self):
@@ -409,4 +412,4 @@ class TestMultiprocessing(TestCase):
if __name__ == '__main__':
unittest.main()
run_tests()


@@ -4,14 +4,12 @@ import torch
import torch.cuda.nccl as nccl
import torch.cuda
from common import TestCase
if not torch.cuda.is_available():
print('CUDA not available, skipping tests')
import sys
sys.exit()
from common import TestCase, run_tests
nGPUs = torch.cuda.device_count()
if nGPUs == 0:
print('CUDA not available, skipping tests')
TestCase = object # noqa: F811
class TestNCCL(TestCase):
@@ -87,4 +85,4 @@ class TestNCCL(TestCase):
if __name__ == '__main__':
unittest.main()
run_tests()

File diff suppressed because it is too large

@@ -1,10 +1,12 @@
import unittest
import functools
from copy import deepcopy
import torch
import torch.optim as optim
import torch.legacy.optim as old_optim
from torch.autograd import Variable
from common import TestCase
from common import TestCase, run_tests
def rosenbrock(tensor):
@@ -14,7 +16,7 @@ def rosenbrock(tensor):
def drosenbrock(tensor):
x, y = tensor
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * (y - x**2)))
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))
def wrap_old_fn(old_fn, **config):
@@ -36,15 +38,22 @@ class TestOptim(TestCase):
initial_dist = params.data.dist(solution)
def eval():
optimizer.zero_grad()
loss = rosenbrock(params)
loss.backward()
# loss.backward() will give **slightly** different
# gradients than drosenbrock, because of a different ordering
# of floating point operations. In most cases it doesn't matter,
# but some optimizers are so sensitive that they can temporarily
# diverge up to 1e-4, just to converge again. This makes the
# comparison more stable.
params.grad.data.copy_(drosenbrock(params.data))
return loss
for i in range(2000):
optimizer.zero_grad()
optimizer.step(eval)
old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
params_t, state)
params_t, state)
self.assertEqual(params.data, params_t)
self.assertLessEqual(params.data.dist(solution), initial_dist)
@@ -52,25 +61,65 @@ class TestOptim(TestCase):
def _test_basic_cases_template(self, weight, bias, input, constructor):
weight = Variable(weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
input = Variable(input, requires_grad=False)
input = Variable(input)
optimizer = constructor(weight, bias)
def fn():
optimizer.zero_grad()
y = weight.mv(input)
if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
y = y.cuda(bias.get_device())
return (y + bias).abs().sum()
loss = (y + bias).pow(2).sum()
loss.backward()
return loss
initial_value = fn().data[0]
for i in range(200):
weight.grad.data.zero_()
bias.grad.data.zero_()
fn().backward()
optimizer.step()
optimizer.step(fn)
self.assertLess(fn().data[0], initial_value)
self.assertLessEqual(fn().data[0], initial_value)
def _test_state_dict(self, weight, bias, input, constructor):
weight = Variable(weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
input = Variable(input)
def _test_basic_cases(self, constructor):
def fn_base(optimizer, weight, bias):
optimizer.zero_grad()
loss = (weight.mv(input) + bias).pow(2).sum()
loss.backward()
return loss
optimizer = constructor(weight, bias)
fn = functools.partial(fn_base, optimizer, weight, bias)
# Prime the optimizer
for i in range(20):
optimizer.step(fn)
# Clone the weights and construct new optimizer for them
weight_c = Variable(weight.data.clone(), requires_grad=True)
bias_c = Variable(bias.data.clone(), requires_grad=True)
optimizer_c = constructor(weight_c, bias_c)
fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)
# Load state dict
state_dict = deepcopy(optimizer.state_dict())
state_dict_c = deepcopy(optimizer.state_dict())
optimizer_c.load_state_dict(state_dict_c)
# Run both optimizations in parallel
for i in range(20):
optimizer.step(fn)
optimizer_c.step(fn_c)
self.assertEqual(weight, weight_c)
self.assertEqual(bias, bias_c)
# Make sure state dict wasn't modified
self.assertEqual(state_dict, state_dict_c)
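Stripped of the test scaffolding, the round trip above is the standard optimizer checkpoint pattern (names as in the test):

    state = deepcopy(optimizer.state_dict())  # momentum buffers, step counts, ...
    optimizer_c.load_state_dict(state)        # the clone now steps identically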
def _test_basic_cases(self, constructor, ignore_multidevice=False):
self._test_state_dict(
torch.randn(10, 5),
torch.randn(10),
torch.randn(5),
constructor
)
self._test_basic_cases_template(
torch.randn(10, 5),
torch.randn(10),
@@ -79,8 +128,8 @@ class TestOptim(TestCase):
)
# non-contiguous parameters
self._test_basic_cases_template(
torch.randn(10, 5, 2)[...,0],
torch.randn(10, 2)[...,0],
torch.randn(10, 5, 2)[..., 0],
torch.randn(10, 2)[..., 0],
torch.randn(5),
constructor
)
@@ -94,12 +143,12 @@ class TestOptim(TestCase):
constructor
)
# Multi-GPU
if not torch.cuda.device_count() > 1:
if not torch.cuda.device_count() > 1 or ignore_multidevice:
return
self._test_basic_cases_template(
torch.randn(10, 5).cuda(),
torch.randn(10).cuda(),
torch.randn(5).cuda(),
torch.randn(10, 5).cuda(0),
torch.randn(10).cuda(1),
torch.randn(5).cuda(0),
constructor
)
@@ -275,10 +324,24 @@ class TestOptim(TestCase):
lr=1e-3)
)
def test_lbfgs(self):
self._test_rosenbrock(
lambda params: optim.LBFGS(params),
wrap_old_fn(old_optim.lbfgs)
)
self._test_rosenbrock(
lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
)
self._test_basic_cases(
lambda weight, bias: optim.LBFGS([weight, bias]),
ignore_multidevice=True
)
def test_invalid_param_type(self):
with self.assertRaises(TypeError):
optim.SGD(Variable(torch.randn(5, 5)), lr=3)
if __name__ == '__main__':
unittest.main()
run_tests()


@@ -4,34 +4,51 @@ from torch import sparse
import itertools
import random
import unittest
from common import TestCase
from common import TestCase, run_tests
from common_nn import TEST_CUDA
from numbers import Number
SparseTensor = sparse.DoubleTensor
# triplet := (index type, value type, sparse type)
cpu_triplet = (
torch.LongTensor,
torch.DoubleTensor,
torch.sparse.DoubleTensor)
if TEST_CUDA:
cuda_triplet = (
torch.cuda.LongTensor,
torch.cuda.DoubleTensor,
torch.cuda.sparse.DoubleTensor)
class TestSparse(TestCase):
@staticmethod
def _gen_sparse(d, nnz, with_size):
v = torch.randn(nnz)
def _gen_sparse(d, nnz, with_size, is_cuda=False):
if isinstance(with_size, Number):
v = torch.randn(nnz)
i = (torch.rand(d, nnz) * with_size).type(torch.LongTensor)
x = SparseTensor(i, v)
x = torch.sparse.DoubleTensor(i, v)
else:
v_size = [nnz] + list(with_size[d:])
v = torch.randn(*v_size)
i = torch.rand(d, nnz) * \
torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1)
torch.Tensor(with_size[:d]).repeat(nnz, 1).transpose(0, 1)
i = i.type(torch.LongTensor)
x = SparseTensor(i, v, torch.Size(with_size))
x = torch.sparse.DoubleTensor(i, v, torch.Size(with_size))
return x, i, v
if is_cuda:
return x.cuda(), i.cuda(), v.cuda()
else:
return x, i.clone(), v.clone()
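When with_size is a list, _gen_sparse builds a hybrid sparse tensor: the first d dimensions are sparse (carried by the index tensor) and the remaining ones are dense, living in the values tensor. A quick check using the helper above:

    x, i, v = TestSparse._gen_sparse(2, 5, [10, 10, 3])
    assert i.size() == torch.Size([2, 5])  # (sparse dims, nnz)
    assert v.size() == torch.Size([5, 3])  # (nnz, dense dims)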
def test_basic(self):
x, i, v = self._gen_sparse(3, 10, 100)
def _test_basic(self, is_cuda):
x, i, v = self._gen_sparse(3, 10, 100, is_cuda)
self.assertEqual(i, x.indices())
self.assertEqual(v, x.values())
x, i, v = self._gen_sparse(3, 10, [100, 100, 100])
x, i, v = self._gen_sparse(3, 10, [100, 100, 100], is_cuda)
self.assertEqual(i, x.indices())
self.assertEqual(v, x.values())
self.assertEqual(x.ndimension(), 3)
@@ -39,20 +56,30 @@ class TestSparse(TestCase):
for i in range(3):
self.assertEqual(x.size(i), 100)
SparseTensor = (cuda_triplet if is_cuda else cpu_triplet)[2]
# Make sure we can access empty indices / values
x = SparseTensor()
self.assertEqual(x.indices().numel(), 0)
self.assertEqual(x.values().numel(), 0)
def test_to_dense(self):
i = torch.LongTensor([
def test_basic(self):
self._test_basic(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_basic_cuda(self):
self._test_basic(True)
def _test_to_dense(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
v = torch.Tensor([2, 1, 3, 4])
v = ValueTensor([2, 1, 3, 4])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
res = torch.Tensor([
res = ValueTensor([
[[2, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
@@ -72,61 +99,181 @@ class TestSparse(TestCase):
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_contig(self):
i = torch.LongTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
def test_to_dense(self):
self._test_to_dense(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_to_dense_cuda(self):
self._test_to_dense(True)
def _test_to_dense_hybrid(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
])
v = ValueTensor([[2, 3], [1, 2], [3, 4], [4, 5]])
x = SparseTensor(i, v, torch.Size([3, 4, 2]))
res = ValueTensor([
[[2, 3],
[0, 0],
[0, 0],
[0, 0]],
[[1, 2],
[0, 0],
[0, 0],
[0, 0]],
[[3, 4],
[0, 0],
[0, 0],
[4, 5]],
])
x.to_dense() # Tests double to_dense for memory corruption
x.to_dense()
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_to_dense_hybrid(self):
self._test_to_dense_hybrid(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_to_dense_hybrid_cuda(self):
self._test_to_dense_hybrid(True)
def _test_contig(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
])
v = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
v = ValueTensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
x = SparseTensor(i, v, torch.Size([100, 100]))
exp_i = torch.LongTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
exp_i = IndexTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
])
exp_v = torch.Tensor([2, 1, 6, 4, 10, 3, 5, 9, 8, 7])
x.contiguous()
exp_v = ValueTensor([2, 1, 6, 4, 10, 3, 5, 9, 8, 7])
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
i = torch.LongTensor([
i = IndexTensor([
[2, 0, 2, 1],
[0, 0, 3, 0],
[1, 0, 4, 0],
])
v = torch.Tensor([3, 2, 4, 1])
v = ValueTensor([3, 2, 4, 1])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
exp_i = torch.LongTensor([
exp_i = IndexTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
exp_v = torch.Tensor([2, 1, 3, 4])
exp_v = ValueTensor([2, 1, 3, 4])
x.contiguous()
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
# Duplicate indices
i = torch.LongTensor([
i = IndexTensor([
[0, 0, 2, 0],
[0, 0, 3, 0],
[0, 0, 4, 0],
])
v = torch.Tensor([3, 2, 4, 1])
v = ValueTensor([3, 2, 4, 1])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
exp_i = torch.LongTensor([
exp_i = IndexTensor([
[0, 2],
[0, 3],
[0, 4],
])
exp_v = torch.Tensor([6, 4])
exp_v = ValueTensor([6, 4])
x.contiguous()
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
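# Note (illustrative sketch, not part of the diff): the three cases above all
# exercise the same coalesce() contract: sort index tuples lexicographically
# and sum the values of duplicates. A rough reference helper (hypothetical
# name, scalar values):
import torch

def coalesce_sketch(indices, values):
    buckets = {}
    for k in range(indices.size(1)):
        key = tuple(indices[:, k].tolist())
        buckets[key] = buckets.get(key, 0) + values[k]
    keys = sorted(buckets)  # lexicographic order over index tuples
    new_i = torch.LongTensor([list(dim) for dim in zip(*keys)])
    new_v = torch.Tensor([buckets[k] for k in keys])
    return new_i, new_v
# e.g. indices [[0, 0, 2], [0, 0, 3]] with values [3, 2, 4] coalesce to
# indices [[0, 2], [0, 3]] and values [5, 4].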
def test_contig(self):
self._test_contig(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_contig_cuda(self):
self._test_contig(True)
def _test_contig_hybrid(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
])
v = ValueTensor([
[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
[6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
])
x = SparseTensor(i, v, torch.Size([100, 100, 2]))
exp_i = IndexTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
])
exp_v = ValueTensor([
[2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
[3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
])
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
i = IndexTensor([
[2, 0, 2, 1],
[0, 0, 3, 0],
[1, 0, 4, 0],
])
v = ValueTensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = IndexTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
exp_v = ValueTensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]])
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
# Duplicate indices
i = IndexTensor([
[0, 0, 2, 0],
[0, 0, 3, 0],
[0, 0, 4, 0],
])
v = ValueTensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = IndexTensor([
[0, 2],
[0, 3],
[0, 4],
])
exp_v = ValueTensor([[6, 4, 5], [4, 3, 4]])
x = self.safeCoalesce(x)
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
def test_contig_hybrid(self):
self._test_contig_hybrid(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_contig_hybrid_cuda(self):
self._test_contig_hybrid(True)
def _test_transpose(self, is_cuda):
x = self._gen_sparse(4, 20, 5, is_cuda=is_cuda)[0]
y = x.to_dense()
for i, j in itertools.combinations(range(4), 2):
@ -138,6 +285,13 @@ class TestSparse(TestCase):
y = y.transpose(i, j)
self.assertEqual(x.to_dense(), y)
def test_transpose(self):
self._test_transpose(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_transpose_cuda(self):
self._test_transpose(True)
def test_mm(self):
def test_shape(di, dj, dk):
x, _, _ = self._gen_sparse(2, 20, [di, dj])
@ -146,16 +300,16 @@ class TestSparse(TestCase):
alpha = random.random()
beta = random.random()
res = torch.addmm(alpha, t, beta, x, y)
expected = torch.addmm(alpha, t, beta, x.to_dense(), y)
self.assertEqual(res, expected)
res = torch.addmm(t, x, y)
expected = torch.addmm(t, x.to_dense(), y)
self.assertEqual(res, expected)
res = torch.mm(x, y)
expected = torch.mm(x.to_dense(), y)
self.assertEqual(res, expected)
test_shape(10, 100, 100)
@ -170,51 +324,293 @@ class TestSparse(TestCase):
alpha = random.random()
beta = random.random()
res = torch.saddmm(alpha, t, beta, x, y)
expected = torch.addmm(alpha, t.to_dense(), beta, x.to_dense(), y)
self.assertEqual(res.to_dense(), expected)
res = torch.saddmm(t, x, y)
expected = torch.addmm(t.to_dense(), x.to_dense(), y)
self.assertEqual(res.to_dense(), expected)
res = torch.smm(x, y)
expected = torch.mm(x.to_dense(), y)
self.assertEqual(res.to_dense(), expected)
test_shape(7, 5, 3)
test_shape(1000, 100, 100)
test_shape(3000, 64, 300)
def _test_dsmm(self, is_cuda):
def test_shape(di, dj, dk):
x = self._gen_sparse(2, 20, [di, dj], is_cuda)[0]
y = torch.randn(dj, dk)
if is_cuda:
y = y.cuda()
res = torch.dsmm(x, y)
expected = torch.mm(x.to_dense(), y)
self.assertEqual(res, expected)
test_shape(7, 5, 3)
test_shape(1000, 100, 100)
test_shape(3000, 64, 300)
def test_dsmm(self):
self._test_dsmm(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_dsmm_cuda(self):
self._test_dsmm(True)
def _test_hsmm(self, is_cuda):
def test_shape(di, dj, dk):
x = self._gen_sparse(2, 20, [di, dj], is_cuda)[0]
y = torch.randn(dj, dk)
if is_cuda:
y = y.cuda()
res = torch.hsmm(x, y)
expected = torch.mm(x.to_dense(), y)
self.assertEqual(res.to_dense(), expected)
test_shape(7, 5, 3)
test_shape(1000, 100, 100)
test_shape(3000, 64, 300)
def test_hsmm(self):
self._test_hsmm(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_hsmm_cuda(self):
self._test_hsmm(True)
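# Note (illustrative, not part of the diff): both helpers multiply a sparse
# matrix by a dense one; dsmm returns a dense result while hsmm returns a
# sparse (hybrid) result, which is why only the hsmm expectation goes through
# to_dense(). The underlying product, as a rough sketch with a hypothetical
# helper:
import torch

def dsmm_sketch(i, v, m, dense):
    # i: LongTensor (2 x nnz) of a sparse (m x n) matrix with values v;
    # each nonzero (r, c) contributes v * dense row c to output row r.
    out = torch.zeros(m, dense.size(1))
    for nz in range(i.size(1)):
        r, c = i[0, nz].item(), i[1, nz].item()
        out[r] += v[nz] * dense[c]
    return out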
def _test_spadd_shape(self, is_cuda, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x, _, _ = self._gen_sparse(len(shape_i), 10, shape, is_cuda)
y = torch.randn(*shape)
if is_cuda:
y = y.cuda()
r = random.random()
res = torch.add(y, r, x)
expected = y + r * x.to_dense()
self.assertEqual(res, expected)
# Non-contiguous dense tensor
s = list(shape)
s[0] = shape[-1]
s[-1] = shape[0]
y = torch.randn(*s)
if is_cuda:
y = y.cuda()
y.transpose_(0, len(s) - 1)
r = random.random()
res = torch.add(y, r, x)
expected = y + r * x.to_dense()
self.assertEqual(res, expected)
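# Note (illustrative, not part of the diff): torch.add(y, r, x) with sparse x
# computes y + r * x; the transposed case above additionally checks that a
# non-contiguous dense operand works. A rough sketch of the sparse update
# (hypothetical helper):
import torch

def spadd_sketch(y, r, indices, values):
    out = y.clone()
    for k in range(indices.size(1)):
        idx = tuple(indices[:, k].tolist())
        out[idx] += r * values[k]  # values[k] may be a dense block (hybrid)
    return out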
def _test_spadd(self, is_cuda):
self._test_spadd_shape(is_cuda, [5, 6])
self._test_spadd_shape(is_cuda, [10, 10, 10])
self._test_spadd_shape(is_cuda, [50, 30, 20])
self._test_spadd_shape(is_cuda, [5, 5, 5, 5, 5, 5])
def test_spadd(self):
self._test_spadd(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_spadd_cuda(self):
self._test_spadd(True)
def _test_spadd_hybrid(self, is_cuda):
self._test_spadd_shape(is_cuda, [5, 6], [2, 3])
self._test_spadd_shape(is_cuda, [10, 10, 10], [3])
self._test_spadd_shape(is_cuda, [50, 30, 20], [2])
self._test_spadd_shape(is_cuda, [5, 5, 5, 5, 5, 5], [2])
def test_spadd_hybrid(self):
self._test_spadd_hybrid(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_spadd_hybrid_cuda(self):
self._test_spadd_hybrid(True)
def _test_basic_ops_shape(self, is_cuda, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape, is_cuda)
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape, is_cuda)
y1 = x1 + x2
y2 = x1.clone()
y2.add_(x2)
expected = x1.to_dense() + x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 - x2
y2 = x1.clone()
y2.sub_(x2)
expected = x1.to_dense() - x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 * x2
y2 = x1.clone()
y2.mul_(x2)
expected = x1.to_dense() * x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 * 37.5
y2 = x1.clone()
y2.mul_(37.5)
expected = x1.to_dense() * 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 / 37.5
y2 = x1.clone()
y2.div_(37.5)
expected = x1.to_dense() / 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
# TODO: add back inplace support
y1 = x1 ** 2
y2 = x1.clone()
y2 = y2.pow(2)
expected = x1.to_dense() ** 2
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y = x1.clone()
y.zero_()
expected = torch.zeros(x1.size())
self.assertEqual(y.to_dense(), expected)
self.assertFalse(x1.is_coalesced())
y = x1.coalesce()
z = x1.coalesce()
self.assertFalse(x1.is_coalesced())
self.assertTrue(y.is_coalesced())
self.assertEqual(x1, y)
# check that coalesce is out of place
y.values().add_(1)
self.assertEqual(z.values() + 1, y.values())
def _test_basic_ops(self, is_cuda):
self._test_basic_ops_shape(is_cuda, [5, 6])
self._test_basic_ops_shape(is_cuda, [10, 10, 10])
self._test_basic_ops_shape(is_cuda, [50, 30, 20])
self._test_basic_ops_shape(is_cuda, [5, 5, 5, 5, 5, 5])
def test_basic_ops(self):
self._test_basic_ops(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_basic_ops_cuda(self):
self._test_basic_ops(True)
def _test_basic_ops_hybrid(self, is_cuda):
self._test_basic_ops_shape(is_cuda, [5, 6], [2, 3])
self._test_basic_ops_shape(is_cuda, [10, 10, 10], [3])
self._test_basic_ops_shape(is_cuda, [50, 30, 20], [2])
self._test_basic_ops_shape(is_cuda, [5, 5, 5, 5, 5, 5], [2])
def test_basic_ops_hybrid(self):
self._test_basic_ops_hybrid(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_basic_ops_hybrid_cuda(self):
self._test_basic_ops_hybrid(True)
def _test_sparse_mask_shape(self, is_cuda, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape, is_cuda)
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape, is_cuda)
y1 = x1 + x2
y2 = x1.clone()
y2.add_(x2)
expected = x1.to_dense() + x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
def _test_sparse_mask_fixed(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[1, 3, 3, 0, 4],
[2, 1, 1, 2, 3],
])
v = ValueTensor([1, 2, 3, 4, 5])
x = SparseTensor(i, v, torch.Size([5, 4]))
dense = ValueTensor([
[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12],
[13, 14, 15, 16],
[17, 18, 19, 20],
])
exp_v = ValueTensor([7, 14, 14, 3, 20])
res = dense.sparse_mask(x)
expected = SparseTensor(i, exp_v, torch.Size([5, 4]))
self.assertEqual(res, expected)
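# Note (illustrative, not part of the diff): sparse_mask keeps the mask's
# index pattern but reads values from the dense tensor, e.g. index (1, 2)
# above picks dense[1][2] == 7. A rough sketch (hypothetical helper):
import torch

def sparse_mask_sketch(dense, indices):
    new_v = torch.stack([dense[tuple(indices[:, k].tolist())]
                         for k in range(indices.size(1))])
    return indices, new_v  # the indices are reused unchanged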
def _test_sparse_mask(self, is_cuda):
self._test_sparse_mask_fixed(is_cuda)
self._test_sparse_mask_shape(is_cuda, [5, 6])
self._test_sparse_mask_shape(is_cuda, [10, 10, 10])
self._test_sparse_mask_shape(is_cuda, [50, 30, 20])
self._test_sparse_mask_shape(is_cuda, [5, 5, 5, 5, 5, 5])
def test_sparse_mask(self):
self._test_sparse_mask(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_sparse_mask_cuda(self):
self._test_sparse_mask(True)
def _test_sparse_mask_hybrid_fixed(self, is_cuda):
IndexTensor, ValueTensor, SparseTensor = \
cuda_triplet if is_cuda else cpu_triplet
i = IndexTensor([
[1, 3, 3, 0, 4],
[2, 1, 1, 2, 3],
])
v = ValueTensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
x = SparseTensor(i, v, torch.Size([5, 4, 2]))
dense = ValueTensor([
[[1, 3], [2, 2], [3, 3], [4, 2]],
[[5, 7], [6, 7], [7, 9], [8, 9]],
[[9, 2], [10, 4], [11, 1], [12, 3]],
[[13, 5], [14, 1], [15, 1], [16, 6]],
[[17, 7], [18, 2], [19, 7], [20, 1]],
])
res = dense.sparse_mask(x)
exp_v = ValueTensor([[7, 9], [14, 1], [14, 1], [3, 3], [20, 1]])
expected = SparseTensor(i, exp_v, torch.Size([5, 4, 2]))
self.assertEqual(res, expected)
def _test_sparse_mask_hybrid(self, is_cuda):
self._test_sparse_mask_hybrid_fixed(is_cuda)
self._test_sparse_mask_shape(is_cuda, [5, 6], [2, 3])
self._test_sparse_mask_shape(is_cuda, [10, 10, 10], [3])
self._test_sparse_mask_shape(is_cuda, [50, 30, 20], [2])
self._test_sparse_mask_shape(is_cuda, [5, 5, 5, 5, 5, 5], [2])
def test_sparse_mask_hybrid(self):
self._test_sparse_mask_hybrid(False)
@unittest.skipIf(not TEST_CUDA, 'CUDA not available')
def test_sparse_mask_hybrid_cuda(self):
self._test_sparse_mask_hybrid(True)
if __name__ == '__main__':
unittest.main()
run_tests()

File diff suppressed because it is too large


@ -6,9 +6,9 @@ import shutil
import random
import tempfile
import unittest
import sys
import traceback
import torch
import torch.utils.data
import torch.cuda
import warnings
from torch.autograd import Variable
@ -19,7 +19,7 @@ from torch.utils.serialization import load_lua
HAS_CUDA = torch.cuda.is_available()
from common import TestCase
from common import TestCase, run_tests, download_file
try:
import cffi
@ -28,7 +28,9 @@ try:
except ImportError:
HAS_CFFI = False
class SimplePlugin(Plugin):
def __init__(self, interval):
super(SimplePlugin, self).__init__(interval)
self.trainer = None
@ -58,6 +60,7 @@ class SimplePlugin(Plugin):
class ModelMock(object):
def __init__(self):
self.num_calls = 0
self.output = Variable(torch.ones(1, 1), requires_grad=True)
@ -68,6 +71,7 @@ class ModelMock(object):
class CriterionMock(object):
def __init__(self):
self.num_calls = 0
@ -95,6 +99,7 @@ class OptimizerMock(object):
class DatasetMock(object):
def __iter__(self):
for i in range(10):
yield torch.randn(2, 10), torch.randperm(10)[:2]
@ -103,6 +108,44 @@ class DatasetMock(object):
return 10
class TestDataLoader(TestCase):
def setUp(self):
self.dataset = torch.randn(5, 3, 3, 2)
self.batch_size = 3
def test_single_keep(self):
dataloader = torch.utils.data.DataLoader(self.dataset,
batch_size=self.batch_size,
num_workers=0,
drop_last=False)
dataiter = iter(dataloader)
self.assertEqual(len(list(dataiter)), 2)
def test_single_drop(self):
dataloader = torch.utils.data.DataLoader(self.dataset,
batch_size=self.batch_size,
num_workers=0,
drop_last=True)
dataiter = iter(dataloader)
self.assertEqual(len(list(dataiter)), 1)
def test_multi_keep(self):
dataloader = torch.utils.data.DataLoader(self.dataset,
batch_size=self.batch_size,
num_workers=2,
drop_last=False)
dataiter = iter(dataloader)
self.assertEqual(len(list(dataiter)), 2)
def test_multi_drop(self):
dataloader = torch.utils.data.DataLoader(self.dataset,
batch_size=self.batch_size,
num_workers=2,
drop_last=True)
dataiter = iter(dataloader)
self.assertEqual(len(list(dataiter)), 1)
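# Note: the expected counts follow from len(self.dataset) == 5 and
# batch_size == 3: ceil(5 / 3) == 2 batches when drop_last=False (the last
# batch holds only 2 samples) and floor(5 / 3) == 1 when drop_last=True.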
class TestTrainer(TestCase):
intervals = [
@ -183,6 +226,7 @@ class TestTrainer(TestCase):
test_dir = os.path.abspath(os.path.dirname(str(__file__)))
class TestFFI(TestCase):
def setUp(self):
@ -196,13 +240,13 @@ class TestFFI(TestCase):
@unittest.skipIf(not HAS_CFFI, "ffi tests require cffi package")
def test_cpu(self):
compile_extension(
name='test_extensions.cpulib',
header=test_dir + '/ffi/src/cpu/lib.h',
sources=[
test_dir + '/ffi/src/cpu/lib1.c',
test_dir + '/ffi/src/cpu/lib2.c',
],
verbose=False,
)
from test_extensions import cpulib
tensor = torch.ones(2, 2).float()
@ -217,20 +261,20 @@ class TestFFI(TestCase):
self.assertIs(type(f), float)
self.assertRaises(TypeError,
lambda: cpulib.good_func(tensor.double(), 2, 1.5))
self.assertRaises(torch.FatalError,
lambda: cpulib.bad_func(tensor, 2, 1.5))
@unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
def test_gpu(self):
compile_extension(
name='gpulib',
header=test_dir + '/ffi/src/cuda/cudalib.h',
sources=[
test_dir + '/ffi/src/cuda/cudalib.c',
],
with_cuda=True,
verbose=False,
)
import gpulib
tensor = torch.ones(2, 2).float()
@ -243,9 +287,9 @@ class TestFFI(TestCase):
self.assertEqual(ctensor, torch.ones(2, 2) * 2 + 1.5)
self.assertRaises(TypeError,
lambda: gpulib.cuda_func(tensor, 2, 1.5))
self.assertRaises(TypeError,
lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))
class TestLuaReader(TestCase):
@ -290,37 +334,15 @@ class TestLuaReader(TestCase):
self.assertEqual(grad_input, test['grad_input'])
return do_test
@classmethod
def _download_data(cls, test_file_path):
if os.path.exists(test_file_path):
return
print('Downloading test file for TestLuaReader.')
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
urllib = cls._get_urllib('request')
data = urllib.urlopen(DATA_URL, timeout=15).read()
with open(test_file_path, 'wb') as f:
f.write(data)
@staticmethod
def _get_urllib(submodule):
if sys.version_info < (3,):
import urllib2
return urllib2
else:
import urllib.error
import urllib.request
return getattr(urllib, submodule)
@classmethod
def init(cls):
DATA_URL = 'https://download.pytorch.org/test_data/legacy_modules.t7'
data_dir = os.path.join(os.path.dirname(__file__), 'data')
test_file_path = os.path.join(data_dir, 'legacy_modules.t7')
urllib = cls._get_urllib('error')
try:
cls._download_data(test_file_path)
except urllib.URLError as e:
succ = download_file(DATA_URL, test_file_path)
if not succ:
warnings.warn(("Couldn't download the test file for TestLuaReader! "
"Tests will be incomplete!"), RuntimeWarning)
"Tests will be incomplete!"), RuntimeWarning)
return
tests = load_lua(test_file_path)
@ -364,4 +386,4 @@ class TestLuaReader(TestCase):
TestLuaReader.init()
if __name__ == '__main__':
unittest.main()
run_tests()


@ -7,6 +7,8 @@ from .plugins import ArgcountChecker, OptionalArguments, ArgumentReferences, \
class cwrap(object):
BASE_INDENT_SIZE = 6
RETURN_WRAPPERS = {
'void': Template('Py_RETURN_NONE;'),
'long': Template('return PyLong_FromLong($result);'),
@ -16,17 +18,22 @@ class cwrap(object):
OPTION_TEMPLATE = Template("""
${els}if ($arg_check) {
$pre_arg_assign
$arg_assign
$code
""")
ARG_ASSIGN_TEMPLATE = Template("""${type} ${name} = ${unpack};""")
OPTION_CODE_TEMPLATE = [
'$call',
'$return_result',
]
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($arg_unpack);")
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($call_arg);")
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments,
ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
def __init__(self, source, destination=None, plugins=[], default_plugins=True):
if destination is None:
@ -87,7 +94,7 @@ class cwrap(object):
with open(fname, 'r') as f:
included = f.read().split('\n')
# insert it into lines at position i+1
lines[i+1:i+1] = included
lines[i + 1:i + 1] = included
else:
output.append(line)
i += 1
@ -97,10 +104,10 @@ class cwrap(object):
def set_declaration_defaults(self, declaration):
declaration.setdefault('arguments', [])
declaration.setdefault('return', 'void')
if not 'cname' in declaration:
if 'cname' not in declaration:
declaration['cname'] = declaration['name']
# Simulate multiple dispatch, even if it's not necessary
if not 'options' in declaration:
if 'options' not in declaration:
declaration['options'] = [{'arguments': declaration['arguments']}]
del declaration['arguments']
# Parse arguments (some of them can be strings)
@ -136,10 +143,10 @@ class cwrap(object):
return fallback(*args)
def get_type_check(self, arg, option):
return self.search_plugins('get_type_check', (arg, option), lambda arg,_: None)
return self.search_plugins('get_type_check', (arg, option), lambda arg, _: None)
def get_type_unpack(self, arg, option):
return self.search_plugins('get_type_unpack', (arg, option), lambda arg,_: None)
return self.search_plugins('get_type_unpack', (arg, option), lambda arg, _: None)
def get_return_wrapper(self, option):
return self.search_plugins('get_return_wrapper', (option,), lambda _: self.RETURN_WRAPPERS[option['return']])
@ -147,6 +154,9 @@ class cwrap(object):
def get_wrapper_template(self, declaration):
return self.search_plugins('get_wrapper_template', (declaration,), lambda _: None)
def get_assign_args(self, arguments):
return self.search_plugins('get_assign_args', (arguments,), lambda _: arguments)
def get_arg_accessor(self, arg, option):
def wrap_accessor(arg, _):
if arg.get('idx') is None:
@ -177,12 +187,47 @@ class cwrap(object):
res = tmpl.substitute(arg=accessor, idx=arg.get('idx'))
for plugin in self.plugins:
res = getattr(plugin, plugin_fn_name)(res, arg, accessor)
result.append(res)
return result
def build_option_args(self, arguments, arg_unpack):
assignment = []
call_arg = []
# If types or names need to be changed
arguments = self.get_assign_args(arguments)
for arg, unpack in zip(arguments, arg_unpack):
if arg['type'] == 'CONSTANT':
call_arg.append(unpack)
else:
var_name = "arg_" + str(arg.get('assign_name', arg['name']))
res = self.ARG_ASSIGN_TEMPLATE.substitute(
type=arg['type'],
name=var_name,
unpack=unpack)
if var_name not in call_arg:
assignment.append(res)
call_arg.append(var_name)
return assignment, call_arg
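# Note (illustrative): for an argument {'type': 'int', 'name': 'dim'} whose
# unpack string is 'THPUtils_unpackLong(arg0)', build_option_args emits the
# assignment 'int arg_dim = THPUtils_unpackLong(arg0);' and passes 'arg_dim'
# as the call argument; CONSTANT arguments skip the temporary and are
# forwarded verbatim.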
def indent_code(self, code):
if code == '':
return code
code_lines = map(lambda s: s.strip(), code.split('\n'))
code = '\n'
depth = self.BASE_INDENT_SIZE
for line in code_lines:
depth -= line.count('}') * 2
code += ' ' * depth + line + '\n'
depth += line.count('{') * 2
depth += line.count('(') * 4
depth -= line.count(')') * 4
return code[:-1]
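# Note, a worked trace of indent_code with BASE_INDENT_SIZE == 6 on the lines
# ['if (x) {', 'y = f(a,', 'b);', '}']:
#   'if (x) {'  emitted at depth 6; the '{' then raises depth to 8
#               (the '(' and ')' on this line cancel out)
#   'y = f(a,'  emitted at depth 8; the unclosed '(' raises depth to 12
#   'b);'       emitted at depth 12; the ')' then lowers depth back to 8
#   '}'         the '}' lowers depth to 6 before emission, so it lands at 6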
def generate_option(self, option, is_first):
checked_args = list(filter(
lambda arg: not 'ignore_check' in arg or not arg['ignore_check'],
lambda arg: 'ignore_check' not in arg or not arg['ignore_check'],
option['arguments']))
option['num_checked_args'] = len(checked_args)
idx_args = list(filter(
@ -193,45 +238,50 @@ class cwrap(object):
# Generate checks
arg_checks = self.map_selected_arguments('get_type_check',
'process_single_check', option, checked_args)
arg_checks = ' &&\n '.join(arg_checks)
for plugin in self.plugins:
arg_checks = plugin.process_all_checks(arg_checks, option)
# Generate pre_arg assign
pre_arg_assign = []
for plugin in self.plugins:
pre_arg_assign = plugin.process_pre_arg_assign(pre_arg_assign, option)
# Generate arg assignment and call arguments
arg_unpack = self.map_selected_arguments('get_type_unpack',
'process_single_unpack', option, option['arguments'])
arg_assign, call_arg = self.build_option_args(option['arguments'], arg_unpack)
call_arg = ', '.join(call_arg)
for plugin in self.plugins:
call_arg = plugin.process_all_call_arg(call_arg, option)
# Generate call
try:
return_result = self.get_return_wrapper(option).substitute()
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result='',
cname=option['cname'], arg_unpack=arg_unpack)
cname=option['cname'], call_arg=call_arg)
except KeyError:
return_result = self.get_return_wrapper(option).substitute(result='__result')
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result=(option['return'] + ' __result = '),
cname=option['cname'], arg_unpack=arg_unpack)
cname=option['cname'], call_arg=call_arg)
code_template = deepcopy(self.OPTION_CODE_TEMPLATE)
for plugin in self.plugins:
code_template = plugin.process_option_code_template(code_template,
option)
code_template = Template('\n'.join(code_template))
code = code_template.substitute(call=call, return_result=return_result)
code_lines = map(lambda s: s.strip(), code.split('\n'))
code = '\n'
depth = 6
for line in code_lines:
depth -= line.count('}') * 2
code += ' ' * depth + line + '\n'
depth += line.count('{') * 2
code = self.indent_code(code)
pre_arg_assign = self.indent_code('\n'.join(pre_arg_assign))
arg_assign = self.indent_code('\n'.join(arg_assign))
# Put everything together
return self.OPTION_TEMPLATE.substitute(
els=('} else ' if not is_first else ''),
arg_check=arg_checks,
pre_arg_assign=pre_arg_assign,
arg_assign=arg_assign,
code=code,
)


@ -1,5 +1,6 @@
from . import CWrapPlugin
class ArgcountChecker(CWrapPlugin):
def process_all_checks(self, checks, option):


@ -1,5 +1,6 @@
from . import CWrapPlugin
class ArgcountSortPlugin(CWrapPlugin):
def __init__(self, descending=True):
@ -11,4 +12,3 @@ class ArgcountSortPlugin(CWrapPlugin):
for declaration in declarations:
declaration['options'].sort(key=num_checked_args, reverse=self.descending)
return declarations


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class ArgumentReferences(CWrapPlugin):
def initialize(self, cwrap):


@ -1,5 +1,6 @@
from . import CWrapPlugin
class AutoGPU(CWrapPlugin):
def __init__(self, has_self=True, condition=None):
@ -14,7 +15,9 @@ class AutoGPU(CWrapPlugin):
#endif
"""
def process_option_code_template(self, template, option):
def process_pre_arg_assign(self, template, option):
if not option.get('auto_gpu', True):
return template
call = 'THCPAutoGPU __autogpu_guard = THCPAutoGPU(args{});'.format(
', (PyObject*)self' if self.has_self else '')


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class BeforeAfterCall(CWrapPlugin):
def initialize(self, cwrap):
@ -13,15 +14,20 @@ class BeforeAfterCall(CWrapPlugin):
if '$' in prepend_str:
before_call_template = Template(option[name])
args = {'arg' + str(i): self.cwrap.get_arg_accessor(arg, option) for i, arg
in enumerate(option['arguments'])}
prepend_str = before_call_template.substitute(args)
template.insert(offset, prepend_str)
def process_pre_arg_assign(self, template, option):
if option.get('before_arg_assign'):
self.insert_snippet(template, option, 0, 'before_arg_assign')
return template
def process_option_code_template(self, template, option):
if option.get('before_call') or option.get('after_call'):
call_idx = template.index('$call')
self.insert_snippet(template, option, call_idx, 'before_call')
# call position might have changed
call_idx = template.index('$call')
self.insert_snippet(template, option, call_idx+1, 'after_call')
self.insert_snippet(template, option, call_idx + 1, 'after_call')
return template


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class BoolOption(CWrapPlugin):
UNPACK_TEMPLATE = Template('$arg == Py_True ? $if_true : $if_false')
@ -8,12 +9,20 @@ class BoolOption(CWrapPlugin):
def is_bool_option(self, arg):
return arg['type'] == 'bool' and 'if_true' in arg and 'if_false' in arg
def process_declarations(self, declarations):
for declaration in declarations:
for option in declaration['options']:
for arg in option['arguments']:
if self.is_bool_option(arg):
arg['is_bool_option'] = True
arg['type'] = 'const char*'
return declarations
def get_type_check(self, arg, option):
if self.is_bool_option(arg):
if arg.get('is_bool_option', False):
return Template('PyBool_Check($arg)')
def get_type_unpack(self, arg, option):
if self.is_bool_option(arg):
if arg.get('is_bool_option', False):
return Template(self.UNPACK_TEMPLATE.safe_substitute(
if_true=arg['if_true'], if_false=arg['if_false']))


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class ConstantArguments(CWrapPlugin):
def process_declarations(self, declarations):
@ -18,5 +19,3 @@ class ConstantArguments(CWrapPlugin):
def get_arg_accessor(self, arg, option):
if arg['type'] == 'CONSTANT':
return arg['name']


@ -1,32 +1,38 @@
from string import Template
import copy
from copy import deepcopy
from . import CWrapPlugin
from itertools import product
class CuDNNPlugin(CWrapPlugin):
TYPE_UNPACK = {
'THTensor*': Template('((THPVoidTensor*)$arg)->cdata'),
'int': Template('THPUtils_unpackLong($arg)'),
'std::vector<int>': Template('THPUtils_unpackIntTuple($arg)'),
'cudnnDataType_t': Template('$arg'),
'cudnnHandle_t': Template('$arg'),
'Convolution*': Template('(Convolution*)THPWrapper_get($arg)'),
'bool': Template('$arg == Py_True'),
'double': Template('THPDoubleUtils_unpackReal($arg)'),
}
INPUT_ARGUMENT_MAP = {
'THTensor*': 'THVoidTensor*',
}
TYPE_CHECK = {
'Convolution*': Template('THPWrapper_check($arg)'),
'THTensor*': Template('(PyObject*)Py_TYPE($arg) == tensorClass'),
'int': Template('THPUtils_checkLong($arg)'),
'std::vector<int>': Template('THPUtils_checkIntTuple($arg)'),
'bool': Template('PyBool_Check($arg)'),
'double': Template('THPDoubleUtils_checkReal($arg)'),
}
RETURN_WRAPPER = {
'Convolution*': Template('return THPWrapper_New($result, [](void* arg) { delete (Convolution*)arg; });'),
}
METHODS_DECLARATION = Template("""
@ -78,6 +84,16 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
def get_type_check(self, arg, option):
return self.TYPE_CHECK.get(arg['type'], None)
def get_assign_args(self, arguments):
assign_args = []
for arg in arguments:
arg = copy.copy(arg)
new_type = self.INPUT_ARGUMENT_MAP.get(arg['type'])
if new_type is not None:
arg['type'] = new_type
assign_args.append(arg)
return assign_args
def get_wrapper_template(self, declaration):
arg_desc = []
for option in declaration['options']:
@ -123,7 +139,8 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
def filter_unique_options(self, options):
def signature(option):
return '#'.join(arg['type'] for arg in option['arguments'] if not 'ignore_check' in arg or not arg['ignore_check'])
return '#'.join(arg['type'] for arg in option['arguments']
if 'ignore_check' not in arg or not arg['ignore_check'])
seen_signatures = set()
unique = []
for option in options:
@ -141,7 +158,7 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
return self.preprocessor_guard(code, declaration['defined_if'])
return code
def process_all_unpacks(self, code, option):
def process_all_call_arg(self, code, option):
return 'state, ' + code
def declare_methods(self):
@ -151,8 +168,8 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
if not declaration.get('only_register'):
extra_flags += ' | METH_KEYWORDS'
entry = Template(' {"$python_name", (PyCFunction)$name, METH_VARARGS$extra_flags, NULL},\n').substitute(
python_name=declaration['python_name'], name=declaration['name'], extra_flags=extra_flags
)
if 'defined_if' in declaration:
entry = self.preprocessor_guard(entry, declaration['defined_if'])
methods += entry


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class GILRelease(CWrapPlugin):
OPTION_START = [
@ -24,6 +25,5 @@ class GILRelease(CWrapPlugin):
def process_option_code_template(self, template, option):
call_idx = template.index('$call')
template.insert(call_idx, self.BEFORE_CALL)
template.insert(call_idx+2, self.AFTER_CALL)
template.insert(call_idx + 2, self.AFTER_CALL)
return self.OPTION_START + template + self.OPTION_END


@ -0,0 +1,223 @@
import copy
from string import Template
from . import CWrapPlugin
class GenericNN(CWrapPlugin):
INPUT_TYPE_CHECK = Template("checkTypes(is_cuda, $type, $tensor_args);")
HEADER_TEMPLATE = Template("void $name($args);")
WRAPPER_TEMPLATE = Template("""\
void $name($args)
{
bool is_cuda = $input->isCuda();
auto type = $input->type();
$type_check
$options
} else {
throw std::runtime_error("invalid arguments");
}
}
""")
THNN_TEMPLATE = Template("""\
if (type == thpp::Type::FLOAT) {
THNN_Float$name(
NULL,
$float_args);
} else if (type == thpp::Type::DOUBLE) {
THNN_Double$name(
NULL,
$double_args);
} else {
throw std::runtime_error("unsupported tensor type");
}""")
THCUNN_TEMPLATE = Template("""\
#ifdef WITH_CUDA
if (type == thpp::Type::FLOAT) {
THNN_Cuda$name(
state,
$float_args);
} else if (type == thpp::Type::DOUBLE) {
THNN_CudaDouble$name(
state,
$double_args);
} else if (type == thpp::Type::HALF) {
THNN_CudaHalf$name(
state,
$half_args);
} else {
throw std::runtime_error("unsupported tensor type");
}
#endif
""")
INDEX_TENSOR_TYPES = {'THIndexTensor*', 'THCIndexTensor*'}
REAL_TENSOR_TYPES = {'THTensor*', 'THCTensor*'}
INPUT_ARGUMENT_MAP = {
'THNNState*': 'void*',
'THCState*': 'void*',
'THTensor*': 'thpp::Tensor*',
'THCTensor*': 'thpp::Tensor*',
'THIndexTensor*': 'thpp::Tensor*',
'THCIndexTensor*': 'thpp::Tensor*',
'THIndex_t': 'long',
'accreal': 'double',
}
def __init__(self, header=False):
self.header = header
self.declarations = []
def process_full_file(self, base_wrapper):
if self.header:
wrapper = '#pragma once\n\n'
wrapper += '#include <THPP/Tensor.hpp>\n\n'
else:
wrapper = '#include "THNN_generic.h"\n'
wrapper = '#include "THNN_generic.inc.h"\n\n'
wrapper += 'namespace torch { namespace nn {\n\n'
wrapper += base_wrapper
wrapper += '}} // namespace torch::nn\n'
return wrapper
def process_declarations(self, declarations):
for declaration in declarations:
base_args = declaration['options'][0]['arguments']
for option in declaration['options']:
for idx, arg in enumerate(option['arguments']):
arg['assign_name'] = base_args[idx]['name']
arg['assign_type'] = base_args[idx]['type']
if idx != 1:
arg['ignore_check'] = True
return declarations
def get_arg_accessor(self, arg, option):
return self.get_type_unpack(arg, option)
def process_pre_arg_assign(self, pre_arg_assign, option):
if option['backend'] == 'cunn':
# Enclose arg_assign with CUDA guard
pre_arg_assign.append('#ifdef WITH_CUDA')
return pre_arg_assign
def process_option_code_template(self, template, option):
template = []
if option['backend'] == 'cunn':
template.append('#endif')
def base_cast(arg, CReal, real):
name = 'arg_' + arg['assign_name']
type = arg['type']
if type in self.REAL_TENSOR_TYPES:
return ('(TH{CReal}Tensor*){name}->cdata()'
.format(CReal=CReal, name=name))
elif type in self.INDEX_TENSOR_TYPES:
return '({type}){name}->cdata()'.format(type=type, name=name)
elif type == 'THCState*':
return '({}){}'.format(type, name)
elif type == 'real':
if real == 'half':
return 'THC_float2half({})'.format(name)
return '({real}){name}'.format(real=real, name=name)
return name
def cast(arg, CReal, real):
expr = base_cast(arg, CReal, real)
if arg.get('optional', False):
name = 'arg_' + arg['assign_name']
return '{name} ? {expr} : NULL'.format(name=name, expr=expr)
return expr
if option['backend'] == 'nn':
float_args = []
double_args = []
for idx, arg in enumerate(option['arguments']):
float_args.append(cast(arg, 'Float', 'float'))
double_args.append(cast(arg, 'Double', 'double'))
code = self.THNN_TEMPLATE.substitute(
name=option['cname'],
float_args=',\n'.join(float_args),
double_args=',\n'.join(double_args))
template.append(code)
elif option['backend'] == 'cunn':
float_args = []
double_args = []
half_args = []
for idx, arg in enumerate(option['arguments']):
float_args.append(cast(arg, 'Cuda', 'float'))
double_args.append(cast(arg, 'CudaDouble', 'double'))
half_args.append(cast(arg, 'CudaHalf', 'half'))
code = self.THCUNN_TEMPLATE.substitute(
name=option['cname'],
float_args=',\n'.join(float_args),
double_args=',\n'.join(double_args),
half_args=',\n'.join(half_args))
template.append(code)
template.append('')
return template
def get_type_unpack(self, arg, option):
return Template(arg.get('assign_name', arg['name']))
def get_type_check(self, arg, option):
if option['backend'] == 'cunn':
return Template('is_cuda')
else:
return Template('!is_cuda')
def get_assign_args(self, arguments):
assign_args = []
for arg in arguments:
arg = copy.copy(arg)
new_type = self.INPUT_ARGUMENT_MAP.get(arg['type'])
if new_type is not None:
arg['type'] = new_type
assign_args.append(arg)
return assign_args
def get_wrapper_template(self, declaration):
# get assign arguments string
base_arguments = declaration['options'][0]['arguments']
args = self.get_assign_args(base_arguments)
arg_str = ', '.join([arg['type'] + ' ' + arg['name'] for arg in args])
if self.header:
return Template(self.HEADER_TEMPLATE.safe_substitute(args=arg_str))
def get_checked_args(tensor_types):
checked_args = []
for arg in base_arguments:
if arg['type'] in tensor_types:
name = arg.get('assign_name', arg['name'])
name_str = name
if arg.get('optional', False):
name_str = '?' + name_str
checked_args += ['"' + name_str + '"', name]
checked_args += ['NULL']
return checked_args
real_args = get_checked_args(self.REAL_TENSOR_TYPES)
long_args = get_checked_args(self.INDEX_TENSOR_TYPES)
# check input types
types_checks = []
if len(real_args) > 1:
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
type='type', tensor_args=', '.join(real_args)))
if len(long_args) > 1:
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
type='thpp::Type::LONG', tensor_args=', '.join(long_args)))
return Template(self.WRAPPER_TEMPLATE.safe_substitute(
input=args[0]['name'],
args=arg_str,
type_check='\n '.join(types_checks)))
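# Note (illustrative): stitched together, the templates above expand to a
# dispatcher of roughly this shape for a hypothetical module "Abs":
#
#   void Abs_updateOutput(thpp::Tensor* input, thpp::Tensor* output)
#   {
#     bool is_cuda = input->isCuda();
#     auto type = input->type();
#     checkTypes(is_cuda, type, "input", input, "output", output, NULL);
#     if (!is_cuda) {
#       // THNN_TEMPLATE: THNN_Float/THNN_Double call, dispatched on type
#     } else if (is_cuda) {
#       // THCUNN_TEMPLATE (#ifdef WITH_CUDA): float/double/half variants
#     } else {
#       throw std::runtime_error("invalid arguments");
#     }
#   }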


@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class KwargsPlugin(CWrapPlugin):
ACCESSOR_TEMPLATE = Template('(__tuplecount > $idx ? PyTuple_GET_ITEM(args, $idx) : __kw_$name)')
@ -23,6 +24,16 @@ class KwargsPlugin(CWrapPlugin):
for option in declaration['options']:
for arg in option['arguments']:
arg['no_kwargs'] = True
# we need to use offsets for arg positions in *args if kwarg_only args
# are not at the end
for declaration in declarations:
for option in declaration['options']:
offset = 0
for arg in option['arguments']:
if arg.get('kwarg_only') and not arg.get('ignore_check', False):
offset += 1
else:
arg['kwarg_offset'] = offset
return declarations
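# Note (illustrative): for arguments (a, b, c) where only b is kwarg_only,
# b never occupies a slot in *args, so a gets kwarg_offset 0 and c gets
# kwarg_offset 1; the accessors below then index the args tuple with
# idx - kwarg_offset instead of the declared idx.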
def get_arg_accessor(self, arg, option):
@ -30,14 +41,14 @@ class KwargsPlugin(CWrapPlugin):
return
if arg.get('kwarg_only'):
return self.KWARG_ONLY_ACCESSOR_TEMPLATE.substitute(name=arg['name'])
return self.ACCESSOR_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'])
return self.ACCESSOR_TEMPLATE.substitute(idx=arg['idx'] - arg['kwarg_offset'], name=arg['name'])
def process_single_check(self, code, arg, arg_accessor):
if arg.get('no_kwargs'):
return code
if arg.get('kwarg_only'):
return self.KWARG_ONLY_CHECK_TEMPLATE.substitute(name=arg['name'], code=code)
return self.CHECK_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'], code=code)
return self.CHECK_TEMPLATE.substitute(idx=arg['idx'] - arg['kwarg_offset'], name=arg['name'], code=code)
def process_wrapper(self, code, declaration):
if declaration.get('no_kwargs'):
@ -52,8 +63,9 @@ class KwargsPlugin(CWrapPlugin):
name not in seen_args):
seen_args.add(name)
args.append(name)
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args])
lookups = '\n '.join(['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args])
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(a) for a in args])
lookups = '\n '.join(
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=a) for a in args])
start_idx = code.find('{') + 1
new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups)
return code[:start_idx] + new_code + code[start_idx:]


@ -1,6 +1,8 @@
from . import CWrapPlugin
class NullableArguments(CWrapPlugin):
def process_single_check(self, code, arg, arg_accessor):
if 'nullable' in arg and arg['nullable']:
return '({} || {} == Py_None)'.format(code, arg_accessor)
@ -10,5 +12,3 @@ class NullableArguments(CWrapPlugin):
if 'nullable' in arg and arg['nullable']:
return '({} == Py_None ? NULL : {})'.format(arg_accessor, code)
return code


@ -2,6 +2,7 @@ from copy import deepcopy
from . import CWrapPlugin
from itertools import product
class OptionalArguments(CWrapPlugin):
def process_declarations(self, declarations):
@ -32,20 +33,20 @@ class OptionalArguments(CWrapPlugin):
else:
kwarg_only_count = -kwarg_only_count
arg_signature = '#'.join(
arg['type']
for arg in option['arguments'][:kwarg_only_count]
if not arg.get('ignore_check'))
if kwarg_only_count is None:
return arg_signature
kwarg_only_signature = '#'.join(
arg['name'] + '#' + arg['type']
for arg in option['arguments'][kwarg_only_count:]
if not arg.get('ignore_check'))
return arg_signature + "#-#" + kwarg_only_signature
seen_signatures = set()
unique = []
for option in options:
for num_kwarg_only in range(0, len(option['arguments'])+1):
for num_kwarg_only in range(0, len(option['arguments']) + 1):
sig = signature(option, num_kwarg_only)
if sig not in seen_signatures:
if num_kwarg_only > 0:
@ -55,4 +56,3 @@ class OptionalArguments(CWrapPlugin):
seen_signatures.add(sig)
break
return unique


@ -1,9 +1,10 @@
from . import CWrapPlugin
from string import Template
class ReturnArguments(CWrapPlugin):
ARGUMENT_RETURN_TEMPLATE = Template("Py_INCREF($arg);\nreturn (PyObject*)($arg);")
TUPLE_RETURN_TEMPLATE = Template("return PyTuple_Pack($num_args, $args);")
ARGUMENT_RETURN_TEMPLATE = Template("Py_INCREF($arg);\nreturn (PyObject*)($arg);")
TUPLE_RETURN_TEMPLATE = Template("return PyTuple_Pack($num_args, $args);")
def initialize(self, cwrap):
self.cwrap = cwrap
@ -16,4 +17,5 @@ class ReturnArguments(CWrapPlugin):
if len(args) == 1:
return Template(self.ARGUMENT_RETURN_TEMPLATE.safe_substitute(arg=accessors[0]))
else:
return Template(self.TUPLE_RETURN_TEMPLATE.safe_substitute(num_args=len(args), args=', '.join(accessors)))
return Template(self.TUPLE_RETURN_TEMPLATE.safe_substitute(num_args=len(args),
args=', '.join(accessors)))


@ -26,41 +26,41 @@ $METHODS
class StandaloneExtension(CWrapPlugin):
TYPE_UNPACK = {
'THFloatTensor*': Template('THPFloatTensor_CData((THPFloatTensor*)$arg)'),
'THDoubleTensor*': Template('THPDoubleTensor_CData((THPDoubleTensor*)$arg)'),
'THLongTensor*': Template('THPLongTensor_CData((THPLongTensor*)$arg)'),
'THIntTensor*': Template('THPIntTensor_CData((THPIntTensor*)$arg)'),
'THCudaHalfTensor*': Template('THCPHalfTensor_CData((THCPHalfTensor*)$arg)'),
'THCudaTensor*': Template('THCPFloatTensor_CData((THCPFloatTensor*)$arg)'),
'THCudaDoubleTensor*': Template('THCPDoubleTensor_CData((THCPDoubleTensor*)$arg)'),
'THCudaLongTensor*': Template('THCPLongTensor_CData((THCPLongTensor*)$arg)'),
'half': Template('THPHalfUtils_unpackReal($arg)'),
'float': Template('THPFloatUtils_unpackReal($arg)'),
'double': Template('THPDoubleUtils_unpackReal($arg)'),
'bool': Template('($arg == Py_True ? true : false)'),
'int': Template('THPUtils_unpackLong($arg)'),
'long': Template('THPUtils_unpackLong($arg)'),
'void*': Template('(void*)THPUtils_unpackLong($arg)'),
'THGenerator*': Template('THPGenerator_CData((THPGenerator*)$arg)'),
}
TYPE_CHECK = {
'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'),
'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'),
'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'),
'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'),
'THCudaHalfTensor*': Template('THCPHalfTensor_Check($arg)'),
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
'THCudaDoubleTensor*': Template('THCPDoubleTensor_Check($arg)'),
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
'half': Template('THPHalfUtils_checkReal($arg)'),
'float': Template('THPFloatUtils_checkReal($arg)'),
'double': Template('THPDoubleUtils_checkReal($arg)'),
'bool': Template('PyBool_Check($arg)'),
'int': Template('THPUtils_checkLong($arg)'),
'long': Template('THPUtils_checkLong($arg)'),
'void*': Template('THPUtils_checkLong($arg)'),
'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'),
}
WRAPPER_TEMPLATE = Template("""
@ -131,6 +131,7 @@ PyObject * $name(PyObject *_unused, PyObject *args)
def get_wrapper_template(self, declaration):
arg_desc = []
def describe_arg(arg):
desc = self.TYPE_NAMES[arg['type']] + ' ' + arg['name']
if arg.get('nullable'):
@ -138,8 +139,8 @@ PyObject * $name(PyObject *_unused, PyObject *args)
return desc
for option in declaration['options']:
option_desc = [describe_arg(arg)
for arg in option['arguments']
if not arg.get('ignore_check', False)]
if option_desc:
arg_desc.append('({})'.format(', '.join(option_desc)))
else:


@ -4,85 +4,100 @@ from . import CWrapPlugin
from itertools import product, chain
from collections import OrderedDict
class THPPlugin(CWrapPlugin):
TYPE_UNPACK = {
'THFloatTensor*': Template('((THPFloatTensor*)$arg)->cdata'),
'THDoubleTensor*': Template('((THPDoubleTensor*)$arg)->cdata'),
'THLongTensor*': Template('((THPLongTensor*)$arg)->cdata'),
'THIntTensor*': Template('((THPIntTensor*)$arg)->cdata'),
'THTensor*': Template('((THPTensor*)$arg)->cdata'),
'THBoolTensor*': Template('((THPBoolTensor*)$arg)->cdata'),
'THIndexTensor*': Template('((THPIndexTensor*)$arg)->cdata'),
'THIntegerTensor*': Template('((THPIntegerTensor*)$arg)->cdata'),
'THCudaTensor*': Template('((THCPFloatTensor*)$arg)->cdata'),
'THCudaDoubleTensor*': Template('((THCPDoubleTensor*)$arg)->cdata'),
'THCudaIntTensor*': Template('((THCPIntTensor*)$arg)->cdata'),
'THCudaLongTensor*': Template('((THCPLongTensor*)$arg)->cdata'),
'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'),
'THSDoubleTensor*': Template('((THSPDoubleTensor*)$arg)->cdata'),
'THSLongTensor*': Template('((THSPLongTensor*)$arg)->cdata'),
'THSIntTensor*': Template('((THSPIntTensor*)$arg)->cdata'),
'THSTensor*': Template('((THSPTensor*)$arg)->cdata'),
'THSBoolTensor*': Template('((THSPBoolTensor*)$arg)->cdata'),
'THSIndexTensor*': Template('((THSPIndexTensor*)$arg)->cdata'),
'THLongStorage*': Template('((THPLongStorage*)$arg)->cdata'),
'THStorage*': Template('((THPStorage*)$arg)->cdata'),
'THGenerator*': Template('((THPGenerator*)$arg)->cdata'),
'THSize*': Template('__size.get()'),
'THStride*': Template('__stride.get()'),
'void*': Template('THPUtils_unpackLong($arg)'),
'long': Template('THPUtils_unpackLong($arg)'),
'int': Template('THPUtils_unpackLong($arg)'),
'bool': Template('($arg == Py_True ? true : false)'),
'float': Template('THPFloatUtils_unpackReal($arg)'),
'double': Template('THPDoubleUtils_unpackReal($arg)'),
'real': Template('THPUtils_(unpackReal)($arg)'),
'accreal': Template('THPUtils_(unpackAccreal)($arg)'),
}
TYPE_CHECK = {
'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'),
'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'),
'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'),
'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'),
'THTensor*': Template('(PyObject*)Py_TYPE($arg) == THPTensorClass'),
'THBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THPBoolTensorClass'),
'THIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIndexTensorClass'),
'THIntegerTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntegerTensorClass'),
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
'THCudaDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPDoubleTensorClass'),
'THCudaIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPIntTensorClass'),
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
'THSDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPDoubleTensorClass'),
'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'),
'THSLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPLongTensorClass'),
'THSIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIntTensorClass'),
'THSTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPTensorClass'),
'THSBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPBoolTensorClass'),
'THSIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIndexTensorClass'),
'THLongStorage*': Template('(PyObject*)Py_TYPE($arg) == THPLongStorageClass'),
'THStorage*': Template('(PyObject*)Py_TYPE($arg) == THPStorageClass'),
'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'),
'THSize*': Template('THPUtils_tryUnpackLongs($arg, __size)'),
'THStride*': Template('THPUtils_tryUnpackLongs($arg, __stride)'),
'void*': Template('THPUtils_checkLong($arg)'),
'long': Template('THPUtils_checkLong($arg)'),
'int': Template('THPUtils_checkLong($arg)'),
'bool': Template('PyBool_Check($arg)'),
'float': Template('THPFloatUtils_checkReal($arg)'),
'double': Template('THPDoubleUtils_checkReal($arg)'),
'real': Template('THPUtils_(checkReal)($arg)'),
'accreal': Template('THPUtils_(checkAccreal)($arg)'),
}
SIZE_VARARG_CHECK = Template('THPUtils_tryUnpackLongVarArgs(args, $idx, __size)')
RETURN_WRAPPER = {
'THTensor*': Template('return THPTensor_(New)($result);'),
'THSTensor*': Template('return THSPTensor_(New)($result);'),
'THIndexTensor*': Template('return THPIndexTensor_(New)($result);'),
'THLongTensor*': Template('return THPLongTensor_New($result);'),
'THLongStorage*': Template('return THPLongStorage_New($result);'),
'THCudaIntTensor*': Template('return THCPIntTensor_New($result);'),
'THCudaLongTensor*': Template('return THCPLongTensor_New($result);'),
# TODO: make it smarter - it should return python long if result doesn't fit into an int
'long': Template('return PyInt_FromLong($result);'),
'accreal': Template('return THPUtils_(newAccreal)($result);'),
'self': Template('Py_INCREF(self);\nreturn (PyObject*)self;'),
'real': Template('return THPUtils_(newReal)($result);'),
}
TENSOR_METHODS_DECLARATION = Template("""
@ -138,13 +153,14 @@ ${cpu}
return Template(code)
ALLOCATE_TYPE = {
'THTensor*': _allocate('', ALLOCATE_TMPL),
'THLongTensor*': _allocate('Long', ALLOCATE_TMPL),
'THIntTensor*': _allocate('Int', ALLOCATE_TMPL),
'THBoolTensor*': _allocate('Byte', ALLOCATE_TMPL, ALLOCATE_CUDA),
'THIndexTensor*': _allocate('Long', ALLOCATE_TMPL, ALLOCATE_CUDA),
'THIntegerTensor*': _allocate('Int', ALLOCATE_TMPL, ALLOCATE_CUDA),
'THSTensor*': _allocate('', ALLOCATE_TMPL, sparse=True),
}
TYPE_NAMES = {
@ -157,8 +173,13 @@ ${cpu}
'THIntTensor*': '" THPModuleStr "IntTensor',
'THBoolTensor*': '" THPModuleStr "ByteTensor',
'THIndexTensor*': '" THPModuleStr "LongTensor',
'THIntegerTensor*': '" THPModuleStr "IntTensor',
'THFloatTensor*': '" THPModuleStr "FloatTensor',
'THDoubleTensor*': '" THPModuleStr "DoubleTensor',
'THCudaTensor*': 'torch.cuda.FloatTensor',
'THCudaDoubleTensor*': 'torch.cuda.DoubleTensor',
'THCudaIntTensor*': 'torch.cuda.IntTensor',
'THCudaLongTensor*': 'torch.cuda.LongTensor',
'THSize*': 'torch.Size',
'THStride*': 'tuple',
'long': 'int',
@ -166,10 +187,12 @@ ${cpu}
'double': 'float',
'accreal': '" RealStr "',
'bool': 'bool',
'const char*': 'bool', # Can come only from bool option.
}
OUT_INIT = """
__out = kwargs ? PyDict_GetItemString(kwargs, "out") : NULL;
if (__out == Py_None) { __out = NULL; __dictcount--; __argcount--; }
"""
def __init__(self):
@ -198,14 +221,14 @@ ${cpu}
def format_args(args, var_args=False):
option_desc = [format_arg(arg, var_args)
for arg in args
if not arg.get('ignore_check', False) and
not arg.get('output')]
output_args = list(filter(lambda a: a.get('output'), args))
if output_args:
if len(output_args) > 1:
out_type = 'tuple['
out_type += ', '.join(
self.TYPE_NAMES[arg['type']] for arg in output_args)
out_type += ']'
option_desc += ['#' + out_type + ' out']
else:
@ -287,7 +310,7 @@ ${cpu}
if not output_provided:
arg['ignore_check'] = True
else:
option_copy['argcount_offset'] = -len(out_idx) + 1
arg['no_kwargs'] = True
arg['no_idx'] = True
new_options.append(option_copy)
@ -295,8 +318,6 @@ ${cpu}
def process_declarations(self, declarations):
new_declarations = []
register_only = [d for d in declarations if d.get('only_register', False)]
declarations = [d for d in declarations if not d.get('only_register', False)]
def has_arg_type(declaration, type_name):
return any(arg['type'] == type_name
@ -314,8 +335,16 @@ ${cpu}
for arg in option['arguments'])
for declaration in declarations:
# Disable all methods for THHalfTensor, unless cpu_half is True
if not declaration.get('cpu_half', False):
defined_if = '!defined(TH_REAL_IS_HALF)'
if 'defined_if' in declaration:
defined_if += ' && (' + declaration['defined_if'] + ')'
declaration['defined_if'] = defined_if
if declaration.get('only_register', False):
continue
declaration.setdefault('python_name', declaration['name'])
declaration.setdefault('variables', [])
if has_arg_type(declaration, 'THSize*'):
@ -345,8 +374,9 @@ ${cpu}
if arg['name'] == 'self':
arg['ignore_check'] = True
declarations = [d for d in declarations if not d.get('only_stateless', False)]
register_only = [d for d in declarations if d.get('only_register', False)]
declarations = [d for d in declarations
if (not d.get('only_stateless', False)) and (not d.get('only_register', False))]
self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only))
self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only))
@ -362,6 +392,7 @@ ${cpu}
for option in declaration['options']:
for arg in option['arguments']:
if arg['name'] == 'self':
arg['assign_name'] = 'self'
arg['name'] = 'source'
return declaration
@ -377,38 +408,41 @@ ${cpu}
if declaration.get('override_method_flags'):
flags = declaration['override_method_flags']
entry = Template(' {"$python_name", (PyCFunction)$name, $flags, $docstring},\n').substitute(
python_name=declaration['python_name'], name=declaration['name'], flags=flags,
docstring=declaration.get('docstring_var', 'NULL')
)
if 'defined_if' in declaration:
entry = self.preprocessor_guard(entry, declaration['defined_if'])
tensor_methods += entry
generated = self.TENSOR_METHODS_DECLARATION.substitute(
methods=tensor_methods,
stateless=('' if not stateless else 'stateless_'),
sparse=('' if not sparse else 'S'),
)
if sparse:
generated = '#ifndef TH_REAL_IS_HALF\n' + generated + '\n#endif\n\n'
return generated
def process_full_file(self, code):
# We have to find a place before all undefs
idx = code.find('// PUT DEFINITIONS IN HERE PLEASE')
return (code[:idx] +
self.declare_methods(False, False) +
self.declare_methods(True, False) +
self.declare_methods(False, True) +
self.declare_methods(True, True) +
code[idx:]
)
def preprocessor_guard(self, code, condition):
return '#if ' + condition + '\n' + code + '#endif\n'
def process_wrapper(self, code, declaration):
if 'defined_if' in declaration:
return self.preprocessor_guard(code, declaration['defined_if'])
return code
def process_all_call_arg(self, code, option):
return 'LIBRARY_STATE ' + code
def process_all_checks(self, code, option):
@ -419,7 +453,7 @@ ${cpu}
if option['output_count'] > 1:
checks += "PyTuple_Check(__out) &&\n" + indent
length_check = "PyTuple_GET_SIZE(__out) == {} &&\n".format(
option['output_count'])
checks += length_check + indent
code = checks + code
else:
@ -432,7 +466,7 @@ ${cpu}
return code
def process_pre_arg_assign(self, template, option):
new_args = []
for arg in option['arguments']:
if not option.get('output_provided', True) and arg.get('output'):
@ -443,13 +477,13 @@ ${cpu}
def generate_docstrings_cpp(self):
template = Template('char* $name = "$content";')
return '\n\n'.join(
template.substitute(name=decl['docstring_var'], content=decl['docstring_content'])
for decl in chain(self.declarations, self.stateless_declarations)
if 'docstring_var' in decl)
def generate_docstrings_h(self):
template = Template('extern char* $name;')
return '\n\n'.join(
template.substitute(name=decl['docstring_var'])
for decl in chain(self.declarations, self.stateless_declarations)
if 'docstring_var' in decl)


@ -0,0 +1,40 @@
from . import CWrapPlugin
from string import Template
class WrapDim(CWrapPlugin):
NDIM_TEMPLATE = Template(
"""${arg_tensor}->nDimension""")
CODE_TEMPLATE = Template(
"""THPUtils_assert(${arg_dim} >= -(${ndim}) && ${arg_dim} < (${ndim}),
"dimension out of range (expected to be in range of [%d, %d], but got %d)",
-(${ndim}), (${ndim})-1, ${arg_dim});
if (${arg_dim} < 0) ${arg_dim} += (${ndim});""")
def initialize(self, cwrap):
self.cwrap = cwrap
def process_option_code_template(self, template, option):
new_code = []
for i, arg in enumerate(option['arguments']):
if 'wrap_dim' not in arg:
continue
params = arg.get('wrap_dim').split("+")
arg_tensor = params[0]
arg_tensor = "arg_" + arg_tensor
arg_dim = "arg_" + arg.get('assign_name', arg['name'])
params[0] = self.NDIM_TEMPLATE.substitute(arg_tensor=arg_tensor)
ndim = "+".join(params)
new_code.append(self.CODE_TEMPLATE.substitute(
arg_dim=arg_dim,
ndim=ndim))
new_code.append("")
template = new_code + template
return template
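The generated check enforces the usual negative-dimension convention: a dim argument is valid on [-ndim, ndim - 1], and negative values wrap around to the end. A minimal pure-Python sketch of the same semantics (wrap_dim is an illustrative name, not part of the plugin):
def wrap_dim(dim, ndim):
    # Mirror of the generated C snippet above: reject out-of-range dims,
    # then map negative dims onto their positive counterparts.
    if not (-ndim <= dim < ndim):
        raise IndexError("dimension out of range (expected to be in range "
                         "of [%d, %d], but got %d)" % (-ndim, ndim - 1, dim))
    return dim + ndim if dim < 0 else dim
# wrap_dim(-1, 4) == 3; wrap_dim(0, 4) == 0; wrap_dim(4, 4) raises IndexError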


@ -16,6 +16,9 @@ class CWrapPlugin(object):
def get_wrapper_template(self, declaration):
pass
def get_assign_args(self, arguments):
pass
def get_arg_accessor(self, arg, option):
pass
@ -31,7 +34,7 @@ class CWrapPlugin(object):
def process_single_unpack(self, code, arg, arg_accessor):
return code
def process_all_unpacks(self, code, option):
def process_all_call_arg(self, code, option):
return code
def process_option_code(self, code, option):
@ -46,6 +49,9 @@ class CWrapPlugin(object):
def process_option_code_template(self, template, option):
return template
def process_pre_arg_assign(self, template, option):
return template
from .StandaloneExtension import StandaloneExtension
from .NullableArguments import NullableArguments
@ -58,3 +64,5 @@ from .ReturnArguments import ReturnArguments
from .GILRelease import GILRelease
from .AutoGPU import AutoGPU
from .CuDNNPlugin import CuDNNPlugin
from .GenericNN import GenericNN
from .WrapDim import WrapDim


@ -1 +1,2 @@
from .generate_wrappers import generate_wrappers, wrap_function, \
import_module, wrap_generic_function


@ -2,12 +2,13 @@ import os
import sys
from string import Template, ascii_lowercase
from ..cwrap import cwrap
from ..cwrap.plugins import StandaloneExtension, GenericNN, NullableArguments, AutoGPU
BASE_PATH = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
WRAPPER_PATH = os.path.join(BASE_PATH, 'torch', 'csrc', 'nn')
THNN_UTILS_PATH = os.path.join(BASE_PATH, 'torch', '_thnn', 'utils.py')
def import_module(name, path):
if sys.version_info >= (3, 5):
import importlib.util
@ -51,22 +52,27 @@ TYPE_TRANSFORMS = {
'Float': {
'THTensor*': 'THFloatTensor*',
'real': 'float',
'accreal': 'double',
},
'Double': {
'THTensor*': 'THDoubleTensor*',
'real': 'double',
'accreal': 'double',
},
'CudaHalf': {
'THCTensor*': 'THCudaHalfTensor*',
'real': 'half',
'accreal': 'float',
},
'Cuda': {
'THCTensor*': 'THCudaTensor*',
'real': 'float',
'accreal': 'float',
},
'CudaDouble': {
'THCTensor*': 'THCudaDoubleTensor*',
'real': 'double',
'accreal': 'double',
},
}
for t, transforms in TYPE_TRANSFORMS.items():
@ -81,7 +87,8 @@ for t in ['CudaHalf', 'Cuda', 'CudaDouble']:
def wrap_function(name, type, arguments):
cname = 'THNN_' + type + name
declaration = ''
declaration += 'extern "C" void ' + cname + '(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n'
declaration += 'extern "C" void ' + cname + \
'(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n'
declaration += FUNCTION_TEMPLATE.substitute(name=type + name, cname=cname)
indent = ' ' * 4
dict_indent = ' ' * 6
@ -91,15 +98,18 @@ def wrap_function(name, type, arguments):
declaration += prefix + TYPE_TRANSFORMS[type].get(arg.type, arg.type) + ' ' + arg.name + '\n'
else:
t = TYPE_TRANSFORMS[type].get(arg.type, arg.type)
declaration += prefix + 'type: ' + t + '\n' + \
dict_indent + 'name: ' + arg.name + '\n' + \
dict_indent + 'nullable: True' + '\n'
declaration += ']]\n\n\n'
return declaration
def generate_wrappers():
wrap_nn()
wrap_cunn()
wrap_generic()
def wrap_nn():
wrapper = '#include <TH/TH.h>\n\n\n'
@ -114,6 +124,7 @@ def wrap_nn():
NullableArguments(),
])
def wrap_cunn():
wrapper = '#include <TH/TH.h>\n'
wrapper += '#include <THC/THC.h>\n\n\n'
@ -128,3 +139,66 @@ def wrap_cunn():
NullableArguments(),
AutoGPU(has_self=False),
])
GENERIC_FUNCTION_TEMPLATE = Template("""\
[[
name: $name
return: void
options:
""")
def wrap_generic_function(name, backends):
declaration = ''
declaration += GENERIC_FUNCTION_TEMPLATE.substitute(name=name)
for backend in backends:
declaration += ' - cname: ' + name + '\n'
declaration += ' backend: ' + backend['name'] + '\n'
declaration += ' arguments:\n'
for arg in backend['arguments']:
declaration += ' - arg: ' + arg.type + ' ' + arg.name + '\n'
if arg.is_optional:
declaration += ' optional: True\n'
declaration += ']]\n\n\n'
return declaration
def wrap_generic():
from collections import OrderedDict
defs = OrderedDict()
def should_wrap_function(name):
if name.startswith('LookupTable'):
return False
return (name.endswith('updateOutput') or
name.endswith('updateGradInput') or
name.endswith('accGradParameters') or
name.endswith('backward'))
def add_functions(name, functions):
for fn in functions:
if not should_wrap_function(fn.name):
continue
if fn.name not in defs:
defs[fn.name] = []
defs[fn.name] += [{
'name': name,
'arguments': fn.arguments[1:],
}]
add_functions('nn', thnn_utils.parse_header(thnn_utils.THNN_H_PATH))
add_functions('cunn', thnn_utils.parse_header(thnn_utils.THCUNN_H_PATH))
wrapper = ''
for name, backends in defs.items():
wrapper += wrap_generic_function(name, backends)
with open('torch/csrc/nn/THNN_generic.cwrap', 'w') as f:
f.write(wrapper)
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
GenericNN(header=True),
], default_plugins=False, destination='torch/csrc/nn/THNN_generic.h')
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
GenericNN(),
], default_plugins=False)


@ -1,8 +1,17 @@
import ctypes.util
import os
from .env import check_env_flag
if check_env_flag('NO_CUDA'):
WITH_CUDA = False
CUDA_HOME = None
else:
CUDA_HOME = os.getenv('CUDA_HOME', '/usr/local/cuda')
if not os.path.exists(CUDA_HOME):
cudart_path = ctypes.util.find_library('cudart')
if cudart_path is not None:
CUDA_HOME = os.path.dirname(cudart_path)
else:
CUDA_HOME = None
WITH_CUDA = CUDA_HOME is not None


@ -1,9 +1,15 @@
import os
import glob
from itertools import chain
from .env import check_env_flag
from .cuda import WITH_CUDA, CUDA_HOME
def gather_paths(env_vars):
return list(chain(*(os.getenv(v, '').split(':') for v in env_vars)))
WITH_CUDNN = False
CUDNN_LIB_DIR = None
CUDNN_INCLUDE_DIR = None
@ -12,13 +18,19 @@ if WITH_CUDA and not check_env_flag('NO_CUDNN'):
os.getenv('CUDNN_LIB_DIR'),
os.path.join(CUDA_HOME, 'lib'),
os.path.join(CUDA_HOME, 'lib64'),
'/usr/lib/x86_64-linux-gnu/',
] + gather_paths([
'LIBRARY_PATH',
])))
include_paths = list(filter(bool, [
os.getenv('CUDNN_INCLUDE_DIR'),
os.path.join(CUDA_HOME, 'include'),
'/usr/include/',
] + gather_paths([
'CPATH',
'C_INCLUDE_PATH',
'CPLUS_INCLUDE_PATH',
])))
for path in lib_paths:
if path is None or not os.path.exists(path):
continue


@ -1,4 +1,5 @@
import os
def check_env_flag(name):
return os.getenv(name) in ['ON', '1', 'YES', 'TRUE', 'Y']


@ -10,6 +10,7 @@ on an NVIDIA GPU with compute capability >= 2.0.
import sys
from ._utils import _import_dotted_name
from .version import __version__
__all__ = [
'typename', 'is_tensor', 'is_storage', 'set_default_tensor_type',
@ -30,6 +31,13 @@ __all__ = [
# automatically filled by the dynamic loader.
import os as _dl_flags
# if we have numpy, it *must* be imported before the call to setdlopenflags()
# or there is risk that later c modules will segfault when importing numpy
try:
import numpy as np
except:
pass
# first check if the os package has the required flags
if not hasattr(_dl_flags, 'RTLD_GLOBAL') or not hasattr(_dl_flags, 'RTLD_NOW'):
try:
@ -56,6 +64,7 @@ del old_flags
# Define basic utilities
################################################################################
def typename(o):
module = ''
class_name = ''
@ -74,11 +83,21 @@ def typename(o):
def is_tensor(obj):
r"""Returns True if `obj` is a pytorch tensor.
Args:
obj (Object): Object to test
"""
return type(obj) in _tensor_classes
def is_storage(obj):
r"""Returns True if `obj` is a pytorch storage object.
Args:
obj (Object): Object to test
"""
return type(obj) in _storage_classes
def set_default_tensor_type(t):
@ -91,7 +110,7 @@ def set_default_tensor_type(t):
def set_rng_state(new_state):
r"""Sets the random number generator state.
Args:
new_state (torch.ByteTensor): The desired state
"""
@ -104,9 +123,9 @@ def get_rng_state():
def manual_seed(seed):
r"""Sets the seed for generating random numbers. And returns a
r"""Sets the seed for generating random numbers. And returns a
`torch._C.Generator` object.
Args:
seed (int or long): The desired seed.
"""
@ -114,7 +133,7 @@ def manual_seed(seed):
def initial_seed():
r"""Returns the initial seed for generating random numbers as a
r"""Returns the initial seed for generating random numbers as a
python `long`.
"""
return default_generator.initial_seed()
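A short usage sketch of the seeding and RNG-state API above, assuming the 0.1.x-era interface shown in this diff:
import torch

torch.manual_seed(123)            # seed the default CPU generator
state = torch.get_rng_state()     # snapshot the generator state (a ByteTensor)
a = torch.rand(3)
torch.set_rng_state(state)        # rewind the generator
b = torch.rand(3)                 # should reproduce the same numbers as a
assert torch.equal(a, b)
assert torch.initial_seed() == 123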
@ -130,61 +149,115 @@ from ._tensor_str import set_printoptions
from .storage import _StorageBase
from .tensor import _TensorBase
class DoubleStorage(_C.DoubleStorageBase, _StorageBase):
pass
class FloatStorage(_C.FloatStorageBase, _StorageBase):
pass
class HalfStorage(_C.HalfStorageBase, _StorageBase):
pass
class LongStorage(_C.LongStorageBase, _StorageBase):
pass
class IntStorage(_C.IntStorageBase, _StorageBase):
pass
class ShortStorage(_C.ShortStorageBase, _StorageBase):
pass
class CharStorage(_C.CharStorageBase, _StorageBase):
pass
class ByteStorage(_C.ByteStorageBase, _StorageBase):
pass
class DoubleTensor(_C.DoubleTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return DoubleStorage
class FloatTensor(_C.FloatTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return FloatStorage
class HalfTensor(_C.HalfTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return HalfStorage
class LongTensor(_C.LongTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return LongStorage
class IntTensor(_C.IntTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return IntStorage
class ShortTensor(_C.ShortTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return ShortStorage
class CharTensor(_C.CharTensorBase, _TensorBase):
def is_signed(self):
# TODO
return False
@classmethod
def storage_type(cls):
return CharStorage
class ByteTensor(_C.ByteTensorBase, _TensorBase):
def is_signed(self):
return False
@classmethod
def storage_type(cls):
return ByteStorage
@ -209,19 +282,21 @@ set_default_tensor_type('torch.FloatTensor')
from .functional import *
################################################################################
# Initialize extension
################################################################################
def manager_path():
import os
path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'lib', 'torch_shm_manager')
if not os.path.exists(path):
raise RuntimeError("Unable to find torch_shm_manager at " + path)
return path.encode('utf-8')
_C._initExtension(manager_path())
del manager_path
################################################################################
@ -260,6 +335,8 @@ import torch.autograd
import torch.nn
import torch.optim
import torch.multiprocessing
import torch.sparse
_C._init_names(list(torch._tensor_classes) + list(torch._storage_classes))
# attach docstrings to torch and tensor functions
from . import _torch_docs, _tensor_docs

File diff suppressed because it is too large


@ -22,7 +22,7 @@ def set_printoptions(
edgeitems=None,
linewidth=None,
profile=None,
):
"""Set options for printing. Items shamelessly taken from Numpy
Args:
@ -119,7 +119,7 @@ def _number_format(tensor, min_sz=-1):
else:
if exp_max > prec + 1 or exp_max < 0:
sz = max(min_sz, 7)
scale = math.pow(10, exp_max - 1)
else:
if exp_max == 0:
sz = 7
@ -132,19 +132,19 @@ def _number_format(tensor, min_sz=-1):
def _tensor_str(self):
n = PRINT_OPTS.edgeitems
has_hdots = self.size()[-1] > 2 * n
has_vdots = self.size()[-2] > 2 * n
print_full_mat = not has_hdots and not has_vdots
formatter = _number_format(self, min_sz=3 if not print_full_mat else 0)
print_dots = self.numel() >= PRINT_OPTS.threshold
dim_sz = max(2, max(len(str(x)) for x in self.size()))
dim_fmt = "{:^" + str(dim_sz) + "}"
dot_fmt = u"{:^" + str(dim_sz+1) + "}"
dot_fmt = u"{:^" + str(dim_sz + 1) + "}"
counter_dim = self.ndimension() - 2
counter = torch.LongStorage(counter_dim).fill_(0)
counter[counter.size() - 1] = -1
finished = False
strt = ''
while True:
@ -152,7 +152,7 @@ def _tensor_str(self):
nskipped = [False for i in counter]
for i in _range(counter_dim - 1, -1, -1):
counter[i] += 1
if print_dots and counter[i] == n and self.size(i) > 2 * n:
counter[i] = self.size(i) - n
nskipped[i] = True
if counter[i] == self.size(i):
@ -188,18 +188,18 @@ def __repr_row(row, indent, fmt, scale, sz, truncate=None):
if truncate is not None:
dotfmt = " {:^5} "
return (indent +
' '.join(fmt.format(val / scale) for val in row[:truncate]) +
dotfmt.format('...') +
' '.join(fmt.format(val / scale) for val in row[-truncate:]) +
'\n')
else:
return indent + ' '.join(fmt.format(val / scale) for val in row) + '\n'
def _matrix_str(self, indent='', formatter=None, force_truncate=False):
n = PRINT_OPTS.edgeitems
has_hdots = self.size(1) > 2 * n
has_vdots = self.size(0) > 2 * n
print_full_mat = not has_hdots and not has_vdots
if formatter is None:
@ -207,14 +207,14 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False):
min_sz=5 if not print_full_mat else 0)
else:
fmt, scale, sz = formatter
nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth - len(indent)) / (sz + 1)))
strt = ''
firstColumn = 0
if not force_truncate and \
(self.numel() < PRINT_OPTS.threshold or print_full_mat):
while firstColumn < self.size(1):
lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1) - 1)
if nColumnPerLine < self.size(1):
strt += '\n' if firstColumn != 1 else ''
strt += 'Columns {} to {} \n{}'.format(
@ -223,15 +223,15 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False):
strt += SCALE_FORMAT.format(scale)
for l in _range(self.size(0)):
strt += indent + (' ' if scale != 1 else '')
row_slice = self[l, firstColumn:lastColumn + 1]
strt += ' '.join(fmt.format(val / scale) for val in row_slice)
strt += '\n'
firstColumn = lastColumn + 1
else:
if scale != 1:
strt += SCALE_FORMAT.format(scale)
if has_vdots and has_hdots:
vdotfmt = "{:^" + str((sz+1)*n-1) + "}"
vdotfmt = "{:^" + str((sz + 1) * n - 1) + "}"
ddotfmt = u"{:^5}"
for row in self[:n]:
strt += __repr_row(row, indent, fmt, scale, sz, n)
@ -245,8 +245,8 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False):
strt += __repr_row(row, indent, fmt, scale, sz, n)
elif has_vdots and not has_hdots:
vdotfmt = u"{:^" + \
str(len(__repr_row(self[0], '', fmt, scale, sz))) + \
"}\n"
for row in self[:n]:
strt += __repr_row(row, indent, fmt, scale, sz)
strt += vdotfmt.format(u'\u22EE')
@ -269,13 +269,13 @@ def _vector_str(self):
ident = ' '
if self.numel() < PRINT_OPTS.threshold:
return (strt +
'\n'.join(ident + fmt.format(val / scale) for val in self) +
'\n')
else:
return (strt +
'\n'.join(ident + fmt.format(val / scale) for val in self[:n]) +
'\n' + (ident + dotfmt.format(u"\u22EE")) +
'\n'.join(ident + fmt.format(val / scale) for val in self[-n:]) +
'\n')
@ -295,4 +295,3 @@ def _str(self):
strt += '[{} of size {}{}]\n'.format(torch.typename(self),
size_str, device_str)
return '\n' + strt


@ -2,7 +2,9 @@ import threading
import torch.cuda
from .utils import THNN_H_PATH, THCUNN_H_PATH, parse_header, load_backend
class Backends(object):
def __init__(self):
self.backends = {}
@ -14,6 +16,7 @@ class Backends(object):
class Backend(object):
def __init__(self, lib_prefix, lib_name, functions, mixins=tuple()):
self.lib_prefix = lib_prefix
self.lib_name = lib_name
@ -32,11 +35,12 @@ class Backend(object):
with self.loading_lock:
if self.backend is None:
self.backend = load_backend(self.lib_prefix, self.lib_name,
self.functions, self.mixins)
return self.backend
class THNNCudaBackendStateMixin(object):
@property
def library_state(self):
return torch.cuda._state_cdata
@ -54,7 +58,10 @@ for t in ['Float', 'Double']:
type2backend.backends['torch.{}Tensor'.format(t)] = backend
type2backend.backends[getattr(torch, '{}Tensor'.format(t))] = backend
for t in ['Half', '', 'Double']:
backend = Backend('Cuda' + t, 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
type2backend.backends['THNNCuda{}Backend'.format(t)] = backend
py_name = 'Float' if t == '' else t
type2backend.backends['torch.cuda.{}Tensor'.format(py_name)] = backend
type2backend.backends[getattr(torch.cuda, '{}Tensor'.format(py_name))] = backend


@ -12,6 +12,7 @@ def _unpickle_backend(backend_name):
class THNNBackendBase(object):
def __init__(self):
self.methods = {}
@ -33,6 +34,7 @@ class THNNBackendBase(object):
class Function(object):
def __init__(self, name):
self.name = name
self.arguments = []
@ -46,6 +48,7 @@ class Function(object):
class Argument(object):
def __init__(self, _type, name, is_optional):
self.type = _type
self.name = name

File diff suppressed because it is too large


@ -1,4 +1,5 @@
import torch
import importlib
def _type(self, new_type=None, async=False):
@ -21,6 +22,15 @@ def _type(self, new_type=None, async=False):
new_type = _import_dotted_name(new_type)
if new_type == type(self):
return self
if self.is_sparse:
if not new_type.is_sparse:
raise RuntimeError("Cannot cast sparse tensor to dense tensor")
new_type_name = new_type.__module__ + '.' + new_type.__name__
new_values_type_name = new_type_name.replace('.sparse', '')
new_values = self.values().type(new_values_type_name, async)
return new_type(self.indices(), new_values, self.size())
if new_type.is_sparse:
raise RuntimeError("Cannot cast dense tensor to sparse tensor")
return new_type(self.size()).copy_(self, async)
@ -39,16 +49,27 @@ def _cuda(self, device=None, async=False):
if self.is_cuda:
if device is None:
device = torch.cuda.current_device()
if self.get_device() == device:
return self
else:
if device is None:
device = -1
with torch.cuda.device(device):
if self.is_sparse:
new_type = getattr(torch.cuda.sparse, self.__class__.__name__)
indices = self.indices().cuda(device, async)
values = self.values().cuda(device, async)
return new_type(indices, values, self.size())
else:
new_type = getattr(torch.cuda, self.__class__.__name__)
return new_type(self.size()).copy_(self, async)
def _rebuild_tensor(storage, storage_offset, size, stride):
class_name = storage.__class__.__name__.replace('Storage', 'Tensor')
module = importlib.import_module(storage.__module__)
tensor_class = getattr(module, class_name)
return tensor_class().set_(storage, storage_offset, size, stride)
def _range(*args, **kwargs):


@ -9,9 +9,11 @@ import torch
from .variable import Variable
from .function import Function, NestedIOFunction
from .stochastic_function import StochasticFunction
from .gradcheck import gradcheck
__all__ = ['Variable', 'Function', 'StochasticFunction', 'backward']
def backward(variables, grad_variables, retain_variables=False):
"""Computes the sum of gradients of given variables w.r.t. graph leaves.
@ -28,7 +30,7 @@ def backward(variables, grad_variables, retain_variables=False):
Arguments:
variables (sequence of Variable): Variables of which the derivative will be
computed.
grad_variables (sequence of Tensor): Gradients w.r.t. each element of
corresponding variables. Required only for non-scalar variables that
require gradient.
retain_variables (bool): If ``True``, buffers necessary for computing
@ -37,6 +39,6 @@ def backward(variables, grad_variables, retain_variables=False):
times.
"""
Variable._execution_engine.run_backward(
tuple(variables), tuple(grad_variables), retain_variables)
assert torch._C._autograd_init()
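A brief usage sketch of backward with a non-scalar variable, using the legacy Variable API of this era (illustrative, not part of the diff):
import torch
from torch.autograd import Variable

x = Variable(torch.ones(2, 2), requires_grad=True)
y = x * 2                          # non-scalar output
grad_y = torch.ones(2, 2)          # gradient w.r.t. each element of y
torch.autograd.backward([y], [grad_y])
# x.grad should now hold d(y)/d(x) weighted by grad_y, i.e. 2 * ones(2, 2)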


@ -5,4 +5,4 @@ from .reduce import *
from .linalg import *
from .blas import *
from .stochastic import *
from .compare import *


@ -3,9 +3,16 @@ from ..function import Function, InplaceFunction
import math
def maybe_view(tensor, size):
if tensor.size() == size:
return tensor
return tensor.contiguous().view(size)
class Add(InplaceFunction):
def forward(self, a, b):
self.b_size = b.size()
if self.inplace:
self.mark_dirty(a)
return a.add_(b)
@ -13,12 +20,13 @@ class Add(InplaceFunction):
return a.add(b)
def backward(self, grad_output):
return grad_output, maybe_view(grad_output, self.b_size)
class Sub(InplaceFunction):
def forward(self, a, b):
self.b_size = b.size()
if self.inplace:
self.mark_dirty(a)
return a.sub_(b)
@ -26,40 +34,43 @@ class Sub(InplaceFunction):
return a.sub(b)
def backward(self, grad_output):
return grad_output, maybe_view(grad_output.neg(), self.b_size)
class Mul(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.mul(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.mul(b), maybe_view(grad_output.mul(a), self.b_size)
class Div(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.div(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.div(b), maybe_view(grad_output.neg().mul(a).div_(b).div_(b), self.b_size)
class Pow(Function):
def forward(self, a, b):
self.b_size = b.size()
self.save_for_backward(a, b)
return a.pow(b)
def backward(self, grad_output):
a, b = self.saved_tensors
return grad_output.mul(b).mul_(a.pow(b - 1)), maybe_view(grad_output.mul(a.pow(b)).mul_(a.log()), self.b_size)
class AddConstant(InplaceFunction):
@ -174,7 +185,7 @@ class PowConstant(Function):
return grad_output.mul(self.fw_result).mul_(math.log(self.constant))
else:
a = self.saved_tensors[0]
return grad_output.mul(self.constant).mul_(a.pow(self.constant - 1))
class Negate(InplaceFunction):


@ -25,7 +25,7 @@ class Addmm(_BlasBase):
self.save_for_backward(matrix1, matrix2)
output = self._get_output(add_matrix)
return torch.addmm(self.alpha, add_matrix, self.beta,
matrix1, matrix2, out=output)
def backward(self, grad_output):
matrix1, matrix2 = self.saved_tensors
@ -55,7 +55,7 @@ class Addbmm(_BlasBase):
self.save_for_backward(batch1, batch2)
output = self._get_output(add_matrix)
return torch.addbmm(self.alpha, add_matrix, self.beta,
batch1, batch2, out=output)
def backward(self, grad_output):
batch1, batch2 = self.saved_tensors
@ -68,8 +68,8 @@ class Addbmm(_BlasBase):
if any(self.needs_input_grad[1:]):
batch_grad_output = (grad_output
.unsqueeze(0)
.expand(batch1.size(0), batch1.size(1), batch2.size(2)))
if self.needs_input_grad[1]:
grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2))
@ -90,7 +90,7 @@ class Baddbmm(_BlasBase):
self.save_for_backward(batch1, batch2)
output = self._get_output(add_batch)
return torch.baddbmm(self.alpha, add_batch, self.beta,
batch1, batch2, out=output)
def backward(self, grad_output):
batch1, batch2 = self.saved_tensors
@ -120,7 +120,7 @@ class Addmv(_BlasBase):
self.save_for_backward(matrix, vector)
output = self._get_output(add_vector)
return torch.addmv(self.alpha, add_vector, self.beta,
matrix, vector, out=output)
def backward(self, grad_output):
matrix, vector = self.saved_tensors
@ -150,7 +150,7 @@ class Addr(_BlasBase):
self.save_for_backward(vector1, vector2)
output = self._get_output(add_matrix)
return torch.addr(self.alpha, add_matrix, self.beta,
vector1, vector2, out=output)
def backward(self, grad_output):
vector1, vector2 = self.saved_tensors
@ -168,7 +168,7 @@ class Addr(_BlasBase):
if self.needs_input_grad[2]:
# TODO: maybe it's better to do transpose + mv + transpose
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
if self.beta != 1:
grad_vector2 *= self.beta
@ -179,6 +179,7 @@ class Dot(Function):
def forward(self, vector1, vector2):
self.save_for_backward(vector1, vector2)
self.sizes = (vector1.size(), vector2.size())
return vector1.new((vector1.dot(vector2),))
def backward(self, grad_output):
@ -186,17 +187,9 @@ class Dot(Function):
grad_vector1 = grad_vector2 = None
if self.needs_input_grad[0]:
grad_vector1 = vector2.mul(grad_output[0]).view(self.sizes[0])
if self.needs_input_grad[1]:
grad_vector2 = vector1.mul(grad_output[0]).view(self.sizes[1])
return grad_vector1, grad_vector2


@ -0,0 +1,40 @@
import torch
from ..function import Function
class _CompareOp(Function):
def __init__(self, scalar=None):
super(_CompareOp, self).__init__()
self.scalar = scalar
def forward(self, tensor1, tensor2=None):
other = tensor2 if tensor2 is not None else self.scalar
mask = getattr(tensor1, self.fn_name)(other)
self.mark_non_differentiable(mask)
return mask
class Eq(_CompareOp):
fn_name = 'eq'
class Ne(_CompareOp):
fn_name = 'ne'
class Gt(_CompareOp):
fn_name = 'gt'
class Ge(_CompareOp):
fn_name = 'ge'
class Lt(_CompareOp):
fn_name = 'lt'
class Le(_CompareOp):
fn_name = 'le'


@ -10,10 +10,10 @@ class Diag(Function):
self.diagonal_idx = diagonal_idx
def forward(self, input):
return input.diag(self.diagonal_idx)
def backward(self, grad_output):
return grad_output.diag(self.diagonal_idx)
class Tril(Function):
@ -41,5 +41,31 @@ class Triu(Function):
def backward(self, grad_output):
return grad_output.triu(self.diagonal_idx)
class Trace(Function):
def forward(self, input):
self.isize = input.size()
return input.new((input.trace(),))
def backward(self, grad_output):
isize = self.isize
grad_input = grad_output.new(isize).zero_()
grad_input.view(-1)[::(isize[1] + 1)] = grad_output[0]
return grad_input
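Since d(trace(A))/dA is the identity matrix, the backward above only needs to write grad_output onto the diagonal, which the strided assignment view(-1)[::isize[1] + 1] does in one step. A pure-Python check of the stride trick (illustrative only):
n = 3
flat = [0.0] * (n * n)
flat[::n + 1] = [1.0] * n   # same indexing as grad_input.view(-1)[::isize[1] + 1]
# flat now encodes the n x n identity matrix in row-major order
assert [flat[i * n + i] for i in range(n)] == [1.0] * n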
class Cross(Function):
def __init__(self, dim=-1):
self.dim = dim
def forward(self, input, other):
self.save_for_backward(input, other)
return torch.cross(input, other, self.dim)
def backward(self, grad_output):
input, other = self.saved_tensors
grad_input = torch.cross(other, grad_output, self.dim)
grad_other = torch.cross(grad_output, input, self.dim)
return grad_input, grad_other


@ -165,6 +165,7 @@ class Tan(Function):
class Asin(Function):
def forward(self, i):
self.save_for_backward(i)
return i.asin()
@ -175,6 +176,7 @@ class Asin(Function):
class Acos(Function):
def forward(self, i):
self.save_for_backward(i)
return i.acos()
@ -185,6 +187,7 @@ class Acos(Function):
class Atan(Function):
def forward(self, i):
self.save_for_backward(i)
return i.atan()


@ -4,6 +4,7 @@ from ..function import Function
class _DimReduceFunction(Function):
def __init__(self, dim=None):
super(_DimReduceFunction, self).__init__()
self.dim = dim
@ -45,13 +46,50 @@ class Prod(_DimReduceFunction):
def backward(self, grad_output):
if self.dim is None:
input, = self.saved_tensors
zero_idx = (input == 0).nonzero()
if zero_idx.dim() == 0:
return grad_output.mul(self.result).expand_as(input).div(input)
elif zero_idx.size(0) > 1:
return grad_output.new(self.input_size).zero_()
else:
grad_input = grad_output.new(self.input_size).zero_()
zero_idx = tuple(zero_idx[0].cpu())
input_copy = input.clone()
input_copy[zero_idx] = 1.
grad_input[zero_idx] = grad_output[0] * input_copy.prod()
return grad_input
else:
input, output = self.saved_tensors
dim = self.dim if self.dim >= 0 else self.dim + input.dim()
zero_mask = input == 0
slice_zero_count = zero_mask.sum(dim)
total_zeros = slice_zero_count.sum()
grad_input = grad_output.mul(output).expand_as(input).div(input)
if total_zeros == 0:
return grad_input
some_zeros = slice_zero_count.gt(0).expand_as(grad_input)
grad_input[some_zeros] = 0
single_zero_idx = slice_zero_count.eq(1).nonzero()
if len(single_zero_idx) == 0:
return grad_input
for idx in single_zero_idx:
idx_tuple = tuple(idx.cpu())
input_idx_tuple = idx_tuple[:dim] + (slice(0, None),) + idx_tuple[dim + 1:]
# slice_mask and input_copy are 1D
slice_mask = zero_mask[input_idx_tuple]
input_copy = input[input_idx_tuple].clone()
zero_idx = slice_mask.nonzero()[0, 0]
input_copy[zero_idx] = 1.
grad_idx_tuple = idx_tuple[:dim] + (zero_idx,) + idx_tuple[dim + 1:]
grad_input[grad_idx_tuple] = grad_output[idx_tuple] * input_copy.prod()
return grad_input
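The zero handling above follows from d(prod x)/dx_i = prod_{j != i} x_j: with exactly one zero in a slice the gradient is nonzero only at the zero's position, and with two or more zeros every such product still contains a zero. A pure-Python check of that rule (illustrative, not from the diff):
def prod_grad(xs):
    # Gradient of prod(xs) w.r.t. each element: the product of all the others.
    grads = []
    for i in range(len(xs)):
        g = 1.0
        for j, x in enumerate(xs):
            if j != i:
                g *= x
        grads.append(g)
    return grads

assert prod_grad([2.0, 3.0, 4.0]) == [12.0, 8.0, 6.0]
assert prod_grad([2.0, 0.0, 4.0]) == [0.0, 8.0, 0.0]   # one zero
assert prod_grad([0.0, 3.0, 0.0]) == [0.0, 0.0, 0.0]   # two zeros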
class Mean(_DimReduceFunction):
@ -139,6 +177,7 @@ class Kthvalue(_SelectionFunction):
class Norm(Function):
def __init__(self, norm_type=2, dim=None):
super(Norm, self).__init__()
self.norm_type = norm_type


@ -65,7 +65,7 @@ class Normal(StochasticFunction):
output.mul_(stddevs)
else:
raise RuntimeError("Normal function requires specifying a common "
"stddev, or per-sample stddev")
"stddev, or per-sample stddev")
output.add_(means)
self.save_for_backward(output, means, stddevs)
self.mark_non_differentiable(output)
@ -74,7 +74,7 @@ class Normal(StochasticFunction):
def backward(self, reward):
output, means, stddevs = self.saved_tensors
grad_stddevs = None
grad_means = means - output # == -(output - means)
assert self.stddev is not None or stddevs is not None
if self.stddev is not None:
grad_means /= 1e-6 + self.stddev ** 2
@ -83,9 +83,9 @@ class Normal(StochasticFunction):
stddevs_cb = stddevs_sq * stddevs
stddevs_sq += 1e-6
stddevs_cb += 1e-6
grad_stddevs = (stddevs_sq - (grad_means * grad_means))
grad_stddevs /= stddevs_cb
grad_stddevs *= reward
grad_means /= stddevs_sq
grad_means *= reward
return grad_means, grad_stddevs
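These are REINFORCE (score-function) gradients: for x ~ N(mu, sigma), d log p / d mu = (x - mu) / sigma^2 and d log p / d sigma = ((x - mu)^2 - sigma^2) / sigma^3, and the backward returns their negatives scaled by the reward (the 1e-6 terms guard against division by zero). A scalar sketch under those assumptions (illustrative names, not from the diff):
def normal_reinforce_grads(x, mu, sigma, reward, eps=1e-6):
    # Negated score function times the reward, mirroring the backward above.
    var = sigma ** 2 + eps
    grad_mu = (mu - x) / var * reward
    grad_sigma = (var - (x - mu) ** 2) / (sigma ** 3 + eps) * reward
    return grad_mu, grad_sigma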


@ -18,9 +18,8 @@ class Index(Function):
return result
def backward(self, grad_output):
grad_input = grad_output.new(self.input_size).zero_()
grad_input._set_index(self.index, grad_output)
return grad_input
@ -33,20 +32,23 @@ class SetItem(InplaceFunction):
def forward(self, i, value=None):
self.mark_dirty(i)
if value is None: # value is scalar
value = self.value
else: # value is Tensor
self.value_size = value.size()
i._set_index(self.index, value)
return i
def backward(self, grad_output):
if self.value is None: # value is Tensor
grad_input = grad_output.clone()
grad_input._set_index(self.index, 0)
grad_value = grad_output.index(self.index).clone()
grad_value = grad_value.view(self.value_size)
return grad_input, grad_value
else:
grad_input = grad_output.clone()
grad_input._set_index(self.index, 0)
return grad_input
@ -99,25 +101,29 @@ class View(Function):
def backward(self, grad_output):
return grad_output.contiguous().view(self.input_size)
class Expand(Function):
def __init__(self, sizes):
super(Expand, self).__init__()
self.sizes = sizes
self.expanded_dims = []
def forward(self, i):
result = i.expand(*self.sizes)
self.num_unsqueezed = len(self.sizes) - i.dim()
self.expanded_dims = [dim for dim, (expanded, original)
in enumerate(zip(self.sizes[self.num_unsqueezed:], i.size()))
if expanded != original]
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
grad_input = grad_output
for i in range(self.num_unsqueezed):
grad_input = grad_input.sum(0).squeeze(0)
for dim in self.expanded_dims:
grad_input = grad_input.sum(dim)
return grad_input
@ -288,7 +294,7 @@ class IndexSelect(Function):
if self.needs_input_grad[0]:
index, = self.saved_tensors
grad_tensor = grad_output.new(*self.input_size).zero_()
grad_tensor.index_add_(self.dim, index, grad_output)
return grad_tensor, None
@ -304,8 +310,8 @@ class Concat(Function):
return torch.cat(inputs, self.dim)
def backward(self, grad_output):
return tuple(grad_output.narrow(self.dim, end - size, size) for size, end
in zip(self.input_sizes, _accumulate(self.input_sizes)))
class Resize(Function):
@ -318,11 +324,11 @@ class Resize(Function):
def forward(self, tensor):
if tensor.numel() != self.numel:
raise RuntimeError(("requested resize to {} ({} elements in total), "
"but the given tensor has a size of {} ({} elements). "
"autograd's resize can only change the shape of a given "
"tensor, while preserving the number of elements. ").format(
'x'.join(map(str, self.sizes)), self.numel,
'x'.join(map(str, tensor.size())), tensor.numel()))
"but the given tensor has a size of {} ({} elements). "
"autograd's resize can only change the shape of a given "
"tensor, while preserving the number of elements. ").format(
'x'.join(map(str, self.sizes)), self.numel,
'x'.join(map(str, tensor.size())), tensor.numel()))
self.input_sizes = tensor.size()
result = tensor.new(tensor).resize_(*self.sizes)
self.mark_shared_storage((tensor, result))
@ -474,7 +480,7 @@ class _MultiSelectionFunction(Function):
class Sort(_MultiSelectionFunction):
def __init__(self, dim=None, descending=False, return_indices=True):
super(Sort, self).__init__(dim, return_indices)
self.descending = descending
@ -486,14 +492,14 @@ class Sort(_MultiSelectionFunction):
class Topk(_MultiSelectionFunction):
def __init__(self, k, dim=None, largest=True, sort=True, return_indices=True):
super(Topk, self).__init__(dim, return_indices)
self.k = k
self.largest = largest
self.sort = sort
def forward(self, input):
dim = self.dim if self.dim is not None else input.dim() - 1
self.args = (self.k, dim, self.largest, self.sort)
return super(Topk, self).forward(input)
@ -567,9 +573,41 @@ class Scatter(InplaceFunction):
return grad_input, None, grad_source
class Repeat(Function):
def __init__(self, repeats):
super(Repeat, self).__init__()
self.repeats = repeats
def forward(self, input):
return input.repeat(self.repeats)
def backward(self, grad_output):
grad_input = grad_output
for dim, repeat in enumerate(self.repeats):
if repeat == 1:
continue
grad_input = sum(grad_input.chunk(repeat, dim))
return grad_input
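Each input element is copied `repeats[dim]` times along a dimension, so its gradient is the sum of grad_output over all of its copies; chunking grad_output along that dimension and summing the chunks computes exactly that. A 1-D pure-Python analogue (illustrative, not from the diff):
def repeat_backward_1d(grad_output, repeat):
    n = len(grad_output) // repeat
    grad_input = [0.0] * n
    for k in range(repeat):            # one chunk per copy
        for i in range(n):
            grad_input[i] += grad_output[k * n + i]
    return grad_input

assert repeat_backward_1d([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 2) == [5.0, 7.0, 9.0]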
class Cumsum(Function):
def __init__(self, dim):
super(Cumsum, self).__init__()
self.dim = dim
def forward(self, input):
return torch.cumsum(input, dim=self.dim)
def backward(self, grad_output):
grad_input = torch.cumsum(-grad_output, dim=self.dim)
end_idx = grad_input.size(self.dim) - 1
grad_sum = grad_input.narrow(self.dim, end_idx, 1)
grad_input -= grad_sum.expand_as(grad_input)
grad_input += grad_output
return grad_input
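Because output[j] = sum_{i <= j} input[i], the gradient w.r.t. input[i] is the reverse cumulative sum sum_{j >= i} grad_output[j]; the three tensor operations above compute it without an explicit flip. A pure-Python check (illustrative):
def cumsum_backward_1d(grad_output):
    # grad_input[i] = sum(grad_output[i:]), matching the tensor ops above.
    grad_input = []
    total = sum(grad_output)
    for i in range(len(grad_output)):
        acc = -sum(grad_output[:i + 1])   # cumsum of -grad_output
        acc -= -total                     # subtract its last entry
        acc += grad_output[i]             # add grad_output back elementwise
        grad_input.append(acc)
    return grad_input

assert cumsum_backward_1d([1.0, 2.0, 3.0]) == [6.0, 5.0, 3.0]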
# TODO: unfold


@ -71,8 +71,8 @@ class BasicEngine(object):
else:
if prev_fn.num_outputs != 1:
raise RuntimeError("one of the function outputs "
"wasn't used - this is an error not, but "
"it's going to be fixed soon")
"wasn't used - this is an error not, but "
"it's going to be fixed soon")
prev_grad = (d_prev_fn,)
ready.appendleft((prev_fn, prev_grad))
else:


@ -2,7 +2,6 @@ import torch
import torch._C as _C
import torch.utils.hooks as hooks
from collections import OrderedDict
from itertools import chain
class Function(_C._FunctionBase):
@ -98,21 +97,22 @@ class Function(_C._FunctionBase):
**This should be called at most once, only from inside the**
:func:`forward` **method, and all arguments should be outputs.**
This will mark outputs as not requiring gradients, increasing the
efficiency of backward computation. You still need to accept a gradient
for each output in :meth:`~Function.backward`, but it's always going to
be ``None``.
This is used e.g. for indices returned from a max :class:`Function`.
"""
self.non_differentiable = args
@staticmethod
def _register_hook(backward_hooks, hook):
if backward_hooks is None:
backward_hooks = OrderedDict()
handle = hooks.RemovableHandle(backward_hooks)
backward_hooks[handle.id] = hook
return backward_hooks, handle
def forward(self, *input):
"""Performs the operation.
@ -154,9 +154,10 @@ def _nested_map(condition, fn):
return type(obj)(_map(x) for x in obj)
else:
raise ValueError("NestedIOFunction doesn't know how to process "
"an input object of type " + torch.typename(obj))
"an input object of type " + torch.typename(obj))
return _map
def _iter_filter(condition):
def _iter(obj):
if condition(obj):
@ -169,17 +170,29 @@ def _iter_filter(condition):
yield var
else:
raise ValueError("NestedIOFunction doesn't know how to process "
"an input object of type " + torch.typename(obj))
"an input object of type " + torch.typename(obj))
return _iter
def _unflatten(input, proto):
# unflatten a list or tuple input into a nested list/tuple structure
# specified by proto
def unflatten_helper(input, proto):
res = []
if not isinstance(proto, (list, tuple)):
return input[0], input[1:]
for e in proto:
res_e, input = unflatten_helper(input, e)
res.append(res_e)
return type(proto)(res), input
return unflatten_helper(input, proto)[0]
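For instance, with the _unflatten above in scope, each leaf of the prototype is paired with the next value from the flat sequence (a quick illustrative check):
proto = (1, (2, 3), [4])            # nested prototype with four leaves
flat = ('a', 'b', 'c', 'd')
assert _unflatten(flat, proto) == ('a', ('b', 'c'), ['d'])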
_iter_variables = _iter_filter(lambda o: isinstance(o, torch.autograd.Variable))
_iter_tensors = _iter_filter(torch.is_tensor)
_iter_None_tensors = _iter_filter(lambda o: o is None or torch.is_tensor(o))
_map_variable_tensor = _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: o.data)
class NestedIOFunction(Function):
@ -188,14 +201,20 @@ class NestedIOFunction(Function):
flat_input = tuple(_iter_variables(input))
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
nested_output = self._nested_output
nested_variables = _unflatten(flat_output, self._nested_output)
return nested_variables
def _do_backward(self, gradients, retain_variables):
self.retain_variables = retain_variables
result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
if not retain_variables:
del self._nested_output
del self._to_save_nested
return result
def backward(self, *gradients):
nested_gradients = _unflatten(gradients, self._nested_output)
result = self.backward_extended(*nested_gradients)
return tuple(_iter_None_tensors(result))
__call__ = _do_forward
@ -214,7 +233,7 @@ class NestedIOFunction(Function):
@property
def saved_tensors(self):
flat_tensors = super(NestedIOFunction, self).saved_tensors
return _unflatten(flat_tensors, self._to_save_nested)
def mark_dirty(self, *args, **kwargs):
self.dirty_tensors = tuple(_iter_tensors((args, kwargs)))

torch/autograd/gradcheck.py

@ -0,0 +1,160 @@
import torch
from torch.autograd import Variable
def iter_gradients(x):
if isinstance(x, Variable):
if x.requires_grad:
yield x.grad.data if x.grad is not None else None
else:
for elem in x:
for result in iter_gradients(elem):
yield result
def zero_gradients(i):
for t in iter_gradients(i):
if t is not None:
t.zero_()
def make_jacobian(input, num_out):
if isinstance(input, Variable) and not input.requires_grad:
return None
if torch.is_tensor(input) or isinstance(input, Variable):
return torch.zeros(input.nelement(), num_out)
else:
return type(input)(filter(lambda x: x is not None,
(make_jacobian(elem, num_out) for elem in input)))
def iter_tensors(x, only_requiring_grad=False):
if torch.is_tensor(x):
yield x
elif isinstance(x, Variable):
if x.requires_grad or not only_requiring_grad:
yield x.data
else:
for elem in x:
for result in iter_tensors(elem, only_requiring_grad):
yield result
def contiguous(input):
if torch.is_tensor(input):
return input.contiguous()
elif isinstance(input, Variable):
return input.contiguous()
else:
return type(input)(contiguous(e) for e in input)
def get_numerical_jacobian(fn, input, target, eps=1e-3):
# To be able to use .view(-1) input must be contiguous
input = contiguous(input)
output_size = fn(input).numel()
jacobian = make_jacobian(target, output_size)
# It's much easier to iterate over flattened lists of tensors.
# These are reference to the same objects in jacobian, so any changes
# will be reflected in it as well.
x_tensors = [t for t in iter_tensors(target, True)]
j_tensors = [t for t in iter_tensors(jacobian)]
outa = torch.DoubleTensor(output_size)
outb = torch.DoubleTensor(output_size)
# TODO: compare structure
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
flat_tensor = x_tensor.view(-1)
for i in range(flat_tensor.nelement()):
orig = flat_tensor[i]
flat_tensor[i] = orig - eps
outa.copy_(fn(input))
flat_tensor[i] = orig + eps
outb.copy_(fn(input))
flat_tensor[i] = orig
outb.add_(-1, outa).div_(2 * eps)
d_tensor[i] = outb
return jacobian
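The inner loop is a central finite difference, (f(x + eps) - f(x - eps)) / (2 * eps), applied to one flattened input element at a time. A scalar sketch of the same estimator (illustrative):
def central_difference(f, x, eps=1e-3):
    # Same two-sided estimate as the outa/outb computation above.
    return (f(x + eps) - f(x - eps)) / (2 * eps)

# d/dx of x**2 at x = 3.0 is 6.0; the central estimate is exact for quadratics:
assert abs(central_difference(lambda v: v * v, 3.0) - 6.0) < 1e-9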
def get_analytical_jacobian(input, output):
jacobian = make_jacobian(input, output.numel())
grad_output = output.data.clone().zero_()
flat_grad_output = grad_output.view(-1)
for i in range(flat_grad_output.numel()):
flat_grad_output.zero_()
flat_grad_output[i] = 1
zero_gradients(input)
output.backward(grad_output, retain_variables=True)
for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
if d_x is None:
jacobian_x[:, i].zero_()
else:
jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x
return jacobian
def _as_tuple(x):
if isinstance(x, tuple):
return x
elif isinstance(x, list):
return tuple(x)
else:
return x,
def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3):
"""Check gradients computed via small finite differences
against analytical gradients
The check between numerical and analytical has the same behaviour as
numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
meaning it checks that
absolute(a - n) <= (atol + rtol * absolute(n))
is true for all elements of analytical jacobian a and numerical jacobian n.
Args:
func: Python function that takes Variable inputs and returns
a tuple of Variables
inputs: tuple of Variables
eps: perturbation for finite differences
atol: absolute tolerance
rtol: relative tolerance
Returns:
True if all differences satisfy allclose condition
"""
output = func(*inputs)
output = _as_tuple(output)
for i, o in enumerate(output):
if not o.requires_grad:
continue
def fn(input):
return _as_tuple(func(*input))[i].data
numerical = get_numerical_jacobian(fn, inputs, inputs, eps)
analytical = get_analytical_jacobian(_as_tuple(inputs), o)
for a, n in zip(analytical, numerical):
if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
return False
# check if the backward multiplies by grad_output
zero_gradients(inputs)
output = _as_tuple(func(*inputs))
torch.autograd.backward(output, [o.data.new(o.size()).zero_() for o in output])
for i in inputs:
if i.grad is None:
continue
if not i.grad.data.eq(0).all():
return False
return True
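A hedged usage sketch of gradcheck with the legacy Variable API used in this file (the choice of sigmoid here is illustrative):
import torch
from torch.autograd import Variable, gradcheck

# double precision keeps the finite-difference error well below the tolerances
inputs = (Variable(torch.randn(4, 3).double(), requires_grad=True),)
assert gradcheck(lambda x: x.sigmoid(), inputs, eps=1e-6, atol=1e-4)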


@ -1,7 +1,10 @@
import torch
from numbers import Number
from .function import Function
_NOT_PROVIDED = object()
class StochasticFunction(Function):
def __init__(self):
@ -10,12 +13,32 @@ class StochasticFunction(Function):
def _do_backward(self, grad_output, retain_variables):
if self.reward is _NOT_PROVIDED:
raise RuntimeError("differentiating stochastic functions requires "
"providing a reward")
"providing a reward")
result = super(StochasticFunction, self)._do_backward((self.reward,), retain_variables)
if not retain_variables:
self.reward = None
return result
def _do_forward(self, *inputs):
result = super(StochasticFunction, self)._do_forward(*inputs)
# save output type and size, to check the type of reward
assert isinstance(result, torch.autograd.Variable), \
"stochastic functions support only a single output at the moment"
self.reward_info = (type(inputs[0].data), result.size())
return result
__call__ = _do_forward
def _reinforce(self, reward):
is_number = isinstance(reward, Number)
if not is_number and type(reward) != self.reward_info[0]:
raise TypeError("mismatch between reward and output type: got {}, "
"but expected {}".format(torch.typename(reward),
torch.typename(self.reward_info[0])))
if not is_number and reward.size() != self.reward_info[1]:
raise ValueError("got reward of size {}, but expected a tensor of size {}".format(
'x'.join(map(str, reward.size())),
'x'.join(map(str, self.reward_info[1]))))
if self.reward is not _NOT_PROVIDED:
raise RuntimeError("you can only reinforce a stochastic Function once")
self.reward = reward


@ -1,6 +1,7 @@
import sys
import torch._C as _C
from collections import OrderedDict
import torch.sparse as sparse
import torch.utils.hooks as hooks
from ._functions import *
@ -56,30 +57,6 @@ class Variable(_C._VariableBase):
'is_cuda',
}
def __getattr__(self, name):
if name in self._fallthrough_methods:
return getattr(self.data, name)
@ -87,13 +64,13 @@ class Variable(_C._VariableBase):
def __getitem__(self, key):
if (isinstance(key, Variable) and
type(key.data).__name__ == 'ByteTensor'):
return MaskedSelect()(self, key)
return Index(key)(self)
def __setitem__(self, key, value):
if (isinstance(key, Variable) and
type(key.data).__name__ == 'ByteTensor'):
if isinstance(value, Variable):
return MaskedCopy(inplace=True)(self, key, value)
else:
@ -107,20 +84,31 @@ class Variable(_C._VariableBase):
def __deepcopy__(self, memo):
if self.creator is not None:
raise RuntimeError("Only Variables created explicitly by the user "
"(graph leaves) support the deepcopy protocol at the moment")
result = type(self)(self.data.clone(), requires_grad=self.requires_grad,
volatile=self.volatile)
"(graph leaves) support the deepcopy protocol at the moment")
result = type(self)(self.data.clone())
result.requires_grad = self.requires_grad
result.volatile = self.volatile
memo[id(self)] = result
return result
def __reduce_ex__(self, proto):
state = (self.requires_grad, self.volatile, self._backward_hooks)
if proto > 1:
return super(Variable, self).__reduce_ex__(proto)
return type(self), (self.data,), state
if sys.version_info[0] == 2:
from copy_reg import __newobj__
else:
from copyreg import __newobj__
return __newobj__, (type(self),), self.__getstate__()
return __newobj__, (type(self), self.data), state
def __setstate__(self, state):
if len(state) == 5:
# legacy serialization of Variable
self.data = state[0]
state = (state[3], state[4], state[2])
if self.creator is not None:
raise RuntimeError('__setstate__ can be only called on leaf variables')
self.requires_grad, self.volatile, self._backward_hooks = state
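These pickle hooks can be checked with a quick round trip; a sketch, assuming a leaf Variable:

    import pickle
    import torch
    from torch.autograd import Variable

    v = Variable(torch.randn(3), requires_grad=True)
    v2 = pickle.loads(pickle.dumps(v))   # exercises __reduce_ex__ / __setstate__
    assert v2.requires_grad and not v2.volatile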
def __repr__(self):
return 'Variable containing:' + self.data.__repr__()
@ -131,7 +119,7 @@ class Variable(_C._VariableBase):
The graph is differentiated using the chain rule. If the variable is
non-scalar (i.e. its data has more than one element) and requires
gradient, the function additionally requires specifying ``gradient``.
It should be a tensor of matching type and location, that containins
It should be a tensor of matching type and location, that contains
the gradient of the differentiated function w.r.t. ``self``.
This function accumulates gradients in the leaves - you might need to zero
@ -151,7 +139,9 @@ class Variable(_C._VariableBase):
raise RuntimeError('calling backward on a volatile variable')
if gradient is None and self.requires_grad:
if self.data.numel() != 1:
raise RuntimeError('backward should be called only on a scalar (i.e. 1-element tensor) or with gradient w.r.t. the variable')
raise RuntimeError(
'backward should be called only on a scalar (i.e. 1-element tensor) '
'or with gradient w.r.t. the variable')
gradient = self.data.new().resize_as_(self.data).fill_(1)
self._execution_engine.run_backward((self,), (gradient,), retain_variables)
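A short sketch of the non-scalar case, where the gradient argument is mandatory:

    import torch
    from torch.autograd import Variable

    x = Variable(torch.randn(2, 2), requires_grad=True)
    y = x * 2                      # non-scalar output
    y.backward(torch.ones(2, 2))   # gradient w.r.t. y must be supplied
    # x.grad is now 2 everywhere, since dy/dx = 2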
@ -161,7 +151,7 @@ class Variable(_C._VariableBase):
The hook will be called every time a gradient with respect to the
variable is computed. The hook should have the following signature::
hook(grad) -> Tensor or None
hook(grad) -> Variable or None
The hook should not modify its argument, but it can optionally return
a new gradient which will be used in place of :attr:`grad`.
@ -190,22 +180,9 @@ class Variable(_C._VariableBase):
if self.creator is not None:
self.creator._register_hook_dict(self)
handle = hooks.RemovableHandle(self._backward_hooks)
self._backward_hooks[id(handle)] = hook
self._backward_hooks[handle.id] = hook
return handle
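A sketch of registering and removing a hook through the returned handle:

    import torch
    from torch.autograd import Variable

    v = Variable(torch.ones(2), requires_grad=True)
    handle = v.register_hook(lambda grad: grad * 2)   # returns a replacement gradient
    y = v * 3
    y.backward(torch.ones(2))   # v's gradient is doubled by the hook
    handle.remove()             # unregister the hook again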
def _do_backward(self, grad_output, retain_variables):
assert len(grad_output) == 1
assert self._version == 0 and self.creator is None, \
"leaf variable was used in an inplace operation"
unpacked_grad = grad_output[0]
if self._backward_hooks:
for hook in self._backward_hooks.values():
result = hook(unpacked_grad)
if result is not None:
unpacked_grad = result
self.grad.data.add_(unpacked_grad)
return tuple()
def reinforce(self, reward):
"""Registers a reward obtained as a result of a stochastic process.
@ -219,12 +196,29 @@ class Variable(_C._VariableBase):
"""
if not isinstance(self.creator, StochasticFunction):
raise RuntimeError("reinforce() can be only called on outputs "
"of stochastic functions")
"of stochastic functions")
self.creator._reinforce(reward)
def detach(self):
"""Detaches the Variable from the graph that created it."""
return NoGrad()(self)
"""Returns a new Variable, detached from the current graph.
Result will never require gradient. If the input is volatile, the output
will be volatile too.
.. note::
The returned Variable uses the same data tensor as the original one, and
in-place modifications on either of them will be seen and may trigger
errors in correctness checks.
"""
result = NoGrad()(self) # this is needed, because it merges version counters
result._creator = None
return result
def detach_(self):
"""Detaches the Variable from the graph that created it, making it a leaf."""
self._creator = None
self.requires_grad = False
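A sketch of the sharing behaviour the note above warns about:

    import torch
    from torch.autograd import Variable

    x = Variable(torch.zeros(3), requires_grad=True)
    y = (x + 1).detach()    # no creator, never requires grad
    assert not y.requires_grad
    y.data.fill_(7)         # in-place edit is visible through the shared data tensor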
def contiguous(self):
self.data = self.data.contiguous()
@ -238,6 +232,9 @@ class Variable(_C._VariableBase):
return Type(t)(self)
return self
def type_as(self, t):
return self.type(type(t.data))
def _get_type(self, name):
module = torch._import_dotted_name(self.data.__module__)
return getattr(module, name)
@ -392,7 +389,7 @@ class Variable(_C._VariableBase):
def clamp(self, min=None, max=None):
if min is None and max is None:
raise ValueError("clamp requires specifying at least one of "
"min and max arguments")
"min and max arguments")
elif min is None and max is not None:
return CminConstant(max)(self)
elif min is not None and max is None:
@ -424,12 +421,6 @@ class Variable(_C._VariableBase):
def trunc(self):
return Trunc()(self)
def floor(self):
return Floor()(self)
def ceil(self):
return Ceil()(self)
def fmod(self, value):
return Fmod(value)(self)
@ -482,6 +473,40 @@ class Variable(_C._VariableBase):
def view_as(self, tensor):
return View(*tensor.size())(self)
def split(self, split_size, dim=0):
return torch.split(self, split_size, dim)
def repeat(self, *repeats):
if len(repeats) == 1 and isinstance(repeats[0], torch.Size):
repeats = repeats[0]
else:
repeats = torch.Size(repeats)
return Repeat(repeats)(self)
def cumsum(self, dim):
return Cumsum(dim)(self)
def var(self, dim=None, unbiased=True):
mean = self.mean(dim)
if dim is None:
mean = mean.view(*(1 for s in self.size()))
mean_expanded = mean.expand_as(self)
zero_centered = self.sub(mean_expanded)
var = zero_centered.mul(zero_centered).sum(dim)
numel = self.numel() if dim is None else self.size(dim)
return var.div(numel - int(unbiased))
def std(self, dim=None, unbiased=True):
return self.var(dim, unbiased).sqrt()
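A quick numeric check of the two reductions above (unbiased divides by n - 1):

    import torch
    from torch.autograd import Variable

    x = Variable(torch.Tensor([1, 2, 3]))
    # mean = 2, squared deviations sum to 2, so unbiased var = 2 / (3 - 1) = 1
    assert abs(x.var().data[0] - 1.0) < 1e-6
    assert abs(x.std().data[0] - 1.0) < 1e-6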
def renorm(self, p, dim, maxnorm):
t = self.transpose(dim, 0)
flat = t.contiguous().view(self.size(0), -1)
norms = flat.norm(p, 1)
norms = norms.clamp(max=maxnorm).div(norms.add(1e-7))
flat_out = flat.mul(norms.expand_as(flat))
return flat_out.view(t.size()).transpose(dim, 0)
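renorm rescales each slice along dim whose p-norm exceeds maxnorm; the 1e-7 added to the denominator keeps the division stable but makes the bound slightly approximate. A sketch:

    import torch
    from torch.autograd import Variable

    x = Variable(torch.randn(3, 4))
    y = x.renorm(2, 0, 1.0)   # every row of y has 2-norm of (at most) about 1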
@staticmethod
def _static_blas(cls, args, inplace):
num_args = len(args)
@ -503,7 +528,7 @@ class Variable(_C._VariableBase):
def bmm(self, batch):
output = Variable(self.data.new(self.data.size(0), self.data.size(1),
batch.data.size(2)))
return self._static_blas(Baddbmm, (output, 0, 1, self, batch), False)
def mv(self, vector):
@ -567,11 +592,11 @@ class Variable(_C._VariableBase):
def addcdiv(self, *args):
return self._addcop(Addcdiv, args)
def norm(self, norm_type=2, dim=None):
return Norm(norm_type, dim)(self)
def norm(self, p=2, dim=None):
return Norm(p, dim)(self)
def dist(self, tensor, norm_type=2):
return Norm(norm_type)(self - tensor)
def dist(self, tensor, p=2):
return Norm(p)(self - tensor)
def index_add(self, dim, index, tensor):
return IndexAdd(dim)(self, index, tensor)
@ -622,7 +647,7 @@ class Variable(_C._VariableBase):
if isinstance(sizes[0], torch.Size):
if len(sizes) > 1:
raise ValueError("expand expects a several ints or a single "
"torch.Size argument")
"torch.Size argument")
sizes = sizes[0]
return Expand(sizes)(self)
@ -636,12 +661,14 @@ class Variable(_C._VariableBase):
return Transpose(dim1, dim2)(self)
def select(self, dim, _index):
dim = dim if dim >= 0 else dim + self.dim()
index = tuple(slice(None, None) for _ in range(dim)) + (_index,)
return Index(index)(self)
def narrow(self, dim, start_index, length):
dim = dim if dim >= 0 else dim + self.dim()
index = tuple(slice(None, None) for _ in range(dim)) + \
(slice(start_index, start_index + length),)
return Index(index)(self)
@ -666,12 +693,54 @@ class Variable(_C._VariableBase):
def triu(self, diagonal_idx=0):
return Triu(diagonal_idx)(self)
def trace(self):
return Trace()(self)
def cross(self, other, dim=-1):
return Cross(dim)(self, other)
def multinomial(self, num_samples=1, with_replacement=False):
return Multinomial(num_samples, with_replacement)(self)
def bernoulli(self):
return Bernoulli()(self)
def eq(self, other):
if isinstance(other, Variable):
return Eq()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Eq(other)(self)
def ne(self, other):
if isinstance(other, Variable):
return Ne()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Ne(other)(self)
def gt(self, other):
if isinstance(other, Variable):
return Gt()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Gt(other)(self)
def ge(self, other):
if isinstance(other, Variable):
return Ge()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Ge(other)(self)
def lt(self, other):
if isinstance(other, Variable):
return Lt()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Lt(other)(self)
def le(self, other):
if isinstance(other, Variable):
return Le()(self, other)
assert not torch.is_tensor(other), "can't compare Variable and tensor"
return Le(other)(self)
def __add__(self, other):
return self.add(other)
__radd__ = __add__
@ -710,7 +779,7 @@ class Variable(_C._VariableBase):
elif dim_self == 2 and dim_other == 2:
return self.mm(other)
raise ValueError("both arguments to __matmul__ need to be 1D or 2D, "
"but they are {}D and {}D".format(dim_self, dim_other))
"but they are {}D and {}D".format(dim_self, dim_other))
def __div__(self, other):
return self.div(other)
@ -741,6 +810,30 @@ class Variable(_C._VariableBase):
def __iter__(self):
return iter(map(lambda i: self[i], range(self.size(0))))
def __mod__(self, other):
return self.remainder(other)
def __eq__(self, other):
return self.eq(other)
def __ne__(self, other):
return self.ne(other)
def __lt__(self, other):
return self.lt(other)
def __le__(self, other):
return self.le(other)
def __gt__(self, other):
return self.gt(other)
def __ge__(self, other):
return self.ge(other)
def __hash__(self):
return id(self)
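Since the rich comparisons return elementwise results rather than a single bool, hashing falls back to object identity; a sketch:

    import torch
    from torch.autograd import Variable

    a = Variable(torch.Tensor([1, 2]))
    b = Variable(torch.Tensor([1, 3]))
    mask = a == b      # elementwise comparison, wraps a ByteTensor
    d = {a: 'entry'}   # still usable as a dict key: __hash__ is id-based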
class _torch(object):
@staticmethod
@ -748,11 +841,11 @@ class Variable(_C._VariableBase):
return Concat(dim)(*iterable)
@staticmethod
def normal(means, stddev=1):
if isinstance(stddev, Variable):
return Normal()(means, stddev)
def normal(means, std=1):
if isinstance(std, Variable):
return Normal()(means, std)
else:
return Normal(stddev)(means)
return Normal(std)(means)
@staticmethod
def _blas(cls, args, inplace):


@ -1,43 +1,32 @@
import torch._C as _C
import ctypes
import warnings
import torch.cuda
import sys
import os.path as path
import torch
import warnings
enabled = True # set to False to globally disable cuDNN
lib = None
# TODO: fix libname for Windows
__cudnn_version = None
# TODO: dynamic version checks via cudnnGetVersion
# TODO: load 5.1.3 if using CUDA 7.5 and 5.1.5 if using CUDA 8.0
thisdir = path.dirname(__file__)
libpaths = ['', path.join(thisdir, '../../lib')]
if sys.platform.startswith('linux'):
libnames = ['libcudnn.so.5.1.5', 'libcudnn.so.5.1.3', 'libcudnn.so.5.0.5', 'libcudnn.so.5.1.10']
elif sys.platform == 'darwin':
libnames = ['libcudnn.5.dylib']
else:
libnames = []
def _loadlib():
global lib
loaded = False
for libpath in libpaths:
for libname in libnames:
try:
lib = ctypes.cdll.LoadLibrary(path.join(libpath, libname))
loaded = True
break
except OSError:
continue
if loaded:
break
if loaded:
lib.cudnnGetErrorString.restype = ctypes.c_char_p
else:
lib = None
raise OSError("Could not load cuDNN")
def _libcudnn():
global lib, __cudnn_version
if lib is None:
lib = ctypes.cdll.LoadLibrary(None)
if hasattr(lib, 'cudnnGetErrorString'):
lib.cudnnGetErrorString.restype = ctypes.c_char_p
__cudnn_version = lib.cudnnGetVersion()
else:
lib = None
return lib
def version():
if _libcudnn() is None:
return None
return __cudnn_version
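version() now degrades gracefully instead of raising when cuDNN is absent; a sketch of the intended use (5103 is the v5.1 threshold checked elsewhere in this codebase):

    import torch.backends.cudnn as cudnn

    v = cudnn.version()   # None when libcudnn could not be loaded
    if v is not None and v < 5103:
        print('found cuDNN {}, but dropout needs v5.1+'.format(v))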
def is_acceptable(tensor):
if not enabled:
@ -46,57 +35,30 @@ def is_acceptable(tensor):
isinstance(tensor, torch.cuda.FloatTensor) or
isinstance(tensor, torch.cuda.DoubleTensor)):
return False
if lib is None:
try:
_loadlib()
except Exception:
warnings.warn('cuDNN library not found. Check your {libpath}'.format(
libpath={
'darwin': 'DYLD_LIBRARY_PATH',
'win32': 'PATH'
}.get(sys.platform, 'LD_LIBRARY_PATH')))
return False
if not _C.has_cudnn:
warnings.warn("cuDNN library has been detected, but your pytorch "
"installation was compiled without support for it. You "
"might want to rebuild pytorch, making sure the library "
"is visible to the build system.")
if not torch._C.has_cudnn:
warnings.warn(
"PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "
"PyTorch making sure the library is visible to the build system.")
return False
if _libcudnn() is None:
warnings.warn('cuDNN library not found. Check your {libpath}'.format(
libpath={
'darwin': 'DYLD_LIBRARY_PATH',
'win32': 'PATH'
}.get(sys.platform, 'LD_LIBRARY_PATH')))
return False
return True
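is_acceptable gates the cuDNN code paths; a sketch, assuming a CUDA-enabled build with cuDNN available at runtime:

    import torch
    import torch.backends.cudnn as cudnn

    x = torch.cuda.FloatTensor(8, 3, 32, 32)
    if cudnn.is_acceptable(x):
        pass   # safe to dispatch this tensor to cuDNN kernels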
__cudnn_version = []
def version():
if not lib:
raise RuntimeError("cuDNN not initialized")
if len(__cudnn_version) == 0:
__cudnn_version.append(lib.cudnnGetVersion())
return __cudnn_version[0]
_handles = {}
benchmark = False
verbose = False
workspace_limit = None
CUDNN_DATA_FLOAT = 0
CUDNN_DATA_DOUBLE = 1
CUDNN_DATA_HALF = 2
CUDNN_CONVOLUTION = 0
CUDNN_CROSS_CORRELATION = 1
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2
CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE = 0
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST = 1
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT = 2
CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE = 0
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST = 1
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT = 2
CUDNN_TENSOR_NCHW = 0
CUDNN_TENSOR_NHWC = 1
@ -108,6 +70,11 @@ CUDNN_GRU = 3
CUDNN_LINEAR_INPUT = 0
CUDNN_SKIP_INPUT = 1
CUDNN_RNN_ALGO_STANDARD = 0
CUDNN_RNN_ALGO_PERSIST_STATIC = 1
CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2
class CuDNNHandle:
def __init__(self):
ptr = ctypes.c_void_p()
@ -117,6 +84,7 @@ class CuDNNHandle:
def __del__(self):
check_error(lib.cudnnDestroy(self))
class CuDNNError(RuntimeError):
def __init__(self, status):
self.status = status
@ -161,40 +129,21 @@ class TensorDescriptorArray(object):
def __getitem__(self, key):
return ctypes.c_void_p(self.ptrs[key])
def set(self, tensor):
self._type = tensor.type()
self._size = tensor.size()
self._stride = tensor.stride()
def set_all(self, tensor):
_type = _typemap[tensor.type()]
_ndim = tensor.dim()
_size = int_array(tensor.size())
_stride = int_array(tensor.stride())
for ptr in self.ptrs:
check_error(lib.cudnnSetTensorNdDescriptor(
ctypes.c_void_p(ptr), _typemap[tensor.type()], tensor.dim(),
int_array(tensor.size()), int_array(tensor.stride())))
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
def as_tuple(self):
return (self._type, tuple(self._size), tuple(self._stride))
def set_raw(self, i, _type, _ndim, _size, _stride):
ptr = self.ptrs[i]
check_error(lib.cudnnSetTensorNdDescriptor(
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
class ConvolutionDescriptor(object):
def __init__(self):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateConvolutionDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
def __del__(self):
check_error(lib.cudnnDestroyConvolutionDescriptor(self._as_parameter_))
del self._as_parameter_
def set(self, typename, pad, stride):
self._pad = pad
self._stride = stride
upscale = int_array([1, 1])
check_error(lib.cudnnSetConvolutionNdDescriptor(
self, 2, int_array(pad), int_array(stride), upscale,
CUDNN_CROSS_CORRELATION, _typemap[typename]))
def as_tuple(self):
return (self._pad, self._stride)
class FilterDescriptor(object):
def __init__(self):
ptr = ctypes.c_void_p()
@ -209,7 +158,8 @@ class FilterDescriptor(object):
self._size = weight.size()
datatype = _typemap[weight.type()]
check_error(lib.cudnnSetFilterNdDescriptor(
self, datatype, CUDNN_TENSOR_NCHW, weight.ndimension(), int_array(weight.size())))
self, datatype, CUDNN_TENSOR_NCHW, weight.ndimension(),
int_array(weight.size())))
def as_tuple(self):
return tuple(self._size)
@ -219,69 +169,93 @@ class DropoutDescriptor(object):
def __init__(self, handle, dropout, seed):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
self.state = None
self.dropout = dropout
self.handle = handle
dropout_states_size = ctypes.c_long()
check_error(lib.cudnnDropoutGetStatesSize(
handle,
ctypes.byref(dropout_states_size)))
self._set(dropout, seed)
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
def set_dropout(self, dropout, seed):
if dropout != self.dropout:
self._set(dropout, seed)
def _set(self, dropout, seed):
if self.state is None and dropout > 0:
dropout_states_size = ctypes.c_long()
check_error(lib.cudnnDropoutGetStatesSize(
self.handle,
ctypes.byref(dropout_states_size)))
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
state_ptr = self.state.data_ptr()
state_size = self.state.size(0)
else:
state_ptr = None
state_size = 0
check_error(lib.cudnnSetDropoutDescriptor(
self,
handle,
self.handle,
ctypes.c_float(dropout),
ctypes.c_void_p(self.state.data_ptr()),
ctypes.c_size_t(self.state.size(0)),
ctypes.c_void_p(state_ptr),
ctypes.c_size_t(state_size),
ctypes.c_ulonglong(seed),
))
self.dropout = dropout
def __del__(self):
check_error(lib.cudnnDestroyDropoutDescriptor(self))
class RNNDescriptor(object):
def __init__(self, hidden_size, num_layers, dropout_desc, input_mode,
bidirectional, mode, datatype):
def __init__(self, handle, hidden_size, num_layers, dropout_desc, input_mode,
bidirectional, mode, datatype):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateRNNDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
check_error(lib.cudnnSetRNNDescriptor(
self,
hidden_size,
num_layers,
dropout_desc,
input_mode,
bidirectional,
mode,
datatype
))
if version() >= 6000:
check_error(lib.cudnnSetRNNDescriptor_v6(
handle,
self,
hidden_size,
num_layers,
dropout_desc,
input_mode,
bidirectional,
mode,
CUDNN_RNN_ALGO_STANDARD,
datatype
))
else:
check_error(lib.cudnnSetRNNDescriptor(
self,
hidden_size,
num_layers,
dropout_desc,
input_mode,
bidirectional,
mode,
datatype
))
def __del__(self):
check_error(lib.cudnnDestroyRNNDescriptor(self))
class ConvolutionAlgoPerf(ctypes.Structure):
_fields_ = [
("algo", ctypes.c_int),
("status", ctypes.c_int),
("time", ctypes.c_float),
("memory", ctypes.c_size_t),
]
def check_error(status):
if status != 0:
raise CuDNNError(status)
def get_error_string(status):
return lib.cudnnGetErrorString(status)
def get_handle():
if lib is None:
_loadlib()
if _libcudnn() is None:
raise RuntimeError('cuDNN not available')
current_device = torch.cuda.current_device()
handle = _handles.get(current_device, None)
if handle is None:
@ -289,6 +263,7 @@ def get_handle():
_handles[current_device] = handle
return handle
_typemap = {
'torch.cuda.HalfTensor': CUDNN_DATA_HALF,
'torch.cuda.FloatTensor': CUDNN_DATA_FLOAT,
@ -296,11 +271,12 @@ _typemap = {
}
_sizeofmap = {
CUDNN_DATA_HALF : 2,
CUDNN_DATA_FLOAT : 4,
CUDNN_DATA_DOUBLE : 8,
CUDNN_DATA_HALF: 2,
CUDNN_DATA_FLOAT: 4,
CUDNN_DATA_DOUBLE: 8,
}
def c_type(tensor):
if isinstance(tensor, torch.cuda.HalfTensor):
return ctypes.c_float
@ -311,127 +287,36 @@ def c_type(tensor):
else:
raise ValueError("unknown type '{}'".format(type(tensor)))
def int_array(itr):
array_type = ctypes.c_int * len(itr)
return array_type(*itr)
def descriptor(tensor, N=None):
padded_size = tensor.size() + ((1,) * (5 - tensor.dim()))
tensor = tensor.view(padded_size)
if N is not None:
descriptor = TensorDescriptorArray(N)
descriptor.set_all(tensor)
else:
descriptor = TensorDescriptor()
if tensor.dim() == 2:
tensor = tensor.view(tensor.size(0), tensor.size(1), 1, 1)
elif tensor.dim() == 3:
tensor = tensor.view(tensor.size(0), tensor.size(1), tensor.size(2), 1)
descriptor.set(tensor)
return descriptor
_autotuner_forward = {}
_autotuner_backward_data = {}
_autotuner_backward_filter = {}
def convolution_autotuner_key(idesc, weight_desc, conv_desc):
return (idesc.as_tuple(), weight_desc.as_tuple(), conv_desc.as_tuple())
def descriptor_sequence(tensor, batch_sizes):
descriptors = TensorDescriptorArray(len(batch_sizes))
_type = _typemap[tensor.type()]
_ndim = 5
dim_pad = (1,) * (5 - tensor.dim())
_size = int_array(tensor.size() + dim_pad)
_stride = int_array(tensor.stride() + dim_pad)
for i, batch_size in enumerate(batch_sizes):
_size[0] = batch_size
descriptors.set_raw(i, _type, _ndim, _size, _stride)
return descriptors
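For packed sequences the input is flattened to (total_steps, input_size) and batch_sizes records how many sequences are still active at each time step; a sketch, assuming CUDA and cuDNN are available:

    import torch
    import torch.backends.cudnn as cudnn

    batch_sizes = [4, 3, 1]   # 4 sequences alive at t=0, 3 at t=1, 1 at t=2
    x = torch.cuda.FloatTensor(sum(batch_sizes), 10)    # packed (total_steps, input_size)
    descs = cudnn.descriptor_sequence(x, batch_sizes)   # one padded descriptor per step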
def convolution_forward_algorithm(idesc, weight_desc, conv_desc, odesc):
k = convolution_autotuner_key(idesc, weight_desc, conv_desc)
if k in _autotuner_forward:
return _autotuner_forward[k]
if benchmark:
perf_results = ConvolutionAlgoPerf()
algo_count = ctypes.c_int()
check_error(lib.cudnnFindConvolutionForwardAlgorithm(
get_handle(), idesc, weight_desc, conv_desc, odesc, 1,
ctypes.byref(algo_count), ctypes.byref(perf_results)))
_autotuner_forward[k] = perf_results.algo
return perf_results.algo
search_mode = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST
wlimit = 0
if workspace_limit is not None:
wlimit = workspace_limit
search_mode = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
fwd_alg = ctypes.c_int()
check_error(lib.cudnnGetConvolutionForwardAlgorithm(
get_handle(), idesc, weight_desc, conv_desc, odesc, search_mode,
wlimit, ctypes.byref(fwd_alg)))
return fwd_alg
def convolution_forward_workspace_size(*args):
check_error(lib.cudnnGetConvolutionForwardWorkspaceSize(*args))
def convolution_forward(*args):
check_error(lib.cudnnConvolutionForward(*args))
def convolution_backward_data(*args):
return check_error(lib.cudnnConvolutionBackwardData(*args))
def convolution_backward_data_algorithm(weight_desc, odesc, conv_desc, idesc):
k = convolution_autotuner_key(idesc, weight_desc, conv_desc)
if k in _autotuner_backward_data:
return _autotuner_backward_data[k]
if benchmark:
perf_results = ConvolutionAlgoPerf()
algo_count = ctypes.c_int()
check_error(lib.cudnnFindConvolutionBackwardDataAlgorithm(
get_handle(), weight_desc, odesc, conv_desc, idesc, 1,
ctypes.byref(algo_count), ctypes.byref(perf_results)))
_autotuner_backward_data[k] = perf_results.algo
return perf_results.algo
search_mode = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST
wlimit = 0
if workspace_limit is not None:
wlimit = workspace_limit
search_mode = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
bwd_data_alg = ctypes.c_int()
check_error(lib.cudnnGetConvolutionBackwardDataAlgorithm(
get_handle(), weight_desc, odesc, conv_desc, idesc, search_mode,
wlimit, ctypes.byref(bwd_data_alg)))
return bwd_data_alg
def convolution_backward_data_workspace_size(*args):
return check_error(lib.cudnnGetConvolutionBackwardDataWorkspaceSize(*args))
def convolution_backward_filter(*args):
return check_error(lib.cudnnConvolutionBackwardFilter(*args))
def convolution_backward_filter_algorithm(idesc, odesc, conv_desc, weight_desc):
k = convolution_autotuner_key(idesc, weight_desc, conv_desc)
if k in _autotuner_backward_filter:
return _autotuner_backward_filter[k]
if benchmark:
perf_results = ConvolutionAlgoPerf()
algo_count = ctypes.c_int()
check_error(lib.cudnnFindConvolutionBackwardFilterAlgorithm(
get_handle(), idesc, odesc, conv_desc, weight_desc, 1,
ctypes.byref(algo_count), ctypes.byref(perf_results)))
_autotuner_backward_filter[k] = perf_results.algo
return perf_results.algo
search_mode = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST
wlimit = 0
if workspace_limit is not None:
wlimit = workspace_limit
search_mode = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
bwd_filter_alg = ctypes.c_int()
check_error(lib.cudnnGetConvolutionBackwardFilterAlgorithm(
get_handle(), idesc, odesc, conv_desc, weight_desc, search_mode,
wlimit, ctypes.byref(bwd_filter_alg)))
return bwd_filter_alg
def convolution_backward_filter_workspace_size(*args):
return check_error(lib.cudnnGetConvolutionBackwardFilterWorkspaceSize(*args))
def convolution_backward_bias(*args):
check_error(lib.cudnnConvolutionBackwardBias(*args))
def add_tensor(*args):
check_error(lib.cudnnAddTensor(*args))


@ -3,6 +3,7 @@ import torch.backends.cudnn as cudnn
from torch.backends.cudnn import check_error
import ctypes
def get_cudnn_mode(mode):
if mode == 'RNN_RELU':
return cudnn.CUDNN_RNN_RELU
@ -17,9 +18,10 @@ def get_cudnn_mode(mode):
class Unserializable(object):
def __init__(self, inner):
self.inner = inner
def get(self):
return self.inner
@ -32,18 +34,20 @@ class Unserializable(object):
self.inner = None
def init_dropout_descriptor(fn, handle):
return cudnn.DropoutDescriptor(
handle,
fn.dropout,
fn.dropout_seed
)
def init_rnn_descriptor(fn):
def init_rnn_descriptor(fn, handle):
dropout_desc_name = 'desc_' + str(torch.cuda.current_device())
dropout_p = fn.dropout if fn.train else 0
if (dropout_desc_name not in fn.dropout_state) or (fn.dropout_state[dropout_desc_name].get() is None):
fn.dropout_state[dropout_desc_name] = Unserializable(
cudnn.DropoutDescriptor(handle, dropout_p, fn.dropout_seed)
)
dropout_desc = fn.dropout_state[dropout_desc_name].get()
dropout_desc.set_dropout(dropout_p, fn.dropout_seed)
return cudnn.RNNDescriptor(
handle,
fn.hidden_size,
fn.num_layers,
fn.dropout_state['desc'].get(),
dropout_desc,
fn.input_mode,
fn.bidirectional,
fn.mode,
@ -58,16 +62,22 @@ def init_weight_descriptor(fn, weight):
return w_desc
def _input_size(fn):
return (fn.seq_length, fn.mini_batch, fn.input_size)
def _input_size(fn, input):
if fn.batch_sizes is not None:
return (input.size(0), fn.input_size)
else:
return (fn.seq_length, fn.mini_batch, fn.input_size)
def _hidden_size(fn):
return (fn.num_layers * fn.num_directions, fn.mini_batch, fn.hidden_size)
def _output_size(fn):
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
def _output_size(fn, input):
if fn.batch_sizes is not None:
return (input.size(0), fn.hidden_size * fn.num_directions)
else:
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
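A sketch of the shapes these helpers produce in the padded (non-packed) case, using a minimal stand-in for fn with hypothetical values:

    class _Fn(object):
        batch_sizes = None   # padded input, not packed
        seq_length, mini_batch, input_size = 5, 4, 10
        hidden_size, num_layers, num_directions = 20, 2, 2

    fn = _Fn()
    assert _input_size(fn, None) == (5, 4, 10)
    assert _hidden_size(fn) == (4, 4, 20)    # num_layers * num_directions state rows
    assert _output_size(fn, None) == (5, 4, 40)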
def get_num_weights(handle, rnn_desc, x_desc, datatype):
@ -80,7 +90,7 @@ def get_num_weights(handle, rnn_desc, x_desc, datatype):
datatype
))
elem_size = cudnn._sizeofmap[datatype]
assert(weight_size.value % elem_size == 0)
assert weight_size.value % elem_size == 0
return weight_size.value // elem_size
@ -139,10 +149,11 @@ def get_parameters(fn, handle, weight_buf):
ctypes.byref(nb_dims),
ctypes.c_void_p(filter_dim_a.data_ptr())))
filter_dim_a.resize_(nb_dims.value)
assert nb_dims.value <= min_dim
filter_dim_a = filter_dim_a[:nb_dims.value]
elem_size = cudnn._sizeofmap[fn.datatype]
offset_bytes = (matrix_pointer.value - weight_buf.data_ptr())
assert(offset_bytes % elem_size == 0)
assert offset_bytes % elem_size == 0
offset = offset_bytes // elem_size
# for all the RNN types provided by CUDNN, all the ih weights
@ -151,17 +162,16 @@ def get_parameters(fn, handle, weight_buf):
# Since we're storing all the weights in a single tensor anyway,
# might as well merge the CUDNN ones into a single tensor as well
if linear_id == 0 or linear_id == num_linear_layers / 2:
assert(filter_dim_a.prod() == filter_dim_a[0])
assert filter_dim_a.prod() == filter_dim_a[0]
param = fn.weight_buf.new().set_(
weight_buf.storage(), offset,
filter_dim_a[0] * num_linear_layers // 2, filter_dim_a[2])
layer_params.append(param)
else:
assert(cur_offset == offset)
assert cur_offset == offset
cur_offset = offset + filter_dim_a[0]
params.append(layer_params)
return params
@ -170,7 +180,7 @@ def get_parameters(fn, handle, weight_buf):
def _copyParams(params_from, params_to):
for layer_params_from, layer_params_to in zip(params_from, params_to):
for param_from, param_to in zip(layer_params_from, layer_params_to):
assert(param_from.type() == param_to.type())
assert param_from.type() == param_to.type()
param_to.copy_(param_from)
@ -179,6 +189,7 @@ def forward(fn, input, hx, weight, output, hy):
lib = cudnn.lib
handle = cudnn.get_handle()
fn.datatype = cudnn._typemap[input.type()]
is_input_packed = fn.batch_sizes is not None
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
@ -186,37 +197,45 @@ def forward(fn, input, hx, weight, output, hy):
else:
cx, cy = None, None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
if input.dim() != 3:
if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
raise RuntimeError(
'input must have 3 dimensions, got {}'.format(input.dim()))
if fn.input_size != input.size(2):
raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
fn.input_size
if fn.input_size != input.size(-1):
raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
fn.input_size, input.size(-1)
))
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v5.1 and above')
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
if is_input_packed:
fn.seq_length = len(fn.batch_sizes)
fn.mini_batch = fn.batch_sizes[0]
fn.input_size = input.size(-1)
else:
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
output_size = _output_size(fn, input)
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
output.resize_(*output_size)
hy.resize_(*hidden_size).zero_()
hy.resize_(*hidden_size)
if cy is not None:
cy.resize_(*hidden_size).zero_()
cy.resize_(*hidden_size)
y = output
# init descriptors
if ('desc' not in fn.dropout_state) or (fn.dropout_state['desc'].get() is None):
fn.dropout_state['desc'] = Unserializable(
init_dropout_descriptor(fn, handle)
)
fn.rnn_desc = init_rnn_descriptor(fn)
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
fn.rnn_desc = init_rnn_descriptor(fn, handle)
if is_input_packed:
fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
else:
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
fn.hx_desc = cudnn.descriptor(hx)
fn.hy_desc = cudnn.descriptor(hx)
fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
@ -225,7 +244,7 @@ def forward(fn, input, hx, weight, output, hy):
# create the weight buffer and copy the weights into it
num_weights = get_num_weights(
handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
fn.weight_buf = input.new(num_weights)
fn.weight_buf = x.new(num_weights)
fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
w = fn.weight_buf
# this zero might not seem necessary, but it is in the case
@ -237,7 +256,7 @@ def forward(fn, input, hx, weight, output, hy):
if tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected hidden size {}, got {}'.format(
hidden_size, tuple(hx.size())))
if cx is not None and tuple(cx.size()) != hidden_size:
raise RuntimeError('Expected cell size {}, got {}'.format(
hidden_size, tuple(cx.size())))
@ -251,7 +270,7 @@ def forward(fn, input, hx, weight, output, hy):
ctypes.byref(workspace_size)
))
fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
if fn.train:
if fn.requires_grad:
reserve_size = ctypes.c_long()
check_error(lib.cudnnGetRNNTrainingReserveSize(
handle,
@ -291,13 +310,13 @@ def forward(fn, input, hx, weight, output, hy):
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0)
))
if fn.batch_first:
output = output.transpose_(0, 1)
if fn.batch_first and not is_input_packed:
output.transpose_(0, 1)
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
with torch.cuda.device_of(input):
is_input_packed = fn.batch_sizes is not None
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
@ -307,33 +326,35 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
else:
cx, grad_cx, grad_cy = None, None, None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
grad_output = grad_output.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
input_size = _input_size(fn, input)
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
output_size = _output_size(fn, input)
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
dy = grad_output.contiguous()
y = output
w = fn.weight_buf
dx = grad_input.resize_as_(input)
dhy = grad_hy.resize_(*hidden_size)
dcy = grad_cy.resize_(*hidden_size) if grad_cy is not None else None
dhy = grad_hy.contiguous().view(*hidden_size)
dcy = grad_cy.contiguous().view(*hidden_size) if grad_cy is not None else None
dhx = grad_hx.resize_(*hidden_size)
dcx = grad_cx.resize_(*hidden_size) if grad_cx is not None else None
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if not fn.train:
raise RuntimeError('backward_grad can only be called when training!')
if not fn.requires_grad:
raise RuntimeError('backward_grad can only be called when the function requires grad!')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if tuple(output.size()) != _output_size(fn):
if tuple(output.size()) != output_size:
raise RuntimeError('Expected output size {}, got {}'.format(
output_size, output.size()))
if hx is not None and tuple(hx.size()) != hidden_size:
@ -348,6 +369,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
if dcy is not None and tuple(dcy.size()) != hidden_size:
raise RuntimeError('Expected d_cell size {}, got {}'.format(
hidden_size, dcy.size()))
if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
raise RuntimeError('Gradients aren\'t CUDA tensors')
check_error(cudnn.lib.cudnnRNNBackwardData(
handle,
@ -367,7 +390,7 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
))
if fn.batch_first:
if fn.batch_first and not is_input_packed:
grad_input = grad_input.transpose_(0, 1)
@ -386,30 +409,32 @@ def _num_linear_layers(fn):
def backward_weight(fn, input, hx, output, weight, grad_weight):
with torch.cuda.device_of(input):
is_input_packed = fn.batch_sizes is not None
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
else:
cx = None
if fn.batch_first:
if fn.batch_first and not is_input_packed:
input = input.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
input_size = _input_size(fn, input)
hidden_size = _hidden_size(fn)
if not fn.train:
raise RuntimeError('backward_weight can only be called when training!')
if not fn.requires_grad:
raise RuntimeError('backward_weight can only be called when the function requires grad!')
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if not fn.train:
raise RuntimeError('backward_weight can only be called when training!')
if tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected input size {}, got {}'.format(
hidden_size, hx.size()))
assert hx.is_contiguous()
assert cx is None or cx.is_contiguous()
x = input.contiguous()
y = output
dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

torch/csrc/DynamicTypes.cpp

@ -0,0 +1,181 @@
#include "DynamicTypes.h"
#include "THP.h"
#include <vector>
#include <unordered_map>
#include <THPP/tensors/THTensor.hpp>
#include <THPP/tensors/THSTensor.hpp>
#ifdef WITH_CUDA
#include <THC/THC.h>
#include <THCS/THCS.h>
#include <THPP/tensors/THCTensor.hpp>
#include <THPP/tensors/THCSTensor.hpp>
extern THCState* state;
#endif
using namespace thpp;
namespace torch {
struct TensorType {
Type data_type;
bool is_cuda;
bool is_sparse;
friend bool operator==(const TensorType &t1, const TensorType &t2)
{
return (t1.data_type == t2.data_type &&
t1.is_cuda == t2.is_cuda &&
t1.is_sparse == t2.is_sparse);
}
friend bool operator!=(const TensorType &t1, const TensorType &t2)
{
return !(t1 == t2);
}
};
struct TensorTypeHasher
{
std::size_t operator()(const TensorType& k) const
{
size_t hash = static_cast<size_t>(k.data_type);
hash = (hash << 8) + k.is_cuda;
hash = (hash << 1) + k.is_sparse;
return hash;
}
};
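The hasher packs the three fields into a single word: the data type code in the high bits, eight bits for is_cuda, one bit for is_sparse. The same combination, rendered in Python purely for illustration:

    def tensor_type_hash(data_type, is_cuda, is_sparse):
        # mirrors TensorTypeHasher above
        h = int(data_type)
        h = (h << 8) + int(is_cuda)
        h = (h << 1) + int(is_sparse)
        return h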
static std::unordered_map<std::string, Type> type_names = {
{"Float", Type::FLOAT},
{"Double", Type::DOUBLE},
{"Half", Type::HALF},
{"Byte", Type::UCHAR},
{"Char", Type::CHAR},
{"Short", Type::SHORT},
{"Int", Type::INT},
{"Long", Type::LONG},
};
static std::unordered_map<PyTypeObject*, TensorType> pytype_to_tensortype;
static std::unordered_map<TensorType, PyTypeObject*, TensorTypeHasher> tensortype_to_pytype;
void registerPyTypeObject(PyTypeObject *pytype, const std::string& name, bool is_cuda, bool is_sparse)
{
TensorType type;
type.data_type = type_names.at(name);
type.is_cuda = is_cuda;
type.is_sparse = is_sparse;
pytype_to_tensortype[pytype] = type;
tensortype_to_pytype[type] = pytype;
}
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor)
{
TensorType type;
type.data_type = tensor.type();
type.is_cuda = tensor.isCuda();
type.is_sparse = tensor.isSparse();
return tensortype_to_pytype.at(type);
}
static std::unique_ptr<Tensor> createTensor(void *tensor, Type type, bool is_cuda, bool is_sparse)
{
if (is_cuda) {
#ifdef WITH_CUDA
if (is_sparse) {
if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THCSTensor<unsigned char>(state, (THCSByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THCSTensor<char>(state, (THCSCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THCSTensor<short>(state, (THCSShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THCSTensor<int>(state, (THCSIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THCSTensor<long>(state, (THCSLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THCSTensor<float>(state, (THCSFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THCSTensor<double>(state, (THCSDoubleTensor*)tensor));
} else if (type == Type::HALF) {
return std::unique_ptr<Tensor>(new THCSTensor<half>(state, (THCSHalfTensor*)tensor));
}
} else if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THCTensor<unsigned char>(state, (THCudaByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THCTensor<char>(state, (THCudaCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THCTensor<short>(state, (THCudaShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THCTensor<int>(state, (THCudaIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THCTensor<long>(state, (THCudaLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THCTensor<float>(state, (THCudaTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THCTensor<double>(state, (THCudaDoubleTensor*)tensor));
} else if (type == Type::HALF) {
return std::unique_ptr<Tensor>(new THCTensor<half>(state, (THCudaHalfTensor*)tensor));
}
#else
throw std::runtime_error("Compiled without CUDA support");
#endif
} else if (is_sparse) {
if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THSTensor<unsigned char>((THSByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THSTensor<char>((THSCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THSTensor<short>((THSShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THSTensor<int>((THSIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THSTensor<long>((THSLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THSTensor<float>((THSFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THSTensor<double>((THSDoubleTensor*)tensor));
}
} else if (type == Type::UCHAR) {
return std::unique_ptr<Tensor>(new THTensor<unsigned char>((THByteTensor*)tensor));
} else if (type == Type::CHAR) {
return std::unique_ptr<Tensor>(new THTensor<char>((THCharTensor*)tensor));
} else if (type == Type::SHORT) {
return std::unique_ptr<Tensor>(new THTensor<short>((THShortTensor*)tensor));
} else if (type == Type::INT) {
return std::unique_ptr<Tensor>(new THTensor<int>((THIntTensor*)tensor));
} else if (type == Type::LONG) {
return std::unique_ptr<Tensor>(new THTensor<long>((THLongTensor*)tensor));
} else if (type == Type::FLOAT) {
return std::unique_ptr<Tensor>(new THTensor<float>((THFloatTensor*)tensor));
} else if (type == Type::DOUBLE) {
return std::unique_ptr<Tensor>(new THTensor<double>((THDoubleTensor*)tensor));
}
throw std::invalid_argument("Unsupported tensor type");
}
std::unique_ptr<Tensor> createTensor(PyObject *data)
{
auto tensor_type = pytype_to_tensortype.at(Py_TYPE(data));
auto type = tensor_type.data_type;
auto tensor = ((THPVoidTensor *)data)->cdata;
auto wrapper = createTensor(tensor, type, tensor_type.is_cuda, tensor_type.is_sparse);
wrapper->retain();
return wrapper;
}
PyObject* createPyObject(const thpp::Tensor& tensor)
{
auto type = getPyTypeObject(tensor);
PyObject *obj = type->tp_alloc(type, 0);
if (obj) {
((THPVoidTensor*)obj)->cdata = (THVoidTensor *)const_cast<thpp::Tensor&>(tensor).retain().cdata();
}
return obj;
}
} // namespace

torch/csrc/DynamicTypes.h

@ -0,0 +1,25 @@
#pragma once
// Provides conversions between Python tensor objects and thpp::Tensors.
#include <memory>
#include <Python.h>
#include <THPP/THPP.h>
namespace torch {
// Register a PyTypeObject* with the given attributes
void registerPyTypeObject(
PyTypeObject *pytype, const std::string& name,
bool is_cuda, bool is_sparse);
// Gets the PyTypeObject* corresponding to the Tensor
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor);
// Creates a Tensor from a Python tensor object
std::unique_ptr<thpp::Tensor> createTensor(PyObject *data);
// Creates Python tensor object from a Tensor
PyObject* createPyObject(const thpp::Tensor& tensor);
} // namespace torch

Some files were not shown because too many files have changed in this diff.