Compare commits

...

1613 Commits

Author SHA1 Message Date
b13b7010b9 check for nvidia driver's sufficiency before checking for number of CUDA devices (#1156) 2017-03-31 12:19:59 -04:00
5c79046d39 Use persistent tensor to store exp_inf (part of optimizer's state) (#1152) 2017-03-31 10:30:31 -04:00
30fd222b80 implement autograd function cross (#1138) 2017-03-31 01:45:51 -04:00
761eef1f19 Minor typo fix in backward function in torch/autograd/variable.py (#1143) 2017-03-30 11:23:28 -04:00
8aa1cefed8 Fix deadlock in autograd (#1140) 2017-03-29 16:19:40 -04:00
0d908d813b Implements Cumsum function for autograd (#1122) 2017-03-29 17:45:57 +02:00
1c391f6f93 bump version 2017-03-29 10:08:34 -04:00
be146fd721 Add btriunpack and update the btrifact test. 2017-03-29 13:42:13 +02:00
2979f4b989 add more functions to docs 2017-03-29 01:29:17 -04:00
22b3600f19 add samplers to documentation 2017-03-29 00:33:07 -04:00
215813d7ac Change dockerfile to support cudnn v6 (#1135) 2017-03-28 20:05:04 -04:00
dc7695a47a Update links for tutorials in README (#1123) 2017-03-28 14:21:40 +02:00
032a65edff modify pip uninstall command in CONTRIBUTING.md 2017-03-28 14:20:49 +02:00
e4b4e515cd add mode to cwrap 2017-03-27 13:29:14 -07:00
4b1f5f4bd6 Merge commit 'afd576ec0e389db3e47efe44652c488b1706f168' 2017-03-27 13:26:50 -07:00
afd576ec0e Add mode kernel 2017-03-27 15:58:47 -04:00
95aa2af377 btrisolve: Make a Tensor method and update argument order
Also update docs for btrifact and btrisolve to the newest interface.
2017-03-27 15:46:49 -04:00
6774d39c96 Merge commit '5d274cd4991022d63b014cc8917e00c15441d3f4' 2017-03-27 11:54:08 -07:00
567faedc59 Merge commit '8051dec608368fed3569c7513292785083adc53c' 2017-03-27 11:53:41 -07:00
3eab8a71e2 Added docstring to add_module (#1116) 2017-03-27 11:09:24 -04:00
2fd4d088ff add Adaptive pooling methods to docs 2017-03-26 22:43:46 -04:00
5d274cd499 Update btrisolve argument order. 2017-03-26 13:07:24 -04:00
8051dec608 Update btrisolve argument order. 2017-03-26 13:06:34 -04:00
f2c1071c33 Adaptive max and average pooling (1D & 2D) (#1084) 2017-03-26 17:09:28 +02:00
bb71117ecc Cwrap arg assign (#1102) 2017-03-26 13:53:28 +02:00
d25433a099 Fix docker build commands (#1103) 2017-03-25 16:18:33 -04:00
7dd45490f8 don't use inplace backward, remove unnecessary zero for grad_input (#1079) 2017-03-25 20:04:48 +01:00
bf632544e6 Pass NULL rinfo_ to btrifact by default (#1089) 2017-03-24 19:49:40 -04:00
282402d4f3 Revert "Add back zero fill for ger" (#1093)
This reverts commit 5a761dbe65d2221e9c200b3f8ea0590b5d9b923f.
2017-03-24 19:49:31 -04:00
cce03074f5 Merge commit '3acbbb30f2bdc6ccf4ffb6f7d568e7916d4e384d' 2017-03-24 16:19:44 -07:00
f2f63773d8 Merge commit '52911f9e47f679045a238eb9dfdc5db55bf98cc9' 2017-03-24 16:19:19 -07:00
84aa41824c Merge commit 'b4fe5ad641181f30bdcc4749c949206a3ebb04b4' 2017-03-24 16:19:05 -07:00
25c8a117af Merge commit 'e8196f990db4ba368010f0d950bebf1fb13c2888' 2017-03-24 16:18:52 -07:00
ae122707b5 Don't do extra resize in linear bias 2017-03-24 23:41:15 +01:00
b4fe5ad641 Use zero instead of mul when beta == 0 in addr 2017-03-24 13:09:00 -07:00
5a761dbe65 Add back zero fill for ger
Ger does not have beta argument, so has to be zero-filled.
2017-03-24 21:03:02 +01:00
dd893391d5 Add argument to children to yield the name of the modules (#941) 2017-03-24 20:02:05 +01:00
e8196f990d Make rinfo_ argument optional in btrifact 2017-03-24 09:01:36 -07:00
269b77a1b2 Make rinfo_ optional in btrifact 2017-03-24 09:00:39 -07:00
476d85dd3f DataLoader: Fix batch data type for numpy array (#1074) 2017-03-24 11:34:24 -04:00
63f6c0d692 add Pairwise distance (#835) 2017-03-24 11:29:40 -04:00
b546fa3fcd add assertTrue to padding tests 2017-03-24 15:27:51 +01:00
1d656b6769 Ensure displayed progress in ProgressMonitor is between 0 and 100%.
Fixes #1086
2017-03-24 15:21:52 +01:00
3acbbb30f2 Fix inconsistent in-place and out-of-place for HardTanh
in-place and out-of-place updateGradOutput results are different where input=min_val or input=max_val
2017-03-23 17:27:29 -07:00
52911f9e47 Fix inconsistent in-place and out-of-place implementations
Currently the in-place and out-of-place updateGradOutput produce different results when input=max_val or input=min_val: the in-place version does not backprop the gradient at those points, while the out-of-place version does.
2017-03-23 17:22:55 -07:00
a65e0f488c Remove zero fill where not needed (#1077) 2017-03-23 19:44:00 -04:00
8dc5d2a22e export current_blas_handle 2017-03-23 23:32:45 +01:00
bb353ccc17 Add batch triangular factorization and solves, add IntegerTensor to cwrap (#903) 2017-03-23 15:06:00 -04:00
ced0054a9e Fix formula for stddevs grad in Normal function (#1076) 2017-03-23 14:32:34 -04:00
68ee5ede29 make inplace tests compare input grads 2017-03-23 18:54:00 +01:00
4df98e2927 Merge commit '3865606299b1fbcd0a94cef4a66c1bc007246da8' 2017-03-23 08:39:43 -07:00
6ccac5ce28 Merge commit 'd3334db6274d7a3cd07f20d583056e453dc8134d' 2017-03-23 08:39:30 -07:00
3865606299 adding batch triangular factorization and solves, add IntegerTensor to cwrap 2017-03-23 11:37:00 -04:00
d3334db627 adding batch triangular factorization and solves, add IntegerTensor to cwrap 2017-03-23 11:35:35 -04:00
50f5a4dd18 fix BCE loss formula visualization (#1072) 2017-03-23 11:27:21 -04:00
b60936b9ae fix NLLLoss2d documentation 2017-03-23 10:06:40 -04:00
2d750b9da5 fix typo 2017-03-23 09:40:06 -04:00
ca376d4584 implement autograd function trace 2017-03-23 10:37:52 +01:00
ef183a1d23 Merge commit '5cd313ed23a3b11ddd739bcfedaee6e310e4e438' 2017-03-22 19:25:46 -07:00
f4d8944973 fix OSX fread bug (#1068) 2017-03-22 22:06:14 -04:00
6b7aef63ac Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 20:36:52 -04:00
b3ab4b1094 Check torch.backends.cudnn.enabled, padding, and output_padding (#996)
* Check torch.backends.cudnn.enabled
* Don't allow negative padding and output_padding values
2017-03-22 19:42:11 -04:00
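A small aside on the first check in the entry above, assuming the torch.backends.cudnn module as it is exposed today: the enabled flag (together with is_acceptable) is what the bindings consult before dispatching an operation to cuDNN.

    import torch
    import torch.backends.cudnn as cudnn

    x = torch.randn(1, 3, 8, 8)
    if torch.cuda.is_available():
        x = x.cuda()
    # mirrors the kind of check the nn bindings perform before using cuDNN
    use_cudnn = cudnn.enabled and x.is_cuda and cudnn.is_acceptable(x)
    print(use_cudnn)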
1e8cb82a2d Break only after the update in L-BFGS 2017-03-22 18:58:42 -04:00
dd399a8d68 Return total param norm from clip_grad_norm 2017-03-22 18:58:42 -04:00
faac0f5c25 Fix torch.cat bugs
Always use the PySequence API and disallow catting along nonexistent dimensions.
2017-03-22 18:58:42 -04:00
c36f47bd1e Make random_ exclusive and make generator kwarg only in all random
functions
2017-03-22 18:58:42 -04:00
3d1888cd95 Fix size mismatch in CosineEmbeddingLoss backward 2017-03-22 18:58:42 -04:00
97a82a3018 fix formatting in upsampling docs (#1067) 2017-03-22 18:06:31 -04:00
5cd313ed23 Fix TH_TENSOR_APPLYX_D in the case where the dimension of interest is the inner dimension 2017-03-22 13:15:01 -07:00
b414494035 Merge commit '714b2b8bf657afe41cc8503998b6d919339b8075' 2017-03-22 12:49:29 -07:00
c10efc646e Merge commit 'e17d84d38edf6094175deead555abbc96321b69f' 2017-03-22 12:49:11 -07:00
348531ad8d Merge commit '0056b0883426e38ffbd646c040b6c281d12673f2' 2017-03-22 12:48:57 -07:00
714b2b8bf6 Merge pull request #453 from apaszke/lookup_renorm
Cast accumulator in LookupTable renorm to accreal
2017-03-22 11:53:41 -04:00
fe4bd5066b Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 11:45:02 -04:00
e17d84d38e Added support for multidimensional tensors in PReLU; Channel number now in second dimension 2017-03-22 11:44:28 -04:00
b9aef6bc03 Fixing default values for LR and Epsilon (#895)
It seems that the default values for LR and Epsilon (previously, 1E-2 and 1E-38 respectively) were different from the ones recommended by the authors (2E-3 and 1E-8, respectively). Other packages such as Keras (https://github.com/fchollet/keras/blob/master/keras/optimizers.py#L474) and Lasagne (https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py#L612) use the suggested values as well.
2017-03-22 11:34:39 -04:00
0056b08834 Narrow V when returning only some right singular vectors 2017-03-22 08:33:03 -07:00
bd0df61bb5 Cast accumulator in LookupTable renorm to accreal 2017-03-22 08:29:39 -07:00
d9678c2e34 Correct typo in batchnorm documentation 2017-03-22 13:55:45 +01:00
b3c0aa3b7d fix a typo in ffi doc (#1055) 2017-03-21 15:37:48 -05:00
77fbc12f23 Fix some deadlocks when torch_shm_manager is not found (#1030)
- Add additional timeouts to test_multiprocessing to reduce chances of
   hanging indefinitely on failure
 - Add missing header guards
 - Fix typo
 - Check that torch_shm_manager exists in torch/__init__.py
2017-03-17 18:28:39 -04:00
7e46eb1613 Fixes for Prod and Expand functions (#1026)
Thanks to @ChangYong-Oh for the original implementation.
2017-03-17 18:24:44 -04:00
821656d2d8 add CONTRIBUTING document 2017-03-17 07:59:37 -04:00
86e40ed875 Fix a typo in docs about pinned memory buffers (#1023)
* remove misleading guide for BCELoss

* fix docs about pinned memory buffers
2017-03-17 05:08:03 -04:00
b9379cfab7 Use cuDNN and NCCL symbols from _C library (#1017)
This ensures that we use the same library at the C++ level and with
Python ctypes. It moves the searching for the correct library from
run-time to compile-time.
2017-03-16 16:10:17 -04:00
f0b75c4aa4 Merge pull request #729 from shenxiul/cuda_linspace
linspace and logspace for CUDA Tensors
2017-03-16 14:03:00 -04:00
7654b3f49e Add function to compute cross_entropy for 2D image (#802) 2017-03-16 17:34:04 +01:00
37ebbc2809 the length of any item in padded_sequence should be greater than 0 (#1013) 2017-03-16 17:32:43 +01:00
29ddbc3e37 implement linspace, logspace and range in CUDA 2017-03-15 20:50:30 -07:00
16a133ed9a Fixes for testing on FB infra (#1009)
- make each test in test_autograd have a unique name ignoring case
 - assemble all tests when test_legacy_nn is imported
 - import Python.h in PtrWrapper.h
2017-03-15 18:37:11 -04:00
c4d1318662 Fix map_location in torch.load (#1006) 2017-03-15 16:54:19 -04:00
379ae6d865 Refactor out dispatchStateless (#1007)
Some of the error messages were incorrect due to erroneous
'tensor == THPDefaultTensorClass' checks
2017-03-15 16:24:55 -04:00
24376ff9d3 Merge pull request #723 from killeent/scan-primitive
add implementation of inclusive scan via upsweep-downsweep
2017-03-15 14:37:21 -04:00
be6322e4b5 Update nn.init docstrings to correctly reference the module (#1001) 2017-03-15 11:17:59 -04:00
62063b2f62 Fix docs for pointwise ops (#845) (#985)
* add torch.nn.init docs to the source folder
2017-03-15 11:08:05 -04:00
13b1580613 add F.pad to docs 2017-03-15 00:09:14 -04:00
e50a1f19b3 Use streams in scatter to overlap copy with compute 2017-03-14 22:46:07 +01:00
e86db387ba Fix conv1d backward segfault (#999) 2017-03-14 16:15:53 -04:00
704ee3ca68 Use cudart symbols from the main program.
Our extension library links against cudart and pulls in the symbols. Use
LoadLibrary(None) to use the same symbols as the _C extension.

This fixes the PyTorch wheel when you don't have system CUDA installed.
2017-03-13 19:45:34 -04:00
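For orientation, a minimal sketch of the LoadLibrary(None) trick described above, assuming a POSIX system: passing None to ctypes.CDLL is dlopen(NULL), which exposes symbols already linked into the running process (such as the cudart pulled in by the _C extension) instead of loading a second copy.

    import ctypes

    lib = ctypes.CDLL(None)                            # dlopen(NULL): symbols already in the process
    has_cudart = hasattr(lib, 'cudaDriverGetVersion')  # True only if a CUDA runtime is linked in
    print(has_cudart)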
9004652c7b updated the documentation to remove the unnecessary copying of grads when using multiprocessing 2017-03-13 19:04:17 -04:00
aca6ce984c change lookup table sort 2017-03-13 13:55:16 -07:00
ed8773f7bd add legacy_serialized.pt to gitignore 2017-03-13 16:37:35 -04:00
48f48b6ff2 fix more flaky VolumetricMaxPooling tests 2017-03-13 14:38:27 -04:00
615b27eadf fix corner case in SetItem of Variable 2017-03-13 14:38:27 -04:00
170d790b66 fix doc of conv3d in conv.py (#989)
the second dimension should be height.
2017-03-13 11:30:13 -04:00
e216f557fd Fixes issue returning strings from a Dataloader with pin_memory=True (#908) 2017-03-13 10:11:07 +01:00
997312c233 Add WeightedRandomSampler (#980)
Samples elements from `[0,..,len(weights)-1]` with the given probabilities (weights). So far there is no way to introduce sample weights either in loss functions or when sampling from a dataset; this is an attempt to add the functionality for the latter.
2017-03-13 00:27:05 -04:00
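A short usage sketch, assuming the torch.utils.data.sampler.WeightedRandomSampler interface this PR introduces (per-sample weights plus the number of samples to draw); the names below are illustrative only.

    import torch
    from torch.utils.data import TensorDataset, DataLoader
    from torch.utils.data.sampler import WeightedRandomSampler

    data = torch.randn(6, 3)
    targets = torch.LongTensor([0, 0, 0, 0, 1, 1])             # class 1 is rare
    weights = [0.1 if int(t) == 0 else 0.9 for t in targets]   # per-sample sampling weights
    sampler = WeightedRandomSampler(weights, num_samples=6)
    loader = DataLoader(TensorDataset(data, targets), batch_size=2, sampler=sampler)
    for batch_data, batch_targets in loader:
        print(batch_targets)                                   # the rare class is drawn more often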
d602b3a834 Allow submodules and parameters to shadow attrs on assignment 2017-03-12 13:31:32 -04:00
f531d98341 Fix memory leak in torch.from_numpy 2017-03-12 13:31:32 -04:00
6bdd5ecaf5 Remove some unnecessary AutoGPU calls 2017-03-12 13:31:32 -04:00
bfbde9d6eb Fix Embedding bug when max_norm was used 2017-03-12 13:31:32 -04:00
b9c816a796 Fix run_test.sh --coverage option. (#983) 2017-03-11 19:26:02 -05:00
2f5c215d34 Update setup.py (#981)
Adding `description` to `setup.py`
2017-03-11 12:14:07 -05:00
01650ac9de add torch.nn.init docs to the source folder (#979) 2017-03-11 10:11:30 -05:00
ce536aa355 fix example in docs for NLLLoss 2017-03-10 16:48:08 -05:00
fc0af33a18 key only block-wide bitonic sort 2017-03-10 11:50:43 -08:00
c7c4778af6 modify docs of broadcast to fix issuse #940 (#970) 2017-03-10 09:54:43 -05:00
73a65cd29f simple ordering fix to avoid gcc warning 2017-03-09 17:10:59 -08:00
b785ed0ac0 Fix Embedding and CosineEmbeddingLoss on non-float CUDA (#965) 2017-03-09 18:04:40 -05:00
b2d077d81d Update _tensor_docs.py (#966) 2017-03-09 18:04:19 -05:00
b1c2714ad5 Add momentum and centered options to RMSProp (#810)
* add momentum and centered options

Add two options :
 - Momentum (like SGD's momentum)
- Centered RMSprop, as in Graves 2013 (https://arxiv.org/abs/1308.0850): the gradient is normalized by a running estimate of its variance

* some PEP8

* bug in default

* bug2

* sign mistake

* alloc of momentum & centered only if needed

* add link to docstring

* some pep8 on docstring

* implement __setstate__() for backward compatibility

* correct grammar mistake

* multiply by lr when adding delta to params

* rename momentum variables

* change __init__ params order
2017-03-09 10:04:32 +01:00
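A brief usage sketch for the two new options, assuming the torch.optim.RMSprop keyword arguments this PR settles on (momentum and centered):

    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(10, 2)
    # momentum behaves like SGD's momentum; centered=True normalizes the gradient
    # by a running estimate of its variance (Graves 2013)
    optimizer = optim.RMSprop(model.parameters(), lr=1e-3, momentum=0.9, centered=True)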
a462edd0f6 Docs(RNN|GRU|LSTM): Note dropout applies to all layers *except* the last layer (#961)
This is an important clarification to make, as otherwise users are misled as to where they may need to add dropout, and clarifying the situation would require delving into the backend implementation.
4647f753bc/torch/nn/_functions/rnn.py (L73)
2017-03-08 18:09:11 -05:00
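To make the clarified behaviour concrete, a small sketch assuming the usual nn.LSTM constructor: with two stacked layers and dropout=0.5, dropout is applied to the output of the first layer only, never after the last one.

    import torch.nn as nn

    # dropout acts between stacked layers; the output of the last layer is untouched
    lstm = nn.LSTM(input_size=32, hidden_size=64, num_layers=2, dropout=0.5)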
c2425fc9a1 Fix build warning for C file 2017-03-08 21:28:57 +01:00
fbcedf2da2 Merge commit '3d95e13b332e1b31d706b59c3b67f886958ece79' 2017-03-08 09:09:46 -08:00
3d95e13b33 Check event_count before merging blocks 2017-03-08 08:49:04 -08:00
228e1a8696 Add CUDA caching allocator accessor 2017-03-08 08:29:50 -08:00
3fa8a3ff46 add implementation of inclusive scan via upsweep-downsweep 2017-03-08 07:34:14 -08:00
4647f753bc Merge commit '0f872ed02fbaf5b326f235b3f18724171b061416' 2017-03-07 14:45:01 -08:00
7ba5e7cea1 fix VolumetricMaxPooling test instability (#952) 2017-03-07 10:55:46 -05:00
9b626a8047 Fix documentation - replace 'matrix' with 'vector' (#951) 2017-03-07 10:40:18 -05:00
bd0e9a73c7 Fix some simple build error on MacOS (#949)
Issue #948

Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-03-07 09:47:49 -05:00
2b1cd919ce Update extending.rst (#933) 2017-03-06 23:23:14 -05:00
8e46a15605 add docs for set_printoptions to sphinx (#945) 2017-03-06 21:52:37 -05:00
15a9fbdedb Merge pull request #881 from colesbury/parallelize_backwards
Parallelize autograd backwards
2017-03-06 16:57:19 -05:00
6336300880 Fix bug where adding a hook could replace an existing hook.
We were keying hooks by RemovableHandle id. However, we don't hold onto
handles, and ids of dead objects can be reused. This replaces id(handle)
with a global counter.
2017-03-06 12:47:53 -08:00
5073132837 Implement 'pre' and 'post' hooks at the C++ autograd level 2017-03-06 12:47:53 -08:00
65b66264d4 Improve broadcast/reduce performance by coalescing tensors 2017-03-06 12:47:53 -08:00
0f872ed02f Add THCCachingAllocator_recordStream()
This is similar to THCCachingHostAllocator_recordEvent() but on CUDA
allocations. It's useful for overlapping copies with computation. The
workflow is approximately:

  0. allocate dst tensor on copy stream
  1. copy from CPU to GPU on copy stream
  2. synchronize the main stream with the copy stream via
     cudaStreamWaitEvent
  3. THCCachingAllocator_recordStream(dst, main_stream)

The recordStream() call is necessary to prevent the dst tensor from
being reused on the copy stream before the main stream finishes work.

Previously, you would need to insert a second cudaStreamWaitEvent before
dst is freed to force the copy stream to wait on the main stream.
2017-03-06 10:50:19 -08:00
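The same workflow, sketched at the Python level purely for orientation (the commit itself is at the C allocator level); this assumes the torch.cuda.Stream and Tensor.record_stream APIs of later releases.

    import torch

    if torch.cuda.is_available():
        copy_stream = torch.cuda.Stream()
        src = torch.randn(1024).pin_memory()
        with torch.cuda.stream(copy_stream):     # 0./1. allocate dst and copy on the copy stream
            dst = src.cuda(non_blocking=True)
        main = torch.cuda.current_stream()
        main.wait_stream(copy_stream)            # 2. main stream waits for the copy
        dst.record_stream(main)                  # 3. keep dst's memory from being reused on the
                                                 #    copy stream before the main stream finishes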
761d6799be code syntax error in document (serialization.rst) (#937) 2017-03-06 10:06:04 -05:00
0d179aa8db Updated datasets.rst, combined all commits (#931)
Added MNIST in the docs

Updated incomplete cifar doc

Updated the datasets.rst to include all datasets
2017-03-05 17:38:28 -05:00
5b171ad7c2 remove misleading guide for BCELoss (#924) 2017-03-05 14:31:01 -05:00
ac9245aeb3 import numpy before setting dlopen flags (#928) 2017-03-05 14:30:13 -05:00
60736bdf99 fix corner case in kwargs for DataParallel (#930) 2017-03-05 14:27:52 -05:00
7d58765cee docs: Fixed example code bug in extending module doc. 2017-03-05 12:09:08 -05:00
76f7d749e4 bump version 2017-03-05 08:49:52 -08:00
0b7374eb44 add THCS to build_all flags 2017-03-05 11:32:43 -05:00
6fff764155 replace old select_compute_arch.cmake with new 2017-03-05 11:32:43 -05:00
8ced72ccb8 link THPP to THCS when CUDA available 2017-03-05 11:32:43 -05:00
b1ae7f90d5 Added functionality for data parallel table (#843) 2017-03-05 02:35:46 +01:00
8b61ee522e Merge commit 'aec182ae72d51dad0f46cdfe7ff9a41380d7da35' 2017-03-04 08:58:21 -08:00
76ca3eb191 Merge commit 'fea50a51ee2d9af15c42f785ab2232469357b557' 2017-03-04 08:58:02 -08:00
fea50a51ee reintroduce USE_AVX* for files which dont have -mavx* set 2017-03-04 08:55:43 -08:00
51e589ed73 fix critical bug in adds SSE implementation 2017-03-04 08:39:19 -08:00
2e87643761 remove fastmath for everything except simd/convolve 2017-03-04 08:16:47 -08:00
ba9a85f271 fix bug introduced in #952 2017-03-03 21:00:05 -08:00
0714d7a3ca set AVX/AVX2 flags only for specific files 2017-03-03 12:17:14 -08:00
34ce58c909 Parallelize backwards 2017-03-03 11:26:00 -08:00
c238ee3681 Fix issues with lazy grad initialization (#912) 2017-03-03 14:23:51 -05:00
f5338a1fb8 compile AVX and AVX2 intrinsic code in separate files. Cleanup use of USE_AVX and USE_AVX2 macros in favor of __AVX__ and __AVX2__ 2017-03-03 10:30:18 -08:00
d96ad41191 cleanup TH CMakeLists and THGeneral.h of unused flags 2017-03-03 09:48:26 -08:00
f17cfe4293 sparse tensor operations (#735) 2017-03-03 18:37:03 +01:00
aec182ae72 Support half precision in baddbmm 2017-03-03 16:15:39 +01:00
c93c884ee2 Add negative dimension to transpose and tests (#792) 2017-03-03 09:31:22 -05:00
c42a2d4d24 Fix dimension check for cat (#959)
* Use TH_INDEX_BASE when verifying dimension for cat

* Adding tests for cat when no dimension is specified.

- Also renamed ldimension to cat_dimension to be more specific.
2017-03-03 09:05:06 -05:00
f89252c336 Merge pull request #719 from twitter-forks/cat-fix
Fixes to cat
2017-03-03 09:04:06 -05:00
490c15fae9 Fix slicing with step (#905) 2017-03-03 09:00:14 -05:00
f2d72ba10f Revert "make handles to be thread-local"
This reverts commit 0720ba53b344809ce3d0bdfb1ea561afa5fe0646.
2017-03-02 17:48:24 -08:00
2108b42b92 Fix bug in cat when dimension is not specified.
- Code was using the specified dimension, which was negative
- Changed the cat_dimension variable to be more explicit
- Fixed code to use the cat_dimension variable
2017-03-02 16:14:09 -08:00
bae8df62d3 Add missing THCudaCheck around cudaMemcpy 2017-03-02 16:13:39 -08:00
98775b6bb4 Merge pull request #718 from killeent/templatize-scan
genericize PrefixSum --> PrefixScan via binary operator template parameter
2017-03-02 17:50:56 -05:00
b7cc2a501f genericize PrefixSum --> prefixScan 2017-03-02 14:31:27 -08:00
0720ba53b3 make handles to be thread-local 2017-03-02 11:10:49 -08:00
ff5fa11129 make mkl link to threaded version with GCC (#958) 2017-03-02 13:37:25 -05:00
5e7f5db332 add subset samplers (#888) 2017-03-02 09:26:10 -05:00
b5f7592140 boolean mode in module.train 2017-03-02 09:18:05 -05:00
f366e5fc81 Support int16 numpy conversions
issue #891
2017-03-02 09:15:57 -05:00
48f087f6ce C99 cleanup broke MSVC (#952)
* __pragma for MSVC.
2017-03-02 08:57:28 -05:00
7ad948ffa9 fix tests to not sys.exit(), also fix fatal error on THC initialization 2017-03-01 17:37:04 -05:00
3277d83648 Add Nesterov Momentum (#887) 2017-03-01 20:49:59 +01:00
1487278fdf Allow backprop through cuDNN RNN in eval mode
Handling of dropout descriptors has been improved too.
2017-03-01 19:42:39 +01:00
977630bc15 Handle duplicate backward roots in autograd 2017-03-01 19:42:39 +01:00
12efd53dba ConstantPad2d and F.pad (#856) 2017-03-01 19:39:44 +01:00
37e05485d9 added initialization schemes in torch.nn.init (#833) 2017-03-01 19:34:13 +01:00
c76770f40e Merge commit 'dfca8dfdc5988813ed5673589ffa4fdd1c4f3d2d' 2017-03-01 09:29:51 -08:00
da725830c2 Add support for variable length sequences in RNNs (#873) 2017-03-01 17:36:32 +01:00
fc6fcf23f7 Lock the cudaFree mutex. (#880)
Prevents NCCL calls from overlapping with cudaFree() which can lead to
deadlocks.
2017-03-01 11:29:25 -05:00
b190f1b5bc Add another pinned memory test.
Checks that pinned memory freed on a different GPU from which it was
allocated isn't re-used too soon.
2017-03-01 12:22:31 +01:00
dfca8dfdc5 ensure valid index in multinomial 2017-02-28 14:48:48 -08:00
b46d5e0b04 Fix NN bindings 2017-02-28 14:35:38 -08:00
f19a11a306 Merge commit '8e8022b7351401911e10b94aeb5ae35d32907705' 2017-02-28 14:35:20 -08:00
cfcf69703f Merge commit '80429ad9f7c4775f7f88344a2cf037e499f060b8' 2017-02-28 14:35:00 -08:00
e22b8e0d17 Merge commit '3cc89afde68a831434f3abe9e3af2ac0b134215e' 2017-02-28 14:34:44 -08:00
fbfba6bdca Merge commit '6ff77503645da59eeca5be473a1902e523c4adb3' 2017-02-28 14:34:29 -08:00
3cc89afde6 Merge pull request #713 from killeent/multinomial-indexing-fix
fix indexing bug in sampleMultinomialOnce
2017-02-28 17:13:44 -05:00
1e4aee057c Merge pull request #712 from killeent/multinomial-fixes
Fix sampleMultinomialOnce to better handle large distribution values
2017-02-28 17:12:48 -05:00
8dfcf7e35a Merge pull request #709 from colesbury/pinned_memory
Fix bug where pinned memory event could be recorded on incorrect device
2017-02-28 16:56:21 -05:00
76de151ddd Fix bug where pinned memory event could be recorded on incorrect device 2017-02-28 13:48:56 -08:00
2676cc46c2 fix indexing bug in sampleMultinomialOnce 2017-02-28 13:40:15 -08:00
1bf7bc9768 refactor sampleMultinomialOnce to use <real, accreal>, assertion for sum overflow 2017-02-28 12:46:12 -08:00
3c41c9fe46 Add AutoGPU RAII that doesn't depend on Python API (#875)
Separates out the non-Python part of AutoGPU. This also compiles without
CUDA, which is useful for generic tensor code.

Also fixes a bug where THCPAutoGPU may not always switch the device:

  THCPAutoGPU guard(-1);
  guard.setDevice(0);
  guard.setDevice(1);
  guard.setDevice(0);  // would not switch back to 0
2017-02-28 14:39:20 -05:00
6ff7750364 add TH_TENSOR_APPLY variants for optimized redux (+refactor) 2017-02-28 10:30:31 -08:00
4d25c3d048 address comments and add tests 2017-02-28 10:23:36 -08:00
267b7ade50 Speed up reductions on non-contiguous dimensions 2017-02-28 10:23:36 -08:00
80429ad9f7 THVector_(add) -> THVector_(adds) 2017-02-28 12:20:44 -05:00
5ca6516ecb THVector_(add),(mul),(div) -> (adds),(muls),(divs) 2017-02-28 12:10:47 -05:00
67f94557ff Expose torch.HalfTensor 2017-02-27 19:35:47 -05:00
61bd5a0643 [Lint] Address F811 2017-02-27 19:33:00 -05:00
748d011c8b [Lint] Address F812 2017-02-27 19:33:00 -05:00
5d5cfe2e57 [Lint] Address E731 2017-02-27 19:33:00 -05:00
7cbe255296 [Lint] Use flake8 instead of pep8 2017-02-27 19:33:00 -05:00
4ef303698c Merge pull request #711 from gchanan/getDeviceAllocator
Add getter for cuda device allocator.
2017-02-27 19:29:39 -05:00
83e8b3f6c3 Add getter for cuda device allocator. 2017-02-27 15:44:44 -08:00
502ebed796 Fix one more reference cycle and ensure correct flag propagation (#868) 2017-02-27 18:38:29 -05:00
68ff58d771 Expose a mutex that is held around cudaFree() calls.
NCCL can deadlock if cudaFree() is called while it's launching kernels.
This exposes a mutex that can be held to prevent cudaFree() calls in the
caching allocator.
2017-02-27 15:08:30 -08:00
969c1602e6 Add Tensor::copy() to THPP
For now, this only supports copying from the same type. We can add
polymorphic copying in the future.
2017-02-27 21:33:40 +01:00
5e1d6a3691 Update functional.py (#862)
Fixed documentation error in conv3d
2017-02-27 10:42:02 -05:00
533cfc0381 Minor fix of docs of ModuleList and ParameterList (#861) 2017-02-27 10:09:54 +01:00
2b23712dc3 Improve autograd memory usage (#859) 2017-02-26 22:37:26 -05:00
88275da5e8 CUDA documentation tweaks (#858) 2017-02-26 20:37:43 +01:00
bd7a5ad6f0 Make Optimizer.load_state_dict use __setstate__ 2017-02-26 20:02:42 +01:00
1f6f82dbcf Fall back to indexing compatible with numpy 2017-02-26 20:02:42 +01:00
1f8939937a Allow using expand to broadcast tensors 2017-02-26 20:02:42 +01:00
b3d41a5f96 Add docs for ModuleList and ParameterList 2017-02-26 20:02:42 +01:00
fec2d493a9 Reshape grad_output in basic ops 2017-02-26 20:02:42 +01:00
86ee75f63f Fix for Long and Byte tensor indexing of Variables 2017-02-26 20:02:42 +01:00
31941918cf Prevent creation of reference cycles with leaf Variables that don't require grad
Also, raise an error immediately if a leaf that requires_grad is
modified in-place. Some comments were updated too.
2017-02-26 20:02:42 +01:00
19a65d2bea Expose stateless methods for torch.cuda.HalfTensor 2017-02-26 20:02:42 +01:00
819d4b2b83 Add finite differences gradcheck (#851) 2017-02-26 08:35:24 -05:00
b87c113cf4 CUDA documentation enhancement and docs versioning (#848)
* Add more detail to CUDA documentation

Also adds better cross-linking to the pages that discuss relevant topics.

* Adds recommendation to torch.save docs

* Make the version numbers for the docs dynamic

Might need tweaks for beta, 1.0, etc.
2017-02-26 08:33:26 -05:00
b25182971f readme change for getting clarity on binaries 2017-02-26 07:52:13 -05:00
1ee2c47e37 Correcting the description of LSTM attributes (#854) 2017-02-26 13:30:55 +01:00
2dc563f1f1 Fix indexing when passing only an Ellipsis 2017-02-25 23:34:09 +01:00
15ba71a275 Rebase fixes 2017-02-25 17:14:52 +01:00
e5b3fc49d6 Implementation of the 3rd set of tensor functions 2017-02-25 17:14:52 +01:00
ae1766951d Link TH and THPP to THD (#57)
* Fix THD library build

* THPP dependency added

* Minor cleanup; Fix build on OSX
2017-02-25 17:14:52 +01:00
02d08dafd9 Add support for IPv6 in Data Channel TCP (#53) 2017-02-25 17:14:52 +01:00
13a5090695 Added a size change in MaxPool1d module and improved tests (#771) (#832)
The backend is SpatialDilatedMaxPooling, so change the 3D input (N*C*L)
to a 4D size (N*C*1*L). Output indices will then range from 0 to L,
and this range will not cause a MaxUnpool1d error.

Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-25 08:53:30 -05:00
8e32e4c04c make wrap_generic_function importable 2017-02-24 14:27:54 -08:00
cf991310c3 c++ virtual function fix 2017-02-24 13:22:44 -08:00
938706099e adding environment flags to disable SIMD codepaths 2017-02-24 07:35:11 -05:00
3330287dc7 Update dataloader.py (#837) 2017-02-23 14:38:41 -05:00
38c8520adf adding unsqueeze to docs 2017-02-23 12:13:25 -05:00
492e1746af Fix THFree in THTensorApply 2017-02-23 06:01:13 -05:00
91a8109cfd Use C99 for openmp cleanup 2017-02-23 06:01:13 -05:00
161490d34a Add memcpy copy 2017-02-23 06:01:13 -05:00
9c302852eb comments fix 2017-02-23 06:01:13 -05:00
8654fcfd60 THVectorDefault style fix 2017-02-23 06:01:13 -05:00
b3d527d9a0 Tab style fix 2017-02-23 06:01:13 -05:00
4d495218c9 THTensorApply3 contiguous optimizations 2017-02-23 06:01:13 -05:00
13a041284c THTensorApply2 copy optimization 2017-02-23 06:01:13 -05:00
c60c1a003d TH_TENSOR_APPLY2 contiguous optimization 2017-02-23 06:01:13 -05:00
97add1a5ea comment fix 2017-02-23 06:01:13 -05:00
ca02930e47 Fill bug fix 2017-02-23 06:01:13 -05:00
20d5e95077 THTensorApply3 compress counter 2017-02-23 06:01:13 -05:00
eb4a7dc11d THTensorApply change dims to sizes 2017-02-23 06:01:13 -05:00
f722498b72 THTensorApply2 counter compress 2017-02-23 06:01:13 -05:00
aadfb6fe83 THTensorApply reduce memory overhead 2017-02-23 06:01:13 -05:00
6c273594c9 THTensorApply Counter compress 2017-02-23 06:01:13 -05:00
e475c82fa1 Add isTransposed check and enable multithreading of fill functions 2017-02-23 06:01:09 -05:00
0c2e6665df Add AVX copy 2017-02-23 05:50:34 -05:00
6295e6e94b Rebase master 2017-02-23 05:50:34 -05:00
670a4aa708 Fix AVX2 bugs 2017-02-23 05:50:34 -05:00
1bdc2e64ed Add fma cadd 2017-02-23 05:50:34 -05:00
c587be1e50 Add THVector Fill 2017-02-23 05:50:34 -05:00
bd481596f5 optimize THVector add mul div 2017-02-23 05:50:34 -05:00
a504d56b43 Fix THVector cmul AVX bug 2017-02-23 05:50:30 -05:00
91c4dfccea Use THVector cadd AVX 2017-02-23 05:46:44 -05:00
27f618c44d Add THVector Fill AVX 2017-02-23 05:46:44 -05:00
a14482a1df Add THVector cadd AVX 2017-02-23 05:46:40 -05:00
aa50c5734b Add THVector AVX cmul 2017-02-23 05:46:07 -05:00
293001a4fe Add THVector SSE div cdiv 2017-02-23 05:46:07 -05:00
638cfdf150 Add SSE add 2017-02-23 05:46:07 -05:00
5f80a14525 Separate SSE and AVX 2017-02-23 05:46:07 -05:00
1342fd3975 Remove THTensorMathSIMD THTensorMathDispatch 2017-02-23 05:46:07 -05:00
8d4af38489 Add THVector div cdiv 2017-02-23 05:46:07 -05:00
575a064e66 Remove THVector diff 2017-02-23 05:46:07 -05:00
3ab21a3c4f Merge THVector mul AVX 2017-02-23 05:46:07 -05:00
2f592e6c7d Remove THVector scale 2017-02-23 05:46:07 -05:00
5661ffb766 Merge THVector mul 2017-02-23 05:46:03 -05:00
9b74503daa Merge THVector cmul 2017-02-23 05:40:33 -05:00
24848f1cd8 Change THVector mul to cmul 2017-02-23 05:40:33 -05:00
a31a07ede9 Merge THVector add 2017-02-23 05:40:33 -05:00
c8c4c9b23d Change THVector add to cadd and fix NEON 2017-02-23 05:40:33 -05:00
e1ed9303f0 Add multi-thread add 2017-02-23 05:40:33 -05:00
a43aab13c2 Fix THTensorMath.c style 2017-02-23 05:40:33 -05:00
c698b4a45e Add Dispatches for div and mul 2017-02-23 05:40:29 -05:00
c6a0ffab50 Add AVX single float and double float add 2017-02-23 05:40:24 -05:00
8ba7cc30d1 Add THTensorMathSIMD.c 2017-02-23 05:32:34 -05:00
61bf08ca24 Fix compilation for simd tensor add 2017-02-23 05:32:28 -05:00
6ada3c0c16 Fast floating point add kernel in intrinsics (11x speedup over default for 10k elements) 2017-02-23 05:11:44 -05:00
60061fbe79 Fixed up CPU dispatch and tested. Can begin implementing kernels 2017-02-23 05:11:44 -05:00
46e7042add SIMD helper header, modified add in THTensorMath to check dispatch 2017-02-23 05:11:44 -05:00
d0c182773b First commit for dynamic CPU dispatch: general framework in place (need to create dispatch tables and stubs for all functions and make impls have hidden linkage) 2017-02-23 05:11:44 -05:00
b6f60585b5 fix AVX2 detection bugs 2017-02-23 05:00:55 -05:00
4b0e3ee219 Merge pull request #699 from twitter-forks/bitops
Bitwise operations
2017-02-23 04:15:35 -05:00
838842d4b2 fix documentation error. [issue #790](https://github.com/pytorch/pytorch/issues/790) (#831) 2017-02-23 08:59:29 +01:00
e71cf20192 improved serialization (no tar copy) (#713) 2017-02-22 22:24:20 +01:00
adb4cb2b5b contiguous view backward (#816) 2017-02-21 19:09:36 -05:00
6073f9b46c update table in README.md
it removes the empty top row
2017-02-21 12:58:04 -05:00
8e8022b735 Merge pull request #418 from ruotianluo/adaptiveAverage
Add SpatialAdaptiveAveragePooling.
2017-02-21 09:15:12 -05:00
da82d2dd70 Merge pull request #434 from bottler/master
VolumetricFractionalMaxPooling like spatial
2017-02-21 09:13:59 -05:00
82176473a5 Merge pull request #442 from twitter-forks/half-fixes
Convert real to accreal in libTHCUNN
2017-02-21 09:12:56 -05:00
2d269a9a72 Merge pull request #1137 from twitter-forks/half-fixes
Using accreal instead of real in the API
2017-02-21 09:12:32 -05:00
240372a991 Fixed topk documentation for largest=True 2017-02-21 04:38:24 -05:00
5b10411c8c Fixed some mistakes in examples
Fixed mistakes in LSTMCell and GRUCell examples.
2017-02-21 04:17:28 -05:00
4c474a9939 Improve prodall CUDA test 2017-02-20 23:28:31 -08:00
7ea6ae57c8 Support numpy arrays in default_collate 2017-02-20 23:28:31 -08:00
42633f8986 Fix misspelling and add support for weights in NLLLoss2d 2017-02-20 23:28:31 -08:00
84248690a9 Add support for indexing with None and slices with positive steps 2017-02-20 23:28:31 -08:00
53409ca0fb Fix a warning in THPP 2017-02-20 23:28:31 -08:00
c2c1710047 Add clip_grad_norm 2017-02-20 23:28:31 -08:00
876202503f Support multiple inputs in data parallel 2017-02-20 23:28:31 -08:00
946a7d9bc3 Make input contiguous only once in backward of cuDNN RNN 2017-02-20 23:28:31 -08:00
608bcd3b15 Return correct number of gradients from cuDNN RNN 2017-02-20 23:28:31 -08:00
632b02a477 Add checks for reward type and size in StochasticFunction 2017-02-20 23:28:31 -08:00
0db9c63300 Use library_dirs in setup.py 2017-02-20 23:28:31 -08:00
873ed4e6b6 Add better error message for conversion of CUDA tensors to numpy 2017-02-20 23:28:31 -08:00
01bd43037d add docs to torch/cuda/random 2017-02-20 20:43:47 -05:00
68c9e3f232 Fixed typo in GRUCell example 2017-02-21 01:37:04 +01:00
a25c8555eb Fixed paper references 2017-02-21 00:27:18 +01:00
dfd1dff383 Merge commit '4ca26fbc1b7be4e369f84e95df16431bb2f1dcb7' 2017-02-20 08:05:19 -08:00
8f391d4d51 Merge commit 'ee43cd7adca3b24a2071ce6c55dcd3a95a2b6ff6' 2017-02-20 07:55:46 -08:00
2a6b7685ae Merge commit 'f6c1bbfa483ad19c500dc94838baaa69f02d240b' 2017-02-20 07:55:19 -08:00
eb9573107d Merge commit '34b7fed802db1fda6322a70b648dcc4947858719' 2017-02-20 07:54:51 -08:00
ee43cd7adc Do SpatialClassNLLCriterion sizeAverage in a separate kernel 2017-02-20 06:54:23 -08:00
4ca26fbc1b Remove averaging from prodall 2017-02-20 11:37:53 +01:00
c165226325 Print a readable error message when arguments are on different GPUs 2017-02-20 11:35:50 +01:00
49295ebe54 Add sequential to documentation 2017-02-18 08:42:43 +05:30
455038e470 Use a more stable formula for spatial LogSoftMax 2017-02-17 13:05:45 -08:00
ca7f02ea0c Add shape checks for SpatialClassNLLCriterion 2017-02-17 13:01:56 -08:00
04aba1caec Fix cuDNN dropout desc for multi-gpu (#772) 2017-02-17 19:16:12 +01:00
f6c1bbfa48 Merge pull request #1105 from ruotianluo/adaptiveAvg
Add SpatialAdaptiveAveragePooling
2017-02-17 10:52:33 -05:00
4e2c8c6db5 Merge pull request #1123 from bottler/master
VolumetricFractionalMaxPooling like Spatial...
2017-02-17 10:42:21 -05:00
c26b9c0a5e Update rnn.py
Based on the line at https://github.com/pytorch/pytorch/blob/master/torch/backends/cudnn/rnn.py#L302, the output is returned with dimensions (0, 1) transposed if the batch_first argument is set to True.
2017-02-17 14:37:14 +01:00
aaf41c61a6 Fix Engine::compute_dependencies 2017-02-17 18:28:51 +05:30
dd844f741b Fix previous_functions when it contains Variables 2017-02-17 11:03:46 +05:30
7117a9012e Fix flaky non-contig test 2017-02-17 10:40:08 +05:30
1bdc28161a Add torch.__version__ 2017-02-17 10:40:08 +05:30
5e150caf38 Fix a bug in Engine::compute_dependencies 2017-02-17 10:40:08 +05:30
c0c62d099a Make detach() actually remove the creator 2017-02-17 10:40:08 +05:30
b9ece39685 Make torch.Size methods return torch.Size, not tuple 2017-02-17 10:40:08 +05:30
15ef008877 Using accreal instead of real in the API
- This reverts commit 7a07afe545b4deae5919d9dc268bfac3d37398c7.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:34:11 -08:00
b14d6318f8 Convert real to accreal in libTHCUNN
- This reverts commit 0d85922d116879448485ef88ae21e83a9255a0b0.
- Includes fixes for TemporalRowConvolution
2017-02-16 17:33:03 -08:00
7c44506441 allow DataParallel to have tuple inputs on a single GPU 2017-02-16 19:07:17 +01:00
937ba581d7 Improve nn.legacy compatibility with Torch7 (#738) 2017-02-16 21:17:12 +05:30
2ae54f1194 setup.cfg -> tox.ini (#761) 2017-02-16 21:13:13 +05:30
a217fefee1 Update rnn.py
Fixed a problem with outputting the RuntimeError if arguments are incorrect in cudnn/rnn.py
2017-02-15 21:49:42 +01:00
34b7fed802 Fix gcc 4.4.7 build. 2017-02-15 09:06:25 -08:00
5221745c21 add test for bias=False for 3d convolution 2017-02-15 04:26:44 -08:00
000ca44b16 Merge commit '797544c47a4e9bdff02137a127f883a6df9b3dfe' 2017-02-15 04:24:14 -08:00
8f3d44033b Merge commit '0426f2f3ec2b932cb83d64101081244c2a1451b1' 2017-02-15 04:23:50 -08:00
7cc14c595a Merge commit '07f5b21ef1bd29d1451c616062dcbfc3f8fd7c6a' 2017-02-15 04:23:18 -08:00
797544c47a implementation of bias=False for VolConv.cu 2017-02-15 04:18:17 -08:00
0426f2f3ec implementation of bias=False for VolConv.c
Used .c file changes from 7318e2de13 as a starting point. All changes to .c files (except for whitespace details) are present here.
However, the required .h files were not present in that PR.
2017-02-15 04:16:09 -08:00
336eeee895 kernel_size as the default stride for avg_pool1d (#744)
Following the documentation, let stride default to kernel_size if stride is not provided.
2017-02-15 13:12:18 +05:30
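A quick check of the behaviour described above, assuming the F.avg_pool1d interface: with stride omitted it defaults to kernel_size, so the two calls should agree.

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 1, 8)                      # (batch, channels, length)
    a = F.avg_pool1d(x, kernel_size=4)            # stride defaults to kernel_size
    b = F.avg_pool1d(x, kernel_size=4, stride=4)
    print(torch.equal(a, b))                      # expected: True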
593f867e3e Fixed a simple compiling error in mac OS #745. (#746)
Signed-off-by: Zhou Chang <achang.zhou@gmail.com>
2017-02-15 12:19:03 +05:30
385913be1c Fix class torch.nn.ConvTransposeNd documentation (#739)
There is no `dilation` parameter, and the `output_padding` doc was missing.
2017-02-15 10:37:20 +05:30
6aaa14f5fe Fix LSTMCell Doc Typo (#743) 2017-02-15 08:29:17 +05:30
07f5b21ef1 Merge pull request #702 from gchanan/conservativeAllocator
Improve THCCachingHostAllocator performance by making it reclaim less aggressively
2017-02-15 08:26:48 +05:30
e454870396 Free set of stored streams and handle NULL streams. 2017-02-14 15:41:47 -08:00
2822013437 Fix flaky tests 2017-02-14 21:28:50 +01:00
72c1982734 Add some more asserts to cuDNN RNN 2017-02-14 21:28:50 +01:00
0de2ea305a Support retain_variables in cuDNN RNN 2017-02-14 21:28:50 +01:00
d899385a3d Raise error when too small input is given to conv 2017-02-14 21:28:50 +01:00
c6d6cbe8a6 Check that all tensors are on the same GPU in cuDNN bindings 2017-02-14 21:28:50 +01:00
85e82e85d8 Fix bug in zero_grad, when some parameters didn't require grad 2017-02-14 21:28:50 +01:00
a1534cc37d Fix auto-gpu in cat 2017-02-14 21:28:50 +01:00
8c8dc791ef Load half and double THCUNN backends 2017-02-14 21:28:50 +01:00
63edca44f2 Add tests for non-contiguous inputs and gradients 2017-02-14 21:28:50 +01:00
8d90ab2d9b compile with cudart (#737) 2017-02-14 06:40:35 +05:30
bd5303010d Refactor autograd package to separate Python dependencies. (#662)
The core autograd Variable, Function, and Engine no longer depend on the
Python API. This lets us implement functions in C++. In the future, we
can also multithread the engine and release the GIL for most of the
non-Python backwards.
2017-02-13 16:00:16 -08:00
16d2c3d7b3 make networks converted with loadcaffe loadable 2017-02-13 23:53:46 +01:00
407a92dc26 std::min() requires same type (#732)
* std::min() requires same type

* cast buffer instead

* declare buffer_size as int64_t
2017-02-13 18:06:05 +01:00
0a893abc7b fix serialization bug for large files 2017-02-12 19:13:02 +01:00
34fa5e0dc7 Update docstrings for testing object type
Add docstring for `is_storage()` and `is_tensor()`
2017-02-12 09:21:01 +05:30
712686ce91 Add cat, contiguous, squeeze, and unsqueeze to THPP
Use unsqueeze and view from TH/THC
2017-02-11 17:49:31 +01:00
518864a7e0 Fix bug in legacy NN updateGradParameters (#714) 2017-02-11 11:04:18 +05:30
750fb5cc73 Fixes to support short and char tensors for bitwise operations 2017-02-09 18:52:59 -08:00
0f4749907a Adding bitwise operations
- lshift, rshift, bitand, bitor, bitxor
2017-02-09 18:11:58 -08:00
bd2dc63ef6 Adding bitand, bitor and bitxor 2017-02-09 17:06:04 -08:00
19a8795450 Changes to shift operations
- renaming lsh -> lshift, rsh -> rshift
- adding componentwise functions
2017-02-09 15:41:07 -08:00
d9dccfdd71 Fix for non-contiguous grad_output in cuDNN conv 2017-02-10 00:25:59 +01:00
7547a06c4f Avoiding duplicated unsigned as it causes error on gcc. 2017-02-09 13:29:05 -08:00
8929b75795 Added shift operations. 2017-02-09 13:28:36 -08:00
4d37ef878c Remove view on data and target tensors of dim 1 in TensorDataset (#609) 2017-02-09 22:06:39 +01:00
126e77d5c6 Merge commit 'e9b05c71b4acf210fad719f4da8bb58a425dd00b' 2017-02-09 12:31:58 -08:00
53eec78bea Merge commit 'ac9312e9f8002227b267a82e224a5a99c7a7e734' 2017-02-09 12:31:40 -08:00
a4edaec81a Merge commit 'aeb7a72620be47c0e6a8928a9cb6df49c06902a0' 2017-02-09 12:31:16 -08:00
92481b59d3 Merge commit '73d232ee454ca25de5552d347a2b06820f30d193' 2017-02-09 12:30:39 -08:00
6c77fa9121 Changes in RNNBase and Embedding for compatibility with DataParallel (#660) 2017-02-09 22:36:26 +05:30
aeb7a72620 Merge pull request #693 from colesbury/view
Add code for 'view' to THC
2017-02-09 12:09:28 +05:30
73d232ee45 Merge pull request #926 from colesbury/view
Add code for 'view' to TH
2017-02-09 12:08:57 +05:30
c0c65bf915 Merge pull request #696 from colesbury/unsqueeze
Add unsqueeze to THC
2017-02-09 11:08:20 +05:30
f6cee952af Merge pull request #929 from colesbury/unsqueeze
Add unsqueeze1d to TH
2017-02-09 11:07:47 +05:30
e74184f679 Make THCCachingHostAllocator less aggressive.
In cases where copyAsync is a large percentage of the work,
processing events in recordEvent can cause a large bottleneck.

Here, we relax the constraint that we reclaim blocks as fast as possible
(i.e. in copyAsync); instead, we only check that a block can be re-allocated
in malloc and free.
2017-02-08 14:44:24 -08:00
3884d36176 Add unsqueeze to THC 2017-02-08 13:49:32 -08:00
e7c6886a00 Add unsqueeze1d to TH
Unsqueeze inserts a singleton dimension. Unlike view, it doesn't require
the tensor to be contiguous.
2017-02-08 09:52:50 -08:00
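At the Python level the contrast looks roughly like this, assuming torch.unsqueeze and view as exposed today: unsqueeze works on a non-contiguous tensor, whereas view would refuse it.

    import torch

    x = torch.randn(3, 4).t()      # transpose makes the tensor non-contiguous
    y = x.unsqueeze(0)             # fine: inserts a singleton dimension, no contiguity needed
    print(y.size())                # torch.Size([1, 4, 3])
    # x.view(1, 4, 3)              # would raise: view requires a contiguous tensor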
ed8e92f63d Expose rawSet and rawResize as resizeNd and setStorageNd 2017-02-08 09:00:22 -08:00
fb97df5d65 Expose rawSet and rawResize as resizeNd and setStorageNd
These methods are useful from C because they don't require constructing
THLongStorages to wrap the sizes and strides, which can lead to leaked
memory in case of an error. Instead the sizes and strides can be
represented on the stack using standard C long arrays.
2017-02-08 08:56:04 -08:00
e9b05c71b4 Use THCTensor rather than THCudaTensor in THCUNN.h definition of
GatedLinearUnit.
2017-02-08 07:54:10 -08:00
7926324385 Corrected parameter typo in Adam docstring (#697) 2017-02-07 19:00:10 +01:00
1527b37c26 Fixed typo and rendering of some equations (#693)
* Fixed typo and rendering of some equations

* Few more fixes to MSELoss docs

* Cleaning up whitespace to make pep8 happy
2017-02-07 18:59:27 +01:00
de4659659b The RNNCell's example can not run correctly 2017-02-07 18:58:19 +01:00
a96a8c8336 Static build support + Query CUDA driver, runtime versions (#695) 2017-02-07 08:34:20 +05:30
691aa19b88 Add code for 'view' to THC 2017-02-06 14:04:04 -08:00
6b07dc9e22 Add code for 'view' to TH 2017-02-06 14:00:48 -08:00
8aa259b52b review comments from gchanan 2017-02-06 11:08:23 +00:00
ac9312e9f8 Bugfix/rowconv (#1126) 2017-02-04 20:37:45 +05:30
91a17b702b half<->float conversion cleanup (#901)
* half<->float conversion cleanup
2017-02-04 07:30:13 +05:30
c54597e0b2 std::move fixes 2017-02-03 21:31:03 +01:00
a9785bba44 cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-02-02 21:38:25 -08:00
833b8cbc7a Remove unused code from module 2017-02-02 17:20:11 +01:00
75aeb16e05 Merge commit '72089c9c36c6b880c695baf732cd04329d72c098' 2017-02-01 22:00:42 -08:00
fc354a0d6e Revert "cuda implementation of Gated Linear Unit, fixed issues with genericization" 2017-02-02 10:50:47 +05:30
262611fcd3 Merge pull request #430 from huihuifan/newCudaGLU
cuda implementation of Gated Linear Unit, fixed issues with genericization
2017-02-02 08:16:35 +05:30
b8a34f3033 Small fixups:
1) Add return after THError for completeness.
2) Fix brace formatting
2017-02-01 15:46:19 -08:00
10bb6bb9b8 Fix function names in error messages 2017-02-01 15:21:57 -08:00
3c9ef69c37 Fix THCTensor::isSparse 2017-02-01 14:51:06 -08:00
dee987d6ee use pseudo-fp16 2017-02-01 23:48:09 +01:00
138f254ec1 Support sparse tensors in THPP (#667) 2017-02-01 17:34:50 -05:00
c7c8aaa7f0 Add ModuleList and ParameterList to nn 2017-02-01 23:26:31 +01:00
d0db624e02 Add W503 to PEP8 ignore list (#646) 2017-02-01 15:57:09 -05:00
e3e7b76310 Rename all normal and log_normal args to std 2017-02-01 21:48:11 +01:00
dad02bceb9 Remove duplicated line in cwrap 2017-02-01 21:48:11 +01:00
b195285879 Improve CUDA detection in THPP 2017-02-01 21:48:11 +01:00
8f3da5b51d set_index -> _set_index 2017-02-01 21:48:11 +01:00
825e919eb8 Add torch.unbind 2017-02-01 21:48:11 +01:00
acb0ce8885 Add LongTensor indexing support 2017-02-01 21:48:11 +01:00
72089c9c36 Update THHalf.c 2017-02-01 11:53:29 -08:00
cf2f158fec Remove erroneous proprietary license header
This change was approved by NVIDIA Legal, and I am authorized to make the change on behalf of the company.
2017-02-01 11:43:44 -08:00
41ddc2a786 VolumetricFractionalMaxPooling like Spatial... 2017-02-01 12:01:09 +00:00
e4886f6589 VolumetricFractionalMaxPooling like spatial 2017-02-01 11:52:49 +00:00
6470b5bd21 Add test for Embedding with sparse=True (#663) 2017-02-01 09:54:42 +05:30
tvn
44196955e2 ByteTensor should be unsigned (#664)
ByteTensor should be unsigned
2017-01-31 21:43:39 -05:00
f08ec1394d Fix bug with inplace TH(CU)NN
Also, remove unnecessary zero_() calls
2017-01-31 21:00:49 +01:00
f8fb25e0a2 Add generic bindings to THNN and THCUNN (#645)
Adds bindings using thpp::Tensor to THNN and THCUNN. This allows calling
into those APIs without knowing the concrete types of the tensor
arguments.
2017-01-31 13:23:02 -05:00
6a0c66752f Fix documentation and argument name for Tensor.normal_(mean, stddev) (#652) 2017-01-31 11:55:39 -05:00
a1bd4efb08 readme: add guidance on disabling CUDA (#655) 2017-01-31 14:05:51 +05:30
b43ce05268 Refactor parts of utils.h (#648)
Moves THPObjectPtr into a separate header, so that it can be included
independently. Currently, utils.h requires all of THP.h. Also adds RAII
structs for acquiring and releasing the GIL.
2017-01-30 21:16:28 -05:00
80e56cfda9 Merge commit 'dc9a5b7d2fbcf21268b524b9da5ae38a74214a59' 2017-01-30 17:58:05 -08:00
24701fc5a7 Merge commit '03dcf8a83bb009ecfdd8f27c4d9a6db40829b690' 2017-01-30 17:57:20 -08:00
f78a266d99 Merge commit '368cbe615d0a7bdaadddcb3bd390abcd4cc17b91' 2017-01-30 17:56:37 -08:00
f096fb6859 adding cudnn V6 support (#515) 2017-01-31 02:01:37 +01:00
a3e11d606b Fix linter errors 2017-01-31 01:58:09 +01:00
79232c24e2 Fixes after rebase 2017-01-31 01:58:09 +01:00
15d9d499ab Remove ZMQ dependency from compilation files 2017-01-31 01:58:09 +01:00
962084c8e8 Add Data Channel receive from any source (#52) 2017-01-31 01:58:09 +01:00
7518b1eefb Introduce Scalar for easier send/receive types through DataChannel 2017-01-31 01:58:09 +01:00
8215d7a4ba Implement TH_API functions from the set 2 (#49) 2017-01-31 01:58:09 +01:00
5aaa220d84 Thd functions v3 (#46) 2017-01-31 01:58:09 +01:00
12c16ab9bc Remaining storage functions implemented 2017-01-31 01:58:09 +01:00
76520512e7 DataChannel tests rewrite (#42); DataChannel isend and irecv implementation (#44) 2017-01-31 01:58:09 +01:00
66de965882 Replace ZeroMQ (#41) 2017-01-31 01:58:09 +01:00
10d32fb0b7 Fix DataChannel tests failure (#43)
Tests failed due to accessing a reference which could be invalid.
2017-01-31 01:58:09 +01:00
e72c9b6e4a Storage constructors implemented (#40) 2017-01-31 01:58:09 +01:00
ac1f68127a Add barrier, scatter, gather and allGather implementations + groups (#34) 2017-01-31 01:58:09 +01:00
60d1852c7b Major improvements to master-worker mode
* Fixed all undefined symbol errors
* Implemented storage interface and THStorage class
* RPC improvements
* Code refactor
2017-01-31 01:58:09 +01:00
d53eb521fc Add missing headers. 2017-01-31 01:58:09 +01:00
9808932f10 Refactor RPC and change TensorType to Type 2017-01-31 01:58:09 +01:00
ea876eb6d5 Add initial bindings for master-worker mode 2017-01-31 01:58:09 +01:00
0a45864866 Add THDStorage and improve master-worker mode implementation 2017-01-31 01:58:09 +01:00
2560b39796 Merge TensorTypeTraits.hpp with TensorTraits.hpp 2017-01-31 01:58:09 +01:00
21afa4c88b Worker handling for constructors + destructor 2017-01-31 01:58:09 +01:00
9fc3c5e4d2 THDTensor constructors implemented + some minor fixes 2017-01-31 01:58:09 +01:00
3e3501c98d Integration tests of the THD Python interface (#28) 2017-01-31 01:58:09 +01:00
5e6fcd02b5 Implement data channel groups (#25) 2017-01-31 01:58:09 +01:00
d46ebcfadf Fix broadcast and reduce implementations
Due to bad rank mapping, broadcast and reduce were connecting the
wrong processes, which resulted in errors or in tensors not being sent/received.

 * Introduced new mapping method to solve this problem.
 * Added and improved tests for these cases.
2017-01-31 01:58:09 +01:00
41480c8cf2 Data channel maintenance 2017-01-31 01:58:09 +01:00
236890d902 Fix transitive library dependencies in CMake 2017-01-31 01:58:09 +01:00
55632d81d2 Add Python wrappers for process group mode 2017-01-31 01:58:09 +01:00
0b276d622e Add reduce and allReduce implementations (#15) 2017-01-31 01:58:09 +01:00
c81491b37d Preserve directory structure when installing headers 2017-01-31 01:58:09 +01:00
42e189425f Detect ZMQ libs and headers in CMake 2017-01-31 01:58:09 +01:00
3cfa0d7199 Expose C API for process group mode 2017-01-31 01:58:09 +01:00
7c9e088661 Reorganize THD directory structure 2017-01-31 01:58:09 +01:00
e78aa4bb84 Implement CommandChannel with ZMQ. 2017-01-31 01:58:09 +01:00
f8e94d0d8b Implement DataChannel (MPI and TCP) (#8) 2017-01-31 01:58:09 +01:00
ebe6f40fce RPC message packing and unpacking implemented 2017-01-31 01:58:09 +01:00
5fb37efb46 Use #pragma once instead of defines 2017-01-31 01:58:09 +01:00
4f47855873 Style improvements 2017-01-31 01:58:09 +01:00
52ae6f682f Add initial version of tensor wrappers 2017-01-31 01:58:09 +01:00
c35f58f97b Template for THD implementation 2017-01-31 01:58:09 +01:00
659b2f3154 Add more autograd functions 2017-01-31 00:39:34 +01:00
5ea05cfb96 Return indices from Variable sort and topk 2017-01-31 00:39:34 +01:00
dc9a5b7d2f Fix memory leak in SpatialMaxUnpooling 2017-01-30 23:23:07 +01:00
f7ab5a128a Delete extra bracket in RNNCellBase.__repr__. (#637)
This extra bracket causes a ValueError when trying to print a Module that uses RNNCellBase or any of its subclasses.
2017-01-29 23:21:24 -05:00
368cbe615d Add Ubuntu 16.04 lib paths in CMake 2017-01-30 01:16:02 +01:00
d4c9a3782b billinear -> bilinear, docs for upsampling, improved docs for Unpooling, pep8 tests fix (#617)
* billinear -> bilinear, docs for upsampling, improved docs for Unpooling, pep8 tests fix
2017-01-30 05:08:48 +05:30
172dca5e8b Fix bug in cat (non-contiguous first input) 2017-01-29 21:25:53 +01:00
818bf0c408 Compile with asserts by default 2017-01-29 21:21:59 +01:00
03dcf8a83b Compile with asserts on by default 2017-01-29 21:18:54 +01:00
604f607fd1 Add asserts in index* functions 2017-01-29 21:18:43 +01:00
956d946c25 Default initial hidden states for recurrent layers (#605)
Fixes #434
2017-01-29 12:38:56 +01:00
970caaa621 Exclude sphinx_rtd_theme from pep8 2017-01-28 23:37:39 -05:00
00a5980cdf Improve RNN doc formatting 2017-01-28 23:37:39 -05:00
e24eee04f0 Link THC to THPP 2017-01-28 23:37:39 -05:00
f1b3af4ee2 Add more bernoulli options in cwrap 2017-01-28 23:37:39 -05:00
fb2d28f477 remove circular references in NestedIOFunction 2017-01-28 23:30:06 +01:00
3a704ff725 Fix legacy load_lua for SpatialConvolution (#608)
* fix legacy load_lua for conv2d

* fix pep8
2017-01-28 20:19:18 +01:00
0180e638e5 Remove unnecessary zero_() calls in cuDNN RNN 2017-01-28 14:36:57 +01:00
95c6ae04fb Fix non-contiguous grad handling in cuDNN RNN 2017-01-28 14:36:57 +01:00
27c4c6e0af Merge commit '6ee77b4edd1552d3a9a2e5389ffc351e513a8089' 2017-01-27 17:29:07 -08:00
da17414b3f Merge commit '343d65db91c2419843d36aed5467c2d1374108bc' 2017-01-27 17:16:08 -08:00
be2b27a747 Merge commit '4461ae809043390d5223905cb82b17035c7f9f31' 2017-01-27 17:15:21 -08:00
aec2c8f752 Merge commit 'c45ff2efe64d0face3889194ba6f885fe9cc4d48' 2017-01-27 17:12:13 -08:00
13e34b4679 Fix multiprocessing tests 2017-01-28 01:18:42 +01:00
57373c7c29 Fix docs 2017-01-28 01:16:04 +01:00
79f5bf84e5 [pep8] Potentially breaking docstring changes 2017-01-28 01:15:51 +01:00
3ed720079e [pep8] Fix most remaining lint manually 2017-01-28 01:15:51 +01:00
e7c1e6a8e3 [pep8] Fix most lint automatically with autopep8
Here's the command I used to invoke autopep8 (in parallel!):

    git ls-files | grep '\.py$' | xargs -n1 -P`nproc` autopep8 -i

Several rules are ignored in setup.cfg. The goal is to let autopep8
handle everything which it can handle safely, and to disable any rules
which are tricky or controversial to address. We may want to come back
and re-enable some of these rules later, but I'm trying to make this
patch as safe as possible.

Also configures flake8 to match pep8's behavior.

Also configures TravisCI to check the whole project for lint.
2017-01-28 01:15:51 +01:00
f1d0d73ed7 Fix flaky Sqrt test 2017-01-28 00:45:49 +01:00
9c411513bf Patch distutils crash when linking with ccache 2017-01-28 00:28:33 +01:00
ce78bc898b Fix travis builds and add ccache 2017-01-28 00:28:33 +01:00
887002e932 Add bindings to CUDA tensors and storages in THPP (#615) 2017-01-27 18:15:56 -05:00
31dea5ff23 Small typo in README (#613) 2017-01-27 20:18:36 +01:00
ec4602a973 Fix bad code alignment (#612)
forward *is* a method of the Linear class
2017-01-27 20:16:49 +01:00
a38749d15f Fix cuda notes
Target GPU *is* consistent with source GPU
2017-01-27 19:30:49 +01:00
6ee77b4edd Added cunn support for TemporalRowConvolutionMM (#415)
* Added cunn TemporalRowConvolutionMM support
2017-01-27 13:30:25 -05:00
343d65db91 Rowconv repull (#1120)
* Added TemporalRowConvolutionMM layer, tests, and documentation
2017-01-27 13:29:05 -05:00
6328981fcf cuda implementation of Gated Linear Unit, fixed issues with genericization 2017-01-26 22:56:33 -08:00
a90913105c add make-contiguous in batchnorm backward (#602) 2017-01-26 16:17:39 -05:00
9368596059 legacy.nn Attributes: Add '_gradOutput' to SpatialConvolution. (#600) 2017-01-26 15:00:41 -05:00
80ed795ff1 Minor ffi utils fix 2017-01-26 11:55:49 +01:00
a2938e3d11 add cc 3.0 to nccl (#594) 2017-01-25 22:47:23 -05:00
2ad967dbe4 Fix pep8 in setup.py with "autopep8 -i setup.py" 2017-01-25 22:23:22 -05:00
7415c090ac Check setup.py for pep8 lint on TravisCI 2017-01-25 22:23:22 -05:00
a1fa995044 Fixes and improvements (#593)
* Fix error in ELU backward

* Add --seed flag for tests

* Add test for BatchNorm eval

* Fix autograd.backward docs

* Support cc flags in cuDNN search

* Fix IndexSelect backward formula
2017-01-25 22:21:49 -05:00
3c2ecc6b15 add dockerfiles (#583)
* add dockerfiles
2017-01-25 17:30:29 -05:00
fa1516d319 Install THCUNN.h and generic/THCUNN.h
The THCApply.cuh is moved to the .cu files so that THCUNN.h can be
compiled by a standard C compiler.
2017-01-25 14:13:17 -08:00
5e26f49db4 Install THNN.h and generic/THNN.h 2017-01-25 14:09:09 -08:00
7694f65120 Revert "Using accreal instead of real in the API" 2017-01-25 16:26:42 -05:00
b5ebf68df1 Revert "Convert real to accreal in libTHCUNN" 2017-01-25 16:13:20 -05:00
aa46055274 Update CI links in README (#579) 2017-01-25 13:58:05 -05:00
2cad802b68 Revert "cuda implementation of Gated Linear Unit" 2017-01-25 13:15:22 -05:00
2d01f384f1 fallback to nn batchnorm on backward-evaluate (#589) 2017-01-25 12:38:57 -05:00
f8d4f980b3 Add upsampling modules and functions 2017-01-24 17:30:50 -05:00
4f5a6c366e Make Variables non-comparable 2017-01-24 17:30:50 -05:00
ecfcf39f30 Improve optimizer serialization
Also, add optimizer.load_state_dict
2017-01-24 17:30:50 -05:00
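A small round-trip sketch using the state_dict/load_state_dict pair added here; the model and file name are only illustrative.

    import torch
    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(4, 2)
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

    torch.save(optimizer.state_dict(), 'optim_state.pth')      # serialize optimizer state
    optimizer.load_state_dict(torch.load('optim_state.pth'))   # restore it later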
3975a2676e Fix invalid DECREF in torch.Size constructor 2017-01-24 17:30:50 -05:00
138ee75a3b Fix for target_link_libraries on CMake 2.8 (#581) 2017-01-24 17:26:24 -05:00
0048f228cb Add spatial test for LogSoftmax 2017-01-24 23:24:25 +01:00
2748b920ab make adam have the same lr as lua torch (#576) 2017-01-24 16:35:28 -05:00
a92a2312d4 Add missing fields to read_lua_file for BatchNorm and Linear layers. 2017-01-24 22:09:47 +01:00
945ce5cdb0 Fix math block of GRUCell in docs (#572)
Added a blank space between the beginning of the `.. math::` block, otherwise it is displayed as a code block.
2017-01-24 14:28:56 -05:00
b39de2cbbe Merge pull request #416 from pavanky/half-fixes
Convert real to accreal in libTHCUNN
2017-01-24 12:17:49 -05:00
49a555e0f5 Merge pull request #1109 from pavanky/api
Using accreal instead of real in the API
2017-01-24 12:17:17 -05:00
ce13900148 update From Source instructions 2017-01-24 10:48:25 -05:00
4c77ad6ee4 step_rate -> lr in adadelta (#569) 2017-01-24 10:05:59 -05:00
0bc4246425 adding NLLLoss2d to docs 2017-01-24 09:22:51 -05:00
c45ff2efe6 Merge pull request #915 from pavanky/convert
Macros to convert between real and accreal
2017-01-24 09:14:33 -05:00
99b520cc5d Merge pull request #421 from huihuifan/cudaGLU
cuda implementation of Gated Linear Unit
2017-01-24 09:13:34 -05:00
e05607aee1 Add fall back to implicit GEMM and friends. (#558)
If we can't allocate the workspace for the desired algorithm, we fall
back to a default algorithm which does not require a workspace.
2017-01-24 09:10:39 -05:00
a360ba1734 Add a hint about CUDNN_STATUS_NOT_SUPPORTED 2017-01-24 09:09:30 -05:00
c661b963b9 Add more contiguity checks to cuDNN 2017-01-24 09:09:30 -05:00
e374dc1696 add step rate to adadelta (#568)
Scales `delta` before it is applied to the parameters in order to control the learning rate of the optimizer (inspired by the climin optim library for Theano).
Also changed the link to the Adadelta paper to point to the right location.
2017-01-24 08:48:19 -05:00
116e0c7f38 Merge commit '45596d52897fb187701943cb77456ff1e7249989' 2017-01-23 14:37:44 -08:00
45596d5289 Add contiguity checks to THCUNN 2017-01-23 14:17:51 -08:00
342e7b873d fixing THPP cmake for cmake < 3.1 (#559) 2017-01-23 14:47:06 -05:00
00410c4496 Fix broken THNN groups in conv functions 2017-01-22 18:32:51 -05:00
8b9276bbee Fix view bug in Conv1d 2017-01-22 18:32:51 -05:00
3238786ea1 Improve optimizer error messages 2017-01-22 18:32:51 -05:00
07ebbcbcb3 Add Parameter docs 2017-01-22 18:32:51 -05:00
ca555abcf9 fix comments 2017-01-22 18:02:40 -05:00
63893c3fa2 Fix auto-gpu semantics for indexing 2017-01-22 18:02:40 -05:00
f8ae34706e Port L-BFGS from Lua optim 2017-01-22 18:02:40 -05:00
f8e89fbe11 fix docs for torch.nn.functional.conv1d (#536) 2017-01-21 10:41:52 -05:00
30d208010c Fix segfault when a None gradient was given to a hook (#533) 2017-01-21 10:39:35 -05:00
017c7efb43 Fix typo in LSTMCell documentation 2017-01-21 15:35:48 +01:00
0c69fd559a Fix CUDA sharing across processes (#530) 2017-01-20 18:28:39 -05:00
c991258b93 fix formula for GRU cells 2017-01-20 17:28:57 -05:00
9f89692dcd adding documentation for some lapack functions (#528) 2017-01-20 16:56:37 -05:00
c28575a4eb Fix typo in documentation for autograd 2017-01-20 21:59:33 +01:00
c9db9c2317 Add C++ tensor library (from THD fork) (#526) 2017-01-20 15:23:34 -05:00
16a09304b4 fix documentation of LSTM cell (#525) 2017-01-20 12:01:50 -05:00
58a88d1ac0 Fix doc search and warnings 2017-01-20 11:36:41 +01:00
b740878697 Updated h0,c0 shape in documentation for RNN, LSTM, GRU (#519) 2017-01-20 10:12:44 +01:00
7179002bfb cuda implementation of Gated Linear Unit 2017-01-19 23:01:30 -08:00
43b5be1d78 added c implementation of GatedLinearUnit 2017-01-19 22:18:08 -08:00
173c81c2d2 import package at the beginning 2017-01-20 00:09:22 +01:00
ee4c77c59f Docs improvements (#512)
* Always compile .numpy() for all types

* Add torch.nn.functional docs and hidden headers

* Use sphinx to generate torchvision docs

* Remove unused import in ffi utils
2017-01-19 17:28:49 -05:00
30ec12fdd5 update readme for source installs to make magma dependency optional 2017-01-19 16:20:13 -05:00
269ec0566f fix typo 2017-01-19 14:26:50 -05:00
a0a95c95d4 Add Random Number Generator Docstrings (#506) 2017-01-19 11:10:01 -05:00
1335b7c1da Fix unpooling docs (#492) 2017-01-19 11:08:43 -05:00
6d14ef8083 Update batchnorm docstrings
Add missing full stops and a blank line for increased clarity in the rendered documentation.
2017-01-19 14:15:26 +01:00
26a492acf3 Update docstring for ConvTranspose functions
Transposed convolutions are often (but incorrectly) referred to as Deconvolutional operations. Made mention of this in the docstring to make it easier for people to search for this operation in the documentation.
2017-01-19 13:02:58 +01:00
f2741e8038 format fix (#490) 2017-01-18 21:41:10 -05:00
8d1a6975d2 Fix for non-contiguous from_numpy (#489) 2017-01-18 18:53:13 -05:00
c414bf0aaf Fix handling of unicode in torch._C._add_docstr (#487) 2017-01-18 17:22:30 -05:00
99f4864674 fixed RMSprop initialization (#485)
* fixed RMSprop initialization
2017-01-18 17:05:53 -05:00
784cbeff5b added a non-exhaustive list of contributors 2017-01-18 13:54:56 -05:00
9302f860ae Remove unused file TensorDocstrings.cpp (#481)
Tensor docstrings are created in _tensor_docs.py
2017-01-18 13:34:40 -05:00
ac8a5e7f0d Remove error message assertion (#480)
Depending on how PyTorch is compiled, the source code for DataLoader
might not be fully available, which can cause a spurious error in
test_dataloader.py
2017-01-18 13:16:38 -05:00
798fc16bbf add beta tag 2017-01-18 12:21:46 -05:00
0f65c9267d Fix typo 2017-01-18 08:46:04 -08:00
be45231ccb Improve ffi utils (#479)
* Improve ffi utils
2017-01-18 11:17:01 -05:00
279aea683b update conda install command 2017-01-18 10:52:49 -05:00
8aa8f791fc add more torch.* and Tensor docs (#476) 2017-01-18 08:39:33 -05:00
6464e69e21 Docs for torch.Storage (#475) 2017-01-18 03:22:30 -05:00
a93812e4e5 Fix PowConstant (#471) 2017-01-18 01:53:30 -05:00
225f942044 Disable IndexCopy test until #473 is fixed (#474) 2017-01-18 01:18:18 -05:00
d951d5b1cd Fix tensor.cuda(0) when on non-zero device. (#472) 2017-01-18 01:08:37 -05:00
2082ccbf59 More Tensor docs (#470) 2017-01-18 00:42:41 -05:00
473e795277 Fix invalidArguments for functions with tuple outputs, but no other (#468)
arguments.

For example:

   >>> torch.randn(5, 5).geqrf('invalid arg')
   TypeError: geqrf received an invalid combination of arguments - got (str), but expected ()
2017-01-17 23:14:40 -05:00
a09f653f52 Begin to document TensorBase methods (#466) 2017-01-17 21:44:12 -05:00
90fe6dd528 remove spurious pprint 2017-01-17 21:43:38 -05:00
57a2ccf777 PYTORCH_BUILD_VERSION to setup.py 2017-01-17 17:51:16 -08:00
b5f6fdb814 Using accreal instead of real in the API
This is done to be consistent with the changes made to cunn
2017-01-17 16:58:19 -08:00
205b9bc05f fix build_all.sh 2017-01-17 16:55:46 -08:00
14d5d52789 Add placeholder tensor documentation for methods that exist in torch. (#463) 2017-01-17 19:37:47 -05:00
9c218b419f kl_div and docs (#429) 2017-01-17 19:24:01 -05:00
a69d819901 Converting all instances of real to accreal in libTHCUNN
This is because the current version of luaffifb fails to pass
custom structs (i.e. half) as arguments or accept them as return
values.

The accreal parameters are immediately converted to real internally.
This is done to ensure none of the internal code needs to be changed.

This change also removes transform_reals_to_half which is no longer
necessary.

Change-Id: I978151d001de5492576fb0eddfa0608cd4e99149
2017-01-17 16:06:42 -08:00
517fb2f410 Remove free() and retain() from Tensor (#464) 2017-01-17 18:15:11 -05:00
fef2b1526d Adding macros to convert between real and accreal 2017-01-17 15:14:45 -08:00
3719994c96 Remove redundant code in THGenerateAllTypes.h 2017-01-17 15:12:43 -08:00
35c2821d71 Add documentation for methods defined in TensorBase (#462) 2017-01-17 17:40:54 -05:00
e4812b3903 add binary version to setup.py 2017-01-17 14:14:01 -08:00
4cc11066b2 Add torch.utils.data docs and improve notes (#460)
* Add torch.utils.data docs and improve notes
2017-01-17 14:51:05 -05:00
85b64d77b7 Merge pull request #461 from colesbury/visiondocs
Add torchvision reference to docs
2017-01-17 14:50:00 -05:00
db7948d7d5 Add torchvision reference to docs
Some documentation is just copied from the GitHub readme for now.
2017-01-17 11:40:33 -08:00
3d40c0562d improve build_all.sh 2017-01-17 09:49:48 -08:00
146bcc0e70 adding binary build copy option to build_all 2017-01-17 07:52:18 -08:00
8d9f6c2583 Minor fixes to docs 2017-01-17 10:19:14 -05:00
ac32d8b706 fix docs 2017-01-16 21:08:14 -05:00
15c1dad340 Minor fixes and torch.cuda docs 2017-01-16 20:38:14 -05:00
6d8baf7c30 Fix Sphinx warnings 2017-01-16 20:38:14 -05:00
7ced682ff5 Add notes 2017-01-16 20:38:14 -05:00
89cab4f5e6 fix readme language and links 2017-01-16 20:35:08 -05:00
a0afb79898 add pic to readme 2017-01-16 20:15:19 -05:00
d6fa3b3fd5 Deprecate nn.Container in favor of nn.Module 2017-01-16 19:07:37 -05:00
f91bb96071 Remove cmin, cmax and cinv 2017-01-16 19:07:37 -05:00
3b6644d195 Minor README fix 2017-01-17 00:15:06 +01:00
652b468ec2 Readme improvements 2017-01-16 18:05:26 -05:00
af110d37f2 remove old docs 2017-01-16 15:06:08 -05:00
38967568ca Make load_state_dict() more restrictive (#451)
The load_state_dict() function now raises an error if the argument
state_dict has extra keys or is missing keys.

Previously, load_state_dict() ignored extra and missing keys, which made
it hard to notice when you load an invalid state_dict. This could
happen, for example, if you save the state_dict for a DataParallel, but
load it into a single model.

The state_dict() function now only includes the Tensor data from the
parameters, which reduces checkpoint size by not saving gradients.
2017-01-16 13:06:00 -05:00
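A minimal sketch of the stricter behaviour (illustrative snippet, not from the commit; the exact exception type may differ between versions):

    >>> import torch.nn as nn
    >>> model = nn.Linear(4, 2)
    >>> state = model.state_dict()
    >>> state['extra'] = state['weight']   # simulate a checkpoint with an unexpected key
    >>> model.load_state_dict(state)       # now raises an error instead of silently ignoring 'extra'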
df79631a72 Fix a mistake in autograd docs 2017-01-16 12:59:47 -05:00
95f0fa8a92 Change .grad attribute of Variables to be a Variable 2017-01-16 12:59:47 -05:00
1c6ff53b60 Make storages unresizable once exported to numpy 2017-01-16 12:59:47 -05:00
1dbf44c00d Add SmoothL1Loss to functional 2017-01-16 12:59:47 -05:00
1259a0648b Make nn containers copyable 2017-01-16 12:59:47 -05:00
b0055f6229 Improve argument checks for long arg options 2017-01-16 12:59:47 -05:00
90040afc44 Fix cwrap option filtering 2017-01-16 12:59:47 -05:00
59bc96bdc2 Check dropout probability 2017-01-16 12:59:47 -05:00
676ffee542 Check params type in optimizers 2017-01-16 12:59:47 -05:00
77136e4c13 Add anything in torch.legacy docs 2017-01-16 12:59:47 -05:00
604e13775f Add optim docs 2017-01-16 12:59:47 -05:00
02380a74e3 Add warnings to multiprocessing docs 2017-01-16 12:59:47 -05:00
4461ae8090 include cstddef for msvc 2017-01-15 23:45:48 +08:00
2b948c42cd Add SpatialAdaptiveAveragePooling. 2017-01-14 19:44:07 -06:00
133c1e927f fix readme, bump version 2017-01-14 17:47:35 -05:00
b2ae054410 Add SpatialAdaptiveAveragePooling. 2017-01-14 15:27:52 -06:00
2290798a83 if nccl is available, do not compile it and load system version 2017-01-14 10:09:48 +01:00
fd600b11a6 Merge commit '2b88d85505d7317f980e69201e72694d6d5905a4' 2017-01-13 15:58:54 -08:00
b5c9f5c4c3 Merge commit 'ca74bb17b8823d74b83433e2743f23e572501c72' 2017-01-13 15:55:19 -08:00
b8a5b1ed8e Merge commit 'e67b525388a5ae11ed243e94bbc25b4934b03a66' 2017-01-13 15:54:49 -08:00
ca74bb17b8 Merge pull request #675 from pavanky/more-atomic-fix
Ensure atomicAdd(double) is visible to host side code
2017-01-13 17:21:39 -05:00
69d8331195 Use functools.partial 2017-01-13 23:10:45 +01:00
eab5c1975c Avoid strict aliasing warning in float/half conversions. 2017-01-13 14:08:25 -08:00
e67b525388 Merge pull request #911 from gchanan/convWarning
Avoid strict aliasing warning in float/half conversions.
2017-01-13 17:06:17 -05:00
5171e56b82 Ensure atomicAdd(double) is visible to host side code
Just replicating behavior of the cuda headers
2017-01-13 14:05:36 -08:00
f467848448 Avoid strict aliasing warning in float/half conversions.
Verified that at least for GCC 4.47 this generates identical code.
2017-01-13 13:58:03 -08:00
7e4ddcfe8a Remove names from register_hook calls (#446)
The register hook calls now return an object that can be used to remove
the hook. For example,

   >>> h = module.register_forward_hook(callback)
   >>> h.remove()  # removes hook

Or as a context manager:

   >>> with module.register_forward_hook(callback):
   ...     pass

This makes it easier for libraries to use hooks without worrying about
name collisions.
2017-01-13 15:57:03 -05:00
3152be5fb3 Add repr to RNNs and Embedding (#428) 2017-01-13 15:53:52 -05:00
b076944dc5 Fix for atomicAdd(double) for CUDA_VERSION < 8000 2017-01-13 12:43:15 -08:00
3a07228509 Add ConvTranspose1d module (#449) 2017-01-13 15:22:57 -05:00
24a2f2e3a0 Add MaxUnpool1d module (#447) 2017-01-13 14:36:25 -05:00
b32dd4a876 add cudnn deb package installation paths to cudnn discovery, add 5.1.10 to load options (#448) 2017-01-13 14:32:23 -05:00
4f4bd81228 Fixes to autograd: (#442)
- Non-differentiable outputs could prevent a gradient computation (see
   test_dep_nograd)
 - Crash in backward on a Variable that doesn't require grad (issue
   #438)
 - Stochastic functions could be backpropped through multiple times
2017-01-13 13:51:47 -05:00
59b23d79c6 fix cudnn rnn batch_first with tests (#445)
* fix cudnn rnn batch_first with tests
2017-01-13 13:40:27 -05:00
8c14630e35 Fix Tensor.apply_() (#444)
Fixes #411
2017-01-12 21:51:18 -08:00
cc32de8ef9 Fix typos etc. in docs
- replace "long" with the Python type "int"
 - remove "reshape" from torch.rst since torch.reshape is not
   implemented
2017-01-12 21:25:50 -08:00
44696c1375 Fix MaxPool2d on 3D CUDA inputs (#443)
Currently, MaxPool2d returns 4d indices for 3d CUDA inputs, but
correctly returns 3d indices for 3d CPU inputs.
2017-01-12 21:04:25 -08:00
82088a8110 parallelizing catArray to multiple tensors per kernel (#635) 2017-01-12 12:57:30 -08:00
d5e45b2278 Add AvgPool1d which just uses AvgPool2d implementation (#439) 2017-01-12 15:07:11 -05:00
bdfef2975c adding more docs for torch.* functions 2017-01-11 08:19:49 -08:00
b4bb4b64a1 simd.h: really fix the arm64 (i.e. Aarch64) build 2017-01-11 10:07:32 +00:00
3e91c5e1ad Merge pull request #668 from gchanan/thrustalloc
Add THCThrustAllocator.cuh to install files
2017-01-10 19:27:09 -05:00
2b88d85505 Re-route thrust memory allocation to THCudaMalloc / THCudaFree in cunn. 2017-01-10 10:42:29 -08:00
50651970b8 Merge pull request #666 from gchanan/thrustalloc
Re-route thrust memory allocation to THCudaMalloc / THCudaFree
2017-01-10 12:02:51 -05:00
4a8906dd8a Add THCThrustAllocator.cuh to install files so downstream projects can use it. 2017-01-10 09:02:28 -08:00
68e2769a13 Re-route thrust memory allocation to THCudaMalloc / THCudaFree
so it can use the caching allocator.
2017-01-10 08:35:41 -08:00
17c998e99a fixing arm64 build 2017-01-10 00:15:11 -05:00
35758f51f2 Get rid of a few unused imports. 2017-01-09 15:41:58 -08:00
e8102b0a9b fix compiler warning in THCS 2017-01-09 15:19:13 -08:00
04f2bc9aa7 Fix bug in squeeze backward (#425) 2017-01-09 16:29:37 -05:00
d070178dd3 Instantiate 128kb of scratch space in GPU memory per-device by default 2017-01-09 13:21:18 -08:00
c9ec7fad52 Add model_zoo utility to torch.utils (#424)
This was originally part of a torchvision PR, but I think it will be
useful outside vision, such as for distributing word embeddings.
2017-01-09 13:16:58 -05:00
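Illustrative usage of the new utility (the URL below is a placeholder, not a real checkpoint):

    >>> from torch.utils import model_zoo
    >>> state = model_zoo.load_url('https://example.com/checkpoints/resnet18.pth')
    >>> model.load_state_dict(state)   # assuming `model` is an nn.Module with matching parameters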
f0a6ca4d53 BatchNorm fixes (#423)
- don't use cuDNN for half inputs because weight, bias, running_mean,
   etc. are required to be of different type than for THCUNN
 - accept 3D inputs (N,C,L) in BatchNorm1d
 - remove accidental 'use_cudnn=False'
2017-01-09 13:16:51 -05:00
fd92470e23 Add cuDNN bindings for BatchNorm (#421) 2017-01-07 15:35:24 -05:00
8369664445 Minor doc fixes 2017-01-06 21:51:35 +01:00
35e1adfe82 documentation parity with torch7 for catArray impl 2017-01-06 11:55:57 -08:00
eb91fc5e5d Minor fixes to docs (#412) 2017-01-06 10:59:24 -05:00
d186fdb34c Fix THHalf issues with MSVC. 2017-01-05 08:09:09 -08:00
0f04f71b7e fix API reference link 2017-01-05 02:46:19 -05:00
87f1959be7 adding proper categories to torch.rst 2017-01-04 23:20:57 -05:00
a538055e81 fix invalid use of THPUtils_invalidArguments in sparse tensors 2017-01-04 21:47:48 -05:00
0e345aaf6d Fix invalidArguments to take kwargs and out into account (#397) 2017-01-04 19:49:11 -05:00
c976dd339d remove .zero() on grad_input conv and batch_norm 2017-01-05 01:48:50 +01:00
71cef62436 Fix condition for threadArgErrorHandler
Some error handlers may not have any data associated with them
2017-01-04 16:43:31 -08:00
3a29055044 Fix rnn sphynx docs (#405) 2017-01-04 19:17:10 -05:00
59d66e6963 Sparse Library (#333) 2017-01-05 00:43:41 +01:00
46bc43a80f fixing loss layer docs 2017-01-04 18:40:51 -05:00
7fa60b2e44 fixing docs of activations, pixelshuffle, sparse for rst 2017-01-04 18:40:51 -05:00
c78893f912 removing Image: references in nn activation docs 2017-01-04 13:51:56 -05:00
0d2a4e1a9e fix dropout docs for rst 2017-01-04 13:49:43 -05:00
088f14c697 fix batchnorm and linear docs for rst 2017-01-04 13:35:55 -05:00
4bf7be7bd5 fix RNN module docs for rst 2017-01-04 13:22:02 -05:00
b2ab6891c5 fix the rest of Pool module docs for rst 2017-01-04 12:51:55 -05:00
39ab5bcba8 fix MaxPool1d,2d,3d docs for rst 2017-01-04 03:11:48 -05:00
42f131c09f fixing nn.Conv* documentation for rst and adding nn docs to sphinx 2017-01-04 02:11:27 -05:00
89dca6ffdc Add a patch to stop Sphinx from cross-referencing ivar tags 2017-01-03 18:31:08 -05:00
b7f36f93d5 Expand autograd docs and add sections 2017-01-03 18:31:08 -05:00
58320d5082 Add multiprocessing docs 2017-01-03 18:31:08 -05:00
a461804a65 adding docs for more torch.* functions 2017-01-03 18:29:50 -05:00
817f6cc59d adding linspace, logspace, neg and range 2017-01-03 18:29:50 -05:00
108936169c implement more torch.* docs, remove zero, cauchy, log_normal from torch.* docs as they are not stateless 2017-01-03 18:29:50 -05:00
f60ae085e6 Float -> float, Long -> long 2017-01-03 18:29:50 -05:00
85dda09f95 fixed names and other cosmetics 2017-01-03 18:29:50 -05:00
4f479a98d4 fix indentation issue for all examples, add doc for add 2017-01-03 18:29:50 -05:00
35ba948dde add doc for *mm* functions, *mv* functions and addcmul, addcdiv 2017-01-03 18:29:50 -05:00
6b4ed52f10 adding docs for some torch.* functions, removing all, any stateless methods 2017-01-03 18:29:50 -05:00
dcf5f8671c Add __pow__ to Tensor and list additional undocumented functions (#398) 2017-01-03 13:38:44 -05:00
5340291add Update FindARM.cmake
Fix typos
2017-01-03 12:29:06 -05:00
1c6fe58574 Add gather and scatter to autograd 2017-01-02 13:42:59 -05:00
9f2111af73 Rename Variable.no_grad to Variable.detach 2017-01-02 13:42:59 -05:00
2ed6c6d479 Fix leaf Variable handling in autograd 2017-01-02 13:42:59 -05:00
01ac2d3791 Merge commit '1b97f088cb9e42717122795463a800bf3f503adf' 2017-01-02 09:39:45 -08:00
eac687df5a Merge commit '849cbf3a4774727eadb97c27af13bfbdc976a02a' 2017-01-02 09:39:20 -08:00
6a2785aef7 remove link_prefix from linker arguments (#395) 2017-01-02 12:37:52 -05:00
849cbf3a47 small cmake fix 2017-01-01 19:02:33 -05:00
a0c614ece3 unsqueeze instead of view in dataloader 2017-01-01 23:38:54 +01:00
1b97f088cb Merge pull request #651 from pavanky/cat
Adding support for empty tensors in cat, catArray
2017-01-01 12:47:19 -05:00
097399cdeb Merge branch 'master' into contiguous-cat-1d 2017-01-01 12:34:46 -05:00
7ee152881e Merge commit '3074f8eb8103ecdcbbcbb8d49332d9e7d6f3141c' 2017-01-01 01:13:17 -08:00
3074f8eb81 Removing TH_GENERIC_USE_HALF, TH_NATIVE_HALF, TH_GENERIC_NO_MATH (replaced where appropriate with TH_REAL_IS_HALF), removed half from THGenerateAllTypes, added an explicit THGenerateHalfType.h 2017-01-01 00:57:51 -08:00
748208775f Merge commit '5df17050bf82337d13dbd2108bd17922ac38956c' 2017-01-01 00:08:55 -08:00
5df17050bf Revert "TH_GENERIC_USE_HALF=1 by default, half enabled by default" 2017-01-01 01:06:18 -05:00
92df0eb2bf removing unneeded flags in build_all.sh 2016-12-31 20:16:50 -08:00
995195935b Merge commit 'be8376eb883d2f5a466994e024cde44e6adc6130' 2016-12-31 20:10:11 -08:00
be8376eb88 TH_GENERIC_USE_HALF=1 by default, half enabled by default 2016-12-31 20:07:18 -08:00
b650a45b9c fix botched merge in setup.py 2016-12-31 16:55:53 -05:00
8a20e22239 Add torch.stack 2016-12-31 16:25:39 -05:00
7c5014d803 Add torch.split, torch.chunk and change default dim of cat to 0 2016-12-31 16:25:39 -05:00
62ac1b4bdd Implement missing cases of __matmul__ 2016-12-31 16:25:39 -05:00
0633c08ec9 Add is_shared() method for storages and tensors 2016-12-31 16:25:39 -05:00
cf87cc9214 Check valid configurations of Variable flags 2016-12-31 16:25:39 -05:00
f908432eb3 Ensure that Variable's grad is shared between processes 2016-12-31 16:25:39 -05:00
1bd291c57c Fix multiprocessing tests on macOS 2016-12-31 16:25:39 -05:00
b277df6705 Doc css fixes for mobile and large screens (#389) 2016-12-31 12:01:01 -05:00
ec4d597c59 test fix 2016-12-31 11:08:34 -05:00
d2ef49384e Add custom docs stylesheet (#387) 2016-12-31 10:32:00 -05:00
b5dc36f278 explicitly linking against v1 libs to avoid lua-torch conflicts (#386) 2016-12-31 10:30:36 -05:00
41976e2b60 Merge commit '3dac1b9936a62225cf8516d6d7830fe6c83039ae' 2016-12-30 21:07:13 -08:00
3dac1b9936 cmake C flags fix 2016-12-31 00:06:26 -05:00
d2bb56647f Merge commit '224422eed6813c15b3c3b2c0dcd5e0187ec660a1' 2016-12-30 19:51:01 -08:00
224422eed6 cmake fix 2016-12-30 22:50:06 -05:00
3c26f7a205 Merge commit '10f78985e72fb6834b435ac3f8d0890fa6614365' 2016-12-30 19:24:00 -08:00
9ac9809f27 Merge commit 'd8f4d5f91e3680478a6843d49d7295c1165618f0' 2016-12-30 19:23:41 -08:00
7bf6e984ef Merge commit 'dc95f66a954ad18b80f3f649f8e2c8507c048b74' 2016-12-30 19:23:17 -08:00
10f78985e7 adding TH_LIBRARIES and THC_LIBRARIES var to THCUNN cmake 2016-12-30 22:20:29 -05:00
dc95f66a95 adding TH_LIBRARIES var to THC cmake 2016-12-30 22:10:18 -05:00
d8f4d5f91e adding TH_LIBRARIES var to THNN cmake 2016-12-30 22:08:09 -05:00
47f56f0230 Merge commit '43fbdd3b45d4351623a4aa9c8d5e6dba9eac259a' 2016-12-30 17:46:04 -08:00
b4018c4c30 Merge commit '803d0320771365754658ac74587cc082c2a61fa7' 2016-12-30 17:45:45 -08:00
43fbdd3b45 workaround for luarocks 12.04 bug 2016-12-30 20:44:35 -05:00
803d032077 workaround for luarocks 12.04 bug 2016-12-30 20:44:21 -05:00
9d2d884313 Merge commit 'b5cf1d2fc71604f472a07d0181a05a7f09e276c2' 2016-12-30 16:50:25 -08:00
c0600e655a Merge commit 'c1ca9044bd6dccd293471c6caeeeea4ebd97d61b' 2016-12-30 16:49:56 -08:00
671ed89f2a Merge commit '52c2a92013c45afa5df61a68b16695663ee9fab5' 2016-12-30 16:49:29 -08:00
e0372643e1 Merge commit '541ab961d8f9a02bbbe1a06ba25027116ee93c20' 2016-12-30 16:49:05 -08:00
b5cf1d2fc7 adding THCUNN_SO_VERSION 2016-12-30 19:06:23 -05:00
c1ca9044bd add THNN_SO_VERSION 2016-12-30 19:04:31 -05:00
52c2a92013 adding THC_SO_VERSION property 2016-12-30 19:02:50 -05:00
541ab961d8 adding TH_SO_VERSION option 2016-12-30 18:56:59 -05:00
849794cd2c Remove deprecated and unimplemented functions (#383) 2016-12-30 18:37:44 -05:00
f47fa2cb04 use __get_cpuid when available 2016-12-30 18:10:57 -05:00
7a162dd97a Fix outputs of torch.* comparison functions 2016-12-30 23:02:57 +01:00
b123bace1b Rename torch.autograd.functions to torch.autograd._functions 2016-12-30 23:02:57 +01:00
483490cc25 Move PixelShuffle implementation to functional 2016-12-30 23:02:57 +01:00
8d60e39fdc Rename torch.nn.functions to torch.nn._functions 2016-12-30 23:02:57 +01:00
e7dff91cf3 Fix for multinomial autograd function 2016-12-30 23:02:57 +01:00
ab5776449c Add documentation for some torch.xxx functions (#382) 2016-12-30 17:01:47 -05:00
a229582238 Merge pull request #875 from atkayu/add_histc2
Add a new function bhistc to calculate the histograms of a batch of images in a single call
2016-12-30 13:41:42 -05:00
a0df8fde62 Merge pull request #592 from joker512/master
fix: cunn can't find cutorch sources
2016-12-30 11:31:57 -05:00
e4a3aa9295 Change container doc to assign child modules via attributes 2016-12-30 15:51:09 +01:00
be98c5d12d Start documenting torch.Tensor (#377) 2016-12-30 01:21:34 -05:00
bc6a71b1f5 Add Function docs 2016-12-30 00:15:06 -05:00
26f1e2ca9c Add basic autograd docs 2016-12-30 00:15:06 -05:00
75d850cfd2 Fix optim docs 2016-12-30 00:15:06 -05:00
f4870ca5c6 Fix nn docs 2016-12-30 00:15:06 -05:00
235d5400e1 Merge pixelshuffle function into module (#375) 2016-12-29 21:38:37 -05:00
491d5ba4fd add new flags to build_all.sh 2016-12-29 18:16:59 -08:00
d42eadfeb9 Merge commit '2975f539ff8ac9b8e07fb2b610bd69a1596d4c3c' 2016-12-29 17:51:34 -08:00
9a40821069 Merge commit '1ac038ab243bb2718b37cbd81eadbfeb2a234252' 2016-12-29 17:51:13 -08:00
2975f539ff sort cuda 8.0+ fix 2016-12-29 17:47:30 -08:00
64ca584199 Fix group support in convolution modules (#374) 2016-12-29 20:01:39 -05:00
5263469e21 Fix handling of zero sizes in caching host allocator 2016-12-29 15:36:49 -08:00
c367e0b64e Support dilated 1d and 3d convolutions (#372)
Fixes #367
2016-12-29 18:20:32 -05:00
183b3aacd2 Hold CuDNN PRNG state between RNN iterations 2016-12-30 00:14:55 +01:00
101950ce92 fix repr in legacy.nn.linear 2016-12-29 17:30:46 -05:00
239ae94389 fix in conv repr 2016-12-29 17:30:46 -05:00
55e850d825 test if modules can be printed with fixes 2016-12-29 17:30:46 -05:00
62af45d99f Basic functional interface (#354) 2016-12-29 22:53:57 +01:00
1ac038ab24 Merge pull request #882 from amrobbins/ppcvectorinstxns
Add support for VSX vector instructions on PPC
2016-12-29 14:24:56 -05:00
77a925ab66 Add THHalfTensor support to cutorch (#655)
* Add THHalfTensor support to cutorch.
2016-12-29 14:23:45 -05:00
d0d33d3ae7 Add support for torch.HalfTensor (#874)
* Add support for torch.HalfTensor.

* Improvements/Simplifications for torch.HalfTensor.

Improvements/Simplifications:
1) Defines half type as TH_Half, so as to not conflict with cutorch
version.  Previously, these were defined as the same "half" type and
required proper ordering of includes to ensure type was only defined
once, which would have affected all downstream projects.
2) No longer generates math functions that are not actually defined
on torch.HalfTensor, e.g. maskedFill, map, etc.
3) Adds tests for all available torch.HalfTensor functions
4) Allows compiling without TH_GENERIC_USE_HALF (so if there's a
problem can just unset that in CMakeLists rather than backing out)
5) Some simplifications: removes a new copy optimization and
some TH_HALF literal definitions

Limitations:
Because math functions are not defined, some "non-math" operators
on torch.HalfTensor give an error message, e.g. __index__/__newindex__
with a ByteTensor apply a mask, but masks aren't implemented.  These
limitations aren't always obvious, (e.g. for documentation purposes),
but they should always give an error message.

* Rename TH_HALF to THHalf.
2016-12-29 14:23:26 -05:00
9b7eceddc8 Accept outputs in out argument 2016-12-29 12:25:59 +01:00
24af02154c Use ForkingPickler for sharing tensor/storages across processes (#344)
This hooks into the (internal) ForkingPickler class in multiprocessing
to reduce tensors, storages, and CUDA events instead of our queue from
joblib. This makes it easier to use the standard multiprocessing classes
in later versions of Python.

This also exposes:

 - Tensor/Storage.share_memory_()
 - Module.share_memory()

These methods move the CPU tensors and storages to shared memory. If
you're using the "fork" method of multiprocessing, these objects can be
directly inherited instead of serialized through a queue.
2016-12-28 20:34:23 -05:00
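A small sketch of the newly exposed methods (illustrative only):

    >>> import torch
    >>> t = torch.FloatTensor(10)
    >>> t.share_memory_()   # move the underlying storage to shared memory
    >>> t.is_shared()
    True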
86ec14e594 Add support for VSX vector instructions on PPC
Added support for the fill, diff, scale, mul and add functions using
PPC CPU vector instructions. These are used in place of the versions
of these functions written for x86, when compiled on PPC.

This fixes a compile failure on PPC
2016-12-28 16:58:09 -06:00
8a29338837 Use cuDNN for Conv3d and ConvTranspose3d (#359)
I've also updated test_nn.py to run marked tests twice: once with cuDNN
enabled and once with it disabled.
2016-12-28 16:14:47 -05:00
29918c6ca5 Copy libnccl.so.1 instead of libnccl.so
Occasionally, my PyTorch checkout gets into a bad state where libnccl.so
does not exist, but the NCCL makefile doesn't build it because
libnccl.so.1 exists. Switch to copying libnccl.so.1 to work around this.
2016-12-28 20:21:31 +01:00
80a44e84dc Change multinomial return type for CUDA 2016-12-28 18:15:17 +01:00
5497b1babb Use TypeError in invalidArguments 2016-12-28 18:15:17 +01:00
bef70aa377 Make type checking more strict and fix topk arguments 2016-12-28 18:15:17 +01:00
0d30f77889 Make variables picklable with protocols <2 2016-12-28 18:15:17 +01:00
e27bb3e993 Minor fixes 2016-12-28 18:15:17 +01:00
179d5efc81 Merge commit '310ec57fd7176e07137ab7bc717f3602b6f53aa5' 2016-12-28 07:33:37 -08:00
b55e38801d rename histc2 to bhistc 2016-12-28 16:26:09 +08:00
e704ec5c6f Merge commit '46f024846698cd8201d6c1804f21bffda15a2069' 2016-12-27 19:12:45 -08:00
6cda6bb34c Merge commit 'd2a93c310292c9427056e02ac7e0d5cca12a04a2' 2016-12-27 19:12:21 -08:00
46f0248466 Use bool for sizeAverage in SoftMarginCriterion 2016-12-28 00:36:11 +01:00
310ec57fd7 Fix typos in THCTensorRandom 2016-12-28 00:16:53 +01:00
cd82b2b869 Implement comparison and logical operators for tensors 2016-12-28 00:04:08 +01:00
126a1cc398 Add Sphinx docs 2016-12-28 00:03:39 +01:00
bf650f05b3 Merge pull request #652 from apaszke/multinomial
Make multinomial return a LongTensor (compatible with CPU version)
2016-12-27 17:54:54 -05:00
f2606a7502 Make multinomial return a LongTensor (compatible with CPU version) 2016-12-27 23:12:12 +01:00
b07fe52ee0 Adding support for empty tensors in cat, catArray 2016-12-27 13:37:42 -08:00
b07358b329 renaming test to avoid dot in test name 2016-12-27 13:34:09 -08:00
2aea8077f9 renaming test to avoid dot in test name 2016-12-27 13:17:04 -08:00
41f9c14297 Merge commit '135687f04a4e4e0722c14f096c9a1fc647c95f07' 2016-12-27 13:12:26 -08:00
135687f04a critical bugfix in storage copy 2016-12-27 13:11:32 -08:00
b140e70b58 Add autograd.backward (#341) 2016-12-26 19:10:35 -05:00
ec987b57f6 removing 3.3, 3.4 from README badges 2016-12-26 14:52:36 -05:00
596677232c Add a different code path for catting contiguous tensors along the first dimension, for speed reasons.
Fix a bug in cat when catting with an empty tensor along first dim (it added an extra dim).
Fix the ambiguous 'catting along last dimension' sentence in the doc and change the behavior to pick the maximum last dimension over all input tensors.
Now empty tensors are allowed.
2016-12-26 10:23:42 -05:00
9d74e139e5 removing 3.3 and 3.4 from travis build 2016-12-25 15:13:13 -05:00
d2a93c3102 remove unused buffer in avg pooling 2016-12-25 20:00:10 +01:00
bc475cad67 Move max pooling construction logic to functions (#343) 2016-12-25 10:28:11 -05:00
45d6212fd2 default args for conv functions 2016-12-25 01:55:00 -05:00
f45d75ed22 make the CUDA-aware tests backoff if CUDA no available 2016-12-24 15:36:00 -05:00
b03407289f Merge commit '55a794e6ec8d01fc8cceee14ce23ec501e517570' 2016-12-24 11:06:27 -08:00
55a794e6ec fixing OpenMP longjmp bugs in *MaxUnpooling 2016-12-24 13:54:43 -05:00
93ed476e7d adding LAPACK double bindings, adding fmod and remainder 2016-12-22 17:36:47 -08:00
10faa303bc Merge commit '6fa371cb0db9f43e3d05746c7e90516975052589' 2016-12-22 17:35:13 -08:00
6fa371cb0d bugfix for qr skinny matrices 2016-12-22 16:29:53 -08:00
18a2691b4b Fix memory leak in THStorage_copyCudaXXX 2016-12-22 13:49:31 -08:00
f7bd3f7932 added pixel shuffle layer + tests
removed duplicate save_for_backward
2016-12-22 21:43:38 +01:00
f8dee4620a add a new function histc2 2016-12-22 10:11:58 +08:00
800e24616a Merge commit 'fa61159dd0bfd9bbb190e1dfbd90a68f4d3c30c8' 2016-12-21 12:40:41 -08:00
d63a435787 Merge commit 'f16a624b35dd28fbd4cdcd3bd08dfc2421c3e2b0' 2016-12-21 12:40:20 -08:00
a9c2809ce3 change the order of cudnn libs 2016-12-21 05:44:16 -08:00
fa61159dd0 cremainder, cfmod implementations (take 2) (#646) 2016-12-20 20:43:07 -05:00
a215e000e9 fix for out of place tests and for non standard I/O pipes 2016-12-20 16:13:24 -08:00
f16a624b35 correctness fixes for mod and remainder for integer type tensors. 2016-12-20 11:41:16 -08:00
61c2896cb8 Merge pull request #638 from pavanky/multinomial_fix
Bugfix for multinomial distribution
2016-12-20 14:08:59 -05:00
22ebc3f205 Revert "Add support for cremainder, cfmod" 2016-12-20 09:35:41 -05:00
8fa9f443ec Merge pull request #641 from killeent/cfuncs
Add support for cremainder, cfmod
2016-12-19 20:49:29 -05:00
bb72ccf1a5 Support CUDA IPC in Python 3 (#203)
CUDA IPC only works with Python 3 using the "spawn" start method. You
can select the start method using the get_context method:

 import torch.multiprocessing as mp
 ctx = mp.get_context('spawn')
 queue = ctx.Queue()
 event = ctx.Event()
2016-12-19 20:42:53 -05:00
2e73456f5c Fix compiler warnings in Tensor.cpp 2016-12-19 20:35:08 -05:00
3e49a2b4b7 Prevent deepcopy from changing Parameters into Variables 2016-12-19 20:35:08 -05:00
4694e4050b Fix printing bug when all values are NaN or inf 2016-12-19 20:35:08 -05:00
59b9eeff49 Expose gather and equals for CUDA tensors 2016-12-19 20:35:08 -05:00
1744fad8c2 Use 'void' for no-arg function 2016-12-19 12:23:17 -08:00
e46d942ca6 Fix double initialization of HalfStorage (#331) 2016-12-19 15:19:41 -05:00
93a6136863 Add support for cremainder, cfmod 2016-12-19 11:25:10 -08:00
230bde94e7 fix about section 2016-12-19 11:00:53 -05:00
20fffc8bb7 Fix torch.is_tensor for half tensors (#322)
Fixes #311
2016-12-19 15:27:47 +01:00
861a3f3a30 avoid shadowing warnings 2016-12-17 14:01:11 -08:00
ee52102943 small change from set to dict 2016-12-17 13:39:04 -08:00
26516f667e Fix multinomial bug and decrease precision of normal test (#325) 2016-12-17 21:40:13 +01:00
5586f48ad5 add cudnn 5.0.5 to supported versions (#321) 2016-12-17 07:57:20 -05:00
cc6e3c92d2 ensure that legacy linear has gradWeight and gradBias fields (#319) 2016-12-17 00:06:58 +01:00
a2ef5782d0 Revert "Bugfix of type in THCTensor macro." 2016-12-16 17:20:57 -05:00
0c1c0e21b8 Bugfix of type in THCTensor macro.
A fix for issue #632.
2016-12-16 15:37:06 -05:00
ffcc38cf05 Deterministic ordering of parameters and buffers. (#317)
Uses the assignment syntax to get deterministic ordering of parameters.
The ordering of parameters using the constructor syntax is
non-deterministic because kwargs use dict() in Python 3.5 and earlier.
2016-12-16 14:45:56 -05:00
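Roughly, the assignment syntax referred to above (a sketch; the key order shown assumes the deterministic ordering introduced here):

    >>> import torch.nn as nn
    >>> class Net(nn.Module):
    ...     def __init__(self):
    ...         super(Net, self).__init__()
    ...         self.conv = nn.Conv2d(1, 8, 3)   # registered first
    ...         self.fc = nn.Linear(8, 2)        # registered second
    >>> list(Net().state_dict().keys())
    ['conv.weight', 'conv.bias', 'fc.weight', 'fc.bias']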
cc24b68584 Merge commit 'f413ee087df1a4bbd8b5a9baba83d07ae0729ea0' 2016-12-16 05:29:16 -08:00
8a70067b92 Add support for stochastic functions in autograd (#294) 2016-12-16 13:14:37 +01:00
33b227c45b serialization bug fix (#314) 2016-12-16 12:05:36 +01:00
fb68be952d Bugfix for multinomial distribution
- Ensures the index of the first bin from the cdf is returned.
2016-12-15 16:01:37 -08:00
f413ee087d Add missing free in LookupTable (#400) 2016-12-15 22:17:37 +01:00
6495f5dd30 fix bounds issue in snprintf 2016-12-14 17:11:26 -08:00
8e09f0590b Make sure that C extension was compiled with cuDNN before using it 2016-12-15 00:47:55 +01:00
08d346df9c Print libraries used for building the extension 2016-12-15 00:47:55 +01:00
12cf96e358 Don't change requires_grad of parameters in train() and eval() 2016-12-15 00:47:55 +01:00
765a720d1c Add support for tds.Vec and tds.Hash in load_lua 2016-12-15 00:47:55 +01:00
cace62f94c Fix a bug in narrow docs 2016-12-15 00:47:55 +01:00
767c96850d Return False from torch.cuda.is_available() when no devices are visible 2016-12-15 00:47:55 +01:00
b73e78edbb Check nDimension in t() and t_() 2016-12-15 00:47:55 +01:00
7914cc119d Fix bmm for Variables 2016-12-15 00:47:55 +01:00
2b13eb2a6c Fix naming of setup.py env toggles 2016-12-15 00:47:55 +01:00
8768e64e97 Allow returning changed gradients from the hooks 2016-12-15 00:47:55 +01:00
9212b9ca09 fix wrong export directive for THCCachingHostAllocator (#633)
2016-12-15 00:36:03 +01:00
0d0f197682 Add note on Huber loss (#310) 2016-12-14 21:39:42 +01:00
281e34d1b7 fixes for changes in THNN API 2016-12-13 18:10:07 -08:00
287ba38905 Merge commit 'ed9dbff4e0295dbeb2e8de908cb8c1109c278a8a' 2016-12-13 17:23:56 -08:00
ed9dbff4e0 removing ifdef 2016-12-13 17:22:52 -08:00
6ba4e48521 Merge commit '3adcb2c157ed7df5aaff9b59d4526aa24ec770db' 2016-12-13 16:49:38 -08:00
b7269f2295 Merge commit '220183ed783101f19d88cb8fb3052fd4abc7234f' 2016-12-13 16:49:15 -08:00
5ab317d4a6 Merge commit '258c9ffb2c2d23a06b153aa9161a88ad930cfbbc' 2016-12-13 16:48:45 -08:00
431bcf7afa Merge commit '56245426ebcf239363867905ca2a4cea676dd45d' 2016-12-13 16:48:16 -08:00
41909e8c5b adding a couple more imports 2016-12-13 16:47:00 -08:00
56245426eb small fixes to allocator 2016-12-13 16:45:01 -08:00
3adcb2c157 Check that batch size matches the target size in ClassNLLCriterion (#399) 2016-12-14 00:25:05 +01:00
6d12185cc9 Fixed compilation on Raspberry PI without NEON 2016-12-13 17:30:54 -05:00
258c9ffb2c Implement bernoulli with element-wise probabilities for all types 2016-12-13 11:10:28 -08:00
dede431dd9 More state_dict fixes (#305)
In #304 I forgot even more...

I did a repo search and this time it should be all.
2016-12-13 13:59:06 +01:00
6312d29d80 Another documentation change, missed one in #303 (#304)
Apparently load_parameter_dict was also renamed to load_state_dict
2016-12-13 12:47:40 +01:00
ab5f26545b Correct documentation to be in line with #237 (#303)
.parameter_dict was renamed to .state_dict in #237

This documentation change reflects that.
2016-12-13 12:32:42 +01:00
6567c1342d small doc fixes 2016-12-12 23:51:54 +01:00
3d6c2e023c TensorInfo related code documentation 2016-12-12 10:06:13 -08:00
89d930335b fix tests for GPU-less setup (#298) 2016-12-12 10:56:57 +01:00
04393cd47d fix gcc-6 build on os x (#297) 2016-12-12 00:01:15 +01:00
28f0cf6cee Add docstring support to cwrap (#295) 2016-12-11 23:25:14 +01:00
1af9a9637f Refactor copy and release GIL during copy (#286) 2016-12-11 21:54:58 +01:00
1031d671fb legacy fixes (#287) 2016-12-11 20:13:48 +01:00
ee91b22317 Merge pull request #394 from gchanan/volumShapeChecks
Improve Volumetric shape checking.
2016-12-07 02:07:22 +01:00
220183ed78 Improve gradOutput checks for VolumetricReplicationPadding. 2016-12-06 09:09:38 -08:00
504d2ca171 Improve gradOutput check for VolumetricMaxUnpooling. 2016-12-06 09:09:27 -08:00
d535aa94a1 Improve shape checks for VolumetricDilatedConvolution, VolumetricConvolutionMM,
VolumetricFullConvolution.

Also add some additional checks for SpatialFullConvolution.
2016-12-06 09:06:07 -08:00
0376a1909b Improve shape checks for VolumetricAveragePooling, VolumetricDilatedMaxPooling,
VolumetricMaxUnpooling, VolumetricReplicationPadding.
2016-12-06 09:06:03 -08:00
f757077780 Improve shape checks for VolumetricMaxPooling and VolumetricDilatedMaxPooling. 2016-12-06 09:05:59 -08:00
9f7114a4a1 Improve shape checks for VolumetricDilatedConvolution, VolumetricConvolution,
VolumetricFullConvolution.

Also add some additional checks for SpatialFullConvolution.
2016-12-05 12:22:04 -08:00
7d03da0890 Improve shape checks for VolumetricAveragePooling,
VolumetricMaxUnpooling, VolumetricReplicationPadding.
2016-12-05 09:31:00 -08:00
4e0cecae7f Improve shape checks for VolumetricMaxPooling and VolumetricDilatedMaxPooling. 2016-12-05 08:20:19 -08:00
72dbb76a15 fix half type numerics issue in SpatialFractionalMaxPooling 2016-12-02 16:33:27 -08:00
cceb926af3 Remove extra size check in SpatialAveragePooling. 2016-12-02 15:36:11 -08:00
0d7d29fa57 Enable caching allocator for CUDA pinned memory (#275)
Also add binding for CUDA "sleep" kernel
2016-12-02 01:33:56 -05:00
be3276fcdd Account for batch_size in DataLoader.__len__() (#277) 2016-12-02 01:21:36 -05:00
09c94a170c Merge commit 'f2a18004a77f146bb5b431715402f4afd3cacccd' 2016-12-01 22:16:58 -08:00
f2a18004a7 Process outstanding CUDA events in recordEvent
Without this, the cuda_events could continuously grow from calls to
cudaMemcpyAsync, but would never be processed if there were no new
pinned memory allocations.

For example:

 t1 = cutorch.createCudaHostTensor(10)
 t2 = torch.CudaTensor(10)
 while true do t2:copyAsync(t1) end
2016-12-01 19:09:47 -08:00
1a3ff1bd28 Remove unnecessary shape checks in Spatial Pooling modules.
Checks comparing input image sizes to kernel sizes are superseded
by output size checks.
2016-12-01 15:49:53 -08:00
a5d3c779c7 Add gradOutput shape checks in temporal modules. 2016-12-01 15:49:48 -08:00
9d32e60dc2 Fix spacing in SpatialDilatedMaxPooling. 2016-12-01 15:49:41 -08:00
f6913f56ea Remove unnecessary shape checks in Spatial Pooling modules.
Checks comparing input image sizes to kernel sizes are superseded
by output size checks.
2016-12-01 15:38:51 -08:00
801fe8408f Add gradOutput shape checks in Temporal modules. 2016-12-01 15:37:59 -08:00
cf4a979836 Improve shape checking for Temporal Convolution. 2016-12-01 15:37:49 -08:00
91f2946310 Import most common packages by default 2016-12-01 23:14:41 +01:00
2bd7a3c31d Don't raise an error when retrieval of container's source code fails 2016-12-01 23:14:41 +01:00
a681f6759b Raise correct error types when indexing tensors 2016-12-01 23:14:41 +01:00
cb849524f3 Improve cuDNN detection at build time 2016-12-01 23:14:41 +01:00
1f5951693a Change torch.randperm to return Long tensors 2016-12-01 23:14:41 +01:00
87748ffd4c Add .type() for torch.nn modules 2016-12-01 23:14:41 +01:00
0580f5a928 Add __len__ for tensors 2016-12-01 23:14:41 +01:00
88d9fdec2e Add torch.cuda.set_device 2016-12-01 23:14:41 +01:00
506a40ce44 Remove optim submodule attributes from torch.optim package 2016-12-01 23:14:41 +01:00
bf0e185bd6 Merge commit 'bb1019d1ec1503718b97d17366902f96f349f472' 2016-12-01 13:47:20 -08:00
5b3ccec10d Merge commit 'c2d32030a25e352eb2e2af26931163c0f4c96b36' 2016-12-01 13:46:35 -08:00
eb07581502 Merge commit 'bec6ab47b6782f60925e306b69e0f556274fb28e' 2016-12-01 13:46:03 -08:00
934a2b6878 Merge commit 'b27d4de850b5f43829bd4980f5e7f3b4b32ab7cf' 2016-12-01 13:45:05 -08:00
bec6ab47b6 Add caching allocator for pinned (host) memory
Adds a caching allocator for CUDA pinned (page-locked) memory. This
avoids synchronization due to cudaFreeHost or cudaHostUnregister at the
expense of potentially higher host memory usage.

Correctness is preserved by recording CUDA events after each
cudaMemcpyAsync involving the pinned memory. The pinned memory
allocations are not reused until all events associated with them have
completed.
2016-12-01 13:35:12 -08:00
49480f1548 Adds a CUDA "sleep" kernel
Adds a CUDA "sleep" kernel which spins for the given number of
iterations. This is useful for testing correct synchronization with
streams.
2016-12-01 12:45:07 -08:00
18a3c62d9b Allow NoneType for parameters in Module.load_state_dict 2016-12-01 20:12:15 +01:00
6322cf3234 Allow device=None in Tensor constructor
Setting device=None is the same as not specifying the device (use the
current active device).
2016-12-01 20:09:19 +01:00
4e2b154342 update install command from source 2016-12-01 10:55:04 +01:00
bb1019d1ec Add newContiguous calls that have been removed from lua. 2016-11-30 13:58:22 -08:00
c2d32030a2 Move make contiguous code from lua to C.
Exceptions are:
1) SparseLinear
requires additional parameters to be passed in (e.g. nbatches),
so it's not clear it's worth moving to C since it won't really simplify the binding
code logic.

2) BatchNormalization
requires "makeBatch", which isn't a trivial translation to C.

3) LookupTable
requires "view" in C, which is already a TODO

4) SpatialUpSamplingBilinear
requires "view" in C, which is already TODO
2016-11-30 13:45:16 -08:00
162170fd7b Add optional weight decay to optim.SGD (#269) 2016-11-29 20:35:40 -05:00
ea728e7c5e Add DataParallel container (#268)
Adds a container version of the `data_parallel` function. This is a
drop-in replacement for the DataParallel class in the ImageNet example.
2016-11-29 16:36:01 -05:00
aea6ba4bcd Support pinned memory in the DataLoader (#265)
DataLoader now supports the constructor argument 'pin_memory'. When set
to true, tensors in the sample are copied to pinned memory. This happens
in a background thread when num_workers > 1.
2016-11-29 12:35:03 -05:00
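Typical usage of the new flag (sketch; `dataset` stands for any torch.utils.data.Dataset):

    >>> from torch.utils.data import DataLoader
    >>> loader = DataLoader(dataset, batch_size=32, num_workers=2, pin_memory=True)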
ab357c14fc Merge pull request #1051 from gchanan/temporalShapeCheck
Improve error messages/shape checks for temporal modules.
2016-11-28 13:51:16 -06:00
606aa43da0 Merge pull request #383 from gchanan/TemporalShapeCheck
Improve error messages/shape check in TemporalMaxPooling.
2016-11-28 13:50:59 -06:00
8bfa802665 Improve error messages/shape check in TemporalMaxPooling. 2016-11-28 11:46:26 -08:00
ff5b73c0b3 Improve error messages/shape checks for temporal modules. 2016-11-28 11:19:00 -08:00
86c95014a4 use local modified select_compute_arch.cmake for msvc 2016-11-28 14:02:21 +08:00
288c950c5e use local modified select_compute_arch.cmake for msvc 2016-11-28 13:23:24 +08:00
b27d4de850 changes to compile with msvc 2016-11-28 10:27:36 +08:00
61063ebade Merge commit 'a7f24ccb7635447b133011d39e36279be140149e' 2016-11-26 09:13:12 -08:00
3e70e26278 Merge commit '08a1bc71c0712a4151de83d1487a55b218ae1a15' 2016-11-26 09:12:53 -08:00
66e7e42800 Merge commit '379860e457dbb72c0f18e0366e5b199452b302f5' 2016-11-26 09:12:24 -08:00
0fecec14b8 fixing bug in indexing when given float indices 2016-11-26 11:50:56 -05:00
a7f24ccb76 Fix shapeCheck in Spatial Pooling modules 2016-11-26 17:41:59 +01:00
08a1bc71c0 Fix shapeCheck in Spatial Pooling modules 2016-11-26 15:00:32 +01:00
04e896a4b4 adding coverage support for tests 2016-11-26 00:26:30 -05:00
5dcfb80b36 lua serializer registers CUDA classes only when CUDA is available 2016-11-26 00:26:30 -05:00
9da60c39ce Fix batch_first in AutogradRNN (#255) 2016-11-25 23:55:45 -05:00
379860e457 Lazily initialize CUDA devices
Previously, cutorch would initialize every CUDA device and enable P2P
access between all pairs. This slows down start-up, especially with 8
devices. Now, THCudaInit does not initialize any devices and P2P access
is enabled lazily. Setting the random number generator seed also does
not initialize the device until random numbers are actually used.
2016-11-25 15:22:16 -08:00
bcfa2d6c79 Add .t7 file reader 2016-11-25 00:41:55 +01:00
8b492bbc47 Return accreal as correct python types 2016-11-25 00:40:36 +01:00
a49b7b0f58 Fix bug when Variable constructor didn't set the error properly 2016-11-25 00:40:36 +01:00
c781ac414a Unify signatures of max, mean, etc. between variables and tensors 2016-11-25 00:40:36 +01:00
656dca6edb Implement in-place operators for variables 2016-11-25 00:40:36 +01:00
830adfd151 Allow passing torch.Size to expand 2016-11-25 00:40:36 +01:00
6f7c8e4ef8 Fix bug when passing 0 as dim to max, min, mode, median and kthvalue 2016-11-25 00:40:36 +01:00
2ba6678766 Revert "Lazily initialize CUDA devices" 2016-11-23 19:40:03 -05:00
71a47d1bed Merge pull request #610 from colesbury/lazy
Lazily initialize CUDA devices
2016-11-23 17:48:00 -05:00
51bf6321ea Implemented cudaMemGetInfo for caching allocator (#600)
* Implemented cudaMemGetInfo for caching allocator
2016-11-23 17:38:57 -05:00
aa8916e7c6 Don't unpack single element tuples returned by functions 2016-11-23 18:48:41 +01:00
2e24da2a0b Change parameter_dict to state_dict in torch.nn 2016-11-23 18:48:41 +01:00
c94ccafb61 Print error message when constructing a tensor from a numpy array with negative strides 2016-11-23 18:48:41 +01:00
80a827d3da Fix data_parallel bugs 2016-11-23 18:48:41 +01:00
6909c8da48 Use TH_INDEX_BASE for range asserts in MultiLabelMarginCriterion 2016-11-23 13:26:16 +01:00
c07105a796 fix cwrap for changed signatures 2016-11-22 14:27:41 -08:00
c40c061a9f Lazily initialize CUDA devices
Previously, cutorch would initialize every CUDA device and enable P2P
access between all pairs. This slows down start-up, especially with 8
devices. Now, THCudaInit does not initialize any devices and P2P access
is enabled lazily. Setting the random number generator seed also does
not initialize the device until random numbers are actually used.
2016-11-22 13:43:25 -08:00
a9bd27ce5c Merge commit '709255d9952783eed6c8f84e504693f9b436f852' 2016-11-22 13:26:09 -08:00
2e36c4ea2d Merge commit 'f3cb636294fbd0e15dd4b3bfdca16e73d1dca38b' 2016-11-22 13:25:53 -08:00
4e45385a8d Merge commit 'b27f576f29189ca78dd670cbd177bfa29b695c50' 2016-11-22 13:25:29 -08:00
cf5e925c10 Merge commit 'f6b94dd830c06692cd78addd41868a7a12c48755' 2016-11-22 13:25:00 -08:00
709255d995 added shape checks for SpatialAveragePooling 2016-11-22 13:23:16 -08:00
f3cb636294 refactoring and adding additional shape checks for SpatialAveragePooling 2016-11-22 13:08:58 -08:00
e3f440b1d0 Make torch.backends.cudnn work on OSX 2016-11-22 19:06:08 +01:00
f6b94dd830 Add some documentation for APPLY and DIM_APPLY macros 2016-11-21 14:02:33 -08:00
3911a1d395 Fix memory leak in LogSoftMax 2016-11-21 21:32:10 +01:00
ebd3648fd6 Call newContiguous rather than arg checking isContiguous. 2016-11-21 21:32:10 +01:00
f698f09cb7 Add contiguous checking / make tensors contiguous for
SpatialUpSamplingBilinear, PReLU, SpatialSubSampling, TemporalConvolution.
2016-11-21 21:32:10 +01:00
86aa5dae05 Move VolumetricConvolution contiguous code from lua to C. 2016-11-21 21:32:10 +01:00
179c82ffb4 Autograd functions no longer store references to saved_variables
Only references to their data and version counters are stored.
Also, it is now possible to have None arguments in save_for_backward
and return too many values from backward (as long as the excessive
results are None).
2016-11-21 19:39:55 +01:00
233017f01f Add torch.multinomial for CUDA 2016-11-21 19:39:55 +01:00
597bbfeacd SpatialConvolutionLocal uses baddbmm 2016-11-21 09:10:26 -08:00
99a169c17e Fix memory leak in LogSoftMax 2016-11-19 23:44:31 +01:00
0613ac90cd string.split and string.join removed for .split and .join 2016-11-18 16:23:34 -08:00
78871d829a Call PyObject_GC_UnTrack from tp_dealloc handler (#231)
Without the PyObject_GC_UnTrack call, the tp_dealloc handler could get
called twice if a referred to object triggers a garbage collection from
its destructor.

See http://bugs.python.org/issue28737
2016-11-18 14:06:35 -05:00
d40a7bf9eb Fix Scatter.backward() (#232) 2016-11-18 13:58:09 -05:00
b27f576f29 guard random functions for half 2016-11-18 09:32:32 -08:00
073dfd8b88 bump version 2016-11-18 12:26:12 -05:00
509dd57c2e tensor docs 2016-11-18 04:00:27 -05:00
7a837b7a14 fixing nn docs to be categorized, and optim docs 2016-11-18 03:18:48 -05:00
dee864116a optim docs 2016-11-17 21:09:17 -05:00
e51d0bef97 Add cuDNN bindings for 2D transposed convolution 2016-11-17 14:34:40 -08:00
2fd78112ab Add half copy/conversions 2016-11-17 14:34:33 -08:00
5c14bd2888 Merge pull request #605 from gchanan/halfAddrAddmv
Add half support for addmv and addr.
2016-11-17 14:33:31 -08:00
84b4665e02 Add half support for addmv and addr. 2016-11-17 14:27:56 -08:00
26d626a47c adding docs for loss functions, container, module and fix typos 2016-11-17 15:11:27 -05:00
6ff6299c65 fix memory leak in (equal) 2016-11-16 15:43:57 -08:00
071e68d99d fixing output size w / h order 2016-11-16 15:32:18 -08:00
78c1094d93 Don't override __call__ in modules 2016-11-16 15:32:18 -08:00
56fc639c9f Fix no bias mode of autogenerated THNN function 2016-11-16 15:32:18 -08:00
51084a9054 Merge pull request #603 from killeent/remainder
Implement fmod, remainder, equal in Cutorch
2016-11-16 15:20:57 -08:00
f8ae5c93e9 enables random functions for float and half types on cuda (#223) 2016-11-16 15:14:26 -08:00
ad286c0692 add support for equal in cutorch 2016-11-16 14:41:59 -08:00
a483b3903d Merge pull request #377 from gchanan/checkContiguous
Add contiguous checks / auto contiguous
2016-11-16 10:35:11 -08:00
6564d39777 Call newContiguous for tensors that are required to be contiguous.
Also add tests to verify that non-contiguous tensors are handled correctly.
2016-11-16 09:50:11 -08:00
8f1b7230fe add support for fmod in cutorch 2016-11-16 08:35:17 -08:00
c0b7608965 add support for remainder in cutorch 2016-11-16 08:12:44 -08:00
56dd4132c4 add MACOSX_DEPLOYMENT_TARGET to instructions 2016-11-16 10:45:56 -05:00
91494cb496 Call newContiguous rather than arg checking isContiguous. 2016-11-15 16:16:08 -08:00
9057eade95 Handle contiguousness and improve shape checks
in SpatialAdaptiveMaxPooling, SpatialUpSamplingNearest, and TemporalConvolution.
2016-11-15 14:17:45 -08:00
a28317b263 SpatialSubSampling contiguous check. 2016-11-15 14:16:48 -08:00
25c3603266 VolumetricConvolution check contiguous. 2016-11-15 14:15:55 -08:00
ae6f2dd11c Adapt nn code to changes in THNN and THCUNN 2016-11-15 23:02:14 +01:00
3aaa1771d5 [cutorch mag2gen] more cleanup 2016-11-15 13:31:57 -08:00
2034396a3c [cutorch mag2gen] some cleanup 2016-11-15 13:31:57 -08:00
0cad668065 [cutorch mag2gen] move qr to generic 2016-11-15 13:31:57 -08:00
f644a11b82 [cutorch mag2gen] move potr* to generic 2016-11-15 13:31:32 -08:00
d7e3b2ef29 [cutorch mag2gen] move inverse to generic 2016-11-15 13:31:32 -08:00
fc5ec87478 [cutorch mag2gen] move svd to generic 2016-11-15 13:31:32 -08:00
ed4023127b [cutorch mag2gen] move eig to generic 2016-11-15 13:31:32 -08:00
2bd4e5f5f6 [cutorch mag2gen] move symeig to generic 2016-11-15 13:31:32 -08:00
d2dcbc26f8 [cutorch mag2gen] move gels to generic 2016-11-15 13:31:32 -08:00
2f05eefe9a [cutorch mag2gen] code refactor to support generics; move gesv to generic 2016-11-15 13:31:32 -08:00
7d1afa78b9 [cutorch mag2gen] generic MAGMA memory allocator function 2016-11-15 13:30:49 -08:00
dac9b020e0 [cutorch potr*] API parity for potr* functions in cutorch 2016-11-15 13:28:37 -08:00
eb77b79df9 Merge pull request #839 from Atcold/fix_ASIMD
Fix compilation for ASIMD, fix #766
2016-11-15 12:57:57 -08:00
456998f043 Merge commit 'aeed8a6ea4650d1092289a60e71d8d83875a0ba6' 2016-11-15 12:55:11 -08:00
c09f07edd9 Merge commit 'c82537462baa715b2c70726f7da8f734b2ad3a3f' 2016-11-15 12:53:29 -08:00
66320c498c Add contiguous checking / make tensors contiguous for
SpatialUpSamplingBilinear, PReLU, SpatialSubSampling, TemporalConvolution.
2016-11-15 12:50:08 -08:00
8cb8a0a146 Move VolumetricConvolution contiguous code from lua to C. 2016-11-15 12:23:09 -08:00
aeed8a6ea4 Remove duplicate entries and add optional marks in THCUNN.h 2016-11-15 21:22:14 +01:00
c82537462b [cutorch] remove syncing point from baddbmm
This change removes HtoD copies inside baddbmm. These copies
introduce a syncing point, which causes slowdowns in multi-GPU
training.

Test plan: Run unittests for baddbmm.
2016-11-15 11:55:36 -08:00
a8a02ff560 Fix compilation for ASIMD
On ARMv8, NEON is inherent to the architecture and is instead listed as 'asimd' in /proc/cpuinfo
Replace assembly with C

Original authors:
 - @dusty-nv
    FindARM-patch.txt
    CMakeLists-patch.txt
 - @rtarquini
    NEON.c
2016-11-15 14:38:32 -05:00
72a9df19c8 Merge pull request #598 from killeent/rr2
move random functions to generic (attempt 2)
2016-11-14 11:44:41 -05:00
5b9b9634f9 [cutorch rand2gen] various fixes 2016-11-14 08:13:30 -08:00
c279a91c03 Merge commit '64c8a1377335799b322ca41d323dee13118be0ab' 2016-11-13 21:54:27 -08:00
ef6a764509 Merge commit '1cee5a359c2828800db0c41ebe0108bd5eef9501' 2016-11-13 15:23:11 -08:00
4db5afdf7e Merge commit 'f2daa616d105d700b63f05c4d544befb6e65a036' 2016-11-13 15:20:03 -08:00
7867187451 Merge commit '4f8e6ec42abd5b9b5491a49bdfe1a637e6675207' 2016-11-13 15:19:10 -08:00
4f8e6ec42a [PATCH] Improve potrf error message. (#189) 2016-11-13 15:17:05 -08:00
64c8a13773 Remove comment. 2016-11-11 15:46:44 -08:00
395ab4a287 Fix SpatialDilatedMaxPooling shape check.
In nn, indices are 3d, but they are 4d in cunn.
2016-11-11 15:43:54 -08:00
15dc862056 more improvements on error messages and shape checks. 2016-11-11 15:43:49 -08:00
f2daa616d1 Revert "Move random functions to generic" 2016-11-11 18:15:01 -05:00
64a50f5ad3 Merge pull request #589 from killeent/random-refactor
Move random functions to generic
2016-11-11 17:56:39 -05:00
1d0f86144c [cutorch rand2gen] fix illegal memory access in multinomial code, update unit tests 2016-11-11 13:23:03 -08:00
89e93bba9d [cutorch rand2gen] test fixes, add floor to geometric distribution transform 2016-11-11 13:23:02 -08:00
3290d4c7d6 [cutorch rand2gen] extend functions to use _double methods 2016-11-11 13:23:02 -08:00
ca22befc93 [cutorch rand2gen] move randn to generic 2016-11-11 13:23:02 -08:00
b08df5b9c0 [cutorch rand2gen] partial move of logNormal to generic, needs further debugging 2016-11-11 13:23:01 -08:00
ebd3c3291c [cutorch rand2gen] move geometric to generic 2016-11-11 13:23:01 -08:00
16728d2f26 [cutorch rand2gen] move multinomial to generic 2016-11-11 13:23:00 -08:00
34dab66f44 [cutorch rand2gen] move cauchy to generic 2016-11-11 13:22:59 -08:00
3a111c7499 [cutorch rand2gen] move exponential to generic 2016-11-11 13:22:59 -08:00
3600c94ec5 [cutorch rand2gen] move normal to generic 2016-11-11 13:22:58 -08:00
e2f8b00e00 [cutorch rand2gen] move bernoulli to generic 2016-11-11 13:22:58 -08:00
65ed1eba48 [cutorch rand2gen] move uniform, rand to generic 2016-11-11 13:22:57 -08:00
7fff7977fe [cutorch rand2gen] make sampleMultinomialWithoutReplacement utility function generic 2016-11-11 13:22:57 -08:00
add5922aac [cutorch rand2gen] make sampleMultinomialWithReplacement utility function generic 2016-11-11 13:22:56 -08:00
a94b54a533 [cutorch rand2gen] make sampleMultinomialOnce utility function generic 2016-11-11 13:22:56 -08:00
bea82b9da6 [cutorch rand2gen] make renormRowsL1 utility function generic 2016-11-11 13:22:56 -08:00
2e7debe282 [cutorch rand2gen] introduce THCTensorRandom.cuh, move and templatize simple binary search function 2016-11-11 13:22:55 -08:00
1cee5a359c Fix checking and spacing of dilation parameters in SpatialDilatedConvolution
and SpatialDilatedMaxPooling.
2016-11-11 10:25:44 -08:00
b08862405e Remove extraneous shape check from SpatialDilatedConvolution. (#1029) 2016-11-11 12:53:48 -05:00
d57e1a6756 change to compile with msvc && export THCDescBuff for cunn 2016-11-11 13:56:13 +08:00
c9172c5bc9 change to work on windows && ptrdiff_t replacement 2016-11-11 13:33:36 +08:00
5d5e877a05 Fix implementation of logNormal 2016-11-10 18:35:45 -08:00
1e794c87ae adding bidirectional doc 2016-11-10 17:38:47 -08:00
d9cb1b545a Fix build on 32bit platform like JETSON TK1 2016-11-11 00:22:06 +00:00
23f611f14d Rename assertSameGPU_generic to assertSameGPU.
Also remove old assertSameGPU since there is no
longer both generic and non-generic support.
2016-11-10 15:40:41 -08:00
42b28d0d69 Merge pull request #370 from gchanan/sizeCheckErrorMessages
Improving error messages in nn.
2016-11-10 18:35:22 -05:00
d0cf5f7b65 Improving error messages in nn.
Differences from nn equivalent:
1) No changes to VolumetricConvolutionMM, which doesn't exist in cunn.
2) No changes to HardShrink, which doesn't exist in cunn.
3) LookupTable doesn't verify that all inputs are within range.
2016-11-10 15:12:35 -08:00
4699c817e8 [cutorch rand2gen] fix illegal memory access in multinomial code, update unit tests 2016-11-10 15:10:12 -08:00
4f490c16e9 [cutorch rand2gen] test fixes, add floor to geometric distribution transform 2016-11-10 13:44:55 -08:00
bcdab7a632 Remove mul/div from THCHalfAutoNumerics as they've been moved to
THCNumerics.
2016-11-10 12:13:41 -08:00
7f51af7cbc adding dropout, bidirection, etc. to RNN (#214) 2016-11-10 13:25:14 -05:00
b4ae60cac8 Protect half operations with CUDA_HALF_TENSOR with generic modules. 2016-11-10 08:59:23 -08:00
4d03d96e8b fix: cunn can't find cutorch sources
https://github.com/torch/distro/issues/138#issuecomment-259133935
2016-11-10 14:44:46 +03:00
a39ffebc3a Add THCTensor_(sizeDesc) for better debug messages. 2016-11-09 12:09:18 -08:00
4bba6082ed [cutorch rand2gen] extend functions to use _double methods 2016-11-09 11:55:51 -08:00
b111632965 [cutorch rand2gen] move randn to generic 2016-11-09 11:09:30 -08:00
0a34b34bfe [cutorch rand2gen] partial move of logNormal to generic, needs further debugging 2016-11-09 10:55:54 -08:00
6b821ece22 fixing trainer tests (#213) 2016-11-08 21:50:17 -05:00
d3b2096bfd trainer fix for new optim API 2016-11-08 15:49:03 -08:00
9f1b12bf06 Merge pull request #1009 from gchanan/spatialNNGeneric
Support generic type Spatial modules
2016-11-08 18:17:58 -05:00
e64fca4b04 Allow wider test tolerances for:
1) Size of half numbers
2) Convolution weight/bias
3) BatchNormalization
2016-11-08 13:47:01 -08:00
b941e73f4f ArgCheck that dilation parameters are > 0 and ensure tests
pick dilation parameters > 0.
2016-11-08 13:46:52 -08:00
c57873d3cb Add generic support for LookupTable.
In some cases, does not do accumulation as accreal.
2016-11-08 13:46:48 -08:00
f3bc3275ac Add generic support for TemporalConvolution.
Has increased tolerance for backward weight/bias like other
Convolution modules.
2016-11-08 13:46:45 -08:00
8df26e6c5c Add generic support for VolumetricFullConvolution, VolumetricDilatedConvolution.
Has increased tolerance for backward weight/bias like other
Convolution modules.
2016-11-08 13:46:33 -08:00
5c8ecb8150 Fix one more compatibility bug in Python 3.3 2016-11-08 16:13:25 -05:00
3928f7740a Implement functional interface for Variables (torch.*) 2016-11-08 16:13:25 -05:00
1767f73e6b Add generic support for VolumetricConvolution.
Uses the higher tolerances for weight/bias that are used for
SpatialConvolution modules.
2016-11-08 13:07:35 -08:00
9e7d5e93ab Add generic support for VolumetricReplicationPadding. 2016-11-08 13:07:35 -08:00
70c6ee93a2 Add generic support for VolumetricAveragePooling. 2016-11-08 13:07:35 -08:00
5cbf8504ef Add generic support for VolumetricMaxPooling, VolumetricMaxUnpooling,
VolumetricDilatedMaxPooling.
2016-11-08 13:07:35 -08:00
9a393b023d Add generic support for TemporalMaxPooling. 2016-11-08 13:07:35 -08:00
30bf464f73 Rebase BatchNormalization. 2016-11-08 13:06:52 -08:00
9fb1f8934b Add support for L1Cost.
Changes thrust::reduce to thrust::transform_reduce in order
to be able to do summation at accreal precision.
2016-11-08 13:01:06 -08:00
f3f02b23a0 Add generic support for SparseLinear.
We don't support SparseLinear with fp16 because of lack of cusparseHcsrmm
(or equivalent Ex function) until CUDA 8.0.
2016-11-08 13:01:06 -08:00
7668cdd32c Add generic support for DistKLDivCriterion. 2016-11-08 13:01:06 -08:00
f9dafdcf09 Add generic support for ClassNLLCriterion. 2016-11-08 13:01:06 -08:00
d284a419c1 Add generic support for BCECriterion.
Test skips comparing vs lua version for half type, because hdot is
not currently implemented in cutorch.
2016-11-08 13:01:06 -08:00
b45844e3d9 Add generic support for L1SmoothCriterion. 2016-11-08 13:01:06 -08:00
6caa7e0fff Add generic support for MultiLabelMarginCriterion. 2016-11-08 13:01:06 -08:00
1669fffb8d Add generic support for MultiMarginCriterion.
Accumulation is done at accreal precision and changes target tensor
indexing to THCIndexTensor.
2016-11-08 13:01:06 -08:00
18aa86eebd Add generic support for MSECriterion. 2016-11-08 13:01:06 -08:00
075e49d3f4 Add generic support for SoftMarginCriterion. 2016-11-08 13:01:06 -08:00
a6695b8365 Add generic support for MarginCriterion. 2016-11-08 13:01:06 -08:00
06ee48b391 Add generic support for AbsCriterion. 2016-11-08 13:01:06 -08:00
fcaeffbbd4 Fix spacing in SpatialDilatedMaxPooling. 2016-11-08 13:01:06 -08:00
6146a9a641 Generic support for SpatialFullConvolution and SpatialDilatedConvolution.
Uses matrix multiply for matrix-vector multiply for half (no matrix-vector
implementation exists).
2016-11-08 13:01:06 -08:00
83de8e40d5 Add generic support for SpatialFractionalMaxPooling. 2016-11-08 13:01:06 -08:00
30590c46a3 Generic support for SpatialConvolutionMM.
Still need Hgemv.
2016-11-08 13:01:06 -08:00
a3a5e56287 Add generic support for SpatialConvolutionLocal. 2016-11-08 13:01:06 -08:00
185c96d63a Add generic support for SpatialUpSamplingBilinear.
Math is done at accreal precision. At real precision the forward
pass fails but the backward pass succeeds; we do the backward
pass at accreal precision for consistency.
2016-11-08 13:01:06 -08:00
be61ad6eb4 Add generic support for SpatialUpSamplingNearest.
Accumulates as AccType.
2016-11-08 13:01:06 -08:00
222dfd2259 Add generic support for SpatialReplicationPadding. 2016-11-08 13:01:06 -08:00
b06e1c7e1d Add generic support for SpatialReflectionPadding. 2016-11-08 13:01:06 -08:00
6876abba51 Add generic support for SpatialSubSampling.
Half types fail on backward, probably because we don't consistently
accumulate in accreal.  This is difficult because gradInput is
accumulated directly (either with atomicAdd or not) rather than
in another variable.
2016-11-08 13:01:06 -08:00
0798466a01 Generic support for SpatialCrossMapLRN
Removed the C-linkage for a couple of functions because they are now generic --
not sure if they were used by anyone outside.
2016-11-08 13:01:06 -08:00
2cda782273 Add generic support for SpatialAveragePooling. 2016-11-08 13:01:06 -08:00
7d1c9554b6 Add generic support for SpatialAdaptiveMaxPooling. 2016-11-08 13:01:06 -08:00
a29d16f1a8 Use THCIndexTensors more generally. 2016-11-08 13:01:06 -08:00
6d0c1c0f17 Use indices for SpatialAdaptiveMaxPooling indices. 2016-11-08 13:01:06 -08:00
5ed4b5c25b Add generic support for SpatialMaxUnpooling. 2016-11-08 13:01:05 -08:00
6fe89c5e44 Fix tests 2016-11-08 13:01:05 -08:00
fda8c37641 Add generic support for SpatialMaxPooling.
Also fix tests for SpatialDilatedMaxPooling.
2016-11-08 13:01:05 -08:00
6d5a0ff3a1 Get SpatialDilatedMaxPooling generic working with long tensors as index.
Does as much math as possible in accreal to try to suss out why CudaHalfTensor fails.
2016-11-08 13:01:05 -08:00
f8718dd355 Add generic support for SpatialDilatedMaxPooling. 2016-11-08 13:01:05 -08:00
85af686797 Add generic support for SpatialClassNLLCriterion. 2016-11-08 13:01:05 -08:00
0f6ec3f15f Remove fastExpIfAvail and benchmarking from functional tests.
Also fix broken IFNDEF and test whitespace.
2016-11-08 13:01:05 -08:00
44644c50ee Reorganize THCHalfAutoNumerics. 2016-11-08 13:01:05 -08:00
9749f7eacc Add generic support for RReLU. 2016-11-08 13:01:05 -08:00
d9a2bdb9df Add generic support for PReLU.
This is the first instance of functions that take a lua number but
are not reals in C.  So, instead of automatically converting lua
numbers in the half case, we parse the function definitions to
find the argument positions to convert.
2016-11-08 13:01:05 -08:00
57e678c94b fix logsoftmax 2016-11-08 13:01:05 -08:00
516f127cfd Add generic support for LogSoftMax. 2016-11-08 13:01:05 -08:00
e477add103 Add generic support for SoftMax.
Math is done at accreal precision (e.g. for half,
math is done at float precision).  Originally code
called __expf, which doesn't have a double equivalent;
we call exp instead of converting down.
2016-11-08 13:01:05 -08:00
ba3d577875 Add generic support for ELU. 2016-11-08 13:01:05 -08:00
917e4f47c4 Add generic support for SoftShrink. 2016-11-08 13:01:05 -08:00
0143dac247 Add generic support for Square.
Math is (arbitrarily?) done at double precision to
keep the intent of existing code.
2016-11-08 13:01:05 -08:00
d2390f3616 Add generic support for Sqrt. 2016-11-08 13:01:05 -08:00
949ea73402 Add generic support for LeakyReLU. 2016-11-08 13:01:05 -08:00
d1e2fe0efe Add generic support for Threshold. 2016-11-08 13:01:05 -08:00
584ada12bf Add generic support for LogSigmoid.
This has the same logic as Sigmoid; i.e.
math is done at double precision and then
stored back at desired precision.
2016-11-08 13:01:05 -08:00
3ead72f654 Add generic support for Sigmoid.
This maintains the existing logic of doing the math in
double precision and converting back to the intended
type (previously: just float).  We do the same for
half here, although perhaps we should do the math
at float in that case.

There is some question about what to do with conversions;
Sigmoid did math in double before converting back to float;
we keep this intent, although there is some question on whether
this was intentional and for half -- should we just go up to
float or up to double?
2016-11-08 13:01:05 -08:00
9ce96d3bd3 Add generic support for Abs. 2016-11-08 13:01:05 -08:00
5549c003d9 Add generic support for HardTanh. 2016-11-08 13:01:05 -08:00
46105bf90b Add generic support for Tanh. 2016-11-08 13:01:05 -08:00
73ce3b3702 Add generic support for SoftPlus.
Adds the ability to "genericize" cunn modules that can exist
simultaneously with non-generic modules (i.e. modules can
be genericized one at a time).  Allowing both generic and
non-generic modules simultaneously requires some extra code
that can be removed once every module is genericized.
Also genericizes SoftPlus in this way.
2016-11-08 13:01:05 -08:00
1c6225dc2f [cutorch rand2gen] move geometric to generic 2016-11-08 10:47:28 -08:00
44874542c8 fix printing in console (#208) 2016-11-08 13:42:26 -05:00
31f2846aff [cutorch rand2gen] move multinomial to generic 2016-11-08 09:34:19 -08:00
bc08011e72 Don't longjmp out of omp loops in unpooling modules 2016-11-08 18:12:56 +01:00
7cccc216d0 ArgCheck that dilation parameters are > 0. 2016-11-08 18:12:56 +01:00
09493603f6 Change optimizer API 2016-11-08 18:12:56 +01:00
e799bd0ba9 Restrict in-place autograd ops to disjoint variables 2016-11-08 18:12:56 +01:00
40247b0382 Fix torch tests in Python 3.3 and 3.4 2016-11-08 18:12:56 +01:00
cd2e9c5119 [cutorch rand2gen] move cauchy to generic 2016-11-08 08:11:39 -08:00
0b6f7b12b1 [cutorch rand2gen] move exponential to generic 2016-11-08 08:04:26 -08:00
86e42ba291 Adding truncated tensor printing (#202)
* Adding truncated tensor printing
2016-11-08 10:05:30 -05:00
e0a18cafd3 Don't longjmp out of omp loops in unpooling modules 2016-11-08 13:23:43 +01:00
8c2f77cab6 updated autogen docs 2016-11-07 17:19:00 -05:00
c1bd6ba1e1 Zero-initialize outputs for BLAS functions 2016-11-07 22:50:56 +01:00
df59b89fbb Add more optimizers 2016-11-07 22:50:56 +01:00
8fd9cc160c [cutorch rand2gen] move normal to generic 2016-11-07 13:26:59 -08:00
28e3f07b63 adding apply function 2016-11-07 16:17:49 -05:00
513d902df1 adding __repr__ for nn 2016-11-07 16:17:40 -05:00
fce14a9f51 [cutorch rand2gen] move bernoulli to generic 2016-11-07 13:16:10 -08:00
884107da01 [cutorch rand2gen] move uniform, rand to generic 2016-11-07 12:27:30 -08:00
caa79a354a [cutorch rand2gen] make sampleMultinomialWithRoutReplacement utility function generic 2016-11-07 10:33:03 -08:00
5bb873a2fe [cutorch rand2gen] make sampleMultinomialWithReplacement utility function generic 2016-11-07 10:28:19 -08:00
bc0442d7df [cutorch rand2gen] make sampleMultinomialOnce utility function generic 2016-11-07 10:15:13 -08:00
cfcd33552b [cutorch rand2gen] make renormRowsL1 utility function generic 2016-11-07 10:02:21 -08:00
5f6b9fd5ba [cutorch rand2gen] introduce THCTensorRandom.cuh, move and templatize simple binary search function 2016-11-07 08:31:19 -08:00
469dce4a2d skip test_scatter_gpu on no CUDA 2016-11-05 20:10:07 -04:00
55d32de331 Fix bugs in torch.legacy.nn and add regression tests 2016-11-05 22:48:52 +01:00
4491d2d3cb Expose ger, mv, mm, bmm as tensor methods 2016-11-05 22:48:52 +01:00
f9669b9b9a Merge pull request #583 from nicolasvasilache/master
THC UVA Allocator
2016-11-05 11:50:07 -04:00
246d5f37c7 THC UVA Allocator 2016-11-05 02:40:44 +00:00
293bfb03dd Merge commit '4def4e696b9079f587d0dba3e86423df5ea429b8' 2016-11-03 14:12:22 -07:00
4def4e696b fix result type 2016-11-03 14:10:49 -07:00
b6e58c030a enable dot for CUDA_HALF 2016-11-03 13:50:50 -07:00
bf00308ab2 Merge commit 'fd677945741b4ee353079911993ada3770e07f5c' 2016-11-03 13:31:12 -07:00
e3e786e35e Move source code checks from __getstate__ to torch.load (#200)
The __getstate__ and __setstate__ functions are called from copy.copy as
well as pickling. The source code inspection currently slows down the
data parallel code because it makes a copy of the object every
iteration.
2016-11-03 16:29:14 -04:00
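
A small standalone illustration (not from this commit) of why the check was costly: copy.copy on an ordinary Python object goes through the pickle protocol, so whatever __getstate__ does runs on every copy, e.g. once per iteration on the data-parallel replication path. The Tracker class below is hypothetical.

```
import copy

class Tracker(object):
    """Hypothetical object whose __getstate__ does something expensive."""
    def __getstate__(self):
        print("__getstate__ called")    # runs for pickling *and* for copy.copy
        return self.__dict__
    def __setstate__(self, state):
        print("__setstate__ called")
        self.__dict__.update(state)

t = Tracker()
_ = copy.copy(t)   # prints both messages: copying reuses the same hooks as pickling
```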
fd67794574 Merge pull request #581 from torch/dotfix
making dot to have an accreal return type (consistent with CPU)
2016-11-03 12:51:27 -04:00
104b502919 ArgCheck that dilation parameters are > 0. 2016-11-03 09:02:22 -07:00
a18cd3ba92 ArgCheck that dilation parameters are > 0. 2016-11-03 09:01:43 -07:00
0676cad200 Merge commit 'e644f6ed2c1965b0de55cc9037d5c75245f63d54' 2016-11-03 08:36:42 -07:00
3b1d217310 Merge commit 'e32af0196e10ad11b3938ad73ec5ef49cac7c03e' 2016-11-03 08:36:04 -07:00
93bcb2e7ba making dot to have an accreal return type (consistent with CPU) 2016-11-02 16:40:54 -07:00
ebc70f7919 Look for libcudart in default CUDA installation paths (#195) 2016-11-02 19:36:10 -04:00
e32af0196e Merge pull request #828 from apaszke/lapack
Add more size checks and improve some LAPACK error messages
2016-11-02 18:53:45 -04:00
3e5c121c56 Adding !!inc to cwrap and splitting up TensorMethods.cwrap (#197)
* Adding !!inc to cwrap and splitting up TensorMethods.cwrap
2016-11-02 18:50:56 -04:00
e644f6ed2c Add supporting code for CUDA IPC
This adds three small pieces to help with sharing THCStorages across
processes:

 1. THCIpcAllocator: a THCDeviceAllocator to close shared memory handles in the
    child process.
 2. THCCachingAllocator_getBaseAllocation which returns the pointer and
    size of the underlying cudaMalloc allocation. This is necessary
    because cudaIpcGetMemHandle requires 'base' pointers
 3. Support for TH_STORAGE_VIEW in THCStorage_(free). This is useful in
    child processes to represent THCCachingAllocator allocations split
    from a larger cudaMalloc call.
2016-11-02 14:53:28 -07:00
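
For context, a minimal sketch (not from this commit, assuming a CUDA build of PyTorch with torch.multiprocessing) of the kind of cross-process CUDA sharing this plumbing supports:

```
import torch
import torch.multiprocessing as mp

def child(q):
    t = q.get()      # reconstructed in the child from a CUDA IPC handle
    t.add_(1)        # same device memory as the parent's tensor

if __name__ == "__main__":
    mp.set_start_method("spawn")   # CUDA storages can only be shared with spawn/forkserver
    q = mp.Queue()
    x = torch.zeros(4, device="cuda")
    p = mp.Process(target=child, args=(q,))
    p.start()
    q.put(x)
    p.join()
    print(x)         # all ones: the child's write is visible to the parent
```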
551a7c72f3 Fix multiprocess serialization with "spawn" or "forksever" (#198) 2016-11-02 17:44:36 -04:00
05b121841e Add more size checks and improve some LAPACK error messages 2016-11-02 21:51:51 +01:00
c29aea89ee Merge pull request #827 from howard0su/freebsd
Fix compile error on freebsd
2016-11-02 16:10:50 -04:00
103e70ccc5 adding cuda types for tensor methods (#194) 2016-11-02 10:25:58 -04:00
ec7ecbe2dd Fix compile error on freebsd 2016-11-02 20:27:05 +08:00
7a06dbb87e Merge commit '1234e434fa2b6ddd440194c8bccd352593902c69' 2016-11-01 21:33:41 -07:00
1234e434fa TH_INDEX_BASE for nonzero 2016-11-01 21:08:52 -07:00
2d374f982e Changes for ccache nvcc support 2016-11-01 15:54:33 -04:00
4e73630a95 Fix criterion backward, that was modifying grad_output shape 2016-11-01 19:31:53 +01:00
e867baa5f9 Accept file paths in torch.save and torch.load 2016-11-01 19:31:53 +01:00
04b750cb52 Improve Parameter's __repr__ 2016-11-01 19:31:53 +01:00
97c7b12542 Fix Variable __setstate__ refcounting bugs 2016-11-01 19:31:53 +01:00
0dfec752a3 Merge commit 'f16f68e103dfc22921f6106ec7136ddc7a0ab087' 2016-11-01 10:38:13 -07:00
f16f68e103 CMake: Install generic/THCTensorMathScan.h 2016-11-01 16:07:07 +01:00
4b7f8f9b77 adding notes for compiling from source 2016-11-01 01:27:28 -04:00
9969d50833 fix for CPU-only builds 2016-11-01 01:19:37 -04:00
7355c63845 adding multiple types for dist 2016-10-31 21:26:19 -07:00
16cac6442a adding multiple types for cumsum, cumprod 2016-10-31 21:26:19 -07:00
5009ae5548 adding multiple types for pow, trace, diag, tril, triu 2016-10-31 19:26:08 -07:00
32647e285e implement torch.nonzero 2016-10-31 18:22:49 -07:00
6df334ea68 Improve potrf error message. (#189) 2016-10-31 18:48:29 -04:00
f8501042c1 Make _requires_grad Variable attribute writeable 2016-10-31 22:47:09 +01:00
be085b8f6c Allow marking non-leaf variables as non-requiring grad 2016-10-31 22:47:09 +01:00
ef557761dd Allow to not use all function outputs in autograd 2016-10-31 22:47:09 +01:00
15377ac391 Copy Module._buffers in nn.parallel.replicate (#180) 2016-10-31 12:12:29 -04:00
ad5fdef6ac Make every user-visible Tensor have a Storage (#179) 2016-10-31 12:12:22 -04:00
0cb5943be8 Fix NCCL reduce_scatter in Python 2.7 (#183) 2016-10-30 17:58:02 -04:00
fb593d5f28 Fix bugs in variable __setitem__ and improve __getitem__ 2016-10-30 00:16:06 +02:00
645c913e4f Print GPU id for CUDA tensors 2016-10-30 00:16:06 +02:00
b4f4cca875 Rename training and evaluation methods 2016-10-30 00:16:06 +02:00
6027513574 Add support for indexing with numpy types 2016-10-30 00:16:06 +02:00
849188fdab Fix multiprocessing 2016-10-29 14:23:23 -07:00
a9c14a5306 Remove unused code 2016-10-28 15:28:22 -07:00
2da36a14d1 Clean up cuDNN code and fix chooseBackwardFilterAlgorithm 2016-10-28 13:05:53 -07:00
2ee451f5f7 Build in Release mode 2016-10-28 12:51:19 -07:00
f2d7e94948 Use torch.Size for Tensor sizes and tuple for strides
See issue #20

The torch.Size class is a tuple subclass which distinguishes sizes from
other tuples so that torch.Tensor(size) is interpreted as size instead
of data.
2016-10-28 19:37:09 +02:00
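
A quick sketch (not from the commit) of the distinction this introduces:

```
import torch

x = torch.Tensor(2, 3)
size = x.size()                    # a torch.Size, which is a tuple subclass
print(isinstance(size, tuple))     # True: usable anywhere a tuple is expected

y = torch.Tensor(size)             # a torch.Size argument is read as a shape -> new 2x3 tensor
z = torch.Tensor([2, 3])           # a plain sequence is still read as data -> 1-D tensor [2., 3.]
print(y.size(), z.size())          # torch.Size([2, 3]) torch.Size([2])
```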
2031dfc08a Add hdot support for CUDA 8.
If not compiled with CUDA 8+, an error is raised indicating that
CUDA 8.0+ is required.
2016-10-27 15:01:09 -07:00
34ede14877 Fix compile error due to THCStorage change 2016-10-27 14:27:10 -07:00
2af3098e5a Merge commit '42e835ebb81a3ecf8f76e15bb1866c1427f61d74' 2016-10-27 13:49:23 -07:00
2e44511b13 Merge commit 'bbe8627a3f0e6cbb8fd1952826f75df741e44b01' 2016-10-27 13:47:36 -07:00
7bc4aa7e72 Merge commit '2bd36604e298547cc66f175588c925271223b4e9' 2016-10-27 13:46:38 -07:00
e2458bce97 Add Parameter class to nn 2016-10-27 22:31:36 +02:00
ae9789fccc adding input / output / member sections to the docgen 2016-10-27 01:11:53 -04:00
45ef25ea27 fix rnn documentation typos and format 2016-10-27 01:11:53 -04:00
ad2d413c0b Add C++ bindings for cuDNN (#167)
The Python ctypes bindings overhead was high enough that it slowed down
multi-gpu training when using 4+ Maxwell GPUs.
2016-10-26 19:51:48 -04:00
30924ff1e0 Fix test_nonzero flakiness (#173) 2016-10-26 19:50:56 -04:00
383c48968f Add support for indexing with ellipsis (#172) 2016-10-26 19:50:44 -04:00
bbe8627a3f Use 'void' for no-arg functions 2016-10-26 12:44:34 -07:00
2bd36604e2 Fix no-arg function prototypes 2016-10-26 12:35:05 -07:00
9ed47ef531 fix bug in mmaping 2016-10-26 07:23:04 -07:00
139f98a872 pushing THCState back to the header 2016-10-25 18:23:53 -07:00
c825895190 Make KwargsPlugin output deterministic 2016-10-26 00:19:33 +02:00
42e835ebb8 Add sameGPU checks to BatchNormalization (#361) 2016-10-25 15:19:03 -04:00
a7d5fdf54e Add integer indexing for MultiLabelMarginCriterion. 2016-10-25 11:42:56 -07:00
3b4e41f6ec Add integer indexing for MultiMarginCriterion. 2016-10-25 10:19:53 -07:00
5505e1de7d Store the device in THCStorage 2016-10-25 07:21:54 -07:00
6d329e418b allocator updates 2016-10-25 07:07:52 -07:00
3a11afb57f some bugfixes for THC 2016-10-24 17:16:17 -07:00
df86e02c9e update nn docs 2016-10-24 17:20:00 -04:00
deebc1383e Show exponent when printing vectors 2016-10-24 22:30:11 +02:00
19f2f1a9d3 Buffer values when constructing a CUDA tensor from a sequence 2016-10-24 22:30:11 +02:00
4dc13ecdd8 Make tests deterministic 2016-10-24 22:30:11 +02:00
b4b6e356ef Fix clang warnings 2016-10-24 22:30:11 +02:00
9000f40e61 Add torch.from_numpy 2016-10-24 22:30:11 +02:00
f137c0c05a Improve error messages of stateless functions 2016-10-24 22:29:43 +02:00
b43a02a9aa Make random 0-based 2016-10-24 22:29:43 +02:00
30be715900 Add training and evaluation to torch.nn 2016-10-24 22:29:43 +02:00
71cf8e14cb Fixes in torch.legacy.nn 2016-10-24 22:29:43 +02:00
ffd4863b23 Don't build nccl on macOS 2016-10-24 22:29:43 +02:00
4c17098bb8 Fix platform detection in torch.cuda 2016-10-24 22:29:43 +02:00
bcfdd18599 Fix python2.7 compatibility and check cffi version in ffi utils 2016-10-24 22:29:43 +02:00
067662d280 making .numpy return writeable arrays (#164) 2016-10-24 16:23:28 -04:00
93d02e4686 Merge pull request #129 from adamlerer/cudnn_rnn
CuDNN + PyTorch RNN library
2016-10-24 15:00:02 -04:00
12de115305 Fix Lua->Python logic in legacy.optim 2016-10-24 20:04:23 +02:00
b5d13296c6 addressing comments 2016-10-23 21:11:22 -07:00
86288265ad Adding rnn cell library 2016-10-23 20:23:48 -07:00
a559d94a44 docs and such 2016-10-23 20:23:48 -07:00
1eb6870853 add nobias option to rnn 2016-10-23 20:23:48 -07:00
f88c3e9c12 fix some missing features in pytorch needed for RNNs 2016-10-23 20:23:48 -07:00
942ca477a6 Copying weights for CUDNN 2016-10-23 20:23:48 -07:00
b0e33fb473 cudnn + THNN match with parameters 2016-10-23 20:23:48 -07:00
d58b627b98 CUDNN RNN bindings 2016-10-23 20:23:48 -07:00
b85fc35f9a Fix for versions compiled without CUDA support (#155)
* Fix pytorch when compiling without CUDA support
* Skip print test with CUDA types if CUDA is not available
2016-10-23 13:03:10 +02:00
bcb466fb76 fix bug with numpy conversion and storageOffset > 0 (#154) 2016-10-22 11:56:18 -04:00
6db721b5dd Make DataLoader preserve the ordering of the dataset (#135) 2016-10-21 23:54:16 -04:00
140c65e52b fixing python setup.py clean 2016-10-21 23:20:02 -04:00
29e8d77ce0 Merge pull request #558 from gchanan/genericDeviceTensorUtils
Add generic type support for toDeviceTensor.
2016-10-19 18:19:13 -04:00
b66a4ea919 Add THNN_CHECK_DIM_SIZE_INDICES to avoid pointer conversion warnings. 2016-10-19 15:01:49 -07:00
d3d59e5024 Indices for nn. 2016-10-19 14:53:19 -07:00
5285da0418 Use index types for SpatialAdaptiveMaxPooling indices. 2016-10-19 14:53:10 -07:00
a76e69d709 Use index types for Max Pooling / Unpooling indices. 2016-10-19 14:52:58 -07:00
4d0d775d16 Add generic type support for toDeviceTensor. 2016-10-19 14:36:03 -07:00
98f67e90d5 Fix super call in Container.modules and Container.parameters (#142) 2016-10-19 13:21:03 -04:00
fee67c2e1a Allow parameters and child modules to be assigned by attribute (#136)
For example:
  self.linear = nn.Linear(10, 20)
  self.weight = torch.autograd.Variable(torch.Tensor(10, 20))
2016-10-18 23:34:20 +02:00
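
A hedged sketch of a module written this way (using nn.Parameter for the bare-tensor case; the exact registration rules shown are an assumption, not taken from the commit):

```
import torch
import torch.nn as nn

class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.linear = nn.Linear(10, 20)                    # picked up as a child module
        self.weight = nn.Parameter(torch.Tensor(10, 20))   # picked up as a parameter

m = MyModule()
print(sorted(name for name, _ in m.named_parameters()))
# ['linear.bias', 'linear.weight', 'weight']
```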
c295f26a00 Support async argument to Variable.cuda (#137) 2016-10-18 23:27:11 +02:00
8a09c45f28 Fix typo 2016-10-18 09:29:19 -07:00
79ead42ade Add CUDA Stream and Event API (#133) 2016-10-18 12:15:57 -04:00
94e52e1d17 Fix Variable.cat 2016-10-17 15:36:08 -07:00
3931beee81 Use THSetNumThreads instead of omp_set_num_threads
Set OMP num threads to one in the data loader.

Fixes #81
Fixes #82
2016-10-17 15:15:00 -04:00
d293c17d21 Merge commit '1a3920e5dc546803ec8ada369ff1b0d56cf24e76' 2016-10-17 10:29:41 -07:00
1a3920e5dc Expose OpenMP num threads through TH lib
Expose omp_set_num_threads and similar APIs through the TH lib. This
means third-party libraries using TH don't need to be compiled with
OpenMP support just to control the number of TH OMP threads.
2016-10-17 10:09:10 -07:00
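
On the Python side the same knob is reachable through torch.get_num_threads / torch.set_num_threads (a usage sketch, not part of this commit):

```
import torch

n = torch.get_num_threads()    # current TH/OpenMP thread count
torch.set_num_threads(1)       # e.g. what the data loader does in worker processes
print(n, torch.get_num_threads())
torch.set_num_threads(n)       # restore the previous setting
```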
ffc3eb1a24 Exclude THNN Linear in favor of Python implementation 2016-10-17 09:53:20 -07:00
2f5d4a7318 gcc 5 + cuda < 8 workaround improved 2016-10-17 12:46:21 -04:00
70553f4253 gcc 5 + cuda < 8 workaround improved 2016-10-17 12:45:45 -04:00
8d39fb4094 Use new THC API for device allocator 2016-10-17 09:35:41 -07:00
7d10b2370f Merge commit 'ec7a2878013ec70a4d4a8bfb6f5e5503f87f9ea0' 2016-10-17 09:35:04 -07:00
31ec7650ac Merge commit '429f2d67652f4fcba0bbf65c7d3e109e136a9cdf' 2016-10-17 09:33:06 -07:00
c014920dc1 Merge commit 'b01c78580594c53e6afb02b3d2110577a4673308' 2016-10-17 09:32:01 -07:00
17e3d4e1ee Merge commit '38cb3d02270b9e558a891a9a2bef01a75d1bd9e1' 2016-10-17 09:31:38 -07:00
b01c785805 Fix cutorch.getStream()
state->numUserStreams does not include the NULL stream, which is stored
in res->streams[i]
2016-10-17 08:49:23 -07:00
0eea71f878 torch.cat for multiple cuda types 2016-10-17 01:56:33 -04:00
ec7a287801 Merge pull request #1006 from torch/errorsimprovements
more improvements on error messages and shape checks
2016-10-17 00:46:21 -04:00
4bc585a2fe more improvements on error messages and shape checks 2016-10-17 00:37:50 -04:00
429f2d6765 fixes to upsampling bilinear API 2016-10-17 00:30:25 -04:00
a0c7e3cf04 Merge pull request #550 from colesbury/streams
Add stream API that is not based on indices
2016-10-16 19:08:03 -04:00
9cd68129da fixing typo 2016-10-16 19:07:09 -04:00
aa6f6117b7 Ported Linear module to THNN 2016-10-16 17:49:47 +02:00
6fa9c87aa4 Merge pull request #548 from BTNC/win-msvc
make cunn compile with msvc && fix compilation failure for linux/mac os
2016-10-15 22:07:52 -04:00
ee14cf9438 Add support for pinned memory: (#127)
torch.Storage/Tensor.pin_memory()
 torch.Storage/Tensor.is_pinned()
2016-10-15 18:38:26 -04:00
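
A short usage sketch (assumes a CUDA build; `non_blocking` is the later spelling of the async-copy flag and is an assumption here):

```
import torch

x = torch.randn(1024, 1024)
p = x.pin_memory()                       # page-locked copy of x
print(x.is_pinned(), p.is_pinned())      # False True

g = p.cuda(non_blocking=True)            # pinned source memory lets the H2D copy overlap compute
```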
0391bbb376 Fix view_as and view for empty tensors (#128) 2016-10-15 18:33:05 -04:00
28ada0c634 update md docs 2016-10-14 18:56:24 -04:00
2c233d23ad Add stream API that is not based on indices
This implements the THC code so that we can expose streams as objects
instead of simply referring to them by indices. This is not exposed in
Lua yet.
2016-10-14 15:25:38 -07:00
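
The Python-facing counterpart (#133 above) also treats streams as objects; a hedged sketch of typical use:

```
import torch

s = torch.cuda.Stream()           # a stream object, not an index into a per-device list
with torch.cuda.stream(s):        # work issued in this block is queued on s
    y = torch.randn(1000, device="cuda").sum()
s.synchronize()                   # wait for everything queued on s
print(y.item())
```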
59c628803a fixing padding_idx option 2016-10-14 15:05:21 -07:00
6b830bc77f Merge pull request #78 from colesbury/nccl
Use NCCL in comm.py if available
2016-10-14 17:44:11 -04:00
f30081a313 Use NCCL bcast and reduce functions in comm 2016-10-14 14:16:32 -07:00
c15648c6b5 Add NCCL build scripts 2016-10-14 14:16:32 -07:00
a02917f502 Fix typo 2016-10-14 14:07:29 -07:00
70d8bd04c0 Make cuDNN descriptors extend object
Fixes weird double __del__ issue
2016-10-14 13:58:20 -07:00
ad2cee0cae Fix caching allocator when used from multiple Lua threads
Use a single, global THCCachingAllocator instance.

Previously, each Lua thread had its own THCCachingAllocator instance.
However, threads can share storages, which means a segment could be
allocated from one THCCachingAllocator and freed on another, which
breaks.

Fixes #539
2016-10-14 10:08:56 -07:00
756a7122ad torchdoc 2016-10-14 04:18:10 -04:00
3d6ebde756 qr and ormqr tests and bugfix 2016-10-14 03:10:16 -04:00
daa30aa992 fix typo 2016-10-13 23:11:32 -07:00
39459eb238 make cunn compile with msvc && fix compilation failure for linux/mac os 2016-10-14 12:54:00 +08:00
0325e2f646 Major autograd refactor
Improves autograd performance by more than 2x and fixes a couple
of bugs. All core functions have been moved to C.
2016-10-13 17:17:49 -07:00
93b8b5631f Improve CUDA tensor constructor speed 2016-10-13 17:16:39 -07:00
60ab1ce0c1 Stop using contextlib for device and device_of 2016-10-13 17:16:39 -07:00
2f186df52d removing CUDA_HALF_INSTRUCTIONS and enabling hgemm only for P100 2016-10-13 16:52:40 -07:00
452e07d432 Revert "change to work on windows && replace long with ptrdiff_t" 2016-10-13 18:09:34 -04:00
05d1404b9c Revert "changes to make cunn compile on windows with msvc" 2016-10-13 18:08:56 -04:00
2acee24332 Add keyword argument support to most tensor functions 2016-10-13 12:32:04 -04:00
e7639e55f8 change to work on windows && replace long with ptrdiff_t 2016-10-13 23:44:28 +08:00
f978eca477 change to work on windows && replace long with ptrdiff_t 2016-10-13 22:55:58 +08:00
eb3ac2b367 changes to make cunn compile on windows with msvc 2016-10-13 22:22:23 +08:00
968d386b36 Make atomicAdd functions static inline. 2016-10-12 15:18:30 -07:00
38cb3d0227 Fix build when NEON is supported 2016-10-12 12:51:22 +00:00
6f606dd5f9 updating nn docs 2016-10-11 14:41:25 -04:00
bab616cf11 Fix OOM error message in tensor constructor 2016-10-10 20:51:15 -07:00
966adc6291 Simplify torch.cat 2016-10-10 20:51:15 -07:00
518cb6ec7c Allow specifying output size in MaxUnpooling 2016-10-10 20:51:15 -07:00
34bcd4c237 Rename FullConv to ConvTranspose and allow specifying output size 2016-10-10 20:51:15 -07:00
a121127082 Merge remote-tracking branch 'upstream/master' into more-generic-functions 2016-10-10 10:09:43 -07:00
50326e94b1 try cudnn 5.1.5 and 5.1.3 in that order to load them up. This is needed because cudnn for cuda 7.5 ships with 5.1.3 and cudnn for cuda 8.0 ships with 5.1.5 2016-10-09 22:26:43 -04:00
160723b5b4 fix cudnn lib name 2016-10-09 21:19:50 -04:00
7991125293 Improve error messages 2016-10-08 20:37:40 -07:00
96f61bff30 Add LAPACK functions 2016-10-08 20:37:37 -07:00
a94488f584 replace long with ptrdiff_t for memory size/offset, element count 2016-10-08 21:39:16 +08:00
f2cf673d3a fix tensor printing when the tensor is a view into a giant storage 2016-10-07 17:53:37 -04:00
c4595a3dd6 [cutorch refactor] addcmul/addcdiv to generic 2016-10-07 13:09:05 -07:00
5db118e64b Update LogSoftMax to work in spatial domain 2016-10-07 16:08:39 -04:00
1620c56808 [cutorch refactor] cmin/cmax to generic 2016-10-07 11:50:28 -07:00
e88e0026b1 [cutorch refactor] make dist(...)'s op generic, add missing unit test 2016-10-07 11:50:28 -07:00
ace9b49e28 [cutorch refactor] move cross(...) to generic 2016-10-07 11:50:28 -07:00
da90751add [cutorch refactor] move lerp(...) to generic 2016-10-07 11:50:28 -07:00
8cc566f7b5 [cutorch refactor] move clamp(...) to generic 2016-10-07 11:50:28 -07:00
02ad199905 [cutorch refactor] make var(...) generic 2016-10-07 11:50:28 -07:00
c3e0811d86 [cutorch refactor] cleanup code in prep for review 2016-10-07 11:50:28 -07:00
499d1c5709 [cutorch refactor] fixes for norm, wrap/test 2016-10-07 11:50:28 -07:00
cf16ec45e1 [cutorch refactor] move stdall into generic, wrap test for std 2016-10-07 11:50:27 -07:00
daa15dcceb [cutorch refactor] move varall into generic 2016-10-07 11:50:27 -07:00
32556cbe5e [cutorch refactor] move normall to generic 2016-10-07 11:50:27 -07:00
74d9c674f5 Make _norm(...)'s ops generic 2016-10-07 11:50:27 -07:00
a4da558fa0 [cutorch refactor] move mean function into generic/ 2016-10-07 11:50:27 -07:00
dba6d1d57f Make _norm(...)'s ops generic 2016-10-07 11:50:27 -07:00
b01c4338c9 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
811d947da3 [cutorch refactor] move renorm function into generic 2016-10-07 11:50:27 -07:00
de7bf7efe6 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
5537df9927 [cutorch refactor] make _renorm(...)'s ops generic 2016-10-07 11:50:27 -07:00
81fea93741 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
df1065a2d8 Move _std dependencies into THCTensorMathReduce.cuh 2016-10-07 11:50:27 -07:00
c2e3bf2145 [cutorch refactor] move meanall function into generic/, update cwrap for lua mean 2016-10-07 11:49:33 -07:00
a4d849ef68 [cutorch refactor] move mean function into generic/ 2016-10-07 11:49:33 -07:00
957c9f3853 Move atomicAdd functions to THCAtomics.cuh in order to share
definitions with other projects, e.g. cunn.
2016-10-07 11:43:02 -07:00
3958b6b0e1 Merge pull request #338 from nitsky/spatial_logsoftmax
SpatialLogSoftMax
2016-10-07 10:36:40 -04:00
5d70feb573 bug fix for wrong usage of checkGPU && port to windows with msvc 2016-10-07 15:55:38 +08:00
a22af69335 Add versioning and shared storage handling to autograd (#105) 2016-10-06 17:12:58 -04:00
1213149a2f add bias option to linear; allow modules to return nested lists/tuples of tensors (#106)
* add bias option to linear; allow modules to return nested lists/tuples of tensors
2016-10-06 15:59:12 -04:00
398b6f75cd update nn.md 2016-10-05 14:56:41 -04:00
e46e05e7c5 fix container doc 2016-10-05 14:53:41 -04:00
166028836d Ignore graph parts not requiring gradient in engine 2016-10-05 08:46:34 -07:00
3cbe66ba8c Change requires_grad default to False 2016-10-05 08:46:34 -07:00
99de537a2e Remove CUDA sync points from losses and trainer 2016-10-05 08:46:31 -07:00
1d0afdf9f7 Make requires_grad read only (except for leaves) 2016-10-05 07:55:07 -07:00
4db6667923 Allow specifying per-parameter optimization parameters 2016-10-04 18:21:50 -07:00
80e16e44aa Check container source on load 2016-10-04 17:41:12 -07:00
58b134b793 Allow exporting optimizer state as a dict 2016-10-04 17:33:49 -07:00
6efefac2df Add parameter_dict and load_parameter_dict methods for modules 2016-10-04 14:47:56 -07:00
0c9670ddf0 Allow remapping storages at load time and serialize data in little endian order 2016-10-04 12:54:55 -07:00
53c65ddc6a Fix memory leak when constructing a tensor from numpy (#98) 2016-10-03 23:27:54 -04:00
33371c5164 ffi tests skip on cuda 2016-10-03 12:15:28 -07:00
64dd1419c5 Fix Variable indexing bugs (#96) 2016-10-03 14:49:21 -04:00
108068a417 python 2.7 fixes 2016-10-03 00:14:06 -07:00
6e8ed95ada fix compilation error: 'for' loop initial declarations are only allowed in C99 mode 2016-10-03 14:11:59 +08:00
39c9f9e9e8 replace long with ptrdiff_t for memory size/offset etc 2016-10-03 12:55:30 +08:00
b555588f5d Make THNN lazy init thread safe 2016-10-02 21:36:05 -07:00
47ef4bb0a0 Fix memory leak in torch.cat 2016-10-02 21:36:05 -07:00
b34654bf97 Merge commit 'ab0e86ae4b0a08b8d0a67f1494ff80e65a6932ad' 2016-10-02 20:58:29 -07:00
6068df3ab2 Merge commit '60a8a9e918e04fd5581d20e4e7527dd115c69cd8' 2016-10-02 20:56:33 -07:00
bb35999f51 Merge commit '25c51c49aa3bb9ac5f64560a46f1f2a905f4e3f7' 2016-10-02 20:55:38 -07:00
25c51c49aa adding stdc++ static linking on TH_BINARY_BUILD=1 always, because caching allocator uses c++ 2016-10-02 20:48:35 -07:00
833bedb46b cudnn relative check in binary builds 2016-10-02 11:45:46 -07:00
3d8eba7b42 updating readme with new info 2016-10-02 10:13:15 -07:00
ab0e86ae4b fix arm neon bug 2016-10-02 08:35:40 -07:00
94b35312d0 Compile fixes for picky compilers / stl versions (#518)
* Compile fixes for picky compilers/stl versions
2016-10-02 00:41:47 -04:00
f4ebc65a12 Add Module.modules() and Module.children() (#90)
modules(): returns an iterator over all modules in the network
 children(): returns an iterator over immediate children

Also fix __getitem__ in Sequential
2016-10-01 21:18:53 -04:00
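
A small example (not from the commit) of the difference between the two iterators:

```
import torch.nn as nn

net = nn.Sequential(
    nn.Linear(4, 8),
    nn.Sequential(nn.ReLU(), nn.Linear(8, 2)),
)

print([type(m).__name__ for m in net.children()])
# ['Linear', 'Sequential']                                  immediate children only
print([type(m).__name__ for m in net.modules()])
# ['Sequential', 'Linear', 'Sequential', 'ReLU', 'Linear']  the net itself plus all descendants
```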
2bc9da4f5e Support "device" keyword argument (#79)
Adds the optional "device" keyword argument to Tensor and Storage
constructors and .new methods.
2016-10-01 19:32:55 -04:00
e034f258e3 Fix ffi utils in Python 2.7 2016-10-01 15:37:05 -07:00
39adf6dbd2 Merge pull request #80 from colesbury/data
Fixes to trainer and data loading
2016-10-01 16:50:42 -04:00
112df5f664 Fixes to trainer and data loading
1. Wrap target in a Variable in trainer
2. Collate numbers into torch.Long/DoubleTensors
2016-10-01 13:21:16 -07:00
3564b77553 a couple of changes for win32 (#779)
* windows timer with milliseconds
2016-10-01 15:27:30 -04:00
c813e93d85 fixing python 3 compat 2016-09-30 16:44:00 -07:00
ff59385034 Add 'torch/lib/nccl/' from commit 'ca330b110ae76ace344182ab83a028911111cc36'
git-subtree-dir: torch/lib/nccl
git-subtree-mainline: ea4f812a123a99d3beda1fdf4a2197035981eccb
git-subtree-split: ca330b110ae76ace344182ab83a028911111cc36
2016-09-30 16:35:16 -07:00
ea4f812a12 Fix Container.parameters() 2016-09-30 16:31:36 -07:00
dbe540e49f Use the custom TH error handler in all threads by default 2016-09-30 14:59:50 -07:00
c1c0969834 Allow changing the default error handler for all threads
THSetErrorHandler still modifies per-thread pointers, but
THSetDefaultErrorHandler allows to set a handler that's
used by all threads that haven't specified any function.
2016-09-30 14:59:50 -07:00
b87f26ce26 windows high resolution timer with a few makefile changes (#776)
windows high resolution timer
2016-09-30 14:59:50 -07:00
67335e638c bug fix for read/writeLong in THMemoryFile 2016-09-30 14:59:50 -07:00
90916f34a7 fix cpuid ecx; change to compile with msvc 2016-09-30 14:59:50 -07:00
11b38a6895 Add more functions to autograd 2016-09-30 16:37:07 -04:00
a1f5fe6a8f Add multiprocess data loader + improvements to torch.utils.data 2016-09-30 16:23:43 -04:00
5cad164dee Merge pull request #73 from colesbury/THC
Update THC and THCUNN
2016-09-30 15:53:11 -04:00
7dd28b885d Allow changing the default error handler for all threads
THSetErrorHandler still modifies per-thread pointers, but
THSetDefaultErrorHandler allows to set a handler that's
used by all threads that haven't specified any function.
2016-09-30 12:37:58 -07:00
c20828478e Update Module.cpp for THC changes 2016-09-30 11:13:14 -07:00
3e1c88e3e0 Merge commit 'da1e3f084d237ba319a22987f95f70abb69d7745' 2016-09-30 11:07:46 -07:00
e98a4ea336 Merge commit '0b0a62420c52b6e4d4c80c36d067db4654d1ed8d' 2016-09-30 11:06:53 -07:00
e8a5f00866 Auto GPU for CUNN (#71) 2016-09-30 14:04:53 -04:00
d92b7da733 fix documentation to not use forward 2016-09-30 09:49:30 -07:00
7ff16baa7d Use breadth-first in ExecutionEngine (#72) 2016-09-29 23:57:37 -04:00
93e60715af Fix error message 2016-09-29 16:27:20 -07:00
14965cfce9 Run cuDNN operations on the correct device 2016-09-29 16:27:07 -07:00
da1e3f084d Fixes for https://github.com/torch/cutorch/pull/519 2016-09-29 16:19:41 -07:00
0b0a62420c Make some basic THC operations thread-safe
Switching the device, setting the stream, and switching BLAS handles is
now thread-safe. Some other operations, like reserveStreams, are still
not thread-safe.
2016-09-29 16:17:43 -07:00
c92c82aa1a Really fix utils tests... 2016-09-29 12:52:12 -07:00
4742c08c7c Improve error messages in autograd 2016-09-29 12:16:19 -07:00
9c6ced1c0a Disable ffi tests if cffi is not available 2016-09-29 12:16:19 -07:00
a33c9bd774 Improve argument matching in invalidArguments 2016-09-29 12:16:19 -07:00
c8a4734b97 Add RReLU to both nn packages 2016-09-29 11:33:34 -07:00
3f7ab95890 Finish implementation of prng related functions 2016-09-29 11:33:25 -07:00
2d8c2972ae Only allow leaf variables as module parameters 2016-09-29 11:31:26 -07:00
941cf4e63d Add ffi utils for user C extensions 2016-09-29 09:35:56 -07:00
57610a7471 Fix documentation for MaxUnpool2d (#68) 2016-09-29 10:02:34 -04:00
f5a6a3b0e9 Fix torch.nn.Module._apply with None types (#66) 2016-09-28 19:31:07 -04:00
bab7f89cdc Fix no_bias constructor for conv2d (#65) 2016-09-28 19:30:43 -04:00
cb5d4e836f Lazy load CUDA and THNN modules (#64) 2016-09-28 19:29:53 -04:00
3a5544f060 Add support for GenerateFloatTypes, for use with cunn. 2016-09-28 09:59:19 -07:00
412019dbe4 fixing CPU builds by making cuda imports optional 2016-09-28 11:56:18 -04:00
f9d9c92560 Fix type conversions in autograd 2016-09-27 15:45:52 -07:00
7f4ff0e615 Fix type conversions in nn 2016-09-27 15:45:49 -07:00
3eac7164f4 Add data parallel functions to nn 2016-09-27 15:45:45 -07:00
f9d25e8e72 Refactor nn (require specifying parameters explicitly) 2016-09-27 15:22:26 -07:00
52ed57352a Free GIL in C functions 2016-09-27 15:22:20 -07:00
1828e7c42f Add async CUDA copy 2016-09-27 15:12:48 -07:00
2c89ae4e8a Rename getDevice to get_device 2016-09-27 15:12:48 -07:00
779a460030 Add cuDNN support for convolutions (#36) 2016-09-27 17:55:04 -04:00
0312f939d6 Only set c++11 compiler flags on THCCachingAllocator.cpp 2016-09-27 13:13:59 -07:00
60a8a9e918 improving error messages in nn 2016-09-27 12:26:03 -04:00
89666fc4fe Fix SpatialLogSoftMax memory leak and code cleanup 2016-09-27 08:16:31 -07:00
44527ab5be fix c++11 flags thing 2016-09-27 09:26:21 -04:00
a0cf6658c5 windows high resolution timer with a few makefile changes (#776)
windows high resolution timer
2016-09-27 08:59:27 -04:00
5107f23126 fix ClassNLLCriterion targets in tests and legacy nn 2016-09-26 18:56:12 -07:00
4a5557203b Merge commit 'c020a8502bd943aa37f897efe79a01fd61249ab4' 2016-09-26 17:54:05 -07:00
c020a8502b making ClassNLLCriterion targets consistent between cpu and cuda 2016-09-26 17:48:17 -07:00
44481354fc Add back support for child=None in Container constructor (#55)
It's often useful to have optional child modules, such as the
downsampling operation in ResNets. Add a test for this case:

  nn.Container(
    child=None,
  )
2016-09-26 17:18:02 -04:00
974fb1b09a Merge pull request #57 from colesbury/THC
Update THC and use CUDA caching allocator
2016-09-26 16:29:02 -04:00
4e9f0a8255 Use CUDA caching allocator 2016-09-26 13:12:39 -07:00
fa1f286cae Merge commit '85bd287b7ba481312fa58d7ffb32cba901c58829' 2016-09-26 13:08:32 -07:00
85bd287b7b Add THC_CACHING_ALLOCATOR=1 to README.md 2016-09-26 13:02:48 -07:00
0eff3897e3 Update SpatialLogSoftMax kernel to use cuda dimensions 2016-09-26 09:39:56 -07:00
e26e35a9ee bug fix for read/writeLong in THMemoryFile 2016-09-26 10:45:10 +08:00
980300b381 Combine autograd.Leaf and autograd.Variable (#52)
Prior to this change, there was a circular reference between Leaf and
Variable. This means that the objects (and referenced Tensors) are not
collected as soon as they go out of scope, which lead to higher memory
usage and out-of-memory errors.
2016-09-25 20:21:14 -04:00
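
The CPython behavior the message refers to, in a standalone sketch (no torch involved): objects in a reference cycle are not freed by reference counting alone, only when the cycle collector runs.

```
import gc
import weakref

class Node(object):
    pass

a, b = Node(), Node()
a.other, b.other = b, a     # reference cycle, analogous to Leaf <-> Variable
probe = weakref.ref(a)

del a, b                    # refcounts never reach zero because of the cycle
print(probe() is None)      # False: the objects (and anything they hold) are still alive
gc.collect()                # the cycle collector finally reclaims them
print(probe() is None)      # True
```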
1cf87e8a0b OSX + Python 2 build fixes 2016-09-25 19:26:13 -04:00
817d860af5 Add CUDA caching allocator
The allocator can be enabled by setting the environment variable
THC_CACHING_ALLOCATOR=1
2016-09-25 12:57:50 -07:00
0be5031a93 Pretty print type mismatches in error messages 2016-09-25 12:26:00 -07:00
1ed488da4f Make custom precision of CUDA tests work in inplace mode as well 2016-09-25 12:26:00 -07:00
ddf1598ef8 Add a method for catching exceptions thrown in ctypes 2016-09-25 12:25:54 -07:00
4a8a185aa4 Preserve storage view sharing in torch.save and torch.load 2016-09-25 12:24:10 -07:00
4cdeae3283 Return only unique variables from parameters() 2016-09-25 12:23:43 -07:00
5030d76acf Reduce precision of CUDA blas tests 2016-09-23 21:10:28 -07:00
c51e2c8b8c Rename CELoss to CrossEntropyLoss 2016-09-23 18:06:44 -07:00
eec0420eb3 Initialize nn modules' parameters with a default tensor type 2016-09-23 18:06:26 -07:00
e66ea56bb3 Improve THNN tensor type mismatch error messages 2016-09-23 18:06:26 -07:00
eefa0c7b40 Require torch.nn.cuda automatically when calling .cuda() 2016-09-23 18:06:26 -07:00
a489884da4 Reduce precision of addmm CUDA test 2016-09-23 17:52:08 -07:00
7a74d3fc9e Fix dl flag module in python>=3.6 2016-09-23 17:25:10 -07:00
e71204b52f Improve error messages in storage and tensor C functions 2016-09-23 17:17:35 -07:00
ca330b110a Add scan tests 2016-09-22 11:58:33 -07:00
6c77476cc1 Make tests check for deltas and report bandwidth 2016-09-22 11:58:28 -07:00
cabd6848e4 Heavy code refactoring to remove a lot of code in collectives (~1000 lines).
Have all collectives use the same args, the same ring, and the same primitives for synchronization between threads with the same pattern.
2016-09-22 11:57:56 -07:00
e3dbc6110e Add profiling API 2016-09-22 11:56:51 -07:00
1d6715fe20 Fix MPI test path 2016-09-22 11:56:20 -07:00
06ab3f962f Refactor _C extension to export some utilities 2016-09-21 08:36:54 -07:00
df77a8a81a Update LogSoftMax to work in spatial domain 2016-09-21 08:11:59 -07:00
94b7c32eb3 compiling double atomicAdd only if CUDA_ARCH < 6000, because it's now included in CUDA 2016-09-20 20:42:23 -04:00
8fdec15a55 Codemod to remove camel case method naming 2016-09-20 08:40:28 -07:00
e8b1217b28 Use bitwise operations for atomicAdd rather than byte_perm or pointer deferences.
Also properly check that half is enabled.
2016-09-19 14:00:52 -07:00
f56f06d88d fix cpuid ecx; change to compile with msvc 2016-09-19 14:41:48 +08:00
0f7a1e27d0 updating auto-generated docs 2016-09-19 00:39:46 -04:00
5114d94ad9 docstrings for conv, dropout, linear, pooling and sparse functions 2016-09-19 00:31:22 -04:00
f74c42bf00 Slightly improve THNN error messages 2016-09-18 15:02:25 -04:00
a8e816f450 Fix maskedSelect test 2016-09-18 12:54:12 -04:00
a90c259eda Add myself to LICENSE file 2016-09-18 12:53:57 -04:00
e223564a55 Fix multiprocessing on OS X 2016-09-16 18:27:07 -04:00
7847d77405 Add more functions to autograd 2016-09-16 15:26:24 -07:00
089d223922 Add support for CUDA indexAdd
Adds indexAdd via atomicAdd for unsigned char, char, short, long,
half, double.  Integer types are templatized based on sizeof.
Floating point types are implemented via intrinsics.
2016-09-16 12:50:57 -07:00
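
What the operation does, seen from the tensor API (a hedged sketch; duplicate indices are exactly why the CUDA path needs atomicAdd):

```
import torch

x = torch.zeros(5, 3)
src = torch.ones(3, 3)
idx = torch.LongTensor([0, 0, 4])   # note the repeated index 0

x.index_add_(0, idx, src)           # rows of src are accumulated into the indexed rows of x
print(x[0])                         # [2., 2., 2.]: two source rows landed on row 0
print(x[4])                         # [1., 1., 1.]
```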
930085ec9c fixing doc2md for code blocks 2016-09-16 13:34:12 -04:00
e5874ea40d Add getDevice for CUDA storages 2016-09-15 13:54:39 -07:00
9ee6189bf9 Merge pull request #41 from jia-kai/master
Some minor fixes for compile/usage
2016-09-15 09:45:52 -07:00
939b0a4297 Merge pull request #45 from NVIDIA/cw-update-copyright-year
Update LICENSE.txt
2016-08-26 15:44:00 -07:00
234c8c9ef3 Update LICENSE.txt 2016-08-26 15:39:21 -07:00
75bad643bd Updated LICENCE.txt 2016-08-26 15:08:20 -07:00
47b0797fe1 pass devlist as const int* rather than int* in ncclCommInitAll 2016-08-19 19:00:14 +08:00
ed401cc29b link library with -lrt; otherwise there is undefined reference to shm_open 2016-08-19 18:58:56 +08:00
b3a9e1333d Remove unneeded deb build script 2016-07-27 17:58:00 -07:00
428ec5b2a3 Merge remote-tracking branch 'github/master' into public 2016-07-25 10:53:01 -07:00
55c42ad681 Fixed redundant contexts in multi-process apps
Change-Id: If787014450fd281304f0c7baf01d25963e40905d
2016-07-25 10:10:30 -07:00
7a1aa6b563 Improved Deb generation 2016-07-07 16:31:57 +02:00
9ae84f5d6b Fix version number 2016-06-16 17:07:42 -07:00
e51e922924 Add a debug level to NCCL and CUDA versions at init 2016-06-16 17:04:41 -07:00
9fcc523485 Increased version to 1.2.3 2016-06-15 19:18:13 -07:00
67d1ab9106 Packaging : Generate shlibs.local 2016-06-15 19:03:08 -07:00
da6d2009e0 Move deb to build directory 2016-06-15 18:20:10 -07:00
155132d336 Fix make install to use BUILDDIR 2016-06-15 18:20:02 -07:00
08ddfe03d2 Rework debian packaging 2016-06-15 18:18:44 -07:00
5d4716a8a3 Include link to blog post in README.md 2016-06-15 10:54:19 -07:00
aa8f669a3d Updating for .deb rebuild 2016-06-13 02:01:49 -07:00
d5e507fc7f Only call the CUDA runtime. That may fix #27. 2016-06-07 16:27:51 -07:00
620491a649 Merge remote-tracking branch 'github/master' into HEAD 2016-06-06 14:35:57 -07:00
7edfc57228 Make NCCL collectives work on communicators with only one rank 2016-06-06 14:35:00 -07:00
bd3cf73e6e Changed CURAND generator to work on a wider set of platforms. 2016-06-06 14:34:03 -07:00
177505b757 Gencodes changed to NV recommended 2016-06-06 00:06:18 -07:00
9d9d8cd59f Bump to 1.2.2 2016-06-03 17:21:53 -07:00
1657af1567 Better name for GENCODE 2016-06-03 10:25:37 -07:00
acb93d1aed Removing unneeded includes 2016-06-02 17:33:43 -07:00
889ad3d4e6 Makefile improvements
- Use standard CXX env var
 - Permit redefinition of more env
 - Separate lib from tests
2016-06-02 15:01:03 -07:00
93538def65 Merge pull request #22 from borisfom/master
Fixed version in ChangeLog
2016-04-21 18:58:44 -07:00
e5067b6611 Fixed version in ChangeLog 2016-04-21 16:28:13 -07:00
0629fb62d7 Merge pull request #21 from borisfom/master
Fixed install location, new .deb version
2016-04-21 14:46:41 -07:00
0177cf3ea4 Fixed install location, new .deb version 2016-04-21 14:10:31 -07:00
658aca1469 Merge pull request #17 from Hopobcn/master
Enable compilation with specific g++
2016-04-21 13:25:18 -07:00
03df4c7759 Moved no-as-needed flag to link rule.
Avoids link errors for tests linked with nvcc.
2016-04-19 14:51:03 -07:00
0d4f8f4e95 Merge pull request #18 from apaszke/master
Add --no-as-needed to make sure that cudart library gets linked
2016-04-19 11:11:39 -07:00
ddd3f2084d Fix readme to reflect the new test paths 2016-04-19 11:09:25 -07:00
dba3ec9428 Fix random deadlock during ncclCommInitRank. 2016-04-19 10:47:27 -07:00
9de361a1b9 Fix MPI test usage
Only display usage from rank 0 and exit instead of continuing (and seg fault).
2016-04-19 10:43:38 -07:00
c0c959b1be Add --no-as-needed to make sure that cudart library gets liked 2016-04-13 10:04:38 -04:00
e30bf95989 Enable compilation with old g++ when the default g++ is not supported (+5.0) 2016-04-12 12:49:13 +02:00
b16cc5d197 Merge pull request #16 from borisfom/master
Removed Tegra, fixed version format.
2016-03-17 17:35:04 -07:00
e6f4a83da6 Removing Tegra 2016-03-17 17:25:27 -07:00
1a8bae5b2f fixed version format 2016-03-17 17:13:45 -07:00
e8eb285a59 Merge pull request #15 from borisfom/master
Fixing version number and compile param for 5.3
2016-03-17 16:03:05 -07:00
b508d28123 Version with . 7.5 2016-03-17 15:48:48 -07:00
62b551798f Use arch=5.3 as well 2016-03-16 23:09:36 -07:00
dfbebe395c Delete libnccl1_1.1.1+cuda75_amd64.deb 2016-03-16 21:44:13 -07:00
85280b5bf4 Delete libnccl-dev_1.1.1+cuda75_amd64.deb 2016-03-16 21:44:04 -07:00
fb53cfd9b0 Added files via upload 2016-03-16 21:42:47 -07:00
92d2123d8d Added compute 5.3 2016-03-16 19:24:48 -07:00
ec3de28ae5 Preparing for pbuild 2016-03-16 19:23:49 -07:00
86dc136fa9 Moved to pbuilder 2016-03-16 18:41:54 -07:00
172f316ac2 Moved release files to proper area
Bumping a version; building for 7.5
2016-03-16 18:30:53 -07:00
941d9da08c Updated package version, added manpage 2016-02-29 12:10:34 -08:00
5554a4c9f0 Fixed useRemoteRecv consistency issue.
Change-Id: Ib093a8dc3bb093eddc89dad81d3fffa53c03a6a2
Reviewed-on: http://git-master/r/1013543
Reviewed-by: Cliff Woolley <jwoolley@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-02-18 13:45:42 -08:00
9442285526 Fixed buffer overflow in ReduceOrCopy
Bug caused AllGathers and ReduceScatters of less than
8 bytes to fail in certain cases.

Change-Id: I33e1beb50805bfdb457ae16a90e3f91c1b283b9b
Reviewed-on: http://git-master/r/1011505
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-02-12 15:13:56 -08:00
caa40b8dd3 Libwrap checks for LIB.so.1 if LIB.so not found
Change-Id: I6f07f887f828cb2259dcfd496a2ad707db898cf5
Reviewed-on: http://git-master/r/1000162
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-29 12:36:42 -08:00
2758353380 Added NCCL error checking to tests.
Also cleaned up makefile so that tests and lib are not built unnecessarily.

Change-Id: Ia0c596cc2213628de2f066be97615c09bb1bb262
Reviewed-on: http://git-master/r/999627
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-29 11:09:05 -08:00
fe1a956715 Enabled support for char type to be unsigned.
GCC on POWER arch defines char type as unsigned.

Change-Id: Ic143cb058fe42414b1f6f1f45b02132c837726ae
Reviewed-on: http://git-master/r/999614
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-28 13:38:18 -08:00
c05312f151 Moved tests to separate dir and improved MPI test
test sources moved to test/ directory.
MPI test displays PASS/FAIL and returns code accordingly.

Change-Id: I058ebd1bd5202d8f38cc9787898b2480100c102b
Reviewed-on: http://git-master/r/936086
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-28 12:56:36 -08:00
5966316771 Added support for more than 8 GPUs.
Change-Id: Iaa1841036a7bfdad6ebec99fed0adcd2bbe6ffad
Reviewed-on: http://git-master/r/935459
Reviewed-by: Cliff Woolley <jwoolley@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-21 13:00:21 -08:00
130ee246e2 Fixed deadlock in back-to-back reduce_scatters.
Change-Id: I92d32b15e516a39710b676aee692ae9b70638937
Reviewed-on: http://git-master/r/935458
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
2016-01-21 10:36:03 -08:00
90af7c73ef Merge pull request #6 from lukeyeager/deb
Deb packaging
2016-01-07 13:06:28 -08:00
3251681207 Merge branch 'yangky11-patch-1' 2016-01-06 16:48:29 -08:00
d332c41e71 fix a typo in README.md 2015-12-24 00:01:02 +08:00
c9da89254b Update deb packaging scripts 2015-12-18 14:23:34 -08:00
eb2d869f71 Merge pull request #5 from lukeyeager/tests-nvml
Don't link tests with NVML
2015-12-18 13:36:20 -08:00
f1e92fe2a3 Added Debian packaging files 2015-12-18 13:36:10 -08:00
b5400c54df Don't link tests with NVML 2015-12-18 13:27:55 -08:00
a4de6016f8 Merge pull request #4 from lukeyeager/build-sm50
Build SM 5.0 code
2015-12-18 13:23:48 -08:00
4807909e3f Merge pull request #3 from lukeyeager/semver
Use semantic versioning
2015-12-18 13:22:19 -08:00
dd0884b707 Build SM 5.0 code 2015-12-18 13:19:50 -08:00
e1634ca6cb Use semantic versioning 2015-12-18 12:02:17 -08:00
651a6edc5c Fixed bug in MPI initialization. 2015-12-10 17:54:41 -08:00
ada5edce88 Merge pull request #1 from slayton58/int64_uint64
Add int64 and uint64 types for all algorithms and tests
2015-12-10 17:22:50 -08:00
41ce4ca9fc Add int64 and uint64 types for all algorithms and tests 2015-12-04 13:28:36 -05:00
27d32ac5d9 Fixed a race condition in reduce and broadcast. 2015-11-19 11:11:52 -08:00
0673d5f44f Initial release. 2015-11-17 11:30:40 -08:00
1121 changed files with 109098 additions and 34899 deletions

17
.gitignore vendored

@ -2,17 +2,34 @@ build/
dist/
torch.egg-info/
*/**/__pycache__
torch/version.py
torch/csrc/generic/TensorMethods.cpp
torch/lib/*.so*
torch/lib/*.dylib*
torch/lib/*.h
torch/lib/build
torch/lib/tmp_install
torch/lib/include
torch/lib/torch_shm_manager
torch/csrc/cudnn/cuDNN.cpp
torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp
torch/csrc/nn/THCUNN.cwrap
torch/csrc/nn/THCUNN.cpp
torch/csrc/nn/THNN_generic.cwrap
torch/csrc/nn/THNN_generic.cpp
torch/csrc/nn/THNN_generic.h
docs/src/**/*
test/data/legacy_modules.t7
test/data/gpu_tensors.pt
test/htmlcov
test/.coverage
*/*.pyc
*/**/*.pyc
*/**/**/*.pyc
*/**/**/**/*.pyc
*/**/**/**/**/*.pyc
*/*.so*
*/**/*.so*
*/**/*.dylib*
test/data/legacy_serialized.pt

.travis.yml

@ -3,19 +3,27 @@ language: python
python:
- 2.7.8
- 2.7
- 3.3
- 3.4
- 3.5
- 3.6
- nightly
cache:
- ccache
- directories:
- $HOME/.ccache
install:
- export CC="gcc-4.8"
- export CXX="g++-4.8"
- travis_retry pip install -r requirements.txt
- travis_retry pip install .
- unset CCACHE_DISABLE
- export CCACHE_DIR=$HOME/.ccache
- export CC="ccache gcc-4.8"
- export CXX="ccache g++-4.8"
- ccache --show-stats
- travis_retry pip install --upgrade pip setuptools wheel
- travis_retry pip install -r requirements.txt --only-binary=scipy
- python setup.py install
script:
- ./test/run_test.sh
- OMP_NUM_THREADS=2 ./test/run_test.sh
addons:
apt:
@ -32,3 +40,9 @@ sudo: false
matrix:
fast_finish: true
include:
env: LINT_CHECK
python: "2.7"
addons: true
install: pip install flake8
script: flake8

74
CONTRIBUTING.md Normal file

@ -0,0 +1,74 @@
## Contributing to PyTorch
If you are interested in contributing to PyTorch, your contributions will fall
into two categories:
1. You want to propose a new Feature and implement it
- post about your intended feature, and we shall discuss the design and
implementation. Once we agree that the plan looks good, go ahead and implement it.
2. You want to implement a feature or bug-fix for an outstanding issue
- Look at the outstanding issues here: https://github.com/pytorch/pytorch/issues
- Especially look at the Low Priority and Medium Priority issues
- Pick an issue and comment on the task that you want to work on this feature
- If you need more context on a particular issue, please ask and we shall provide.
Once you finish implementing a feature or bugfix, please send a Pull Request to
https://github.com/pytorch/pytorch
If you are not familiar with creating a Pull Request, here are some guides:
- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request
- https://help.github.com/articles/creating-a-pull-request/
## Developing locally with PyTorch
To locally develop with PyTorch, here are some tips:
1. Uninstall all existing pytorch installs
```
conda uninstall pytorch
pip uninstall torch
pip uninstall torch # run this command twice
```
2. Locally clone a copy of PyTorch from source:
```
git clone https://github.com/pytorch/pytorch
cd pytorch
```
3. Install PyTorch in `build develop` mode:
A full set of instructions on installing PyTorch from Source are here:
https://github.com/pytorch/pytorch#from-source
The change you have to make is to replace
`python setup.py install`
with
```
python setup.py build develop
```
This is especially useful if you are only changing Python files.
This mode will symlink the python files from the current local source tree into the
python install.
Hence, if you modify a python file, you do not need to reinstall pytorch again and again.
For example:
- Install local pytorch in `build develop` mode
- modify your python file torch/__init__.py (for example)
- test functionality
- modify your python file torch/__init__.py
- test functionality
- modify your python file torch/__init__.py
- test functionality
You do not need to repeatedly install after modifying python files.
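As a quick sanity check (a sketch; run it from a directory other than the source root so the local checkout does not shadow the install), you can confirm that the develop-mode install points at your checkout:
```python
# Sketch: verify that a `python setup.py build develop` install picks up local edits.
import torch
print(torch.__version__)  # version string generated into torch/version.py
print(torch.__file__)     # should point into your local pytorch checkout, not site-packages
```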
Hope this helps, and thanks for considering contributing.

38
Dockerfile Normal file

@ -0,0 +1,38 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
ENV CUDNN_VERSION 6.0.20
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cmake \
git \
curl \
ca-certificates \
libjpeg-dev \
libpng-dev \
libcudnn6=$CUDNN_VERSION-1+cuda8.0 \
libcudnn6-dev=$CUDNN_VERSION-1+cuda8.0 && \
rm -rf /var/lib/apt/lists/*
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda install conda-build && \
/opt/conda/bin/conda create -y --name pytorch-py35 python=3.5.2 numpy scipy ipython mkl && \
/opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/envs/pytorch-py35/bin:$PATH
RUN conda install --name pytorch-py35 -c soumith magma-cuda80
# This must be done before pip so that requirements.txt is available
WORKDIR /opt/pytorch
COPY . .
RUN cat requirements.txt | xargs -n1 pip install --no-cache-dir && \
TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_LIBRARY_PATH=/opt/conda/envs/pytorch-py35/lib \
CMAKE_INCLUDE_PATH=/opt/conda/envs/pytorch-py35/include \
pip install -v .
WORKDIR /workspace
RUN chmod -R a+w /workspace

LICENSE

@ -1,3 +1,4 @@
Copyright (c) 2016- Facebook, Inc (Adam Paszke)
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
@ -19,9 +20,9 @@ modification, are permitted provided that the following conditions are met:
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

539
README.md

@ -1,352 +1,243 @@
# pytorch [alpha-3]
<p align="center"><img width="40%" src="docs/source/_static/img/pytorch-logo-dark.png" /></p>
| Python | **`Linux CPU`** | **`Linux GPU`** |
|--------|--------------------|------------------|
| 2.7.8 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
| 2.7 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py2)](https://build.pytorch.org/job/pytorch-master-py2) |
| 3.3 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
| 3.4 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
| 3.5 | [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py3)](https://build.pytorch.org/job/pytorch-master-py3) |
| Nightly| [![Build Status](https://travis-ci.com/apaszke/pytorch.svg?token=shqHbUq29zKDxuqzGcjC&branch=master)](https://travis-ci.com/apaszke/pytorch) | |
--------------------------------------------------------------------------------
The project is still under active development and is likely to drastically change in short periods of time.
We will be announcing API changes and important developments via a newsletter and GitHub issues, and will post links to those issues on Slack.
Please remember that at this stage, this is an invite-only closed alpha, and please don't distribute code further.
This is done so that we can control development tightly and rapidly during the initial phases with feedback from you.
PyTorch is a python package that provides two high-level features:
- Tensor computation (like numpy) with strong GPU acceleration
- Deep Neural Networks built on a tape-based autograd system
You can reuse your favorite python packages such as numpy, scipy and Cython to extend PyTorch when needed.
We are in an early-release Beta. Expect some adventures and rough edges.
- [More About PyTorch](#more-about-pytorch)
- [Installation](#installation)
- [Binaries](#binaries)
- [From source](#from-source)
- [Docker image](#docker-image)
- [Getting Started](#getting-started)
- [Communication](#communication)
- [Releases and Contributing](#releases-and-contributing)
- [The Team](#the-team)
| System | Python | Status |
| --- | --- | --- |
| Linux CPU | 2.7.8, 2.7, 3.5, nightly | [![Build Status](https://travis-ci.org/pytorch/pytorch.svg?branch=master)](https://travis-ci.org/pytorch/pytorch) |
| Linux GPU | 2.7 | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py2)](https://build.pytorch.org/job/pytorch-master-py2) |
| Linux GPU | 3.5 | [![Build Status](http://build.pytorch.org:8080/buildStatus/icon?job=pytorch-master-py3)](https://build.pytorch.org/job/pytorch-master-py3) |
## More about PyTorch
At a granular level, PyTorch is a library that consists of the following components:
<table>
<tr>
<td><b> torch </b></td>
<td> a Tensor library like NumPy, with strong GPU support </td>
</tr>
<tr>
<td><b> torch.autograd </b></td>
<td> a tape based automatic differentiation library that supports all differentiable Tensor operations in torch </td>
</tr>
<tr>
<td><b> torch.nn </b></td>
<td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td>
</tr>
<tr>
<td><b> torch.multiprocessing </b></td>
<td> python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. </td>
</tr>
<tr>
<td><b> torch.utils </b></td>
<td> DataLoader, Trainer and other utility functions for convenience </td>
</tr>
<tr>
<td><b> torch.legacy(.nn/.optim) </b></td>
<td> legacy code that has been ported over from torch for backward compatibility reasons </td>
</tr>
</table>
Usually one uses PyTorch either as:
- A replacement for numpy to use the power of GPUs.
- A deep learning research platform that provides maximum flexibility and speed.
Elaborating further:
### A GPU-ready Tensor library
If you use numpy, then you have used Tensors (a.k.a ndarray).
<p align=center><img width="30%" src="docs/source/_static/img/tensor_illustration.png" /></p>
PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerate
compute by a huge amount.
We provide a wide variety of tensor routines to accelerate and fit your scientific computation needs
such as slicing, indexing, math operations, linear algebra, reductions.
And they are fast!
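For instance (a minimal sketch of a few of these routines):
```python
import torch

x = torch.randn(5, 3)      # a CPU tensor
y = torch.randn(3, 4)
z = torch.mm(x, y)         # linear algebra: matrix multiply
row_sums = z.sum(1)        # reduction along a dimension
first_col = z[:, 0]        # slicing / indexing
```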
### Dynamic Neural Networks: Tape based Autograd
PyTorch has a unique way of building neural networks: using and replaying a tape recorder.
Most frameworks such as `TensorFlow`, `Theano`, `Caffe` and `CNTK` have a static view of the world.
One has to build a neural network, and reuse the same structure again and again.
Changing the way the network behaves means that one has to start from scratch.
With PyTorch, we use a technique called Reverse-mode auto-differentiation, which allows you to
change the way your network behaves arbitrarily with zero lag or overhead. Our inspiration comes
from several research papers on this topic, as well as current and past work such as
[autograd](https://github.com/twitter/torch-autograd),
[autograd](https://github.com/HIPS/autograd),
[Chainer](http://chainer.org), etc.
While this technique is not unique to PyTorch, it's one of the fastest implementations of it to date.
You get the best of speed and flexibility for your crazy research.
<p align=center><img width="80%" src="docs/source/_static/img/dynamic_graph.gif" /></p>
### Python first
PyTorch is not a Python binding into a monolithic C++ framework.
It is built to be deeply integrated into Python.
You can use it naturally like you would use numpy / scipy / scikit-learn etc.
You can write your new neural network layers in Python itself, using your favorite libraries
and use packages such as Cython and Numba.
Our goal is to not reinvent the wheel where appropriate.
### Imperative experiences
PyTorch is designed to be intuitive, linear in thought and easy to use.
When you execute a line of code, it gets executed. There isn't an asynchronous view of the world.
When you drop into a debugger, or receive error messages and stack traces, understanding them is straight-forward.
The stack-trace points to exactly where your code was defined.
We hope you never spend hours debugging your code because of bad stack traces or asynchronous and opaque execution engines.
### Fast and Lean
PyTorch has minimal framework overhead. We integrate acceleration libraries
such as Intel MKL and NVIDIA (CuDNN, NCCL) to maximize speed.
At the core, its CPU and GPU Tensor and Neural Network backends
(TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API.
They are mature and have been tested for years.
Hence, PyTorch is quite fast -- whether you run small or large neural networks.
The memory usage in PyTorch is extremely efficient compared to Torch or some of the alternatives.
We've written custom memory allocators for the GPU to make sure that
your deep learning models are maximally memory efficient.
This enables you to train bigger deep learning models than before.
### Extensions without pain
Writing new neural network modules, or interfacing with PyTorch's Tensor API was designed to be straight-forward
and with minimal abstractions.
You can write new neural network layers in Python using the torch API
[or your favorite numpy based libraries such as SciPy](http://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html).
If you want to write your layers in C/C++, we provide an extension API based on
[cffi](http://cffi.readthedocs.io/en/latest/) that is efficient and with minimal boilerplate.
There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/c_extension.html) and [an example here](https://github.com/pytorch/extension-ffi).
## Installation
### Binaries
- Anaconda
```bash
conda install pytorch -c https://conda.anaconda.org/t/6N-MsQ4WZ7jo/soumith
```
Commands to install from binaries via Conda or pip wheels are on our website:
[http://pytorch.org](http://pytorch.org)
### From source
If you are installing from source, we highly recommend installing an [Anaconda](https://www.continuum.io/downloads) environment.
You will get a high-quality BLAS library (MKL) and you get a controlled compiler version regardless of your Linux distro.
Once you have [anaconda](https://www.continuum.io/downloads) installed, here are the instructions.
If you want to compile with CUDA support, install
- [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) 7.5 or above
- [NVIDIA CuDNN](https://developer.nvidia.com/cudnn) v5.x
If you want to disable CUDA support, export environment variable `NO_CUDA=1`.
#### Install optional dependencies
On Linux
```bash
pip install -r requirements.txt
pip install .
export CMAKE_PREFIX_PATH=[anaconda root directory]
# Install basic dependencies
conda install numpy mkl setuptools cmake gcc cffi
# Add LAPACK support for the GPU
conda install -c soumith magma-cuda75 # or magma-cuda80 if CUDA 8.0
```
On OSX
```bash
export CMAKE_PREFIX_PATH=[anaconda root directory]
conda install numpy setuptools cmake cffi
```
#### Install PyTorch
```bash
export MACOSX_DEPLOYMENT_TARGET=10.9 # if OSX
pip install -r requirements.txt
python setup.py install
```
### Docker image
A Dockerfile is supplied to build images with CUDA support and cuDNN v6. Build it as usual
```
docker build -t pytorch-cudnnv6 .
```
and run with nvidia-docker:
```
nvidia-docker run --rm -ti --ipc=host pytorch-cudnnv6
```
Please note that PyTorch uses shared memory to share data between processes, so if torch multiprocessing is used (e.g.
for multithreaded data loaders) the default shared memory segment size that the container runs with is not enough, and you
should increase the shared memory size either with the --ipc=host or --shm-size command line options to nvidia-docker run.
## Getting Started
A more comprehensive Getting Started section will be filled in soon.
For now, there are two pointers:
- The MNIST example: [https://github.com/pytorch/examples](https://github.com/pytorch/examples)
- The API Reference: [http://pytorch.org/api/](http://pytorch.org/api/)
Three pointers to get you started:
- [Tutorials: get you started with understanding and using PyTorch](http://pytorch.org/tutorials/)
- [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples)
- The API Reference: [http://pytorch.org/docs/](http://pytorch.org/docs/)
## Communication
* forums: discuss implementations, research, etc. http://discuss.pytorch.org
* github issues: bug reports, feature requests, install issues, RFCs, thoughts, etc.
* slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . If you need a slack invite, ping me at soumith@pytorch.org
* slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . If you need a slack invite, ping us at soumith@pytorch.org
* newsletter: no-noise, one-way email newsletter with important announcements about pytorch. You can sign-up here: http://eepurl.com/cbG0rv
## Timeline
## Releases and Contributing
We will run the alpha releases weekly for 6 weeks.
After that, we will reevaluate progress, and if we are ready, we will hit beta-0. If not, we will do another two weeks of alpha.
PyTorch has a 90 day release cycle (major releases).
Its current state is Beta (v0.1.6); we expect no obvious bugs. Please let us know if you encounter a bug by [filing an issue](https://github.com/pytorch/pytorch/issues).
* ~~alpha-0: Working versions of torch, cutorch, nn, cunn, optim fully unit tested with seamless numpy conversions~~
* ~~alpha-1: Serialization to/from disk with sharing intact. initial release of the new neuralnets package based on a Chainer-like design~~
* ~~alpha-2: sharing tensors across processes for hogwild training or data-loading processes. a rewritten optim package for this new nn.~~
* ~~alpha-3: binary installs, contbuilds, etc.~~
* alpha-4: a ton of examples across vision, nlp, speech, RL -- this phase might make us rethink parts of the APIs, and hence we want to do this in alpha rather than beta
* alpha-5: Putting a simple and efficient story around multi-machine training. Probably simplistic like torch-distlearn. Building the website, release scripts, more documentation, etc.
* alpha-6: [no plan yet]
We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion.
The beta phases will lean more towards working with all of you, covering your use-cases, and active development on non-core aspects.
If you plan to contribute new features, utility functions or extensions to the core, please first open an issue and discuss the feature with us.
Sending a PR without discussion might end up resulting in a rejected PR, because we might be taking the core in a different direction than you might be aware of.
## pytorch vs torch: important changes
**For the next release cycle, these are the 3 big features we are planning to add:**
We've decided that it's time to rewrite/update parts of the old torch API, even if it means losing some backward compatibility (we can hack up a model converter that converts correctly).
This section lists the biggest changes, and suggests how to shift from torch to pytorch.
1. [Distributed PyTorch](https://github.com/pytorch/pytorch/issues/241) (a draft implementation is present in this [branch](https://github.com/apaszke/pytorch-dist) )
2. Backward of Backward - Backpropagating through the optimization process itself. Some past and recent papers such as
[Double Backprop](http://yann.lecun.com/exdb/publis/pdf/drucker-lecun-91.pdf) and [Unrolled GANs](https://arxiv.org/abs/1611.02163) need this.
3. Lazy Execution Engine for autograd - This will enable us to optionally introduce caching and JIT compilers to optimize autograd code.
For now there's no pytorch documentation.
Since all currently implemented modules are very similar to the old ones, it's best to use the torch7 docs for now (keeping in mind the differences described below).
### Library structure
## The Team
All core modules are merged into a single repository.
Most of them will be rewritten and will be completely new (more on this below), but we're providing a Python version of old packages under torch.legacy namespace.
* torch (torch)
* cutorch (torch.cuda)
* nn (torch.legacy.nn)
* cunn (torch.legacy.cunn)
* optim (torch.legacy.optim)
* nngraph (torch.legacy.nngraph - not implemented yet)
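For example (a sketch; it assumes the `torch.legacy.nn` port exposes `Sequential`, `Linear` and `Tanh` with the Lua-style `add`/`forward` methods):
```python
import torch
import torch.legacy.nn as legacy_nn

# Build a small network with the ported (Lua-style) API
mlp = legacy_nn.Sequential()
mlp.add(legacy_nn.Linear(10, 5))
mlp.add(legacy_nn.Tanh())

out = mlp.forward(torch.randn(4, 10))
```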
PyTorch is a community driven project with several skillful engineers and researchers contributing to it.
### 0-based indexing
pytorch uses 0-based indexing everywhere.
This includes arguments to `index*` functions and nn criterion weights.
Under the hood, on the C side, we've changed logic on TH / THC / THNN / THCUNN to introduce a TH_INDEX_BASE compile-time definition to switch between 0 and 1 indexing logic.
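A small illustration of the shift (a sketch against the early API; `torch.range` is inclusive of both endpoints):
```python
import torch

t = torch.range(1, 5)               # values 1, 2, 3, 4, 5
idx = torch.LongTensor([0, 1])      # 0 now refers to the first element
first_two = t.index_select(0, idx)  # values 1, 2 (Lua torch would have needed indices 1, 2)
```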
### New Tensor API
**All methods operating on tensors are now out-of-place by default.**
This means that although `a.add(b)` used to have a side-effect of mutating the elements in a, it will now return a new Tensor, holding the result.
All methods that mutate the Tensor/Storage are now marked with a trailing underscore (including `copy` -> `copy_`, `fill` -> `fill_`, `set` -> `set_`, etc.).
Most math methods have in-place counterparts, so the equivalent of `a.add(b)` in Lua is now `a.add_(b)` (or `torch.add(a, a, b)`, which is not recommended in this case)
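Concretely (a minimal sketch):
```python
import torch

a = torch.ones(3)
b = torch.ones(3)

c = a.add(b)   # out-of-place: returns a new Tensor, `a` is unchanged
a.add_(b)      # in-place: the trailing underscore marks mutation of `a`
```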
### CUDA module
All tensors have their CUDA counterparts in torch.cuda module.
There is no `torch.cuda.setDevice` anymore. By default the 0th device is selected, but code can be placed in a `with` statement to change it:
```python
with torch.cuda.device(1):
    a = torch.cuda.FloatTensor(10) # a is allocated on GPU1
```
Calling `.cuda()` on a tensor no longer converts it to a GPU float tensor, but to a CUDA tensor of the same type located on the currently selected device.
So, for example: `a = torch.LongTensor(10).cuda() # a is a CudaLongTensor`
Calling `.cuda(3)` will send it to device 3.
`.cuda()` can be also used to transfer CUDA tensors between devices (calling it on a GPU tensor, with a different device selected will copy it into the current device).
```python
a = torch.LongTensor(10)
b = a.cuda() # b is a torch.cuda.LongTensor placed on GPU0
c = a.cuda(2) # c is a torch.cuda.LongTensor placed on GPU2
with torch.cuda.device(1):
    d = b.cuda() # d is a copy of b, but on GPU1
    e = d.cuda() # a no-op, d is already on current GPU, e is d == True
```
Also, setting the device is now only important for specifying where to allocate new Tensors. You can perform operations on CUDA Tensors irrespective of the currently selected device (but all arguments have to be on the same device) - the result will also be allocated there. See below for an example:
```python
a = torch.randn(2, 2).cuda()
b = torch.randn(2, 2).cuda()
with torch.cuda.device(1):
    c = a + b # c is on GPU0
    d = torch.randn(2, 2).cuda() # d is on GPU1
```
In the near future, we also plan to use a CUDA allocator, which alleviates problems with cudaMalloc/cudaFree being a sync point.
This will help us avoid keeping buffers for every intermediate computation in a module when one wants to do multi-GPU training, for example.
See: https://github.com/torch/cutorch/pull/443
### Numpy integration
Because numpy is a core numerical package in Python, and is used by many other libraries like matplotlib, we've implemented a two-way bridge between pytorch and numpy.
```python
a = torch.randn(2, 2)
b = a.numpy() # b is a numpy array of type corresponding to a
# no memory copy is performed, they share the same storage
c = numpy.zeros((5, 5))
d = torch.DoubleTensor(c) # it's possible to construct Tensors from numpy arrays
# d shares memory with c - there's no copy
```
### New neural network module
After looking at several framework designs, looking at the current design of `nn` and thinking through a few original design ideas, this is what we've converged to:
* Adopt a Chainer-like design
* Makes it extremely natural to express Recurrent Nets and weight sharing
* Each module can operate in-place, but marks used variables as dirty - errors will be raised if they're used again
* RNN example:
```python
class Network(nn.Container):
    def __init__(self):
        super(Network, self).__init__(
            conv1=nn.SpatialConvolution(3, 16, 3, 3, 1, 1),
            relu1=nn.ReLU(True),
            lstm=nn.LSTM(),
        )

    def __call__(self, input):
        y = self.conv1(input)
        y = self.relu1(y)
        y = self.lstm(y)
        return y

model = Network()
input = nn.Variable(torch.zeros(256, 3, 224, 224))
output = model(input)

loss = 0
for i in range(ITERS):
    input, target = ...
    # That's all you need for an RNN
    for t in range(TIMESTEPS):
        loss += loss_fn(model(input), target)
    loss.backward()
```
* Here, nn.Variable will have a complete tape-based automatic differentiation implemented
* To access states, have hooks for forward / backward (this also makes multi-GPU easier to implement)
* This has the advantage of not having to worry about in-place / out-of-place operators for accessing .output or .gradInput
* When writing the module, make sure debuggability is straightforward. Dropping into pdb and inspecting things should be natural, especially when going over the backward graph.
* Pulling handles to a module after constructing a chain should be very natural (apart from having a handle at construction)
* It's easy, since modules are assigned as Container properties
* Drop overly verbose names. Example:
* SpatialConvolution → conv2d
* VolumetricConvolution → conv3d
#### Some notes on new nn implementation
As shown above, the structure of the network is fully defined by the control flow embedded in the code. There are no rigid containers like the ones known from Lua. You can put an `if` in the middle of your model and freely branch depending on any condition you can come up with. All operations are registered in the computational graph history.
There are two main objects that make this possible - variables and functions. They will be denoted as squares and circles respectively.
![Variable and function symbols](http://students.mimuw.edu.pl/~ap360585/__torch_img/variable_function.png)
Variables are the objects that hold a reference to a tensor (and optionally to gradient w.r.t. that tensor), and to the function in the computational graph that created it. Variables created explicitly by the user (`Variable(tensor)`) have a Leaf function node associated with them.
![Variable and leaf function](http://students.mimuw.edu.pl/~ap360585/__torch_img/variable_leaf.png)
Functions are simple classes that define a function from a tuple of inputs to a tuple of outputs, and a formula for computing the gradient w.r.t. its inputs. Function objects are instantiated to hold references to other functions, and these references allow the history of a computation to be reconstructed. An example graph for a linear layer (`Wx + b`) is shown below.
![Linear layer](http://students.mimuw.edu.pl/~ap360585/__torch_img/linear.png)
Please note that function objects never hold references to Variable objects, except when they're necessary in the backward pass. This allows all the unnecessary intermediate values to be freed. A good example of this is addition when computing e.g. (`y = Wx + My`):
![Freeing intermediate values](http://students.mimuw.edu.pl/~ap360585/__torch_img/intermediate_free.png)
The matrix multiplication operation keeps references to its inputs because it will need them, but addition doesn't need `Wx` and `My` after it computes the result, so they are freed as soon as they go out of scope. To access intermediate values in the forward pass you can either copy them while you still have a reference, or you can use a system of hooks that can be attached to any function. Hooks also allow you to access and inspect gradients inside the graph.
Another nice thing about this is that a single layer doesn't hold any state other than its parameters (all intermediate values are alive as long as the graph references them), so it can be used multiple times before calling backward. This is especially convenient when training RNNs. You can use the same network for all timesteps and the gradients will sum up automatically.
To compute the backward pass you can call `.backward()` on a variable if it's a scalar (a 1-element Variable), or you can provide a gradient tensor of matching shape if it's not. This creates an execution engine object that manages the whole backward pass. It has been introduced so that the code for analyzing the graph and scheduling node processing order is decoupled from other parts, and can be easily replaced. Right now it simply processes the nodes in topological order, without any prioritization, but in the future we can implement algorithms and heuristics for scheduling independent nodes on different GPU streams, deciding which branches to compute first, etc.
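For instance (a sketch; `requires_grad` marks a leaf Variable as needing gradients):
```python
import torch
from torch.autograd import Variable

x = Variable(torch.ones(2, 2), requires_grad=True)

y = (x * 2).sum()              # a 1-element Variable
y.backward()                   # scalar: no gradient argument needed

z = x * 3                      # non-scalar result
z.backward(torch.ones(2, 2))   # must supply a gradient tensor of matching shape
# gradients from both calls accumulate into x.grad
```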
### Serialization
Pickling tensors is supported, but requires making a temporary copy of all data and breaks sharing.
For this reason we're providing `torch.load` and `torch.save`, which are free of these problems.
They have the same interfaces as `pickle.load` (file object) and `pickle.dump` (serialized object, file object) respectively.
For now the only requirement is that the file should have a `fileno` method, which returns a file descriptor number (this is already implemented by objects returned by `open`).
Objects are serialized in a tar archive consisting of four files:
`sys_info` - protocol version, byte order, long size, etc.
`pickle` - pickled object
`tensors` - tensor metadata
`storages` - serialized data
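A minimal sketch of the interface (the filename here is just an example):
```python
import torch

t = torch.ones(5)
with open('tensor.pt', 'wb') as f:   # any object with a fileno() method works
    torch.save(t, f)

with open('tensor.pt', 'rb') as f:
    t2 = torch.load(f)
```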
### Multi-GPU
Proposed solutions need to address:
* Kernel launch latency
* without affecting the user's code
* Implementation should be as transparent as possible
* Should we expose DPT as:
* Split
* ParallelApply (scheduling kernels in breadth first order, to address launch latency)
* Join
* In backward phase, send parameters as soon as the module finishes computation
**Rough solution:**
```python
# This is an example of a network that has a data parallel part inside
#
# B is data parallel
# +->A+-->B+-+
# +--+ +->D
# +->C+------+
class Network(nn.Container):
    def __init__(self):
        super(Network, self).__init__(
            A = ...,
            B = GPUReplicate(B, [0, 1, 2, 3]), # Copies the module onto a list of GPUs
            C = ...,
            D = ...
        )

    def __call__(self, x):
        a = self.A(x)
        c = self.C(x)
        a_split = Split(a) # a_split is a list of Tensors placed on different devices
        b = ParallelApply(self.B, a_split) # self.B is a list-like object containing copies of B
        d_input = Join(b + [c]) # gathers Tensors on a single GPU
        return self.D(d_input)
```
Each module is assigned to a single GPU.
For Kernel Launch Latency:
* Python threading
* Generators
For parameter reductions ASAP:
* In the forward pass, register a hook on every parameter; each hook is evaluated as soon as the last backward for that parameter is executed. The hook will then “all-reduce” that parameter across GPUs
* Problem with multiple forward calls - how do you know that the parameters won't be used anymore?
* Well, last usage in backward graph = first usage in forward graph, so this should be straightforward
### Multiprocessing with Tensor sharing
In Torch, or in general, one uses "threads" to build parallel data loaders, as well as to do Hogwild training.
Threads are powerful, as one can share Tensors between threads.
This allows you to:
* transfer data between threads efficiently, with zero memory copy and serialization overhead.
* share tensors among threads for parameter sharing models
Sharing Tensors among threads is very useful when you do Hogwild training, i.e. if you want to train several models in parallel, but want to share their underlying parameters.
This is often used in non-ConvNet workloads, like training word embeddings, RL-for-games, etc.
With Python, one cannot use threads because of a few technical issues.
Python has what is called [Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock), which does not allow threads to concurrently execute python code.
Hence, the most pythonic way to use multiple CPU cores is [multiprocessing](http://docs.python.org/2/library/multiprocessing.html)
We made PyTorch seamlessly integrate with Python multiprocessing.
This involved solving some complex technical problems to make this an air-tight solution, and more can be read [in this in-depth technical discussion](http://github.com/pytorch/pytorch/wiki/Multiprocessing-Technical-Notes).
What this means for you as the end-user is that you can simply use multiprocessing in this way:
```python
# loaders.py
# Functions from this file run in the workers
def fill(queue):
    while True:
        tensor = queue.get()
        tensor.fill_(10)
        queue.put(tensor)

def fill_pool(tensor):
    tensor.fill_(10)
```
```python
# Example 1: Using multiple persistent processes and a Queue
# process.py
import torch
import torch.multiprocessing as multiprocessing
from loaders import fill
# torch.multiprocessing.Queue automatically moves Tensor data to shared memory
# So the main process and worker share the data
queue = multiprocessing.Queue()
buffers = [torch.Tensor(2, 2) for i in range(4)]
for b in buffers:
    queue.put(b)
processes = [multiprocessing.Process(target=fill, args=(queue,)) for i in range(10)]
for p in processes:
    p.start()
```
```python
# Example 2: Using a process pool
# pool.py
import torch
from torch.multiprocessing import Pool
from loaders import fill_pool
tensors = [torch.Tensor(2, 2) for i in range(100)]
pool = Pool(10)
pool.map(fill_pool, tensors)
```
PyTorch is currently maintained by [Adam Paszke](https://apaszke.github.io/), [Sam Gross](https://github.com/colesbury) and [Soumith Chintala](http://soumith.ch) with major contributions coming from 10s of talented individuals in various forms and means. A non-exhaustive but growing list needs to mention: Sergey Zagoruyko, Adam Lerer, Francisco Massa, Andreas Kopf, James Bradbury, Zeming Lin, Yuandong Tian, Guillaume Lample, Marat Dukhan, Natalia Gimelshein.
Note: this project is unrelated to [hughperkins/pytorch](https://github.com/hughperkins/pytorch) with the same name. Hugh is a valuable contributor in the Torch community and has helped with many things Torch and PyTorch.

FindCUDA.cmake

@ -685,17 +685,21 @@ endif()
# CUDA_NVCC_EXECUTABLE
cuda_find_host_program(CUDA_NVCC_EXECUTABLE
NAMES nvcc
PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
ENV CUDA_PATH
ENV CUDA_BIN_PATH
PATH_SUFFIXES bin bin64
NO_DEFAULT_PATH
)
# Search default search paths, after we search our own set of paths.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
if(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
SET(CUDA_NVCC_EXECUTABLE "$ENV{CUDA_NVCC_EXECUTABLE}")
else(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
cuda_find_host_program(CUDA_NVCC_EXECUTABLE
NAMES nvcc
PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
ENV CUDA_PATH
ENV CUDA_BIN_PATH
PATH_SUFFIXES bin bin64
NO_DEFAULT_PATH
)
# Search default search paths, after we search our own set of paths.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
endif(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
# Compute the version.

select_compute_arch.cmake

@ -63,11 +63,16 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
"}\n")
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
"-ccbin" ${CMAKE_CXX_COMPILER}
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(nvcc_res EQUAL 0)
# only keep the last line of nvcc_out
STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
list(GET nvcc_out -1 nvcc_out)
string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE)
endif()
@ -116,13 +121,13 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
set(add_ptx TRUE)
set(arch_name ${CMAKE_MATCH_1})
endif()
if(arch_name MATCHES "([0-9]\\.[0-9])$")
if(arch_name MATCHES "(^[0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
set(arch_bin ${CMAKE_MATCH_1})
set(arch_ptx ${arch_bin})
else()
# Look for it in our list of known architectures
if(${arch_name} STREQUAL "Fermi")
set(arch_bin 2.0 "2.1(2.0)")
set(arch_bin "2.0 2.1(2.0)")
elseif(${arch_name} STREQUAL "Kepler+Tegra")
set(arch_bin 3.2)
elseif(${arch_name} STREQUAL "Kepler+Tesla")
@ -173,11 +178,11 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
# Tell NVCC to add binaries for the specified GPUs
foreach(arch ${cuda_arch_bin})
if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
# User explicitly specified PTX for the concrete BIN
# User explicitly specified ARCH for the concrete CODE
list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
else()
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
# User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
list(APPEND nvcc_archs_readable sm_${arch})
endif()

20
docs/Makefile Normal file

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = PyTorch
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

doc2md.py

@ -1,435 +0,0 @@
#! /usr/bin/env python
# encoding: utf-8
"""
Very lightweight docstring to Markdown converter. Modified for use in pytorch
### License
Copyright © 2013 Thomas Gläßle <t_glaessle@gmx.de>
This work is free. You can redistribute it and/or modify it under the
terms of the Do What The Fuck You Want To Public License, Version 2, as
published by Sam Hocevar. See the COPYING file for more details.
This program is free software. It comes without any warranty, to the
extent permitted by applicable law.
### Description
Little convenience tool to extract docstrings from a module or class and
convert them to GitHub Flavoured Markdown:
https://help.github.com/articles/github-flavored-markdown
Its purpose is to quickly generate `README.md` files for small projects.
### API
The interface consists of the following functions:
- `doctrim(docstring)`
- `doc2md(docstring, title)`
You can run this script from the command line like:
$ doc2md.py [-a] [--no-toc] [-t title] module-name [class-name] > README.md
### Limitations
At the moment this is suited only for a very specific use case. It is
hardly foreseeable whether I will decide to improve on it in the near future.
"""
import re
import sys
import inspect
__all__ = ['doctrim', 'doc2md']
doctrim = inspect.cleandoc
def unindent(lines):
"""
Remove common indentation from string.
Unlike doctrim there is no special treatment of the first line.
"""
try:
# Determine minimum indentation:
indent = min(len(line) - len(line.lstrip())
for line in lines if line)
except ValueError:
return lines
else:
return [line[indent:] for line in lines]
def escape_markdown(line):
line = line.replace('[', '\[').replace(']', '\]')
line = line.replace('(', '\(').replace(')', '\)')
line = line.replace('{', '\{').replace('}', '\}')
line = line.replace('\\', '\\\\')
line = line.replace('`', '\`')
line = line.replace('*', '\*')
line = line.replace('_', '\_')
line = line.replace('#', '\#')
line = line.replace('+', '\+')
line = line.replace('-', '\-')
line = line.replace('.', '\.')
line = line.replace('!', '\!')
return line
def code_block(lines, language=''):
"""
Mark the code segment for syntax highlighting.
"""
return ['```' + language] + lines + ['```']
def doctest2md(lines):
"""
Convert the given doctest to a syntax highlighted markdown segment.
"""
is_only_code = True
lines = unindent(lines)
for line in lines:
if not line.startswith('>>> ') and not line.startswith('... ') and line not in ['>>>', '...']:
is_only_code = False
break
if is_only_code:
orig = lines
lines = []
for line in orig:
lines.append(line[4:])
return lines
def doc_code_block(lines, language):
if language == 'python':
lines = doctest2md(lines)
return code_block(lines, language)
_args_section = re.compile('^\s*Args:\s*')
def is_args_check(line):
return _args_section.match(line)
def args_block(lines):
out = ['']
out += ['Parameter | Default | Description']
out += ['--------- | ------- | -----------']
for line in lines:
matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
assert matches != None
name = matches[0][0]
description = matches[0][1]
default = matches[0][3]
out += [name + ' | ' + default + ' | ' + description]
return out
_returns_section = re.compile('^\s*Returns:\s*')
def is_returns_check(line):
return _returns_section.match(line)
_image_section = re.compile('^\s*Image:\s*')
def is_image_check(line):
return _image_section.match(line)
_example_section = re.compile('^\s*Returns:\s*|^\s*Examples:\s*')
def is_example_check(line):
return _example_section.match(line)
_inputshape_section = re.compile('^\s*Returns:\s*|^\s*Input Shape:\s*')
def is_inputshape_check(line):
return _inputshape_section.match(line)
_outputshape_section = re.compile('^\s*Returns:\s*|^\s*Output Shape:\s*')
def is_outputshape_check(line):
return _outputshape_section.match(line)
#def get_docargs(line)
_reg_section = re.compile('^#+ ')
def is_heading(line):
return _reg_section.match(line)
def get_heading(line):
assert is_heading(line)
part = line.partition(' ')
return len(part[0]), part[2]
def make_heading(level, title):
return '#'*max(level, 1) + ' ' + title
def find_sections(lines):
"""
Find all section names and return a list with their names.
"""
sections = []
for line in lines:
if is_heading(line):
sections.append(get_heading(line))
return sections
def make_toc(sections):
"""
Generate table of contents for array of section names.
"""
if not sections:
return []
outer = min(n for n,t in sections)
refs = []
for ind,sec in sections:
ref = sec.lower()
ref = ref.replace(' ', '-')
ref = ref.replace('?', '')
refs.append(" "*(ind-outer) + "- [%s](#%s)" % (sec, ref))
return refs
def _doc2md(lines, shiftlevel=0):
_doc2md.md = []
_doc2md.is_code = False
_doc2md.is_code_block = False
_doc2md.is_args = False
_doc2md.is_returns = False
_doc2md.is_inputshape = False
_doc2md.is_outputshape = False
_doc2md.code = []
def reset():
if _doc2md.is_code:
_doc2md.is_code = False
_doc2md.code += doc_code_block(code, 'python')
_doc2md.code += ['']
if _doc2md.is_code_block:
_doc2md.is_code_block = False
_doc2md.code += doc_code_block(code_block, 'python')
_doc2md.code += ['']
if _doc2md.is_args:
_doc2md.is_args = False
_doc2md.md += args_block(args)
if _doc2md.is_returns:
_doc2md.is_returns = False
_doc2md.md += returns
_doc2md.is_inputshape = False
_doc2md.is_outputshape = False
for line in lines:
trimmed = line.lstrip()
if is_args_check(line):
reset()
_doc2md.is_args = True
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Constructor Arguments']
args = []
elif is_returns_check(line):
reset()
_doc2md.is_returns = True
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Returns']
returns = []
elif is_example_check(line):
reset()
elif is_inputshape_check(line):
reset()
inputshape = re.findall(r'\s*Input\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
elif is_outputshape_check(line):
reset()
outputshape = re.findall(r'\s*Output\sShape:\s*(.*)\s*:\s*(.*)\s*$', line)[0]
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Expected Shape']
_doc2md.md += [' | Shape | Description ']
_doc2md.md += ['------ | ----- | ------------']
_doc2md.md += [' input | ' + inputshape[0] + ' | ' + inputshape[1]]
_doc2md.md += ['output | ' + outputshape[0] + ' | ' + outputshape[1]]
elif is_image_check(line):
reset()
_doc2md.md += ['']
filename = re.findall(r'\s*Image:\s*(.*?)\s*$', line)
_doc2md.md += ['<img src="image/' + filename[0] + '" >']
elif _doc2md.is_code == False and trimmed.startswith('>>> '):
reset()
_doc2md.is_code = True
code = [line]
elif _doc2md.is_code_block == False and trimmed.startswith('```'):
reset()
_doc2md.is_code_block = True
code_block = []
elif _doc2md.is_code_block == True and trimmed.startswith('```'):
# end of code block
reset()
elif shiftlevel != 0 and is_heading(line):
reset()
level, title = get_heading(line)
_doc2md.md += [make_heading(level + shiftlevel, title)]
elif _doc2md.is_args:
if line:
args.append(line)
else:
reset()
elif _doc2md.is_returns:
if line:
returns.append(line)
else:
reset()
elif _doc2md.is_code:
if line:
code.append(line)
else:
reset()
elif _doc2md.is_code_block:
if line:
code_block.append(line)
else:
reset()
else:
reset()
_doc2md.md += [line]
reset()
_doc2md.code += _doc2md.md
return _doc2md.code
def doc2md(docstr, title, min_level=1, more_info=False, toc=True):
"""
Convert a docstring to a markdown text.
"""
text = doctrim(docstr)
lines = text.split('\n')
sections = find_sections(lines)
if sections:
level = min(n for n,t in sections) - 1
else:
level = 1
shiftlevel = 0
if level < min_level:
shiftlevel = min_level - level
level = min_level
sections = [(lev+shiftlevel, tit) for lev,tit in sections]
md = [
make_heading(level, title),
"",
lines.pop(0),
""
]
if toc:
md += make_toc(sections)
md += _doc2md(lines, shiftlevel)
if more_info:
return (md, sections)
else:
return "\n".join(md)
def mod2md(module, title, title_api_section, toc=True):
"""
Generate markdown document from module, including API section.
"""
docstr = module.__doc__ or " "
text = doctrim(docstr)
lines = text.split('\n')
sections = find_sections(lines)
if sections:
level = min(n for n,t in sections) - 1
else:
level = 1
api_md = []
api_sec = []
if title_api_section :
# sections.append((level+1, title_api_section))
for name, entry in module.__dict__.items():
if name[0] != '_' and entry.__doc__:
#api_sec.append((level+1, name))
#api_md += ['', '']
if entry.__doc__:
md, sec = doc2md(entry.__doc__, name,
min_level=level+1, more_info=True, toc=False)
api_sec += sec
api_md += md
sections += api_sec
# headline
md = [
make_heading(level, title),
"",
lines.pop(0),
""
]
# main sections
if toc:
md += make_toc(sections)
md += _doc2md(lines)
if toc:
md += ['']
md += make_toc(api_sec)
md += api_md
return "\n".join(md)
def main(args=None):
# parse the program arguments
import argparse
parser = argparse.ArgumentParser(
description='Convert docstrings to markdown.')
parser.add_argument(
'module', help='The module containing the docstring.')
group = parser.add_mutually_exclusive_group()
group.add_argument(
'entry', nargs='?',
help='Convert only docstring of this entry in module.')
group.add_argument(
'-a', '--all', dest='all', action='store_true',
help='Create an API section with the contents of module.__all__.')
parser.add_argument(
'-t', '--title', dest='title',
help='Document title (default is module name)')
parser.add_argument(
'--no-toc', dest='toc', action='store_false', default=True,
help='Do not automatically generate the TOC')
args = parser.parse_args(args)
import importlib
import inspect
import os
def add_path(*pathes):
for path in reversed(pathes):
if path not in sys.path:
sys.path.insert(0, path)
file = inspect.getfile(inspect.currentframe())
add_path(os.path.realpath(os.path.abspath(os.path.dirname(file))))
add_path(os.getcwd())
mod_name = args.module
if mod_name.endswith('.py'):
mod_name = mod_name.rsplit('.py', 1)[0]
title = args.title or mod_name.replace('_', '-')
module = importlib.import_module(mod_name)
if args.all:
print(mod2md(module, title, 'API', toc=args.toc))
else:
if args.entry:
docstr = module.__dict__[args.entry].__doc__ or ''
else:
docstr = module.__doc__ or ''
print(doc2md(docstr, title, toc=args.toc))
if __name__ == "__main__":
main()


@ -1,6 +0,0 @@
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pushd $SCRIPT_DIR
python doc2md.py torch.nn --no-toc --all >../nn.md
popd

22 binary image files (5.4 KiB to 32 KiB each) removed; previews not shown.

36
docs/make.bat Normal file
View File

@ -0,0 +1,36 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=PyTorch
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd

docs/nn.md

@ -1,526 +0,0 @@
# torch.nn
## LogSoftmax
Applies the Log(Softmax(x)) function to an n-dimensional input Tensor.
The LogSoftmax formulation can be simplified as
f_i(x) = log(1 / a * exp(x_i)) where a = sum_j exp(x_j) .
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
<img src="image/logsoftmax.png" >
```python
m = nn.LogSoftmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m.forward(input))
```
## ReLU
Applies the rectified linear unit function element-wise ReLU(x)= max(0,x)
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/relu.png" >
```python
m = nn.ReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## LogSigmoid
Applies element-wise LogSigmoid(x) = log( 1 / (1 + exp(-x_i)))
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/logsigmoid.png" >
```python
m = nn.LogSigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## PReLU
Applies element-wise the function PReLU(x) = max(0,x) + a * min(0,x)
Here "a" is a learnable parameter.
When called without arguments, nn.PReLU() uses a single parameter "a"
across all input channels. If called with nn.PReLU(nChannels), a separate
"a" is used for each input channel.
Note that weight decay should not be used when learning "a" for good
performance.
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_parameters | 1 | number of "a" to learn.
init | 0.25 | the initial value of "a".
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/prelu.png" >
```python
m = nn.PReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Softmax2d
Applies SoftMax over features to each spatial location
When given an image of Channels x Height x Width, it will
apply Softmax to each location [Channels, h_i, w_j]
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , * , * ] | 4D Tensor of any size
output | Same | Output has the same shape as input
```python
m = nn.Softmax2d()
# you softmax over the 2nd dimension
input = autograd.Variable(torch.randn(2, 3, 12, 13))
print(input)
print(m.forward(input))
```
## ReLU6
Applies the element-wise function ReLU6(x) = min( max(0,x), 6)
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/relu6.png" >
```python
m = nn.ReLU6()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Tanh
Applies element-wise, Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/tanh.png" >
```python
m = nn.Tanh()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Softplus
Applies element-wise SoftPlus(x) = 1/beta * log(1 + exp(beta * x_i))
SoftPlus is a smooth approximation to the ReLU function and can be used
to constrain the output of a machine to always be positive.
For numerical stability the implementation reverts to the linear function
for inputs above a certain value.
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
beta | 1 | the beta value for the Softplus formulation.
threshold | 20 | values above this revert to a linear function.
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/softplus.png" >
```python
m = nn.Softplus()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Threshold
Thresholds each element of the input Tensor
Threshold is defined as:
y = x if x >= threshold
value if x < threshold
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
threshold | | The value to threshold at
value | | The value to replace with
inplace | | can optionally do the operation in-place
### Returns
Tensor of same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
```python
m = nn.Threshold(0.1, 20)
input = Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Softmin
Applies the Softmin function to an n-dimensional input Tensor
rescaling them so that the elements of the n-dimensional output Tensor
lie in the range (0,1) and sum to 1
Softmin(x) = exp(-x_i - shift) / sum_j exp(-x_j - shift)
where shift = max_i (-x_i)
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
<img src="image/softmin.png" >
```python
m = nn.Softmin()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m.forward(input))
```
## Softshrink
Applies the soft shrinkage function elementwise
SoftShrinkage operator is defined as:
f(x) = x - lambda, if x > lambda
f(x) = x + lambda, if x < -lambda
f(x) = 0, otherwise
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
lambd | 0.5 | the lambda value for the Softshrink formulation.
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/sshrink.png" >
```python
m = nn.Softshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## ELU
Applies element-wise, ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1))
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
alpha | 1.0 | the alpha value for the ELU formulation.
inplace | | can optionally do the operation in-place
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/elu.png" >
```python
m = nn.ELU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Hardshrink
Applies the hard shrinkage function element-wise
Hardshrink is defined as f(x) = x, if x > lambda
f(x) = x, if x < -lambda
f(x) = 0, otherwise
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
lambd | 0.5 | the lambda value for the Hardshrink formulation.
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/hshrink.png" >
```python
m = nn.Hardshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Hardtanh
Applies the HardTanh function element-wise
HardTanh is defined as:
f(x) = +1, if x > 1
f(x) = -1, if x < -1
f(x) = x, otherwise
The range of the linear region [-1, 1] can be adjusted
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
min_value | | minimum value of the linear region range
max_value | | maximum value of the linear region range
inplace | | can optionally do the operation in-place
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/htanh.png" >
```python
m = nn.Hardtanh(-2, 2)
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Softsign
Applies element-wise, the function Softsign(x) = x / (1 + |x|)
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/softsign.png" >
```python
m = nn.Softsign()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## LeakyReLU
Applies element-wise, f(x) = max(0, x) + negative_slope * min(0, x)
### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
negative_slope | 1e-2 | Controls the angle of the negative slope.
inplace | | can optionally do the operation in-place
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
```python
m = nn.LeakyReLU(0.1)
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Sigmoid
Applies the element-wise function sigmoid(x) = 1 / ( 1 + exp(-x))
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
<img src="image/sigmoid.png" >
```python
m = nn.Sigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Tanhshrink
Applies element-wise, Tanhshrink(x) = x - Tanh(x)
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
```python
m = nn.Tanhshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m.forward(input))
```
## Softmax
Applies the Softmax function to an n-dimensional input Tensor,
rescaling it so that the elements of the n-dimensional output Tensor
lie in the range (0, 1) and sum to 1
Softmax is defined as f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift)
where shift = max_i x_i
### Returns
a Tensor of the same dimension and shape as the input
### Expected Shape
Tensor | Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
<img src="image/softmax.png" >
Notes:
Note that this module doesn't work directly with NLLLoss,
which expects the Log to be computed between the Softmax and itself.
Use LogSoftmax instead (it's faster).
```python
m = nn.Softmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m.forward(input))
```
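A hedged sketch of the pairing suggested in the note above (the sizes and targets are illustrative): feed the output of LogSoftmax into NLLLoss.
```python
m = nn.LogSoftmax()
loss = nn.NLLLoss()
# 3 samples, 5 classes; targets are class indices
input = autograd.Variable(torch.randn(3, 5), requires_grad=True)
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(m(input), target)
output.backward()
```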

2
docs/requirements.txt Normal file

@ -0,0 +1,2 @@
sphinx
-e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme


@ -0,0 +1,114 @@
body {
font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
}
/* Default header fonts are ugly */
h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
}
/* Use white for docs background */
.wy-side-nav-search {
background-color: #fff;
}
.wy-nav-content-wrap, .wy-menu li.current > a {
background-color: #fff;
}
@media screen and (min-width: 1400px) {
.wy-nav-content-wrap {
background-color: rgba(0, 0, 0, 0.0470588);
}
.wy-nav-content {
background-color: #fff;
}
}
/* Fixes for mobile */
.wy-nav-top {
background-color: #fff;
background-image: url('../img/pytorch-logo-dark.svg');
background-repeat: no-repeat;
background-position: center;
padding: 0;
margin: 0.4045em 0.809em;
color: #333;
}
.wy-nav-top > a {
display: none;
}
@media screen and (max-width: 768px) {
.wy-side-nav-search>a img.logo {
height: 60px;
}
}
/* This is needed to ensure that logo above search scales properly */
.wy-side-nav-search a {
display: block;
}
/* This ensures that multiple constructors will remain in separate lines. */
.rst-content dl:not(.docutils) dt {
display: table;
}
/* Use our red for literals (it's very similar to the original color) */
.rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
color: #F05732;
}
.rst-content tt.xref, a .rst-content tt, .rst-content tt.xref,
.rst-content code.xref, a .rst-content tt, a .rst-content code {
color: #404040;
}
/* Change link colors (except for the menu) */
a {
color: #F05732;
}
a:hover {
color: #F05732;
}
a:visited {
color: #D44D2C;
}
.wy-menu a {
color: #b3b3b3;
}
.wy-menu a:hover {
color: #b3b3b3;
}
/* Default footer text is quite big */
footer {
font-size: 80%;
}
footer .rst-footer-buttons {
font-size: 125%; /* revert footer settings - 1/80% = 125% */
}
footer p {
font-size: 100%;
}
/* For hidden headers that appear in TOC tree */
/* see http://stackoverflow.com/a/32363545/3343043 */
.rst-content .hidden-section {
display: none;
}
nav .hidden-section {
display: inherit;
}



@ -0,0 +1,24 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 21.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 199.7 40.2" style="enable-background:new 0 0 199.7 40.2;" xml:space="preserve">
<style type="text/css">
.st0{fill:#F05732;}
.st1{fill:#9E529F;}
.st2{fill:#333333;}
</style>
<path class="st0" d="M102.7,12.2c-1.3-1-1.8,3.9-4.4,3.9c-3,0-4-13-6.3-13c-0.7,0-0.8-0.4-7.9,21.3c-2.9,9,4.4,15.8,11.8,15.8
c4.6,0,12.3-3,12.3-12.6C108.2,20.5,104.7,13.7,102.7,12.2z M95.8,35.3c-3.7,0-6.7-3.1-6.7-7c0-3.9,3-7,6.7-7s6.7,3.1,6.7,7
C102.5,32.1,99.5,35.3,95.8,35.3z"/>
<path class="st1" d="M99.8,0c-0.5,0-1.8,2.5-1.8,3.6c0,1.5,1,2,1.8,2c0.8,0,1.8-0.5,1.8-2C101.5,2.5,100.2,0,99.8,0z"/>
<path class="st2" d="M0,39.5V14.9h11.5c5.3,0,8.3,3.6,8.3,7.9c0,4.3-3,7.9-8.3,7.9H5.2v8.8H0z M14.4,22.8c0-2.1-1.6-3.3-3.7-3.3H5.2
v6.6h5.5C12.8,26.1,14.4,24.8,14.4,22.8z"/>
<path class="st2" d="M35.2,39.5V29.4l-9.4-14.5h6l6.1,9.8l6.1-9.8h5.9l-9.4,14.5v10.1H35.2z"/>
<path class="st2" d="M63.3,39.5v-20h-7.2v-4.6h19.6v4.6h-7.2v20H63.3z"/>
<path class="st2" d="M131.4,39.5l-4.8-8.7h-3.8v8.7h-5.2V14.9H129c5.1,0,8.3,3.4,8.3,7.9c0,4.3-2.8,6.7-5.4,7.3l5.6,9.4H131.4z
M131.9,22.8c0-2-1.6-3.3-3.7-3.3h-5.5v6.6h5.5C130.3,26.1,131.9,24.9,131.9,22.8z"/>
<path class="st2" d="M145.6,27.2c0-7.6,5.7-12.7,13.1-12.7c5.4,0,8.5,2.9,10.3,6l-4.5,2.2c-1-2-3.2-3.6-5.8-3.6
c-4.5,0-7.7,3.4-7.7,8.1c0,4.6,3.2,8.1,7.7,8.1c2.5,0,4.7-1.6,5.8-3.6l4.5,2.2c-1.7,3.1-4.9,6-10.3,6
C151.3,39.9,145.6,34.7,145.6,27.2z"/>
<path class="st2" d="M194.5,39.5V29.1h-11.6v10.4h-5.2V14.9h5.2v9.7h11.6v-9.7h5.3v24.6H194.5z"/>
</svg>


53
docs/source/autograd.rst Normal file

@ -0,0 +1,53 @@
.. role:: hidden
:class: hidden-section
Automatic differentiation package - torch.autograd
==================================================
.. automodule:: torch.autograd
.. currentmodule:: torch.autograd
.. autofunction:: backward
Variable
--------
API compatibility
^^^^^^^^^^^^^^^^^
Variable API is nearly the same as the regular Tensor API (with the exception
of a couple of in-place methods that would overwrite inputs required for
gradient computation). In most cases Tensors can be safely replaced with
Variables and the code will continue to work just fine. Because of this,
we're not documenting all the operations on Variables, and you should
refer to :class:`torch.Tensor` docs for this purpose.
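For example, a chain of Tensor-style calls works unchanged on a Variable (a
minimal sketch)::

    >>> x = Variable(torch.randn(4, 4))
    >>> y = x.exp().sum(0)   # the same methods you would call on a Tensor
    >>> x.data               # the wrapped Tensor is always accessible as .data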
In-place operations on Variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Supporting in-place operations in autograd is a hard matter, and we discourage
their use in most cases. Autograd's aggressive buffer freeing and reuse makes
it very efficient and there are very few occasions when in-place operations
actually lower memory usage by any significant amount. Unless you're operating
under heavy memory pressure, you might never need to use them.
In-place correctness checks
^^^^^^^^^^^^^^^^^^^^^^^^^^^
All :class:`Variable` s keep track of in-place operations applied to them, and
if the implementation detects that a variable was saved for backward in one of
the functions, but it was modified in-place afterwards, an error will be raised
once the backward pass is started. This ensures that if you're using in-place
functions and not seeing any errors, you can be sure that the computed gradients
are correct.
.. autoclass:: Variable
:members:
:hidden:`Function`
------------------
.. autoclass:: Function
:members:

246
docs/source/conf.py Normal file

@ -0,0 +1,246 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# PyTorch documentation build configuration file, created by
# sphinx-quickstart on Fri Dec 23 13:31:47 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
import torch
try:
import torchvision
except ImportError:
import warnings
warnings.warn('unable to load "torchvision" package')
import sphinx_rtd_theme
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
]
napoleon_use_ivar = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'PyTorch'
copyright = '2017, Torch Contributors'
author = 'Torch Contributors'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# TODO: change to [:2] at v1.0
version = '.'.join(torch.__version__.split('+')[0].split('.')[:3])
# The full version, including alpha/beta/rc tags.
# TODO: verify this works as expected
release = torch.__version__.split('+')[0]
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
'collapse_navigation': False,
'display_version': False,
'logo_only': True,
}
html_logo = '_static/img/pytorch-logo-dark.svg'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# html_style_path = 'css/pytorch_theme.css'
html_context = {
'css_files': [
'https://fonts.googleapis.com/css?family=Lato',
'_static/css/pytorch_theme.css'
],
}
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'PyTorchdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pytorch.tex', 'PyTorch Documentation',
'Torch Contributors', 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'PyTorch', 'PyTorch Documentation',
[author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'PyTorch', 'PyTorch Documentation',
author, 'PyTorch', 'One line description of project.',
'Miscellaneous'),
]
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
'python': ('https://docs.python.org/', None),
'numpy': ('http://docs.scipy.org/doc/numpy/', None),
}
# -- A patch that prevents Sphinx from cross-referencing ivar tags -------
# See http://stackoverflow.com/a/41184353/3343043
from docutils import nodes
from sphinx.util.docfields import TypedField
from sphinx import addnodes
def patched_make_field(self, types, domain, items):
# type: (List, unicode, Tuple) -> nodes.field
def handle_item(fieldarg, content):
par = nodes.paragraph()
par += addnodes.literal_strong('', fieldarg) # Patch: this line added
# par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
# addnodes.literal_strong))
if fieldarg in types:
par += nodes.Text(' (')
# NOTE: using .pop() here to prevent a single type node to be
# inserted twice into the doctree, which leads to
# inconsistencies later when references are resolved
fieldtype = types.pop(fieldarg)
if len(fieldtype) == 1 and isinstance(fieldtype[0], nodes.Text):
typename = u''.join(n.astext() for n in fieldtype)
typename = typename.replace('int', 'python:int')
typename = typename.replace('long', 'python:long')
typename = typename.replace('float', 'python:float')
typename = typename.replace('type', 'python:type')
par.extend(self.make_xrefs(self.typerolename, domain, typename,
addnodes.literal_emphasis))
else:
par += fieldtype
par += nodes.Text(')')
par += nodes.Text(' -- ')
par += content
return par
fieldname = nodes.field_name('', self.label)
if len(items) == 1 and self.can_collapse:
fieldarg, content = items[0]
bodynode = handle_item(fieldarg, content)
else:
bodynode = self.list_type()
for fieldarg, content in items:
bodynode += nodes.list_item('', handle_item(fieldarg, content))
fieldbody = nodes.field_body('', bodynode)
return nodes.field('', fieldname, fieldbody)
TypedField.make_field = patched_make_field

27
docs/source/cuda.rst Normal file

@ -0,0 +1,27 @@
torch.cuda
===================================
.. currentmodule:: torch.cuda
.. automodule:: torch.cuda
:members:
Communication collectives
-------------------------
.. autofunction:: torch.cuda.comm.broadcast
.. autofunction:: torch.cuda.comm.reduce_add
.. autofunction:: torch.cuda.comm.scatter
.. autofunction:: torch.cuda.comm.gather
Streams and events
------------------
.. autoclass:: Stream
:members:
.. autoclass:: Event
:members:

12
docs/source/data.rst Normal file

@ -0,0 +1,12 @@
torch.utils.data
===================================
.. automodule:: torch.utils.data
.. autoclass:: Dataset
.. autoclass:: TensorDataset
.. autoclass:: DataLoader
.. autoclass:: torch.utils.data.sampler.Sampler
.. autoclass:: torch.utils.data.sampler.SequentialSampler
.. autoclass:: torch.utils.data.sampler.RandomSampler
.. autoclass:: torch.utils.data.sampler.SubsetRandomSampler
.. autoclass:: torch.utils.data.sampler.WeightedRandomSampler

6
docs/source/ffi.rst Normal file

@ -0,0 +1,6 @@
torch.utils.ffi
===============
.. currentmodule:: torch.utils.ffi
.. autofunction:: create_extension

54
docs/source/index.rst Normal file

@ -0,0 +1,54 @@
.. PyTorch documentation master file, created by
sphinx-quickstart on Fri Dec 23 13:31:47 2016.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
:github_url: https://github.com/pytorch/pytorch
PyTorch documentation
===================================
PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
.. toctree::
:glob:
:maxdepth: 1
:caption: Notes
notes/*
.. toctree::
:maxdepth: 1
:caption: Package Reference
torch
tensors
storage
nn
optim
torch.autograd <autograd>
torch.multiprocessing <multiprocessing>
torch.legacy <legacy>
cuda
ffi
data
model_zoo
.. toctree::
:glob:
:maxdepth: 1
:caption: torchvision Reference
torchvision/torchvision
torchvision/datasets
torchvision/models
torchvision/transforms
torchvision/utils
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`

4
docs/source/legacy.rst Normal file

@ -0,0 +1,4 @@
Legacy package - torch.legacy
===================================
.. automodule:: torch.legacy


@ -0,0 +1,5 @@
torch.utils.model_zoo
===================================
.. automodule:: torch.utils.model_zoo
.. autofunction:: load_url


@ -0,0 +1,88 @@
Multiprocessing package - torch.multiprocessing
===============================================
.. automodule:: torch.multiprocessing
.. currentmodule:: torch.multiprocessing
.. warning::
If the main process exits abruptly (e.g. because of an incoming signal),
Python's ``multiprocessing`` sometimes fails to clean up its children.
It's a known caveat, so if you're seeing any resource leaks after
interrupting the interpreter, it probably means that this has just happened
to you.
Strategy management
-------------------
.. autofunction:: get_all_sharing_strategies
.. autofunction:: get_sharing_strategy
.. autofunction:: set_sharing_strategy
Sharing CUDA tensors
--------------------
Sharing CUDA tensors between processes is supported only in Python 3, using
the ``spawn`` or ``forkserver`` start methods. :mod:`python:multiprocessing` in
Python 2 can only create subprocesses using ``fork``, and it's not supported
by the CUDA runtime.
.. warning::
The CUDA API requires that allocations exported to other processes remain
valid for as long as they're used by them. You should be careful to ensure that
the CUDA tensors you share don't go out of scope for as long as they're needed.
This shouldn't be a problem for sharing model parameters, but passing other
kinds of data should be done with care. Note that this restriction doesn't
apply to shared CPU memory.
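A minimal sketch of what this looks like in practice (Python 3 only; it assumes
``set_start_method`` is re-exported by :mod:`torch.multiprocessing` the same way
the standard module exposes it)::

    import torch
    import torch.multiprocessing as mp

    def worker(t):
        # the child receives a handle to the very same CUDA allocation
        print(t.sum())

    if __name__ == '__main__':
        mp.set_start_method('spawn')
        t = torch.randn(2, 2).cuda()
        p = mp.Process(target=worker, args=(t,))
        p.start()
        p.join()  # keep t alive until the child is done using it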
Sharing strategies
------------------
This section provides a brief overview of how different sharing strategies
work. Note that it applies only to CPU tensors - CUDA tensors will always use
the CUDA API, as that's the only way they can be shared.
File descriptor - ``file_descriptor``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. note::
This is the default strategy (except for macOS and OS X where it's not
supported).
This strategy will use file descriptors as shared memory handles. Whenever a
storage is moved to shared memory, a file descriptor obtained from ``shm_open``
is cached with the object, and when it's going to be sent to other processes,
the file descriptor will be transferred (e.g. via UNIX sockets) to it. The
receiver will also cache the file descriptor and ``mmap`` it, to obtain a shared
view onto the storage data.
Note that if a lot of tensors are going to be shared, this strategy will keep a
large number of file descriptors open most of the time. If your system has low
limits for the number of open file descriptors, and you can't raise them, you
should use the ``file_system`` strategy.
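Switching strategies is a single call, using the functions documented above (a
hedged sketch)::

    import torch.multiprocessing as mp

    print(mp.get_all_sharing_strategies())   # e.g. {'file_descriptor', 'file_system'}
    mp.set_sharing_strategy('file_system')
    assert mp.get_sharing_strategy() == 'file_system'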
File system - ``file_system``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This strategy will use file names given to ``shm_open`` to identify the shared
memory regions. This has the benefit of not requiring the implementation to cache
the file descriptors obtained from it, but at the same time is prone to shared
memory leaks. The file can't be deleted right after its creation, because other
processes need to access it to open their views. If the processes fatally
crash, or are killed, and don't call the storage destructors, the files will
remain in the system. This is very serious, because they keep using up the
memory until the system is restarted, or they're freed manually.
To counter the problem of shared memory file leaks, :mod:`torch.multiprocessing`
will spawn a daemon named ``torch_shm_manager`` that will isolate itself from
the current process group, and will keep track of all shared memory allocations.
Once all processes connected to it exit, it will wait a moment to ensure there
will be no new connections, and will iterate over all shared memory files
allocated by the group. If it finds that any of them still exist, they will be
deallocated. We've tested this method and it proved to be robust to various
failures. Still, if your system has high enough limits, and ``file_descriptor``
is a supported strategy, we do not recommend switching to this one.

849
docs/source/nn.rst Normal file

@ -0,0 +1,849 @@
.. role:: hidden
:class: hidden-section
torch.nn
===================================
.. automodule:: torch.nn
.. currentmodule:: torch.nn
Parameters
----------
.. autoclass:: Parameter
:members:
Containers
----------------------------------
:hidden:`Module`
~~~~~~~~~~~~~~~~
.. autoclass:: Module
:members:
:hidden:`Sequential`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Sequential
:members:
:hidden:`ModuleList`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ModuleList
:members:
:hidden:`ParameterList`
~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ParameterList
:members:
Convolution Layers
----------------------------------
:hidden:`Conv1d`
~~~~~~~~~~~~~~~~
.. autoclass:: Conv1d
:members:
:hidden:`Conv2d`
~~~~~~~~~~~~~~~~
.. autoclass:: Conv2d
:members:
:hidden:`Conv3d`
~~~~~~~~~~~~~~~~
.. autoclass:: Conv3d
:members:
:hidden:`ConvTranspose1d`
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ConvTranspose1d
:members:
:hidden:`ConvTranspose2d`
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ConvTranspose2d
:members:
:hidden:`ConvTranspose3d`
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: ConvTranspose3d
:members:
Pooling Layers
----------------------------------
:hidden:`MaxPool1d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxPool1d
:members:
:hidden:`MaxPool2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxPool2d
:members:
:hidden:`MaxPool3d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxPool3d
:members:
:hidden:`MaxUnpool1d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxUnpool1d
:members:
:hidden:`MaxUnpool2d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxUnpool2d
:members:
:hidden:`MaxUnpool3d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MaxUnpool3d
:members:
:hidden:`AvgPool1d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: AvgPool1d
:members:
:hidden:`AvgPool2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: AvgPool2d
:members:
:hidden:`AvgPool3d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: AvgPool3d
:members:
:hidden:`FractionalMaxPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: FractionalMaxPool2d
:members:
:hidden:`LPPool2d`
~~~~~~~~~~~~~~~~~~
.. autoclass:: LPPool2d
:members:
:hidden:`AdaptiveMaxPool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveMaxPool1d
:members:
:hidden:`AdaptiveMaxPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveMaxPool2d
:members:
:hidden:`AdaptiveAvgPool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool1d
:members:
:hidden:`AdaptiveAvgPool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: AdaptiveAvgPool2d
:members:
Non-linear Activations
----------------------------------
:hidden:`ReLU`
~~~~~~~~~~~~~~
.. autoclass:: ReLU
:members:
:hidden:`ReLU6`
~~~~~~~~~~~~~~~
.. autoclass:: ReLU6
:members:
:hidden:`ELU`
~~~~~~~~~~~~~
.. autoclass:: ELU
:members:
:hidden:`PReLU`
~~~~~~~~~~~~~~~
.. autoclass:: PReLU
:members:
:hidden:`LeakyReLU`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: LeakyReLU
:members:
:hidden:`Threshold`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Threshold
:members:
:hidden:`Hardtanh`
~~~~~~~~~~~~~~~~~~
.. autoclass:: Hardtanh
:members:
:hidden:`Sigmoid`
~~~~~~~~~~~~~~~~~
.. autoclass:: Sigmoid
:members:
:hidden:`Tanh`
~~~~~~~~~~~~~~
.. autoclass:: Tanh
:members:
:hidden:`LogSigmoid`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: LogSigmoid
:members:
:hidden:`Softplus`
~~~~~~~~~~~~~~~~~~
.. autoclass:: Softplus
:members:
:hidden:`Softshrink`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Softshrink
:members:
:hidden:`Softsign`
~~~~~~~~~~~~~~~~~~
.. autoclass:: Softsign
:members:
:hidden:`Tanhshrink`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Tanhshrink
:members:
:hidden:`Softmin`
~~~~~~~~~~~~~~~~~
.. autoclass:: Softmin
:members:
:hidden:`Softmax`
~~~~~~~~~~~~~~~~~
.. autoclass:: Softmax
:members:
:hidden:`LogSoftmax`
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: LogSoftmax
:members:
Normalization layers
----------------------------------
:hidden:`BatchNorm1d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: BatchNorm1d
:members:
:hidden:`BatchNorm2d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: BatchNorm2d
:members:
:hidden:`BatchNorm3d`
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: BatchNorm3d
:members:
Recurrent layers
----------------------------------
:hidden:`RNN`
~~~~~~~~~~~~~
.. autoclass:: RNN
:members:
:hidden:`LSTM`
~~~~~~~~~~~~~~
.. autoclass:: LSTM
:members:
:hidden:`GRU`
~~~~~~~~~~~~~
.. autoclass:: GRU
:members:
:hidden:`RNNCell`
~~~~~~~~~~~~~~~~~
.. autoclass:: RNNCell
:members:
:hidden:`LSTMCell`
~~~~~~~~~~~~~~~~~~
.. autoclass:: LSTMCell
:members:
:hidden:`GRUCell`
~~~~~~~~~~~~~~~~~
.. autoclass:: GRUCell
:members:
Linear layers
----------------------------------
:hidden:`Linear`
~~~~~~~~~~~~~~~~
.. autoclass:: Linear
:members:
Dropout layers
----------------------------------
:hidden:`Dropout`
~~~~~~~~~~~~~~~~~
.. autoclass:: Dropout
:members:
:hidden:`Dropout2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Dropout2d
:members:
:hidden:`Dropout3d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Dropout3d
:members:
Sparse layers
----------------------------------
:hidden:`Embedding`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: Embedding
:members:
Distance functions
----------------------------------
:hidden:`PairwiseDistance`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: PairwiseDistance
:members:
Loss functions
----------------------------------
:hidden:`L1Loss`
~~~~~~~~~~~~~~~~
.. autoclass:: L1Loss
:members:
:hidden:`MSELoss`
~~~~~~~~~~~~~~~~~
.. autoclass:: MSELoss
:members:
:hidden:`CrossEntropyLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: CrossEntropyLoss
:members:
:hidden:`NLLLoss`
~~~~~~~~~~~~~~~~~
.. autoclass:: NLLLoss
:members:
:hidden:`NLLLoss2d`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: NLLLoss2d
:members:
:hidden:`KLDivLoss`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: KLDivLoss
:members:
:hidden:`BCELoss`
~~~~~~~~~~~~~~~~~~~
.. autoclass:: BCELoss
:members:
:hidden:`MarginRankingLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MarginRankingLoss
:members:
:hidden:`HingeEmbeddingLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: HingeEmbeddingLoss
:members:
:hidden:`MultiLabelMarginLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MultiLabelMarginLoss
:members:
:hidden:`SmoothL1Loss`
~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: SmoothL1Loss
:members:
:hidden:`SoftMarginLoss`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: SoftMarginLoss
:members:
:hidden:`MultiLabelSoftMarginLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MultiLabelSoftMarginLoss
:members:
:hidden:`CosineEmbeddingLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: CosineEmbeddingLoss
:members:
:hidden:`MultiMarginLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: MultiMarginLoss
:members:
Vision layers
----------------
:hidden:`PixelShuffle`
~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: PixelShuffle
:members:
:hidden:`UpsamplingNearest2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingNearest2d
:members:
:hidden:`UpsamplingBilinear2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: UpsamplingBilinear2d
:members:
Multi-GPU layers
----------------
:hidden:`DataParallel`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: DataParallel
:members:
Utilities
---------
:hidden:`clip_grad_norm`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.clip_grad_norm
.. currentmodule:: torch.nn.utils.rnn
:hidden:`PackedSequence`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.PackedSequence
:hidden:`pack_padded_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pack_padded_sequence
:hidden:`pad_packed_sequence`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: torch.nn.utils.rnn.pad_packed_sequence
torch.nn.functional
===================
.. currentmodule:: torch.nn.functional
Convolution functions
----------------------------------
:hidden:`conv1d`
~~~~~~~~~~~~~~~~
.. autofunction:: conv1d
:hidden:`conv2d`
~~~~~~~~~~~~~~~~
.. autofunction:: conv2d
:hidden:`conv3d`
~~~~~~~~~~~~~~~~
.. autofunction:: conv3d
:hidden:`conv_transpose1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: conv_transpose1d
:hidden:`conv_transpose2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: conv_transpose2d
:hidden:`conv_transpose3d`
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: conv_transpose3d
Pooling functions
----------------------------------
:hidden:`avg_pool1d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: avg_pool1d
:hidden:`avg_pool2d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: avg_pool2d
:hidden:`avg_pool3d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: avg_pool3d
:hidden:`max_pool1d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_pool1d
:hidden:`max_pool2d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_pool2d
:hidden:`max_pool3d`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_pool3d
:hidden:`max_unpool1d`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_unpool1d
:hidden:`max_unpool2d`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_unpool2d
:hidden:`max_unpool3d`
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: max_unpool3d
:hidden:`lp_pool2d`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: lp_pool2d
:hidden:`adaptive_max_pool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_max_pool1d
:hidden:`adaptive_max_pool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_max_pool2d
:hidden:`adaptive_avg_pool1d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_avg_pool1d
:hidden:`adaptive_avg_pool2d`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: adaptive_avg_pool2d
Non-linear activation functions
-------------------------------
:hidden:`threshold`
~~~~~~~~~~~~~~~~~~~
.. autofunction:: threshold
:hidden:`relu`
~~~~~~~~~~~~~~
.. autofunction:: relu
:hidden:`hardtanh`
~~~~~~~~~~~~~~~~~~
.. autofunction:: hardtanh
:hidden:`relu6`
~~~~~~~~~~~~~~~
.. autofunction:: relu6
:hidden:`elu`
~~~~~~~~~~~~~
.. autofunction:: elu
:hidden:`leaky_relu`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: leaky_relu
:hidden:`prelu`
~~~~~~~~~~~~~~~
.. autofunction:: prelu
:hidden:`rrelu`
~~~~~~~~~~~~~~~
.. autofunction:: rrelu
:hidden:`logsigmoid`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: logsigmoid
:hidden:`hardshrink`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: hardshrink
:hidden:`tanhshrink`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: tanhshrink
:hidden:`softsign`
~~~~~~~~~~~~~~~~~~
.. autofunction:: softsign
:hidden:`softplus`
~~~~~~~~~~~~~~~~~~
.. autofunction:: softplus
:hidden:`softmin`
~~~~~~~~~~~~~~~~~
.. autofunction:: softmin
:hidden:`softmax`
~~~~~~~~~~~~~~~~~
.. autofunction:: softmax
:hidden:`softshrink`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: softshrink
:hidden:`log_softmax`
~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: log_softmax
:hidden:`tanh`
~~~~~~~~~~~~~~
.. autofunction:: tanh
:hidden:`sigmoid`
~~~~~~~~~~~~~~~~~
.. autofunction:: sigmoid
Normalization functions
-----------------------
:hidden:`batch_norm`
~~~~~~~~~~~~~~~~~~~~
.. autofunction:: batch_norm
Linear functions
----------------
:hidden:`linear`
~~~~~~~~~~~~~~~~
.. autofunction:: linear
Dropout functions
-----------------
:hidden:`dropout`
~~~~~~~~~~~~~~~~~
.. autofunction:: dropout
Distance functions
----------------------------------
:hidden:`pairwise_distance`
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pairwise_distance
Loss functions
--------------
:hidden:`nll_loss`
~~~~~~~~~~~~~~~~~~
.. autofunction:: nll_loss
:hidden:`kl_div`
~~~~~~~~~~~~~~~~
.. autofunction:: kl_div
:hidden:`cross_entropy`
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cross_entropy
:hidden:`binary_cross_entropy`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: binary_cross_entropy
:hidden:`smooth_l1_loss`
~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: smooth_l1_loss
Vision functions
----------------
:hidden:`pixel_shuffle`
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pixel_shuffle
:hidden:`pad`
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: pad
torch.nn.init
=============
.. currentmodule:: torch.nn.init
.. autofunction:: uniform
.. autofunction:: normal
.. autofunction:: constant
.. autofunction:: xavier_uniform
.. autofunction:: xavier_normal
.. autofunction:: kaiming_uniform
.. autofunction:: kaiming_normal
.. autofunction:: orthogonal
.. autofunction:: sparse


@ -0,0 +1,144 @@
Autograd mechanics
==================
This note will present an overview of how autograd works and records the
operations. It's not strictly necessary to understand all this, but we recommend
getting familiar with it, as it will help you write more efficient, cleaner
programs, and can aid you in debugging.
.. _excluding-subgraphs:
Excluding subgraphs from backward
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Every Variable has two flags: :attr:`requires_grad` and :attr:`volatile`.
They both allow for fine grained exclusion of subgraphs from gradient
computation and can increase efficiency.
.. _excluding-requires_grad:
``requires_grad``
~~~~~~~~~~~~~~~~~
If there's a single input to an operation that requires gradient, its output
will also require gradient. Conversely, the output won't require gradient only
if all of the inputs don't require it. Backward computation is never
performed in subgraphs where none of the Variables required gradients.
.. code::
>>> x = Variable(torch.randn(5, 5))
>>> y = Variable(torch.randn(5, 5))
>>> z = Variable(torch.randn(5, 5), requires_grad=True)
>>> a = x + y
>>> a.requires_grad
False
>>> b = a + z
>>> b.requires_grad
True
This is especially useful when you want to freeze part of your model, or you
know in advance that you're not going to use gradients w.r.t. some parameters.
For example if you want to finetune a pretrained CNN, it's enough to switch the
:attr:`requires_grad` flags in the frozen base, and no intermediate buffers will
be saved, until the computation gets to the last layer, where the affine
transform will use weights that require gradient, and the output of the network
will also require them.
.. code::
model = torchvision.models.resnet18(pretrained=True)
for param in model.parameters():
param.requires_grad = False
# Replace the last fully-connected layer
# Parameters of newly constructed modules have requires_grad=True by default
model.fc = nn.Linear(512, 100)
# Optimize only the classifier
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)
``volatile``
~~~~~~~~~~~~
Volatile is recommended for purely inference mode, when you're sure you won't
even be calling `.backward()`. It's more efficient than any other autograd
setting - it will use the absolute minimal amount of memory to evaluate the
model. ``volatile`` also implies that ``requires_grad is False``.
Volatile differs from :ref:`excluding-requires_grad` in how the flag propagates.
If there's even a single volatile input to an operation, its output is also
going to be volatile. Volatility spreads across the graph much more easily than
non-requiring gradient - you only need a **single** volatile leaf to have a
volatile output, while you need **all** leaves to not require gradient to
have an output that doesn't require gradient. Using the volatile flag you don't
need to change any settings of your model parameters to use it for
inference. It's enough to create a volatile input, and this will ensure that
no intermediate states are saved.
.. code::
>>> regular_input = Variable(torch.randn(5, 5))
>>> volatile_input = Variable(torch.randn(5, 5), volatile=True)
>>> model = torchvision.models.resnet18(pretrained=True)
>>> model(regular_input).requires_grad
True
>>> model(volatile_input).requires_grad
False
>>> model(volatile_input).volatile
True
>>> model(volatile_input).creator is None
True
How autograd encodes the history
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Each Variable has a ``.creator`` attribute that points to the function of
which it is an output. This is an entry point to a directed acyclic graph (DAG)
consisting of :class:`Function` objects as nodes, and references between them
being the edges. Every time an operation is performed, a new :class:`Function`
representing it is instantiated, its :meth:`~torch.autograd.Function.forward`
method is called, and the ``creator`` of its output :class:`Variable` s is set to it.
Then, by following the path from any :class:`Variable` to the leaves, it is
possible to reconstruct the sequence of operations that has created the data,
and automatically compute the gradients.
An important thing to note is that the graph is recreated from scratch at every
iteration, and this is exactly what allows for using arbitrary Python control
flow statements, that can change the overall shape and size of the graph at
every iteration. You don't have to encode all possible paths before you
launch the training - what you run is what you differentiate.
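For instance, inspecting ``.creator`` directly might look like this (the printed
class name is only illustrative)::

    >>> x = Variable(torch.randn(2, 2), requires_grad=True)
    >>> x.creator is None    # leaf Variables were not produced by any Function
    True
    >>> y = x + 2
    >>> y.creator            # the Function that produced y
    <AddConstant object at 0x...>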
In-place operations on Variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Supporting in-place operations in autograd is a hard matter, and we discourage
their use in most cases. Autograd's aggressive buffer freeing and reuse makes
it very efficient and there are very few occasions when in-place operations
actually lower memory usage by any significant amount. Unless you're operating
under heavy memory pressure, you might never need to use them.
There are two main reasons that limit the applicability of in-place operations:
1. Overwriting values required to compute gradients. This is why variables don't
support ``log_``. Its gradient formula requires the original input, and while
it is possible to recreate it by computing the inverse operation, it is
numerically unstable, and requires additional work that often defeats the
purpose of using these functions.
2. Every in-place operation actually requires the implementation to rewrite the
computational graph. Out-of-place versions simply allocate new objects and
keep references to the old graph, while in-place operations require
changing the creator of all inputs to the :class:`Function` representing
this operation. This can be tricky, especially if there are many Variables
that reference the same storage (e.g. created by indexing or transposing),
and in-place functions will actually raise an error if the storage of
modified inputs is referenced by any other :class:`Variable`.
In-place correctness checks
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Every variable keeps a version counter that is incremented every time it's
marked dirty in any operation. When a Function saves any tensors for backward,
the version counters of their containing Variables are saved as well. Once you
access ``self.saved_tensors``, the counters are checked, and if any of them is
greater than the saved value, an error is raised.
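A hedged sketch of how this check can fire (exactly which Variables a given
Function saves is an implementation detail, so the operation needed to reproduce
the error may differ)::

    >>> x = Variable(torch.randn(3), requires_grad=True)
    >>> y = x * 2            # an intermediate Variable
    >>> z = y * y            # multiplication saves its inputs for backward
    >>> y.add_(1)            # marks y dirty and bumps its version counter
    >>> z.sum().backward()   # the saved-version check raises an error here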


@ -0,0 +1,83 @@
.. _cuda-semantics:
CUDA semantics
==============
:mod:`torch.cuda` keeps track of the currently selected GPU, and all CUDA tensors
you allocate will be created on it. The selected device can be changed with a
:any:`torch.cuda.device` context manager.
However, once a tensor is allocated, you can do operations on it irrespective
of the selected device, and the results will always be placed on the same
device as the tensor.
Cross-GPU operations are not allowed by default, with the only exception of
:meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access,
any attempt to launch ops on tensors spread across different devices will
raise an error.
Below you can find a small example showcasing this::
x = torch.cuda.FloatTensor(1)
# x.get_device() == 0
y = torch.FloatTensor(1).cuda()
# y.get_device() == 0
with torch.cuda.device(1):
# allocates a tensor on GPU 1
a = torch.cuda.FloatTensor(1)
# transfers a tensor from CPU to GPU 1
b = torch.FloatTensor(1).cuda()
# a.get_device() == b.get_device() == 1
c = a + b
# c.get_device() == 1
z = x + y
# z.get_device() == 0
# even within a context, you can give a GPU id to the .cuda call
d = torch.randn(2).cuda(2)
# d.get_device() == 2
Best practices
--------------
Use pinned memory buffers
^^^^^^^^^^^^^^^^^^^^^^^^^
.. warning::
This is an advanced tip. Overuse of pinned memory can cause serious
problems when you're running low on RAM, and you should be aware that
pinning is often an expensive operation.
Host to GPU copies are much faster when they originate from pinned (page-locked)
memory. CPU tensors and storages expose a :meth:`~torch.Tensor.pin_memory`
method, that returns a copy of the object, with data put in a pinned region.
Also, once you pin a tensor or storage, you can use asynchronous GPU copies.
Just pass an additional ``async=True`` argument to a :meth:`~torch.Tensor.cuda`
call. This can be used to overlap data transfers with computation.
You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
pinned memory by passing ``pin_memory=True`` to its constructor.
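Putting these pieces together (a minimal sketch; ``dataset`` stands in for any
:class:`~torch.utils.data.Dataset`)::

    import torch
    from torch.utils.data import DataLoader

    x = torch.randn(64, 3, 224, 224).pin_memory()  # page-locked CPU tensor
    y = x.cuda(async=True)                         # asynchronous copy to the GPU

    loader = DataLoader(dataset, batch_size=64, pin_memory=True)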
.. _cuda-nn-dataparallel-instead:
Use nn.DataParallel instead of multiprocessing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most use cases involving batched input and multiple GPUs should default to using
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
a single python process can saturate multiple GPUs.
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
However, this is a known issue that is under active development. As always,
test your use case.
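The typical pattern is a one-line wrap (``MyModel`` and ``input_batch`` are
stand-ins here)::

    import torch.nn as nn

    model = nn.DataParallel(MyModel().cuda())  # replicate the module over the visible GPUs
    output = model(input_batch)                # the batch is scattered along dimension 0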
There are significant caveats to using CUDA models with
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
requirements exactly, it is likely that your program will have incorrect or
undefined behavior.


@ -0,0 +1,169 @@
Extending PyTorch
=================
In this note we'll cover ways of extending :mod:`torch.nn`,
:mod:`torch.autograd`, and writing custom C extensions utilizing our C
libraries.
Extending :mod:`torch.autograd`
-------------------------------
.. currentmodule:: torch.autograd
Adding operations to :mod:`~torch.autograd` requires implementing a new
:class:`Function` subclass for each operation. Recall that :class:`Function` s
are what :mod:`~torch.autograd` uses to compute the results and gradients, and
encode the operation history. Every new function requires you to implement 3
methods:
- ``__init__`` (*optional*) - if your operation is parametrized by/uses
objects different than :class:`Variable` s, you should pass them as arguments
to ``__init__``. For example, ``AddConstant`` function takes a scalar to add,
while ``Transpose`` requires specifying which two dimensions to swap. If your
function doesn't require any additional parameters, you can skip it.
- :meth:`~Function.forward` - the code that performs the operation. It can take
as many arguments as you want, with some of them being
optional, if you specify the default values. Keep in mind that only
:class:`Variable` s will be passed in here. You can return either a single
:class:`Variable` output, or a :class:`tuple` of :class:`Variable` s if there
are multiple. Also, please refer to the docs of :class:`Function` to find
descriptions of useful methods that can be called only from
:meth:`~Function.forward`.
- :meth:`~Function.backward` - gradient formula. It will be given
as many arguments as there were outputs, with each of them representing
gradient w.r.t. that output. It should return as many :class:`Tensor` s as
there were inputs, with each of them containing the gradient w.r.t.
corresponding input. If your inputs didn't require gradient (see
:attr:`~Variable.needs_input_grad`), or it was non-differentiable, you
can return :class:`None`. Also, if you have optional arguments to
:meth:`~Variable.forward` you can return more gradients than there were
inputs, as long as they're all :any:`python:None`.
Below you can find code for a ``Linear`` function from :mod:`torch.nn`, with
additional comments::
# Inherit from Function
class Linear(Function):
# bias is an optional argument
def forward(self, input, weight, bias=None):
self.save_for_backward(input, weight, bias)
output = input.mm(weight.t())
if bias is not None:
output += bias.unsqueeze(0).expand_as(output)
return output
# This function has only a single output, so it gets only one gradient
def backward(self, grad_output):
# This is a pattern that is very convenient - at the top of backward
# unpack saved_tensors and initialize all gradients w.r.t. inputs to
# None. Thanks to the fact that additional trailing Nones are
# ignored, the return statement is simple even when the function has
# optional inputs.
input, weight, bias = self.saved_tensors
grad_input = grad_weight = grad_bias = None
# These needs_input_grad checks are optional and there only to
# improve efficiency. If you want to make your code simpler, you can
# skip them. Returning gradients for inputs that don't require it is
# not an error.
if self.needs_input_grad[0]:
grad_input = grad_output.mm(weight)
if self.needs_input_grad[1]:
grad_weight = grad_output.t().mm(input)
if bias is not None and self.needs_input_grad[2]:
grad_bias = grad_output.sum(0).squeeze(0)
return grad_input, grad_weight, grad_bias
Now, to make it easier to use these custom ops, we recommend wrapping them in
small helper functions::
def linear(input, weight, bias=None):
# First braces create a Function object. Any arguments given here
# will be passed to __init__. Second braces will invoke the __call__
# operator, that will then use forward() to compute the result and
# return it.
return Linear()(input, weight, bias)
You probably want to check if the backward method you implemented actually
computes the derivatives of your function. It is possible by comparing with
numerical approximations using small finite differences::
from torch.autograd import gradcheck
# gradcheck takes a tuple of tensors as input, checks if your gradients
# evaluated with these tensors are close enough to numerical
# approximations, and returns True if they all verify this condition.
input = (Variable(torch.randn(20,20).double(), requires_grad=True),)
test = gradcheck.gradcheck(Linear(), input, eps=1e-6, atol=1e-4)
print(test)
Extending :mod:`torch.nn`
-------------------------
.. currentmodule:: torch.nn
:mod:`~torch.nn` exports two kinds of interfaces - modules and their functional
versions. You can extend it in both ways, but we recommend using modules for
all kinds of layers that hold any parameters or buffers, and recommend using
a functional form for parameter-less operations like activation functions, pooling,
etc.
Adding a functional version of an operation is already fully covered in the
section above.
Adding a :class:`Module`
^^^^^^^^^^^^^^^^^^^^^^^^
Since :mod:`~torch.nn` heavily utilizes :mod:`~torch.autograd`, adding a new
:class:`Module` requires implementing a :class:`~torch.autograd.Function`
that performs the operation and can compute the gradient. From now on let's
assume that we want to implement a ``Linear`` module and we have the function
implemented as in the listing above. There's very little code required to
add this. Now, there are two functions that need to be implemented:
- ``__init__`` (*optional*) - takes in arguments such as kernel sizes, numbers
of features, etc. and initializes parameters and buffers.
- :meth:`~Module.forward` - instantiates a :class:`~torch.autograd.Function` and
uses it to perform the operation. It's very similar to a functional wrapper
shown above.
This is how a ``Linear`` module can be implemented::
class Linear(nn.Module):
def __init__(self, input_features, output_features, bias=True):
# nn.Module.__init__ must be called before assigning any Parameters
super(Linear, self).__init__()
self.input_features = input_features
self.output_features = output_features
# nn.Parameter is a special kind of Variable, that will get
# automatically registered as Module's parameter once it's assigned
# as an attribute. Parameters and buffers need to be registered, or
# they won't appear in .parameters() (doesn't apply to buffers), and
# won't be converted when e.g. .cuda() is called. You can use
# .register_buffer() to register buffers.
# nn.Parameters can never be volatile and, different than Variables,
# they require gradients by default.
# the Function above computes input.mm(weight.t()), so weight is (out, in)
self.weight = nn.Parameter(torch.Tensor(output_features, input_features))
if bias:
self.bias = nn.Parameter(torch.Tensor(output_features))
else:
# You should always register all possible parameters, but the
# optional ones can be None if you want.
self.register_parameter('bias', None)
# Not a very smart way to initialize weights
self.weight.data.uniform_(-0.1, 0.1)
if self.bias is not None:
self.bias.data.uniform_(-0.1, 0.1)
def forward(self, input):
# See the autograd section for explanation of what happens here.
return Linear()(input, self.weight, self.bias)
Writing custom C extensions
---------------------------
Coming soon. For now you can find an example at
`GitHub <https://github.com/pytorch/extension-ffi>`_.


@ -0,0 +1,124 @@
Multiprocessing best practices
==============================
:mod:`torch.multiprocessing` is a drop-in replacement for Python's
:mod:`python:multiprocessing` module. It supports the exact same operations,
but extends it, so that all tensors sent through a
:class:`python:multiprocessing.Queue` will have their data moved into shared
memory, and only a handle will be sent to the other process.
.. note::
When a :class:`~torch.autograd.Variable` is sent to another process, both
the :attr:`Variable.data` and :attr:`Variable.grad.data` are going to be
shared.
This makes it possible to implement various training methods, like Hogwild, A3C,
or any others that require asynchronous operation.
Sharing CUDA tensors
--------------------
Sharing CUDA tensors between processes is supported only in Python 3, using
the ``spawn`` or ``forkserver`` start methods. :mod:`python:multiprocessing` in
Python 2 can only create subprocesses using ``fork``, and it's not supported
by the CUDA runtime.
.. warning::
The CUDA API requires that allocations exported to other processes remain
valid for as long as they're used by them. You should be careful to ensure that
the CUDA tensors you share don't go out of scope for as long as they're needed.
This shouldn't be a problem for sharing model parameters, but passing other
kinds of data should be done with care. Note that this restriction doesn't
apply to shared CPU memory.
See also: :ref:`cuda-nn-dataparallel-instead`
Best practices and tips
-----------------------
Avoiding and fighting deadlocks
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are a lot of things that can go wrong when a new process is spawned, with
the most common cause of deadlocks being background threads. If there's any
thread that holds a lock or imports a module, and ``fork`` is called, it's very
likely that the subprocess will be in a corrupted state and will deadlock or
fail in a different way. Note that even if your code doesn't use threads, Python's
built-in libraries do - no need to look further than :mod:`python:multiprocessing`.
:class:`python:multiprocessing.Queue` is actually a very complex class, that
spawns multiple threads used to serialize, send and receive objects, and they
can cause the aforementioned problems too. If you find yourself in such a situation,
try using a :class:`~python:multiprocessing.queues.SimpleQueue`, which doesn't
use any additional threads.
We're trying our best to make it easy for you and ensure these deadlocks don't
happen but some things are out of our control. If you have any issues you can't
cope with for a while, try reaching out on forums, and we'll see if it's an
issue we can fix.
Reuse buffers passed through a Queue
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Remember that each time you put a :class:`~torch.Tensor` into a
:class:`python:multiprocessing.Queue`, it has to be moved into shared memory.
If it's already shared, it is a no-op, otherwise it will incur an additional
memory copy that can slow down the whole process. Even if you have a pool of
processes sending data to a single one, make it send the buffers back - this
is nearly free and will let you avoid a copy when sending the next batch.
Asynchronous multiprocess training (e.g. Hogwild)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Using :mod:`torch.multiprocessing`, it is possible to train a model
asynchronously, with parameters either shared all the time, or being
periodically synchronized. In the first case, we recommend sending over the whole
model object, while in the latter, we advise sending only the
:meth:`~torch.nn.Module.state_dict`.
We recommend using :class:`python:multiprocessing.Queue` for passing all kinds
of PyTorch objects between processes. It is possible to e.g. inherit the tensors
and storages already in shared memory, when using the ``fork`` start method,
however it is very bug prone and should be used with care, and only by advanced
users. Queues, even though they're sometimes a less elegant solution, will work
properly in all cases.
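A hedged sketch of the Queue-based pattern; the tensor's storage is moved into
shared memory when it's put on the queue, so in-place changes made by the consumer
are visible to the producer as well::

    import torch
    import torch.multiprocessing as mp

    def consumer(q):
        t = q.get()   # backed by shared memory
        t.fill_(1)    # modifies the shared storage in place

    if __name__ == '__main__':
        q = mp.Queue()
        p = mp.Process(target=consumer, args=(q,))
        p.start()
        t = torch.zeros(5)
        q.put(t)      # moves t into shared memory and sends a handle
        p.join()
        print(t)      # all ones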
.. warning::
You should be careful about having global statements that are not guarded
with an ``if __name__ == '__main__'`` check. If a different start method than
``fork`` is used, they will be executed in all subprocesses.
Hogwild
~~~~~~~
A concrete Hogwild implementation can be found in the `examples repository`__,
but to showcase the overall structure of the code, there's also a minimal
example below::
import torch.multiprocessing as mp
from model import MyModel
def train(model):
# Construct data_loader, optimizer, etc.
for data, labels in data_loader:
optimizer.zero_grad()
loss_fn(model(data), labels).backward()
optimizer.step() # This will update the shared parameters
if __name__ == '__main__':
num_processes = 4
model = MyModel()
# NOTE: this is required for the ``fork`` method to work
model.share_memory()
processes = []
for rank in range(num_processes):
p = mp.Process(target=train, args=(model,))
p.start()
processes.append(p)
for p in processes:
p.join()
.. __: https://github.com/pytorch/examples/tree/master/mnist_hogwild


@ -0,0 +1,34 @@
Serialization semantics
=======================
Best practices
--------------
.. _recommend-saving-models:
Recommended approach for saving a model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are two main approaches for serializing and restoring a model.

The first (recommended) saves and loads only the model parameters::

    torch.save(the_model.state_dict(), PATH)

Then later::

    the_model = TheModelClass(*args, **kwargs)
    the_model.load_state_dict(torch.load(PATH))

The second saves and loads the entire model::

    torch.save(the_model, PATH)

Then later::

    the_model = torch.load(PATH)

However, in this case the serialized data is bound to the specific classes
and the exact directory structure used, so it can break in various ways when
used in other projects or after some serious refactors.

docs/source/optim.rst Normal file

@ -0,0 +1,116 @@
torch.optim
===================================
.. automodule:: torch.optim
How to use an optimizer
-----------------------
To use :mod:`torch.optim` you have to construct an optimizer object that will
hold the current state and will update the parameters based on the computed
gradients.
Constructing it
^^^^^^^^^^^^^^^
To construct an :class:`Optimizer` you have to give it an iterable containing the
parameters (all should be :class:`~torch.autograd.Variable` s) to optimize. Then,
you can specify optimizer-specific options such as the learning rate, weight decay, etc.
Example::

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    optimizer = optim.Adam([var1, var2], lr=0.0001)
Per-parameter options
^^^^^^^^^^^^^^^^^^^^^
:class:`Optimizer` s also support specifying per-parameter options. To do this, instead
of passing an iterable of :class:`~torch.autograd.Variable` s, pass in an iterable of
:class:`dict` s. Each of them will define a separate parameter group, and should contain
a ``params`` key, containing a list of parameters belonging to it. Other keys
should match the keyword arguments accepted by the optimizers, and will be used
as optimization options for this group.
.. note::

    You can still pass options as keyword arguments. They will be used as
    defaults, in the groups that didn't override them. This is useful when you
    only want to vary a single option, while keeping all others consistent
    between parameter groups.
For example, this is very useful when one wants to specify per-layer learning rates::

    optim.SGD([
        {'params': model.base.parameters()},
        {'params': model.classifier.parameters(), 'lr': 1e-3}
    ], lr=1e-2, momentum=0.9)

This means that ``model.base``'s parameters will use the default learning rate of ``1e-2``,
``model.classifier``'s parameters will use a learning rate of ``1e-3``, and a momentum of
``0.9`` will be used for all parameters.
Taking an optimization step
^^^^^^^^^^^^^^^^^^^^^^^^^^^
All optimizers implement a :func:`~Optimizer.step` method that updates the
parameters. It can be used in two ways:
``optimizer.step()``
~~~~~~~~~~~~~~~~~~~~
This is a simplified version supported by most optimizers. The function can be
called once the gradients are computed using e.g.
:func:`~torch.autograd.Variable.backward`.
Example::

    for input, target in dataset:
        optimizer.zero_grad()
        output = model(input)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
``optimizer.step(closure)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some optimization algorithms such as Conjugate Gradient and LBFGS need to
reevaluate the function multiple times, so you have to pass in a closure that
allows them to recompute your model. The closure should clear the gradients,
compute the loss, and return it.
Example::

    for input, target in dataset:
        def closure():
            optimizer.zero_grad()
            output = model(input)
            loss = loss_fn(output, target)
            loss.backward()
            return loss
        optimizer.step(closure)
Algorithms
----------
.. autoclass:: Optimizer
    :members:
.. autoclass:: Adadelta
    :members:
.. autoclass:: Adagrad
    :members:
.. autoclass:: Adam
    :members:
.. autoclass:: Adamax
    :members:
.. autoclass:: ASGD
    :members:
.. autoclass:: LBFGS
    :members:
.. autoclass:: RMSprop
    :members:
.. autoclass:: Rprop
    :members:
.. autoclass:: SGD
    :members:

docs/source/storage.rst Normal file

@ -0,0 +1,12 @@
torch.Storage
===================================
A :class:`torch.Storage` is a contiguous, one-dimensional array of a single
data type.
Every :class:`torch.Tensor` has a corresponding storage of the same data type.
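For example, a quick sketch of the relationship between a tensor and its storage::

    >>> x = torch.FloatTensor([[1, 2], [3, 4]])
    >>> s = x.storage()   # the flat, one-dimensional array backing the 2x2 tensor
    >>> len(s)
    4
    >>> s[0] = 10.0       # mutating the storage is reflected in the tensor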
.. autoclass:: torch.FloatStorage
    :members:
    :undoc-members:
    :inherited-members:

docs/source/tensors.rst Normal file

@ -0,0 +1,308 @@
.. currentmodule:: torch
torch.Tensor
===================================
A :class:`torch.Tensor` is a multi-dimensional matrix containing elements of
a single data type.
Torch defines seven CPU tensor types and eight GPU tensor types:
======================== =========================== ================================
Data type CPU tensor GPU tensor
======================== =========================== ================================
32-bit floating point :class:`torch.FloatTensor` :class:`torch.cuda.FloatTensor`
64-bit floating point :class:`torch.DoubleTensor` :class:`torch.cuda.DoubleTensor`
16-bit floating point N/A :class:`torch.cuda.HalfTensor`
8-bit integer (unsigned) :class:`torch.ByteTensor` :class:`torch.cuda.ByteTensor`
8-bit integer (signed) :class:`torch.CharTensor` :class:`torch.cuda.CharTensor`
16-bit integer (signed) :class:`torch.ShortTensor` :class:`torch.cuda.ShortTensor`
32-bit integer (signed) :class:`torch.IntTensor` :class:`torch.cuda.IntTensor`
64-bit integer (signed) :class:`torch.LongTensor` :class:`torch.cuda.LongTensor`
======================== =========================== ================================
The :class:`torch.Tensor` constructor is an alias for the default tensor type
(:class:`torch.FloatTensor`).
A tensor can be constructed from a Python :class:`list` or sequence:
::

    >>> torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
    1 2 3
    4 5 6
    [torch.FloatTensor of size 2x3]
An empty tensor can be constructed by specifying its size:
::

    >>> torch.IntTensor(2, 4).zero_()
    0 0 0 0
    0 0 0 0
    [torch.IntTensor of size 2x4]
The contents of a tensor can be accessed and modified using Python's indexing
and slicing notation:
::

    >>> x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
    >>> print(x[1][2])
    6.0
    >>> x[0][1] = 8
    >>> print(x)
    1 8 3
    4 5 6
    [torch.FloatTensor of size 2x3]
Each tensor has an associated :class:`torch.Storage`, which holds its data.
The tensor class provides a multi-dimensional, `strided <https://en.wikipedia.org/wiki/Stride_of_an_array>`_
view of a storage and defines numeric operations on it.
.. note::

    Methods which mutate a tensor are marked with an underscore suffix.
    For example, :func:`torch.FloatTensor.abs_` computes the absolute value
    in-place and returns the modified tensor, while :func:`torch.FloatTensor.abs`
    computes the result in a new tensor.
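For instance::

    >>> x = torch.FloatTensor([-1, -2, 3])
    >>> y = x.abs()  # returns a new tensor; x is unchanged
    >>> x.abs_()     # modifies x in-place and returns it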
.. class:: Tensor()
           Tensor(*sizes)
           Tensor(size)
           Tensor(sequence)
           Tensor(ndarray)
           Tensor(tensor)
           Tensor(storage)

    Creates a new tensor from an optional size or data.

    If no arguments are given, an empty zero-dimensional tensor is returned.
    If a :class:`numpy.ndarray`, :class:`torch.Tensor`, or :class:`torch.Storage`
    is given, a new tensor that shares the same data is returned. If a Python
    sequence is given, a new tensor is created from a copy of the sequence.
.. automethod:: abs
.. automethod:: abs_
.. automethod:: acos
.. automethod:: acos_
.. automethod:: add
.. automethod:: add_
.. automethod:: addbmm
.. automethod:: addbmm_
.. automethod:: addcdiv
.. automethod:: addcdiv_
.. automethod:: addcmul
.. automethod:: addcmul_
.. automethod:: addmm
.. automethod:: addmm_
.. automethod:: addmv
.. automethod:: addmv_
.. automethod:: addr
.. automethod:: addr_
.. automethod:: apply_
.. automethod:: asin
.. automethod:: asin_
.. automethod:: atan
.. automethod:: atan2
.. automethod:: atan2_
.. automethod:: atan_
.. automethod:: baddbmm
.. automethod:: baddbmm_
.. automethod:: bernoulli
.. automethod:: bernoulli_
.. automethod:: bmm
.. automethod:: byte
.. automethod:: cauchy_
.. automethod:: ceil
.. automethod:: ceil_
.. automethod:: char
.. automethod:: chunk
.. automethod:: clamp
.. automethod:: clamp_
.. automethod:: clone
.. automethod:: contiguous
.. automethod:: copy_
.. automethod:: cos
.. automethod:: cos_
.. automethod:: cosh
.. automethod:: cosh_
.. automethod:: cpu
.. automethod:: cross
.. automethod:: cuda
.. automethod:: cumprod
.. automethod:: cumsum
.. automethod:: data_ptr
.. automethod:: diag
.. automethod:: dim
.. automethod:: dist
.. automethod:: div
.. automethod:: div_
.. automethod:: dot
.. automethod:: double
.. automethod:: eig
.. automethod:: element_size
.. automethod:: eq
.. automethod:: eq_
.. automethod:: equal
.. automethod:: exp
.. automethod:: exp_
.. automethod:: expand
.. automethod:: expand_as
.. automethod:: exponential_
.. automethod:: fill_
.. automethod:: float
.. automethod:: floor
.. automethod:: floor_
.. automethod:: fmod
.. automethod:: fmod_
.. automethod:: frac
.. automethod:: frac_
.. automethod:: gather
.. automethod:: ge
.. automethod:: ge_
.. automethod:: gels
.. automethod:: geometric_
.. automethod:: geqrf
.. automethod:: ger
.. automethod:: gesv
.. automethod:: gt
.. automethod:: gt_
.. automethod:: half
.. automethod:: histc
.. automethod:: index
.. automethod:: index_add_
.. automethod:: index_copy_
.. automethod:: index_fill_
.. automethod:: index_select
.. automethod:: int
.. automethod:: inverse
.. automethod:: is_contiguous
.. autoattribute:: is_cuda
    :annotation:
.. automethod:: is_pinned
.. automethod:: is_set_to
.. automethod:: is_signed
.. automethod:: kthvalue
.. automethod:: le
.. automethod:: le_
.. automethod:: lerp
.. automethod:: lerp_
.. automethod:: log
.. automethod:: log1p
.. automethod:: log1p_
.. automethod:: log_
.. automethod:: log_normal_
.. automethod:: long
.. automethod:: lt
.. automethod:: lt_
.. automethod:: map_
.. automethod:: masked_copy_
.. automethod:: masked_fill_
.. automethod:: masked_select
.. automethod:: max
.. automethod:: mean
.. automethod:: median
.. automethod:: min
.. automethod:: mm
.. automethod:: mode
.. automethod:: mul
.. automethod:: mul_
.. automethod:: multinomial
.. automethod:: mv
.. automethod:: narrow
.. automethod:: ndimension
.. automethod:: ne
.. automethod:: ne_
.. automethod:: neg
.. automethod:: neg_
.. automethod:: nelement
.. automethod:: new
.. automethod:: nonzero
.. automethod:: norm
.. automethod:: normal_
.. automethod:: numel
.. automethod:: numpy
.. automethod:: orgqr
.. automethod:: ormqr
.. automethod:: permute
.. automethod:: pin_memory
.. automethod:: potrf
.. automethod:: potri
.. automethod:: potrs
.. automethod:: pow
.. automethod:: pow_
.. automethod:: prod
.. automethod:: pstrf
.. automethod:: qr
.. automethod:: random_
.. automethod:: reciprocal
.. automethod:: reciprocal_
.. automethod:: remainder
.. automethod:: remainder_
.. automethod:: renorm
.. automethod:: renorm_
.. automethod:: repeat
.. automethod:: resize_
.. automethod:: resize_as_
.. automethod:: round
.. automethod:: round_
.. automethod:: rsqrt
.. automethod:: rsqrt_
.. automethod:: scatter_
.. automethod:: select
.. automethod:: set_
.. automethod:: share_memory_
.. automethod:: short
.. automethod:: sigmoid
.. automethod:: sigmoid_
.. automethod:: sign
.. automethod:: sign_
.. automethod:: sin
.. automethod:: sin_
.. automethod:: sinh
.. automethod:: sinh_
.. automethod:: size
.. automethod:: sort
.. automethod:: split
.. automethod:: sqrt
.. automethod:: sqrt_
.. automethod:: squeeze
.. automethod:: squeeze_
.. automethod:: std
.. automethod:: storage
.. automethod:: storage_offset
.. automethod:: storage_type
.. automethod:: stride
.. automethod:: sub
.. automethod:: sub_
.. automethod:: sum
.. automethod:: svd
.. automethod:: symeig
.. automethod:: t
.. automethod:: t_
.. automethod:: tan
.. automethod:: tan_
.. automethod:: tanh
.. automethod:: tanh_
.. automethod:: tolist
.. automethod:: topk
.. automethod:: trace
.. automethod:: transpose
.. automethod:: transpose_
.. automethod:: tril
.. automethod:: tril_
.. automethod:: triu
.. automethod:: triu_
.. automethod:: trtrs
.. automethod:: trunc
.. automethod:: trunc_
.. automethod:: type
.. automethod:: type_as
.. automethod:: unfold
.. automethod:: uniform_
.. automethod:: unsqueeze
.. automethod:: unsqueeze_
.. automethod:: var
.. automethod:: view
.. automethod:: view_as
.. automethod:: zero_

docs/source/torch.rst Normal file

@ -0,0 +1,184 @@
torch
===================================
.. automodule:: torch
Tensors
----------------------------------
.. autofunction:: is_tensor
.. autofunction:: is_storage
.. autofunction:: set_default_tensor_type
.. autofunction:: numel
.. autofunction:: set_printoptions
Creation Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: eye
.. autofunction:: from_numpy
.. autofunction:: linspace
.. autofunction:: logspace
.. autofunction:: ones
.. autofunction:: rand
.. autofunction:: randn
.. autofunction:: randperm
.. autofunction:: range
.. autofunction:: zeros
Indexing, Slicing, Joining, Mutating Ops
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cat
.. autofunction:: chunk
.. autofunction:: gather
.. autofunction:: index_select
.. autofunction:: masked_select
.. autofunction:: nonzero
.. autofunction:: split
.. autofunction:: squeeze
.. autofunction:: stack
.. autofunction:: t
.. autofunction:: transpose
.. autofunction:: unbind
.. autofunction:: unsqueeze
Random sampling
----------------------------------
.. autofunction:: manual_seed
.. autofunction:: initial_seed
.. autofunction:: get_rng_state
.. autofunction:: set_rng_state
.. autodata:: default_generator
.. autofunction:: bernoulli
.. autofunction:: multinomial
.. autofunction:: normal
Serialization
----------------------------------
.. autofunction:: save
.. autofunction:: load
Parallelism
----------------------------------
.. autofunction:: get_num_threads
.. autofunction:: set_num_threads
Math operations
----------------------------------
Pointwise Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: abs
.. autofunction:: acos
.. autofunction:: add
.. autofunction:: addcdiv
.. autofunction:: addcmul
.. autofunction:: asin
.. autofunction:: atan
.. autofunction:: atan2
.. autofunction:: ceil
.. autofunction:: clamp
.. autofunction:: cos
.. autofunction:: cosh
.. autofunction:: div
.. autofunction:: exp
.. autofunction:: floor
.. autofunction:: fmod
.. autofunction:: frac
.. autofunction:: lerp
.. autofunction:: log
.. autofunction:: log1p
.. autofunction:: mul
.. autofunction:: neg
.. autofunction:: pow
.. autofunction:: reciprocal
.. autofunction:: remainder
.. autofunction:: round
.. autofunction:: rsqrt
.. autofunction:: sigmoid
.. autofunction:: sign
.. autofunction:: sin
.. autofunction:: sinh
.. autofunction:: sqrt
.. autofunction:: tan
.. autofunction:: tanh
.. autofunction:: trunc
Reduction Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cumprod
.. autofunction:: cumsum
.. autofunction:: dist
.. autofunction:: mean
.. autofunction:: median
.. autofunction:: mode
.. autofunction:: norm
.. autofunction:: prod
.. autofunction:: std
.. autofunction:: sum
.. autofunction:: var
Comparison Ops
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: eq
.. autofunction:: equal
.. autofunction:: ge
.. autofunction:: gt
.. autofunction:: kthvalue
.. autofunction:: le
.. autofunction:: lt
.. autofunction:: max
.. autofunction:: min
.. autofunction:: ne
.. autofunction:: sort
.. autofunction:: topk
Other Operations
~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: cross
.. autofunction:: diag
.. autofunction:: histc
.. autofunction:: renorm
.. autofunction:: trace
.. autofunction:: tril
.. autofunction:: triu
BLAS and LAPACK Operations
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: addbmm
.. autofunction:: addmm
.. autofunction:: addmv
.. autofunction:: addr
.. autofunction:: baddbmm
.. autofunction:: bmm
.. autofunction:: btrifact
.. autofunction:: btrisolve
.. autofunction:: dot
.. autofunction:: eig
.. autofunction:: gels
.. autofunction:: geqrf
.. autofunction:: ger
.. autofunction:: gesv
.. autofunction:: inverse
.. autofunction:: mm
.. autofunction:: mv
.. autofunction:: orgqr
.. autofunction:: ormqr
.. autofunction:: potrf
.. autofunction:: potri
.. autofunction:: potrs
.. autofunction:: pstrf
.. autofunction:: qr
.. autofunction:: svd
.. autofunction:: symeig
.. autofunction:: trtrs


@ -0,0 +1,162 @@
torchvision.datasets
====================
The following dataset loaders are available:
- `MNIST`_
- `COCO (Captioning and Detection)`_
- `LSUN Classification`_
- `ImageFolder`_
- `Imagenet-12`_
- `CIFAR10 and CIFAR100`_
- `STL10`_
Datasets have the API:
- ``__getitem__``
- ``__len__``
They all subclass ``torch.utils.data.Dataset``, so they can all be loaded in
parallel (using Python multiprocessing workers) with a standard
``torch.utils.data.DataLoader``.

For example:

``torch.utils.data.DataLoader(coco_cap, batch_size=args.batchSize, shuffle=True, num_workers=args.nThreads)``
In the constructor, each dataset has a slightly different API as needed,
but they all take the keyword args:

- ``transform`` - a function that takes in an image and returns a
  transformed version

  - common stuff like ``ToTensor``, ``RandomCrop``, etc. These can be
    composed together with ``transforms.Compose`` (see the transforms section
    below)

- ``target_transform`` - a function that takes in the target and
  transforms it. For example, take in the caption string and return a
  tensor of word indices (see the sketch right after this list).
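A minimal sketch of passing both keyword args (``captions_to_indices`` below is
a hypothetical helper, shown only to illustrate ``target_transform``):

.. code:: python

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    def captions_to_indices(captions):
        # hypothetical: map the caption annotation to a tensor of word indices
        ...

    cap = dset.CocoCaptions(root='dir where images are',
                            annFile='json annotation file',
                            transform=transforms.ToTensor(),
                            target_transform=captions_to_indices)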
MNIST
~~~~~
``dset.MNIST(root, train=True, transform=None, target_transform=None, download=False)``
- ``root`` : root directory of the dataset where ``processed/training.pt`` and ``processed/test.pt`` exist.
- ``train`` : ``True`` = Training set, ``False`` = Test set
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in the root directory. If the dataset is already downloaded, place the processed dataset (a processing function is available in ``mnist.py``) in the ``processed`` folder.
COCO
~~~~
This requires the `COCO API to be installed`_
Captions:
^^^^^^^^^
``dset.CocoCaptions(root="dir where images are", annFile="json annotation file", [transform, target_transform])``
Example:

.. code:: python

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    cap = dset.CocoCaptions(root='dir where images are',
                            annFile='json annotation file',
                            transform=transforms.ToTensor())

    print('Number of samples: ', len(cap))
    img, target = cap[3]  # load 4th sample

    print("Image Size: ", img.size())
    print(target)

Output:

::

    Number of samples: 82783
    Image Size: (3L, 427L, 640L)
    [u'A plane emitting smoke stream flying over a mountain.',
     u'A plane darts across a bright blue sky behind a mountain covered in snow',
     u'A plane leaves a contrail above the snowy mountain top.',
     u'A mountain that has a plane flying overheard in the distance.',
     u'A mountain view with a plume of smoke in the background']
Detection:
^^^^^^^^^^
``dset.CocoDetection(root="dir where images are", annFile="json annotation file", [transform, target_transform])``
LSUN
~~~~
``dset.LSUN(db_path, classes='train', [transform, target_transform])``
- ``db_path`` : root directory for the database files
- ``classes`` : ``'train'`` (all categories, training set), ``'val'`` (all categories, validation set), ``'test'`` (all categories, test set),
  or a list of categories to load (e.g. [``'bedroom_train'``, ``'church_train'``, ...])
ImageFolder
~~~~~~~~~~~
A generic data loader where the images are arranged in this way:
::

    root/dog/xxx.png
    root/dog/xxy.png
    root/dog/xxz.png

    root/cat/123.png
    root/cat/nsdf3.png
    root/cat/asd932_.png
``dset.ImageFolder(root="root folder path", [transform, target_transform])``
It has the members:
- ``self.classes`` - The class names as a list
- ``self.class_to_idx`` - Corresponding class indices
- ``self.imgs`` - The list of (image path, class-index) tuples
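A short usage sketch (the root path and the transform below are placeholders):

.. code:: python

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    dataset = dset.ImageFolder('path/to/root', transform=transforms.ToTensor())
    print(dataset.classes)         # e.g. ['cat', 'dog']
    img, class_index = dataset[0]  # one (image, class-index) sample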
Imagenet-12
~~~~~~~~~~~
This is simply implemented with an ImageFolder dataset.
The data is preprocessed `as described
here <https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset>`__
`Here is an
example <https://github.com/pytorch/examples/blob/27e2a46c1d1505324032b1d94fc6ce24d5b67e97/imagenet/main.py#L48-L62>`__.
CIFAR
~~~~~
``dset.CIFAR10(root, train=True, transform=None, target_transform=None, download=False)``
``dset.CIFAR100(root, train=True, transform=None, target_transform=None, download=False)``
- ``root`` : root directory of dataset where there is folder
``cifar-10-batches-py``
- ``train`` : ``True`` = Training set, ``False`` = Test set
- ``download`` : ``True`` = downloads the dataset from the internet and
puts it in root directory. If dataset already downloaded, doesn't do anything.
STL10
~~~~~
``dset.STL10(root, split='train', transform=None, target_transform=None, download=False)``
- ``root`` : root directory of dataset where there is folder ``stl10_binary``
- ``split`` : ``'train'`` = Training set, ``'test'`` = Test set, ``'unlabeled'`` = Unlabeled set, ``'train+unlabeled'`` = Training + Unlabeled set (missing label marked as ``-1``)
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in root directory. If dataset already downloaded, doesn't do anything.
.. _MNIST: #mnist
.. _COCO (Captioning and Detection): #coco
.. _LSUN Classification: #lsun
.. _ImageFolder: #imagefolder
.. _Imagenet-12: #imagenet-12
.. _CIFAR10 and CIFAR100: #cifar
.. _STL10: #stl10
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI


@ -0,0 +1,11 @@
torchvision.models
===================
.. currentmodule:: torchvision.models
.. automodule:: torchvision.models
    :members: alexnet, resnet18, resnet34, resnet50, resnet101, resnet152,
              vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19,
              vgg19_bn
    :undoc-members:


@ -0,0 +1,5 @@
torchvision
===================
The :mod:`torchvision` package consists of popular datasets, model
architectures, and common image transformations for computer vision.


@ -0,0 +1,40 @@
torchvision.transforms
======================
.. currentmodule:: torchvision.transforms
.. autoclass:: Compose
Transforms on PIL.Image
-----------------------
.. autoclass:: Scale
.. autoclass:: CenterCrop
.. autoclass:: RandomCrop
.. autoclass:: RandomHorizontalFlip
.. autoclass:: RandomSizedCrop
.. autoclass:: Pad
Transforms on torch.\*Tensor
----------------------------
.. autoclass:: Normalize
Conversion Transforms
---------------------
.. autoclass:: ToTensor
.. autoclass:: ToPILImage
Generic Transforms
------------------
.. autoclass:: Lambda
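A typical pipeline chains several of these together with :class:`Compose`
(the crop size and normalization statistics below are purely illustrative)::

    import torchvision.transforms as transforms

    preprocess = transforms.Compose([
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # tensor_image = preprocess(pil_image)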


@ -0,0 +1,9 @@
torchvision.utils
===================
.. currentmodule:: torchvision.utils
.. autofunction:: make_grid
.. autofunction:: save_image

setup.py

@ -1,6 +1,9 @@
from setuptools import setup, Extension, distutils, Command, find_packages
import setuptools.command.build_ext
import setuptools.command.install
import setuptools.command.develop
import setuptools.command.build_py
import distutils.unixccompiler
import distutils.command.build
import distutils.command.clean
import platform
@ -9,21 +12,32 @@ import shutil
import sys
import os
# TODO: make this more robust
WITH_CUDA = os.path.exists('/Developer/NVIDIA/CUDA-7.5/include') or os.path.exists('/usr/local/cuda/include')
DEBUG = False
from tools.setup_helpers.env import check_env_flag
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
DEBUG = check_env_flag('DEBUG')
WITH_DISTRIBUTED = check_env_flag('WITH_DISTRIBUTED')
WITH_DISTRIBUTED_MW = WITH_DISTRIBUTED and check_env_flag('WITH_DISTRIBUTED_MW')
WITH_NCCL = WITH_CUDA and platform.system() != 'Darwin'
SYSTEM_NCCL = False
################################################################################
# Monkey-patch setuptools to compile in parallel
################################################################################
original_link = distutils.unixccompiler.UnixCCompiler.link
def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None):
def parallelCCompile(self, sources, output_dir=None, macros=None,
include_dirs=None, debug=0, extra_preargs=None,
extra_postargs=None, depends=None):
# those lines are copied from distutils.ccompiler.CCompiler directly
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs)
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
output_dir, macros, include_dirs, sources, depends, extra_postargs)
cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
# compile using a thread pool
import multiprocessing.pool
def _single_compile(obj):
src, ext = build[obj]
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
@ -32,12 +46,23 @@ def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=N
return objects
def patched_link(self, *args, **kwargs):
_cxx = self.compiler_cxx
self.compiler_cxx = None
result = original_link(self, *args, **kwargs)
self.compiler_cxx = _cxx
return result
distutils.ccompiler.CCompiler.compile = parallelCCompile
distutils.unixccompiler.UnixCCompiler.link = patched_link
################################################################################
# Custom build commands
################################################################################
class build_deps(Command):
user_options = []
@ -52,6 +77,10 @@ class build_deps(Command):
build_all_cmd = ['bash', 'torch/lib/build_all.sh']
if WITH_CUDA:
build_all_cmd += ['--with-cuda']
if WITH_NCCL and not SYSTEM_NCCL:
build_all_cmd += ['--with-nccl']
if WITH_DISTRIBUTED:
build_all_cmd += ['--with-distributed']
if subprocess.call(build_all_cmd) != 0:
sys.exit(1)
generate_nn_wrappers()
@ -71,22 +100,72 @@ class build_module(Command):
self.run_command('build_ext')
class build_ext(setuptools.command.build_ext.build_ext):
class build_py(setuptools.command.build_py.build_py):
def run(self):
self.create_version_file()
setuptools.command.build_py.build_py.run(self)
@staticmethod
def create_version_file():
global version, cwd
print('-- Building version ' + version)
version_path = os.path.join(cwd, 'torch', 'version.py')
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
class develop(setuptools.command.develop.develop):
def run(self):
build_py.create_version_file()
setuptools.command.develop.develop.run(self)
class build_ext(setuptools.command.build_ext.build_ext):
def run(self):
# Print build options
if WITH_NUMPY:
print('-- Building with NumPy bindings')
else:
print('-- NumPy not found')
if WITH_CUDNN:
print('-- Detected cuDNN at ' + CUDNN_LIB_DIR + ', ' + CUDNN_INCLUDE_DIR)
else:
print('-- Not using cuDNN')
if WITH_CUDA:
print('-- Detected CUDA at ' + CUDA_HOME)
else:
print('-- Not using CUDA')
if WITH_NCCL and SYSTEM_NCCL:
print('-- Using system provided NCCL library')
elif WITH_NCCL:
print('-- Building NCCL library')
else:
print('-- Not using NCCL')
# cwrap depends on pyyaml, so we can't import it earlier
from tools.cwrap import cwrap
from tools.cwrap.plugins.THPPlugin import THPPlugin
from tools.cwrap.plugins.THPLongArgsPlugin import THPLongArgsPlugin
from tools.cwrap.plugins.ArgcountSortPlugin import ArgcountSortPlugin
from tools.cwrap.plugins.AutoGPU import AutoGPU
from tools.cwrap.plugins.BoolOption import BoolOption
from tools.cwrap.plugins.KwargsPlugin import KwargsPlugin
from tools.cwrap.plugins.NullableArguments import NullableArguments
from tools.cwrap.plugins.CuDNNPlugin import CuDNNPlugin
thp_plugin = THPPlugin()
cwrap('torch/csrc/generic/TensorMethods.cwrap', plugins=[
THPLongArgsPlugin(), THPPlugin(), ArgcountSortPlugin(), AutoGPU()
BoolOption(), thp_plugin, AutoGPU(condition='IS_CUDA'),
ArgcountSortPlugin(), KwargsPlugin()
])
cwrap('torch/csrc/cudnn/cuDNN.cwrap', plugins=[
CuDNNPlugin(), NullableArguments()
])
# It's an old-style class in Python 2.7...
setuptools.command.build_ext.build_ext.run(self)
class build(distutils.command.build.build):
sub_commands = [
('build_deps', lambda self: True),
@ -94,6 +173,7 @@ class build(distutils.command.build.build):
class install(setuptools.command.install.install):
def run(self):
if not self.skip_build:
self.run_command('build_deps')
@ -101,21 +181,28 @@ class install(setuptools.command.install.install):
class clean(distutils.command.clean.clean):
def run(self):
import glob
with open('.gitignore', 'r') as f:
ignores = f.read()
for glob in filter(bool, ignores.split('\n')):
shutil.rmtree(glob, ignore_errors=True)
for wildcard in filter(bool, ignores.split('\n')):
for filename in glob.glob(wildcard):
try:
os.remove(filename)
except OSError:
shutil.rmtree(filename, ignore_errors=True)
# It's an old-style class in Python 2.7...
distutils.command.clean.clean.run(self)
################################################################################
# Configure compile flags
################################################################################
include_dirs = []
library_dirs = []
extra_link_args = []
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
@ -132,51 +219,137 @@ include_dirs += [
os.path.join(cwd, "torch", "csrc"),
tmp_install_path + "/include",
tmp_install_path + "/include/TH",
tmp_install_path + "/include/THPP",
tmp_install_path + "/include/THNN",
]
extra_link_args.append('-L' + lib_path)
library_dirs.append(lib_path)
main_libraries = ['TH', 'shm']
# we specify exact lib names to avoid conflict with lua-torch installs
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
THS_LIB = os.path.join(lib_path, 'libTHS.so.1')
THC_LIB = os.path.join(lib_path, 'libTHC.so.1')
THCS_LIB = os.path.join(lib_path, 'libTHCS.so.1')
THNN_LIB = os.path.join(lib_path, 'libTHNN.so.1')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.so.1')
THPP_LIB = os.path.join(lib_path, 'libTHPP.so.1')
THD_LIB = os.path.join(lib_path, 'libTHD.so.1')
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')
if platform.system() == 'Darwin':
TH_LIB = os.path.join(lib_path, 'libTH.1.dylib')
THS_LIB = os.path.join(lib_path, 'libTHS.1.dylib')
THC_LIB = os.path.join(lib_path, 'libTHC.1.dylib')
THCS_LIB = os.path.join(lib_path, 'libTHCS.1.dylib')
THNN_LIB = os.path.join(lib_path, 'libTHNN.1.dylib')
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.1.dylib')
THPP_LIB = os.path.join(lib_path, 'libTHPP.1.dylib')
THD_LIB = os.path.join(lib_path, 'libTHD.1.dylib')
NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')
if WITH_NCCL and subprocess.call('ldconfig -p | grep libnccl >/dev/null', shell=True) == 0:
SYSTEM_NCCL = True
main_compile_args = ['-D_THP_CORE']
main_libraries = ['shm']
main_link_args = [TH_LIB, THS_LIB, THPP_LIB, THNN_LIB]
main_sources = [
"torch/csrc/PtrWrapper.cpp",
"torch/csrc/Module.cpp",
"torch/csrc/Generator.cpp",
"torch/csrc/Size.cpp",
"torch/csrc/Exceptions.cpp",
"torch/csrc/Tensor.cpp",
"torch/csrc/Storage.cpp",
"torch/csrc/DynamicTypes.cpp",
"torch/csrc/byte_order.cpp",
"torch/csrc/utils.cpp",
"torch/csrc/utils/object_ptr.cpp",
"torch/csrc/utils/tuple_parser.cpp",
"torch/csrc/allocators.cpp",
"torch/csrc/serialization.cpp",
"torch/csrc/autograd/init.cpp",
"torch/csrc/autograd/engine.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/grad_buffer.cpp",
"torch/csrc/autograd/python_function.cpp",
"torch/csrc/autograd/python_cpp_function.cpp",
"torch/csrc/autograd/python_variable.cpp",
"torch/csrc/autograd/python_engine.cpp",
"torch/csrc/autograd/python_hook.cpp",
"torch/csrc/autograd/functions/batch_normalization.cpp",
"torch/csrc/autograd/functions/convolution.cpp",
"torch/csrc/autograd/functions/init.cpp",
"torch/csrc/nn/THNN_generic.cpp",
]
try:
import numpy as np
include_dirs += [np.get_include()]
extra_compile_args += ['-DWITH_NUMPY']
WITH_NUMPY = True
except ImportError:
pass
WITH_NUMPY = False
if WITH_DISTRIBUTED:
extra_compile_args += ['-DWITH_DISTRIBUTED']
main_sources += [
"torch/csrc/distributed/Module.cpp",
"torch/csrc/distributed/utils.cpp",
]
if WITH_DISTRIBUTED_MW:
main_sources += [
"torch/csrc/distributed/Tensor.cpp",
"torch/csrc/distributed/Storage.cpp",
]
include_dirs += [tmp_install_path + "/include/THD"]
main_link_args += [THD_LIB]
if WITH_CUDA:
if platform.system() == 'Darwin':
cuda_path = '/Developer/NVIDIA/CUDA-7.5'
cuda_include_path = cuda_path + '/include'
cuda_lib_path = cuda_path + '/lib'
else:
cuda_path = '/usr/local/cuda'
cuda_include_path = cuda_path + '/include'
cuda_lib_path = cuda_path + '/lib64'
cuda_lib_dirs = ['lib64', 'lib']
cuda_include_path = os.path.join(CUDA_HOME, 'include')
for lib_dir in cuda_lib_dirs:
cuda_lib_path = os.path.join(CUDA_HOME, lib_dir)
if os.path.exists(cuda_lib_path):
break
include_dirs.append(cuda_include_path)
extra_link_args.append('-L' + cuda_lib_path)
include_dirs.append(tmp_install_path + "/include/THCUNN")
library_dirs.append(cuda_lib_path)
extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
extra_compile_args += ['-DWITH_CUDA']
main_libraries += ['THC']
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
main_libraries += ['cudart']
main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
main_sources += [
"torch/csrc/cuda/Module.cpp",
"torch/csrc/cuda/Storage.cpp",
"torch/csrc/cuda/Stream.cpp",
"torch/csrc/cuda/Tensor.cpp",
"torch/csrc/cuda/AutoGPU.cpp",
"torch/csrc/cuda/utils.cpp",
"torch/csrc/cuda/serialization.cpp",
]
if WITH_NCCL:
if SYSTEM_NCCL:
main_libraries += ['nccl']
else:
main_link_args += [NCCL_LIB]
extra_compile_args += ['-DWITH_NCCL']
if WITH_CUDNN:
main_libraries += ['cudnn']
include_dirs.append(CUDNN_INCLUDE_DIR)
library_dirs.append(CUDNN_LIB_DIR)
main_sources += [
"torch/csrc/cudnn/BatchNorm.cpp",
"torch/csrc/cudnn/Conv.cpp",
"torch/csrc/cudnn/cuDNN.cpp",
"torch/csrc/cudnn/Types.cpp",
"torch/csrc/cudnn/Handles.cpp",
]
extra_compile_args += ['-DWITH_CUDNN']
if DEBUG:
extra_compile_args += ['-O0', '-g']
extra_link_args += ['-O0', '-g']
@ -193,51 +366,85 @@ def make_relative_rpath(path):
################################################################################
extensions = []
packages = find_packages(exclude=('tools.*', 'torch.cuda', 'torch.legacy.cunn'))
packages = find_packages(exclude=('tools.*',))
C = Extension("torch._C",
libraries=main_libraries,
sources=main_sources,
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [make_relative_rpath('lib')]
)
libraries=main_libraries,
sources=main_sources,
language='c++',
extra_compile_args=main_compile_args + extra_compile_args,
include_dirs=include_dirs,
library_dirs=library_dirs,
extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
)
extensions.append(C)
DL = Extension("torch._dl",
sources=["torch/csrc/dl.c"],
language='c',
)
extensions.append(DL)
THNN = Extension("torch._thnn._THNN",
libraries=['TH', 'THNN'],
sources=['torch/csrc/nn/THNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [make_relative_rpath('../lib')]
)
sources=['torch/csrc/nn/THNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THNN_LIB,
make_relative_rpath('../lib'),
]
)
extensions.append(THNN)
if WITH_CUDA:
THCUNN = Extension("torch._thnn._THCUNN",
libraries=['TH', 'THC', 'THCUNN'],
sources=['torch/csrc/nn/THCUNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [make_relative_rpath('../lib')]
)
sources=['torch/csrc/nn/THCUNN.cpp'],
language='c++',
extra_compile_args=extra_compile_args,
include_dirs=include_dirs,
extra_link_args=extra_link_args + [
TH_LIB,
THC_LIB,
THCUNN_LIB,
make_relative_rpath('../lib'),
]
)
extensions.append(THCUNN)
packages += ['torch.cuda', 'torch.legacy.cunn']
setup(name="torch", version="0.1",
ext_modules=extensions,
cmdclass = {
'build': build,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'install': install,
'clean': clean,
},
packages=packages,
package_data={'torch': ['lib/*.so*', 'lib/*.dylib*', 'lib/*.h', 'lib/torch_shm_manager']},
install_requires=['pyyaml'],
)
version = '0.1.11'
if os.getenv('PYTORCH_BUILD_VERSION'):
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
version = os.getenv('PYTORCH_BUILD_VERSION') \
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
else:
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
version += '+' + sha[:7]
except subprocess.CalledProcessError:
pass
setup(name="torch", version=version,
description="Tensors and Dynamic neural networks in Python with strong GPU acceleration",
ext_modules=extensions,
cmdclass={
'build': build,
'build_py': build_py,
'build_ext': build_ext,
'build_deps': build_deps,
'build_module': build_module,
'develop': develop,
'install': install,
'clean': clean,
},
packages=packages,
package_data={'torch': [
'lib/*.so*', 'lib/*.dylib*',
'lib/torch_shm_manager',
'lib/*.h',
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
install_requires=['pyyaml'],
)


@ -1,10 +1,55 @@
import sys
import os
import argparse
import unittest
import contextlib
from functools import wraps
from itertools import product
from copy import deepcopy
import torch
import torch.cuda
from torch.autograd import Variable
from torch.autograd.leaf import Leaf
torch.set_default_tensor_type('torch.DoubleTensor')
def run_tests():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('--seed', type=int, default=123)
args, remaining = parser.parse_known_args()
torch.manual_seed(args.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(args.seed)
remaining = [sys.argv[0]] + remaining
unittest.main(argv=remaining)
TEST_NUMPY = True
try:
import numpy
except ImportError:
TEST_NUMPY = False
TEST_SCIPY = True
try:
import scipy
except ImportError:
TEST_SCIPY = False
def skipIfNoLapack(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as e:
if 'Lapack library not found' in e.args[0]:
raise unittest.SkipTest('Compiled without Lapack')
raise
return wrapper
def get_cpu_type(t):
assert t.__module__ == 'torch.cuda'
@ -17,13 +62,13 @@ def get_gpu_type(t):
def to_gpu(obj, type_map={}):
if torch.isTensor(obj):
if torch.is_tensor(obj):
t = type_map.get(type(obj), get_gpu_type(type(obj)))
return obj.clone().type(t)
elif torch.isStorage(obj):
elif torch.is_storage(obj):
return obj.new().resize_(obj.size()).copy_(obj)
elif isinstance(obj, Variable):
assert type(obj.creator) == Leaf
assert obj.creator is None
t = type_map.get(type(obj.data), get_gpu_type(type(obj.data)))
return Variable(obj.data.clone().type(t), requires_grad=obj.requires_grad)
elif isinstance(obj, list):
@ -34,7 +79,20 @@ def to_gpu(obj, type_map={}):
return deepcopy(obj)
@contextlib.contextmanager
def freeze_rng_state():
rng_state = torch.get_rng_state()
if torch.cuda.is_available():
cuda_rng_state = torch.cuda.get_rng_state()
yield
if torch.cuda.is_available():
torch.cuda.set_rng_state(cuda_rng_state)
torch.set_rng_state(rng_state)
def iter_indices(tensor):
if tensor.dim() == 0:
return range(0)
if tensor.dim() == 1:
return range(tensor.size(0))
return product(*(range(s) for s in tensor.size()))
@ -59,12 +117,19 @@ class TestCase(unittest.TestCase):
x = x.data
y = y.data
if torch.isTensor(x) and torch.isTensor(y):
max_err = 0
super(TestCase, self).assertEqual(x.size().tolist(), y.size().tolist())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertLessEqual(max_err, prec)
if torch.is_tensor(x) and torch.is_tensor(y):
def assertTensorsEqual(a, b):
max_err = 0
super(TestCase, self).assertEqual(a.size(), b.size())
for index in iter_indices(a):
max_err = max(max_err, abs(a[index] - b[index]))
self.assertLessEqual(max_err, prec, message)
self.assertEqual(x.is_sparse, y.is_sparse, message)
if x.is_sparse:
assertTensorsEqual(x.indices(), y.indices())
assertTensorsEqual(x.values(), y.values())
else:
assertTensorsEqual(x, y)
elif type(x) == str and type(y) == str:
super(TestCase, self).assertEqual(x, y)
elif is_iterable(x) and is_iterable(y):
@ -72,68 +137,63 @@ class TestCase(unittest.TestCase):
self.assertEqual(x_, y_, prec, message)
else:
try:
self.assertLessEqual(abs(x - y), prec)
self.assertLessEqual(abs(x - y), prec, message)
return
except:
pass
super(TestCase, self).assertEqual(x, y)
super(TestCase, self).assertEqual(x, y, message)
def assertNotEqual(self, x, y, prec=None, message=''):
if prec is None:
prec = self.precision
if isinstance(x, Variable) and isinstance(y, Variable):
x = x.data
y = y.data
if torch.is_tensor(x) and torch.is_tensor(y):
max_err = 0
if x.size() != y.size():
super(TestCase, self).assertNotEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertGreaterEqual(max_err, prec, message)
elif type(x) == str and type(y) == str:
super(TestCase, self).assertNotEqual(x, y)
elif is_iterable(x) and is_iterable(y):
super(TestCase, self).assertNotEqual(x, y)
else:
try:
self.assertGreaterEqual(abs(x - y), prec, message)
return
except:
pass
super(TestCase, self).assertNotEqual(x, y, message)
def assertObjectIn(self, obj, iterable):
for elem in iterable:
if id(obj) == id(elem):
return
raise AssertionError("object not found in iterable")
def make_jacobian(input, num_out):
if torch.isTensor(input) or isinstance(input, Variable):
return torch.zeros(input.nElement(), num_out)
def download_file(url, path, binary=True):
if sys.version_info < (3,):
import urllib2
request = urllib2
error = urllib2
else:
return type(input)(make_jacobian(elem, num_out) for elem in input)
import urllib.request
import urllib.error
request = urllib.request
error = urllib.error
def iter_tensors(x):
if torch.isTensor(x):
yield x
elif isinstance(x, Variable):
yield x.data
else:
for elem in x:
for result in iter_tensors(elem):
yield result
def contiguous(input):
if torch.isTensor(input):
return input.contiguous()
elif isinstance(input, Variable):
return input.contiguous_()
else:
return type(input)(contiguous(e) for e in input)
def get_numerical_jacobian(fn, input, target):
perturbation = 1e-6
# To be able to use .view(-1) input must be contiguous
input = contiguous(input)
output_size = fn(input).numel()
jacobian = make_jacobian(target, output_size)
# It's much easier to iterate over flattened lists of tensors.
# These are reference to the same objects in jacobian, so any changes
# will be reflected in it as well.
x_tensors = [t for t in iter_tensors(target)]
j_tensors = [t for t in iter_tensors(jacobian)]
outa = torch.Tensor(output_size)
outb = torch.Tensor(output_size)
# TODO: compare structure
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
flat_tensor = x_tensor.view(-1)
for i in range(flat_tensor.nElement()):
orig = flat_tensor[i]
flat_tensor[i] = orig - perturbation
outa.copy_(fn(input))
flat_tensor[i] = orig + perturbation
outb.copy_(fn(input))
flat_tensor[i] = orig
outb.add_(-1,outa).div_(2*perturbation)
d_tensor[i] = outb
return jacobian
if os.path.exists(path):
return True
try:
data = request.urlopen(url, timeout=15).read()
with open(path, 'wb' if binary else 'w') as f:
f.write(data)
return True
except error.URLError as e:
return False


@ -2,10 +2,14 @@ import sys
import tempfile
import unittest
from copy import deepcopy
from itertools import product
import torch
import torch.cuda
from torch.autograd import Variable
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
from common import TestCase, to_gpu, freeze_rng_state
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors, contiguous
import torch.backends.cudnn
# tarfile module tries to obtain a file object name in python 3.3
if sys.version_info[:2] == (3, 3):
@ -13,15 +17,10 @@ if sys.version_info[:2] == (3, 3):
else:
TemporaryFile = tempfile.TemporaryFile
try:
import torch.cuda
import torch.legacy.cunn
import torch.nn.cuda
TEST_CUDA = True
except Exception:
# TODO: catch ImportError once it works with "setup.py develop"
TEST_CUDA = False
TEST_CUDA = torch.cuda.is_available()
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
TEST_CUDNN_VERSION = TEST_CUDNN and torch.backends.cudnn.version()
PRECISION = 1e-5
module_tests = [
@ -29,7 +28,14 @@ module_tests = [
module_name='Linear',
constructor_args=(10, 8),
input_size=(4, 10),
reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
),
dict(
module_name='Linear',
constructor_args=(10, 8, False),
input_size=(4, 10),
desc='no_bias',
reference_fn=lambda i, p: torch.mm(i, p[0].t())
),
dict(
module_name='Threshold',
@ -54,10 +60,22 @@ module_tests = [
input_size=(2, 3, 4, 5),
check_inplace=True
),
dict(
module_name='RReLU',
input_size=(1, 2, 2),
test_cuda=False
),
dict(
module_name='RReLU',
constructor_args=(0.1, 0.9),
input_size=(4, 4, 5),
desc='with_up_down',
test_cuda=False
),
dict(
module_name='Hardtanh',
input_size=(3, 2, 5),
reference_fn=lambda i,_: i.clamp(-1, 1)
reference_fn=lambda i, _: i.clamp(-1, 1)
),
dict(
module_name='Sigmoid',
@ -70,63 +88,23 @@ module_tests = [
dict(
module_name='Softmax',
input_size=(10, 20),
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
),
dict(
module_name='Softmax2d',
input_size=(1, 3, 10, 20),
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expandAs(i))
),
dict(
module_name='BatchNorm1d',
constructor_args=(10,),
input_size=(4, 10),
desc='affine'
),
dict(
module_name='BatchNorm1d',
constructor_args=(10, 1e-3, 0.3, False),
input_size=(4, 10),
desc='not_affine'
),
dict(
module_name='BatchNorm2d',
constructor_args=(3,),
input_size=(2, 3, 6, 6),
),
dict(
module_name='BatchNorm2d',
constructor_args=(3, 1e-3, 0.8),
input_size=(2, 3, 6, 6),
desc='momentum',
),
dict(
module_name='BatchNorm2d',
constructor_args=(3, 1e-3, 0.8, False),
input_size=(2, 3, 6, 6),
desc='no_affine',
),
dict(
module_name='BatchNorm3d',
constructor_args=(3,),
input_size=(2, 3, 4, 4, 4)
),
dict(
module_name='BatchNorm3d',
constructor_args=(3, 1e-3, 0.7),
input_size=(2, 3, 4, 4, 4),
desc='momentum'
),
dict(
module_name='BatchNorm3d',
constructor_args=(3, 1e-3, 0.7, False),
input_size=(2, 3, 4, 4, 4),
desc='no_affine'
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i))
),
dict(
module_name='LogSoftmax',
input_size=(10, 20),
reference_fn=lambda i,_: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
),
dict(
module_name='LogSoftmax',
input_size=(1, 3, 10, 20),
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand_as(i)).log_(),
desc='multiparam'
),
dict(
module_name='ELU',
@ -155,18 +133,18 @@ module_tests = [
dict(
module_name='LogSigmoid',
input_size=(2, 3, 4),
reference_fn=lambda i,_: i.sigmoid().log()
reference_fn=lambda i, _: i.sigmoid().log()
),
dict(
module_name='Softplus',
input_size=(10, 20),
reference_fn=lambda i,_: torch.log(1 + torch.exp(i))
reference_fn=lambda i, _: torch.log(1 + torch.exp(i))
),
dict(
module_name='Softplus',
constructor_args=(2,),
input_size=(10, 20),
reference_fn=lambda i,_: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
desc='beta'
),
dict(
@ -186,18 +164,47 @@ module_tests = [
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5)
input_size=(2, 3, 4),
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
desc='1d',
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4),
desc='1d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5),
desc='2d',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4, 5),
desc='multiparam'
desc='2d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='PReLU',
input_size=(2, 3, 4, 5, 6),
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
desc='3d',
),
dict(
module_name='PReLU',
constructor_args=(3,),
input_size=(2, 3, 4, 5, 6),
desc='3d_multiparam',
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
),
dict(
module_name='Softsign',
input_size=(3, 2, 5),
reference_fn=lambda i,_: i.div(1 + torch.abs(i))
reference_fn=lambda i, _: i.div(1 + torch.abs(i))
),
dict(
module_name='Softmin',
@ -212,11 +219,11 @@ module_tests = [
criterion_tests = [
dict(module_name='L1Loss',
input_size=(2, 3, 4),
target=torch.randn(2, 3, 4),
reference_fn=lambda i,t,_: 1./i.numel() * \
sum((a-b).abs().sum() for a,b in zip(i, t))
),
input_size=(2, 3, 4),
target=torch.randn(2, 3, 4),
reference_fn=lambda i, t, _: 1. / i.numel() *
sum((a - b).abs().sum() for a, b in zip(i, t))
),
dict(
module_name='NLLLoss',
input=torch.rand(15, 10).log(),
@ -238,7 +245,7 @@ criterion_tests = [
module_name='MSELoss',
input=torch.randn(2, 3, 4, 5),
target=torch.randn(2, 3, 4, 5),
reference_fn=lambda i,t,_: (i-t).abs().pow(2).sum() / i.numel()
reference_fn=lambda i, t, _: (i - t).abs().pow(2).sum() / i.numel()
),
dict(
module_name='BCELoss',
@ -253,12 +260,12 @@ criterion_tests = [
desc='weights'
),
dict(
module_name='CELoss',
module_name='CrossEntropyLoss',
input=torch.randn(15, 10),
target=torch.Tensor(15).uniform_().mul(10).floor().long()
),
dict(
module_name='CELoss',
module_name='CrossEntropyLoss',
constructor_args=(torch.rand(10),),
input=torch.randn(15, 10),
target=torch.Tensor(15).uniform_().mul(10).floor().long(),
@ -269,6 +276,13 @@ criterion_tests = [
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long()
),
dict(
module_name='NLLLoss2d',
constructor_args=(torch.rand(3),),
input_size=(2, 3, 5, 5),
target=torch.rand(2, 5, 5).mul(3).floor().long(),
desc='weights'
),
dict(
module_name='HingeEmbeddingLoss',
input=torch.rand(10),
@ -284,7 +298,7 @@ criterion_tests = [
dict(
module_name='MultiLabelMarginLoss',
input_size=(5, 10),
target=torch.rand(5, 10).mul(10).floor()
target=torch.rand(5, 10).mul(10).floor().long()
),
dict(
module_name='MultiLabelSoftMarginLoss',
@ -301,7 +315,7 @@ criterion_tests = [
dict(
module_name='MultiMarginLoss',
input_size=(5, 10),
target=torch.rand(5).mul(8).floor()
target=torch.rand(5).mul(8).floor().long()
),
dict(
module_name='SmoothL1Loss',
@ -348,20 +362,24 @@ class NNTestCase(TestCase):
elif isinstance(input, list):
return [self._jacobian(elem, num_out) for elem in input]
else:
return torch.zeros(input.nElement(), num_out)
return torch.zeros(input.nelement(), num_out)
def _flatten_tensors(self, x):
if torch.isTensor(x):
return x.view(-1)
if torch.is_tensor(x):
if x.is_sparse:
return x.to_dense().view(-1)
else:
return x.view(-1)
elif isinstance(x, Variable):
return x.data.view(-1)
return self._flatten_tensors(x.data)
else:
return tuple(self._flatten_tensors(a) for a in x)
def _zero_grad_input(self, input):
if isinstance(input, Variable):
input.grad.zero_()
elif torch.isTensor(input):
if input.requires_grad and input.grad is not None:
input.grad.data.zero_()
elif torch.is_tensor(input):
return
else:
for i in input:
@ -374,15 +392,15 @@ class NNTestCase(TestCase):
flat_d_out = d_out.view(-1)
if jacobian_input:
jacobian_input = self._jacobian(input, d_out.nElement())
flat_jacobian_input = list(iter_tensors(jacobian_input))
jacobian_inp = self._jacobian(input, d_out.nelement())
flat_jacobian_input = list(iter_tensors(jacobian_inp))
if jacobian_parameters:
param, d_param = self._get_parameters(module)
num_param = sum(p.numel() for p in param)
jacobian_param = torch.zeros(num_param, d_out.nElement())
jacobian_param = torch.zeros(num_param, d_out.nelement())
for i in range(flat_d_out.nElement()):
for i in range(flat_d_out.nelement()):
d_out.zero_()
flat_d_out[i] = 1
@ -395,13 +413,13 @@ class NNTestCase(TestCase):
if jacobian_input:
for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)):
jacobian_x[:,i] = d_x
jacobian_x[:, i] = d_x
if jacobian_parameters:
jacobian_param[:,i] = torch.cat(self._flatten_tensors(d_param), 0)
jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
res = tuple()
if jacobian_input:
res += jacobian_input,
res += jacobian_inp,
if jacobian_parameters:
res += jacobian_param,
@ -409,7 +427,7 @@ class NNTestCase(TestCase):
def _numerical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True):
output = self._forward(module, input)
output_size = output.nElement()
output_size = output.nelement()
if jacobian_parameters:
param, d_param = self._get_parameters(module)
@ -424,9 +442,9 @@ class NNTestCase(TestCase):
# TODO: enable non-contig tests
input = contiguous(input)
if jacobian_input:
res += get_numerical_jacobian(fw, input, input),
res += get_numerical_jacobian(fw, input, input, eps=1e-6),
if jacobian_parameters:
res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
res += torch.cat(list(get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param), 0),
return res
def check_jacobian(self, module, input, jacobian_input=True):
@ -452,13 +470,13 @@ class NNTestCase(TestCase):
for x, d_x in zip(input_t, numerical_t):
x = x.view(-1)
d_x = d_x.view(-1)
for i in range(x.nElement()):
for i in range(x.nelement()):
original = x[i]
x[i] = original + eps
fx1 = self._forward_criterion(criterion, input, target)
x[i] = original - eps
fx2 = self._forward_criterion(criterion, input, target)
deriv = (fx1 - fx2) / (2.*eps)
deriv = (fx1 - fx2) / (2. * eps)
d_x[i] = deriv
x[i] = original
@ -472,8 +490,9 @@ class NNTestCase(TestCase):
class TestBase(object):
def __init__(self, constructor, constructor_args=tuple(), input_size=None,
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
if input_size is None and input is None:
raise RuntimeError("Specify either an input tensor, or it's size!")
self.constructor = constructor
@ -496,7 +515,7 @@ class TestBase(object):
def _unpack_input(self, input):
if isinstance(input, Variable):
return input.data
elif torch.isTensor(input):
elif torch.is_tensor(input):
return input
else:
return type(input)(self._unpack_input(i) for i in input)
@ -508,8 +527,8 @@ class TestBase(object):
def map_input_sizes(sizes):
if isinstance(sizes, list):
return [map_input_sizes(s) for s in sizes]
elif torch.isTensor(sizes):
return sizes
elif torch.is_tensor(sizes):
return sizes.double()
else:
return torch.randn(*sizes)
@ -521,6 +540,7 @@ class TestBase(object):
class ModuleTest(TestBase):
def __init__(self, *args, **kwargs):
super(ModuleTest, self).__init__(*args, **kwargs)
self.jacobian_input = kwargs.get('jacobian_input', True)
@ -538,6 +558,8 @@ class ModuleTest(TestBase):
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
test_case.assertEqual(out, expected_out)
self.test_noncontig(test_case, module, input)
# TODO: do this with in-memory files as soon as torch.save will support it
with TemporaryFile() as f:
test_case._forward(module, input)
@ -548,6 +570,51 @@ class ModuleTest(TestBase):
self._do_test(test_case, module, input)
def noncontiguize(self, obj):
if isinstance(obj, list):
return [self.noncontiguize(o) for o in obj]
tensor = obj.data if isinstance(obj, Variable) else obj
ndim = tensor.dim()
noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
assert noncontig.numel() == 1 or not noncontig.is_contiguous()
if isinstance(obj, Variable):
return Variable(noncontig, requires_grad=obj.requires_grad)
return noncontig
def test_noncontig(self, test_case, module, input):
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(input)
with freeze_rng_state():
output = test_case._forward(module, input)
grad_output = output
if isinstance(grad_output, Variable):
grad_output = grad_output.data.clone()
else:
grad_output = grad_output.clone()
output = output.clone()
grad_output.normal_()
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
d_param = deepcopy(test_case._get_parameters(module)[1])
nc_input = self.noncontiguize(input)
nc_grad_output = self.noncontiguize(grad_output)
for contig_i, contig_g in product((True, False), repeat=2):
i = input if contig_i else nc_input
go = grad_output if contig_g else nc_grad_output
test_case._zero_grad_parameters(module)
test_case._zero_grad_input(i)
with freeze_rng_state():
try:
out = test_case._forward(module, i)
except Exception:
# Some modules will fail because of non contiguous inputs and we're ok with that
continue
grad = test_case._backward(module, i, out, go)
test_case.assertEqual(out, output)
test_case.assertEqual(grad, d_input, 1e-4)
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
def test_cuda(self, test_case):
if not TEST_CUDA or not self.should_test_cuda:
raise unittest.SkipTest('Excluded from CUDA tests')
@ -557,9 +624,7 @@ class ModuleTest(TestBase):
gpu_input = to_gpu(cpu_input, type_map=type_map)
cpu_module = self.constructor(*self.constructor_args)
gpu_module = self.constructor(*self.constructor_args).cuda()
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
gpu_module = self.constructor(*self.constructor_args).float().cuda()
cpu_param = test_case._get_parameters(cpu_module)
gpu_param = test_case._get_parameters(gpu_module)
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
@ -569,6 +634,10 @@ class ModuleTest(TestBase):
gpu_p = gpu_p.data
gpu_p.copy_(cpu_p)
test_case._zero_grad_input(cpu_input)
test_case._zero_grad_input(gpu_input)
test_case._zero_grad_parameters(cpu_module)
test_case._zero_grad_parameters(gpu_module)
cpu_output = test_case._forward(cpu_module, cpu_input)
gpu_output = test_case._forward(gpu_module, gpu_input)
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
@ -582,6 +651,8 @@ class ModuleTest(TestBase):
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
self.test_noncontig(test_case, gpu_module, gpu_input)
except NotImplementedError:
pass
# TODO: remove this after CUDA scatter_ is implemented
@ -593,6 +664,7 @@ class ModuleTest(TestBase):
class CriterionTest(TestBase):
def __init__(self, *args, **kwargs):
super(CriterionTest, self).__init__(*args, **kwargs)
self.target = self._get_target(kwargs['target'])
@ -615,7 +687,7 @@ class CriterionTest(TestBase):
if isinstance(target, Variable):
target = target.data
expected_out = self.reference_fn(deepcopy(self._unpack_input(input)),
deepcopy(target), module)
deepcopy(target), module)
test_case.assertEqual(out, expected_out)
test_case.check_criterion_jacobian(module, input, self.target)
@ -627,7 +699,6 @@ class CriterionTest(TestBase):
cpu_input = self._get_input()
type_map = {
torch.DoubleTensor: torch.cuda.FloatTensor,
torch.LongTensor: torch.cuda.FloatTensor
}
gpu_input = to_gpu(cpu_input, type_map=type_map)
@ -635,15 +706,14 @@ class CriterionTest(TestBase):
gpu_target = to_gpu(self.target, type_map=type_map)
cpu_module = self.constructor(*self.constructor_args)
gpu_module = self.constructor(*self.constructor_args).cuda()
gpu_module = self.constructor(*self.constructor_args).float().cuda()
cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target)
gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target)
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
test_case.assertEqual(cpu_output, gpu_output, 4e-4)
cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target)
gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 4e-4)
except NotImplementedError:
pass

test/data/network1.py Normal file

@ -0,0 +1,8 @@
import torch.nn as nn
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)

test/data/network2.py Normal file

@ -0,0 +1,9 @@
import torch.nn as nn
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)
self.relu = nn.ReLU()


@ -0,0 +1,71 @@
import torch
def check_error(desc, fn, *required_substrings):
try:
fn()
except Exception as e:
error_message = e.args[0]
print('=' * 80)
print(desc)
print('-' * 80)
print(error_message)
print('')
for sub in required_substrings:
assert sub in error_message
return
assert False, "given function ({}) didn't raise an error".format(desc)
check_error(
'Wrong argument types',
lambda: torch.FloatStorage(object()),
'object')
check_error('Unknown keyword argument',
lambda: torch.FloatStorage(content=1234.),
'keyword')
check_error('Invalid types inside a sequence',
lambda: torch.FloatStorage(['a', 'b']),
'list', 'str')
check_error('Invalid size type',
lambda: torch.FloatStorage(1.5),
'float')
check_error('Invalid offset',
lambda: torch.FloatStorage(torch.FloatStorage(2), 4),
'2', '4')
check_error('Negative offset',
lambda: torch.FloatStorage(torch.FloatStorage(2), -1),
'2', '-1')
check_error('Invalid size',
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, 5),
'2', '1', '5')
check_error('Negative size',
lambda: torch.FloatStorage(torch.FloatStorage(3), 1, -5),
'2', '1', '-5')
check_error('Invalid index type',
lambda: torch.FloatStorage(10)['first item'],
'str')
def assign():
torch.FloatStorage(10)[1:-1] = '1'
check_error('Invalid value type',
assign,
'str')
check_error('resize_ with invalid type',
lambda: torch.FloatStorage(10).resize_(1.5),
'float')
check_error('fill_ with invalid type',
lambda: torch.IntStorage(10).fill_('asdf'),
'str')
# TODO: frombuffer

test/ffi/src/cpu/lib.h Normal file

@ -0,0 +1,6 @@
void good_func(THFloatTensor *tensor, int a, float b);
void bad_func(THFloatTensor *tensor, int a, float b);
THFloatTensor * new_tensor(int a);
float int_to_float(int a);

test/ffi/src/cpu/lib1.c Normal file

@ -0,0 +1,19 @@
#include <TH/TH.h>
void good_func(THFloatTensor *tensor, int a, float b)
{
THFloatTensor_mul(tensor, tensor, a);
THFloatTensor_add(tensor, tensor, b);
}
THFloatTensor * new_tensor(int a)
{
THFloatTensor *t = THFloatTensor_newWithSize2d(a, a);
THFloatTensor_fill(t, a);
return t;
}
float int_to_float(int a)
{
return a;
}

test/ffi/src/cpu/lib2.c Normal file

@ -0,0 +1,8 @@
#include <TH/TH.h>
void bad_func(THFloatTensor *tensor, int a, float b)
{
THFloatTensor_mul(tensor, tensor, a);
THFloatTensor_add(tensor, tensor, b);
THFloatTensor_addbmm(tensor, 1, tensor, 1, tensor, tensor);
}


@ -0,0 +1,12 @@
#include <TH/TH.h>
#include <THC/THC.h>
extern THCState *state;
#include "../cpu/lib1.c"
void cuda_func(THCudaTensor *tensor, int a, float b)
{
THCudaTensor_mul(state, tensor, tensor, a);
THCudaTensor_add(state, tensor, tensor, b);
}


@ -0,0 +1,5 @@
void good_func(THFloatTensor *tensor, int a, float b);
void cuda_func(THCudaTensor *tensor, int a, float b);
THFloatTensor * new_tensor(int a);
float int_to_float(int a);

test/ffi/src/lib.h Normal file

@ -0,0 +1,5 @@
void my_func(THFloatTensor *tensor, int a, float b);
void my_cuda_func(THCudaTensor *tensor, int a, float b);
THFloatTensor * new_t(int a);
float new_int(int a);


@ -1,5 +1,5 @@
# th test.lua > lua.out
th test.lua > lua.out
python3 test.py > python.out
diff lua.out python.out >/dev/null 2>&1

File diff suppressed because it is too large


@ -1,39 +0,0 @@
assert(arg[1])
funcs = {
'resizeAs', 'add', 'zero', 'mul', 'div', 'abs',
'addcmul', 'addcdiv', 'copy', 'sqrt', 'fill',
{'cmul', 'mul'},
{'cdiv', 'div'},
}
for _, val in pairs(funcs) do
local name, newname
if type(val) == 'table' then
name = val[1]
newname = val[2]
else
name = val
newname = val .. '_'
end
command = "sed -i -r "
.. "'/torch\\." .. name .. "\\(/b; " -- short-circuits
.. "s/([a-zA-Z]*)\\." .. name .. "\\(" -- substitution
.. "/"
.. "\\1\\." .. newname .. "\\(/g' " .. arg[1]
print(command)
os.execute(command)
command = "sed -i 's/math\\." .. newname
.. "/math\\." .. name .. "/' " .. arg[1]
print(command)
os.execute(command)
end
funcs = {
{'torch\.cmul', 'torch\.mul'},
{'torch\.cdiv', 'torch\.div'},
}
for _, val in pairs(funcs) do
command = "sed -i 's/" .. val[1] .. "/" .. val[2] .. "/' " .. arg[1]
print(command)
os.execute(command)
end

test/optim/test.lua Normal file

@ -0,0 +1,33 @@
local cjson = require 'cjson'
require 'optim'
function rosenbrock(t)
x, y = t[1], t[2]
return (1 - x) ^ 2 + 100 * (y - x^2)^2
end
function drosenbrock(t)
x, y = t[1], t[2]
return torch.DoubleTensor({-400 * x * (y - x^2) - 2 * (1 - x), 200 * x * (y - x^2)})
end
local fd = io.open('tests.json', 'r')
local tests = cjson.decode(fd:read('*a'))
fd:close()
for i, test in ipairs(tests) do
print(test.algorithm)
algorithm = optim[test.algorithm]
for i, config in ipairs(test.config) do
print('================================================================================')
params = torch.DoubleTensor({1.5, 1.5})
for i = 1, 100 do
function closure(x)
return rosenbrock(x), drosenbrock(x)
end
algorithm(closure, params, config)
print(string.format('%.8f\t%.8f', params[1], params[2]))
end
end
end


@ -3,13 +3,15 @@ import torch
import torch.legacy.optim as optim
from pprint import pprint
def rosenbrock(tensor):
x, y = tensor
return (1 - x)**2 + 100 * (y - x**2)**2
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
def drosenbrock(tensor):
x, y = tensor
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * x * (y - x**2)))
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * x * (y - x ** 2)))
algorithms = {
'adadelta': optim.adadelta,
@ -22,6 +24,7 @@ algorithms = {
'rmsprop': optim.rmsprop,
'rprop': optim.rprop,
'sgd': optim.sgd,
'lbfgs': optim.lbfgs,
}
with open('tests.json', 'r') as f:
@ -35,4 +38,4 @@ for test in tests:
params = torch.DoubleTensor((1.5, 1.5))
for i in range(100):
algorithm(lambda x: (rosenbrock(x), drosenbrock(x)), params, config)
print('{:.12f}\t{:.12f}\t'.format(params[0], params[1]))
print('{:.8f}\t{:.8f}\t'.format(params[0], params[1]))


@ -98,5 +98,12 @@
{"learningRate": 1e-4, "nesterov": true, "momentum": 0.95, "dampening": 0},
{"weightDecay": 0.2}
]
},
{
"algorithm": "lbfgs",
"config": [
{},
{"learningRate": 1e-1}
]
}
]


@ -1,31 +1,90 @@
#!/usr/bin/env bash
set -e
PYCMD=${PYCMD:="python"}
COVERAGE=0
while [[ "$#" -gt 0 ]]; do
case "$1" in
-p|--python) PYCMD=$2; shift 2 ;;
-c|--coverage) COVERAGE=1; shift 1;;
--) shift; break ;;
*) echo "Invalid argument: $1!" ; exit 1 ;;
esac
done
if [[ $COVERAGE -eq 1 ]]; then
coverage erase
PYCMD="coverage run --parallel-mode --source torch "
echo "coverage flag found. Setting python command to: \"$PYCMD\""
fi
pushd "$(dirname "$0")"
echo "Running torch tests"
python test_torch.py
$PYCMD test_torch.py $@
echo "Running autograd tests"
python test_autograd.py
$PYCMD test_autograd.py $@
echo "Running sparse tests"
$PYCMD test_sparse.py $@
echo "Running nn tests"
python test_nn.py
$PYCMD test_nn.py $@
echo "Running legacy nn tests"
python test_legacy_nn.py
$PYCMD test_legacy_nn.py $@
echo "Running optim tests"
$PYCMD test_optim.py $@
echo "Running multiprocessing tests"
python test_multiprocessing.py
$PYCMD test_multiprocessing.py $@
MULTIPROCESSING_METHOD=spawn $PYCMD test_multiprocessing.py $@
MULTIPROCESSING_METHOD=forkserver $PYCMD test_multiprocessing.py $@
echo "Running util tests"
python test_utils.py
if which nvcc >/dev/null 2>&1
then
echo "Running cuda tests"
python test_cuda.py
else
echo "nvcc not found in PATH, skipping CUDA tests"
$PYCMD test_utils.py $@
echo "Running dataloader tests"
$PYCMD test_dataloader.py $@
echo "Running cuda tests"
$PYCMD test_cuda.py $@
echo "Running NCCL tests"
$PYCMD test_nccl.py $@
################################################################################
if [[ "$TEST_DISTRIBUTED" -eq 1 ]]; then
distributed_set_up() {
export TEMP_DIR="$(mktemp -d)"
rm -rf "$TEMP_DIR/"*
mkdir "$TEMP_DIR/barrier"
mkdir "$TEMP_DIR/test_dir"
}
distributed_tear_down() {
rm -rf "$TEMP_DIR"
}
trap distributed_tear_down EXIT SIGHUP SIGINT SIGTERM
echo "Running distributed tests for the TCP backend"
distributed_set_up
BACKEND=tcp WORLD_SIZE=3 $PYCMD ./test_distributed.py
distributed_tear_down
echo "Running distributed tests for the MPI backend"
distributed_set_up
BACKEND=mpi mpiexec -n 3 $PYCMD ./test_distributed.py
distributed_tear_down
fi
################################################################################
if [[ $COVERAGE -eq 1 ]]; then
coverage combine
coverage html
fi
popd

File diff suppressed because it is too large


@ -1,10 +1,21 @@
import math
import tempfile
import unittest
from itertools import repeat
import torch
import torch.cuda
import torch.cuda.comm as comm
from test_torch import TestTorch
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests
HAS_CUDA = True
if not torch.cuda.is_available():
print('CUDA not available, skipping tests')
TestCase = object # noqa: F811
HAS_CUDA = False
from common import TestCase, get_gpu_type, to_gpu
def is_floating(t):
return type(t) in [torch.FloatTensor, torch.DoubleTensor,
@ -19,35 +30,88 @@ types = [
torch.CharTensor,
torch.ByteTensor,
]
float_types = [
torch.FloatTensor,
torch.DoubleTensor
] # TODO: add half...
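# Returns the floating-point or the integer constant depending on the tensor
# type, so that integer tensor variants are exercised with integer arguments.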
def number(floating, integer, t):
name = type(t).__name__
if 'Double' in name or 'Float' in name or 'Half' in name:
return floating
else:
return integer
# TODO: check HalfTensor
S = 10
M = 50
def make_tensor(t, *sizes):
return t(*sizes).copy_(torch.randn(*sizes))
def small_2d(t):
return make_tensor(t, S, S)
def small_2d_scaled(t, scale=10):
return make_tensor(t, S, S).mul(scale)
def small_2d_oneish(t):
if is_floating(t):
return make_tensor(t, S, S).clamp(min=0.99, max=1.01)
else:
return t(S, S).fill_(1)
def small_3d(t):
return make_tensor(t, S, S, S)
def medium_1d(t):
return make_tensor(t, M)
def medium_2d(t):
return make_tensor(t, M, M)
def medium_2d_scaled(t, scale=10):
return make_tensor(t, M, M).mul(scale)
def small_3d_ones(t):
return t(S, S, S).copy_(torch.ones(S, S, S))
def small_3d_positive(t):
min_val = 1e-3 if is_floating(t) else 2
return make_tensor(t, S, S, S).clamp_(min_val, 120)
def small_3d_unique(t):
return t(S, S, S).copy_(torch.range(1, S*S*S))
return t(S, S, S).copy_(torch.range(1, S * S * S))
def small_1d_lapack(t):
return t(1, 3).copy_(torch.range(1, 3).view(3))
def small_2d_lapack(t):
return t(3, 3).copy_(torch.range(1, 9).view(3, 3))
def small_2d_lapack_skinny(t):
return t(3, 4).copy_(torch.range(1, 12).view(3, 4))
def small_2d_lapack_fat(t):
return t(4, 3).copy_(torch.range(1, 12).view(4, 3))
def new_t(*sizes):
def tmp(t):
@ -55,174 +119,206 @@ def new_t(*sizes):
return tmp
tests = [
('add', small_3d, lambda t: [3.14] ),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('add', small_3d, lambda t: [0.2, small_3d_positive(t)], 'scalar_tensor' ),
('sub', small_3d, lambda t: [3.14], ),
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('mul', small_3d, lambda t: [3.14], ),
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('div', small_3d, lambda t: [3.14], ),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('pow', small_3d, lambda t: [3.14], ),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor' ),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], ),
('addbmm', small_2d, lambda t: [0.2, small_3d(t), small_3d(t)], 'scalar' ),
('addbmm', small_2d, lambda t: [0.5, 0.2, small_3d(t), small_3d(t)], 'two_scalars' ),
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('baddbmm', small_3d, lambda t: [0.2, small_3d(t), small_3d(t)], 'scalar' ),
('baddbmm', small_3d, lambda t: [0.5, 0.2, small_3d(t), small_3d(t)], 'two_scalars' ),
('addcdiv', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('addcdiv', small_3d, lambda t: [0.2, small_3d(t), small_3d(t)], 'scalar' ),
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('addcmul', small_3d, lambda t: [0.2, small_3d(t), small_3d(t)], 'scalar' ),
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], ),
('addmm', medium_2d, lambda t: [0.2, medium_2d(t), medium_2d(t)], 'scalar' ),
('addmm', medium_2d, lambda t: [0.5, 0.2, medium_2d(t), medium_2d(t)], 'two_scalars' ),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
('addmv', medium_1d, lambda t: [0.2, medium_2d(t), medium_1d(t)], 'scalar' ),
('addmv', medium_1d, lambda t: [0.5, 0.2, medium_2d(t), medium_1d(t)], 'two_scalars' ),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
('addmv', medium_1d, lambda t: [0.2, medium_2d(t), medium_1d(t)], 'scalar' ),
('addmv', medium_1d, lambda t: [0.5, 0.2, medium_2d(t), medium_1d(t)], 'two_scalars' ),
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], ),
('addr', medium_2d, lambda t: [0.2, medium_1d(t), medium_1d(t)], 'scalar' ),
('addr', medium_2d, lambda t: [0.5, 0.2, medium_1d(t), medium_1d(t)], 'two_scalars' ),
('addr', medium_2d, lambda t: [0.5, 0.2, medium_1d(t), medium_1d(t)], 'two_scalars' ),
('atan2', medium_2d, lambda t: [medium_2d(t)], ),
('chunk', medium_2d, lambda t: [4], ),
('chunk', medium_2d, lambda t: [4, 1], 'dim' ),
('clamp', medium_2d, lambda t: [-0.1, 0.5], ),
('clone', medium_2d, lambda t: [], ),
('cmax', medium_2d, lambda t: [medium_2d(t)], ),
('cmin', medium_2d, lambda t: [medium_2d(t)], ),
('contiguous', medium_2d, lambda t: [], ),
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)], ),
('cumprod', small_3d, lambda t: [1], ),
('cumsum', small_3d, lambda t: [1], ),
('dim', small_3d, lambda t: [], ),
('dist', small_2d, lambda t: [small_2d(t)], ),
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm' ),
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2.5_norm' ),
('dot', medium_1d, lambda t: [medium_1d(t)], ),
('elementSize', medium_1d, lambda t: [], ),
('eq', small_3d_ones, lambda t: [small_3d(t)], ),
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
('ne', small_3d_ones, lambda t: [small_3d(t)], ),
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], ),
('equal', small_3d_ones, lambda t: [small_3d(t)], ),
('expand', new_t(M, 1, M), lambda t: [M, 4, M], ),
('expandAs', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)], ),
('fill', medium_2d, lambda t: [3.14], ),
('ge', medium_2d, lambda t: [medium_2d(t)], ),
('le', medium_2d, lambda t: [medium_2d(t)], ),
('gt', medium_2d, lambda t: [medium_2d(t)], ),
('lt', medium_2d, lambda t: [medium_2d(t)], ),
('isContiguous', medium_2d, lambda t: [], ),
('add', small_3d, lambda t: [number(3.14, 3, t)]),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'),
('sub', small_3d, lambda t: [number(3.14, 3, t)],),
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('mul', small_3d, lambda t: [number(3.14, 3, t)],),
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('div', small_3d, lambda t: [number(3.14, 3, t)],),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)],),
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)],),
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t),
small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar'),
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)],),
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)],),
('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'),
('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)],),
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'),
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'),
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)],),
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'),
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'),
('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types),
('fmod', small_3d, lambda t: [3], 'value'),
('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('chunk', medium_2d, lambda t: [4],),
('chunk', medium_2d, lambda t: [4, 1], 'dim'),
('clamp', medium_2d_scaled, lambda t: [-1, 5],),
('clone', medium_2d, lambda t: [],),
('contiguous', medium_2d, lambda t: [],),
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],),
('cumprod', small_3d, lambda t: [1],),
('cumsum', small_3d, lambda t: [1],),
('dim', small_3d, lambda t: [],),
('dist', small_2d, lambda t: [small_2d(t)],),
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'),
('dot', medium_1d, lambda t: [medium_1d(t)],),
('element_size', medium_1d, lambda t: [],),
('eq', small_3d_ones, lambda t: [small_3d(t)],),
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('ne', small_3d_ones, lambda t: [small_3d(t)],),
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
('equal', small_3d_ones, lambda t: [small_3d(t)],),
('expand', new_t(M, 1, M), lambda t: [M, 4, M],),
('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)],),
('fill', medium_2d, lambda t: [number(3.14, 3, t)],),
('ge', medium_2d, lambda t: [medium_2d(t)],),
('le', medium_2d, lambda t: [medium_2d(t)],),
('gt', medium_2d, lambda t: [medium_2d(t)],),
('lt', medium_2d, lambda t: [medium_2d(t)],),
('is_contiguous', medium_2d, lambda t: [],),
# TODO: can't check negative case - GPU copy will be contiguous
('isSameSizeAs', medium_2d, lambda t: [small_3d(t)], 'negative' ),
('isSameSizeAs', medium_2d, lambda t: [medium_2d(t)], 'positive' ),
('isSetTo', medium_2d, lambda t: [medium_2d(t)], ),
('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative'),
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive'),
('is_set_to', medium_2d, lambda t: [medium_2d(t)],),
# TODO: positive case
('isSize', medium_2d, lambda t: [torch.LongStorage((M, M))], ),
('kthvalue', small_3d_unique, lambda t: [3], ),
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim' ),
('lerp', small_3d, lambda t: [small_3d(t), 0.3], ),
('max', small_3d_unique, lambda t: [], ),
('max', small_3d_unique, lambda t: [1], 'dim' ),
('min', small_3d_unique, lambda t: [], ),
('min', small_3d_unique, lambda t: [1], 'dim' ),
('mean', small_3d, lambda t: [], ),
('mean', small_3d, lambda t: [1], 'dim' ),
('mode', small_3d, lambda t: [], ),
('mode', small_3d, lambda t: [1], 'dim' ),
('std', small_3d, lambda t: [], ),
('std', small_3d, lambda t: [1], 'dim' ),
('var', small_3d, lambda t: [], ),
('var', small_3d, lambda t: [1], 'dim' ),
('nDimension', small_3d, lambda t: [], ),
('nElement', small_3d, lambda t: [], ),
('numel', small_3d, lambda t: [], ),
('narrow', small_3d, lambda t: [1, 3, 2], ),
('nonzero', small_3d, lambda t: [], ),
('norm', small_3d, lambda t: [], ),
('norm', small_3d, lambda t: [3], '3_norm' ),
('norm', small_3d, lambda t: [3, 0], '3_norm_dim' ),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5], ),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0], ),
('prod', small_3d, lambda t: [], ),
('prod', small_3d, lambda t: [1], 'dim' ),
('sum', small_2d, lambda t: [], ),
('sum', small_3d, lambda t: [1], 'dim' ),
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm' ),
('renorm', small_3d, lambda t: [1.5, 1, 1], '1.5_norm' ),
('repeatTensor', small_2d, lambda t: [2, 2, 2], ),
('size', new_t(1, 2, 3, 4), lambda t: [], ),
('sort', small_3d_unique, lambda t: [], ),
('sort', small_3d_unique, lambda t: [1], 'dim' ),
('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
('split', small_3d, lambda t: [2], ),
('split', small_3d, lambda t: [2, 1], 'dim' ),
('squeeze', new_t(1, 2, 1, 4), lambda t: [], ),
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim' ),
('t', new_t(1, 2), lambda t: [], ),
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2], ),
('to_list', small_3d, lambda t: [], ),
('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort' ),
('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort' ),
('trace', medium_2d, lambda t: [], ),
('tril', medium_2d, lambda t: [], ),
('tril', medium_2d, lambda t: [2], 'positive' ),
('tril', medium_2d, lambda t: [-2], 'negative' ),
('triu', medium_2d, lambda t: [], ),
('triu', medium_2d, lambda t: [2], 'positive' ),
('triu', medium_2d, lambda t: [-2], 'negative' ),
('view', small_3d, lambda t: [100, 10], ),
('viewAs', small_3d, lambda t: [t(100, 10)], ),
('zero', small_3d, lambda t: [], ),
('zeros', small_3d, lambda t: [1, 2, 3, 4], ),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], ),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], ),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], ),
('kthvalue', small_3d_unique, lambda t: [3],),
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
('lerp', small_3d, lambda t: [small_3d(t), 0.3],),
('max', small_3d_unique, lambda t: [],),
('max', small_3d_unique, lambda t: [1], 'dim'),
('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
('min', small_3d_unique, lambda t: [],),
('min', small_3d_unique, lambda t: [1], 'dim'),
('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
('mean', small_3d, lambda t: [],),
('mean', small_3d, lambda t: [1], 'dim'),
('mode', small_3d, lambda t: [],),
('mode', small_3d, lambda t: [1], 'dim'),
('remainder', small_3d, lambda t: [3], 'value'),
('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
('std', small_3d, lambda t: [],),
('std', small_3d, lambda t: [1], 'dim'),
('var', small_3d, lambda t: [],),
('var', small_3d, lambda t: [1], 'dim'),
('ndimension', small_3d, lambda t: [],),
('nelement', small_3d, lambda t: [],),
('numel', small_3d, lambda t: [],),
('narrow', small_3d, lambda t: [1, 3, 2],),
('nonzero', small_3d, lambda t: [],),
('norm', small_3d, lambda t: [],),
('norm', small_3d, lambda t: [3], '3_norm'),
('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
('prod', small_2d_oneish, lambda t: [],),
('prod', small_3d, lambda t: [1], 'dim'),
('sum', small_2d, lambda t: [],),
('sum', small_3d, lambda t: [1], 'dim'),
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
('repeat', small_2d, lambda t: [2, 2, 2],),
('size', new_t(1, 2, 3, 4), lambda t: [],),
('sort', small_3d_unique, lambda t: [],),
('sort', small_3d_unique, lambda t: [1], 'dim'),
('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
('split', small_3d, lambda t: [2],),
('split', small_3d, lambda t: [2, 1], 'dim'),
('squeeze', new_t(1, 2, 1, 4), lambda t: [],),
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
('t', new_t(1, 2), lambda t: [],),
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
('to_list', small_3d, lambda t: [],),
('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort'),
('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort'),
('trace', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [2], 'positive'),
('tril', medium_2d, lambda t: [-2], 'negative'),
('triu', medium_2d, lambda t: [],),
('triu', medium_2d, lambda t: [2], 'positive'),
('triu', medium_2d, lambda t: [-2], 'negative'),
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
('view', small_3d, lambda t: [100, 10],),
('view_as', small_3d, lambda t: [t(100, 10)],),
('zero', small_3d, lambda t: [],),
('zeros', small_3d, lambda t: [1, 2, 3, 4],),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
# lapack tests
('qr', small_2d_lapack, lambda t: [], 'square', float_types),
('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types),
('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types),
]
# TODO: random functions, cat, gather, scatter, index*, masked*, resize, resizeAs, storageOffset, storage, stride, unfold
# TODO: random functions, cat, gather, scatter, index*, masked*,
# resize, resizeAs, storage_offset, storage, stride, unfold
custom_precision = {
'addbmm': 1e-4,
'addmm': 1e-4,
'addmv': 1e-4,
'addr': 1e-4,
'baddbmm': 1e-4,
'rsqrt': 1e-4,
'cumprod': 1e-4,
}
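# These per-op tolerances are used when the tests are generated at the bottom
# of this file; a sketch of the lookup (the 1e-5 fallback is assumed to be the
# usual TestCase default):
#   custom_precision.get('addmm', 1e-5)  # -> 1e-4
#   custom_precision.get('clamp', 1e-5)  # -> 1e-5 (default)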
simple_pointwise = [
'abs',
'acos',
'asin',
'atan',
'ceil',
'cinv',
'cos',
'cosh',
'exp',
'floor',
'fmod',
'frac',
'log',
'log1p',
'neg',
'remainder',
'round',
'sigmoid',
'sign',
'sin',
'sqrt',
'tanh',
'trunc',
]
for fn in simple_pointwise:
tests.append((fn, small_3d, lambda t: []))
simple_pointwise_float = [
'log',
'log1p',
'sigmoid',
'sin',
'sqrt',
'tanh',
'acos',
'asin',
'atan',
'cos',
'cosh',
'exp',
'reciprocal',
'floor',
'frac',
'neg',
'round',
'trunc',
'ceil',
]
for fn in simple_pointwise_float:
tests.append((fn, small_3d, lambda t: [], None, float_types))
_cycles_per_ms = None
def get_cycles_per_ms():
"""Approximate number of cycles per millisecond for torch.cuda._sleep"""
global _cycles_per_ms
if _cycles_per_ms is None:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
torch.cuda._sleep(1000000)
end.record()
end.synchronize()
_cycles_per_ms = 1000000 / start.elapsed_time(end)
return _cycles_per_ms
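# Usage sketch (illustrative): the stream, event and pinned-memory tests below
# keep the GPU busy for roughly 50 ms with
#   torch.cuda._sleep(int(50 * get_cycles_per_ms()))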
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
def tmp(self):
cpu_tensor = tensor_constructor(t)
@ -237,6 +333,11 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
if 'unimplemented data type' in reason:
raise unittest.SkipTest('unimplemented data type')
raise
except AttributeError as e:
reason = e.args[0]
if 'object has no attribute' in reason:
raise unittest.SkipTest('unimplemented data type')
raise
# If one changes, another should change as well
self.assertEqual(cpu_tensor, gpu_tensor, precision)
self.assertEqual(cpu_args, gpu_args, precision)
@ -244,25 +345,46 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
self.assertEqual(cpu_result, gpu_result, precision)
return tmp
class TestCuda(TestCase):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_autogpu(self):
if torch.cuda.deviceCount() > 1:
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.getDevice(), 0)
self.assertEqual(x.getDevice(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.getDevice(), 1)
q = x.add(y)
self.assertEqual(q.getDevice(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.getDevice(), 1)
z = z.cuda()
self.assertEqual(z.getDevice(), 0)
x = torch.randn(5, 5).cuda()
y = torch.randn(5, 5).cuda()
self.assertEqual(x.get_device(), 0)
self.assertEqual(x.get_device(), 0)
with torch.cuda.device(1):
z = torch.randn(5, 5).cuda()
self.assertEqual(z.get_device(), 1)
q = x.add(y)
self.assertEqual(q.get_device(), 0)
w = torch.randn(5, 5).cuda()
self.assertEqual(w.get_device(), 1)
z = z.cuda()
self.assertEqual(z.get_device(), 0)
def test_serialization(self):
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_copy_device(self):
x = torch.randn(5, 5).cuda()
with torch.cuda.device(1):
y = x.cuda()
self.assertEqual(y.get_device(), 1)
self.assertIs(y.cuda(), y)
z = y.cuda(0)
self.assertEqual(z.get_device(), 0)
self.assertIs(z.cuda(0), z)
x = torch.randn(5, 5)
with torch.cuda.device(1):
y = x.cuda()
self.assertEqual(y.get_device(), 1)
self.assertIs(y.cuda(), y)
z = y.cuda(0)
self.assertEqual(z.get_device(), 0)
self.assertIs(z.cuda(0), z)
def test_serialization_array_with_storage(self):
x = torch.randn(5, 5).cuda()
y = torch.IntTensor(2, 5).fill_(0).cuda()
q = [x, y, x, y.storage()]
@ -295,38 +417,370 @@ class TestCuda(TestCase):
self.assertIs(type(y.cuda().float().cpu()), torch.FloatStorage)
self.assertIs(type(y.cuda().float().cpu().int()), torch.IntStorage)
@unittest.skipIf(torch.cuda.deviceCount() < 2, "only one GPU detected")
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_type_conversions_same_gpu(self):
x = torch.randn(5, 5).cuda(1)
self.assertEqual(x.int().getDevice(), 1)
self.assertEqual(x.int().get_device(), 1)
def _test_broadcast(self, input):
if torch.cuda.device_count() < 2:
raise unittest.SkipTest("only one GPU detected")
result = comm.broadcast(input, (0, 1))
for i, t in enumerate(result):
self.assertEqual(t.get_device(), i)
self.assertEqual(t, input)
def test_broadcast_cpu(self):
self._test_broadcast(torch.randn(5, 5))
def test_broadcast_gpu(self):
self._test_broadcast(torch.randn(5, 5))
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_reduce_add(self):
x = torch.randn(5, 5)
y = torch.randn(5, 5)
x_cuda = x.cuda(0)
y_cuda = y.cuda(1)
result = comm.reduce_add((x_cuda, y_cuda))
self.assertEqual(result.get_device(), 0)
self.assertEqual(result.cpu(), x + y)
def _test_scatter(self, input, chunk_sizes=None, dim=0):
if torch.cuda.device_count() < 2:
raise unittest.SkipTest("only one GPU detected")
result = comm.scatter(input, (0, 1), chunk_sizes, dim)
self.assertEqual(len(result), 2)
if chunk_sizes is None:
chunk_sizes = tuple(repeat(input.size(dim) // 2, 2))
chunk_start = 0
for i, r in enumerate(result):
chunk_end = chunk_start + chunk_sizes[i]
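# Build a dimension-generic slice: take the full range along every dimension
# except `dim`, where only this chunk's range is selected.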
index = [slice(None, None), slice(None, None)]
index[dim] = slice(chunk_start, chunk_end)
self.assertEqual(r, input[tuple(index)], 0)
chunk_start = chunk_end
def test_scatter_cpu(self):
self._test_scatter(torch.randn(4, 4), dim=0)
def test_scatter_cpu_dim(self):
self._test_scatter(torch.randn(4, 4), dim=1)
def test_scatter_cpu_sizes(self):
self._test_scatter(torch.randn(6, 4), chunk_sizes=(2, 4))
def test_scatter_gpu(self):
self._test_scatter(torch.randn(4, 4).cuda(), dim=0)
def test_scatter_gpu_dim(self):
self._test_scatter(torch.randn(4, 4).cuda(), dim=1)
def test_scatter_gpu_sizes(self):
self._test_scatter(torch.randn(6, 4).cuda(), chunk_sizes=(2, 4))
def _test_gather(self, dim):
if torch.cuda.device_count() < 2:
raise unittest.SkipTest("only one GPU detected")
x = torch.randn(2, 5).cuda(0)
y = torch.randn(2, 5).cuda(1)
result = comm.gather((x, y), dim)
expected_size = list(x.size())
expected_size[dim] += y.size(dim)
expected_size = torch.Size(expected_size)
self.assertEqual(result.get_device(), 0)
self.assertEqual(result.size(), expected_size)
index = [slice(None, None), slice(None, None)]
index[dim] = slice(0, x.size(dim))
self.assertEqual(result[tuple(index)], x)
index[dim] = slice(x.size(dim), x.size(dim) + y.size(dim))
self.assertEqual(result[tuple(index)], y)
def test_gather(self):
self._test_gather(0)
def test_gather_dim(self):
self._test_gather(1)
def test_from_sequence(self):
seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)]
reference = torch.range(0, 19).resize_(5, 4)
for t in types:
cuda_type = get_gpu_type(t)
self.assertEqual(cuda_type(seq), reference)
def test_manual_seed(self):
with freeze_rng_state():
x = torch.zeros(4, 4).float().cuda()
torch.cuda.manual_seed(2)
self.assertEqual(torch.cuda.initial_seed(), 2)
x.uniform_()
torch.cuda.manual_seed(2)
y = x.clone().uniform_()
self.assertEqual(x, y)
self.assertEqual(torch.cuda.initial_seed(), 2)
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_cat_autogpu(self):
x = torch.randn(4, 4).cuda(1)
y = torch.randn(4, 4).cuda(1)
z = torch.cat([x, y], 0)
self.assertEqual(z.get_device(), x.get_device())
def test_serialization(self):
x = torch.randn(4, 4).cuda()
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
self.assertEqual(x_copy, x)
self.assertIs(type(x_copy), type(x))
self.assertEqual(x_copy.get_device(), x.get_device())
def test_serialization_array_with_empty(self):
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), original.get_device())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), original.get_device())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization_remap(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
def gpu_remap(storage, location):
if location == 'cuda:1':
return storage.cuda(0)
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f, map_location=gpu_remap)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), 0)
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization_remap_dict(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f, map_location={'cuda:1': 'cuda:0'})
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), 0)
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_cuda_set_device(self):
x = torch.randn(5, 5)
with torch.cuda.device(1):
self.assertEqual(x.cuda().get_device(), 1)
torch.cuda.set_device(0)
self.assertEqual(x.cuda().get_device(), 0)
with torch.cuda.device(1):
self.assertEqual(x.cuda().get_device(), 1)
self.assertEqual(x.cuda().get_device(), 0)
torch.cuda.set_device(1)
self.assertEqual(x.cuda().get_device(), 0)
def test_is_tensor(self):
for t in types:
tensor = get_gpu_type(t)()
self.assertTrue(torch.is_tensor(tensor))
self.assertTrue(torch.is_tensor(torch.cuda.HalfTensor()))
def test_cuda_synchronize(self):
torch.cuda.synchronize()
def test_streams(self):
default_stream = torch.cuda.current_stream()
user_stream = torch.cuda.Stream()
self.assertEqual(torch.cuda.current_stream(), default_stream)
self.assertNotEqual(default_stream, user_stream)
self.assertEqual(default_stream.cuda_stream, 0)
self.assertNotEqual(user_stream.cuda_stream, 0)
with torch.cuda.stream(user_stream):
self.assertEqual(torch.cuda.current_stream(), user_stream)
self.assertTrue(user_stream.query())
# copy a 10 MB tensor from CPU to GPU, which should take some time
tensor1 = torch.ByteTensor(10000000).pin_memory()
tensor2 = tensor1.cuda(async=True)
self.assertFalse(default_stream.query())
default_stream.synchronize()
self.assertTrue(default_stream.query())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_streams_multi_gpu(self):
default_stream = torch.cuda.current_stream()
self.assertEqual(default_stream.device, 0)
stream = torch.cuda.Stream(device=1)
self.assertEqual(stream.device, 1)
with torch.cuda.device(1):
self.assertEqual(torch.cuda.current_stream().device, 1)
self.assertNotEqual(torch.cuda.current_stream(), default_stream)
@unittest.skipIf(torch.cuda.device_count() < 2, "multi-GPU not supported")
def test_tensor_device(self):
self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 0)
self.assertEqual(torch.cuda.FloatTensor(1, device=1).get_device(), 1)
with torch.cuda.device(1):
self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 1)
self.assertEqual(torch.cuda.FloatTensor(1, device=0).get_device(), 0)
self.assertEqual(torch.cuda.FloatTensor(1, device=None).get_device(), 1)
def test_events(self):
stream = torch.cuda.current_stream()
event = torch.cuda.Event(enable_timing=True)
self.assertTrue(event.query())
start_event = torch.cuda.Event(enable_timing=True)
stream.record_event(start_event)
torch.cuda._sleep(int(50 * get_cycles_per_ms()))
stream.record_event(event)
self.assertFalse(event.query())
event.synchronize()
self.assertTrue(event.query())
self.assertGreater(start_event.elapsed_time(event), 0)
def test_record_stream(self):
cycles_per_ms = get_cycles_per_ms()
t = torch.FloatTensor([1, 2, 3, 4]).pin_memory()
result = torch.cuda.FloatTensor(t.size())
stream = torch.cuda.Stream()
ptr = [None]
# Performs the CPU->GPU copy in a background stream
def perform_copy():
with torch.cuda.stream(stream):
tmp = t.cuda(async=True)
ptr[0] = tmp.data_ptr()
torch.cuda.current_stream().wait_stream(stream)
tmp.record_stream(torch.cuda.current_stream())
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
result.copy_(tmp)
perform_copy()
with torch.cuda.stream(stream):
tmp2 = torch.cuda.FloatTensor(t.size())
tmp2.zero_()
self.assertNotEqual(tmp2.data_ptr(), ptr[0], 'allocation re-used too soon')
self.assertEqual(result.tolist(), [1, 2, 3, 4])
# Check that the block will be re-used after the main stream finishes
torch.cuda.current_stream().synchronize()
with torch.cuda.stream(stream):
tmp3 = torch.cuda.FloatTensor(t.size())
self.assertEqual(tmp3.data_ptr(), ptr[0], 'allocation not re-used')
def test_caching_pinned_memory(self):
cycles_per_ms = get_cycles_per_ms()
# check that allocations are re-used after deletion
t = torch.FloatTensor([1]).pin_memory()
ptr = t.data_ptr()
del t
t = torch.FloatTensor([1]).pin_memory()
self.assertEqual(t.data_ptr(), ptr, 'allocation not reused')
# check that the allocation is not re-used if it's in-use by a copy
gpu_tensor = torch.cuda.FloatTensor([0])
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
gpu_tensor.copy_(t, async=True)
del t
t = torch.FloatTensor([1]).pin_memory()
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
self.assertEqual(list(gpu_tensor), [1])
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_caching_pinned_memory_multi_gpu(self):
# checks that the events preventing pinned memory from being re-used
# too early are recorded on the correct GPU
cycles_per_ms = get_cycles_per_ms()
t = torch.FloatTensor([1]).pin_memory()
ptr = t.data_ptr()
gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)
with torch.cuda.device(1):
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
gpu_tensor1.copy_(t, async=True)
del t
t = torch.FloatTensor([2]).pin_memory()
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
with torch.cuda.device(0):
gpu_tensor0.copy_(t, async=True)
self.assertEqual(gpu_tensor1[0], 1)
self.assertEqual(gpu_tensor0[0], 2)
def test_btrifact(self):
TestTorch._test_btrifact(self, lambda t: t.cuda())
def test_btrisolve(self):
TestTorch._test_btrisolve(self, lambda t: t.cuda())
for decl in tests:
for t in types:
tensor = t()
gpu_tensor = get_gpu_type(t)()
for inplace in (True, False):
if HAS_CUDA:
for decl in tests:
for t in types:
tensor = t()
gpu_tensor = get_gpu_type(t)()
if len(decl) == 3:
name, constr, arg_constr = decl
desc = ''
elif len(decl) == 4:
name, constr, arg_constr, desc = decl
if inplace:
name = name + '_'
if not hasattr(tensor, name):
continue
if not hasattr(gpu_tensor, name):
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(name, gpu_tensor.__class__.__name__))
continue
test_name = 'test_' + t.__name__ + '_' + name
if desc:
test_name += '_' + desc
elif len(decl) == 5:
name, constr, arg_constr, desc, type_subset = decl
if t not in type_subset:
continue
precision = custom_precision.get(name, TestCuda.precision)
for inplace in (True, False):
if inplace:
name_inner = name + '_'
else:
name_inner = name
if not hasattr(tensor, name_inner):
continue
if not hasattr(gpu_tensor, name_inner):
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
name_inner, gpu_tensor.__class__.__name__))
continue
test_name = 'test_' + t.__name__ + '_' + name_inner
if desc:
test_name += '_' + desc
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
assert not hasattr(TestCase, test_name)
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name, t, precision))
if __name__ == '__main__':
unittest.main()
run_tests()

test/test_dataloader.py Normal file

@ -0,0 +1,201 @@
import math
import sys
import torch
import traceback
import unittest
from torch.utils.data import Dataset, TensorDataset, DataLoader
from common import TestCase, run_tests, TEST_NUMPY
from common_nn import TEST_CUDA
class TestTensorDataset(TestCase):
def test_len(self):
source = TensorDataset(torch.randn(15, 10, 2, 3, 4, 5), torch.randperm(15))
self.assertEqual(len(source), 15)
def test_getitem(self):
t = torch.randn(15, 10, 2, 3, 4, 5)
l = torch.randn(15, 10)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
def test_getitem_1d(self):
t = torch.randn(15)
l = torch.randn(15)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
class ErrorDataset(Dataset):
def __init__(self, size):
self.size = size
def __len__(self):
return self.size
class TestDataLoader(TestCase):
def setUp(self):
self.data = torch.randn(100, 2, 3, 5)
self.labels = torch.randperm(50).repeat(2)
self.dataset = TensorDataset(self.data, self.labels)
def _test_sequential(self, loader):
batch_size = loader.batch_size
for i, (sample, target) in enumerate(loader):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx + batch_size])
self.assertEqual(target, self.labels[idx:idx + batch_size])
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
def _test_shuffle(self, loader):
found_data = {i: 0 for i in range(self.data.size(0))}
found_labels = {i: 0 for i in range(self.labels.size(0))}
batch_size = loader.batch_size
for i, (batch_samples, batch_targets) in enumerate(loader):
for sample, target in zip(batch_samples, batch_targets):
for data_point_idx, data_point in enumerate(self.data):
if data_point.eq(sample).all():
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels[data_point_idx])
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
def _test_error(self, loader):
it = iter(loader)
errors = 0
while True:
try:
next(it)
except NotImplementedError:
errors += 1
except StopIteration:
self.assertEqual(errors,
math.ceil(float(len(loader.dataset)) / loader.batch_size))
return
def test_sequential(self):
self._test_sequential(DataLoader(self.dataset))
def test_sequential_batch(self):
self._test_sequential(DataLoader(self.dataset, batch_size=2))
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_sequential_pin_memory(self):
loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
for input, target in loader:
self.assertTrue(input.is_pinned())
self.assertTrue(target.is_pinned())
def test_shuffle(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True))
def test_shuffle_batch(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))
def test_sequential_workers(self):
self._test_sequential(DataLoader(self.dataset, num_workers=4))
def test_sequential_batch_workers(self):
self._test_sequential(DataLoader(self.dataset, batch_size=2, num_workers=4))
def test_shuffle_workers(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))
def test_shuffle_batch_workers(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_shuffle_pin_memory(self):
loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
for input, target in loader:
self.assertTrue(input.is_pinned())
self.assertTrue(target.is_pinned())
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_numpy(self):
import numpy as np
class TestDataset(torch.utils.data.Dataset):
def __getitem__(self, i):
return np.ones((2, 3, 4)) * i
def __len__(self):
return 1000
loader = DataLoader(TestDataset(), batch_size=12)
batch = next(iter(loader))
self.assertIsInstance(batch, torch.DoubleTensor)
self.assertEqual(batch.size(), torch.Size([12, 2, 3, 4]))
def test_error(self):
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
def test_error_workers(self):
self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_partial_workers(self):
"check that workers exit even if the iterator is not exhausted"
loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True))
workers = loader.workers
pin_thread = loader.pin_thread
for i, sample in enumerate(loader):
if i == 3:
break
del loader
for w in workers:
w.join(1.0) # timeout of one second
self.assertFalse(w.is_alive(), 'subprocess not terminated')
self.assertEqual(w.exitcode, 0)
pin_thread.join(1.0)
self.assertFalse(pin_thread.is_alive())
def test_len(self):
def check_len(dl, expected):
self.assertEqual(len(dl), expected)
n = 0
for sample in dl:
n += 1
self.assertEqual(n, expected)
check_len(self.dataset, 100)
check_len(DataLoader(self.dataset, batch_size=2), 50)
check_len(DataLoader(self.dataset, batch_size=3), 34)
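The expected lengths above follow ceiling division of the dataset size by the batch size; a minimal sketch of that relationship (the helper name is illustrative):

import math

def expected_num_batches(dataset_len, batch_size):
    # ceil(dataset_len / batch_size): the last, partial batch still counts
    return int(math.ceil(dataset_len / float(batch_size)))

assert expected_num_batches(100, 2) == 50
assert expected_num_batches(100, 3) == 34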
class StringDataset(Dataset):
def __init__(self):
self.s = '12345'
def __len__(self):
return len(self.s)
def __getitem__(self, ndx):
return (self.s[ndx], ndx)
class TestStringDataLoader(TestCase):
def setUp(self):
self.dataset = StringDataset()
@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_shuffle_pin_memory(self):
loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
for batch_ndx, (s, n) in enumerate(loader):
self.assertIsInstance(s[0], str)
self.assertTrue(n.is_pinned())
if __name__ == '__main__':
run_tests()

test/test_distributed.py Normal file

@ -0,0 +1,508 @@
import fcntl
import multiprocessing
import os
import sys
import time
import unittest
from functools import wraps, reduce
from contextlib import contextmanager
import torch
import torch.distributed as dist
from common import TestCase
BACKEND = os.environ['BACKEND']
TEMP_DIR = os.environ['TEMP_DIR']
MASTER_PORT = '29500'
MASTER_ADDR = '127.0.0.1:' + MASTER_PORT
@contextmanager
def _lock():
lockfile = os.path.join(TEMP_DIR, 'lockfile')
with open(lockfile, 'w') as lf:
try:
fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
yield
finally:
fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
lf.close()
def _build_tensor(size, value=None):
if value is None:
value = size
return torch.FloatTensor(size, size, size).fill_(value)
class Barrier(object):
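# File-based barrier: every process writes its current barrier id to a per-pid
# file under TEMP_DIR/barrier and spins, under the fcntl lock above, until all
# processes (as reported by dist.get_num_processes()) have caught up.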
barrier_id = 0
@classmethod
def init(cls):
cls.barrier_id = 0
barrier_dir = os.path.join(TEMP_DIR, 'barrier')
for f_name in os.listdir(barrier_dir):
os.unlink(os.path.join(barrier_dir, f_name))
@classmethod
def sync(cls, timeout=5):
cls.barrier_id += 1
barrier_dir = os.path.join(TEMP_DIR, 'barrier')
pid = str(os.getpid())
barrier_file = os.path.join(barrier_dir, pid)
with _lock():
with open(barrier_file, 'w') as f:
f.write(str(cls.barrier_id))
start_time = time.time()
while True:
arrived = 0
with _lock():
for f_name in os.listdir(barrier_dir):
with open(os.path.join(barrier_dir, f_name), 'r') as f:
data = f.read()
if int(data) >= cls.barrier_id:
arrived += 1
if arrived == dist.get_num_processes():
break
if time.time() - start_time > timeout:
raise RuntimeError("barrier timeout")
time.sleep(0.1)
class _DistTestBase(object):
def _barrier(self, *args, **kwargs):
Barrier.sync(*args, **kwargs)
def _init_group_test(self):
group = [1, 2]
group_id = dist.new_group(group)
rank = dist.get_rank()
if rank not in group:
return ([], None, rank)
return (group, group_id, rank)
def _init_global_test(self):
group = [i for i in range(0, dist.get_num_processes())]
group_id = dist.group.WORLD
rank = dist.get_rank()
return (group, group_id, rank)
# GET RANK
def test_get_rank(self):
test_dir = os.path.join(TEMP_DIR, 'test_dir')
pid = str(os.getpid())
num_processes = dist.get_num_processes()
with open(os.path.join(test_dir, pid), 'w') as f:
f.write(str(dist.get_rank()))
self._barrier()
all_ranks = set()
for f_name in os.listdir(test_dir):
with open(os.path.join(test_dir, f_name), 'r') as f:
all_ranks.add(int(f.read()))
self.assertEqual(len(all_ranks), num_processes)
self._barrier()
if dist.get_rank() == 0:
for f_name in os.listdir(test_dir):
os.unlink(os.path.join(test_dir, f_name))
self._barrier()
# SEND RECV
def test_send_recv(self):
rank = dist.get_rank()
tensor = _build_tensor(rank + 1)
for dest in range(0, dist.get_num_processes()):
if dest == rank:
continue
dist.send(tensor, dest)
for src in range(0, dist.get_num_processes()):
if src == rank:
continue
tensor = _build_tensor(src + 1, value=-1)
expected_tensor = _build_tensor(src + 1)
dist.recv(tensor, src)
self.assertEqual(tensor, expected_tensor)
self._barrier()
# SEND RECV ANY SOURCE
def test_send_recv_any_source(self):
rank = dist.get_rank()
tensor = _build_tensor(10, rank)
for dest in range(0, dist.get_num_processes()):
if dest == rank:
continue
dist.send(tensor, dest)
recv_ranks = set()
for src in range(0, dist.get_num_processes()):
if src == rank:
continue
tensor = _build_tensor(10, value=-1)
dist.recv(tensor)
recv_ranks.add(tensor.resize_(1)[0])
self.assertEqual(len(recv_ranks), dist.get_num_processes() - 1)
self._barrier()
# ISEND
def test_isend(self):
rank = dist.get_rank()
world_size = dist.get_num_processes()
if rank == 0:
requests = [
dist.isend(_build_tensor(dest, 10), dest) for dest in range(1, world_size)
]
for request in requests:
request.wait()
self.assertTrue(request.is_completed())
else:
tensor = _build_tensor(rank, -1)
dist.recv(tensor, 0)
self.assertEqual(tensor, _build_tensor(rank, 10))
self._barrier()
# IRECV
def test_irecv(self):
rank = dist.get_rank()
world_size = dist.get_num_processes()
if rank == 0:
expected_tensors = [_build_tensor(src, -1) for src in range(1, world_size)]
requests = [
dist.irecv(expected_tensors[src - 1], src) for src in range(1, world_size)
]
for src in range(1, world_size):
requests[src - 1].wait()
self.assertTrue(requests[src - 1].is_completed())
self.assertEqual(expected_tensors[src - 1], _build_tensor(src, 10))
else:
tensor = _build_tensor(rank, 10)
dist.send(tensor, 0)
self._barrier()
# BROADCAST
def _test_broadcast_helper(self, group, group_id, rank):
for src in group:
expected_tensor = _build_tensor(src + 1)
if rank == src:
dist.broadcast(expected_tensor, src, group_id)
else:
tensor = _build_tensor(src + 1, -1)
dist.broadcast(tensor, src, group_id)
self.assertEqual(tensor, expected_tensor)
self._barrier()
def test_broadcast(self):
group, group_id, rank = self._init_global_test()
self._test_broadcast_helper(group, group_id, rank)
def test_broadcast_group(self):
group, group_id, rank = self._init_group_test()
self._test_broadcast_helper(group, group_id, rank)
# REDUCE
def _test_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
for src in group:
if rank == src:
tensor = _build_tensor(src + 1).fill_(master_value)
dist.reduce(tensor, src, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
else:
tensor = _build_tensor(src + 1).fill_(worker_value)
dist.reduce(tensor, src, op, group_id)
self._barrier()
def test_reduce_sum(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
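# Worked example: with the 3-process world size used by run_test.sh, the root
# contributes master_value=2 and each of the other two ranks contributes
# worker_value=10, so every element of the reduced tensor equals
# 2 + 10 * (3 - 1) == 22.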
def test_reduce_product(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_reduce_min(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_reduce_max(self):
group, group_id, rank = self._init_global_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
def test_reduce_group_sum(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_reduce_group_product(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_reduce_group_min(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_reduce_group_max(self):
group, group_id, rank = self._init_group_test()
self._test_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
# ALL REDUCE
def _test_all_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
for src in group:
if rank == src:
tensor = _build_tensor(src + 1).fill_(master_value)
dist.all_reduce(tensor, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
else:
tensor = _build_tensor(src + 1).fill_(worker_value)
dist.all_reduce(tensor, op, group_id)
self.assertEqual(tensor, _build_tensor(src + 1, expected_value))
self._barrier()
def test_all_reduce_sum(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_all_reduce_product(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_all_reduce_min(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_all_reduce_max(self):
group, group_id, rank = self._init_global_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
def test_all_reduce_group_sum(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.SUM, 2, 10, 2 + (10 * (len(group) - 1))
)
def test_all_reduce_group_product(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.PRODUCT,
2, 10, reduce((lambda x, y: x * y), [10] * (len(group) - 1), 2)
)
def test_all_reduce_group_min(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MIN, 1010, 1, 1
)
def test_all_reduce_group_max(self):
group, group_id, rank = self._init_group_test()
self._test_all_reduce_helper(
group, group_id, rank, dist.reduce_op.MAX, -1, 10, 10
)
# SCATTER
def _test_scatter_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, -1)
expected_tensor = _build_tensor(dest + 1, rank)
if rank == dest:
tensors = [_build_tensor(dest + 1, i) for i in group]
dist.scatter_send(tensors, tensor, group_id)
self.assertEqual(tensor, expected_tensor)
else:
dist.scatter_recv(tensor, dest, group_id)
self.assertEqual(tensor, expected_tensor)
self._barrier()
def test_scatter(self):
group, group_id, rank = self._init_global_test()
self._test_scatter_helper(group, group_id, rank)
def test_scatter_group(self):
group, group_id, rank = self._init_group_test()
self._test_scatter_helper(group, group_id, rank)
# GATHER
def _test_gather_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, rank)
if rank == dest:
tensors = [_build_tensor(dest + 1, -1) for i in group]
dist.gather_recv(tensors, tensor, group_id)
expected_tensors = [_build_tensor(dest + 1, i) for i in group]
for t1, t2 in zip(tensors, expected_tensors):
self.assertEqual(t1, t2)
else:
dist.gather_send(tensor, dest, group_id)
self._barrier()
def test_gather(self):
group, group_id, rank = self._init_global_test()
self._test_gather_helper(group, group_id, rank)
def test_gather_group(self):
group, group_id, rank = self._init_group_test()
self._test_gather_helper(group, group_id, rank)
# ALL GATHER
def _test_all_gather_helper(self, group, group_id, rank):
for dest in group:
tensor = _build_tensor(dest + 1, rank)
tensors = [_build_tensor(dest + 1, -1) for i in group]
dist.all_gather(tensors, tensor, group_id)
expected_tensors = [_build_tensor(dest + 1, i) for i in group]
for t1, t2 in zip(tensors, expected_tensors):
self.assertEqual(t1, t2)
self._barrier()
def test_all_gather(self):
group, group_id, rank = self._init_global_test()
self._test_all_gather_helper(group, group_id, rank)
def test_all_gather_group(self):
group, group_id, rank = self._init_group_test()
self._test_all_gather_helper(group, group_id, rank)
# BARRIER
def _test_barrier_helper(self, group, group_id, rank):
WAIT_TIME = 0.3 # seconds
for dest in group:
expected_time = torch.DoubleTensor(1).fill_(0.0)
if dest == rank:
expected_time.fill_(time.time() + WAIT_TIME)
dist.broadcast(expected_time, dest, group_id)
time.sleep(WAIT_TIME + 0.1) # sleep a little bit longer
dist.barrier(group_id)
else:
dist.broadcast(expected_time, dest, group_id)
dist.barrier(group_id)
self.assertGreaterEqual(time.time(), expected_time[0])
self._barrier()
def test_barrier(self):
group, group_id, rank = self._init_global_test()
self._test_barrier_helper(group, group_id, rank)
def test_barrier_group(self):
group, group_id, rank = self._init_group_test()
self._test_barrier_helper(group, group_id, rank)
if BACKEND == 'tcp':
WORLD_SIZE = os.environ['WORLD_SIZE']
class TestTCP(TestCase, _DistTestBase):
MANAGER_PROCESS_RANK = -1
JOIN_TIMEOUT = 5
@staticmethod
def manager_join(fn):
@wraps(fn)
def wrapper(self):
if self.rank == self.MANAGER_PROCESS_RANK:
self._join_and_reduce()
else:
fn(self)
return wrapper
@classmethod
def setUpClass(cls):
os.environ['MASTER_ADDR'] = MASTER_ADDR
os.environ['MASTER_PORT'] = MASTER_PORT
os.environ['WORLD_SIZE'] = WORLD_SIZE
for attr in dir(cls):
if attr.startswith('test'):
fn = getattr(cls, attr)
setattr(cls, attr, cls.manager_join(fn))
def setUp(self):
self.processes = []
self.rank = self.MANAGER_PROCESS_RANK
Barrier.init()
for rank in range(int(WORLD_SIZE)):
self.processes.append(self._spawn_process(rank))
def tearDown(self):
for p in self.processes:
p.terminate()
def _spawn_process(self, rank):
os.environ['RANK'] = str(rank)
name = 'process ' + str(rank)
process = multiprocessing.Process(target=self._run, name=name,
args=(rank,))
process.start()
return process
def _run(self, rank):
self.rank = rank
dist.init_process_group(backend=BACKEND)
# self.id() == e.g. '__main__.TestDistributed.test_get_rank'
# We're retrieving the corresponding test and executing it.
getattr(self, self.id().split(".")[2])()
sys.exit(0)
def _join_and_reduce(self):
for p in self.processes:
p.join(self.JOIN_TIMEOUT)
self.assertEqual(p.exitcode, 0)
elif BACKEND == 'mpi':
dist.init_process_group(backend='mpi')
class TestMPI(TestCase, _DistTestBase):
pass
if __name__ == '__main__':
unittest.main()
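For orientation, a minimal standalone sketch of the blocking point-to-point pattern the tests above exercise. It uses only calls that appear in this file (init_process_group, get_rank, get_num_processes, send, recv) and assumes the MASTER_ADDR, MASTER_PORT, WORLD_SIZE and RANK environment variables are already set for each process, as setUpClass and _spawn_process do above.

import torch
import torch.distributed as dist

dist.init_process_group(backend='tcp')
rank = dist.get_rank()
world_size = dist.get_num_processes()
if rank == 0:
    # rank 0 sends a different tensor to every other rank
    for dst in range(1, world_size):
        dist.send(torch.zeros(10).fill_(dst), dst)
else:
    buf = torch.zeros(10)
    dist.recv(buf, 0)  # blocks until rank 0's message arrives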

File diff suppressed because it is too large

View File

@ -1,16 +1,25 @@
import os
import contextlib
import gc
import os
import sys
import time
import unittest
import contextlib
from sys import platform
import torch
import torch.cuda
import torch.multiprocessing as mp
from common import TestCase
from torch.autograd import Variable
from torch.nn import Parameter
from common import TestCase, run_tests
TEST_REPEATS = 30
HAS_SHM_FILES = os.path.isdir('/dev/shm')
TEST_CUDA_IPC = torch.cuda.is_available() and \
sys.version_info[0] == 3 and \
sys.platform != 'darwin'
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
def simple_fill(queue, event):
@ -24,6 +33,59 @@ def simple_pool_fill(tensor):
return tensor.add(1)
def send_tensor(queue, event, tp):
t = torch.ones(5, 5).type(tp)
queue.put(t)
queue.put(t)
event.wait()
def sum_tensors(inq, outq):
with torch.cuda.device(1):
tensors = inq.get()
for tensor in tensors:
outq.put((tensor.sum(), tensor.get_device(),
tensor.numel(), tensor.storage().size()))
def queue_get_exception(inqueue, outqueue):
os.close(2) # hide expected error message
try:
torch.zeros(5, 5).cuda()
except Exception as e:
outqueue.put(e)
else:
outqueue.put('no exception')
# Multiply by two in a separate stream
def cuda_multiply_two(queue, ready, done):
ready.set()
with torch.cuda.stream(torch.cuda.Stream()):
cuda_event, tensor = queue.get()
cuda_event.wait()
tensor.mul_(2)
cuda_event.record()
done.set()
del cuda_event
def autograd_sharing(queue, ready, master_modified):
var = queue.get()
ready.set()
master_modified.wait()
expected_var = torch.range(1, 25).view(5, 5)
expected_var[0, 0] = 1000
is_ok = var.data.equal(expected_var)
var.data[:] = torch.ones(5, 5)
is_ok &= var.grad is None
var._grad = Variable(torch.ones(5, 5), requires_grad=False)
queue.put(is_ok)
@contextlib.contextmanager
def fs_sharing():
prev_strategy = mp.get_sharing_strategy()
@ -41,24 +103,30 @@ class leak_checker(object):
self.test_case = test_case
def __enter__(self):
self.next_fd = self._get_next_fd()
self.next_fds = self._get_next_fds(10)
return self
def __exit__(self, *args):
if args[0] is None:
gc.collect()
self.test_case.assertEqual(self.next_fd, self._get_next_fd())
# Check that the 10th available file descriptor at the end of the
# test is no more than 5 higher than the 10th available at the
# start. This attempts to catch file descriptor leaks while still
# allowing one-off initialization that may use up a file descriptor.
available_fds = self._get_next_fds(10)
self.test_case.assertLessEqual(
available_fds[-1] - self.next_fds[-1], 5)
self.test_case.assertFalse(self.has_shm_files())
return False
def check_pid(self, pid):
self.checked_pids.append(pid)
def _get_next_fd(self):
def _get_next_fds(self, n=1):
# dup uses the lowest-numbered unused descriptor for the new descriptor
fd = os.dup(0)
os.close(fd)
return fd
fds = [os.dup(0) for i in range(n)]
for fd in fds:
os.close(fd)
return fds
def has_shm_files(self, wait=True):
if not HAS_SHM_FILES:
@ -81,97 +149,267 @@ class leak_checker(object):
class TestMultiprocessing(TestCase):
def __init__(self, *args, **kwargs):
super(TestMultiprocessing, self).__init__(*args, **kwargs)
def _test_sharing(self):
def do_test():
x = torch.zeros(5, 5)
q = mp.Queue()
e = mp.Event()
def _test_sharing(self, ctx=mp, type=torch.FloatTensor, repeat=1):
def test_fill():
x = torch.zeros(5, 5).type(type)
q = ctx.Queue()
e = ctx.Event()
data = [x, x[:, 1]]
q.put(data)
p = mp.Process(target=simple_fill, args=(q, e))
p = ctx.Process(target=simple_fill, args=(q, e))
p.daemon = True
lc.check_pid(p.pid)
p.start()
e.wait()
e.wait(10)
self.assertTrue(e.is_set())
self.assertTrue(data[0].eq(4).all())
self.assertTrue(data[1].eq(4).all())
p.join(1)
self.assertFalse(p.is_alive())
with leak_checker(self) as lc:
do_test()
def test_receive():
q = ctx.Queue()
e = ctx.Event()
p = ctx.Process(target=send_tensor, args=(q, e, type))
p.daemon = True
lc.check_pid(p.pid)
p.start()
t1 = q.get()
t2 = q.get()
self.assertTrue(t1.eq(1).all())
self.assertTrue(id(t1.storage()) == id(t2.storage()))
e.set()
p.join(1)
self.assertFalse(p.is_alive())
def _test_preserve_sharing(self):
with leak_checker(self) as lc:
for _ in range(repeat):
test_fill()
test_receive()
def _test_preserve_sharing(self, ctx=mp, repeat=1):
def do_test():
x = torch.randn(5, 5)
data = [x.storage(), x, x[2], x[:,1]]
q = mp.Queue()
data = [x.storage(), x.storage()[1:4], x, x[2], x[:, 1]]
q = ctx.Queue()
q.put(data)
new_data = q.get()
new_data = q.get(timeout=1)
self.assertEqual(new_data, data, 0)
storage_cdata = data[0]._cdata
self.assertEqual(new_data[0]._cdata, storage_cdata)
for t in new_data[1:]:
for t in new_data[2:]:
self.assertEqual(t.storage()._cdata, storage_cdata)
# TODO: enable after fixing #46
# new_data[0].fill_(10)
# self.assertEqual(new_data[1], new_data[0][1:4], 0)
with leak_checker(self):
do_test()
for i in range(repeat):
do_test()
def _test_pool(self):
def _test_pool(self, ctx=mp, repeat=1):
def do_test():
p = mp.Pool(2)
p = ctx.Pool(2)
for proc in p._pool:
lc.check_pid(proc.pid)
buffers = (torch.zeros(2, 2) for i in range(4))
buffers = [torch.zeros(2, 2) for i in range(4)]
results = p.map(simple_pool_fill, buffers, 1)
self.assertEqual(len(results), len(buffers))
for r in results:
self.assertEqual(r, torch.ones(2, 2) * 5, 0)
self.assertEqual(len(results), 4)
for b in buffers:
self.assertEqual(b, torch.ones(2, 2) * 4, 0)
p.close()
p.join()
with leak_checker(self) as lc:
do_test()
for i in range(repeat):
do_test()
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_sharing(self):
self._test_sharing()
self._test_sharing(repeat=TEST_REPEATS)
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_preserve_sharing(self):
self._test_preserve_sharing()
self._test_preserve_sharing(repeat=TEST_REPEATS)
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_fd_pool(self):
self._test_pool()
self._test_pool(repeat=TEST_REPEATS)
@unittest.skipIf(platform == "darwin", "file_system sharing strategy doesn't work in OSX")
def test_fs_sharing(self):
with fs_sharing():
self._test_sharing()
self._test_sharing(repeat=TEST_REPEATS)
def test_fs_preserve_sharing(self):
with fs_sharing():
self._test_preserve_sharing()
self._test_preserve_sharing(repeat=TEST_REPEATS)
def test_fs_pool(self):
with fs_sharing():
self._test_pool()
self._test_pool(repeat=TEST_REPEATS)
@unittest.skipIf(not HAS_SHM_FILES, "don't know how to check if shm files exist")
def test_fs(self):
with fs_sharing(), leak_checker(self) as lc:
def queue_put():
x = torch.DoubleStorage(4)
q = mp.Queue()
self.assertFalse(lc.has_shm_files())
q.put(x)
time.sleep(0.05) # queue serializes asynchronously
self.assertTrue(lc.has_shm_files(wait=False))
q.get()
del x
del q # We have to clean up fds for leak_checker
with fs_sharing(), leak_checker(self) as lc:
for _ in range(TEST_REPEATS):
queue_put()
def test_inherit_tensor(self):
class SubProcess(mp.Process):
def __init__(self, tensor):
super(SubProcess, self).__init__()
self.tensor = tensor
self.daemon = True
def run(self):
self.tensor.add_(3)
t = torch.zeros(5, 5)
p = SubProcess(t.share_memory_())
p.start()
p.join(1)
self.assertEqual(t, torch.ones(5, 5) * 3, 0)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
def test_cuda(self):
torch.cuda.FloatTensor([1]) # initialize CUDA outside of leak checker
self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
@unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
def test_cuda_small_tensors(self):
# Check multiple small tensors which will likely use the same
# underlying cached allocation
ctx = mp.get_context('spawn')
tensors = []
for i in range(5):
tensors += [torch.range(i * 5, (i * 5) + 4).cuda()]
inq = ctx.Queue()
outq = ctx.Queue()
inq.put(tensors)
p = ctx.Process(target=sum_tensors, args=(inq, outq))
p.start()
results = []
for i in range(5):
results.append(outq.get())
p.join()
for i, tensor in enumerate(tensors):
v, device, tensor_size, storage_size = results[i]
self.assertEqual(v, torch.range(i * 5, (i * 5) + 4).sum())
self.assertEqual(device, 0)
self.assertEqual(tensor_size, 5)
self.assertEqual(storage_size, 5)
@unittest.skipIf(not torch.cuda.is_available(), 'CUDA not available')
def test_cuda_bad_call(self):
# Initialize CUDA
t = torch.zeros(5, 5).cuda().cpu()
inq = mp.Queue()
outq = mp.Queue()
p = mp.Process(target=queue_get_exception, args=(inq, outq))
p.start()
inq.put(t)
p.join()
self.assertIsInstance(outq.get(), RuntimeError)
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
def test_event(self):
ctx = mp.get_context('spawn')
queue = ctx.Queue()
ready = ctx.Event()
done = ctx.Event()
p = ctx.Process(target=cuda_multiply_two, args=(queue, ready, done))
p.start()
ready.wait()
with torch.cuda.stream(torch.cuda.Stream()):
tensor = torch.cuda.FloatTensor([1, 1, 1, 1])
# Use a sleep kernel to test events. Without the event, the
# multiply happens before the add.
event = torch.cuda.Event(interprocess=True)
torch.cuda._sleep(20000000) # about 30 ms
tensor.add_(1)
event.record()
queue.put((event, tensor))
done.wait() # must wait until subprocess records event
event.synchronize()
self.assertEqual(list(tensor), [4, 4, 4, 4])
p.join()
def _test_autograd_sharing(self, var):
ready = mp.Event()
master_modified = mp.Event()
queue = mp.Queue()
p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
p.daemon = True
p.start()
var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
queue.put(var)
ready.wait()
var.data[0, 0] = 1000
var.grad.data[:] = torch.ones(5, 5) * 4
master_modified.set()
worker_ok = queue.get()
self.assertTrue(worker_ok)
self.assertEqual(var.data, torch.ones(5, 5))
self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
p.join(1)
self.assertFalse(p.is_alive())
def test_variable_sharing(self):
configs = [
(True, False),
(False, False),
(False, True),
]
for requires_grad, volatile in configs:
var = Variable(torch.range(1, 25).view(5, 5),
requires_grad=requires_grad,
volatile=volatile)
self._test_autograd_sharing(var)
def test_parameter_sharing(self):
param = Parameter(torch.range(1, 25).view(5, 5))
self._test_autograd_sharing(param)
def _test_is_shared(self):
t = torch.randn(5, 5)
self.assertFalse(t.is_shared())
t.share_memory_()
self.assertTrue(t.is_shared())
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
def test_is_shared(self):
self._test_is_shared()
def test_fs_is_shared(self):
with fs_sharing():
self._test_is_shared()
@unittest.skipIf(not torch.cuda.is_available(), 'CUDA not available')
def test_is_shared_cuda(self):
t = torch.randn(5, 5).cuda()
self.assertTrue(t.is_shared())
if __name__ == '__main__':
unittest.main()
run_tests()
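A minimal sketch of the queue-based sharing behaviour that _test_sharing verifies above: putting a tensor on a torch.multiprocessing queue moves its storage into shared memory, so an in-place write made by the child is visible to the parent. This assumes the default file_descriptor strategy on Linux (the tests skip it on OS X).

import torch
import torch.multiprocessing as mp

def fill(queue):
    t = queue.get()   # receives a view onto the shared storage, not a copy
    t.fill_(4)

if __name__ == '__main__':
    x = torch.zeros(5, 5)
    q = mp.Queue()
    q.put(x)          # storage is moved into shared memory during serialization
    p = mp.Process(target=fill, args=(q,))
    p.start()
    p.join()
    print(x)          # all entries are now 4: the child's write is reflected here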

test/test_nccl.py Normal file (88 lines)
View File

@ -0,0 +1,88 @@
import unittest
import torch
import torch.cuda.nccl as nccl
import torch.cuda
from common import TestCase, run_tests
nGPUs = torch.cuda.device_count()
if nGPUs == 0:
print('CUDA not available, skipping tests')
TestCase = object # noqa: F811
class TestNCCL(TestCase):
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_broadcast(self):
expected = torch.FloatTensor(128).uniform_()
tensors = [expected.cuda()]
for device in range(1, torch.cuda.device_count()):
with torch.cuda.device(device):
tensors.append(torch.cuda.FloatTensor(128))
nccl.broadcast(tensors)
for i in range(torch.cuda.device_count()):
self.assertEqual(tensors[i], expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_reduce(self):
tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(128).zero_()
for t in tensors:
expected.add_(t)
tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
nccl.reduce(tensors)
self.assertEqual(tensors[0], expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_all_reduce(self):
tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(128).zero_()
for t in tensors:
expected.add_(t)
tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
nccl.all_reduce(tensors)
for tensor in tensors:
self.assertEqual(tensor, expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_all_gather(self):
inputs = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.cat(inputs, 0)
inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
outputs = [torch.cuda.FloatTensor(128 * nGPUs, device=i)
for i in range(nGPUs)]
nccl.all_gather(inputs, outputs)
for tensor in outputs:
self.assertEqual(tensor, expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_reduce_scatter(self):
in_size = 32 * nGPUs
out_size = 32
inputs = [torch.FloatTensor(in_size).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(in_size).zero_()
for t in inputs:
expected.add_(t)
expected = expected.view(nGPUs, 32)
inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
outputs = [torch.cuda.FloatTensor(out_size, device=i)
for i in range(nGPUs)]
nccl.reduce_scatter(inputs, outputs)
for i in range(nGPUs):
self.assertEqual(outputs[i], expected[i])
if __name__ == '__main__':
run_tests()
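A minimal sketch of the collective these tests check, assuming at least two visible GPUs: nccl.all_reduce operates in place, so after the call every per-device tensor holds the elementwise sum of all inputs.

import torch
import torch.cuda.nccl as nccl

n = torch.cuda.device_count()
inputs = [torch.FloatTensor(128).uniform_() for _ in range(n)]
expected = torch.FloatTensor(128).zero_()
for t in inputs:
    expected.add_(t)                      # CPU reference: elementwise sum of all inputs
tensors = [inputs[i].cuda(i) for i in range(n)]
nccl.all_reduce(tensors)                  # in place: every per-device tensor now holds the sum
for t in tensors:
    print((t.cpu() - expected).abs().max())   # ~0 up to float rounding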

File diff suppressed because it is too large

test/test_optim.py Normal file (347 lines)
View File

@ -0,0 +1,347 @@
import unittest
import functools
from copy import deepcopy
import torch
import torch.optim as optim
import torch.legacy.optim as old_optim
from torch.autograd import Variable
from common import TestCase, run_tests
def rosenbrock(tensor):
x, y = tensor
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
def drosenbrock(tensor):
x, y = tensor
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))
def wrap_old_fn(old_fn, **config):
def wrapper(closure, params, state):
return old_fn(closure, params, config, state)
return wrapper
class TestOptim(TestCase):
def _test_rosenbrock(self, constructor, old_fn):
params_t = torch.Tensor([1.5, 1.5])
state = {}
params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
optimizer = constructor([params])
solution = torch.Tensor([1, 1])
initial_dist = params.data.dist(solution)
def eval():
optimizer.zero_grad()
loss = rosenbrock(params)
loss.backward()
# loss.backward() will give slightly different gradients than
# drosenbrock, because of a different ordering of floating point
# operations. In most cases it doesn't matter, but some optimizers
# are so sensitive that they can temporarily diverge by up to 1e-4,
# only to converge again. Copying the analytic gradient below makes
# the comparison more stable.
params.grad.data.copy_(drosenbrock(params.data))
return loss
for i in range(2000):
optimizer.step(eval)
old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
params_t, state)
self.assertEqual(params.data, params_t)
self.assertLessEqual(params.data.dist(solution), initial_dist)
def _test_basic_cases_template(self, weight, bias, input, constructor):
weight = Variable(weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
input = Variable(input)
optimizer = constructor(weight, bias)
def fn():
optimizer.zero_grad()
y = weight.mv(input)
if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
y = y.cuda(bias.get_device())
loss = (y + bias).pow(2).sum()
loss.backward()
return loss
initial_value = fn().data[0]
for i in range(200):
optimizer.step(fn)
self.assertLess(fn().data[0], initial_value)
def _test_state_dict(self, weight, bias, input, constructor):
weight = Variable(weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
input = Variable(input)
def fn_base(optimizer, weight, bias):
optimizer.zero_grad()
loss = (weight.mv(input) + bias).pow(2).sum()
loss.backward()
return loss
optimizer = constructor(weight, bias)
fn = functools.partial(fn_base, optimizer, weight, bias)
# Prime the optimizer
for i in range(20):
optimizer.step(fn)
# Clone the weights and construct new optimizer for them
weight_c = Variable(weight.data.clone(), requires_grad=True)
bias_c = Variable(bias.data.clone(), requires_grad=True)
optimizer_c = constructor(weight_c, bias_c)
fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)
# Load state dict
state_dict = deepcopy(optimizer.state_dict())
state_dict_c = deepcopy(optimizer.state_dict())
optimizer_c.load_state_dict(state_dict_c)
# Run both optimizations in parallel
for i in range(20):
optimizer.step(fn)
optimizer_c.step(fn_c)
self.assertEqual(weight, weight_c)
self.assertEqual(bias, bias_c)
# Make sure state dict wasn't modified
self.assertEqual(state_dict, state_dict_c)
def _test_basic_cases(self, constructor, ignore_multidevice=False):
self._test_state_dict(
torch.randn(10, 5),
torch.randn(10),
torch.randn(5),
constructor
)
self._test_basic_cases_template(
torch.randn(10, 5),
torch.randn(10),
torch.randn(5),
constructor
)
# non-contiguous parameters
self._test_basic_cases_template(
torch.randn(10, 5, 2)[..., 0],
torch.randn(10, 2)[..., 0],
torch.randn(5),
constructor
)
# CUDA
if not torch.cuda.is_available():
return
self._test_basic_cases_template(
torch.randn(10, 5).cuda(),
torch.randn(10).cuda(),
torch.randn(5).cuda(),
constructor
)
# Multi-GPU
if not torch.cuda.device_count() > 1 or ignore_multidevice:
return
self._test_basic_cases_template(
torch.randn(10, 5).cuda(0),
torch.randn(10).cuda(1),
torch.randn(5).cuda(0),
constructor
)
def _build_params_dict(self, weight, bias, **kwargs):
return [dict(params=[weight]), dict(params=[bias], **kwargs)]
def test_sgd(self):
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3),
wrap_old_fn(old_optim.sgd, learningRate=1e-3)
)
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
dampening=0, weight_decay=1e-4),
wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
dampening=0, weightDecay=1e-4)
)
self._test_basic_cases(
lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.SGD(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
def test_adam(self):
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.Adam(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
def test_adadelta(self):
self._test_rosenbrock(
lambda params: optim.Adadelta(params),
wrap_old_fn(old_optim.adadelta)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, rho=0.95),
wrap_old_fn(old_optim.adadelta, rho=0.95)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, weight_decay=1e-2),
wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adadelta([weight, bias])
)
self._test_basic_cases(
lambda weight, bias: optim.Adadelta(
self._build_params_dict(weight, bias, rho=0.95))
)
def test_adagrad(self):
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, lr_decay=1e-3),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, learningRateDecay=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-1)
)
def test_adamax(self):
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1),
wrap_old_fn(old_optim.adamax, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, betas=(0.95, 0.998)),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, beta1=0.95, beta2=0.998)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-1)
)
def test_rmsprop(self):
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad(
self._build_params_dict(weight, bias, lr=1e-3),
lr=1e-2)
)
def test_asgd(self):
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3),
wrap_old_fn(old_optim.asgd, eta0=1e-3)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3)
)
self._test_basic_cases(
lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
)
self._test_basic_cases(
lambda weight, bias: optim.ASGD(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3, t0=100)
)
def test_rprop(self):
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3),
wrap_old_fn(old_optim.rprop, stepsize=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, etas=(0.6, 1.1)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, etaminus=0.6, etaplus=1.1)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, step_sizes=(1e-4, 3)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, stepsizemin=1e-4, stepsizemax=3)
)
self._test_basic_cases(
lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.Rprop(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
def test_lbfgs(self):
self._test_rosenbrock(
lambda params: optim.LBFGS(params),
wrap_old_fn(old_optim.lbfgs)
)
self._test_rosenbrock(
lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
)
self._test_basic_cases(
lambda weight, bias: optim.LBFGS([weight, bias]),
ignore_multidevice=True
)
def test_invalid_param_type(self):
with self.assertRaises(TypeError):
optim.SGD(Variable(torch.randn(5, 5)), lr=3)
if __name__ == '__main__':
run_tests()
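A condensed sketch of the closure protocol that _test_rosenbrock relies on above: step() receives a callable that zeroes the gradients, recomputes the loss, calls backward(), and returns the loss, so optimizers such as LBFGS can re-evaluate it as many times as they need. Everything here comes from this file; only the iteration count is arbitrary.

import torch
import torch.optim as optim
from torch.autograd import Variable

def rosenbrock(t):
    x, y = t
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2

params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
optimizer = optim.SGD([params], lr=1e-3)

def closure():
    optimizer.zero_grad()
    loss = rosenbrock(params)
    loss.backward()
    return loss

for _ in range(2000):
    optimizer.step(closure)   # the optimizer may evaluate the closure more than once per step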

test/test_sparse.py Normal file (372 lines)
View File

@ -0,0 +1,372 @@
import torch
from torch import sparse
import itertools
import random
import unittest
from common import TestCase, run_tests
from numbers import Number
SparseTensor = sparse.DoubleTensor
class TestSparse(TestCase):
@staticmethod
def _gen_sparse(d, nnz, with_size):
if isinstance(with_size, Number):
v = torch.randn(nnz)
i = (torch.rand(d, nnz) * with_size).type(torch.LongTensor)
x = SparseTensor(i, v)
else:
v_size = [nnz] + list(with_size[d:])
v = torch.randn(*v_size)
i = torch.rand(d, nnz) * \
torch.Tensor(with_size[:d]).repeat(nnz, 1).transpose(0, 1)
i = i.type(torch.LongTensor)
x = SparseTensor(i, v, torch.Size(with_size))
return x, i, v
def test_basic(self):
x, i, v = self._gen_sparse(3, 10, 100)
self.assertEqual(i, x.indices())
self.assertEqual(v, x.values())
x, i, v = self._gen_sparse(3, 10, [100, 100, 100])
self.assertEqual(i, x.indices())
self.assertEqual(v, x.values())
self.assertEqual(x.ndimension(), 3)
self.assertEqual(x.nnz(), 10)
for i in range(3):
self.assertEqual(x.size(i), 100)
# Make sure we can access empty indices / values
x = SparseTensor()
self.assertEqual(x.indices().numel(), 0)
self.assertEqual(x.values().numel(), 0)
def test_to_dense(self):
i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
v = torch.Tensor([2, 1, 3, 4])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
res = torch.Tensor([
[[2, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
[[1, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]],
[[0, 3, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 4]],
])
x.to_dense() # Tests double to_dense for memory corruption
x.to_dense()
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_to_dense_hybrid(self):
i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
])
v = torch.Tensor([[2, 3], [1, 2], [3, 4], [4, 5]])
x = SparseTensor(i, v, torch.Size([3, 4, 2]))
res = torch.Tensor([
[[2, 3],
[0, 0],
[0, 0],
[0, 0]],
[[1, 2],
[0, 0],
[0, 0],
[0, 0]],
[[3, 4],
[0, 0],
[0, 0],
[4, 5]],
])
x.to_dense() # Tests double to_dense for memory corruption
x.to_dense()
x.to_dense()
self.assertEqual(res, x.to_dense())
def test_contig(self):
i = torch.LongTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
])
v = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
x = SparseTensor(i, v, torch.Size([100, 100]))
exp_i = torch.LongTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
])
exp_v = torch.Tensor([2, 1, 6, 4, 10, 3, 5, 9, 8, 7])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
i = torch.LongTensor([
[2, 0, 2, 1],
[0, 0, 3, 0],
[1, 0, 4, 0],
])
v = torch.Tensor([3, 2, 4, 1])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
exp_i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
exp_v = torch.Tensor([2, 1, 3, 4])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
# Duplicate indices
i = torch.LongTensor([
[0, 0, 2, 0],
[0, 0, 3, 0],
[0, 0, 4, 0],
])
v = torch.Tensor([3, 2, 4, 1])
x = SparseTensor(i, v, torch.Size([3, 4, 5]))
exp_i = torch.LongTensor([
[0, 2],
[0, 3],
[0, 4],
])
exp_v = torch.Tensor([6, 4])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
def test_contig_hybrid(self):
i = torch.LongTensor([
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
])
v = torch.Tensor([
[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
[6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
])
x = SparseTensor(i, v, torch.Size([100, 100, 2]))
exp_i = torch.LongTensor([
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
])
exp_v = torch.Tensor([
[2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
[3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
i = torch.LongTensor([
[2, 0, 2, 1],
[0, 0, 3, 0],
[1, 0, 4, 0],
])
v = torch.Tensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = torch.LongTensor([
[0, 1, 2, 2],
[0, 0, 0, 3],
[0, 0, 1, 4],
])
exp_v = torch.Tensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
# Duplicate indices
i = torch.LongTensor([
[0, 0, 2, 0],
[0, 0, 3, 0],
[0, 0, 4, 0],
])
v = torch.Tensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]])
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
exp_i = torch.LongTensor([
[0, 2],
[0, 3],
[0, 4],
])
exp_v = torch.Tensor([[6, 4, 5], [4, 3, 4]])
x.contiguous()
self.assertEqual(exp_i, x.indices())
self.assertEqual(exp_v, x.values())
def test_transpose(self):
x = self._gen_sparse(4, 20, 5)[0]
y = x.to_dense()
for i, j in itertools.combinations(range(4), 2):
x = x.transpose_(i, j)
y = y.transpose(i, j)
self.assertEqual(x.to_dense(), y)
x = x.transpose(i, j)
y = y.transpose(i, j)
self.assertEqual(x.to_dense(), y)
def test_mm(self):
def test_shape(di, dj, dk):
x, _, _ = self._gen_sparse(2, 20, [di, dj])
t = torch.randn(di, dk)
y = torch.randn(dj, dk)
alpha = random.random()
beta = random.random()
expected = torch.addmm(alpha, t, beta, x.to_dense(), y)
res = torch.addmm(alpha, t, beta, x, y)
self.assertEqual(res, expected)
expected = torch.addmm(t, x.to_dense(), y)
res = torch.addmm(t, x, y)
self.assertEqual(res, expected)
expected = torch.mm(x.to_dense(), y)
res = torch.mm(x, y)
self.assertEqual(res, expected)
test_shape(10, 100, 100)
test_shape(100, 1000, 200)
test_shape(64, 10000, 300)
def test_saddmm(self):
def test_shape(di, dj, dk):
x = self._gen_sparse(2, 20, [di, dj])[0]
t = self._gen_sparse(2, 20, [di, dk])[0]
y = torch.randn(dj, dk)
alpha = random.random()
beta = random.random()
expected = torch.addmm(alpha, t.to_dense(), beta, x.to_dense(), y)
res = torch.saddmm(alpha, t, beta, x, y)
self.assertEqual(res.to_dense(), expected)
expected = torch.addmm(t.to_dense(), x.to_dense(), y)
res = torch.saddmm(t, x, y)
self.assertEqual(res.to_dense(), expected)
expected = torch.mm(x.to_dense(), y)
res = torch.smm(x, y)
self.assertEqual(res.to_dense(), expected)
test_shape(7, 5, 3)
test_shape(1000, 100, 100)
test_shape(3000, 64, 300)
def _test_spadd_shape(self, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
y = torch.randn(*shape)
r = random.random()
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
self.assertEqual(res, expected)
# Non contiguous dense tensor
s = list(shape)
s[0] = shape[-1]
s[-1] = shape[0]
y = torch.randn(*s).transpose_(0, len(s) - 1)
r = random.random()
expected = y + r * x.to_dense()
res = torch.add(y, r, x)
self.assertEqual(res, expected)
def test_spadd(self):
self._test_spadd_shape([5, 6])
self._test_spadd_shape([10, 10, 10])
self._test_spadd_shape([50, 30, 20])
self._test_spadd_shape([5, 5, 5, 5, 5, 5])
def test_spadd_hybrid(self):
self._test_spadd_shape([5, 6], [2, 3])
self._test_spadd_shape([10, 10, 10], [3])
self._test_spadd_shape([50, 30, 20], [2])
self._test_spadd_shape([5, 5, 5, 5, 5, 5], [2])
def _test_basic_ops_shape(self, shape_i, shape_v=None):
shape = shape_i + (shape_v or [])
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape)
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape)
y1 = x1 + x2
y2 = x1.clone()
y2.add_(x2)
expected = x1.to_dense() + x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 - x2
y2 = x1.clone()
y2.sub_(x2)
expected = x1.to_dense() - x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 * x2
y2 = x1.clone()
y2.mul_(x2)
expected = x1.to_dense() * x2.to_dense()
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 * 37.5
y2 = x1.clone()
y2.mul_(37.5)
expected = x1.to_dense() * 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y1 = x1 / 37.5
y2 = x1.clone()
y2.div_(37.5)
expected = x1.to_dense() / 37.5
self.assertEqual(y1.to_dense(), expected)
self.assertEqual(y2.to_dense(), expected)
y = x1.clone()
y.zero_()
expected = torch.zeros(x1.size())
self.assertEqual(y.to_dense(), expected)
def test_basic_ops(self):
self._test_basic_ops_shape([5, 6])
self._test_basic_ops_shape([10, 10, 10])
self._test_basic_ops_shape([50, 30, 20])
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5])
def test_basic_ops_hybrid(self):
self._test_basic_ops_shape([5, 6], [2, 3])
self._test_basic_ops_shape([10, 10, 10], [3])
self._test_basic_ops_shape([50, 30, 20], [2])
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5], [2])
if __name__ == '__main__':
run_tests()
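A minimal sketch of the constructor convention these tests assume: indices form a d x nnz LongTensor (one column per non-zero entry), values hold the matching nnz entries, and to_dense() scatters them into the full tensor. The concrete numbers here are illustrative.

import torch
from torch import sparse

i = torch.LongTensor([[0, 1, 2, 2],
                      [0, 0, 0, 3]])
v = torch.DoubleTensor([2, 1, 3, 4])
x = sparse.DoubleTensor(i, v, torch.Size([3, 4]))
print(x.to_dense())   # 3x4 dense tensor with 2, 1, 3, 4 placed at the four index columns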

File diff suppressed because it is too large

View File

@ -1,16 +1,35 @@
from __future__ import print_function
import sys
import os
import math
import shutil
import random
import tempfile
import unittest
import traceback
import torch
import torch.cuda
import warnings
from torch.autograd import Variable
from torch.utils.trainer import Trainer
from torch.utils.trainer.plugins import *
from torch.utils.trainer.plugins.plugin import Plugin
from torch.utils.data import *
from torch.utils.serialization import load_lua
HAS_CUDA = torch.cuda.is_available()
from common import TestCase, run_tests, download_file
try:
import cffi
from torch.utils.ffi import compile_extension
HAS_CFFI = True
except ImportError:
HAS_CFFI = False
from common import TestCase
class SimplePlugin(Plugin):
def __init__(self, interval):
super(SimplePlugin, self).__init__(interval)
self.trainer = None
@ -40,9 +59,10 @@ class SimplePlugin(Plugin):
class ModelMock(object):
def __init__(self):
self.num_calls = 0
self.output = Variable(torch.ones(1, 1))
self.output = Variable(torch.ones(1, 1), requires_grad=True)
def __call__(self, i):
self.num_calls += 1
@ -50,6 +70,7 @@ class ModelMock(object):
class CriterionMock(object):
def __init__(self):
self.num_calls = 0
@ -67,14 +88,17 @@ class OptimizerMock(object):
self.num_evals = 0
def step(self, closure):
for i in range(random.randint(1, self.max_evals)):
for i in range(random.randint(self.min_evals, self.max_evals)):
loss = closure()
self.num_evals += 1
loss.backward()
self.num_steps += 1
def zero_grad(self):
pass
class DatasetMock(object):
def __iter__(self):
for i in range(10):
yield torch.randn(2, 10), torch.randperm(10)[:2]
@ -100,8 +124,9 @@ class TestTrainer(TestCase):
]
def setUp(self):
self.trainer = Trainer(ModelMock(), CriterionMock(), OptimizerMock(),
DatasetMock())
self.optimizer = OptimizerMock()
self.trainer = Trainer(ModelMock(), CriterionMock(),
self.optimizer, DatasetMock())
self.num_epochs = 3
self.dataset_size = len(self.trainer.dataset)
self.num_iters = self.num_epochs * self.dataset_size
@ -156,92 +181,170 @@ class TestTrainer(TestCase):
def test_model_gradient(self):
self.trainer.run(epochs=self.num_epochs)
output_var = self.trainer.model.output
expected_grad = torch.ones(1, 1) * 2 * self.num_iters
self.assertEqual(output_var.grad, expected_grad)
expected_grad = torch.ones(1, 1) * 2 * self.optimizer.num_evals
self.assertEqual(output_var.grad.data, expected_grad)
class TestTensorDataSource(TestCase):
def test_len(self):
source = TensorDataSource(torch.randn(15, 10, 2, 3, 4, 5), torch.randperm(15))
self.assertEqual(len(source), 15)
def test_getitem(self):
t = torch.randn(15, 10, 2, 3, 4, 5)
l = torch.randn(15, 10)
source = TensorDataSource(t, l)
for i in range(15):
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
def test_getitem_1d(self):
t = torch.randn(15)
l = torch.randn(15)
source = TensorDataSource(t, l)
for i in range(15):
self.assertEqual(t[i:i+1], source[i][0])
self.assertEqual(l[i:i+1], source[i][1])
test_dir = os.path.abspath(os.path.dirname(str(__file__)))
class TestDataset(TestCase):
class TestFFI(TestCase):
def setUp(self):
self.data = torch.randn(10, 2, 3, 5)
self.labels = torch.randperm(5).repeatTensor(2)
self.datasource = TensorDataSource(self.data, self.labels)
self.tmpdir = tempfile.mkdtemp()
os.chdir(self.tmpdir)
sys.path.append(self.tmpdir)
def _test_sequential(self, dataset):
batch_size = dataset.batch_size
for i, (sample, target) in enumerate(dataset):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx+batch_size])
self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1))
self.assertEqual(i, math.floor((len(self.datasource)-1) / batch_size))
def tearDown(self):
shutil.rmtree(self.tmpdir)
def _test_shuffle(self, dataset):
batch_size = dataset.batch_size
found_data = {i: 0 for i in range(self.data.size(0))}
found_labels = {i: 0 for i in range(self.labels.size(0))}
for i, (batch_samples, batch_targets) in enumerate(dataset):
for sample, target in zip(batch_samples, batch_targets):
for data_point_idx, data_point in enumerate(self.data):
if data_point.eq(sample).all():
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i+1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i+1) * batch_size)
self.assertEqual(i, math.floor((len(self.datasource)-1) / batch_size))
@unittest.skipIf(not HAS_CFFI, "ffi tests require cffi package")
def test_cpu(self):
compile_extension(
name='test_extensions.cpulib',
header=test_dir + '/ffi/src/cpu/lib.h',
sources=[
test_dir + '/ffi/src/cpu/lib1.c',
test_dir + '/ffi/src/cpu/lib2.c',
],
verbose=False,
)
from test_extensions import cpulib
tensor = torch.ones(2, 2).float()
def test_seqential(self):
self._test_sequential(Dataset(self.datasource))
cpulib.good_func(tensor, 2, 1.5)
self.assertEqual(tensor, torch.ones(2, 2) * 2 + 1.5)
def test_seqential_batch(self):
self._test_sequential(Dataset(self.datasource, batch_size=2))
new_tensor = cpulib.new_tensor(4)
self.assertEqual(new_tensor, torch.ones(4, 4) * 4)
def test_shuffle(self):
self._test_shuffle(Dataset(self.datasource, shuffle=True))
f = cpulib.int_to_float(5)
self.assertIs(type(f), float)
def test_shuffle_batch(self):
self._test_shuffle(Dataset(self.datasource, batch_size=2, shuffle=True))
self.assertRaises(TypeError,
lambda: cpulib.good_func(tensor.double(), 2, 1.5))
self.assertRaises(torch.FatalError,
lambda: cpulib.bad_func(tensor, 2, 1.5))
def test_types(self):
dataset = Dataset(self.datasource, batch_size=2)
for samples, targets in dataset:
self.assertIs(type(samples), torch.DoubleTensor)
self.assertIs(type(targets), torch.DoubleTensor)
dataset.input_type(torch.FloatTensor)
for samples, targets in dataset:
self.assertIs(type(samples), torch.FloatTensor)
self.assertIs(type(targets), torch.DoubleTensor)
dataset.target_type(torch.IntTensor)
for samples, targets in dataset:
self.assertIs(type(samples), torch.FloatTensor)
self.assertIs(type(targets), torch.IntTensor)
@unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
def test_gpu(self):
compile_extension(
name='gpulib',
header=test_dir + '/ffi/src/cuda/cudalib.h',
sources=[
test_dir + '/ffi/src/cuda/cudalib.c',
],
with_cuda=True,
verbose=False,
)
import gpulib
tensor = torch.ones(2, 2).float()
gpulib.good_func(tensor, 2, 1.5)
self.assertEqual(tensor, torch.ones(2, 2) * 2 + 1.5)
ctensor = tensor.cuda().fill_(1)
gpulib.cuda_func(ctensor, 2, 1.5)
self.assertEqual(ctensor, torch.ones(2, 2) * 2 + 1.5)
self.assertRaises(TypeError,
lambda: gpulib.cuda_func(tensor, 2, 1.5))
self.assertRaises(TypeError,
lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))
class TestLuaReader(TestCase):
@staticmethod
def _module_test(name, test):
def do_test(self):
module = test['module']
input = test['input']
grad_output = test['grad_output']
if hasattr(self, '_transform_' + name):
input = getattr(self, '_transform_' + name)(input)
output = module.forward(input)
module.zeroGradParameters()
grad_input = module.backward(input, grad_output)
self.assertEqual(output, test['output'])
self.assertEqual(grad_input, test['grad_input'])
if module.parameters() is not None:
params, d_params = module.parameters()
self.assertEqual(params, test['params'])
self.assertEqual(d_params, test['d_params'])
else:
self.assertFalse('params' in test and test['params'])
self.assertFalse('params' in test and test['d_params'])
return do_test
@staticmethod
def _criterion_test(name, test):
def do_test(self):
module = test['module']
input = test['input']
if name == 'L1Cost':
target = None
else:
target = test['target']
if hasattr(self, '_transform_' + name):
input, target = getattr(self, '_transform_' + name)(input, target)
output = module.forward(input, target)
grad_input = module.backward(input, target)
self.assertEqual(output, test['loss'])
self.assertEqual(grad_input, test['grad_input'])
return do_test
@classmethod
def init(cls):
DATA_URL = 'https://download.pytorch.org/test_data/legacy_modules.t7'
data_dir = os.path.join(os.path.dirname(__file__), 'data')
test_file_path = os.path.join(data_dir, 'legacy_modules.t7')
succ = download_file(DATA_URL, test_file_path)
if not succ:
warnings.warn(("Couldn't download the test file for TestLuaReader! "
"Tests will be incomplete!"), RuntimeWarning)
return
tests = load_lua(test_file_path)
for name, test in tests['modules'].items():
test_name = 'test_' + name.replace('nn.', '')
setattr(cls, test_name, cls._module_test(name, test))
for name, test in tests['criterions'].items():
test_name = 'test_' + name.replace('nn.', '')
setattr(cls, test_name, cls._criterion_test(name, test))
def _transform_Index(self, input):
return [input[0], input[1].sub(1)]
def _transform_LookupTable(self, input):
return input.sub(1)
def _transform_MultiLabelMarginCriterion(self, input, target):
return input, target.sub(1)
def _transform_ClassNLLCriterion(self, input, target):
return input, target.sub(1)
def _transform_SpatialClassNLLCriterion(self, input, target):
return input, target.sub(1)
def _transform_ClassSimplexCriterion(self, input, target):
return input, target.sub(1)
def _transform_CrossEntropyCriterion(self, input, target):
return input, target.sub(1)
def _transform_ParallelCriterion(self, input, target):
return input, [target[0].sub(1), target[1]]
def _transform_MultiCriterion(self, input, target):
return input, target.sub(1)
def _transform_MultiMarginCriterion(self, input, target):
return input, target.sub(1)
TestLuaReader.init()
if __name__ == '__main__':
unittest.main()
run_tests()
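A generic sketch of the pattern TestLuaReader.init() uses above: one test method is synthesised per fixture and attached to the class with setattr before unittest collects it. The fixture names and values below are made up for illustration and stand in for the entries loaded from legacy_modules.t7.

import unittest

class TestGenerated(unittest.TestCase):
    pass

def _make_test(expected, actual):
    def do_test(self):
        self.assertEqual(expected, actual)
    return do_test

# hypothetical fixtures; each one becomes its own test_* method
for name, (expected, actual) in {'add': (4, 2 + 2), 'mul': (6, 2 * 3)}.items():
    setattr(TestGenerated, 'test_' + name, _make_test(expected, actual))

if __name__ == '__main__':
    unittest.main()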

View File

@ -2,45 +2,38 @@ import os
import yaml
from string import Template
from copy import deepcopy
from .plugins import ArgcountChecker, OptionalArguments, ArgumentReferences, BeforeCall, ConstantArguments, ReturnArguments
from .plugins import ArgcountChecker, OptionalArguments, ArgumentReferences, \
BeforeAfterCall, ConstantArguments, ReturnArguments, GILRelease
class cwrap(object):
BASE_INDENT_SIZE = 6
RETURN_WRAPPERS = {
'void': Template('$call;\n Py_RETURN_NONE;'),
'long': Template('return PyLong_FromLong($call);'),
'bool': Template('return PyBool_FromLong($call);'),
}
TYPE_CHECK = {
'void*': Template('PyLong_Check($arg)'),
'bool': Template('PyLong_Check($arg)'),
'float': Template('PyFloat_Check($arg)'),
'double': Template('PyFloat_Check($arg)'),
# TODO: this will only work for python3
'int': Template('PyLong_Check($arg)'),
'long': Template('PyLong_Check($arg)'),
}
TYPE_UNPACK = {
'void*': Template('PyLong_AsVoidPtr($arg)'),
'bool': Template('PyLong_AsLong($arg)'),
'float': Template('(float)PyFloat_AsDouble($arg)'),
'double': Template('PyFloat_AsDouble($arg)'),
# TODO: this will only work for python3
'int': Template('PyLong_AsLong($arg)'),
'long': Template('PyLong_AsLong($arg)'),
'void': Template('Py_RETURN_NONE;'),
'long': Template('return PyLong_FromLong($result);'),
'bool': Template('return PyBool_FromLong($result);'),
'void*': Template('return PyLong_FromVoidPtr($result);'),
}
OPTION_TEMPLATE = Template("""
${els}if ($arg_check) {
$call
$pre_arg_assign
$arg_assign
$code
""")
CALL_TEMPLATE = Template("$cname($arg_unpack)")
ARG_ASSIGN_TEMPLATE = Template("""${type} ${name} = ${unpack};""")
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeCall, ReturnArguments]
OPTION_CODE_TEMPLATE = [
'$call',
'$return_result',
]
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($call_arg);")
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments,
ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
def __init__(self, source, destination=None, plugins=[], default_plugins=True):
if destination is None:
@ -54,6 +47,7 @@ class cwrap(object):
for plugin in self.plugins:
plugin.initialize(self)
self.base_path = os.path.dirname(os.path.abspath(source))
with open(source, 'r') as f:
declarations = f.read()
@ -69,8 +63,10 @@ class cwrap(object):
declaration_lines = []
output = []
in_declaration = False
i = 0
for line in lines:
while i < len(lines):
line = lines[i]
if line == '[[':
declaration_lines = []
in_declaration = True
@ -93,18 +89,25 @@ class cwrap(object):
output.append(wrapper)
elif in_declaration:
declaration_lines.append(line)
elif '!!inc ' == line[:6]:
fname = os.path.join(self.base_path, line[6:].strip())
with open(fname, 'r') as f:
included = f.read().split('\n')
# insert it into lines at position i+1
lines[i + 1:i + 1] = included
else:
output.append(line)
i += 1
return '\n'.join(output)
def set_declaration_defaults(self, declaration):
declaration.setdefault('arguments', [])
declaration.setdefault('return', 'void')
if not 'cname' in declaration:
if 'cname' not in declaration:
declaration['cname'] = declaration['name']
# Simulate multiple dispatch, even if it's not necessary
if not 'options' in declaration:
if 'options' not in declaration:
declaration['options'] = [{'arguments': declaration['arguments']}]
del declaration['arguments']
# Parse arguments (some of them can be strings)
@ -140,19 +143,28 @@ class cwrap(object):
return fallback(*args)
def get_type_check(self, arg, option):
return self.search_plugins('get_type_check', (arg, option), lambda arg,_: self.TYPE_CHECK[arg['type']])
return self.search_plugins('get_type_check', (arg, option), lambda arg, _: None)
def get_type_unpack(self, arg, option):
return self.search_plugins('get_type_unpack', (arg, option), lambda arg,_: self.TYPE_UNPACK[arg['type']])
return self.search_plugins('get_type_unpack', (arg, option), lambda arg, _: None)
def get_return_wrapper(self, option):
return self.search_plugins('get_return_wrapper', (option,), lambda t: self.RETURN_WRAPPERS[option['return']])
return self.search_plugins('get_return_wrapper', (option,), lambda _: self.RETURN_WRAPPERS[option['return']])
def get_wrapper_template(self, declaration):
return self.search_plugins('get_wrapper_template', (declaration,), lambda _: None)
def get_assign_args(self, arguments):
return self.search_plugins('get_assign_args', (arguments,), lambda _: arguments)
def get_arg_accessor(self, arg, option):
return self.search_plugins('get_arg_accessor', (arg, option), lambda arg,_: 'PyTuple_GET_ITEM(args, {})'.format(arg['idx']))
def wrap_accessor(arg, _):
if arg.get('idx') is None:
raise RuntimeError("Missing accessor for '{} {}'".format(
arg['type'], arg['name']))
return 'PyTuple_GET_ITEM(args, {})'.format(arg['idx'])
return self.search_plugins('get_arg_accessor', (arg, option), wrap_accessor)
def generate_wrapper(self, declaration):
wrapper = ''
@ -167,45 +179,109 @@ class cwrap(object):
result = []
for arg in arguments:
accessor = self.get_arg_accessor(arg, option)
res = getattr(self, base_fn_name)(arg, option).substitute(arg=accessor)
tmpl = getattr(self, base_fn_name)(arg, option)
if tmpl is None:
fn = 'check' if base_fn_name == 'get_type_check' else 'unpack'
raise RuntimeError("Missing type {} for '{} {}'".format(
fn, arg['type'], arg['name']))
res = tmpl.substitute(arg=accessor, idx=arg.get('idx'))
for plugin in self.plugins:
res = getattr(plugin, plugin_fn_name)(res, arg, accessor)
result.append(res)
return result
def build_option_args(self, arguments, arg_unpack):
assignement = []
call_arg = []
# If types or names need to be changed
arguments = self.get_assign_args(arguments)
for arg, unpack in zip(arguments, arg_unpack):
if arg['type'] == 'CONSTANT':
call_arg.append(str(arg['name']))
else:
var_name = "arg_" + str(arg.get('assign_name', arg['name']))
res = self.ARG_ASSIGN_TEMPLATE.substitute(
type=arg['type'],
name=var_name,
unpack=unpack)
if var_name not in call_arg:
assignement.append(res)
call_arg.append(var_name)
return assignement, call_arg
def indent_code(self, code):
if code == '':
return code
code_lines = map(lambda s: s.strip(), code.split('\n'))
code = '\n'
depth = self.BASE_INDENT_SIZE
for line in code_lines:
depth -= line.count('}') * 2
code += ' ' * depth + line + '\n'
depth += line.count('{') * 2
depth += line.count('(') * 4
depth -= line.count(')') * 4
return code[:-1]
def generate_option(self, option, is_first):
checked_args = list(filter(
lambda arg: not 'ignore_check' in arg or not arg['ignore_check'],
lambda arg: 'ignore_check' not in arg or not arg['ignore_check'],
option['arguments']))
option['num_checked_args'] = len(checked_args)
for i, arg in enumerate(checked_args):
idx_args = list(filter(
lambda arg: not arg.get('ignore_check') and not arg.get('no_idx'),
option['arguments']))
for i, arg in enumerate(idx_args):
arg['idx'] = i
# Generate checks
arg_checks = self.map_selected_arguments('get_type_check',
'process_single_check', option, checked_args)
'process_single_check', option, checked_args)
arg_checks = ' &&\n '.join(arg_checks)
for plugin in self.plugins:
arg_checks = plugin.process_all_checks(arg_checks, option)
# Generate unpacks
arg_unpack = self.map_selected_arguments('get_type_unpack',
'process_single_unpack', option, option['arguments'])
arg_unpack = ', '.join(arg_unpack)
# Generate pre_arg assign
pre_arg_assign = []
for plugin in self.plugins:
arg_unpack = plugin.process_all_unpacks(arg_unpack, option)
pre_arg_assign = plugin.process_pre_arg_assign(pre_arg_assign, option)
# Generate arg assignment and call arguments
arg_unpack = self.map_selected_arguments('get_type_unpack',
'process_single_unpack', option, option['arguments'])
arg_assign, call_arg = self.build_option_args(option['arguments'], arg_unpack)
call_arg = ', '.join(call_arg)
for plugin in self.plugins:
call_arg = plugin.process_all_call_arg(call_arg, option)
# Generate call
raw_call = self.CALL_TEMPLATE.substitute(cname=option['cname'], arg_unpack=arg_unpack)
call = self.get_return_wrapper(option).substitute(call=raw_call)
try:
return_result = self.get_return_wrapper(option).substitute()
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result='',
cname=option['cname'], call_arg=call_arg)
except KeyError:
return_result = self.get_return_wrapper(option).substitute(result='__result')
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result=(option['return'] + ' __result = '),
cname=option['cname'], call_arg=call_arg)
code_template = deepcopy(self.OPTION_CODE_TEMPLATE)
for plugin in self.plugins:
call = plugin.process_call(call, option)
call = '\n '.join(map(lambda s: s.strip(), call.split('\n')))
code_template = plugin.process_option_code_template(code_template,
option)
code_template = Template('\n'.join(code_template))
code = code_template.substitute(call=call, return_result=return_result)
code = self.indent_code(code)
pre_arg_assign = self.indent_code('\n'.join(pre_arg_assign))
arg_assign = self.indent_code('\n'.join(arg_assign))
# Put everything together
return self.OPTION_TEMPLATE.substitute(
els=('} else ' if not is_first else ''),
arg_check=arg_checks,
call=call
pre_arg_assign=pre_arg_assign,
arg_assign=arg_assign,
code=code,
)
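The generator above is driven by string.Template substitution from the Python standard library. A small self-contained sketch of how an OPTION_TEMPLATE-style snippet is filled in; the argument-check and body strings are illustrative placeholders, not taken from a real declaration.

from string import Template

OPTION = Template("""${els}if ($arg_check) {
  $code
""")

first = OPTION.substitute(els='', arg_check='__argcount == 1',
                          code='/* unpack one argument and call the C function */')
second = OPTION.substitute(els='} else ', arg_check='__argcount == 2',
                           code='/* unpack two arguments and call the C function */')
print(first + second + '}')   # composed if/else-if dispatch, as emitted per option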

View File

@ -1,5 +1,6 @@
from . import CWrapPlugin
class ArgcountChecker(CWrapPlugin):
def process_all_checks(self, checks, option):
@ -7,6 +8,6 @@ class ArgcountChecker(CWrapPlugin):
checks = '__argcount == 0'
else:
indent = '\n '
checks = '__argcount == {} &&'.format(option['num_checked_args']) + \
indent + checks
argcount = option['num_checked_args'] + option.get('argcount_offset', 0)
checks = '__argcount == {} &&'.format(str(argcount)) + indent + checks
return checks

View File

@ -1,5 +1,6 @@
from . import CWrapPlugin
class ArgcountSortPlugin(CWrapPlugin):
def __init__(self, descending=True):
@ -11,4 +12,3 @@ class ArgcountSortPlugin(CWrapPlugin):
for declaration in declarations:
declaration['options'].sort(key=num_checked_args, reverse=self.descending)
return declarations

View File

@ -1,6 +1,7 @@
from . import CWrapPlugin
from string import Template
class ArgumentReferences(CWrapPlugin):
def initialize(self, cwrap):

Some files were not shown because too many files have changed in this diff