Compare commits

382 Commits

Author SHA1 Message Date
509dd57c2e tensor docs 2016-11-18 04:00:27 -05:00
7a837b7a14 fixing nn docs to be categorized, and optim docs 2016-11-18 03:18:48 -05:00
dee864116a optim docs 2016-11-17 21:09:17 -05:00
e51d0bef97 Add cuDNN bindings for 2D transposed convolution 2016-11-17 14:34:40 -08:00
2fd78112ab Add half copy/conversions 2016-11-17 14:34:33 -08:00
26d626a47c adding docs for loss functions, container, module and fix typos 2016-11-17 15:11:27 -05:00
071e68d99d fixing output size w / h order 2016-11-16 15:32:18 -08:00
78c1094d93 Don't override __call__ in modules 2016-11-16 15:32:18 -08:00
56fc639c9f Fix no bias mode of autogenerated THNN function 2016-11-16 15:32:18 -08:00
f8ae5c93e9 enables random functions for float and half types on cuda (#223) 2016-11-16 15:14:26 -08:00
56dd4132c4 add MACOSX_DEPLOYMENT_TARGET to instructions 2016-11-16 10:45:56 -05:00
ae6f2dd11c Adapt nn code to changes in THNN and THCUNN 2016-11-15 23:02:14 +01:00
456998f043 Merge commit 'aeed8a6ea4650d1092289a60e71d8d83875a0ba6' 2016-11-15 12:55:11 -08:00
c09f07edd9 Merge commit 'c82537462baa715b2c70726f7da8f734b2ad3a3f' 2016-11-15 12:53:29 -08:00
aeed8a6ea4 Remove duplicate entries and add optional marks in THCUNN.h 2016-11-15 21:22:14 +01:00
c82537462b [cutorch] remove syncing point from baddbmm
This change removes HtoD copies inside baddbmm. These copies
introduce a syncing point, which causes slowdowns in multi-GPU
training.

Test plan: Run unittests for baddbmm.
2016-11-15 11:55:36 -08:00
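For reference, a minimal Python sketch of the operation involved (the change is internal to cutorch; the call itself is unchanged, and the sync-point claim is taken from the commit message above):

```python
import torch

if torch.cuda.is_available():
    t = torch.randn(10, 3, 5).cuda()        # (b, n, p)
    batch1 = torch.randn(10, 3, 4).cuda()   # (b, n, m)
    batch2 = torch.randn(10, 4, 5).cuda()   # (b, m, p)

    # res = beta * t + alpha * bmm(batch1, batch2); beta and alpha default to 1.
    # Per the commit, the HtoD copies that used to happen inside this call
    # forced a device synchronization, hurting multi-GPU training throughput.
    res = torch.baddbmm(t, batch1, batch2)
    print(res.size())  # torch.Size([10, 3, 5])
```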
72a9df19c8 Merge pull request #598 from killeent/rr2
move random functions to generic (attempt 2)
2016-11-14 11:44:41 -05:00
5b9b9634f9 [cutorch rand2gen] various fixes 2016-11-14 08:13:30 -08:00
c279a91c03 Merge commit '64c8a1377335799b322ca41d323dee13118be0ab' 2016-11-13 21:54:27 -08:00
ef6a764509 Merge commit '1cee5a359c2828800db0c41ebe0108bd5eef9501' 2016-11-13 15:23:11 -08:00
4db5afdf7e Merge commit 'f2daa616d105d700b63f05c4d544befb6e65a036' 2016-11-13 15:20:03 -08:00
7867187451 Merge commit '4f8e6ec42abd5b9b5491a49bdfe1a637e6675207' 2016-11-13 15:19:10 -08:00
4f8e6ec42a [PATCH] Improve potrf error message. (#189) 2016-11-13 15:17:05 -08:00
64c8a13773 Remove comment. 2016-11-11 15:46:44 -08:00
395ab4a287 Fix SpatialDilatedMaxPooling shape check.
In nn, indices are 3d, but they are 4d in cunn.
2016-11-11 15:43:54 -08:00
15dc862056 more improvements on error messages and shape checks. 2016-11-11 15:43:49 -08:00
f2daa616d1 Revert "Move random functions to generic" 2016-11-11 18:15:01 -05:00
64a50f5ad3 Merge pull request #589 from killeent/random-refactor
Move random functions to generic
2016-11-11 17:56:39 -05:00
1d0f86144c [cutorch rand2gen] fix illegal memory access in multinomial code, update unit tests 2016-11-11 13:23:03 -08:00
89e93bba9d [cutorch rand2gen] test fixes, add floor to geometric distribution transform 2016-11-11 13:23:02 -08:00
3290d4c7d6 [cutorch rand2gen] extend functions to use _double methods 2016-11-11 13:23:02 -08:00
ca22befc93 [cutorch rand2gen] move randn to generic 2016-11-11 13:23:02 -08:00
b08df5b9c0 [cutorch rand2gen] partial move of logNormal to generic, needs further debugging 2016-11-11 13:23:01 -08:00
ebd3c3291c [cutorch rand2gen] move geometric to generic 2016-11-11 13:23:01 -08:00
16728d2f26 [cutorch rand2gen] move multinomial to generic 2016-11-11 13:23:00 -08:00
34dab66f44 [cutorch rand2gen] move cauchy to generic 2016-11-11 13:22:59 -08:00
3a111c7499 [cutorch rand2gen] move exponential to generic 2016-11-11 13:22:59 -08:00
3600c94ec5 [cutorch rand2gen] move normal to generic 2016-11-11 13:22:58 -08:00
e2f8b00e00 [cutorch rand2gen] move bernoulli to generic 2016-11-11 13:22:58 -08:00
65ed1eba48 [cutorch rand2gen] move uniform, rand to generic 2016-11-11 13:22:57 -08:00
7fff7977fe [cutorch rand2gen] make sampleMultinomialWithoutReplacement utility function generic 2016-11-11 13:22:57 -08:00
add5922aac [cutorch rand2gen] make sampleMultinomialWithReplacement utility function generic 2016-11-11 13:22:56 -08:00
a94b54a533 [cutorch rand2gen] make sampleMultinomialOnce utility function generic 2016-11-11 13:22:56 -08:00
bea82b9da6 [cutorch rand2gen] make renormRowsL1 utility function generic 2016-11-11 13:22:56 -08:00
2e7debe282 [cutorch rand2gen] introduce THCTensorRandom.cuh, move and templatize simple binary search function 2016-11-11 13:22:55 -08:00
1cee5a359c Fix checking and spacing of dilation parameters in SpatialDilatedConvolution
and SpatialDilatedMaxPooling.
2016-11-11 10:25:44 -08:00
b08862405e Remove extraneous shape check from SpatialDilatedConvolution. (#1029) 2016-11-11 12:53:48 -05:00
d57e1a6756 change to compile with msvc && export THCDescBuff for cunn 2016-11-11 13:56:13 +08:00
c9172c5bc9 change to work on windows && ptrdiff_t replacement 2016-11-11 13:33:36 +08:00
5d5e877a05 Fix implementation of logNormal 2016-11-10 18:35:45 -08:00
1e794c87ae adding bidirectional doc 2016-11-10 17:38:47 -08:00
d9cb1b545a Fix build on 32bit platform like JETSON TK1 2016-11-11 00:22:06 +00:00
23f611f14d Rename assertSameGPU_generic to assertSameGPU.
Also remove old assertSameGPU since there is no
longer both generic and non-generic support.
2016-11-10 15:40:41 -08:00
42b28d0d69 Merge pull request #370 from gchanan/sizeCheckErrorMessages
Improving error messages in nn.
2016-11-10 18:35:22 -05:00
d0cf5f7b65 Improving error messages in nn.
Differences from nn equivalent:
1) No changes to VolumetricConvolutionMM, which doesn't exist in cunn.
2) No changes to HardShrink, which doesn't  exist in cunn.
3) LookupTable doesn't verify that all inputs are within range.
2016-11-10 15:12:35 -08:00
4699c817e8 [cutorch rand2gen] fix illegal memory access in multinomial code, update unit tests 2016-11-10 15:10:12 -08:00
4f490c16e9 [cutorch rand2gen] test fixes, add floor to geometric distribution transform 2016-11-10 13:44:55 -08:00
bcdab7a632 Remove mul/div from THCHalfAutoNumerics as they've been moved to
THCNumerics.
2016-11-10 12:13:41 -08:00
7f51af7cbc adding dropout, bidirection, etc. to RNN (#214) 2016-11-10 13:25:14 -05:00
b4ae60cac8 Protect half operations with CUDA_HALF_TENSOR with generic modules. 2016-11-10 08:59:23 -08:00
a39ffebc3a Add THCTensor_(sizeDesc) for better debug messages. 2016-11-09 12:09:18 -08:00
4bba6082ed [cutorch rand2gen] extend functions to use _double methods 2016-11-09 11:55:51 -08:00
b111632965 [cutorch rand2gen] move randn to generic 2016-11-09 11:09:30 -08:00
0a34b34bfe [cutorch rand2gen] partial move of logNormal to generic, needs further debugging 2016-11-09 10:55:54 -08:00
6b821ece22 fixing trainer tests (#213) 2016-11-08 21:50:17 -05:00
d3b2096bfd trainer fix for new optim API 2016-11-08 15:49:03 -08:00
9f1b12bf06 Merge pull request #1009 from gchanan/spatialNNGeneric
Support generic type Spatial modules
2016-11-08 18:17:58 -05:00
e64fca4b04 Allow wider test tolerances for:
1) Size of half numbers
2) Convolution weight/bias
3) BatchNormalization
2016-11-08 13:47:01 -08:00
b941e73f4f ArgCheck that dilation parameters are > 0 and ensure tests
pick dilation parameters > 0.
2016-11-08 13:46:52 -08:00
c57873d3cb Add generic support for LookupTable.
In some cases, does not do accumulation as accreal.
2016-11-08 13:46:48 -08:00
f3bc3275ac Add generic support for TemporalConvolution.
Has increased tolerance for backward weight/bias like other
Convolution modules.
2016-11-08 13:46:45 -08:00
8df26e6c5c Add generic support for VolumetricFullConvolution, VolumetricDilatedConvolution.
Has increased tolerance for backward weight/bias like other
Convolution modules.
2016-11-08 13:46:33 -08:00
5c8ecb8150 Fix one more compatibility bug in Python 3.3 2016-11-08 16:13:25 -05:00
3928f7740a Implement functional interface for Variables (torch.*) 2016-11-08 16:13:25 -05:00
1767f73e6b Add generic support for VolumetricConvolution.
Uses the higher tolerances for weight/bias that are used for
SpatialConvolution modules.
2016-11-08 13:07:35 -08:00
9e7d5e93ab Add generic support for VolumetricReplicationPadding. 2016-11-08 13:07:35 -08:00
70c6ee93a2 Add generic support for VolumetricAveragePooling. 2016-11-08 13:07:35 -08:00
5cbf8504ef Add generic support for VolumetricMaxPooling, VolumetricMaxUnpooling,
VolumetricDilatedMaxPooling.
2016-11-08 13:07:35 -08:00
9a393b023d Add generic support for TemporalMaxPooling. 2016-11-08 13:07:35 -08:00
30bf464f73 Rebase BatchNormalization. 2016-11-08 13:06:52 -08:00
9fb1f8934b Add support for L1Cost.
Changes thrust::reduce to thrust::transform_reduce in order
to be able to do summation at accreal precision.
2016-11-08 13:01:06 -08:00
f3f02b23a0 Add generic support for SparseLinear.
We don't support SparseLinear with fp16 because of lack of cusparseHcsrmm
(or equivalent Ex function) until CUDA 8.0.
2016-11-08 13:01:06 -08:00
7668cdd32c Add generic support for DistKLDivCriterion. 2016-11-08 13:01:06 -08:00
f9dafdcf09 Add generic support for ClassNLLCriterion. 2016-11-08 13:01:06 -08:00
d284a419c1 Add generic support for BCECriterion.
Test skips comparing vs lua version for half type, because hdot is
not currently implemented in cutorch.
2016-11-08 13:01:06 -08:00
b45844e3d9 Add generic support for L1SmoothCriterion. 2016-11-08 13:01:06 -08:00
6caa7e0fff Add generic support for MultiLabelMarginCriterion. 2016-11-08 13:01:06 -08:00
1669fffb8d Add generic support for MultiMarginCriterion.
Accumulation is done at accreal precision and changes target tensor
indexing to THCIndexTensor.
2016-11-08 13:01:06 -08:00
18aa86eebd Add generic support for MSECriterion. 2016-11-08 13:01:06 -08:00
075e49d3f4 Add generic support for SoftMarginCriterion. 2016-11-08 13:01:06 -08:00
a6695b8365 Add generic support for MarginCriterion. 2016-11-08 13:01:06 -08:00
06ee48b391 Add generic support for AbsCriterion. 2016-11-08 13:01:06 -08:00
fcaeffbbd4 Fix spacing in SpatialDilatedMaxPooling. 2016-11-08 13:01:06 -08:00
6146a9a641 Generic support for SpatialFullConvolution and SpatialDilatedConvolution.
Uses matrix multiply for matrix-vector multiply for half (no matrix-vector
implementation exists).
2016-11-08 13:01:06 -08:00
83de8e40d5 Add generic support for SpatialFractionalMaxPooling. 2016-11-08 13:01:06 -08:00
30590c46a3 Generic support for SpatialConvolutionMM.
Still need Hgemv.
2016-11-08 13:01:06 -08:00
a3a5e56287 Add generic support for SpatialConvolutionLocal. 2016-11-08 13:01:06 -08:00
185c96d63a Add generic support for SpatialUpSamplingBilinear.
Math is done at accreal precision.  At real precision,
forward pass fails, but backward passes.  We do backward
pass at accreal precision for consistency.
2016-11-08 13:01:06 -08:00
be61ad6eb4 Add generic support for SpatialUpSamplingNearest.
Accumulates as AccType.
2016-11-08 13:01:06 -08:00
222dfd2259 Add generic support for SpatialReplicationPadding. 2016-11-08 13:01:06 -08:00
b06e1c7e1d Add generic support for SpatialReflectionPadding. 2016-11-08 13:01:06 -08:00
6876abba51 Add generic support for SpatialSubSampling.
Half types fail on backward, probably because we don't consistently
accumulate in accreal.  This is difficult because gradInput is
accumulated directly (either with atomicAdd or not) rather than
in another variable.
2016-11-08 13:01:06 -08:00
0798466a01 Generic support for SpatialCrossMapLRN
Removed the C-linkage for a couple of functions because they are now generic --
not sure if they were used by anyone outside.
2016-11-08 13:01:06 -08:00
2cda782273 Add generic support for SpatialAveragePooling. 2016-11-08 13:01:06 -08:00
7d1c9554b6 Add generic support for SpatialAdaptiveMaxPooling. 2016-11-08 13:01:06 -08:00
a29d16f1a8 Use THCIndexTensors more generally. 2016-11-08 13:01:06 -08:00
6d0c1c0f17 Use indices for SpatialAdaptiveMaxPooling indices. 2016-11-08 13:01:06 -08:00
5ed4b5c25b Add generic support for SpatialMaxUnpooling. 2016-11-08 13:01:05 -08:00
6fe89c5e44 Fix tests 2016-11-08 13:01:05 -08:00
fda8c37641 Add generic support for SpatialMaxPooling.
Also fix tests for SpatialDilatedMaxPooling.
2016-11-08 13:01:05 -08:00
6d5a0ff3a1 Get SpatialDilatedMaxPooling generic working with long tensors as index.
Does as much math as possible in accreal to try to suss out why CudaHalfTensor fails.
2016-11-08 13:01:05 -08:00
f8718dd355 Add generic support for SpatialDilatedMaxPooling. 2016-11-08 13:01:05 -08:00
85af686797 Add generic support for SpatialClassNLLCriterion. 2016-11-08 13:01:05 -08:00
0f6ec3f15f Remove fastExpIfAvail and benchmarking from functional tests.
Also fix broken IFNDEF and test whitespace.
2016-11-08 13:01:05 -08:00
44644c50ee Reorganize THCHalfAutoNumerics. 2016-11-08 13:01:05 -08:00
9749f7eacc Add generic support for RReLU. 2016-11-08 13:01:05 -08:00
d9a2bdb9df Add generic support for PReLU.
This is the first instance of functions that take a lua number but
are not reals in C.  So, instead of automatically converting lua
numbers in the half case, we parse the function definitions to
find the argument positions to convert.
2016-11-08 13:01:05 -08:00
57e678c94b fix logsoftmax 2016-11-08 13:01:05 -08:00
516f127cfd Add generic support for LogSoftMax. 2016-11-08 13:01:05 -08:00
e477add103 Add generic support for SoftMax.
Math is done at accreal precision (e.g. for half,
math is done at float precision).  Originally code
called __expf, which doesn't have a double equivalent;
we call exp instead of converting down.
2016-11-08 13:01:05 -08:00
ba3d577875 Add generic support for ELU. 2016-11-08 13:01:05 -08:00
917e4f47c4 Add generic support for SoftShrink. 2016-11-08 13:01:05 -08:00
0143dac247 Add generic support for Square.
Math is (arbitrarily?) done at double precision to
keep the intent of existing code.
2016-11-08 13:01:05 -08:00
d2390f3616 Add generic support for Sqrt. 2016-11-08 13:01:05 -08:00
949ea73402 Add generic support for LeakyReLU. 2016-11-08 13:01:05 -08:00
d1e2fe0efe Add generic support for Threshold. 2016-11-08 13:01:05 -08:00
584ada12bf Add generic support for LogSigmoid.
This has the same logic as Sigmoid; i.e.
math is done at double precision and then
stored back at desired precision.
2016-11-08 13:01:05 -08:00
3ead72f654 Add generic support for Sigmoid.
This maintains the existing logic of doing the math in
double precision and converting back to the intended
type (previously: just float).  We do the same for
half here, although perhaps we should do the math
at float in that case.

There is some question about what to do with conversions:
Sigmoid did its math in double before converting back to float,
and we keep that intent here, although it is unclear whether that
was deliberate. For half, should we go up only to float, or all
the way up to double?
2016-11-08 13:01:05 -08:00
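As a rough Python illustration of the pattern discussed above (promote, compute, cast back); this is not the CUDA kernel, just the same idea at tensor level:

```python
import torch

def sigmoid_via_double(x):
    # Do the math at double precision, then store the result back at the
    # tensor's own precision, mirroring the approach described above.
    return torch.sigmoid(x.double()).type_as(x)

x = torch.randn(4).float()
print(sigmoid_via_double(x))
```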
9ce96d3bd3 Add generic support for Abs. 2016-11-08 13:01:05 -08:00
5549c003d9 Add generic support for HardTanh. 2016-11-08 13:01:05 -08:00
46105bf90b Add generic support for Tanh. 2016-11-08 13:01:05 -08:00
73ce3b3702 Add generic support for SoftPlus.
Adds the ability to "genericize" cunn modules that can exist
simultaneously with non-generic modules (i.e. modules can
be genericized one at a time).  Allowing both generic and
non-generic modules simultaneously requires some extra code
that can be removed once every module is genericized.
Also genericizes SoftPlus in this way.
2016-11-08 13:01:05 -08:00
1c6225dc2f [cutorch rand2gen] move geometric to generic 2016-11-08 10:47:28 -08:00
44874542c8 fix printing in console (#208) 2016-11-08 13:42:26 -05:00
31f2846aff [cutorch rand2gen] move multinomial to generic 2016-11-08 09:34:19 -08:00
bc08011e72 Don't longjmp out of omp loops in unpooling modules 2016-11-08 18:12:56 +01:00
7cccc216d0 ArgCheck that dilation parameters are > 0. 2016-11-08 18:12:56 +01:00
09493603f6 Change optimizer API 2016-11-08 18:12:56 +01:00
e799bd0ba9 Restrict in-place autograd ops to disjoint variables 2016-11-08 18:12:56 +01:00
40247b0382 Fix torch tests in Python 3.3 and 3.4 2016-11-08 18:12:56 +01:00
cd2e9c5119 [cutorch rand2gen] move cauchy to generic 2016-11-08 08:11:39 -08:00
0b6f7b12b1 [cutorch rand2gen] move exponential to generic 2016-11-08 08:04:26 -08:00
86e42ba291 Adding truncated tensor printing (#202)
* Adding truncated tensor printing
2016-11-08 10:05:30 -05:00
e0a18cafd3 Don't longjmp out of omp loops in unpooling modules 2016-11-08 13:23:43 +01:00
8c2f77cab6 updated autogen docs 2016-11-07 17:19:00 -05:00
c1bd6ba1e1 Zero-initialize outputs for BLAS functions 2016-11-07 22:50:56 +01:00
df59b89fbb Add more optimizers 2016-11-07 22:50:56 +01:00
8fd9cc160c [cutorch rand2gen] move normal to generic 2016-11-07 13:26:59 -08:00
28e3f07b63 adding apply function 2016-11-07 16:17:49 -05:00
513d902df1 adding __repr__ for nn 2016-11-07 16:17:40 -05:00
fce14a9f51 [cutorch rand2gen] move bernoulli to generic 2016-11-07 13:16:10 -08:00
884107da01 [cutorch rand2gen] move uniform, rand to generic 2016-11-07 12:27:30 -08:00
caa79a354a [cutorch rand2gen] make sampleMultinomialWithoutReplacement utility function generic 2016-11-07 10:33:03 -08:00
5bb873a2fe [cutorch rand2gen] make sampleMultinomialWithReplacement utility function generic 2016-11-07 10:28:19 -08:00
bc0442d7df [cutorch rand2gen] make sampleMultinomialOnce utility function generic 2016-11-07 10:15:13 -08:00
cfcd33552b [cutorch rand2gen] make renormRowsL1 utility function generic 2016-11-07 10:02:21 -08:00
5f6b9fd5ba [cutorch rand2gen] introduce THCTensorRandom.cuh, move and templatize simple binary search function 2016-11-07 08:31:19 -08:00
469dce4a2d skip test_scatter_gpu on no CUDA 2016-11-05 20:10:07 -04:00
55d32de331 Fix bugs in torch.legacy.nn and add regression tests 2016-11-05 22:48:52 +01:00
4491d2d3cb Expose ger, mv, mm, bmm as tensor methods 2016-11-05 22:48:52 +01:00
f9669b9b9a Merge pull request #583 from nicolasvasilache/master
THC UVA Allocator
2016-11-05 11:50:07 -04:00
246d5f37c7 THC UVA Allocator 2016-11-05 02:40:44 +00:00
293bfb03dd Merge commit '4def4e696b9079f587d0dba3e86423df5ea429b8' 2016-11-03 14:12:22 -07:00
4def4e696b fix result type 2016-11-03 14:10:49 -07:00
b6e58c030a enable dot for CUDA_HALF 2016-11-03 13:50:50 -07:00
bf00308ab2 Merge commit 'fd677945741b4ee353079911993ada3770e07f5c' 2016-11-03 13:31:12 -07:00
e3e786e35e Move source code checks from __getstate__ to torch.load (#200)
The __getstate__ and __setstate__ functions are called from copy.copy as
well as pickling. The source code inspection currently slows down the
data parallel code because it makes a copy of the object every
iteration.
2016-11-03 16:29:14 -04:00
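A small standard-library illustration of the problem described above (the class here is hypothetical): copy.copy routes through __getstate__/__setstate__ when they are defined, so any expensive work there runs on every copy, not only when pickling.

```python
import copy

class Wrapped(object):
    def __init__(self, data):
        self.data = data

    def __getstate__(self):
        # Anything slow here (e.g. source-code inspection) is paid by
        # copy.copy as well as by pickle.dump.
        print("__getstate__ called")
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

w = Wrapped([1, 2, 3])
w2 = copy.copy(w)   # prints "__getstate__ called"
```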
fd67794574 Merge pull request #581 from torch/dotfix
making dot to have an accreal return type (consistent with CPU)
2016-11-03 12:51:27 -04:00
104b502919 ArgCheck that dilation parameters are > 0. 2016-11-03 09:02:22 -07:00
a18cd3ba92 ArgCheck that dilation parameters are > 0. 2016-11-03 09:01:43 -07:00
0676cad200 Merge commit 'e644f6ed2c1965b0de55cc9037d5c75245f63d54' 2016-11-03 08:36:42 -07:00
3b1d217310 Merge commit 'e32af0196e10ad11b3938ad73ec5ef49cac7c03e' 2016-11-03 08:36:04 -07:00
93bcb2e7ba making dot to have an accreal return type (consistent with CPU) 2016-11-02 16:40:54 -07:00
ebc70f7919 Look for libcudart in default CUDA installation paths (#195) 2016-11-02 19:36:10 -04:00
e32af0196e Merge pull request #828 from apaszke/lapack
Add more size checks and improve some LAPACK error messages
2016-11-02 18:53:45 -04:00
3e5c121c56 Adding !!inc to cwrap and splitting up TensorMethods.cwrap (#197)
* Adding !!inc to cwrap and splitting up TensorMethods.cwrap
2016-11-02 18:50:56 -04:00
e644f6ed2c Add supporting code for CUDA IPC
This adds three small pieces to help with sharing THCStorages across
processes:

 1. THCIpcAllocator: a THCDeviceAllocator to close shared memory handles in the
    child process.
 2. THCCachingAllocator_getBaseAllocation which returns the pointer and
    size of the underlying cudaMalloc allocation. This is necessary
    because cudaIpcGetMemHandle requires 'base' pointers
 3. Support for TH_STORAGE_VIEW in THCStorage_(free). This is useful in
    child processes to represent THCCachingAllocator allocations split
    from a larger cudaMalloc call.
2016-11-02 14:53:28 -07:00
551a7c72f3 Fix multiprocess serialization with "spawn" or "forkserver" (#198) 2016-11-02 17:44:36 -04:00
05b121841e Add more size checks and improve some LAPACK error messages 2016-11-02 21:51:51 +01:00
c29aea89ee Merge pull request #827 from howard0su/freebsd
Fix compile error on freebsd
2016-11-02 16:10:50 -04:00
103e70ccc5 adding cuda types for tensor methods (#194) 2016-11-02 10:25:58 -04:00
ec7ecbe2dd Fix compile error on freebsd 2016-11-02 20:27:05 +08:00
7a06dbb87e Merge commit '1234e434fa2b6ddd440194c8bccd352593902c69' 2016-11-01 21:33:41 -07:00
1234e434fa TH_INDEX_BASE for nonzero 2016-11-01 21:08:52 -07:00
2d374f982e Changes for ccache nvcc support 2016-11-01 15:54:33 -04:00
4e73630a95 Fix criterion backward, that was modifying grad_output shape 2016-11-01 19:31:53 +01:00
e867baa5f9 Accept file paths in torch.save and torch.load 2016-11-01 19:31:53 +01:00
04b750cb52 Improve Parameter's __repr__ 2016-11-01 19:31:53 +01:00
97c7b12542 Fix Variable __setstate__ refcounting bugs 2016-11-01 19:31:53 +01:00
0dfec752a3 Merge commit 'f16f68e103dfc22921f6106ec7136ddc7a0ab087' 2016-11-01 10:38:13 -07:00
f16f68e103 CMake: Install generic/THCTensorMathScan.h 2016-11-01 16:07:07 +01:00
4b7f8f9b77 adding notes for compiling from source 2016-11-01 01:27:28 -04:00
9969d50833 fix for CPU-only builds 2016-11-01 01:19:37 -04:00
7355c63845 adding multiple types for dist 2016-10-31 21:26:19 -07:00
16cac6442a adding multiple types for cumsum, cumprod 2016-10-31 21:26:19 -07:00
5009ae5548 adding multiple types for pow, trace, diag, tril, triu 2016-10-31 19:26:08 -07:00
32647e285e implement torch.nonzero 2016-10-31 18:22:49 -07:00
6df334ea68 Improve potrf error message. (#189) 2016-10-31 18:48:29 -04:00
f8501042c1 Make _requires_grad Variable attribute writeable 2016-10-31 22:47:09 +01:00
be085b8f6c Allow marking non-leaf variables as non-requiring grad 2016-10-31 22:47:09 +01:00
ef557761dd Allow to not use all function outputs in autograd 2016-10-31 22:47:09 +01:00
15377ac391 Copy Module._buffers in nn.parallel.replicate (#180) 2016-10-31 12:12:29 -04:00
ad5fdef6ac Make every user-visible Tensor have a Storage (#179) 2016-10-31 12:12:22 -04:00
0cb5943be8 Fix NCCL reduce_scatter in Python 2.7 (#183) 2016-10-30 17:58:02 -04:00
fb593d5f28 Fix bugs in variable __setitem__ and improve __getitem__ 2016-10-30 00:16:06 +02:00
645c913e4f Print GPU id for CUDA tensors 2016-10-30 00:16:06 +02:00
b4f4cca875 Rename training and evaluation methods 2016-10-30 00:16:06 +02:00
6027513574 Add support for indexing with numpy types 2016-10-30 00:16:06 +02:00
849188fdab Fix multiprocessing 2016-10-29 14:23:23 -07:00
a9c14a5306 Remove unused code 2016-10-28 15:28:22 -07:00
2da36a14d1 Clean up cuDNN code and fix chooseBackwardFilterAlgorithm 2016-10-28 13:05:53 -07:00
2ee451f5f7 Build in Release mode 2016-10-28 12:51:19 -07:00
f2d7e94948 Use torch.Size for Tensor sizes and tuple for strides
See issue #20

The torch.Size class is a tuple subclass which distinguishes sizes from
other tuples so that torch.Tensor(size) is interpreted as size instead
of data.
2016-10-28 19:37:09 +02:00
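A short example of the distinction this commit introduces (behavior as described in the message above):

```python
import torch

size = torch.randn(4, 5).size()
print(isinstance(size, tuple))  # True: torch.Size subclasses tuple
print(size)                     # torch.Size([4, 5])

# A torch.Size argument is interpreted as a shape...
a = torch.Tensor(size)          # uninitialized 4x5 tensor
# ...while a plain list is interpreted as data.
b = torch.Tensor([4.0, 5.0])    # 1-D tensor holding the values 4 and 5
print(a.size(), b.size())
```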
2031dfc08a Add hdot support for CUDA 8.
If not compiled with CUDA 8+, an error is raised indicating that
CUDA 8.0+ is required.
2016-10-27 15:01:09 -07:00
34ede14877 Fix compile error due to THCStorage change 2016-10-27 14:27:10 -07:00
2af3098e5a Merge commit '42e835ebb81a3ecf8f76e15bb1866c1427f61d74' 2016-10-27 13:49:23 -07:00
2e44511b13 Merge commit 'bbe8627a3f0e6cbb8fd1952826f75df741e44b01' 2016-10-27 13:47:36 -07:00
7bc4aa7e72 Merge commit '2bd36604e298547cc66f175588c925271223b4e9' 2016-10-27 13:46:38 -07:00
e2458bce97 Add Parameter class to nn 2016-10-27 22:31:36 +02:00
ae9789fccc adding input / output / member sections to the docgen 2016-10-27 01:11:53 -04:00
45ef25ea27 fix rnn documentation typos and format 2016-10-27 01:11:53 -04:00
ad2d413c0b Add C++ bindings for cuDNN (#167)
The Python ctypes bindings overhead was high enough that it slowed down
multi-gpu training when using 4+ Maxwell GPUs.
2016-10-26 19:51:48 -04:00
30924ff1e0 Fix test_nonzero flakiness (#173) 2016-10-26 19:50:56 -04:00
383c48968f Add support for indexing with ellipsis (#172) 2016-10-26 19:50:44 -04:00
bbe8627a3f Use 'void' for no-arg functions 2016-10-26 12:44:34 -07:00
2bd36604e2 Fix no-arg function prototypes 2016-10-26 12:35:05 -07:00
9ed47ef531 fix bug in mmaping 2016-10-26 07:23:04 -07:00
139f98a872 pushing THCState back to the header 2016-10-25 18:23:53 -07:00
c825895190 Make KwargsPlugin output deterministic 2016-10-26 00:19:33 +02:00
42e835ebb8 Add sameGPU checks to BatchNormalization (#361) 2016-10-25 15:19:03 -04:00
a7d5fdf54e Add integer indexing for MultiLabelMarginCriterion. 2016-10-25 11:42:56 -07:00
3b4e41f6ec Add integer indexing for MultiMarginCriterion. 2016-10-25 10:19:53 -07:00
5505e1de7d Store the device in THCStorage 2016-10-25 07:21:54 -07:00
6d329e418b allocator updates 2016-10-25 07:07:52 -07:00
3a11afb57f some bugfixes for THC 2016-10-24 17:16:17 -07:00
df86e02c9e update nn docs 2016-10-24 17:20:00 -04:00
deebc1383e Show exponent when printing vectors 2016-10-24 22:30:11 +02:00
19f2f1a9d3 Buffer values when constructing a CUDA tensor from a sequence 2016-10-24 22:30:11 +02:00
4dc13ecdd8 Make tests deterministic 2016-10-24 22:30:11 +02:00
b4b6e356ef Fix clang warnings 2016-10-24 22:30:11 +02:00
9000f40e61 Add torch.from_numpy 2016-10-24 22:30:11 +02:00
f137c0c05a Improve error messages of stateless functions 2016-10-24 22:29:43 +02:00
b43a02a9aa Make random 0-based 2016-10-24 22:29:43 +02:00
30be715900 Add training and evaluation to torch.nn 2016-10-24 22:29:43 +02:00
71cf8e14cb Fixes in torch.legacy.nn 2016-10-24 22:29:43 +02:00
ffd4863b23 Don't build nccl on macOS 2016-10-24 22:29:43 +02:00
4c17098bb8 Fix platform detection in torch.cuda 2016-10-24 22:29:43 +02:00
bcfdd18599 Fix python2.7 compatibility and check cffi version in ffi utils 2016-10-24 22:29:43 +02:00
067662d280 making .numpy return writeable arrays (#164) 2016-10-24 16:23:28 -04:00
93d02e4686 Merge pull request #129 from adamlerer/cudnn_rnn
CuDNN + PyTorch RNN library
2016-10-24 15:00:02 -04:00
12de115305 Fix Lua->Python logic in legacy.optim 2016-10-24 20:04:23 +02:00
b5d13296c6 addressing comments 2016-10-23 21:11:22 -07:00
86288265ad Adding rnn cell library 2016-10-23 20:23:48 -07:00
a559d94a44 docs and such 2016-10-23 20:23:48 -07:00
1eb6870853 add nobias option to rnn 2016-10-23 20:23:48 -07:00
f88c3e9c12 fix some missing features in pytorch needed for RNNs 2016-10-23 20:23:48 -07:00
942ca477a6 Copying weights for CUDNN 2016-10-23 20:23:48 -07:00
b0e33fb473 cudnn + THNN match with parameters 2016-10-23 20:23:48 -07:00
d58b627b98 CUDNN RNN bindings 2016-10-23 20:23:48 -07:00
b85fc35f9a Fix for versions compiled without CUDA support (#155)
* Fix pytorch when compiling without CUDA support
* Skip print test with CUDA types if CUDA is not available
2016-10-23 13:03:10 +02:00
bcb466fb76 fix bug with numpy conversion and storageOffset > 0 (#154) 2016-10-22 11:56:18 -04:00
6db721b5dd Make DataLoader preserve the ordering of the dataset (#135) 2016-10-21 23:54:16 -04:00
140c65e52b fixing python setup.py clean 2016-10-21 23:20:02 -04:00
29e8d77ce0 Merge pull request #558 from gchanan/genericDeviceTensorUtils
Add generic type support for toDeviceTensor.
2016-10-19 18:19:13 -04:00
b66a4ea919 Add THNN_CHECK_DIM_SIZE_INDICES to avoid pointer conversion warnings. 2016-10-19 15:01:49 -07:00
d3d59e5024 Indices for nn. 2016-10-19 14:53:19 -07:00
5285da0418 Use index types for SpatialAdaptiveMaxPooling indices. 2016-10-19 14:53:10 -07:00
a76e69d709 Use index types for Max Pooling / Unpooling indices. 2016-10-19 14:52:58 -07:00
4d0d775d16 Add generic type support for toDeviceTensor. 2016-10-19 14:36:03 -07:00
98f67e90d5 Fix super call in Container.modules and Container.parameters (#142) 2016-10-19 13:21:03 -04:00
fee67c2e1a Allow parameters and child modules to be assigned by attribute (#136)
For example:
  self.linear = nn.Linear(10, 20)
  self.weight = torch.autograd.Variable(torch.Tensor(10, 20))
2016-10-18 23:34:20 +02:00
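A slightly fuller (hypothetical) module sketch of what this enables, using today's nn.Parameter and named_parameters names; assigning a submodule or parameter to an attribute registers it with the parent module:

```python
import torch
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.linear = nn.Linear(10, 20)                  # registered as a child module
        self.weight = nn.Parameter(torch.randn(10, 20))  # registered as a parameter

    def forward(self, x):
        return self.linear(x) + x.mm(self.weight)

model = MyModel()
print(sorted(name for name, _ in model.named_parameters()))
# ['linear.bias', 'linear.weight', 'weight']
```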
c295f26a00 Support async argument to Variable.cuda (#137) 2016-10-18 23:27:11 +02:00
8a09c45f28 Fix typo 2016-10-18 09:29:19 -07:00
79ead42ade Add CUDA Stream and Event API (#133) 2016-10-18 12:15:57 -04:00
94e52e1d17 Fix Variable.cat 2016-10-17 15:36:08 -07:00
3931beee81 Use THSetNumThreads instead of omp_set_num_threads
Set OMP num threads to one in the data loader.

Fixes #81
Fixes #82
2016-10-17 15:15:00 -04:00
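At the Python level the corresponding knobs are torch.get_num_threads / torch.set_num_threads; a trivial sketch of what the data-loader change amounts to:

```python
import torch

print(torch.get_num_threads())  # current TH/OpenMP thread count
torch.set_num_threads(1)        # what data loader workers do, per the commit
print(torch.get_num_threads())  # 1
```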
d293c17d21 Merge commit '1a3920e5dc546803ec8ada369ff1b0d56cf24e76' 2016-10-17 10:29:41 -07:00
1a3920e5dc Expose OpenMP num threads through TH lib
Expose omp_set_num_threads and similar APIs through the TH lib. This
means third-party libraries using TH don't need to be compiled with
OpenMP support just to control the number of TH OMP threads.
2016-10-17 10:09:10 -07:00
ffc3eb1a24 Exclude THNN Linear in favor of Python implementation 2016-10-17 09:53:20 -07:00
2f5d4a7318 gcc 5 + cuda < 8 workaround improved 2016-10-17 12:46:21 -04:00
70553f4253 gcc 5 + cuda < 8 workaround improved 2016-10-17 12:45:45 -04:00
8d39fb4094 Use new THC API for device allocator 2016-10-17 09:35:41 -07:00
7d10b2370f Merge commit 'ec7a2878013ec70a4d4a8bfb6f5e5503f87f9ea0' 2016-10-17 09:35:04 -07:00
31ec7650ac Merge commit '429f2d67652f4fcba0bbf65c7d3e109e136a9cdf' 2016-10-17 09:33:06 -07:00
c014920dc1 Merge commit 'b01c78580594c53e6afb02b3d2110577a4673308' 2016-10-17 09:32:01 -07:00
17e3d4e1ee Merge commit '38cb3d02270b9e558a891a9a2bef01a75d1bd9e1' 2016-10-17 09:31:38 -07:00
b01c785805 Fix cutorch.getStream()
state->numUserStreams does not include the NULL stream, which is stored
in res->streams[i]
2016-10-17 08:49:23 -07:00
0eea71f878 torch.cat for multiple cuda types 2016-10-17 01:56:33 -04:00
ec7a287801 Merge pull request #1006 from torch/errorsimprovements
more improvements on error messages and shape checks
2016-10-17 00:46:21 -04:00
4bc585a2fe more improvements on error messages and shape checks 2016-10-17 00:37:50 -04:00
429f2d6765 fixes to upsampling bilinear API 2016-10-17 00:30:25 -04:00
a0c7e3cf04 Merge pull request #550 from colesbury/streams
Add stream API that is not based on indices
2016-10-16 19:08:03 -04:00
9cd68129da fixing typo 2016-10-16 19:07:09 -04:00
aa6f6117b7 Ported Linear module to THNN 2016-10-16 17:49:47 +02:00
6fa9c87aa4 Merge pull request #548 from BTNC/win-msvc
make cunn compile with msvc && fix compilation failure for linux/mac os
2016-10-15 22:07:52 -04:00
ee14cf9438 Add support for pinned memory: (#127)
torch.Storage/Tensor.pin_memory()
 torch.Storage/Tensor.is_pinned()
2016-10-15 18:38:26 -04:00
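A minimal usage sketch of the API named in this commit (requires a CUDA build):

```python
import torch

if torch.cuda.is_available():
    x = torch.randn(1000)
    print(x.is_pinned())          # False
    x_pinned = x.pin_memory()     # copy in page-locked (pinned) host memory
    print(x_pinned.is_pinned())   # True
    # Pinned host memory allows faster, asynchronous copies to the GPU.
    y = x_pinned.cuda()
```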
0391bbb376 Fix view_as and view for empty tensors (#128) 2016-10-15 18:33:05 -04:00
28ada0c634 update md docs 2016-10-14 18:56:24 -04:00
2c233d23ad Add stream API that is not based on indices
This implements the THC code so that we can expose streams as objects
instead of simply referring to them by indices. This is not exposed in
Lua yet.
2016-10-14 15:25:38 -07:00
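The commit above is about the THC/C side; in the Python frontend the same object-style API surfaces as torch.cuda.Stream. A hedged sketch:

```python
import torch

if torch.cuda.is_available():
    s = torch.cuda.Stream()            # a stream object, not an integer index
    a = torch.randn(1024, 1024).cuda()
    with torch.cuda.stream(s):         # run work on the side stream
        b = a.mm(a)
    torch.cuda.current_stream().wait_stream(s)  # order the default stream after it
```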
59c628803a fixing padding_idx option 2016-10-14 15:05:21 -07:00
6b830bc77f Merge pull request #78 from colesbury/nccl
Use NCCL in comm.py if available
2016-10-14 17:44:11 -04:00
f30081a313 Use NCCL bcast and reduce functions in comm 2016-10-14 14:16:32 -07:00
c15648c6b5 Add NCCL build scripts 2016-10-14 14:16:32 -07:00
a02917f502 Fix typo 2016-10-14 14:07:29 -07:00
70d8bd04c0 Make cuDNN descriptors extend object
Fixes weird double __del__ issue
2016-10-14 13:58:20 -07:00
ad2cee0cae Fix caching allocator when used from multiple Lua threads
Use a single, global THCCachingAllocator instance.

Previously, each Lua thread had its own THCCachingAllocator instance.
However, threads can share storages, which means a segment could be
allocated from one THCCachingAllocator and freed on another, which
breaks.

Fixes #539
2016-10-14 10:08:56 -07:00
756a7122ad torchdoc 2016-10-14 04:18:10 -04:00
3d6ebde756 qr and ormqr tests and bugfix 2016-10-14 03:10:16 -04:00
daa30aa992 fix typo 2016-10-13 23:11:32 -07:00
39459eb238 make cunn compile with msvc && fix compilation failure for linux/mac os 2016-10-14 12:54:00 +08:00
0325e2f646 Major autograd refactor
Improves autograd performance by more than 2x and fixes a couple
of bugs. All core functions have been moved to C.
2016-10-13 17:17:49 -07:00
93b8b5631f Improve CUDA tensor constructor speed 2016-10-13 17:16:39 -07:00
60ab1ce0c1 Stop using contextlib for device and device_of 2016-10-13 17:16:39 -07:00
2f186df52d removing CUDA_HALF_INSTRUCTIONS and enabling hgemm only for P100 2016-10-13 16:52:40 -07:00
452e07d432 Revert "change to work on windows && replace long with ptrdiff_t" 2016-10-13 18:09:34 -04:00
05d1404b9c Revert "changes to make cunn compile on windows with msvc" 2016-10-13 18:08:56 -04:00
2acee24332 Add keyword argument support to most tensor functions 2016-10-13 12:32:04 -04:00
e7639e55f8 change to work on windows && replace long with ptrdiff_t 2016-10-13 23:44:28 +08:00
f978eca477 change to work on windows && replace long with ptrdiff_t 2016-10-13 22:55:58 +08:00
eb3ac2b367 changes to make cunn compile on windows with msvc 2016-10-13 22:22:23 +08:00
968d386b36 Make atomicAdd functions static inline. 2016-10-12 15:18:30 -07:00
38cb3d0227 Fix build when NEON is supported 2016-10-12 12:51:22 +00:00
6f606dd5f9 updating nn docs 2016-10-11 14:41:25 -04:00
bab616cf11 Fix OOM error message in tensor constructor 2016-10-10 20:51:15 -07:00
966adc6291 Simplify torch.cat 2016-10-10 20:51:15 -07:00
518cb6ec7c Allow specifying output size in MaxUnpooling 2016-10-10 20:51:15 -07:00
34bcd4c237 Rename FullConv to ConvTranspose and allow specifying output size 2016-10-10 20:51:15 -07:00
a121127082 Merge remote-tracking branch 'upstream/master' into more-generic-functions 2016-10-10 10:09:43 -07:00
50326e94b1 try cudnn 5.1.5 and 5.1.3 in that order to load them up. This is needed because cudnn for cuda 7.5 ships with 5.1.3 and cudnn for cuda 8.0 ships with 5.1.5 2016-10-09 22:26:43 -04:00
160723b5b4 fix cudnn lib name 2016-10-09 21:19:50 -04:00
7991125293 Improve error messages 2016-10-08 20:37:40 -07:00
96f61bff30 Add LAPACK functions 2016-10-08 20:37:37 -07:00
a94488f584 replace long with ptrdiff_t for memory size/offset, element count 2016-10-08 21:39:16 +08:00
f2cf673d3a fix tensor printing when the tensor is a view into a giant storage 2016-10-07 17:53:37 -04:00
c4595a3dd6 [cutorch refactor] addcmul/addcdiv to generic 2016-10-07 13:09:05 -07:00
5db118e64b Update LogSoftMax to work in spatial domain 2016-10-07 16:08:39 -04:00
1620c56808 [cutorch refactor] cmin/cmax to generic 2016-10-07 11:50:28 -07:00
e88e0026b1 [cutorch refactor] make dist(...)'s op generic, add missing unit test 2016-10-07 11:50:28 -07:00
ace9b49e28 [cutorch refactor] move cross(...) to generic 2016-10-07 11:50:28 -07:00
da90751add [cutorch refactor] move lerp(...) to generic 2016-10-07 11:50:28 -07:00
8cc566f7b5 [cutorch refactor] move clamp(...) to generic 2016-10-07 11:50:28 -07:00
02ad199905 [cutorch refactor] make var(...) generic 2016-10-07 11:50:28 -07:00
c3e0811d86 [cutorch refactor] cleanup code in prep for review 2016-10-07 11:50:28 -07:00
499d1c5709 [cutorch refactor] fixes for norm, wrap/test 2016-10-07 11:50:28 -07:00
cf16ec45e1 [cutorch refactor] move stdall into generic, wrap test for std 2016-10-07 11:50:27 -07:00
daa15dcceb [cutorch refactor] move varall into generic 2016-10-07 11:50:27 -07:00
32556cbe5e [cutorch refactor] move normall to generic 2016-10-07 11:50:27 -07:00
74d9c674f5 Make _norm(...)'s ops generic 2016-10-07 11:50:27 -07:00
a4da558fa0 [cutorch refactor] move mean function into generic/ 2016-10-07 11:50:27 -07:00
dba6d1d57f Make _norm(...)'s ops generic 2016-10-07 11:50:27 -07:00
b01c4338c9 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
811d947da3 [cutorch refactor] move renorm function into generic 2016-10-07 11:50:27 -07:00
de7bf7efe6 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
5537df9927 [cutorch refactor] make _renorm(...)'s ops generic 2016-10-07 11:50:27 -07:00
81fea93741 [cutorch refactor] move std function into generic 2016-10-07 11:50:27 -07:00
df1065a2d8 Move _std dependencies into THCTensorMathReduce.cuh 2016-10-07 11:50:27 -07:00
c2e3bf2145 [cutorch refactor] move meanall function into generic/, update cwrap for lua mean 2016-10-07 11:49:33 -07:00
a4d849ef68 [cutorch refactor] move mean function into generic/ 2016-10-07 11:49:33 -07:00
957c9f3853 Move atomicAdd functions to THCAtomics.cuh in order to share
definitions with other projects, e.g. cunn.
2016-10-07 11:43:02 -07:00
3958b6b0e1 Merge pull request #338 from nitsky/spatial_logsoftmax
SpatialLogSoftMax
2016-10-07 10:36:40 -04:00
5d70feb573 bug fix for wrong usage of checkGPU && port to windows with msvc 2016-10-07 15:55:38 +08:00
a22af69335 Add versioning and shared storage handling to autograd (#105) 2016-10-06 17:12:58 -04:00
1213149a2f add bias option to linear; allow modules to return nested lists/tuples of tensors (#106)
* add bias option to linear; allow modules to return nested lists/tuples of tensors
2016-10-06 15:59:12 -04:00
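A quick sketch of the new option (keyword name as in later releases):

```python
import torch.nn as nn

m = nn.Linear(10, 20, bias=False)
print(m.bias)            # None: no bias parameter is allocated
print(m.weight.size())   # torch.Size([20, 10])
```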
398b6f75cd update nn.md 2016-10-05 14:56:41 -04:00
e46e05e7c5 fix container doc 2016-10-05 14:53:41 -04:00
166028836d Ignore graph parts not requiring gradient in engine 2016-10-05 08:46:34 -07:00
3cbe66ba8c Change requires_grad default to False 2016-10-05 08:46:34 -07:00
99de537a2e Remove CUDA sync points from losses and trainer 2016-10-05 08:46:31 -07:00
1d0afdf9f7 Make requires_grad read only (except for leaves) 2016-10-05 07:55:07 -07:00
4db6667923 Allow specifying per-parameter optimization parameters 2016-10-04 18:21:50 -07:00
80e16e44aa Check container source on load 2016-10-04 17:41:12 -07:00
58b134b793 Allow exporting optimizer state as a dict 2016-10-04 17:33:49 -07:00
6efefac2df Add parameter_dict and load_parameter_dict methods for modules 2016-10-04 14:47:56 -07:00
0c9670ddf0 Allow remapping storages at load time and serialize data in little endian order 2016-10-04 12:54:55 -07:00
53c65ddc6a Fix memory leak when constructing a tensor from numpy (#98) 2016-10-03 23:27:54 -04:00
33371c5164 ffi tests skip on cuda 2016-10-03 12:15:28 -07:00
64dd1419c5 Fix Variable indexing bugs (#96) 2016-10-03 14:49:21 -04:00
6e8ed95ada fix compilation error: 'for' loop initial declarations are only allowed in C99 mode 2016-10-03 14:11:59 +08:00
39c9f9e9e8 replace long with ptrdiff_t for memory size/offset etc 2016-10-03 12:55:30 +08:00
89666fc4fe Fix SpatialLogSoftMax memory leak and code cleanup 2016-09-27 08:16:31 -07:00
0eff3897e3 Update SpatialLogSoftMax kernel to use cuda dimensions 2016-09-26 09:39:56 -07:00
df77a8a81a Update LogSoftMax to work in spatial domain 2016-09-21 08:11:59 -07:00
456 changed files with 36834 additions and 19084 deletions

.gitignore (6 changes)

@@ -10,10 +10,16 @@ torch/lib/build
torch/lib/tmp_install
torch/lib/include
torch/lib/torch_shm_manager
torch/csrc/cudnn/cuDNN.cpp
torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp
torch/csrc/nn/THCUNN.cwrap
torch/csrc/nn/THCUNN.cpp
*/*.pyc
*/**/*.pyc
*/**/**/*.pyc
*/**/**/**/*.pyc
*/**/**/**/**/*.pyc
*/*.so*
*/**/*.so*
*/**/*.dylib*


@@ -85,7 +85,18 @@ conda install pytorch -c https://conda.anaconda.org/t/6N-MsQ4WZ7jo/soumith
```
### From source
#### Install optional dependencies
```bash
export CMAKE_PREFIX_PATH=[anaconda root directory]
conda install numpy mkl
conda install -c soumith magma-cuda75 # or magma-cuda80
```
#### Install PyTorch
```bash
export MACOSX_DEPLOYMENT_TARGET=10.9 # for OSX
pip install -r requirements.txt
pip install .
```


@@ -685,17 +685,21 @@ endif()
# CUDA_NVCC_EXECUTABLE
cuda_find_host_program(CUDA_NVCC_EXECUTABLE
NAMES nvcc
PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
ENV CUDA_PATH
ENV CUDA_BIN_PATH
PATH_SUFFIXES bin bin64
NO_DEFAULT_PATH
)
# Search default search paths, after we search our own set of paths.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
if(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
SET(CUDA_NVCC_EXECUTABLE "$ENV{CUDA_NVCC_EXECUTABLE}")
else(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
cuda_find_host_program(CUDA_NVCC_EXECUTABLE
NAMES nvcc
PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
ENV CUDA_PATH
ENV CUDA_BIN_PATH
PATH_SUFFIXES bin bin64
NO_DEFAULT_PATH
)
# Search default search paths, after we search our own set of paths.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
endif(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
# Compute the version.


@@ -128,6 +128,60 @@ def args_block(lines):
out += [name + ' | ' + default + ' | ' + description]
return out
# Inputs
_inputs_section = re.compile('^\s*Inputs:\s*(.*)\s*')
def is_inputs_check(line):
return _inputs_section.match(line)
def inputs_block(lines):
out = ['']
out += ['Parameter | Default | Description']
out += ['--------- | ------- | -----------']
for line in lines:
matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
assert matches != None
name = matches[0][0]
description = matches[0][1]
default = matches[0][3]
out += [name + ' | ' + default + ' | ' + description]
return out
# Outputs
_outputs_section = re.compile('^\s*Outputs:\s*(.*)\s*')
def is_outputs_check(line):
return _outputs_section.match(line)
def outputs_block(lines):
out = ['']
out += ['Parameter | Description']
out += ['--------- | -----------']
for line in lines:
matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
assert matches != None
name = matches[0][0]
description = matches[0][1]
default = matches[0][3]
out += [name + ' | ' + description]
return out
# Members
_members_section = re.compile('^\s*Members:\s*(.*)\s*')
def is_members_check(line):
return _members_section.match(line)
def members_block(lines):
out = ['']
out += ['Parameter | Description']
out += ['--------- | -----------']
for line in lines:
matches = re.findall(r'\s*([^:]+):\s*(.*?)\s*(Default:\s(.*))?\s*$', line)
assert matches != None
name = matches[0][0]
description = matches[0][1]
default = matches[0][3]
out += [name + ' | ' + description]
return out
_returns_section = re.compile('^\s*Returns:\s*')
def is_returns_check(line):
return _returns_section.match(line)
@ -147,10 +201,7 @@ def is_inputshape_check(line):
_outputshape_section = re.compile('^\s*Returns:\s*|^\s*Output Shape:\s*')
def is_outputshape_check(line):
return _outputshape_section.match(line)
#def get_docargs(line)
###############################################
_reg_section = re.compile('^#+ ')
def is_heading(line):
return _reg_section.match(line)
@ -193,6 +244,9 @@ def _doc2md(lines, shiftlevel=0):
_doc2md.is_code = False
_doc2md.is_code_block = False
_doc2md.is_args = False
_doc2md.is_inputs = False
_doc2md.is_outputs = False
_doc2md.is_members = False
_doc2md.is_returns = False
_doc2md.is_inputshape = False
_doc2md.is_outputshape = False
@ -211,6 +265,18 @@ def _doc2md(lines, shiftlevel=0):
_doc2md.is_args = False
_doc2md.md += args_block(args)
if _doc2md.is_inputs:
_doc2md.is_inputs = False
_doc2md.md += inputs_block(inputs)
if _doc2md.is_outputs:
_doc2md.is_outputs = False
_doc2md.md += outputs_block(outputs)
if _doc2md.is_members:
_doc2md.is_members = False
_doc2md.md += members_block(members)
if _doc2md.is_returns:
_doc2md.is_returns = False
_doc2md.md += returns
@ -226,6 +292,24 @@ def _doc2md(lines, shiftlevel=0):
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Constructor Arguments']
args = []
elif is_inputs_check(line):
reset()
_doc2md.is_inputs = True
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Inputs']
inputs = []
elif is_outputs_check(line):
reset()
_doc2md.is_outputs = True
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Outputs']
outputs = []
elif is_members_check(line):
reset()
_doc2md.is_members = True
_doc2md.md += ['']
_doc2md.md += ['#' * (shiftlevel+2) + ' Members']
members = []
elif is_returns_check(line):
reset()
_doc2md.is_returns = True
@ -276,6 +360,21 @@ def _doc2md(lines, shiftlevel=0):
args.append(line)
else:
reset()
elif _doc2md.is_inputs:
if line:
inputs.append(line)
else:
reset()
elif _doc2md.is_outputs:
if line:
outputs.append(line)
else:
reset()
elif _doc2md.is_members:
if line:
members.append(line)
else:
reset()
elif _doc2md.is_returns:
if line:
returns.append(line)
@ -293,7 +392,7 @@ def _doc2md(lines, shiftlevel=0):
_doc2md.code += _doc2md.md
return _doc2md.code
def doc2md(docstr, title, min_level=1, more_info=False, toc=True):
def doc2md(docstr, title, min_level=3, more_info=False, toc=True):
"""
Convert a docstring to a markdown text.
"""
@ -345,7 +444,7 @@ def mod2md(module, title, title_api_section, toc=True):
api_sec = []
if title_api_section :
# sections.append((level+1, title_api_section))
for name, entry in iter(sorted(module.__dict__.items())):
for name, entry in iter(module.__dict__.items()):
if name[0] != '_' and entry.__doc__:
#api_sec.append((level+1, name))
#api_md += ['', '']


@@ -1,6 +1,100 @@
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pushd $SCRIPT_DIR
python doc2md.py torch.nn --no-toc --all >../nn.md
# module
#python doc2md.py torch.nn Module --title Module --no-toc >../nn_module.md
# containers
echo "## Containers" > ../nn_container.md
python doc2md.py torch.nn Container --title Container --no-toc >>../nn_container.md
python doc2md.py torch.nn Sequential --title Sequential --no-toc >>../nn_container.md
# convolution
echo "## Convolution Layers" > ../nn_convolution.md
echo Conv1d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_convolution.md
echo Conv2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_convolution.md
echo ConvTranspose2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_convolution.md
echo Conv3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_convolution.md
echo ConvTranspose3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_convolution.md
# pooling
echo "## Pooling Layers" > ../nn_pooling.md
echo MaxPool1d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo MaxPool2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo MaxPool3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo MaxUnpool2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo MaxUnpool3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo AvgPool2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo AvgPool3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo FractionalMaxPool2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
echo LPPool2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_pooling.md
# activations
echo "## Non-linearities" > ../nn_activation.md
echo ReLU | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo ReLU6 | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Threshold | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Hardtanh | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Sigmoid | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Tanh | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo ELU | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo LeakyReLU | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo LogSigmoid | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softplus | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softshrink | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo PReLU | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softsign | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Tanhshrink | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softmin | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softmax | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo Softmax2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
echo LogSoftmax | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_activation.md
# normalization
echo "## Normalization layers" > ../nn_normalization.md
echo BatchNorm1d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_normalization.md
echo BatchNorm2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_normalization.md
echo BatchNorm3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_normalization.md
# recurrentnet
echo "## Recurrent layers" > ../nn_recurrent.md
echo RNN | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
echo LSTM | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
echo GRU | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
echo RNNCell | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
echo LSTMCell | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
echo GRUCell | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_recurrent.md
# linear
echo "## Linear layers" > ../nn_linear.md
echo Linear | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_linear.md
# dropout
echo "## Dropout layers" > ../nn_dropout.md
echo Dropout | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_dropout.md
echo Dropout2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_dropout.md
echo Dropout3d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_dropout.md
# Sparse
echo "## Sparse layers" > ../nn_sparse.md
echo Embedding | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_sparse.md
# loss_functions
echo "## Loss functions" > ../nn_loss.md
echo L1Loss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo MSELoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo CrossEntropyLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo NLLLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo NLLLoss2d | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo KLDivLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo BCELoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo MarginRankingLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo HingeEmbeddingLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo MultiLabelMarginLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo SmoothL1Loss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo SoftMarginLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo MultiLabelSoftMarginLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo CosineEmbeddingLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
echo MultiMarginLoss | xargs -I {} python doc2md.py torch.nn {} --title {} --no-toc >>../nn_loss.md
popd


@@ -0,0 +1,143 @@
import sys
from tools.cwrap import cwrap
from tools.cwrap.plugins import CWrapPlugin
from string import Template
import sys
import torch
from torch.autograd import Variable
def transform_defined_if(defined_if):
if defined_if != None:
defined_if = defined_if.replace('defined(TH_REAL_IS_FLOAT)', 'Float')
defined_if = defined_if.replace('defined(TH_REAL_IS_DOUBLE)', 'Double')
defined_if = defined_if.replace('defined(TH_REAL_IS_BYTE)', 'Byte')
defined_if = defined_if.replace('defined(TH_REAL_IS_CHAR)', 'Char')
defined_if = defined_if.replace('defined(TH_REAL_IS_INT)', 'Int')
defined_if = defined_if.replace('defined(TH_REAL_IS_LONG)', 'Long')
defined_if = defined_if.replace('defined(NUMPY_TYPE_ENUM)',
'Byte // Short // Int // Long // Float // Double')
defined_if = defined_if.replace('CUDA_INT', 'Cuda_Int')
defined_if = defined_if.replace('CUDA_LONG', 'Cuda_Long')
defined_if = defined_if.replace('CUDA_FLOAT', 'Cuda_Float')
defined_if = defined_if.replace('CUDA_DOUBLE', 'Cuda_Double')
defined_if = defined_if.replace('CUDA_HALF', 'Cuda_Half')
defined_if = defined_if.replace('!IS_CUDA', 'All CPU Types')
else:
defined_if = "All Types (CPU and CUDA)"
defined_if = defined_if.replace('||', '//')
return defined_if
class DocGen(CWrapPlugin):
def __init__(self):
self.declarations = {}
def process_declarations(self, declarations):
self.declarations.update({declaration['name']: declaration for declaration in declarations})
# self.declarations += declarations
return declarations
def get_wrapper_template(self, declaration):
return Template("")
def get_type_check(self, arg, option):
return Template("")
def get_type_unpack(self, arg, option):
return Template("")
def get_return_wrapper(self, option):
return Template("")
def print_declarations(self):
print("# torch.Tensor")
for name, declarations in sorted(self.declarations.items()):
if name.endswith('_') and name[:-1] in self.declarations:
continue
if not name.endswith('_') and name + '_' in self.declarations:
inplace = True
else:
inplace = False
pname = declarations['options'][0].get('python_name', None)
if pname != None:
name = pname
if name.startswith('_'):
continue
# START PRINTING MARKDOWN
print("## " + name + " \n")
print("| %-25s | %-8s | %-25s |" % ("Name", "Autograd", "defined if"))
print("| " + ('-' * 28) + " | " + ('-' * 11) + " | "+ ('-' * 28) + " |")
if inplace:
sys.stdout.write("| %-25s" % (name + ' // ' + name + "_"))
else:
sys.stdout.write("| %-25s" % name)
sys.stdout.write(' | ')
if hasattr(Variable(torch.randn(10)), name):
sys.stdout.write(' %9s ' % 'yes') # + ' ' + name)
else:
sys.stdout.write(' %9s ' % 'no') # + ' ' + name)
defined_if = declarations.get('defined_if', None)
defined_if = transform_defined_if(defined_if)
sys.stdout.write(' | ')
sys.stdout.write(defined_if)
sys.stdout.write(' |')
sys.stdout.write('\n\n')
#if inplace:
# print('Inplace Exists : True')
#sys.stdout.write('Arguments : ')
args = declarations['options'][0]['arguments']
if len(args) == 0:
print( '**No Arguments**\n' )
else:
print( '**Arguments**\n' )
print("| %-15s | %-12s | %-15s |" % ("Name", "Type", "Default"))
print("| " + ('-' * 18) + " | " + ('-' * 15) + " | "+ ('-' * 18) + " |")
for arg in args:
type_ = arg['type']
if type_ == 'THGenerator*':
continue
if type_ == 'THTensor*':
type_ = 'Tensor'
if type_ == 'THIndexTensor*':
type_ = 'LongTensor'
if type_ == 'THBoolTensor*':
type_ = 'ByteTensor'
if type_ == 'THLongTensor*':
type_ = 'LongTensor'
if type_ == 'THLongStorage*':
type_ = 'LongStorage'
default = arg.get('default', None)
allocated = arg.get('allocate', None)
if default == None and allocated == None:
default = " [required]"
elif allocated != None:
default = " [optional]"
else:
default = str(default)
import re
m = re.search('\s*AS_REAL\((.+)\)\s*', default)
if m:
default = m.group(1)
default = default
print('| %15s | %12s | %10s |' % (arg['name'], type_, default))
# print( 'Options : ' )
# print(declarations['options'][0])
print('')
if declarations['return']:
return_ = declarations['return']
if return_ == 'THTensor*':
return_ = 'Tensor'
if return_ == 'void':
return_ = 'nothing'
print( '**Returns : ' + return_ + '**')
print('')
docs = DocGen()
cwrap('../../torch/csrc/generic/TensorMethods.cwrap', plugins=[docs])
docs.print_declarations()

docs/nn.md (1369 changes)

File diff suppressed because it is too large.

docs/nn_activation.md (new file, 496 lines)

@@ -0,0 +1,496 @@
## Non-linearities
### ReLU
Applies the rectified linear unit function element-wise ReLU(x)= max(0,x)
```python
m = nn.ReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
#### Expected Shape
Input/Output | Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/relu.png" >
### ReLU6
Applies the element-wise function ReLU6(x) = min( max(0,x), 6)
```python
m = nn.ReLU6()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/relu6.png" >
### Threshold
Thresholds each element of the input Tensor
```python
m = nn.Threshold(0.1, 20)
input = Variable(torch.randn(2))
print(input)
print(m(input))
```
Threshold is defined as:
y = x if x >= threshold
value if x < threshold
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
threshold | | The value to threshold at
value | | The value to replace with
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
Tensor of same dimension and shape as the input
### Hardtanh
Applies the HardTanh function element-wise
```python
m = nn.Hardtanh(-2, 2)
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
HardTanh is defined as:
f(x) = +1, if x > 1
f(x) = -1, if x < -1
f(x) = x, otherwise
The range of the linear region [-1, 1] can be adjusted
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
min_value | | minimum value of the linear region range
max_value | | maximum value of the linear region range
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/htanh.png" >
### Sigmoid
Applies the element-wise function sigmoid(x) = 1 / ( 1 + exp(-x))
```python
m = nn.Sigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/sigmoid.png" >
### Tanh
Applies element-wise, Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
```python
m = nn.Tanh()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/tanh.png" >
### ELU
Applies element-wise, ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1))
```python
m = nn.ELU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
alpha | 1.0 | the alpha value for the ELU formulation.
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/elu.png" >
### LeakyReLU
Applies element-wise, f(x) = max(0, x) + negative_slope * min(0, x)
```python
m = nn.LeakyReLU(0.1)
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
negative_slope | 1e-2 | Controls the angle of the negative slope.
inplace | | can optionally do the operation in-place
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
### LogSigmoid
Applies element-wise LogSigmoid(x) = log( 1 / (1 + exp(-x_i)))
```python
m = nn.LogSigmoid()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/logsigmoid.png" >
### Softplus
Applies element-wise SoftPlus(x) = 1/beta * log(1 + exp(beta * x_i))
```python
m = nn.Softplus()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
SoftPlus is a smooth approximation to the ReLU function and can be used
to constrain the output of a machine to always be positive.
For numerical stability the implementation reverts to the linear function
for inputs above a certain value.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
beta | 1 | the beta value for the Softplus formulation.
threshold | 20 | values above this revert to a linear function.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/softplus.png" >
### Softshrink
Applies the soft shrinkage function elementwise
```python
m = nn.Softshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
SoftShrinkage operator is defined as:
f(x) = x - lambda, if x > lambda
f(x) = x + lambda, if x < -lambda
f(x) = 0, otherwise
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
lambd | 0.5 | the lambda value for the Softshrink formulation.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/sshrink.png" >
### PReLU
Applies element-wise the function PReLU(x) = max(0,x) + a * min(0,x)
```python
m = nn.PReLU()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
Here "a" is a learnable parameter.
When called without arguments, nn.PReLU() uses a single parameter "a"
across all input channels. If called with nn.PReLU(nChannels), a separate
"a" is used for each input channel.
Note that weight decay should not be used when learning "a" for good
performance.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_parameters | 1 | number of "a" to learn.
init | 0.25 | the initial value of "a".
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/prelu.png" >
### Softsign
Applies element-wise, the function Softsign(x) = x / (1 + |x|)
```python
m = nn.Softsign()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
<img src="image/softsign.png" >
### Tanhshrink
Applies element-wise, Tanhshrink(x) = x - Tanh(x)
```python
m = nn.Tanhshrink()
input = autograd.Variable(torch.randn(2))
print(input)
print(m(input))
```
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Tensor of any size and dimension
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input
### Softmin
Applies the Softmin function to an n-dimensional input Tensor, rescaling the elements so that they lie in the range (0, 1) and sum to 1.
```python
m = nn.Softmin()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
Softmin is defined as:
Softmin(x_i) = exp(-x_i - shift) / sum_j exp(-x_j - shift)
where shift = max_i (-x_i)
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input, with
values in the range [0, 1]
<img src="image/softmin.png" >
### Softmax
Applies the Softmax function to an n-dimensional input Tensor, rescaling the elements so that they lie in the range (0, 1) and sum to 1.
```python
m = nn.Softmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
Softmax is defined as:
f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift)
where shift = max_i x_i
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [0, 1]
<img src="image/softmax.png" >
Note:
This module doesn't work directly with NLLLoss, which expects the Log to be
computed between the Softmax and itself. Use LogSoftmax instead (it's faster).
### Softmax2d
Applies SoftMax over features to each spatial location
```python
m = nn.Softmax2d()
# you softmax over the 2nd dimension
input = autograd.Variable(torch.randn(2, 3, 12, 13))
print(input)
print(m(input))
```
When given an image of Channels x Height x Width, it will
apply Softmax to each location [Channels, h_i, w_j]
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , * , * ] | 4D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [0, 1]
### LogSoftmax
Applies the Log(Softmax(x)) function to an n-dimensional input Tensor.
```python
m = nn.LogSoftmax()
input = autograd.Variable(torch.randn(2, 3))
print(input)
print(m(input))
```
The LogSoftmax formulation can be simplified as
f_i(x) = log(1 / a * exp(x_i)) where a = sum_j exp(x_j) .
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * ] | 2D Tensor of any size
output | Same | Output has the same shape as input
#### Returns
a Tensor of the same dimension and shape as the input with
values in the range [-inf, 0)
<img src="image/logsoftmax.png" >

docs/nn_container.md (new file, 136 lines)
## Containers
### Container
This is the base container class for all neural networks you would define.
```python
# Example of using Container
class Net(nn.Container):
    def __init__(self):
        super(Net, self).__init__(
            conv1 = nn.Conv2d(1, 20, 5),
            relu = nn.ReLU()
        )

    def forward(self, input):
        output = self.relu(self.conv1(input))
        return output
model = Net()
```
```python
# one can add modules to the container after construction
model.add_module('pool1', nn.MaxPool2d(2, 2))
```
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
# .parameter_dict()
```
```python
>>> pdict = model.parameter_dict()
>>> print(pdict.keys())
['conv1.bias', 'conv1.weight']
```
You will subclass your container from this class.
In the constructor you define the modules that you would want to use,
and in the "forward" function you use the constructed modules in
your operations.
To make it easier to understand, a small example is given above.
One can also add new modules to a container after construction.
You can do this with the add_module function
or by assigning them as Container attributes.
```python
# one can also set modules as attributes of the container
model.conv1 = nn.Conv2d(12, 24, 3)
```
The container has some important additional methods:
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] parameter_dict()`**
returns a dictionary of learnable parameters of the Container.
For example: {'conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
              'conv1.bias'   : Parameter(torch.FloatTensor(20))}
**`load_parameter_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
It loads the parameters recursively.
Extra or non-matching parameter names are ignored.
For example, the input dict has an entry 'conv44.weight', but
if the container does not have a module named 'conv44', then this entry is ignored.
**`children()`**
Returns a generator over all the children modules of self
**`train()`**
Sets the Container (and all its child modules) to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container (and all its child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
**`apply(closure)`**
Applies the given closure to each parameter of the container.
**Note: apart from these, the Container also provides the base functions that it derives from nn.Module.**
### Sequential
A sequential Container. It is derived from the base nn.Container class
```python
# Example of using Sequential
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
```
Modules will be added to it in the order they are passed in the constructor.
Alternatively, an ordered dict of modules can also be passed in.
To make it easier to understand, examples are given above and below.
#### Example of using Sequential with OrderedDict
```python
from collections import OrderedDict

model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1,20,5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20,64,5)),
    ('relu2', nn.ReLU())
]))
```

docs/nn_convolution.md (new file, 236 lines)
## Convolution Layers
### Conv1d
Applies a 1D convolution over an input signal composed of several input planes.
```python
The output value of the layer with input (b x iC x W) and output (b x oC x oW)
can be precisely described as:
output[b_i][oc_i][ow] = bias[oc_i]
    + sum_{ic = 0 to iC-1} sum_{kw = 0 to kW-1}
      weight[oc_i][ic][kw] * input[b_i][ic][stride_w * ow + kw]
```
```python
m = nn.Conv1d(16, 33, 3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50))
output = m(input)
```
Note that depending on the size of your kernel, several (of the last)
columns of the input might be lost. It is up to the user
to add proper padding.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel.
stride | | the stride of the convolving kernel.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * ] | Input is minibatch x in_channels x iW
output | [ * , out_channels , * ] | Output shape is precisely minibatch x out_channels x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kW)
bias | the learnable bias of the module of shape (out_channels)
### Conv2d
Applies a 2D convolution over an input image composed of several input planes.
```python
The output value of the layer with input (b x iC x H x W) and output (b x oC x oH x oW)
can be precisely described as:
output[b_i][oc_i][oh][ow] = bias[oc_i]
    + sum_{ic = 0 to iC-1} sum_{kh = 0 to kH-1} sum_{kw = 0 to kW-1}
      weight[oc_i][ic][kh][kw] * input[b_i][ic][stride_h * oh + kh][stride_w * ow + kw]
```
```python
# With square kernels and equal stride
m = nn.Conv2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# non-square kernels and unequal stride and with padding and dilation
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 100))
output = m(input)
```
Note that depending on the size of your kernel, several (of the last)
columns or rows of the input image might be lost. It is up to the user
to add proper padding in images.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
dilation | None | If given, will do dilated (or atrous) convolutions. Can be a single number s or a tuple.
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
output | [ * , out_channels , * , * ] | Output shape is precisely minibatch x out_channels x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### ConvTranspose2d
Applies a 2D deconvolution operator over an input image composed of several input planes.
```python
# With square kernels and equal stride
m = nn.ConvTranspose2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
input = autograd.Variable(torch.randn(20, 16, 50, 100))
output = m(input)
# exact output size can be also specified as an argument
input = autograd.Variable(torch.randn(1, 16, 12, 12))
downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
h = downsample(input)
output = upsample(h, output_size=input.size())
```
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
and sums over the outputs from all input feature planes.
This module can be seen as the exact reverse of the Conv2d module.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * ] | Input is minibatch x in_channels x iH x iW
output | [ * , out_channels , * , * ] | Output shape is minibatch x out_channels x ((iH - 1) * sH - 2*padH + kH + output_paddingH) x ((iW - 1) * sW - 2*padW + kW + output_paddingW), or as specified in a second argument to the call.
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (in_channels x out_channels x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### Conv3d
Applies a 3D convolution over an input image composed of several input planes.
```python
# With square kernels and equal stride
m = nn.Conv3d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(4, 2, 0))
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
output = m(input)
```
Note that depending on the size of your kernel, several (of the last)
columns or rows of the input image might be lost. It is up to the user
to add proper padding in images.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number s or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iT x iH x iW
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x floor((iT + 2*padT - kT) / dT + 1) x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_channels x in_channels x kT x kH x kW)
bias | the learnable bias of the module of shape (out_channels)
### ConvTranspose3d
Applies a 3D deconvolution operator over an input image composed of several input planes.
```python
# With square kernels and equal stride
m = nn.ConvTranspose3d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.ConvTranspose3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(0, 4, 2))
input = autograd.Variable(torch.randn(20, 16, 10, 50, 100))
output = m(input)
```
The deconvolution operator multiplies each input value element-wise by a learnable kernel,
and sums over the outputs from all input feature planes.
This module can be seen as the exact reverse of the Conv3d module.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_channels | | The number of expected input channels in the image given as input
out_channels | | The number of output channels the convolution layer will produce
kernel_size | | the size of the convolving kernel. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | 1 | the stride of the convolving kernel. Can be a single number or a tuple (st x sh x sw).
padding | 0 | implicit zero padding on the input. Can be a single number or a tuple.
output_padding | 0 | A zero-padding of 0 <= padding < stride that should be added to the output. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , in_channels , * , * , * ] | Input is minibatch x in_channels x iT x iH x iW
output | [ * , out_channels , * , * , * ] | Output shape is precisely minibatch x out_channels x (iT - 1) * sT - 2*padT + kT + output_paddingT x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (in_channels x out_channels x kT x kH x kW)
bias | the learnable bias of the module of shape (out_channels)

docs/nn_core.md (new file, 233 lines)
# Module
This is the base class for all Modules defined in the nn package.
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
# .parameter_dict()
```
```python
>>> pdict = model.parameter_dict()
>>> print(pdict.keys())
['bias', 'weight']
```
Even the Container class derives from this class.
An nn.Module has the following interface:
**Constructor:**
nn.Module(**parameters)
All arguments passed in to the constructor need to be of type
nn.Parameter or a Tensor.
**forward(...)**
This is the function that one defines when subclassing to create
their own modules.
It takes in inputs and returns outputs.
**__call__(...)**
This calls the forward function, as well as the hooks
**register_buffer(name, tensor)**
This is typically used to register a buffer that is not a Parameter.
For example, in BatchNorm, the running_mean is a buffer, so one would
register it in the constructor of BatchNorm with:
`self.register_buffer('running_mean', torch.zeros(num_features))`
The registered buffers can simply be accessed as class members
when needed.
**cpu()**
Recursively moves all its parameters and buffers to the CPU
**cuda(device_id=None)**
Recursively moves all its parameters and buffers to the CUDA memory.
If device_id is given, moves it to GPU number device_id
**float()**
Typecasts the parameters and buffers to float
**double()**
Typecasts the parameters and buffers to double
**register_forward_hook(name, hook)**
This will register a user-defined closure on the module.
Whenever the module finishes its forward operation,
the user closure is called.
The signature of the closure is `def closure(input, output)`
**register_backward_hook(name, hook)**
This will register a user-defined closure on the module.
Whenever the module finishes its backward operation,
the user closure is called.
The signature of the closure is `def closure(gradOutput, gradInput)`
**remove_forward_hook(name)**
Removes a registered forward hook with the given name
**remove_backward_hook(name)**
Removes a registered backward hook with the given name
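As an illustration, a forward hook that prints activation shapes could be registered and removed like this (a sketch only; the hook name and the `conv1` attribute are just examples):
```python
# a sketch only: print the output shape every time model.conv1 runs forward
def shape_hook(input, output):
    print(output.size())

model.conv1.register_forward_hook('shape_hook', shape_hook)
# ... run some forward passes ...
model.conv1.remove_forward_hook('shape_hook')
```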
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] parameter_dict()`**
returns a dictionary of learnable parameters of the Module.
For example: {'weight' : Parameter(torch.FloatTensor(20x1x5x5)),
              'bias'   : Parameter(torch.FloatTensor(20))}
**`load_parameter_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
**`train()`**
Sets the Container to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container to evaluate mode (for modules such as batchnorm, dropout etc.)
**`zero_grad()`**
Zeroes the gradients of each Parameter of the module
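Putting a few of these methods together, copying the learnable parameters from one module into another identically-structured one might look like this (a sketch only; `model_a` and `model_b` are illustrative names):
```python
# a sketch only: transfer learnable parameters between two identical models
pdict = model_a.parameter_dict()
model_b.load_parameter_dict(pdict)
model_b.zero_grad()
```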
# Container
This is the base container class for all neural networks you would define.
```python
# Example of using Container
class Net(nn.Container):
    def __init__(self):
        super(Net, self).__init__(
            conv1 = nn.Conv2d(1, 20, 5),
            relu = nn.ReLU()
        )

    def forward(self, input):
        output = self.relu(self.conv1(input))
        return output
model = Net()
```
```python
# one can add modules to the container after construction
model.add_module('pool1', nn.MaxPool2d(2, 2))
```
```python
# .parameters()
```
```python
>>> for param in model.parameters():
>>> print(type(param.data), param.size())
<class 'torch.FloatTensor'> (20L,)
<class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
```
```python
# .parameter_dict()
```
```python
>>> pdict = model.parameter_dict()
>>> print(pdict.keys())
['conv1.bias', 'conv1.weight']
```
You will subclass your container from this class.
In the constructor you define the modules that you would want to use,
and in the "forward" function you use the constructed modules in
your operations.
To make it easier to understand, a small example is given above.
One can also add new modules to a container after construction.
You can do this with the add_module function
or by assigning them as Container attributes.
```python
# one can also set modules as attributes of the container
model.conv1 = nn.Conv2d(12, 24, 3)
```
The container has some important additional methods:
**`[generator] parameters()`**
returns a generator over all learnable parameters in the container instance.
This can typically be passed to the optimizer API
**`[dict] parameter_dict()`**
returns a dictionary of learnable parameters of the Container.
For example: {'conv1.weight' : Parameter(torch.FloatTensor(20x1x5x5)),
              'conv1.bias'   : Parameter(torch.FloatTensor(20))}
**`load_parameter_dict(dict)`**
Given a parameter dict, sets the parameters of self to be the given dict.
It loads the parameters recursively.
Extra or non-matching parameter names are ignored.
For example, the input dict has an entry 'conv44.weight', but
if the container does not have a module named 'conv44', then this entry is ignored.
**`children()`**
Returns a generator over all the children modules of self
**`train()`**
Sets the Container (and all its child modules) to training mode (for modules such as batchnorm, dropout etc.)
**`eval()`**
Sets the Container (and all its child modules) to evaluate mode (for modules such as batchnorm, dropout etc.)
**`apply(closure)`**
Applies the given closure to each parameter of the container.
**Note: apart from these, the Container also provides the base functions that it derives from nn.Module.**

docs/nn_dropout.md (new file, 90 lines)
## Dropout layers
### Dropout
Randomly zeroes some of the elements of the input tensor.
```python
m = nn.Dropout(p=0.2)
input = autograd.Variable(torch.randn(20, 16))
output = m(input)
```
The elements to zero are randomized on every forward call.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | Any | Input can be of any shape
output | Same | Output is of the same shape as input
### Dropout2d
Randomly zeroes whole channels of the input tensor.
```python
m = nn.Dropout2d(p=0.2)
input = autograd.Variable(torch.randn(20, 16, 32, 32))
output = m(input)
```
The input is 4D (batch x channels x height x width) and each channel
is of size (1, height, width).
The channels to zero are randomized on every forward call.
Usually the input comes from Conv2d modules.
As described in the paper "Efficient Object Localization Using Convolutional
Networks" (http://arxiv.org/abs/1411.4280), if adjacent pixels within
feature maps are strongly correlated (as is normally the case in early
convolution layers) then iid dropout will not regularize the activations
and will otherwise just result in an effective learning rate decrease.
In this case, nn.Dropout2d will help promote independence between
feature maps and should be used instead.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, *, *, *] | Input can be of any sizes of 4D shape
output | Same | Output is of the same shape as input
### Dropout3d
Randomly zeroes whole channels of the input tensor.
```python
m = nn.Dropout3d(p=0.2)
input = autograd.Variable(torch.randn(20, 16, 4, 32, 32))
output = m(input)
```
The input is 5D (batch x channels x depth x height x width) and each channel
is of size (1, depth, height, width).
The channels to zero are randomized on every forward call.
Usually the input comes from Conv3d modules.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
p | 0.5 | probability of an element to be zeroed.
inplace | false | If set to True, will do this operation in-place.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, *, *, *, *] | Input can be of any sizes of 5D shape
output | Same | Output is of the same shape as input

docs/nn_linear.md (new file, 36 lines)
## Linear layers
### Linear
Applies a linear transformation to the incoming data, y = Ax + b
```python
m = nn.Linear(20, 30)
input = autograd.Variable(torch.randn(128, 20))
output = m(input)
print(output.size())
```
The input is a 2D mini-batch of samples, each of size in_features
The output will be a 2D Tensor of size mini-batch x out_features
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
in_features | | size of each input sample
out_features | | size of each output sample
bias | True | If set to False, the layer will not learn an additive bias.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [*, in_features] | Input can be of shape minibatch x in_features
output | [*, out_features] | Output is of shape minibatch x out_features
#### Members
Parameter | Description
--------- | -----------
weight | the learnable weights of the module of shape (out_features x in_features)
bias | the learnable bias of the module of shape (out_features)

docs/nn_loss.md (new file, 294 lines)
## Loss functions
### L1Loss
Creates a criterion that measures the mean absolute value of the
element-wise difference between input `x` and target `y`:
loss(x, y) = 1/n \sum |x_i - y_i|
`x` and `y` can be of arbitrary shapes with a total of `n` elements each;
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal
variable `sizeAverage` to `False`
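A minimal usage sketch, in the same style as the NLLLoss example further below (shapes are illustrative; MSELoss below is used the same way):
```python
loss = nn.L1Loss()
input = autograd.Variable(torch.randn(3, 5))
target = autograd.Variable(torch.randn(3, 5))
output = loss(input, target)
```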
### MSELoss
Creates a criterion that measures the mean squared error between
`n` elements in the input `x` and target `y`:
loss(x, y) = 1/n \sum |x_i - y_i|^2
`x` and `y` can be of arbitrary shapes with a total of `n` elements each;
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal variable
`sizeAverage` to `False`
By default, the losses are averaged over observations for each minibatch.
However, if the field `sizeAverage = False`, the losses are instead summed.
### CrossEntropyLoss
This criterion combines `LogSoftMax` and `ClassNLLLoss` in one single class.
It is useful when training a classification problem with `n` classes.
If provided, the optional argument `weights` should be a 1D `Tensor`
assigning weight to each of the classes.
This is particularly useful when you have an unbalanced training set.
The `input` is expected to contain scores for each class:
`input` has to be a 2D `Tensor` of size `batch x n`.
This criterion expects a class index (0 to nClasses-1) as the
`target` for each value of a 1D tensor of size `n`
The loss can be described as:
loss(x, class) = -log(exp(x[class]) / (\sum_j exp(x[j])))
= -x[class] + log(\sum_j exp(x[j]))
or in the case of the `weights` argument being specified:
loss(x, class) = weights[class] * (-x[class] + log(\sum_j exp(x[j])))
The losses are averaged across observations for each minibatch.
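A minimal sketch, mirroring the NLLLoss example below but without the explicit LogSoftmax layer (shapes are illustrative):
```python
loss = nn.CrossEntropyLoss()
# input is of size nBatch x nClasses = 3 x 5
input = autograd.Variable(torch.randn(3, 5))
# each element in target has to have 0 <= value < nClasses
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(input, target)
```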
### NLLLoss
The negative log likelihood loss. It is useful to train a classification problem with n classes
```python
m = nn.LogSoftmax()
loss = nn.NLLLoss()
# input is of size nBatch x nClasses = 3 x 5
input = autograd.Variable(torch.randn(3, 5))
# each element in target has to have 0 <= value < nclasses
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(m(input), target)
output.backward()
```
If provided, the optional argument `weights` should be a 1D Tensor assigning
weight to each of the classes.
This is particularly useful when you have an unbalanced training set.
The input given through a forward call is expected to contain log-probabilities
of each class: input has to be a 2D Tensor of size minibatch x n
Obtaining log-probabilities in a neural network is easily achieved by
adding a `LogSoftmax` layer in the last layer.
You may use `CrossEntropyLoss` instead, if you prefer not to
add an extra layer.
The target that this loss expects is a class index (0 to nClasses-1)
The loss can be described as:
loss(x, class) = -x[class]
or in the case of the weights argument it is specified as follows:
loss(x, class) = -weights[class] * x[class]
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
weight | None | a manual rescaling weight given to each class. If given, has to be a Tensor of size "nclasses".
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
Target Shape: [ * ] : Targets of size [minibatch], each value has to be 0 <= targets[i] <= nClasses-1
#### Members
Parameter | Description
--------- | -----------
weight | the class-weights given as input to the constructor
### NLLLoss2d
This is the negative log likelihood loss, but for image inputs. It computes NLL loss per-pixel.
```python
m = nn.Conv2d(16, 32, (3, 3)).float()
loss = nn.NLLLoss2d()
# input is of size nBatch x nClasses x height x width
input = autograd.Variable(torch.randn(3, 16, 10, 10))
# each element in target has to have 0 <= value < nclasses
target = autograd.Variable(torch.LongTensor(3, 8, 8).random_(0, 4))
output = loss(m(input), target)
output.backward()
```
This loss does not support per-class weights
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
size_average | True | By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch.
Target Shape: [ * , *, *] : Targets of size minibatch x height x width, each value has to be 0 <= targets[i] <= nClasses-1
### KLDivLoss
The [Kullback-Leibler divergence](http://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence) Loss
KL divergence is a useful distance measure for continuous distributions
and is often useful when performing direct regression over the space of
(discretely sampled) continuous output distributions.
As with ClassNLLLoss, the `input` given is expected to contain
_log-probabilities_, however unlike ClassNLLLoss, `input` is not
restricted to a 2D Tensor, because the criterion is applied element-wise.
This criterion expects a `target` `Tensor` of the same size as the
`input` `Tensor`.
The loss can be described as:
loss(x, target) = 1/n \sum(target_i * (log(target_i) - x_i))
By default, the losses are averaged for each minibatch over observations
*as well as* over dimensions. However, if the field
`sizeAverage` is set to `False`, the losses are instead summed.
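A minimal sketch, assuming the usual criterion call convention shown elsewhere in this file (the input holds log-probabilities, the target holds probabilities of the same shape; values are illustrative):
```python
loss = nn.KLDivLoss()
input = nn.LogSoftmax()(autograd.Variable(torch.randn(3, 5)))
target = autograd.Variable(torch.Tensor(3, 5).fill_(0.2))
output = loss(input, target)
```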
### BCELoss
Creates a criterion that measures the Binary Cross Entropy
between the target and the output:
loss(o, t) = - 1/n sum_i (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
or in the case of the weights argument being specified:
loss(o, t) = - 1/n sum_i weights[i] * (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))
This is used for measuring the error of a reconstruction in for example
an auto-encoder. Note that the targets `t[i]` should be numbers between 0 and 1,
for instance, the output of an `nn.Sigmoid` layer.
By default, the losses are averaged for each minibatch over observations
*as well as* over dimensions. However, if the field `sizeAverage` is set
to `False`, the losses are instead summed.
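A minimal sketch (shapes and values are illustrative); the targets should lie in [0, 1], e.g. the output of an `nn.Sigmoid` layer:
```python
m = nn.Sigmoid()
loss = nn.BCELoss()
input = autograd.Variable(torch.randn(3))
target = autograd.Variable(torch.Tensor([0, 1, 1]))
output = loss(m(input), target)
```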
### MarginRankingLoss
Creates a criterion that measures the loss given
inputs `x1`, `x2`, two 1D mini-batch `Tensor`s,
and a label 1D mini-batch tensor `y` with values (`1` or `-1`).
If `y == 1` then it assumed the first input should be ranked higher
(have a larger value) than the second input, and vice-versa for `y == -1`.
The loss function for each sample in the mini-batch is:
loss(x, y) = max(0, -y * (x1 - x2) + margin)
if the internal variable `sizeAverage = True`,
the loss function averages the loss over the batch samples;
if `sizeAverage = False`, then the loss function sums over the batch samples.
By default, `sizeAverage` equals to `True`.
### HingeEmbeddingLoss
Measures the loss given an input `x` which is a 2D mini-batch tensor
and a label tensor `y`, a 1D tensor containing values (`1` or `-1`).
This is usually used for measuring whether two inputs are similar or dissimilar,
e.g. using the L1 pairwise distance, and is typically used for learning
nonlinear embeddings or semi-supervised learning.
loss(x, y) = 1/n * sum_i { x_i,                  if y_i ==  1
                         { max(0, margin - x_i), if y_i == -1
`x` and `y` can be of arbitrary shapes with a total of `n` elements each;
the sum operation still operates over all the elements, and divides by `n`.
(the division by `n` can be avoided if one sets the internal variable `sizeAverage=False`).
The `margin` has a default value of `1`, or can be set in the constructor.
### MultiLabelMarginLoss
Creates a criterion that optimizes a multi-class multi-classification
hinge loss (margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
output `y` (which is a 2D `Tensor` of target class indices).
For each sample in the mini-batch:
loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x.size(1)
where `i == 0` to `x.size(0)`, `j == 0` to `y.size(0)`,
`y[j] != 0`, and `i != y[j]` for all `i` and `j`.
`y` and `x` must have the same size.
The criterion only considers the first non zero `y[j]` targets.
This allows for different samples to have variable amounts of target classes
### SmoothL1Loss
Creates a criterion that uses a squared term if the absolute
element-wise error falls below 1 and an L1 term otherwise.
It is less sensitive to outliers than the `MSELoss` and in some cases
prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick).
loss(x, y) = 1/n \sum_i { 0.5 * (x_i - y_i)^2, if |x_i - y_i| < 1
                        { |x_i - y_i| - 0.5,   otherwise
`x` and `y` can be of arbitrary shapes with a total of `n` elements each;
the sum operation still operates over all the elements, and divides by `n`.
The division by `n` can be avoided if one sets the internal variable
`sizeAverage` to `False`
### SoftMarginLoss
Creates a criterion that optimizes a two-class classification
logistic loss between input `x` (a 2D mini-batch `Tensor`) and
target `y` (which is a tensor containing either `1`s or `-1`s).
loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x.nelement()
The normalization by the number of elements in the input can be disabled by
setting `self.sizeAverage` to `False`.
### MultiLabelSoftMarginLoss
Creates a criterion that optimizes a multi-label one-versus-all
loss based on max-entropy, between input `x` (a 2D mini-batch `Tensor`) and
target `y` (a binary 2D `Tensor`). For each sample in the minibatch:
loss(x, y) = - sum_i (y[i] log( exp(x[i]) / (1 + exp(x[i])))
+ (1-y[i]) log(1/(1+exp(x[i])))) / x.nelement()
where `i == 0` to `x.nElement()-1`, `y[i] in {0,1}`.
`y` and `x` must have the same size.
### CosineEmbeddingLoss
Creates a criterion that measures the loss given an input tensors x1, x2
and a `Tensor` label `y` with values 1 or -1.
This is used for measuring whether two inputs are similar or dissimilar,
using the cosine distance, and is typically used for learning nonlinear
embeddings or semi-supervised learning.
`margin` should be a number from `-1` to `1`, `0` to `0.5` is suggested.
If `margin` is missing, the default value is `0`.
The loss function for each sample is:
loss(x, y) = { 1 - cos(x1, x2),              if y ==  1
             { max(0, cos(x1, x2) - margin), if y == -1
If the internal variable `sizeAverage` is equal to `True`,
the loss function averages the loss over the batch samples;
if `sizeAverage` is `False`, then the loss function sums over the
batch samples. By default, `sizeAverage = True`.
### MultiMarginLoss
Creates a criterion that optimizes a multi-class classification hinge loss
(margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and
output `y` (which is a 1D tensor of target class indices, `0` <= `y` <= `x.size(1)-1`):
For each mini-batch sample:
loss(x, y) = sum_i(max(0, (margin - x[y] + x[i]))^p) / x.size(0)
where `i == 0` to `x.size(0)` and `i != y`.
Optionally, you can give non-equal weighting on the classes by passing
a 1D `weights` tensor into the constructor.
The loss function then becomes:
loss(x, y) = sum_i(max(0, w[y] * (margin - x[y] + x[i]))^p) / x.size(0)
By default, the losses are averaged over observations for each minibatch.
However, if the field `sizeAverage` is set to `False`,
the losses are instead summed.
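A minimal sketch (shapes are illustrative): `x` holds per-class scores for each sample and `y` holds the target class indices:
```python
loss = nn.MultiMarginLoss()
x = autograd.Variable(torch.randn(3, 5))
y = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(x, y)
```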

docs/nn_normalization.md (new file, 142 lines)
## Normalization layers
### BatchNorm1d
Applies Batch Normalization over a 2d input that is seen as a mini-batch of 1d inputs
```python
y = (x - mean(x)) / (standard_deviation(x) + eps) * gamma + beta
```
```python
# With Learnable Parameters
m = nn.BatchNorm1d(100)
# Without Learnable Parameters
m = nn.BatchNorm1d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | the size of each 1D input in the mini-batch
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features ] | 2D Tensor of nBatches x num_features
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension
### BatchNorm2d
Applies Batch Normalization over a 4d input that is seen as a mini-batch of 3d inputs
```python
y = (x - mean(x)) / (standard_deviation(x) + eps) * gamma + beta
```
```python
# With Learnable Parameters
m = nn.BatchNorm2d(100)
# Without Learnable Parameters
m = nn.BatchNorm2d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100, 35, 45))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | num_features from an expected input of size batch_size x num_features x height x width
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features , *, * ] | 4D Tensor of batch_size x num_features x height x width
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension
### BatchNorm3d
Applies Batch Normalization over a 5d input that is seen as a mini-batch of 4d inputs
```python
y = (x - mean(x)) / (standard_deviation(x) + eps) * gamma + beta
```
```python
# With Learnable Parameters
m = nn.BatchNorm3d(100)
# Without Learnable Parameters
m = nn.BatchNorm3d(100, affine=False)
input = autograd.Variable(torch.randn(20, 100, 35, 45, 10))
output = m(input)
```
The mean and standard-deviation are calculated per-dimension over
the mini-batches and gamma and beta are learnable parameter vectors
of size N (where N is the input size).
During training, this layer keeps a running estimate of its computed mean
and variance. The running sum is kept with a default momentum of 0.1
During evaluation, this running mean/variance is used for normalization.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_features | | num_features from an expected input of size batch_size x num_features x depth x height x width
eps | 1e-5 | a value added to the denominator for numerical stability.
momentum | 0.1 | the value used for the running_mean and running_var computation.
affine | | a boolean value that when set to true, gives the layer learnable affine parameters.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , num_features , * , * , * ] | 5D Tensor of batch_size x num_features x depth x height x width
output | Same | Output has the same shape as input
#### Returns
a normalized tensor in the batch dimension

docs/nn_pooling.md (new file, 308 lines)
## Pooling Layers
### MaxPool1d
Applies a 1D max pooling over an input signal composed of several input planes.
```python
The output value of the layer with input (b x C x W) and output (b x C x oW)
can be precisely described as:
output[b_i][c_i][w_i] = max_{k=1, K} input[b_i][c_i][stride_w * w_i + k]
```
```python
# pool of size=3, stride=2
m = nn.MaxPool1d(3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50))
output = m(input)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over
stride | | the stride of the window
padding | 0 | implicit padding to be added.
dilation | 1 | a parameter that controls the stride of elements in the window.
return_indices | False | if True, will return the indices along with the outputs. Useful when Unpooling later.
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , * ] | Input is minibatch x channels x iW
output | [ * , * , * ] | Output shape = minibatch x channels x floor((iW + 2*padW - kernel_size) / stride + 1)
### MaxPool2d
Applies a 2D max pooling over an input signal composed of several input planes.
```python
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
can be precisely described as:
output[b_i][c_i][h_i][w_i] = max_{{kh=1, KH}, {kw=1, KW}} input[b_i][c_i][stride_h * h_i + kh][stride_w * w_i + kw]
```
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool2d(3, stride=2)
# pool of non-square window
m = nn.MaxPool2d((3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### MaxPool3d
Applies a 3D max pooling over an input signal composed of several input planes.
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool3d(3, stride=2)
# pool of non-square window
m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
input = autograd.Variable(torch.randn(20, 16, 50,44, 31))
output = m(input)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
dilation | 1 | a parameter that controls the stride of elements in the window. Can be a single number or a tuple.
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool3d .
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### MaxUnpool2d
Computes the inverse operation of MaxPool2d
```python
# pool of square window of size=2, stride=2
m = nn.MaxPool2d(2, stride=2, return_indices = True)
mu = nn.MaxUnpool2d(2, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output, indices = m(input)
unpooled_output = mu.forward(output, indices)
# exact output size can be also specified as an argument
input = autograd.Variable(torch.randn(1, 16, 11, 11))
downsample = nn.MaxPool2d(3, 3, return_indices=True)
upsample = nn.MaxUnpool2d(3, 3)
h, indices = downsample(input)
output = upsample(h, indices, output_size=input.size())
```
MaxPool2d is not invertible, as the locations of the max values are lost.
MaxUnpool2d takes in as input the output of MaxPool2d and the indices of the Max locations
and computes the inverse.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape is minibatch x channels x ((iH - 1) * sH - 2*padH + kH) x ((iW - 1) * sW - 2*padW + kW), or as specified to the call.
### MaxUnpool3d
Computes the inverse operation of MaxPool3d
```python
# pool of square window of size=3, stride=2
m = nn.MaxPool3d(3, stride=2, return_indices = True)
mu = nn.MaxUnpool3d(3, stride=2)
input = autograd.Variable(torch.randn(20, 16, 50, 32, 15))
output, indices = m(input)
unpooled_output = mu.forward(output, indices)
```
MaxPool3d is not invertible, as the locations of the max values are lost.
MaxUnpool3d takes in as input the output of MaxPool3d and the indices of the Max locations
and computes the inverse.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the max window. Can be a single number k (for a square kernel of k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
padding | 0 | implicit padding that was added to the input. Can be a single number or a tuple.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x ((iT - 1) * sT - 2*padT + kT) x ((iH - 1) * sH - 2*padH + kH) x ((iW - 1) * sW - 2*padW + kW)
### AvgPool2d
Applies a 2D average pooling over an input signal composed of several input planes.
```python
The output value of the layer with input (b x C x H x W) and output (b x C x oH x oW)
can be precisely described as:
output[b_i][c_i][h_i][w_i] = (1 / K) * sum_{kh=1, KH} sum_{kw=1, KW} input[b_i][c_i][stride_h * h_i + kh][stride_w * w_i + kw]
```
```python
# pool of square window of size=3, stride=2
m = nn.AvgPool2d(3, stride=2)
# pool of non-square window
m = nn.AvgPool2d((3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
padding | 0 | implicit padding to be added. Can be a single number or a tuple.
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### AvgPool3d
Applies a 3D average pooling over an input signal composed of several input planes.
```python
# pool of square window of size=3, stride=2
m = nn.AvgPool3d(3, stride=2)
# pool of non-square window
m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
input = autograd.Variable(torch.randn(20, 16, 50, 44, 31))
output = m(input)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take an average over. Can be a single number k (for a cubic kernel of k x k x k) or a tuple (kt x kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (st x sh x sw).
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, *, * ] | Input is minibatch x channels x iT x iH x iW
output | [ * , * , *, *, * ] | Output shape = minibatch x channels x floor((iT + 2*padT - kT) / sT + 1) x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)
### FractionalMaxPool2d
Applies a 2D fractional max pooling over an input signal composed of several input planes.
```python
# pool of square window of size=3, and target output size 13x12
m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
# pool of square window and target output size being half of input image size
m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
Fractional MaxPooling is described in detail in the paper ["Fractional Max-Pooling" by Ben Graham](http://arxiv.org/abs/1412.6071)
The max-pooling operation is applied in kHxkW regions by a stochastic
step size determined by the target output size.
The number of output features is equal to the number of input planes.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window to take a max over. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
output_size | | the target output size of the image of the form oH x oW. Can be a tuple (oH, oW) or a single number oH for a square image oH x oH
output_ratio | | If one wants to have an output size as a ratio of the input size, this option can be given. This has to be a number or tuple in the range (0, 1)
return_indices | False | if True, will return the indices along with the outputs. Useful to pass to nn.MaxUnpool2d .
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x oH x oW, where (oH, oW) is given by output_size or implied by output_ratio
### LPPool2d
Applies a 2D power-average pooling over an input signal composed of several input planes.
```python
# power-2 pool of square window of size=3, stride=2
m = nn.LPPool2d(2, 3, stride=2)
# pool of non-square window of power 1.2
m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
input = autograd.Variable(torch.randn(20, 16, 50, 32))
output = m(input)
```
On each window, the function computed is: f(X) = pow(sum(pow(X, p)), 1/p)
At p = infinity, one gets Max Pooling
At p = 1, one gets Sum Pooling (which is proportional to Average Pooling)
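As a quick sanity check of this formula (a small illustrative sketch, not part of the original examples), one can compare `nn.LPPool2d` against the formula applied by hand to a single 2x2 window:
```python
import torch
import torch.nn as nn
from torch import autograd

# one 2x2 window, so the pool produces a single output value
x = autograd.Variable(torch.rand(1, 1, 2, 2))
m = nn.LPPool2d(2, 2)                       # p=2, kernel_size=2 (stride defaults to kernel_size)
pooled = m(x).data[0][0][0][0]
manual = x.data.pow(2).sum() ** (1.0 / 2)   # f(X) = pow(sum(pow(X, p)), 1/p)
print(pooled, manual)                       # the two values should agree up to float precision
```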
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
kernel_size | | the size of the window. Can be a single number k (for a square kernel of k x k) or a tuple (kh x kw)
stride | kernel_size | the stride of the window. Can be a single number s or a tuple (sh x sw).
ceil_mode | | when True, will use "ceil" instead of "floor" to compute the output shape
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ * , * , *, * ] | Input is minibatch x channels x iH x iW
output | [ * , * , *, * ] | Output shape = minibatch x channels x floor((iH + 2*padH - kH) / sH + 1) x floor((iW + 2*padW - kW) / sW + 1)

docs/nn_recurrent.md (new file, +346 lines)
## Recurrent layers
### RNN
Applies a multi-layer Elman RNN with tanh or ReLU non-linearity to an input sequence.
```python
h_t = tanh(w_ih * x_t + b_ih + w_hh * h_(t-1) + b_hh)
```
```python
rnn = nn.RNN(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
output, hn = rnn(input, h0)
```
For each element in the input sequence, each layer computes the function
shown above, where `h_t` is the hidden state at time t, and `x_t` is the hidden
state of the previous layer at time t (or `input_t` for the first layer).
If nonlinearity='relu', then ReLU is used instead of tanh.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers.
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_k) from the last layer of the RNN, for each k
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for k=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer, of shape (input_size x hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer, of shape (hidden_size x hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer, of shape (hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer, of shape (hidden_size)
### LSTM
Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
```python
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
f_t = sigmoid(W_if x_t + b_if + W_hf h_(t-1) + b_hf)
g_t = tanh(W_ig x_t + b_ig + W_hg h_(t-1) + b_hg)
o_t = sigmoid(W_io x_t + b_io + W_ho h_(t-1) + b_ho)
c_t = f_t * c_(t-1) + i_t * g_t
h_t = o_t * tanh(c_t)
```
```python
rnn = nn.LSTM(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
c0 = Variable(torch.randn(2, 3, 20))
output, (hn, cn) = rnn(input, (h0, c0))
```
For each element in the input sequence, each layer computes the function
shown above, where `h_t` is the hidden state at time t, `c_t` is the cell state at time t,
`x_t` is the hidden state of the previous layer at time t or input_t for the first layer,
and `i_t`, `f_t`, `g_t`, `o_t` are the input, forget, cell, and out gates, respectively.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers.
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
c_0 | | A (num_layers x batch x hidden_size) tensor containing the initial cell state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
c_n | A (num_layers x batch x hidden_size) tensor containing the cell state for t=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ii|W_if|W_ig|W_io), of shape (input_size x 4*hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hi|W_hf|W_hg|W_ho), of shape (hidden_size x 4*hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ii|b_if|b_ig|b_io), of shape (4*hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size)
### GRU
Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
```python
r_t = sigmoid(W_ir x_t + b_ir + W_hr h_(t-1) + b_hr)
i_t = sigmoid(W_ii x_t + b_ii + W_hi h_(t-1) + b_hi)
n_t = tanh(W_in x_t + b_in + r_t * (W_hn h_(t-1) + b_hn))
h_t = (1 - i_t) * n_t + i_t * h_(t-1)
```
```python
rnn = nn.GRU(10, 20, 2)
input = Variable(torch.randn(5, 3, 10))
h0 = Variable(torch.randn(2, 3, 20))
output, hn = rnn(input, h0)
```
For each element in the input sequence, each layer computes the function
shown above, where `h_t` is the hidden state at time t, `x_t` is the hidden
state of the previous layer at time t or input_t for the first layer,
and `r_t`, `i_t`, `n_t` are the reset, input, and new gates, respectively.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
num_layers | | the number of recurrent layers.
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
batch_first | | If True, then the input tensor is provided as (batch, seq, feature)
dropout | | If non-zero, introduces a dropout layer on the outputs of each RNN layer
bidirectional | False | If True, becomes a bidirectional RNN.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (seq_len x batch x input_size) tensor containing the features of the input sequence.
h_0 | | A (num_layers x batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
output | A (seq_len x batch x hidden_size) tensor containing the output features (h_t) from the last layer of the RNN, for each t
h_n | A (num_layers x batch x hidden_size) tensor containing the hidden state for t=seq_len
#### Members
Parameter | Description
--------- | -----------
weight_ih_l[k] | the learnable input-hidden weights of the k-th layer (W_ir|W_ii|W_in), of shape (input_size x 3*hidden_size)
weight_hh_l[k] | the learnable hidden-hidden weights of the k-th layer (W_hr|W_hi|W_hn), of shape (hidden_size x 3*hidden_size)
bias_ih_l[k] | the learnable input-hidden bias of the k-th layer (b_ir|b_ii|b_in), of shape (3*hidden_size)
bias_hh_l[k] | the learnable hidden-hidden bias of the k-th layer (b_hr|b_hi|b_hn), of shape (3*hidden_size)
### RNNCell
An Elman RNN cell with tanh or ReLU non-linearity.
```python
h' = tanh(w_ih * x + b_ih + w_hh * h + b_hh)
```
```python
rnn = nn.RNNCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
    hx = rnn(input[i], hx)
    output.append(hx)
```
If nonlinearity='relu', then ReLU is used in place of tanh.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
nonlinearity | 'tanh' | The non-linearity to use ['tanh'|'relu'].
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
### LSTMCell
A long short-term memory (LSTM) cell.
```python
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
f = sigmoid(W_if x + b_if + W_hf h + b_hf)
g = tanh(W_ig x + b_ig + W_hg h + b_hg)
o = sigmoid(W_io x + b_io + W_ho h + b_ho)
c' = f * c + i * g
h' = o * tanh(c')
```
```python
rnn = nn.LSTMCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
cx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
    hx, cx = rnn(input[i], (hx, cx))
    output.append(hx)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
c' | A (batch x hidden_size) tensor containing the next cell state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)
### GRUCell
A gated recurrent unit (GRU) cell
```python
r = sigmoid(W_ir x + b_ir + W_hr h + b_hr)
i = sigmoid(W_ii x + b_ii + W_hi h + b_hi)
n = tanh(W_in x + b_in + r * (W_hn h + b_hn))
h' = (1 - i) * n + i * h
```
```python
rnn = nn.GRUCell(10, 20)
input = Variable(torch.randn(6, 3, 10))
hx = Variable(torch.randn(3, 20))
output = []
for i in range(6):
    hx = rnn(input[i], hx)
    output.append(hx)
```
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
input_size | | The number of expected features in the input x
hidden_size | | The number of features in the hidden state h
bias | True | If False, then the layer does not use bias weights b_ih and b_hh.
#### Inputs
Parameter | Default | Description
--------- | ------- | -----------
input | | A (batch x input_size) tensor containing input features
hidden | | A (batch x hidden_size) tensor containing the initial hidden state for each element in the batch.
#### Outputs
Parameter | Description
--------- | -----------
h' | A (batch x hidden_size) tensor containing the next hidden state for each element in the batch
#### Members
Parameter | Description
--------- | -----------
weight_ih | the learnable input-hidden weights, of shape (input_size x hidden_size)
weight_hh | the learnable hidden-hidden weights, of shape (hidden_size x hidden_size)
bias_ih | the learnable input-hidden bias, of shape (hidden_size)
bias_hh | the learnable hidden-hidden bias, of shape (hidden_size)

docs/nn_sparse.md (new file, +37 lines)
## Sparse layers
### Embedding
A simple lookup table that stores embeddings of a fixed dictionary and size
```python
# an Embedding module containing 10 tensors of size 3
embedding = nn.Embedding(10, 3)
# a batch of 2 samples of 4 indices each
input = autograd.Variable(torch.LongTensor([[1,2,4,5],[4,3,2,9]]))
print(embedding(input))
# example with padding_idx
embedding = nn.Embedding(10, 3, padding_idx=0)
input = autograd.Variable(torch.LongTensor([[0,2,0,5]]))
print(embedding(input))
```
This module is often used to store word embeddings and retrieve them using indices.
The input to the module is a list of indices, and the output is the corresponding
word embeddings.
#### Constructor Arguments
Parameter | Default | Description
--------- | ------- | -----------
num_embeddings | | size of the dictionary of embeddings
embedding_dim | | the size of each embedding vector
padding_idx | None | If given, pads the output with zeros whenever it encounters the index.
max_norm | None | If given, will renormalize the embeddings to always have a norm less than this
norm_type | | The p of the p-norm to compute for the max_norm option
scale_grad_by_freq | | if given, this will scale gradients by the frequency of the words in the dictionary.
#### Expected Shape
| Shape | Description
------ | ----- | ------------
input | [ *, * ] | Input is a 2D mini_batch LongTensor of m x n indices to extract from the Embedding dictionary
output | [ * , *, * ] | Output shape = m x n x embedding_dim

docs/optim.md (new file, +114 lines)
# torch.optim
The optim package in torch is used to optimize neural networks
using a wide variety of optimization methods, such as SGD, Adam etc.
Currently, the following optimization methods are supported, typically with
options such as weight decay and other bells and whistles.
- SGD `(params, lr=required, momentum=0, dampening=0)`
- AdaDelta `(params, rho=0.9, eps=1e-6, weight_decay=0)`
- Adagrad `(params, lr=1e-2, lr_decay=0, weight_decay=0)`
- Adam `(params, lr=1e-2, betas=(0.9, 0.999), epsilon=1e-8, weight_decay=0)`
- AdaMax `(params, lr=1e-2, betas=(0.9, 0.999), eps=1e-38, weight_decay=0)`
- Averaged SGD `(params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0)`
- RProp `(params, lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50))`
- RMSProp `(params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0)`
The usage of the Optim package itself is as follows.
1. Construct an optimizer
2. Use `optimizer.step(...)` to optimize.
- Call `optimizer.zero_grad()` to zero out the gradient buffers when appropriate
## 1. Constructing the optimizer
One first constructs an `Optimizer` object by giving it a list of parameters
to optimize, as well as the optimizer options, such as learning rate, weight decay, etc.
Examples:
`optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)`
`optimizer = optim.Adam([var1, var2], lr = 0.0001)`
### Per-parameter options
In a more advanced usage, one can specify per-layer options by passing each parameter group along with its custom options.
**__Any parameter group that does not have an attribute defined will use the default attributes.__**
This is very useful when one wants to specify per-layer learning rates for example.
Example:
`optim.SGD([{'params': model1.parameters()}, {'params': model2.parameters(), 'lr': 1e-3}], lr=1e-2, momentum=0.9)`
`model1`'s parameters will use the default learning rate of `1e-2` and momentum of `0.9`
`model2`'s parameters will use a learning rate of `1e-3`, and the default momentum of `0.9`
Then, you can use the optimizer by calling `optimizer.zero_grad()` and `optimizer.step(...)`. Read the next sections.
## 2. Taking an optimization step using `Optimizer.step(...)`
The step function has the following two signatures:
### a. `Optimizer.step(closure)`
The `step` function takes a user-defined closure that computes f(x) and returns the loss.
The closure needs to do the following:
- Call `optimizer.zero_grad()`
- Compute the loss
- Call `loss.backward()`
- Return the loss
Example 1: training a neural network
```python
# Example 1: training a neural network with optimizer.step(closure)
net = MNISTNet()
criterion = ClassNLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
for data in data_batches:
    input, target = data
    def closure():
        optimizer.zero_grad()
        output = net(input)
        loss = criterion(output, target)
        loss.backward()
        return loss
    optimizer.step(closure)
```
Notes: Why is this required? Why can't we simply have the optimizer take the parameters and grads?
Some optimization algorithms such as Conjugate Gradient and LBFGS need to evaluate their function
multiple times. For such optimization methods, the function (i.e. the closure) has to be defined.
### b. `Optimizer.step()`
This is a simplified usage that supports most, but not all optimization algorithms. For example, it does not support LBFGS or Conjugate Gradient.
The usage for this is to simply call the function after the backward() is called on your model.
Example 2: training a neural network
```python
# Example 2: training a neural network with optimizer.step()
net = MNISTNet()
criterion = ClassNLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
for data in data_batches:
    input, target = data
    optimizer.zero_grad()
    output = net(input)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
```

docs/tensor.md (new file, +407 lines)
# Tensors
A `Tensor` is a potentially multi-dimensional matrix.
The number of dimensions is unlimited.
The `Tensor` set of classes are probably the most important class in
`torch`. Almost every package depends on these classes. They are *__the__*
class for handling numeric data. As with pretty much anything in
[torch], tensors are serializable with `torch.save` and `torch.load`
There are 7 Tensor classes in torch:
- `torch.FloatTensor` : Signed 32-bit floating point tensor
- `torch.DoubleTensor` : Signed 64-bit floating point tensor
- `torch.ByteTensor` : Unsigned 8-bit integer tensor
- `torch.CharTensor` : Signed 8-bit integer tensor
- `torch.ShortTensor` : Signed 16-bit integer tensor
- `torch.IntTensor` : Signed 32-bit integer tensor
- `torch.LongTensor` : Signed 64-bit integer tensor
The data in these tensors lives on the system memory connected to your CPU.
Most numeric operations are implemented _only_ for `FloatTensor` and `DoubleTensor`.
Other Tensor types are useful if you want to save memory space or specifically
do integer operations.
The number of dimensions of a `Tensor` can be queried by
`ndimension()` or `dim()`. Size of the `i-th` dimension is
returned by `size(i)`. A tuple containing the size of all the dimensions
can be returned by `size()`.
```python
import torch
# allocate a matrix of shape 3x4
a = torch.FloatTensor(3, 4)
print(a)
# convert this into a LongTensor
b = a.long()
print(b)
# print the size of the tensor
print(a.size())
# print the number of dimensions
print(a.dim())
```
These tensors can be converted to numpy arrays very efficiently
with zero memory copies.
For this, the two provided functions are `.numpy()` and `torch.from_numpy()`
```python
import numpy as np
# convert to numpy
c = a.numpy()
print(type(c))
```
When using GPUs, each of the classes above has an equivalent
class such as: `torch.cuda.FloatTensor`, `torch.cuda.LongTensor`, etc.
When one allocates a CUDA tensor, the data in these tensors lives in the
GPU memory.
One can seamlessly transfer a tensor from the CPU to the GPU, as well as
between different GPUs on your machine.
Apart from the above 7 tensor types, there is one additional tensor type on the GPU
- `torch.cuda.HalfTensor` : Signed 16-bit floating point tensor
```python
import torch.cuda
# allocate a matrix of shape 3x4
a = torch.cuda.FloatTensor(3, 4)
print(a)
# transfer this to the CPU
b = a.cpu()
print(b)
# transfer this back to the GPU-1
a = b.cuda()
print(a)
# transfer this to GPU-2
b = a.cuda(1)
```
## Internal data representation
The actual data of a `Tensor` is contained in a
`Storage`, which can be accessed using
`storage()`. While the memory of a
`Tensor` has to be contained in this unique `Storage`, it might
not be contiguous: the first position used in the `Storage` is given
by `storage_offset()` (starting at `0`),
and the _jump_ needed to go from one element to the next
element in the `i-th` dimension is given by
`stride(i-1)`. See the code example below for an illustration.
```python
# given a 3d tensor
x = torch.FloatTensor(7,7,7)
# accessing the element `(3,4,5)` can be done by
x[3 - 1][4 - 1][5 - 1]
# or equivalently (but slowly!)
x.storage()[x.storage_offset()
            + (3 - 1) * x.stride(0)
            + (4 - 1) * x.stride(1)
            + (5 - 1) * x.stride(2)]
```
One could say that a `Tensor` is a particular way of _viewing_ a
`Storage`: a `Storage` only represents a chunk of memory, while the
`Tensor` interprets this chunk of memory as having dimensions:
```python
# a tensor interprets a chunk of memory as having dimensions
>>> x = torch.Tensor(4,5)
>>> s = x.storage()
>>> for i in range(s.size()): # fill up the Storage
>>>     s[i] = i + 1
# s is interpreted by x as a 2D matrix
>>> print(x)
1 2 3 4 5
6 7 8 9 10
11 12 13 14 15
16 17 18 19 20
[torch.FloatTensor of dimension 4x5]
```
Note also that in torch, ___elements in the same row___ (i.e. elements along the __last__ dimension)
are contiguous in memory for a matrix (tensor).
This is exactly like in `C` and `numpy` (and unlike `Fortran`).
## Default Tensor type
For convenience, _an alias_ `torch.Tensor` is provided, which allows the user to write
type-independent scripts, which can then be run after choosing the desired Tensor type with
a call like
`torch.set_default_tensor_type('torch.DoubleTensor')`
By default, the alias points to `torch.FloatTensor`.
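A small sketch of how changing the default type affects subsequent allocations:
```python
import torch

torch.set_default_tensor_type('torch.DoubleTensor')
x = torch.Tensor(2, 3)         # the alias now allocates a DoubleTensor
print(type(x))                 # <class 'torch.DoubleTensor'>
torch.set_default_tensor_type('torch.FloatTensor')  # restore the default
```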
## Efficient memory management
_All_ tensor operations post-fixed with an underscore (for example `.fill_`)
do _not_ make any memory copy. All these methods transform the existing tensor.
Tensor methods such as `narrow` and `select` return a new tensor referencing _the same storage_.
This magical behavior is internally obtained by good usage of the `stride()` and
`storage_offset()`. See the code example illustrating this.
```python
>>> x = torch.Tensor(5).zero_()
>>> print(x)
0
0
0
0
0
[torch.FloatTensor of dimension 5]
>>> x.narrow(0, 1, 3).fill_(1)
>>> # narrow() returns a Tensor referencing the same Storage as x
>>> print(x)
0
1
1
1
0
[torch.FloatTensor of dimension 5]
```
If you really need to copy a `Tensor`, you can use the `copy_()` method
or the convenience method `clone()`:
```python
# making a copy of a tensor
y = x.new(x.size()).copy_(x)
y = x.clone()
```
We now describe all the methods for `Tensor`. If you want to specify the Tensor type,
just replace `Tensor` by the name of the Tensor variant (like `CharTensor`).
## Constructors ##
Tensor constructors create a new `Tensor` object, optionally allocating
new memory. By default, the elements of newly allocated memory are
not initialized and may therefore contain arbitrary values. Here are
several ways to construct a new `Tensor`.
### torch.Tensor() ###
Returns an empty tensor.
### torch.Tensor(tensor) ###
Returns a new tensor which references the same `Storage` as the given `tensor`.
The `size`, `stride`, and `storage_offset` are the same as those of the given tensor.
The new `Tensor` now "views" the same `storage`
as the given `tensor`. As a result, any modification to the elements
of the new `Tensor` will have an impact on the elements of the given
`tensor`, and vice-versa. No memory copy is made!
```python
>>> x = torch.Tensor(2,5).fill_(3.14)
>>> x
3.1400 3.1400 3.1400 3.1400 3.1400
3.1400 3.1400 3.1400 3.1400 3.1400
[torch.FloatTensor of dimension 2x5]
>>> y = torch.Tensor(x)
>>> y
3.1400 3.1400 3.1400 3.1400 3.1400
3.1400 3.1400 3.1400 3.1400 3.1400
[torch.FloatTensor of dimension 2x5]
>>> y.zero_()
>>> x # elements of x are the same as y!
0 0 0 0 0
0 0 0 0 0
[torch.FloatTensor of dimension 2x5]
```
### torch.Tensor(sz1 [,sz2 [,sz3 [,sz4 [,sz5 ...]]]]]) ###
Create a tensor of the given sizes.
The tensor size will be `sz1 x sz2 x sz3 x sz4 x sz5 x ...`.
### torch.Tensor(sizes) ###
Create a tensor of any number of dimensions. `sizes` gives the size in each dimension of
the tensor and is of type `torch.Size`.
```python
# Example: create a 4D tensor of size 4x4x3x2
x = torch.Tensor(torch.Size([4,4,3,2]))
```
### torch.Tensor(storage) ###
Returns a tensor which uses the existing `Storage` starting at a storage offset of 0.
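For illustration, a minimal sketch (assuming a `torch.FloatStorage` is at hand as the existing storage):
```python
import torch

s = torch.FloatStorage(6)      # a raw chunk of 6 floats
x = torch.Tensor(s)            # a 1D tensor of size 6 viewing that storage
x.fill_(1)
print(s[0])                    # 1.0 -- the storage sees the change, no copy was made
```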
### torch.Tensor(sequence) ###
One can create a tensor from a python sequence.
For example, you can create a `Tensor` from a `list` or a `tuple`
```python
# create a 2d tensor from a list of lists
>>> torch.Tensor([[1,2,3,4], [5,6,7,8]])
1 2 3 4
5 6 7 8
[torch.FloatTensor of dimension 2x4]
```
### torch.Tensor(ndarray) ###
Creates a `Tensor` from a NumPy `ndarray`.
If the `dtype` of the `ndarray` is the same as the type of the `Tensor` being created,
the underlying memory of both is shared, i.e. if the value of an element
in the `ndarray` is changed, the corresponding value in the `Tensor` changes,
and vice versa.
```python
# create a ndarray of dtype=int64
>>> a = np.random.randint(2, size=10)
>>> a
array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
# create a LongTensor. Since they are the same type (int64), the memory is shared
>>> b = torch.LongTensor(a)
0
0
1
1
0
1
1
0
0
0
[torch.LongTensor of size 10]
>>> b[3] = 100
>>> print(a[3])
100
# now create an IntTensor from the same ndarray.
# The memory is not shared in this case as the dtype=int64 != IntTensor (int32)
>>> b = torch.IntTensor(a)
>>> b[3] = 30000
>>> print(a[3])
100
# a did not change to the value 30000
```
## NumPy Conversion ##
### torch.from_numpy(ndarray)
This is a convenience function similar to the constructor above.
Given a numpy `ndarray`, it constructs a torch `Tensor` of the same `dtype`
as the numpy array.
For example, passing in an ndarray of dtype=float64 will create a torch.DoubleTensor
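A brief sketch of this behavior (the memory is shared, just as with the constructor above):
```python
import numpy as np
import torch

a = np.ones((2, 3), dtype=np.float64)
t = torch.from_numpy(a)        # a torch.DoubleTensor viewing a's memory
t[0][0] = 7
print(a[0][0])                 # 7.0 -- the ndarray sees the change
```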
### Tensor.numpy()
This is a member function on a tensor that converts a torch `Tensor` to a
numpy `ndarray`. The memory of the data of both objects is shared.
Hence, changing a value in the `Tensor` will change the corresponding value in
the `ndarray` and vice versa.
```python
>>> a = torch.randn(3,4)
>>> b = a.numpy() # creates a numpy array with dtype=float32 in this case
>>> print(a)
-1.0453 1.4730 -1.8990 -0.7763
1.8155 1.4004 -1.5286 1.0420
0.6551 1.0258 0.1152 -0.3239
[torch.FloatTensor of size 3x4]
>>> print(b)
[[-1.04525673 1.4730444 -1.89899576 -0.77626842]
[ 1.81549406 1.40035892 -1.5286355 1.04199517]
[ 0.6551016 1.02575183 0.11520521 -0.32391372]]
>>> a[2][2] = 1000
>>> print(b)
[[ -1.04525673e+00 1.47304440e+00 -1.89899576e+00 -7.76268423e-01]
[ 1.81549406e+00 1.40035892e+00 -1.52863550e+00 1.04199517e+00]
[ 6.55101597e-01 1.02575183e+00 1.00000000e+03 -3.23913723e-01]]
# notice that b[2][2] has changed to the value 1000 too.
```
### torch.is_tensor(obj)
Returns True if the passed-in object is a `Tensor` (of any type). Returns `False` otherwise.
### torch.is_storage(obj)
Returns True if the passed-in object is a `Storage` (of any type). Returns `False` otherwise.
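A tiny sketch of both helpers:
```python
import torch

x = torch.FloatTensor(2, 2)
print(torch.is_tensor(x))             # True
print(torch.is_tensor([1, 2, 3]))     # False
print(torch.is_storage(x.storage()))  # True
```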
### torch.expand_as
### torch.expand
### torch.view
### torch.view_as
### torch.permute
### torch.pin_memory
### copy
### split
### chunk
### tolist
### repeat
### unsqueeze
### unsqueeze_
### add, iadd, sub, isub, mul, imul, matmul, div, rdiv, idiv, mod, neg
## GPU Semantics ##
When you create a `torch.cuda.*Tensor`, it is allocated on the current GPU.
However, you could allocate it on another GPU as well, using the `with torch.cuda.device(id)` context.
All allocations within this context will be placed on the GPU `id`.
Once `Tensor`s are allocated, you can do operations on them from any GPU context, and the results will be placed on the same device as where the source `Tensor` is located.
For example, if tensors `a` and `b` are both on GPU-2 while GPU-1 is the current device,
and one does `c = a + b`, then `c` will be on GPU-2, regardless of what the current device is.
Cross-GPU operations are not allowed; the only cross-GPU operation allowed is `copy`.
If `a` is on GPU-1 and `b` is on GPU-2, then `c = a + b` will result in an error.
See the example for more clarity on these semantics.
```python
# Tensors are allocated on GPU 1 by default
x = torch.cuda.FloatTensor(1)
# x.get_device() == 0
y = torch.FloatTensor(1).cuda()
# y.get_device() == 0
with torch.cuda.device(1):
    # allocates a tensor on GPU 2
    a = torch.cuda.FloatTensor(1)
    # transfers a tensor from CPU to GPU-2
    b = torch.FloatTensor(1).cuda()
    # a.get_device() == b.get_device() == 1
    z = x + y
    # z.get_device() == 0 -- x and y live on GPU 1, so the result does too
    # even within a context, you can give a GPU id to the .cuda call
    c = torch.randn(2).cuda(2)
    # c.get_device() == 2
```

docs/tensor_ref.md (new file, +2460 lines; diff suppressed because it is too large)
docs/torch.md (new file, +83 lines)
# torch
```python
# load torch with
import torch
```
```python
# load the CUDA features of torch with
import torch.cuda
```
__torch__ is the main package where data structures for multi-dimensional
tensors and mathematical operations over these are defined.
Additionally, it provides many utilities for efficiently serializing
Tensors and arbitrary types, as well as other useful utilities.
It has a CUDA counterpart that enables you to run your tensor computations
on an NVIDIA GPU with compute capability >= 2.0.
## Multi-core
### torch.get_num_threads()
Gets the number of OpenMP threads that will be used for parallelizing CPU operations
### torch.set_num_threads(n)
Sets the number of OpenMP threads to use for parallelizing CPU operations
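For example (a small sketch), to run CPU kernels on half of the currently configured threads:
```python
import torch

n = torch.get_num_threads()
torch.set_num_threads(max(1, n // 2))   # use at most half of the previous thread count
print(torch.get_num_threads())
```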
## Serialization
### torch.save(object, file)
This function pickles a Python object to the `file`. `file` is either a filename or a file handle.
`object` can be a picklable python object, including `torch` `Tensor`s, autograd `Variable`, nn `Module`s etc.
When a group of `torch` `Tensor`s are saved together, and if any of them share the same storages, then this sharing is preserved during saving and loading back.
### torch.load(file)
This function unpickles objects that have been pickled with `torch.save`
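A minimal round-trip sketch (the file name `x.pth` is just an example):
```python
import torch

x = torch.rand(3, 4)
torch.save(x, 'x.pth')            # pickle the tensor to disk
y = torch.load('x.pth')           # unpickle it back
print((x - y).abs().max())        # 0.0 -- the tensor survives the round trip
```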
## Random Numbers
### torch.get_rng_state()
Gets the current state of the torch Random Number Generator.
This can be passed in the future to `torch.set_rng_state` to restore the current RNG state.
### torch.set_rng_state(state)
Sets the current state of the torch Random Number Generator to the given `state`.
### torch.manual_seed(number)
Sets the initial seed of the random number generator to a given number.
### torch.initial_seed()
Returns the number that is the initial seed to the Random Number Generator
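A short sketch showing how the RNG state can be used to reproduce a draw:
```python
import torch

state = torch.get_rng_state()
a = torch.rand(3)
torch.set_rng_state(state)        # rewind the generator
b = torch.rand(3)
print((a - b).abs().max())        # 0.0 -- the same numbers are drawn again
```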
## CUDA
### torch.cuda.is_available()
Returns `True` if CUDA is available and usable. Returns `False` otherwise.
### torch.cuda.device_count()
Returns the number of CUDA devices on the system.
### torch.cuda.current_device()
Returns the device index of the current default CUDA device.
### torch.cuda.synchronize()
This function issues a `cudaDeviceSynchronize` on the current device, and hence waits for all in-flight CUDA computation to finish.
### torch.cuda.current_stream()
Returns the handle to the current stream of the CUDA context.
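A small sketch that exercises these helpers only when CUDA is present:
```python
import torch
import torch.cuda

if torch.cuda.is_available():
    print(torch.cuda.device_count(), torch.cuda.current_device())
    torch.cuda.synchronize()      # wait for all in-flight GPU work to finish
else:
    print('CUDA not available')
```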

View File

@ -9,8 +9,9 @@ import shutil
import sys
import os
# TODO: make this more robust
WITH_CUDA = os.path.exists('/Developer/NVIDIA/CUDA-7.5/include') or os.path.exists('/usr/local/cuda/include')
CUDA_HOME = os.getenv('CUDA_HOME', '/usr/local/cuda')
WITH_CUDA = os.path.exists(CUDA_HOME)
WITH_CUDNN = WITH_CUDA
DEBUG = False
################################################################################
@ -76,12 +77,18 @@ class build_ext(setuptools.command.build_ext.build_ext):
# cwrap depends on pyyaml, so we can't import it earlier
from tools.cwrap import cwrap
from tools.cwrap.plugins.THPPlugin import THPPlugin
from tools.cwrap.plugins.THPLongArgsPlugin import THPLongArgsPlugin
from tools.cwrap.plugins.ArgcountSortPlugin import ArgcountSortPlugin
from tools.cwrap.plugins.AutoGPU import AutoGPU
from tools.cwrap.plugins.BoolOption import BoolOption
from tools.cwrap.plugins.KwargsPlugin import KwargsPlugin
from tools.cwrap.plugins.NullableArguments import NullableArguments
from tools.cwrap.plugins.CuDNNPlugin import CuDNNPlugin
cwrap('torch/csrc/generic/TensorMethods.cwrap', plugins=[
AutoGPU(condition='IS_CUDA'), THPLongArgsPlugin(), THPPlugin(),
ArgcountSortPlugin(),
BoolOption(), THPPlugin(), AutoGPU(condition='IS_CUDA'),
ArgcountSortPlugin(), KwargsPlugin(),
])
cwrap('torch/csrc/cudnn/cuDNN.cwrap', plugins=[
CuDNNPlugin(), NullableArguments()
])
# It's an old-style class in Python 2.7...
setuptools.command.build_ext.build_ext.run(self)
@ -102,10 +109,16 @@ class install(setuptools.command.install.install):
class clean(distutils.command.clean.clean):
def run(self):
import glob
with open('.gitignore', 'r') as f:
ignores = f.read()
for glob in filter(bool, ignores.split('\n')):
shutil.rmtree(glob, ignore_errors=True)
for wildcard in filter(bool, ignores.split('\n')):
for filename in glob.glob(wildcard):
try:
os.remove(filename)
except OSError:
shutil.rmtree(filename, ignore_errors=True)
# It's an old-style class in Python 2.7...
distutils.command.clean.clean.run(self)
@ -141,6 +154,7 @@ main_libraries = ['TH', 'shm']
main_sources = [
"torch/csrc/Module.cpp",
"torch/csrc/Generator.cpp",
"torch/csrc/Size.cpp",
"torch/csrc/Exceptions.cpp",
"torch/csrc/Tensor.cpp",
"torch/csrc/Storage.cpp",
@ -148,6 +162,10 @@ main_sources = [
"torch/csrc/utils.cpp",
"torch/csrc/allocators.cpp",
"torch/csrc/serialization.cpp",
"torch/csrc/autograd/init.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/autograd/function.cpp",
"torch/csrc/autograd/engine.cpp",
]
try:
@ -158,27 +176,40 @@ except ImportError:
pass
if WITH_CUDA:
if platform.system() == 'Darwin':
cuda_path = '/Developer/NVIDIA/CUDA-7.5'
cuda_include_path = cuda_path + '/include'
cuda_lib_path = cuda_path + '/lib'
else:
cuda_path = '/usr/local/cuda'
cuda_include_path = cuda_path + '/include'
cuda_lib_path = cuda_path + '/lib64'
cuda_lib_dirs = ['lib64', 'lib']
cuda_include_path = os.path.join(CUDA_HOME, 'include')
for lib_dir in cuda_lib_dirs:
cuda_lib_path = os.path.join(CUDA_HOME, lib_dir)
if os.path.exists(cuda_lib_path):
break
include_dirs.append(cuda_include_path)
extra_link_args.append('-L' + cuda_lib_path)
extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
extra_compile_args += ['-DWITH_CUDA']
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
main_libraries += ['THC']
main_sources += [
"torch/csrc/cuda/Module.cpp",
"torch/csrc/cuda/Storage.cpp",
"torch/csrc/cuda/Stream.cpp",
"torch/csrc/cuda/Tensor.cpp",
"torch/csrc/cuda/AutoGPU.cpp",
"torch/csrc/cuda/utils.cpp",
"torch/csrc/cuda/serialization.cpp",
]
if WITH_CUDNN:
main_libraries += ['cudnn']
main_sources += [
"torch/csrc/cudnn/Module.cpp",
"torch/csrc/cudnn/Conv.cpp",
"torch/csrc/cudnn/cuDNN.cpp",
"torch/csrc/cudnn/Types.cpp",
"torch/csrc/cudnn/Handles.cpp",
"torch/csrc/cudnn/CppWrapper.cpp",
]
extra_compile_args += ['-DWITH_CUDNN']
if DEBUG:
extra_compile_args += ['-O0', '-g']
extra_link_args += ['-O0', '-g']

View File

@ -4,12 +4,22 @@ from itertools import product
from copy import deepcopy
import torch
import torch.cuda
from torch.autograd import Variable, Function
torch.set_default_tensor_type('torch.DoubleTensor')
torch.manual_seed(123)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(123)
TEST_NUMPY = True
try:
import numpy
except ImportError:
TEST_NUMPY = False
def get_cpu_type(t):
assert t.__module__ == 'torch.cuda'
return getattr(torch, t.__class__.__name__)
@ -78,7 +88,7 @@ class TestCase(unittest.TestCase):
if torch.is_tensor(x) and torch.is_tensor(y):
max_err = 0
super(TestCase, self).assertEqual(x.size().tolist(), y.size().tolist())
super(TestCase, self).assertEqual(x.size(), y.size())
for index in iter_indices(x):
max_err = max(max_err, abs(x[index] - y[index]))
self.assertLessEqual(max_err, prec, message)

View File

@ -7,6 +7,7 @@ import torch
import torch.cuda
from torch.autograd import Variable
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
import torch.backends.cudnn
# tarfile module tries to obtain a file object name in python 3.3
if sys.version_info[:2] == (3, 3):
@ -15,6 +16,8 @@ else:
TemporaryFile = tempfile.TemporaryFile
TEST_CUDA = torch.cuda.is_available()
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
PRECISION = 1e-5
module_tests = [
@ -24,6 +27,13 @@ module_tests = [
input_size=(4, 10),
reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
),
dict(
module_name='Linear',
constructor_args=(10, 8, False),
input_size=(4, 10),
desc='no_bias',
reference_fn=lambda i,p: torch.mm(i, p[0].t())
),
dict(
module_name='Threshold',
constructor_args=(2, 1),
@ -289,7 +299,7 @@ criterion_tests = [
dict(
module_name='MultiLabelMarginLoss',
input_size=(5, 10),
target=torch.rand(5, 10).mul(10).floor()
target=torch.rand(5, 10).mul(10).floor().long()
),
dict(
module_name='MultiLabelSoftMarginLoss',
@ -306,7 +316,7 @@ criterion_tests = [
dict(
module_name='MultiMarginLoss',
input_size=(5, 10),
target=torch.rand(5).mul(8).floor()
target=torch.rand(5).mul(8).floor().long()
),
dict(
module_name='SmoothL1Loss',

BIN
test/data/legacy_modules.t7 Normal file

Binary file not shown.

7
test/data/network1.py Normal file
View File

@ -0,0 +1,7 @@
import torch.nn as nn
class Net(nn.Container):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)

8
test/data/network2.py Normal file
View File

@ -0,0 +1,8 @@
import torch.nn as nn
class Net(nn.Container):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(10, 20)
self.relu = nn.ReLU()

View File

@ -15,15 +15,27 @@ python test_nn.py
echo "Running legacy nn tests"
python test_legacy_nn.py
echo "Running optim tests"
python test_optim.py
echo "Running multiprocessing tests"
python test_multiprocessing.py
MULTIPROCESSING_METHOD=spawn python test_multiprocessing.py
MULTIPROCESSING_METHOD=forkserver python test_multiprocessing.py
echo "Running util tests"
python test_utils.py
echo "Running dataloader tests"
python test_dataloader.py
if which nvcc >/dev/null 2>&1
then
echo "Running cuda tests"
python test_cuda.py
echo "Running NCCL tests"
python test_nccl.py
else
echo "nvcc not found in PATH, skipping CUDA tests"
fi

View File

@ -1,9 +1,12 @@
import math
import unittest
import contextlib
from copy import deepcopy
from collections import OrderedDict
from common import make_jacobian, TestCase, iter_tensors, get_numerical_jacobian
from torch.autograd.functions import *
from torch.autograd import Variable
PRECISION = 1e-4
@ -35,11 +38,21 @@ def get_analytical_jacobian(input, output):
return jacobian
@contextlib.contextmanager
def backward_engine(engine):
_prev_engine = Variable._execution_engine
Variable._execution_engine = engine()
try:
yield
finally:
Variable._execution_engine = _prev_engine
class TestAutograd(TestCase):
def test_hooks(self):
x = Variable(torch.ones(5, 5))
y = Variable(torch.ones(5, 5) * 4)
x = Variable(torch.ones(5, 5), requires_grad=True)
y = Variable(torch.ones(5, 5) * 4, requires_grad=True)
counter = [0]
def bw_hook(inc, grad):
@ -59,16 +72,16 @@ class TestAutograd(TestCase):
z.backward(torch.ones(5, 5), retain_variables=True)
self.assertEqual(counter[0], 5)
def test_backward(self):
def _test_backward(self):
v_t = torch.randn(5, 5)
x_t = torch.randn(5, 5)
y_t = torch.rand(5, 5) + 0.1
z_t = torch.randn(5, 5)
grad_output = torch.randn(5, 5)
v = Variable(v_t)
x = Variable(x_t)
y = Variable(y_t)
z = Variable(z_t)
v = Variable(v_t, requires_grad=True)
x = Variable(x_t, requires_grad=True)
y = Variable(y_t, requires_grad=True)
z = Variable(z_t, requires_grad=True)
v.backward(grad_output)
self.assertEqual(v.grad, grad_output)
@ -82,8 +95,15 @@ class TestAutograd(TestCase):
self.assertEqual(y.grad, y_grad * grad_output)
self.assertEqual(z.grad, z_grad * grad_output)
def test_backward(self):
self._test_backward()
def test_backward_basic_engine(self):
with backward_engine(torch.autograd.engine.BasicEngine):
self._test_backward()
def test_volatile(self):
x = Variable(torch.ones(5, 5))
x = Variable(torch.ones(5, 5), requires_grad=True)
y = Variable(torch.ones(5, 5) * 4, volatile=True)
z = x ** 2
@ -99,16 +119,46 @@ class TestAutograd(TestCase):
self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5)))
self.assertIsNone(w.creator)
def test_indexing(self):
x = torch.range(1, 16).resize_(4, 4)
y = Variable(x)
self.assertEqual(x[1], y[1].data)
self.assertEqual(x[1, 1], y[1, 1].data[0])
self.assertEqual(x[1:], y[1:].data)
self.assertEqual(x[:2], y[:2].data)
self.assertEqual(x[:2, 2], y[:2, 2].data)
self.assertEqual(x[1:2, 2], y[1:2, 2].data)
self.assertEqual(x[1, 2:], y[1, 2:].data)
def test_requires_grad(self):
x = Variable(torch.randn(5, 5))
y = Variable(torch.randn(5, 5))
z = Variable(torch.randn(5, 5), requires_grad=True)
a = x + y
self.assertFalse(a.requires_grad)
b = a + z
self.assertTrue(b.requires_grad)
def error():
raise RuntimeError
# Make sure backward isn't called on these
a.backward_hooks = OrderedDict()
x.backward_hooks = OrderedDict()
y.backward_hooks = OrderedDict()
a.backward_hooks['test'] = error
x.backward_hooks['test'] = error
y.backward_hooks['test'] = error
b.backward(torch.ones(5, 5))
def test_inplace(self):
x = Variable(torch.ones(5, 5))
y = Variable(torch.ones(5, 5) * 4)
x = Variable(torch.ones(5, 5), requires_grad=True)
y = Variable(torch.ones(5, 5) * 4, requires_grad=True)
z = x * y
q = z + y
w = z * y
z.dirty = True
z.add_(2)
# Add doesn't need it's inputs to do backward, so it shouldn't raise
q.backward(torch.ones(5, 5))
q.backward(torch.ones(5, 5), retain_variables=True)
# Mul saves both inputs in forward, so it should raise
self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5)))
@ -128,14 +178,125 @@ class TestAutograd(TestCase):
z = m + y / 8
q = z * y
r = z + y
prev_version = z._version
w = z.exp_()
self.assertTrue(z.dirty)
self.assertNotEqual(z._version, prev_version)
r.backward(torch.ones(5, 5), retain_variables=True)
self.assertEqual(x.grad, torch.ones(5, 5) / 2)
w.backward(torch.ones(5, 5), retain_variables=True)
self.assertEqual(x.grad, torch.Tensor(5, 5).fill_((1 + math.e) / 2))
self.assertRaises(RuntimeError, lambda: q.backward(torch.ones(5, 5)))
leaf = Variable(torch.ones(5, 5), requires_grad=True)
x = leaf.clone()
x.add_(10)
self.assertEqual(x.data, torch.ones(5, 5) * 11)
# x should be still usable
y = x + 2
y.backward(torch.ones(5, 5))
self.assertEqual(leaf.grad, torch.ones(5, 5))
z = x * y
x.add_(2)
self.assertRaises(RuntimeError, lambda: z.backward(torch.ones(5, 5)))
def test_shared_storage(self):
x = Variable(torch.ones(5, 5))
y = x.t()
z = x[1]
self.assertRaises(RuntimeError, lambda: x.add_(2))
self.assertRaises(RuntimeError, lambda: y.add_(2))
self.assertRaises(RuntimeError, lambda: z.add_(2))
def _test_setitem(self, size, index):
x = Variable(torch.ones(*size), requires_grad=True)
y = x + 2
y_version = y._version
y[index] = 2
self.assertNotEqual(y._version, y_version)
y.backward(torch.ones(*size))
expected_grad = torch.ones(*size)
if isinstance(index, Variable):
index = index.data
expected_grad[index] = 0
self.assertEqual(x.grad, expected_grad)
def _test_setitem_tensor(self, size, index):
x = Variable(torch.ones(*size), requires_grad=True)
y = x + 2
y_version = y._version
value = Variable(torch.Tensor(x[index].size()).fill_(7), requires_grad=True)
y[index] = value
self.assertNotEqual(y._version, y_version)
y.backward(torch.ones(*size))
expected_grad_input = torch.ones(*size)
if isinstance(index, Variable):
index = index.data
expected_grad_input[index] = 0
self.assertEqual(x.grad, expected_grad_input)
self.assertEqual(value.grad, torch.ones(value.size()))
def test_setitem(self):
self._test_setitem((5, 5), 1)
self._test_setitem((5,), 1)
self._test_setitem((1,), 0)
self._test_setitem_tensor((5, 5), 3)
self._test_setitem_tensor((5,), 3)
def test_setitem_mask(self):
mask = torch.ByteTensor(5, 5).bernoulli_()
self._test_setitem((5, 5), Variable(mask))
self._test_setitem((5,), Variable(mask[0]))
self._test_setitem((1,), Variable(mask[0, 0:1]))
self._test_setitem_tensor((5, 5), Variable(mask))
self._test_setitem_tensor((5,), Variable(mask[0]))
def test_unused_output(self):
x = Variable(torch.randn(10, 10), requires_grad=True)
outputs = x.chunk(5)
o = outputs[2]
o = o * 4 + 2
o.sum().backward()
expected_grad = torch.zeros(10, 10)
expected_grad[4:6] = 4
self.assertEqual(x.grad, expected_grad)
x.grad.zero_()
grad_output = torch.randn(2, 10)
outputs = x.chunk(5)
outputs[0].backward(grad_output)
expected_grad = torch.zeros(10, 10)
expected_grad[:2] = grad_output
self.assertEqual(x.grad, expected_grad)
@unittest.skipIf(not torch.cuda.is_available() or torch.cuda.device_count() < 2,
"CUDA not available or <2 GPUs detected")
def test_unused_output_gpu(self):
from torch.nn.parallel.functions import Broadcast
x = Variable(torch.randn(5, 5).float().cuda(), requires_grad=True)
outputs = Broadcast(list(range(torch.cuda.device_count())))(x)
y = outputs[-1] * 2
y.sum().backward()
self.assertEqual(x.grad, torch.ones(5, 5) * 2)
def test_no_grad(self):
x = Variable(torch.randn(10, 10), requires_grad=True)
y = x + 2
y = y.no_grad()
z = y * 4 + 2
self.assertFalse(y.requires_grad)
self.assertFalse(z.requires_grad)
x = Variable(torch.randn(10, 10), requires_grad=True)
y = x * 2
y = y.no_grad()
self.assertFalse(y.requires_grad)
self.assertFalse(y.creator.requires_grad)
z = x + y
z.sum().backward()
# This is an incorrect gradient, but we assume that's what the user
# wanted. no_grad() is an advanced option.
self.assertEqual(x.grad, torch.ones(10, 10))
def test_type_conversions(self):
import torch.cuda
x = Variable(torch.randn(5, 5))
@ -156,6 +317,72 @@ class TestAutograd(TestCase):
self.assertIs(type(x2.data), torch.cuda.FloatTensor)
self.assertIs(x2.get_device(), 1)
def test_backward_copy(self):
# This tests checks backward engine for a very subtle bug that appreared
# in one of the initial versions of autograd. Gradients tensors were
# simply stored in lists while the function waited for all its gradients
# to be computed. However, sometimes an output was used multiple times,
# so the gradients needed to be summed. Engine used to keep a need_copy
# set of tensors that will need a clone upon next addition and removed
# them from the set as soon as the clone was performed. However, this
# could lead to incorrect results if the same gradient tensor was
# buffered in three places in the graph:
# 1. When accumulating gradients in one of these places it was cloned
# and removed from need_copy set.
# 2. When accumulating in second place, it wasn't in the need_copy set,
# so the gradients were simply accumulated in-place (which already
# modified the grad in 3rd place)
# 3. When accumulating in the third place, it wasn't in the need_copy set
# as well, so the incoming gradient was summed in-place, yielding
# incorrect results in all functions, except the first one.
x = Variable(torch.ones(5, 5), requires_grad=True)
y = Variable(torch.ones(5, 5), requires_grad=True)
# Simulate that we're in the middle of the graph
a = x + 2
b = y + 2
c = x + 2
# This op will just return grad_output two times in backward
add1 = a + b
add2 = add1 + c
# Simulate a long branch, so grad_output will get buffered.
for i in range(4):
a = a * 2
b = b * 2
c = c * 2
branch = a + b + c
out = add2 + branch
# expected gradients are:
# for x: 34 (16 from final a, 16 from final c, 2 from add2)
# for y: 17 (16 from final b, 1 from add2)
grad_output = torch.ones(5, 5)
out.backward(grad_output)
self.assertEqual(x.grad, torch.ones(5, 5) * 34)
self.assertEqual(y.grad, torch.ones(5, 5) * 17)
def test_functional_blas(self):
def compare(fn, *args):
unpacked_args = tuple(arg.data if isinstance(arg, Variable) else arg
for arg in args)
self.assertEqual(fn(*args).data, fn(*unpacked_args))
def test_blas(fn, x, y, z):
# Checks all signatures
compare(fn, x, y, z)
compare(fn, 0.5, x, y, z)
compare(fn, 0.5, x, 0.25, y, z)
test_blas(torch.addmm, Variable(torch.randn(2, 4)),
Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4)))
test_blas(torch.addbmm, Variable(torch.randn(2, 4)),
Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
test_blas(torch.baddbmm, Variable(torch.randn(4, 2, 4)),
Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4)))
test_blas(torch.addmv, Variable(torch.randn(2)),
Variable(torch.randn(2, 10)), Variable(torch.randn(10)))
test_blas(torch.addr, Variable(torch.randn(5, 6)),
Variable(torch.randn(5)), Variable(torch.randn(6)))
def index_variable(num_indices, max_indices):
index = torch.randperm(max_indices)[:num_indices].long()
@ -179,9 +406,10 @@ function_tests = [
(PowConstant, (3.14,), (torch.rand(L, L),) ),
(Transpose, (0, 1), (torch.rand(L, L),) ),
(Transpose, (2, 0), (torch.rand(S, S, S),), '3d' ),
(Permute, (0, 4, 3, 5, 1, 2), ((1, 2, 3, 4, 5, 6),), ),
(Index, (1, 2), (torch.rand(S, S, S),) ),
(Permute, (0, 4, 3, 5, 1, 2), ((1, 2, 3, 4, 5, 6),) ),
(Index, ((1, 2),), (torch.rand(S, S, S),) ),
(Index, (slice(0, 3),), (torch.rand(S, S, S),), 'slice' ),
(Index, ((slice(0, 3), 1),),(torch.rand(S, S, S),), 'slice_index' ),
(View, (S*S, S), (torch.rand(S, S, S),) ),
(Expand, (S, 5, S, 5), ((S, 1, S, 1),) ),
(Exp, (), (torch.rand(S, S, S),) ),
@ -353,7 +581,6 @@ method_tests = [
('dist', (S, S, S), ((S, S, S),) ),
('dist', (S, S, S), ((S, S, S), 4), '4' ),
('index_select', (S, S, S), (0, index_variable(2, S)) ),
('cat', (1, S, S), ((Variable(torch.randn(2, S, S)), Variable(torch.randn(3, S, S))), 0)),
('diag', (M, M), (), '2d' ),
('diag', (M,), (), '1d' ),
('tril', (M, M), () ),
@ -384,12 +611,12 @@ def create_input(call_args):
call_args = (call_args,)
def map_arg(arg):
if isinstance(arg, tuple) and not isinstance(arg[0], Variable):
return Variable(torch.randn(*arg).double())
return Variable(torch.randn(*arg).double(), requires_grad=True)
elif torch.is_tensor(arg):
if isinstance(arg, torch.FloatTensor):
return Variable(arg.double())
return Variable(arg.double(), requires_grad=True)
else:
return Variable(arg)
return Variable(arg, requires_grad=True)
else:
return arg
return tuple(map_arg(arg) for arg in call_args)
@ -456,6 +683,13 @@ for test in function_tests:
setattr(TestAutograd, test_name, do_test)
EXCLUDE_FUNCTIONAL = {
'addmm',
'addbmm',
'baddbmm',
'addmv',
'addr',
}
for test in method_tests:
name, self_size, args = test[:3]
test_name = 'test_' + name + ('_' + test[3] if len(test) == 4 else '')
@ -472,6 +706,16 @@ for test in method_tests:
self.assertEqual(unpack_variables(output_variable), output_tensor)
# TODO: check that both have changed after adding all inplace ops
# functional interface tests
if hasattr(torch, name) and name not in EXCLUDE_FUNCTIONAL:
f_args_variable = (self_variable,) + args_variable
f_args_tensor = (self_tensor,) + args_tensor
output_variable = getattr(torch, name)(*f_args_variable)
output_tensor = getattr(torch, name)(*f_args_tensor)
if not torch.is_tensor(output_tensor) and not isinstance(output_tensor, tuple):
output_tensor = torch.DoubleTensor((output_tensor,))
self.assertEqual(unpack_variables(output_variable), output_tensor)
check(name)
inplace_name = name + '_'
if hasattr(Variable(torch.ones(1)), inplace_name):

View File

@ -23,6 +23,11 @@ types = [
torch.ByteTensor,
]
float_types = [
torch.FloatTensor,
torch.DoubleTensor
] # TODO: add half...
def number(floating, integer, t):
name = type(t).__name__
if 'Double' in name or 'Float' in name or 'Half' in name:
@ -40,6 +45,9 @@ def make_tensor(t, *sizes):
def small_2d(t):
return make_tensor(t, S, S)
def small_2d_scaled(t, scale=10):
return make_tensor(t, S, S).mul(scale)
def small_3d(t):
return make_tensor(t, S, S, S)
@ -49,6 +57,9 @@ def medium_1d(t):
def medium_2d(t):
return make_tensor(t, M, M)
def medium_2d_scaled(t, scale=10):
return make_tensor(t, M, M).mul(scale)
def small_3d_ones(t):
return t(S, S, S).copy_(torch.ones(S, S, S))
@ -59,6 +70,18 @@ def small_3d_positive(t):
def small_3d_unique(t):
return t(S, S, S).copy_(torch.range(1, S*S*S))
def small_1d_lapack(t):
return torch.range(1, 3).view(3)
def small_2d_lapack(t):
return torch.range(1, 9).view(3, 3)
def small_2d_lapack_skinny(t):
return torch.range(1, 12).view(3, 4)
def small_2d_lapack_fat(t):
return torch.range(1, 12).view(4, 3)
def new_t(*sizes):
def tmp(t):
return t(*sizes).copy_(torch.randn(*sizes))
@ -74,16 +97,16 @@ tests = [
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('div', small_3d, lambda t: [number(3.14, 3, t)], ),
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
('pow', small_3d, lambda t: [number(3.14, 3, t)], ),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor' ),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], ),
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
('addcdiv', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('addcdiv', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)], ),
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar' ),
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], ),
@ -92,17 +115,13 @@ tests = [
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar' ),
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars' ),
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar' ),
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars' ),
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], ),
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar' ),
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars' ),
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars' ),
('atan2', medium_2d, lambda t: [medium_2d(t)], ),
('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types),
('chunk', medium_2d, lambda t: [4], ),
('chunk', medium_2d, lambda t: [4, 1], 'dim' ),
('clamp', medium_2d, lambda t: [-0.1, 0.5], ),
('clamp', medium_2d_scaled, lambda t: [-1, 5], ),
('clone', medium_2d, lambda t: [], ),
('cmax', medium_2d, lambda t: [medium_2d(t)], ),
('cmin', medium_2d, lambda t: [medium_2d(t)], ),
@ -135,7 +154,6 @@ tests = [
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive' ),
('is_set_to', medium_2d, lambda t: [medium_2d(t)], ),
# TODO: positive case
('is_size', medium_2d, lambda t: [torch.LongStorage((M, M))], ),
('kthvalue', small_3d_unique, lambda t: [3], ),
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim' ),
('lerp', small_3d, lambda t: [small_3d(t), 0.3], ),
@ -192,12 +210,18 @@ tests = [
('view_as', small_3d, lambda t: [t(100, 10)], ),
('zero', small_3d, lambda t: [], ),
('zeros', small_3d, lambda t: [1, 2, 3, 4], ),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], ),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], ),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], ),
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
# lapack tests
('qr', small_2d_lapack, lambda t: [], 'square', float_types),
('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types),
('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types),
]
# TODO: random functions, cat, gather, scatter, index*, masked*, resize, resizeAs, storage_offset, storage, stride, unfold
# TODO: random functions, cat, gather, scatter, index*, masked*,
# resize, resizeAs, storage_offset, storage, stride, unfold
custom_precision = {
'addbmm': 1e-4,
@ -211,32 +235,38 @@ custom_precision = {
simple_pointwise = [
'abs',
'acos',
'asin',
'atan',
'ceil',
'cinv',
'cos',
'cosh',
'exp',
'floor',
'fmod',
'frac',
'log',
'log1p',
'neg',
'remainder',
'round',
'sigmoid',
'sign',
'sin',
'sqrt',
'tanh',
'trunc',
]
for fn in simple_pointwise:
tests.append((fn, small_3d, lambda t: []))
simple_pointwise_float = [
'log',
'log1p',
'sigmoid',
'sin',
'sqrt',
'tanh',
'acos',
'asin',
'atan',
'cos',
'cosh',
'exp',
'cinv',
'floor',
'fmod',
'frac',
'neg',
'round',
'trunc',
'ceil',
]
for fn in simple_pointwise_float:
tests.append((fn, small_3d, lambda t: [], None, float_types))
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
def tmp(self):
cpu_tensor = tensor_constructor(t)
@ -251,6 +281,11 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
if 'unimplemented data type' in reason:
raise unittest.SkipTest('unimplemented data type')
raise
except AttributeError as e:
reason = e.args[0]
if 'object has no attribute' in reason:
raise unittest.SkipTest('unimplemented data type')
raise
# If one changes, another should change as well
self.assertEqual(cpu_tensor, gpu_tensor, precision)
self.assertEqual(cpu_args, gpu_args, precision)
@ -378,10 +413,11 @@ class TestCuda(TestCase):
y = torch.randn(2, 5).cuda(1)
result = comm.gather((x, y), dim)
expected_size = x.size()
expected_size = list(x.size())
expected_size[dim] += y.size(dim)
expected_size = torch.Size(expected_size)
self.assertEqual(result.get_device(), 0)
self.assertTrue(result.is_size(expected_size))
self.assertEqual(result.size(), expected_size)
index = [slice(None, None), slice(None, None)]
index[dim] = slice(0, x.size(dim))
@ -395,6 +431,13 @@ class TestCuda(TestCase):
def test_gather_dim(self):
self._test_gather(1)
def test_from_sequence(self):
seq = [list(range(i*4,i*4+4)) for i in range(5)]
reference = torch.range(0, 19).resize_(5, 4)
for t in types:
cuda_type = get_gpu_type(t)
self.assertEqual(cuda_type(seq), reference)
def test_manual_seed(self):
with freeze_rng_state():
x = torch.zeros(4, 4).float().cuda()
@ -406,6 +449,113 @@ class TestCuda(TestCase):
self.assertEqual(x, y)
self.assertEqual(torch.cuda.initial_seed(), 2)
def test_serialization(self):
x = torch.randn(4, 4).cuda()
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
self.assertEqual(x_copy, x)
self.assertIs(type(x_copy), type(x))
self.assertEqual(x_copy.get_device(), x.get_device())
def test_serialization_empty(self):
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), original.get_device())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), original.get_device())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization_remap(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
def gpu_remap(storage, location):
if location == 'cuda:1':
return storage.cuda(0)
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f, map_location=gpu_remap)
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), 0)
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_multigpu_serialization_remap_dict(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
with tempfile.NamedTemporaryFile() as f:
torch.save(x, f)
f.seek(0)
x_copy = torch.load(f, map_location={'cuda:1': 'cuda:0'})
for original, copy in zip(x, x_copy):
self.assertEqual(copy, original)
self.assertIs(type(copy), type(original))
self.assertEqual(copy.get_device(), 0)
def test_cuda_synchronize(self):
torch.cuda.synchronize()
def test_streams(self):
default_stream = torch.cuda.current_stream()
user_stream = torch.cuda.Stream()
self.assertEqual(torch.cuda.current_stream(), default_stream)
self.assertNotEqual(default_stream, user_stream)
self.assertEqual(default_stream.cuda_stream, 0)
self.assertNotEqual(user_stream.cuda_stream, 0)
with torch.cuda.stream(user_stream):
self.assertEqual(torch.cuda.current_stream(), user_stream)
self.assertTrue(user_stream.query())
# copy 10 MB tensor from CPU-GPU which should take some time
tensor1 = torch.ByteTensor(10000000).pin_memory()
tensor2 = tensor1.cuda(async=True)
self.assertFalse(default_stream.query())
default_stream.synchronize()
self.assertTrue(default_stream.query())
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
def test_streams_multi_gpu(self):
default_stream = torch.cuda.current_stream()
self.assertEqual(default_stream.device, 0)
stream = torch.cuda.Stream(device=1)
self.assertEqual(stream.device, 1)
with torch.cuda.device(1):
self.assertEqual(torch.cuda.current_stream().device, 1)
self.assertNotEqual(torch.cuda.current_stream(), default_stream)
def test_events(self):
stream = torch.cuda.current_stream()
event = torch.cuda.Event(enable_timing=True)
self.assertTrue(event.query())
# copy 10 MB tensor from CPU-GPU which should take some time
tensor1 = torch.ByteTensor(10000000).pin_memory()
start_event = torch.cuda.Event(enable_timing=True)
stream.record_event(start_event)
tensor2 = tensor1.cuda(async=True)
stream.record_event(event)
self.assertFalse(event.query())
event.synchronize()
self.assertTrue(event.query())
self.assertGreater(start_event.elapsed_time(event), 0)
for decl in tests:
for t in types:
@ -416,23 +566,29 @@ for decl in tests:
desc = ''
elif len(decl) == 4:
name, constr, arg_constr, desc = decl
elif len(decl) == 5:
name, constr, arg_constr, desc, type_subset = decl
if t not in type_subset:
continue
precision = custom_precision.get(name, TestCuda.precision)
for inplace in (True, False):
if inplace:
name = name + '_'
if not hasattr(tensor, name):
name_inner = name + '_'
else:
name_inner = name
if not hasattr(tensor, name_inner):
continue
if not hasattr(gpu_tensor, name):
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(name, gpu_tensor.__class__.__name__))
if not hasattr(gpu_tensor, name_inner):
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(name_inner, gpu_tensor.__class__.__name__))
continue
test_name = 'test_' + t.__name__ + '_' + name
test_name = 'test_' + t.__name__ + '_' + name_inner
if desc:
test_name += '_' + desc
assert not hasattr(TestCase, test_name)
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name, t, precision))
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
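To make the generated names concrete, here is how one declaration from the tests list expands under the loop above (an illustrative sketch, not extra test code):
# ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types)
# is only instantiated for t in float_types. For t = torch.FloatTensor it yields
#   TestCuda.test_FloatTensor_addbmm     (out-of-place)
#   TestCuda.test_FloatTensor_addbmm_    (in-place, since FloatTensor has addbmm_)
# with no desc suffix because desc is None; each method runs
# compare_cpu_gpu(small_2d, <arg constructor>, name_inner, torch.FloatTensor, 1e-4),
# the 1e-4 coming from custom_precision['addbmm'].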
if __name__ == '__main__':
unittest.main()

test/test_dataloader.py (new file, 134 lines)

@ -0,0 +1,134 @@
import math
import sys
import torch
import traceback
import unittest
from torch.utils.data import Dataset, TensorDataset, DataLoader
from common import TestCase
class TestTensorDataset(TestCase):
def test_len(self):
source = TensorDataset(torch.randn(15, 10, 2, 3, 4, 5), torch.randperm(15))
self.assertEqual(len(source), 15)
def test_getitem(self):
t = torch.randn(15, 10, 2, 3, 4, 5)
l = torch.randn(15, 10)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
def test_getitem_1d(self):
t = torch.randn(15)
l = torch.randn(15)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i:i+1], source[i][0])
self.assertEqual(l[i:i+1], source[i][1])
class ErrorDataset(Dataset):
def __init__(self, size):
self.size = size
def __len__(self):
return self.size
class TestDataLoader(TestCase):
def setUp(self):
self.data = torch.randn(100, 2, 3, 5)
self.labels = torch.randperm(50).repeat(2)
self.dataset = TensorDataset(self.data, self.labels)
def _test_sequential(self, loader):
batch_size = loader.batch_size
for i, (sample, target) in enumerate(loader):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx+batch_size])
self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1))
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
def _test_shuffle(self, loader):
found_data = {i: 0 for i in range(self.data.size(0))}
found_labels = {i: 0 for i in range(self.labels.size(0))}
batch_size = loader.batch_size
for i, (batch_samples, batch_targets) in enumerate(loader):
for sample, target in zip(batch_samples, batch_targets):
for data_point_idx, data_point in enumerate(self.data):
if data_point.eq(sample).all():
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i+1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i+1) * batch_size)
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
def _test_error(self, loader):
it = iter(loader)
errors = 0
while True:
try:
next(it)
except NotImplementedError:
msg = "".join(traceback.format_exception(*sys.exc_info()))
self.assertTrue("collate_fn" in msg)
errors += 1
except StopIteration:
self.assertEqual(errors,
math.ceil(float(len(loader.dataset))/loader.batch_size))
return
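As a quick sanity check of the error count expected above, a minimal sketch using the dataset sizes from the error tests below:
import math
# ErrorDataset(100) with batch_size=2: every batch fails to collate, so the
# loop should see 100 / 2 = 50 NotImplementedErrors before StopIteration.
assert math.ceil(100 / 2.0) == 50
# ErrorDataset(41) with batch_size=2: 21 batches, the last one partial.
assert math.ceil(41 / 2.0) == 21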
def test_sequential(self):
self._test_sequential(DataLoader(self.dataset))
def test_sequential_batch(self):
self._test_sequential(DataLoader(self.dataset, batch_size=2))
def test_shuffle(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True))
def test_shuffle_batch(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))
def test_sequential_workers(self):
self._test_sequential(DataLoader(self.dataset, num_workers=4))
def test_sequential_batch_workers(self):
self._test_sequential(DataLoader(self.dataset, batch_size=2, num_workers=4))
def test_shuffle_workers(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))
def test_shuffle_batch_workers(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))
def test_error(self):
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
def test_error_workers(self):
self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
def test_partial_workers(self):
"check that workers exit even if the iterator is not exhausted"
loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4))
workers = loader.workers
for i, sample in enumerate(loader):
if i == 3:
break
del loader
for w in workers:
w.join(1.0) # timeout of one second
self.assertFalse(w.is_alive(), 'subprocess not terminated')
self.assertEqual(w.exitcode, 0)
if __name__ == '__main__':
unittest.main()


@ -42,7 +42,7 @@ class OldModuleTest(ModuleTest):
# TODO: hessian tests
tests = [
OldModuleTest(nn.Add,
(torch.LongStorage([5, 4]),),
(torch.Size([5, 4]),),
input_size=(3, 5, 4),
desc='3D'),
OldModuleTest(nn.Add,
@ -1109,6 +1109,90 @@ class TestNN(NNTestCase):
module.__repr__()
str(module)
def _build_net(self):
return (nn.Sequential()
.add(nn.Concat(0)
.add(nn.Linear(2, 5))
.add(nn.Linear(2, 5)))
.add(nn.ReLU())
.add(nn.Linear(10, 20)))
def test_parameters(self):
net = self._build_net()
concat = net.modules[0]
param, grad = net.parameters()
self.assertEqual(len(param), 6)
self.assertEqual(len(grad), 6)
self.assertIn(concat.modules[0].weight, param)
self.assertIn(concat.modules[0].bias, param)
self.assertIn(concat.modules[1].weight, param)
self.assertIn(concat.modules[1].bias, param)
self.assertIn(net.modules[2].weight, param)
self.assertIn(net.modules[2].bias, param)
self.assertIn(concat.modules[0].gradWeight, grad)
self.assertIn(concat.modules[0].gradBias, grad)
self.assertIn(concat.modules[1].gradWeight, grad)
self.assertIn(concat.modules[1].gradBias, grad)
self.assertIn(net.modules[2].gradWeight, grad)
self.assertIn(net.modules[2].gradBias, grad)
def test_flattenParameters(self):
net = self._build_net()
param, grad_param = net.flattenParameters()
self.assertEqual(param.dim(), 1)
self.assertEqual(param.size(0), 250)
self.assertEqual(grad_param.dim(), 1)
self.assertEqual(grad_param.size(0), 250)
def test_findModules(self):
net = self._build_net()
modules, containers = net.findModules(nn.Linear)
self.assertEqual(len(modules), 3)
self.assertEqual(len(modules), len(containers))
self.assertIn(net.modules[0].modules[0], modules)
self.assertIn(net.modules[0].modules[1], modules)
self.assertIn(net.modules[2], modules)
self.assertIn(net.modules[0], containers)
self.assertEqual(containers.count(net.modules[0]), 2)
self.assertIn(net, containers)
for m, c in zip(modules, containers):
self.assertIn(m, c.modules)
def test_apply(self):
net = self._build_net()
seen_modules = set()
def callback(module):
self.assertNotIn(module, seen_modules)
seen_modules.add(module)
net.apply(callback)
self.assertEqual(len(seen_modules), 6)
def test_listModules(self):
net = self._build_net()
module_list = list()
def callback(module):
module_list.append(module)
net.apply(callback)
self.assertEqual(module_list, net.listModules())
def test_replace(self):
ref_net = self._build_net()
net = self._build_net()
def callback(module):
if isinstance(module, nn.ReLU):
return nn.Tanh()
return module
net.replace(callback)
for module, reference in zip(net.listModules(), ref_net.listModules()):
if isinstance(reference, nn.ReLU):
self.assertIsInstance(module, nn.Tanh)
else:
self.assertIsInstance(module, type(reference))
if __name__ == '__main__':
prepare_tests()


@ -1,8 +1,10 @@
import os
import contextlib
import gc
import multiprocessing
import os
import sys
import time
import unittest
import contextlib
from sys import platform
import torch
@ -178,5 +180,12 @@ class TestMultiprocessing(TestCase):
if __name__ == '__main__':
start_method = os.environ.get('MULTIPROCESSING_METHOD')
if start_method:
if sys.version_info < (3, 4):
print("Python <3.4 does not support 'multiprocessing.set_start_method'")
sys.exit(0)
else:
print("INFO: Using multiprocessing start method '{}'".format(start_method))
multiprocessing.set_start_method(start_method)
unittest.main()

test/test_nccl.py (new file, 85 lines)

@ -0,0 +1,85 @@
import unittest
import torch
import torch.cuda.nccl as nccl
import torch.cuda
from common import TestCase
nGPUs = torch.cuda.device_count()
class TestNCCL(TestCase):
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_broadcast(self):
expected = torch.FloatTensor(128).uniform_()
tensors = [expected.cuda()]
for device in range(1, torch.cuda.device_count()):
with torch.cuda.device(device):
tensors.append(torch.cuda.FloatTensor(128))
nccl.broadcast(tensors)
for i in range(torch.cuda.device_count()):
self.assertEqual(tensors[i], expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_reduce(self):
tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(128).zero_()
for t in tensors:
expected.add_(t)
tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
nccl.reduce(tensors)
self.assertEqual(tensors[0], expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_all_reduce(self):
tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(128).zero_()
for t in tensors:
expected.add_(t)
tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
nccl.all_reduce(tensors)
for tensor in tensors:
self.assertEqual(tensor, expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_all_gather(self):
inputs = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
expected = torch.cat(inputs, 0)
inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
outputs = [torch.cuda.FloatTensor(128 * nGPUs, device=i)
for i in range(nGPUs)]
nccl.all_gather(inputs, outputs)
for tensor in outputs:
self.assertEqual(tensor, expected)
@unittest.skipIf(nGPUs < 2, "only one GPU detected")
def test_reduce_scatter(self):
in_size = 32 * nGPUs
out_size = 32
inputs = [torch.FloatTensor(in_size).uniform_() for i in range(nGPUs)]
expected = torch.FloatTensor(in_size).zero_()
for t in inputs:
expected.add_(t)
expected = expected.view(nGPUs, 32)
inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
outputs = [torch.cuda.FloatTensor(out_size, device=i)
for i in range(nGPUs)]
nccl.reduce_scatter(inputs, outputs)
for i in range(nGPUs):
self.assertEqual(outputs[i], expected[i])
if __name__ == '__main__':
unittest.main()


@ -2,16 +2,32 @@ import math
import torch
import random
import unittest
import contextlib
from copy import deepcopy
from itertools import repeat
from functools import wraps
import torch.nn as nn
import torch.nn.parallel as dp
from torch.autograd import Variable
from torch.nn import Parameter
from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
module_tests, criterion_tests, TEST_CUDA, PRECISION
module_tests, criterion_tests, TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, PRECISION
from common import freeze_rng_state
def default_tensor_type(type):
type_str = torch.typename(type)
def decorator(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
old_type = torch.typename(torch.Tensor())
torch.set_default_tensor_type(type_str)
try:
return fn(*args, **kwargs)
finally:
torch.set_default_tensor_type(old_type)
return wrapper
return decorator
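A minimal usage sketch for the decorator above (the function name is hypothetical; only the decorator itself comes from this file):
@default_tensor_type(torch.FloatTensor)
def make_float_tensor():
    # while the wrapper runs, torch.Tensor defaults to FloatTensor; the previous
    # default type is restored afterwards, even if the body raises
    return torch.Tensor(2, 2)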
class InputVariableMixin(object):
def _get_input(self):
@ -20,7 +36,7 @@ class InputVariableMixin(object):
if isinstance(i, Variable):
return i
elif torch.is_tensor(i):
return Variable(i)
return Variable(i, requires_grad=True)
else:
return type(i)(map_variables(elem) for elem in i)
return map_variables(input)
@ -37,12 +53,13 @@ class NewModuleTest(InputVariableMixin, ModuleTest):
if self.check_inplace:
module_ip = self.constructor(*self.constructor_args, inplace=True)
input_version = input._version
output = module(input)
test_case.assertFalse(input.dirty)
test_case.assertEqual(input._version, input_version)
input_ip = deepcopy(input)
output_ip = module_ip(input_ip)
test_case.assertTrue(input_ip.dirty)
test_case.assertNotEqual(input_ip._version, input_version)
test_case.assertEqual(output, output_ip)
@ -123,6 +140,10 @@ class NewCriterionTest(InputVariableMixin, CriterionTest):
class TestNN(NNTestCase):
# # protip: uncomment this line to figure out which test is segfaulting
# def setUp(self):
# print("In method", self._testMethodName)
# super(TestNN, self).setUp()
def _forward(self, module, input):
with freeze_rng_state():
@ -170,7 +191,7 @@ class TestNN(NNTestCase):
def test_hooks(self):
module = nn.Sigmoid()
input = Variable(torch.ones(5, 5))
input = Variable(torch.ones(5, 5), requires_grad=True)
counter = {
'forwards': 0,
@ -258,14 +279,14 @@ class TestNN(NNTestCase):
input.fill_(1-p)
module = cls(p)
input_var = Variable(input)
input_var = Variable(input, requires_grad=True)
output = module(input_var)
self.assertLess(abs(output.data.mean() - (1-p)), 0.05)
output.backward(input)
self.assertLess(abs(input_var.grad.mean() - (1-p)), 0.05)
module = cls(p, True)
input_var = Variable(input.clone())
input_var = Variable(input.clone(), requires_grad=True)
output = module(input_var + 0)
self.assertLess(abs(output.data.mean() - (1-p)), 0.05)
output.backward(input)
@ -284,12 +305,25 @@ class TestNN(NNTestCase):
l1=l,
l2=l
)
self.param = Parameter(torch.Tensor(3, 5))
l = nn.Linear(10, 20)
n = Net()
s = nn.Sequential(l, l, l, l)
s = nn.Sequential(n, n, n, n)
self.assertEqual(num_params(l), 2)
self.assertEqual(num_params(n), 2)
self.assertEqual(num_params(s), 2)
self.assertEqual(num_params(n), 3)
self.assertEqual(num_params(s), 3)
def test_modules(self):
class Net(nn.Container):
def __init__(self):
super(Net, self).__init__()
self.l1 = l
self.l2 = l
self.param = Variable(torch.Tensor(3, 5))
l = nn.Linear(10, 20)
n = Net()
s = nn.Sequential(n, n, n, n)
self.assertEqual(list(s.modules()), [s, n, l])
def test_Sequential_getitem(self):
l1 = nn.Linear(10, 20)
@ -315,7 +349,7 @@ class TestNN(NNTestCase):
net.add_module('l3', l)
self.assertEqual(net.l3, l)
self.assertRaises(KeyError, lambda: net.add_module('l', l))
self.assertRaises(ValueError, lambda: net.add_module('x', 'non-module'))
self.assertRaises(TypeError, lambda: net.add_module('x', 'non-module'))
def test_type(self):
l = nn.Linear(10, 20)
@ -336,9 +370,16 @@ class TestNN(NNTestCase):
l2 = nn.Linear(10, 10)
def assign_weight():
l2.weight = l1.weight + 2
self.assertRaises(RuntimeError, assign_weight)
self.assertRaises(TypeError, assign_weight)
# This should work though
l2.weight = Variable(torch.randn(10, 10))
l2.weight = Parameter(torch.randn(10, 10))
def test_embedding_padding_idx(self):
embedding = nn.Embedding(10, 20, padding_idx = 0)
input = Variable(torch.LongTensor([[0,2,4,5],[4,3,0,9]]))
output = embedding(input)
self.assertEqual(output[0][0].sum().data[0], 0)
self.assertEqual(output[1][2].sum().data[0], 0)
def test_Dropout(self):
input = torch.Tensor(1000)
@ -381,7 +422,7 @@ class TestNN(NNTestCase):
module = module_cls(2, return_indices=True)
numel = 4 ** num_dim
input = torch.range(1, numel).view(1, 1, *repeat(4, num_dim))
input_var = Variable(input)
input_var = Variable(input, requires_grad=True)
# Check forward
output, indices = module(input_var)
@ -413,7 +454,7 @@ class TestNN(NNTestCase):
self._test_maxpool_indices(3)
def _test_scatter(self, x):
if not TEST_CUDA or torch.cuda.device_count() < 2:
if not TEST_MULTIGPU:
raise unittest.SkipTest("Only one GPU detected")
x = Variable(x)
result = dp.scatter(x, (0, 1))
@ -427,17 +468,18 @@ class TestNN(NNTestCase):
self._test_scatter(torch.randn(4, 4))
def test_scatter_gpu(self):
self._test_scatter(torch.randn(4, 4))
if TEST_CUDA:
self._test_scatter(torch.randn(4, 4).cuda())
def _test_gather(self, output_device):
if not TEST_CUDA or torch.cuda.device_count() < 2:
if not TEST_MULTIGPU:
raise unittest.SkipTest("Only one GPU detected")
inputs = (
Variable(torch.randn(2, 4).cuda(0)),
Variable(torch.randn(2, 4).cuda(1))
)
result = dp.gather(inputs, output_device)
self.assertEqual(result.size().tolist(), [4, 4])
self.assertEqual(result.size(), torch.Size([4, 4]))
self.assertEqual(result[:2], inputs[0])
self.assertEqual(result[2:], inputs[1])
if output_device != -1:
@ -451,11 +493,10 @@ class TestNN(NNTestCase):
def test_gather_gpu(self):
self._test_gather(0)
@unittest.skipIf(not TEST_CUDA or torch.cuda.device_count() < 2,
"Only one GPU detected")
def _test_replicate(self):
@unittest.skipIf(not TEST_MULTIGPU, "Only one GPU detected")
def test_replicate(self):
module = nn.Linear(10, 5).float().cuda()
input = torch.randn(2, 10).float().cuda()
input = Variable(torch.randn(2, 10).float().cuda())
expected_output = module(input).data
replicas = dp.replicate(module, (0, 1))
for i, replica in enumerate(replicas):
@ -464,8 +505,17 @@ class TestNN(NNTestCase):
replica_input = input.cuda(i)
self.assertEqual(replica(replica_input).data, expected_output)
@unittest.skipIf(not TEST_CUDA or torch.cuda.device_count() < 2,
"Only one GPU detected")
@unittest.skipIf(not TEST_MULTIGPU, "Only one GPU detected")
def test_replicate_buffers(self):
net = nn.Container()
net.bn = nn.BatchNorm2d(10)
net.cuda()
replicas = dp.replicate(net, (0, 1))
for i, replica in enumerate(replicas):
self.assertEqual(replica.bn.running_mean.get_device(), i, 'buffer on wrong device')
self.assertEqual(replica.bn.running_var.get_device(), i, 'buffer on wrong device')
@unittest.skipIf(not TEST_MULTIGPU, "Only one GPU detected")
def test_parallel_apply(self):
l1 = nn.Linear(10, 5).float().cuda(0)
l2 = nn.Linear(10, 5).float().cuda(1)
@ -483,8 +533,7 @@ class TestNN(NNTestCase):
inputs = (i1, Variable(i2.data.new()))
expected_outputs = (expected1, expected2.new())
@unittest.skipIf(not TEST_CUDA or torch.cuda.device_count() < 2,
"Only one GPU detected")
@unittest.skipIf(not TEST_MULTIGPU, "Only one GPU detected")
def test_data_parallel(self):
l = nn.Linear(10, 5).float().cuda()
i = Variable(torch.randn(20, 10).float().cuda(1))
@ -495,6 +544,302 @@ class TestNN(NNTestCase):
self.assertEqual(out.get_device(), 1)
self.assertEqual(out.data, expected_out)
def test_parameter_dict(self):
l = nn.Linear(5, 5)
block = nn.Container(
conv=nn.Conv2d(3, 3, 3, bias=False)
)
net = nn.Container(
linear1=l,
linear2=l,
block=block,
empty=None,
)
param_dict = net.parameter_dict()
self.assertEqual(len(param_dict), 5)
self.assertIn('linear1.weight', param_dict)
self.assertIn('linear1.bias', param_dict)
self.assertIn('linear2.weight', param_dict)
self.assertIn('linear2.bias', param_dict)
self.assertIn('block.conv.weight', param_dict)
self.assertNotIn('block.conv.bias', param_dict)
self.assertFalse(any(map(lambda k: k.startswith('empty'), param_dict.keys())))
for k, v in param_dict.items():
param = net
for component in k.split('.'):
param = getattr(param, component)
self.assertIs(v, param)
l = nn.Linear(5, 5)
param_dict = l.parameter_dict()
self.assertEqual(len(param_dict), 2)
self.assertIs(param_dict['weight'], l.weight)
self.assertIs(param_dict['bias'], l.bias)
def test_load_parameter_dict(self):
l = nn.Linear(5, 5)
block = nn.Container(
conv=nn.Conv2d(3, 3, 3, bias=False)
)
net = nn.Container(
linear1=l,
linear2=l,
block=block,
empty=None,
)
param_dict = {
'linear1.weight': Variable(torch.ones(5, 5)),
'block.conv.bias': Variable(torch.range(1, 3)),
}
net.load_parameter_dict(param_dict)
self.assertIs(net.linear1.weight, param_dict['linear1.weight'])
self.assertIs(net.block.conv.bias, param_dict['block.conv.bias'])
def test_parameter_assignment(self):
l = nn.Linear(5, 5)
def num_params():
return len(list(l.parameters()))
self.assertEqual(num_params(), 2)
new_param = Parameter(torch.randn(5, 5))
l.param_name = new_param
self.assertEqual(num_params(), 3)
self.assertIn(new_param, l.parameters())
var = Variable(torch.randn(5, 5))
l.var_name = var
self.assertEqual(num_params(), 3)
self.assertNotIn(var, l.parameters())
# Make sure Variables are not saved as parameters
l.variable_attr = Variable(torch.Tensor(5, 5))
self.assertEqual(num_params(), 3)
l.param_attr = Parameter(torch.Tensor(5, 5))
self.assertEqual(num_params(), 4)
# It shouldn't be possible to replace a parameter with a Variable
def assign_var():
l.param_attr = Variable(torch.Tensor(5, 5))
self.assertRaises(TypeError, assign_var)
# But replacing it with None should be fine
l.param_attr = None
self.assertEqual(num_params(), 3)
def test_ConvTranspose2d_output_size(self):
m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2)
i = Variable(torch.randn(2, 3, 6, 6))
for h in range(15, 22):
for w in range(15, 22):
if 18 <= h <= 20 and 18 <= w <= 20:
size = (h, w)
if h == 19:
size = torch.LongStorage(size)
elif h == 20:
size = torch.LongStorage((2, 4) + size)
m(i, output_size=size)
else:
self.assertRaises(ValueError, lambda: m(i, (h, w)))
def test_MaxUnpool2d_output_size(self):
m = nn.MaxPool2d(3, stride=2, return_indices=True)
mu = nn.MaxUnpool2d(3, stride=2)
big_t = torch.rand(1, 1, 6, 6)
big_t[0][0][4][4] = 100
output_big, indices_big = m(Variable(big_t))
self.assertRaises(RuntimeError, lambda: mu(output_big, indices_big))
small_t = torch.rand(1, 1, 5, 5)
for i in range(0, 4, 2):
for j in range(0, 4, 2):
small_t[:,:,i,j] = 100
output_small, indices_small = m(Variable(small_t))
for h in range(3, 10):
for w in range(3, 10):
if 4 <= h <= 6 and 4 <= w <= 6:
size = (h, w)
if h == 5:
size = torch.LongStorage(size)
elif h == 6:
size = torch.LongStorage((1, 1) + size)
mu(output_small, indices_small, output_size=size)
else:
self.assertRaises(ValueError, lambda:
mu(output_small, indices_small, (h, w)))
def test_RNN_cell(self):
# this is just a smoke test; these modules are implemented through
# autograd so no Jacobian test is needed
for module in (nn.RNNCell, nn.GRUCell):
for bias in (True, False):
input = Variable(torch.randn(3, 10))
hx = Variable(torch.randn(3, 20))
cell = module(10, 20, bias=bias)
for i in range(6):
hx = cell(input, hx)
hx.sum().backward()
def test_LSTM_cell(self):
# this is just a smoke test; these modules are implemented through
# autograd so no Jacobian test is needed
for bias in (True, False):
input = Variable(torch.randn(3, 10))
hx = Variable(torch.randn(3, 20))
cx = Variable(torch.randn(3, 20))
lstm = nn.LSTMCell(10, 20, bias=bias)
for i in range(6):
hx, cx = lstm(input, (hx, cx))
(hx+cx).sum().backward()
@unittest.skipIf(not TEST_CUDNN, "needs cudnn")
@default_tensor_type(torch.FloatTensor) # FIXME: just until torch.cuda.DoubleTensor.sum() implemented
def test_RNN_cpu_vs_cudnn(self):
def forward_backward(cuda, rnn, input_val, hx_val, weights_val):
is_lstm = type(rnn) == nn.LSTM
for x_layer, y_layer in zip(rnn.all_weights, weights_val):
for x, y in zip(x_layer, y_layer):
x.data.copy_(y.data)
input = Variable(input_val.clone(), requires_grad=True)
if is_lstm:
hx = (Variable(hx_val.clone(), requires_grad=True),
Variable(hx_val.add(1), requires_grad=True))
else:
hx = Variable(hx_val.clone(), requires_grad=True)
if cuda:
rnn.cuda()
input.data = input.data.cuda()
if is_lstm:
hx[0].data = hx[0].data.cuda()
hx[1].data = hx[1].data.cuda()
else:
hx.data = hx.data.cuda()
output, hy = rnn(input, hx)
# FIXME this is because of a pytorch bug
if is_lstm:
fake_loss = 0*(hy[0] + hy[1]).sum()
else:
fake_loss = 0*hy.sum()
loss = output.sum() + fake_loss
loss.backward()
return {'output': output.data,
'hy': hy[0].data if is_lstm else hy.data,
'weights': rnn.all_weights,
'grad_input': input.grad,
'grad_hx': hx[0].grad if is_lstm else hx.grad,
'cy': hy[1].data if is_lstm else None,
'grad_cx': hx[1].grad if is_lstm else None}
input_size = 10
hidden_size = 6
num_layers = 2
seq_length = 7
batch = 5
def compare_cpu_gpu(outputs_cpu, outputs_gpu):
self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
for key in outputs_cpu.keys():
if key != 'weights':
self.assertEqual(outputs_cpu[key], outputs_gpu[key], prec=5e-5, message=key)
# check grad weights separately, as nested dict
for cpu_layer_weight, gpu_layer_weight in zip(outputs_cpu['weights'], outputs_gpu['weights']):
for (cpu_weight, gpu_weight) in zip(cpu_layer_weight, gpu_layer_weight):
self.assertEqual(cpu_weight.grad, gpu_weight.grad, prec=5e-5)
input_val = torch.randn(seq_length, batch, input_size)
for module in (nn.RNN, nn.LSTM, nn.GRU):
for bias in (True, False):
for bidirectional in (False, True):
for dropout in (0, 1): # Because of dropout randomness, can only compare 0 and 1
num_directions = 2 if bidirectional else 1
hx_val = torch.randn(num_layers * num_directions, batch, hidden_size)
rnn = module(input_size,
hidden_size,
num_layers,
bias=bias,
dropout=dropout,
bidirectional=bidirectional)
outputs_cpu = forward_backward(
False, rnn, input_val, hx_val, rnn.all_weights)
rnn_gpu = module(input_size,
hidden_size,
num_layers,
bias=bias,
dropout=dropout,
bidirectional=bidirectional)
outputs_gpu = forward_backward(
True, rnn_gpu, input_val, hx_val, rnn.all_weights)
compare_cpu_gpu(outputs_cpu, outputs_gpu)
for nonlinearity in ('tanh', 'relu'):
hx_val = torch.randn(num_layers, batch, hidden_size)
rnn = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
outputs_cpu = forward_backward(False, rnn, input_val, hx_val, rnn.all_weights)
rnn_gpu = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, rnn.all_weights)
compare_cpu_gpu(outputs_cpu, outputs_gpu)
@unittest.skipIf(not TEST_CUDNN, "needs cudnn")
def test_RNN_dropout(self):
# checking the assumption that cuDNN sticks dropout in between
# RNN layers
for p in (0, 0.276, 0.731, 1):
for train in (True, False):
for cuda in (True, False):
rnn = nn.RNN(10, 1000, 2, bias=False, dropout=p, nonlinearity='relu')
if cuda:
rnn.cuda()
if train:
rnn.train()
else:
rnn.eval()
rnn.weight_ih_l0.data.fill_(1)
rnn.weight_hh_l0.data.fill_(1)
rnn.weight_ih_l1.data.fill_(1)
rnn.weight_hh_l1.data.fill_(1)
input = Variable(torch.Tensor(1,1,10).fill_(1))
hx = Variable(torch.Tensor(2,1,1000).fill_(0))
if cuda:
input = input.cuda()
hx = hx.cuda()
output, hy = rnn(input, hx)
self.assertEqual(output.data.min(), output.data.max())
output_val = output.data[0][0][0]
if p == 0 or not train:
self.assertEqual(output_val, 10000)
elif p == 1:
self.assertEqual(output_val, 0)
else:
self.assertGreater(output_val, 8000)
self.assertLess(output_val, 12000)
denorm_mod = (output_val * (1 - p)) % 10
self.assertLess(min(denorm_mod, 10 - denorm_mod), 1e-2)
self.assertEqual(hy[0].data.min(), hy[0].data.max())
self.assertEqual(hy[1].data.min(), hy[1].data.max())
self.assertEqual(hy.data[0][0][0], 10)
self.assertEqual(hy.data[1][0][0], output_val)
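A back-of-the-envelope check of the numbers asserted above, assuming inverted dropout (kept activations are scaled by 1/(1 - p) during training); the survivor count used here is hypothetical:
# layer 1: relu(10 unit inputs * weight 1) = 10 in each of the 1000 hidden units
# layer 2: every surviving layer-1 activation contributes 10 / (1 - p)
p, kept = 0.276, 724
output_val = kept * 10 / (1 - p)
assert 8000 < output_val < 12000                 # the loose bounds in the test
denorm_mod = (output_val * (1 - p)) % 10
assert min(denorm_mod, 10 - denorm_mod) < 1e-2   # the denorm_mod check above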
def add_test(test):
test_name = test.get_name()
@ -555,10 +900,21 @@ new_module_tests = [
),
dict(
module_name='Conv2d',
constructor_args=(3, 4, (3, 3), 1, 0, None, 1, True),
constructor_args=(3, 4, (3, 3), 1, 0, None, 1, False),
input_size=(2, 3, 6, 6),
desc='no_bias',
),
dict(
module_name='ConvTranspose2d',
constructor_args=(3, 4, 3, (2, 2), 1, (1, 1)),
input_size=(1, 3, 7, 7)
),
dict(
module_name='ConvTranspose2d',
constructor_args=(3, 4, 3, (2, 2), 1, (1, 1), 1, False),
input_size=(1, 3, 7, 7),
desc='no_bias'
),
dict(
module_name='MaxPool2d',
constructor_args=((3, 3), (2, 2), (1, 1)),
@ -620,7 +976,7 @@ new_module_tests = [
desc='stride_padding'
),
dict(
module_name='FullConv3d',
module_name='ConvTranspose3d',
constructor_args=(2, 3, (2, 2, 2)),
input_size=(1, 2, 4, 4, 4)
),
@ -670,7 +1026,8 @@ new_module_tests = [
constructor=lambda: nn.FractionalMaxPool2d(2, output_ratio=0.5, _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()),
input_size=(1, 3, 5, 5),
fullname='FractionalMaxPool2d_ratio',
test_cuda=False),
test_cuda=False
),
dict(
constructor=lambda: nn.FractionalMaxPool2d((2, 2), output_size=(4, 4), _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()),
input_size=(1, 3, 7, 7),

test/test_optim.py (new file, 279 lines)

@ -0,0 +1,279 @@
import unittest
import torch
import torch.optim as optim
import torch.legacy.optim as old_optim
from torch.autograd import Variable
from common import TestCase
def rosenbrock(tensor):
x, y = tensor
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
def drosenbrock(tensor):
x, y = tensor
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * (y - x**2)))
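For reference, drosenbrock is the analytic gradient of the function defined in rosenbrock:
\[
f(x, y) = (1 - x)^2 + 100\,(y - x^2)^2, \qquad
\nabla f(x, y) = \bigl(-400\,x\,(y - x^2) - 2\,(1 - x),\; 200\,(y - x^2)\bigr).
\]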
def wrap_old_fn(old_fn, **config):
def wrapper(closure, params, state):
return old_fn(closure, params, config, state)
return wrapper
class TestOptim(TestCase):
def _test_rosenbrock(self, constructor, old_fn):
params_t = torch.Tensor([1.5, 1.5])
state = {}
params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
optimizer = constructor([params])
solution = torch.Tensor([1, 1])
initial_dist = params.data.dist(solution)
def eval():
loss = rosenbrock(params)
loss.backward()
return loss
for i in range(2000):
optimizer.zero_grad()
optimizer.step(eval)
old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
params_t, state)
self.assertEqual(params.data, params_t)
self.assertLessEqual(params.data.dist(solution), initial_dist)
def _test_basic_cases_template(self, weight, bias, input, constructor):
weight = Variable(weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
input = Variable(input, requires_grad=False)
optimizer = constructor(weight, bias)
def fn():
y = weight.mv(input)
if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
y = y.cuda(bias.get_device())
return (y + bias).abs().sum()
initial_value = fn().data[0]
for i in range(200):
weight.grad.zero_()
bias.grad.zero_()
fn().backward()
optimizer.step()
self.assertLessEqual(fn().data[0], initial_value)
def _test_basic_cases(self, constructor):
self._test_basic_cases_template(
torch.randn(10, 5),
torch.randn(10),
torch.randn(5),
constructor
)
# non-contiguous parameters
self._test_basic_cases_template(
torch.randn(10, 5, 2)[...,0],
torch.randn(10, 2)[...,0],
torch.randn(5),
constructor
)
# CUDA
if not torch.cuda.is_available():
return
self._test_basic_cases_template(
torch.randn(10, 5).cuda(),
torch.randn(10).cuda(),
torch.randn(5).cuda(),
constructor
)
# Multi-GPU
if not torch.cuda.device_count() > 1:
return
self._test_basic_cases_template(
torch.randn(10, 5).cuda(),
torch.randn(10).cuda(),
torch.randn(5).cuda(),
constructor
)
def _build_params_dict(self, weight, bias, **kwargs):
return [dict(params=[weight]), dict(params=[bias], **kwargs)]
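The dicts built above define per-parameter groups: options set in a group override the optimizer-wide defaults. A minimal sketch mirroring the SGD case below (weight and bias stand for the Variables from _test_basic_cases_template):
optimizer = optim.SGD(
    [dict(params=[weight]), dict(params=[bias], lr=1e-2)],
    lr=1e-3)  # bias trains with lr=1e-2, weight falls back to the default 1e-3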
def test_sgd(self):
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3),
wrap_old_fn(old_optim.sgd, learningRate=1e-3)
)
self._test_rosenbrock(
lambda params: optim.SGD(params, lr=1e-3, momentum=0.9, dampening=0),
wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9, dampening=0)
)
self._test_basic_cases(
lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.SGD(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
def test_adam(self):
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.Adam(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
def test_adadelta(self):
self._test_rosenbrock(
lambda params: optim.Adadelta(params),
wrap_old_fn(old_optim.adadelta)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, rho=0.95),
wrap_old_fn(old_optim.adadelta, rho=0.95)
)
self._test_rosenbrock(
lambda params: optim.Adadelta(params, weight_decay=1e-2),
wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adadelta([weight, bias])
)
self._test_basic_cases(
lambda weight, bias: optim.Adadelta(
self._build_params_dict(weight, bias, rho=0.95))
)
def test_adagrad(self):
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, lr_decay=1e-3),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, learningRateDecay=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Adagrad(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adagrad, learningRate=1e-1, weightDecay=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
)
self._test_basic_cases(
lambda weight, bias: optim.Adagrad(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-1)
)
def test_adamax(self):
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1),
wrap_old_fn(old_optim.adamax, learningRate=1e-1)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, weight_decay=1e-2),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.Adamax(params, lr=1e-1, betas=(0.95, 0.998)),
wrap_old_fn(old_optim.adamax, learningRate=1e-1, beta1=0.95, beta2=0.998)
)
self._test_basic_cases(
lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
)
self._test_basic_cases(
lambda weight, bias: optim.Adamax(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-1)
)
def test_rmsprop(self):
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
)
self._test_rosenbrock(
lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
)
self._test_basic_cases(
lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2)
)
self._test_basic_cases(
lambda weight, bias: optim.RMSprop(
self._build_params_dict(weight, bias, lr=1e-3),
lr=1e-2)
)
def test_asgd(self):
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3),
wrap_old_fn(old_optim.asgd, eta0=1e-3)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8)
)
self._test_rosenbrock(
lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3)
)
self._test_basic_cases(
lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
)
self._test_basic_cases(
lambda weight, bias: optim.ASGD(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3, t0=100)
)
def test_rprop(self):
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3),
wrap_old_fn(old_optim.rprop, stepsize=1e-3)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, etas=(0.6, 1.1)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, etaminus=0.6, etaplus=1.1)
)
self._test_rosenbrock(
lambda params: optim.Rprop(params, lr=1e-3, step_sizes=(1e-4, 3)),
wrap_old_fn(old_optim.rprop, stepsize=1e-3, stepsizemin=1e-4, stepsizemax=3)
)
self._test_basic_cases(
lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
)
self._test_basic_cases(
lambda weight, bias: optim.Rprop(
self._build_params_dict(weight, bias, lr=1e-2),
lr=1e-3)
)
if __name__ == '__main__':
unittest.main()


@ -2,13 +2,30 @@ import sys
import math
import random
import torch
import torch.cuda
import tempfile
import unittest
import warnings
from itertools import product, chain
from common import TestCase, iter_indices
from functools import wraps
from common import TestCase, iter_indices, TEST_NUMPY
if TEST_NUMPY:
import numpy as np
SIZE = 100
def skipIfNoLapack(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as e:
if 'Lapack library not found' in e.args[0]:
raise unittest.SkipTest('Compiled without Lapack')
raise
return wrapper
class TestTorch(TestCase):
def test_dot(self):
@ -114,6 +131,18 @@ class TestTorch(TestCase):
def test_round(self):
self._testMath(torch.round, round)
def test_has_storage(self):
self.assertIsNotNone(torch.Tensor().storage())
self.assertIsNotNone(torch.Tensor(0).storage())
self.assertIsNotNone(torch.Tensor([]).storage())
self.assertIsNotNone(torch.Tensor().clone().storage())
self.assertIsNotNone(torch.Tensor([0, 0, 0]).nonzero().storage())
@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
def test_has_storage_numpy(self):
arr = np.array([], dtype=np.float32)
self.assertIsNotNone(torch.Tensor(arr).storage())
def _testSelection(self, torchfn, mathfn):
# contiguous
m1 = torch.randn(100,100)
@ -1002,15 +1031,10 @@ class TestTorch(TestCase):
for dim in range(3):
x = torch.rand(13, SIZE, SIZE).transpose(0, dim)
y = torch.rand(17, SIZE, SIZE).transpose(0, dim)
res1 = torch.cat(x, y, dim)
res1 = torch.cat((x, y), dim)
self.assertEqual(res1.narrow(dim, 0, 13), x, 0)
self.assertEqual(res1.narrow(dim, 13, 17), y, 0)
# Check stateless implementation
res2 = torch.Tensor()
torch.cat(res2, x, y, dim)
self.assertEqual(res1, res2, 0)
# Check iterables
for dim in range(3):
x = torch.rand(13, SIZE, SIZE).transpose(0, dim)
@ -1023,16 +1047,6 @@ class TestTorch(TestCase):
self.assertEqual(res1.narrow(dim, 30, 19), z, 0)
self.assertRaises(ValueError, lambda: torch.cat([]))
res2 = torch.Tensor()
torch.cat(res2, (x, y, z), dim)
self.assertEqual(res1, res2, 0)
res2 = res2.float()
torch.cat(res2, (x.float(), y.float(), z.float()), dim)
self.assertEqual(res1.float(), res2, 0)
res2 = res2.double()
torch.cat(res2, (x.double(), y.double(), z.double()), dim)
self.assertEqual(res1.double(), res2, 0)
def test_linspace(self):
_from = random.random()
to = _from + random.random()
@ -1085,7 +1099,7 @@ class TestTorch(TestCase):
torch.randn(res2, SIZE, SIZE)
self.assertEqual(res1, res2)
@unittest.skipIf(not hasattr(torch, 'gesv'), 'Compiled without gesv')
@skipIfNoLapack
def test_gesv(self):
a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23),
(-6.05, -3.30, 5.36, -4.44, 1.08),
@ -1096,27 +1110,144 @@ class TestTorch(TestCase):
(-1.56, 4.00, -8.67, 1.75, 2.86),
(9.81, -4.09, -4.57, -8.61, 8.99))).t()
res1 = torch.gesv(b,a)
self.assertLessEqual(b.dist(a * res1), 1e-12)
res1 = torch.gesv(b,a)[0]
self.assertLessEqual(b.dist(torch.mm(a, res1)), 1e-12)
ta = torch.Tensor()
tb = torch.Tensor()
res2 = torch.gesv(tb, ta, b, a)
res3 = torch.gesv(b, a, b, a)
res2 = torch.gesv(tb, ta, b, a)[0]
res3 = torch.gesv(b, a, b, a)[0]
self.assertEqual(res1, tb)
self.assertEqual(res1, b)
self.assertEqual(res1, res2)
self.assertEqual(res1, res3)
# test reuse
res1 = torch.gesv(b, a)
res1 = torch.gesv(b, a)[0]
ta = torch.Tensor()
tb = torch.Tensor()
torch.gesv(tb, ta, b, a)
torch.gesv(tb, ta, b, a)[0]
self.assertEqual(res1, tb)
torch.gesv(tb, ta, b, a)
torch.gesv(tb, ta, b, a)[0]
self.assertEqual(res1, tb)
@unittest.skipIf(not hasattr(torch, 'trtrs'), 'Compiled without trtrs')
@skipIfNoLapack
def test_qr(self):
# Since the QR decomposition is unique only up to the signs of the rows of
# R, we must ensure these are positive before doing the comparison.
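# Concretely, if A = Q R and D = diag(sign(diag(R))), then A = (Q D)(D R) as well,
# so multiplying both factors by D (which is what canonicalize below does) pins
# down the representative with a positive diagonal in R.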
def canonicalize(q, r):
d = r.diag().sign().diag()
return torch.mm(q, d), torch.mm(d, r)
def canon_and_check(q, r, expected_q, expected_r):
q_canon, r_canon = canonicalize(q, r)
expected_q_canon, expected_r_canon = canonicalize(expected_q, expected_r)
self.assertEqual(q_canon, expected_q_canon)
self.assertEqual(r_canon, expected_r_canon)
def check_qr(a, expected_q, expected_r):
# standard invocation
q, r = torch.qr(a)
canon_and_check(q, r, expected_q, expected_r)
# in-place
q, r = torch.Tensor(), torch.Tensor()
torch.qr(q, r, a)
canon_and_check(q, r, expected_q, expected_r)
# manually calculate qr using geqrf and orgqr
m = a.size(0)
n = a.size(1)
k = min(m, n)
result, tau = torch.geqrf(a)
self.assertEqual(result.size(0), m)
self.assertEqual(result.size(1), n)
self.assertEqual(tau.size(0), k)
r = torch.triu(result.narrow(0, 0, k))
q, _ = torch.orgqr(result, tau)
q, r = q.narrow(1, 0, k), r
canon_and_check(q, r, expected_q, expected_r)
# check square case
a = torch.Tensor(((1, 2, 3), (4, 5, 6), (7, 8, 10)))
expected_q = torch.Tensor((
(-1.230914909793328e-01, 9.045340337332914e-01, 4.082482904638621e-01),
(-4.923659639173310e-01, 3.015113445777629e-01, -8.164965809277264e-01),
(-8.616404368553292e-01, -3.015113445777631e-01, 4.082482904638634e-01)))
expected_r = torch.Tensor((
(-8.124038404635959e+00, -9.601136296387955e+00, -1.193987e+01),
( 0.000000000000000e+00, 9.045340337332926e-01, 1.507557e+00),
( 0.000000000000000e+00, 0.000000000000000e+00, 4.082483e-01)))
check_qr(a, expected_q, expected_r)
# check rectangular thin
a = torch.Tensor((
( 1, 2, 3),
( 4, 5, 6),
( 7, 8, 9),
(10, 11, 13),
))
expected_q = torch.Tensor((
(-0.0776150525706334, -0.833052161400748 , 0.3651483716701106),
(-0.3104602102825332, -0.4512365874254053, -0.1825741858350556),
(-0.5433053679944331, -0.0694210134500621, -0.7302967433402217),
(-0.7761505257063329, 0.3123945605252804, 0.5477225575051663)
))
expected_r = torch.Tensor((
(-12.8840987267251261, -14.5916298832790581, -17.0753115655393231),
( 0, -1.0413152017509357, -1.770235842976589 ),
( 0, 0, 0.5477225575051664)
))
check_qr(a, expected_q, expected_r)
# check rectangular fat
a = torch.Tensor((
(1, 2, 3, 4),
(5, 6, 7, 8),
(9, 10, 11, 13)
))
expected_q = torch.Tensor((
(-0.0966736489045663, 0.907737593658436 , 0.4082482904638653),
(-0.4833682445228317, 0.3157348151855452, -0.8164965809277254),
(-0.870062840141097 , -0.2762679632873518, 0.4082482904638621)
))
expected_r = torch.Tensor((
( -1.0344080432788603e+01, -1.1794185166357092e+01,
-1.3244289899925587e+01, -1.5564457473635180e+01),
( 0.0000000000000000e+00, 9.4720444555662542e-01,
1.8944088911132546e+00, 2.5653453733825331e+00),
( 0.0000000000000000e+00, 0.0000000000000000e+00,
1.5543122344752192e-15, 4.0824829046386757e-01)
))
check_qr(a, expected_q, expected_r)
@skipIfNoLapack
def test_ormqr(self):
mat1 = torch.randn(10, 10)
mat2 = torch.randn(10, 10)
q, r = torch.qr(mat1)
m, tau = torch.geqrf(mat1)
res1 = torch.mm(q, mat2)
res2, _ = torch.ormqr(m, tau, mat2)
self.assertEqual(res1, res2)
res1 = torch.mm(mat2, q)
res2, _ = torch.ormqr(m, tau, mat2, False)
self.assertEqual(res1, res2)
res1 = torch.mm(q.t(), mat2)
res2, _ = torch.ormqr(m, tau, mat2, True, True)
self.assertEqual(res1, res2)
res1 = torch.mm(mat2, q.t())
res2, _ = torch.ormqr(m, tau, mat2, False, True)
self.assertEqual(res1, res2)
@skipIfNoLapack
def test_trtrs(self):
a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23),
(-6.05, -3.30, 5.36, -4.44, 1.08),
@ -1131,39 +1262,39 @@ class TestTorch(TestCase):
L = torch.tril(a)
# solve Ux = b
x = torch.trtrs(b, U)
self.assertLessEqual(b.dist(U * x), 1e-12)
x = torch.trtrs(b, U, 'U', 'N', 'N')
self.assertLessEqual(b.dist(U * x), 1e-12)
x = torch.trtrs(b, U)[0]
self.assertLessEqual(b.dist(torch.mm(U, x)), 1e-12)
x = torch.trtrs(b, U, True, False, False)[0]
self.assertLessEqual(b.dist(torch.mm(U, x)), 1e-12)
# solve Lx = b
x = torch.trtrs(b, L, 'L')
self.assertLessEqual(b.dist(L * x), 1e-12)
x = torch.trtrs(b, L, 'L', 'N', 'N')
self.assertLessEqual(b.dist(L * x), 1e-12)
x = torch.trtrs(b, L, False)[0]
self.assertLessEqual(b.dist(torch.mm(L, x)), 1e-12)
x = torch.trtrs(b, L, False, False, False)[0]
self.assertLessEqual(b.dist(torch.mm(L, x)), 1e-12)
# solve U'x = b
x = torch.trtrs(b, U, 'U', 'T')
self.assertLessEqual(b.dist(U.t() * x), 1e-12)
x = torch.trtrs(b, U, 'U', 'T', 'N')
self.assertLessEqual(b.dist(U.t() * x), 1e-12)
x = torch.trtrs(b, U, True, True)[0]
self.assertLessEqual(b.dist(torch.mm(U.t(), x)), 1e-12)
x = torch.trtrs(b, U, True, True, False)[0]
self.assertLessEqual(b.dist(torch.mm(U.t(), x)), 1e-12)
# solve U'x = b by manual transposition
y = torch.trtrs(b, U.t(), 'L', 'N')
y = torch.trtrs(b, U.t(), False, False)[0]
self.assertLessEqual(x.dist(y), 1e-12)
# solve L'x = b
x = torch.trtrs(b, L, 'L', 'T')
self.assertLessEqual(b.dist(L.t() * x), 1e-12)
x = torch.trtrs(b, L, 'L', 'T', 'N')
self.assertLessEqual(b.dist(L.t() * x), 1e-12)
x = torch.trtrs(b, L, False, True)[0]
self.assertLessEqual(b.dist(torch.mm(L.t(), x)), 1e-12)
x = torch.trtrs(b, L, False, True, False)[0]
self.assertLessEqual(b.dist(torch.mm(L.t(), x)), 1e-12)
# solve L'x = b by manual transposition
y = torch.trtrs(b, L.t(), 'U', 'N')
y = torch.trtrs(b, L.t(), True, False)[0]
self.assertLessEqual(x.dist(y), 1e-12)
# test reuse
res1 = torch.trtrs(b,a)
res1 = torch.trtrs(b,a)[0]
ta = torch.Tensor()
tb = torch.Tensor()
torch.trtrs(tb,ta,b,a)
@ -1172,25 +1303,25 @@ class TestTorch(TestCase):
torch.trtrs(tb,ta,b,a)
self.assertEqual(res1, tb, 0)
@unittest.skipIf(not hasattr(torch, 'gels'), 'Compiled without gels')
@skipIfNoLapack
def test_gels(self):
def _test(a, b, expectedNorm):
a_copy = a.clone()
b_copy = b.clone()
res1 = torch.gels(b, a)
res1 = torch.gels(b, a)[0]
self.assertEqual(a, a_copy, 0)
self.assertEqual(b, b_copy, 0)
self.assertEqual((a * res1 - b).norm(), expectedNorm, 1e-8)
self.assertEqual((torch.mm(a, res1) - b).norm(), expectedNorm, 1e-8)
ta = torch.Tensor()
tb = torch.Tensor()
res2 = torch.gels(tb, ta, b, a)
res2 = torch.gels(tb, ta, b, a)[0]
self.assertEqual(a, a_copy, 0)
self.assertEqual(b, b_copy, 0)
self.assertEqual((a * res1 - b).norm(), expectedNorm, 1e-8)
self.assertEqual((torch.mm(a, res1) - b).norm(), expectedNorm, 1e-8)
res3 = torch.gels(b, a, b, a)
self.assertEqual((a_copy * b - b_copy).norm(), expectedNorm, 1e-8)
res3 = torch.gels(b, a, b, a)[0]
self.assertEqual((torch.mm(a_copy, b) - b_copy).norm(), expectedNorm, 1e-8)
self.assertEqual(res1, tb, 0)
self.assertEqual(res1, b, 0)
self.assertEqual(res1, res2, 0)
@ -1237,24 +1368,24 @@ class TestTorch(TestCase):
ta = torch.Tensor()
tb = torch.Tensor()
torch.gels(tb, ta, b, a)
self.assertEqual((a * tb - b).norm(), expectedNorm, 1e-8)
self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8)
torch.gels(tb, ta, b, a)
self.assertEqual((a * tb - b).norm(), expectedNorm, 1e-8)
self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8)
torch.gels(tb, ta, b, a)
self.assertEqual((a * tb - b).norm(), expectedNorm, 1e-8)
self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8)
@unittest.skipIf(not hasattr(torch, 'eig'), 'Compiled without eig')
@skipIfNoLapack
def test_eig(self):
a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00),
(-6.49, 3.80, 0.00, 0.00, 0.00),
(-0.47, -6.39, 4.17, 0.00, 0.00),
(-7.20, 1.50, -1.51, 5.70, 0.00),
(-0.65, -6.34, 2.67, 1.80, -7.10))).t().contiguous()
e = torch.eig(a)
ee, vv = torch.eig(a, 'V')
e = torch.eig(a)[0]
ee, vv = torch.eig(a, True)
te = torch.Tensor()
tv = torch.Tensor()
eee, vvv = torch.eig(te, tv, a, 'V')
eee, vvv = torch.eig(te, tv, a, True)
self.assertEqual(e, ee, 1e-12)
self.assertEqual(ee, eee, 1e-12)
self.assertEqual(ee, te, 1e-12)
@ -1263,30 +1394,30 @@ class TestTorch(TestCase):
# test reuse
X = torch.randn(4,4)
X = X.t() * X
X = torch.mm(X.t(), X)
e, v = torch.zeros(4,2), torch.zeros(4,4)
torch.eig(e, v, X, 'V')
Xhat = v * torch.diag(e.select(1, 0)) * v.t()
torch.eig(e, v, X, True)
Xhat = torch.mm(torch.mm(v, torch.diag(e.select(1, 0))), v.t())
self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong')
self.assertFalse(v.is_contiguous(), 'V is contiguous')
torch.eig(e, v, X, 'V')
torch.eig(e, v, X, True)
Xhat = torch.mm(v, torch.mm(e.select(1, 0).diag(), v.t()))
self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong')
self.assertFalse(v.is_contiguous(), 'V is contiguous')
# test non-contiguous
X = torch.randn(4, 4)
X = X.t() * X
e = torch.zeros(4, 2, 2)[:,2]
v = torch.zeros(4, 2, 4)[:,2]
X = torch.mm(X.t(), X)
e = torch.zeros(4, 2, 2)[:,1]
v = torch.zeros(4, 2, 4)[:,1]
self.assertFalse(v.is_contiguous(), 'V is contiguous')
self.assertFalse(e.is_contiguous(), 'E is contiguous')
torch.eig(e, v, X, 'V')
Xhat = v * torch.diag(e.select(1, 0)) * v.t()
torch.eig(e, v, X, True)
Xhat = torch.mm(torch.mm(v, torch.diag(e.select(1, 0))), v.t())
self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong')
@unittest.skipIf(not hasattr(torch, 'symeig'), 'Compiled without symeig')
@skipIfNoLapack
def test_symeig(self):
xval = torch.rand(100,3)
cov = torch.mm(xval.t(), xval)
@ -1295,15 +1426,15 @@ class TestTorch(TestCase):
# First call to symeig
self.assertTrue(resv.is_contiguous(), 'resv is not contiguous')
torch.symeig(rese, resv, cov.clone(), 'V')
ahat = resv * torch.diag(rese) * resv.t()
torch.symeig(rese, resv, cov.clone(), True)
ahat = torch.mm(torch.mm(resv, torch.diag(rese)), resv.t())
self.assertEqual(cov, ahat, 1e-8, 'VeV\' wrong')
# Second call to symeig
self.assertFalse(resv.is_contiguous(), 'resv is contiguous')
torch.symeig(rese, resv, cov.clone(), 'V')
torch.symeig(rese, resv, cov.clone(), True)
ahat = torch.mm(torch.mm(resv, torch.diag(rese)), resv.t())
mytester.assertTensorEq(cov, ahat, 1e-8, 'VeV\' wrong')
self.assertEqual(cov, ahat, 1e-8, 'VeV\' wrong')
# test non-contiguous
X = torch.rand(5, 5)
@ -1312,11 +1443,11 @@ class TestTorch(TestCase):
v = torch.zeros(4, 2, 4)[:,1]
self.assertFalse(v.is_contiguous(), 'V is contiguous')
self.assertFalse(e.is_contiguous(), 'E is contiguous')
torch.symeig(e, v, X, 'V')
Xhat = v * torch.diag(e) * v.t()
torch.symeig(e, v, X, True)
Xhat = torch.mm(torch.mm(v, torch.diag(e)), v.t())
self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong')
@unittest.skipIf(not hasattr(torch, 'svd'), 'Compiled without svd')
@skipIfNoLapack
def test_svd(self):
a=torch.Tensor(((8.79, 6.11, -9.15, 9.57, -3.49, 9.84),
(9.93, 6.91, -7.93, 1.64, 4.02, 0.15),
@ -1339,7 +1470,7 @@ class TestTorch(TestCase):
X = torch.randn(4, 4)
U, S, V = torch.svd(X)
Xhat = torch.mm(U, torch.mm(S.diag(), V.t()))
mytester.assertEqual(X, Xhat, 1e-8, 'USV\' wrong')
self.assertEqual(X, Xhat, 1e-8, 'USV\' wrong')
self.assertFalse(U.is_contiguous(), 'U is contiguous')
torch.svd(U, S, V, X)
@ -1349,8 +1480,8 @@ class TestTorch(TestCase):
# test non-contiguous
X = torch.randn(5, 5)
U = torch.zeros(5, 2, 5)[:,1]
S = torch.zeros(5, 2)[:,2]
V = torch.zeros(5, 2, 5)[:,2]
S = torch.zeros(5, 2)[:,1]
V = torch.zeros(5, 2, 5)[:,1]
self.assertFalse(U.is_contiguous(), 'U is contiguous')
self.assertFalse(S.is_contiguous(), 'S is contiguous')
@ -1359,7 +1490,7 @@ class TestTorch(TestCase):
Xhat = torch.mm(U, torch.mm(S.diag(), V.t()))
self.assertEqual(X, Xhat, 1e-8, 'USV\' wrong')
@unittest.skipIf(not hasattr(torch, 'inverse'), 'Compiled without inverse')
@skipIfNoLapack
def test_inverse(self):
M = torch.randn(5,5)
MI = torch.inverse(M)
@ -1512,7 +1643,7 @@ class TestTorch(TestCase):
self._test_conv_corr_eq(lambda x, k: torch.conv3(x, k, 'F'), reference)
def test_logical(self):
x = torch.rand(100, 100) * 2 - 1;
x = torch.rand(100, 100) * 2 - 1
xx = x.clone()
xgt = torch.gt(x, 1)
@ -1574,27 +1705,27 @@ class TestTorch(TestCase):
self.assertEqual(x, y)
torch.set_rng_state(rng_state)
@unittest.skip("Not implemented yet")
@skipIfNoLapack
def test_cholesky(self):
x = torch.rand(10, 10)
A = x * x.t()
x = torch.rand(10, 10) + 1e-1
A = torch.mm(x, x.t())
# default Case
C = torch.potrf(A)
B = C.t() * C
B = torch.mm(C.t(), C)
self.assertEqual(A, B, 1e-14)
# test Upper Triangular
U = torch.potrf(A, 'U')
B = U.t() * U
U = torch.potrf(A, True)
B = torch.mm(U.t(), U)
self.assertEqual(A, B, 1e-14, 'potrf (upper) did not allow rebuilding the original matrix')
# test Lower Triangular
L = torch.potrf(A, 'L')
B = L * L.t()
L = torch.potrf(A, False)
B = torch.mm(L, L.t())
self.assertEqual(A, B, 1e-14, 'potrf (lower) did not allow rebuilding the original matrix')
@unittest.skipIf(not hasattr(torch, 'potrs'), 'Compiled without potrs')
@skipIfNoLapack
def test_potrs(self):
a=torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23),
(-6.05, -3.30, 5.36, -4.44, 1.08),
@ -1606,19 +1737,19 @@ class TestTorch(TestCase):
(9.81, -4.09, -4.57, -8.61, 8.99))).t()
# make sure 'a' is symmetric PSD
a = a * a.t()
a = torch.mm(a, a.t())
# upper Triangular Test
U = torch.potrf(a, 'U')
x = torch.potrs(b, U, 'U')
self.assertLessEqual(b.dist(a * x), 1e-12)
U = torch.potrf(a)
x = torch.potrs(b, U)
self.assertLessEqual(b.dist(torch.mm(a, x)), 1e-12)
# lower Triangular Test
L = torch.potrf(a, 'L')
x = torch.potrs(b, L, 'L')
self.assertLessEqual(b.dist(a * x), 1e-12)
L = torch.potrf(a, False)
x = torch.potrs(b, L, False)
self.assertLessEqual(b.dist(torch.mm(a, x)), 1e-12)
@unittest.skipIf(not hasattr(torch, 'potri'), 'Compiled without potri')
@skipIfNoLapack
def test_potri(self):
a=torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23),
(-6.05, -3.30, 5.36, -4.44, 1.08),
@ -1647,7 +1778,7 @@ class TestTorch(TestCase):
inv1 = torch.potri(chol, 'L')
self.assertLessEqual(inv0.dist(inv1), 1e-12)
@unittest.skip("Not implemented yet")
@skipIfNoLapack
def test_pstrf(self):
def checkPsdCholesky(a, uplo, inplace):
if inplace:
@ -1662,28 +1793,25 @@ class TestTorch(TestCase):
u, piv = torch.pstrf(*args)
if uplo == 'L':
a_reconstructed = u * u.t()
if uplo is False:
a_reconstructed = torch.mm(u, u.t())
else:
a_reconstructed = u.t() * u
a_reconstructed = torch.mm(u.t(), u)
piv = piv.long()
a_permuted = a.index(0, piv-1).index(1, piv-1)
self.assertTensorEq(a_permuted, a_reconstructed, 1e-14)
a_permuted = a.index_select(0, piv).index_select(1, piv)
self.assertEqual(a_permuted, a_reconstructed, 1e-14)
dimensions = ((5, 1), (5, 3), (5, 5), (10, 10))
for dim in dimensions:
m = torch.Tensor(*dim).uniform_()
a = m * m.t()
a = torch.mm(m, m.t())
# add a small number to the diagonal to make the matrix numerically positive semidefinite
for i in range(m.size(0)):
a[i][i] = a[i][i] + 1e-7
checkPsdCholesky(a, None, False)
checkPsdCholesky(a, 'U', False)
checkPsdCholesky(a, 'L', False)
checkPsdCholesky(a, None, True)
checkPsdCholesky(a, 'U', True)
checkPsdCholesky(a, 'L', True)
for inplace in (True, False):
for uplo in (None, True, False):
checkPsdCholesky(a, uplo, inplace)
def test_numel(self):
b = torch.ByteTensor(3, 100, 100)
@ -1705,6 +1833,24 @@ class TestTorch(TestCase):
self.assertEqual(reference[2, 2, 2], 27, 0)
self.assertEqual(reference[:], self._consecutive((3, 3, 3)), 0)
# Check Ellipsis
self.assertEqual(reference[..., 2], torch.Tensor([[3, 6, 9],
[12, 15, 18],
[21, 24, 27]]), 0)
self.assertEqual(reference[0, ..., 2], torch.Tensor([3, 6, 9]), 0)
self.assertEqual(reference[..., 2], reference[:, :, 2], 0)
self.assertEqual(reference[0, ..., 2], reference[0, :, 2], 0)
self.assertEqual(reference[0, 2, ...], reference[0, 2], 0)
self.assertEqual(reference[..., 2, 2, 2], 27, 0)
self.assertEqual(reference[2, ..., 2, 2], 27, 0)
self.assertEqual(reference[2, 2, ..., 2], 27, 0)
self.assertEqual(reference[2, 2, 2, ...], 27, 0)
reference_5d = self._consecutive((3, 3, 3, 3, 3))
self.assertEqual(reference_5d[..., 1, 0], reference_5d[:, :, :, 1, 0], 0)
self.assertEqual(reference_5d[2, ..., 1, 0], reference_5d[2, :, :, 1, 0], 0)
self.assertEqual(reference_5d[2, 1, 0, ..., 1], reference_5d[2, 1, 0, :, 1], 0)
self.assertRaises(RuntimeError, lambda: reference[1, 1, 1, 1])
self.assertRaises(RuntimeError, lambda: reference[1, 1, 1, 1:1])
self.assertRaises(RuntimeError, lambda: reference[3, 3, 3, 3, 3, 3, 3, 3])
@ -1927,37 +2073,40 @@ class TestTorch(TestCase):
def test_view(self):
tensor = torch.rand(15)
template = torch.rand(3, 5)
target = template.size().tolist()
self.assertEqual(tensor.view_as(template).size().tolist(), target)
self.assertEqual(tensor.view(3, 5).size().tolist(), target)
self.assertEqual(tensor.view(torch.LongStorage((3, 5))).size().tolist(), target)
self.assertEqual(tensor.view(-1, 5).size().tolist(), target)
self.assertEqual(tensor.view(3, -1).size().tolist(), target)
empty = torch.Tensor()
target = template.size()
self.assertEqual(tensor.view_as(template).size(), target)
self.assertEqual(tensor.view(3, 5).size(), target)
self.assertEqual(tensor.view(torch.Size([3, 5])).size(), target)
self.assertEqual(tensor.view(-1, 5).size(), target)
self.assertEqual(tensor.view(3, -1).size(), target)
tensor_view = tensor.view(5, 3)
tensor_view.fill_(random.uniform(0, 1))
self.assertEqual((tensor_view-tensor).abs().max(), 0)
self.assertEqual(empty.view_as(empty), empty)
self.assertEqual(empty.view(0), empty)
def test_expand(self):
result = torch.Tensor()
tensor = torch.rand(8, 1)
template = torch.rand(8, 5)
target = template.size().tolist()
self.assertEqual(tensor.expand_as(template).size().tolist(), target)
self.assertEqual(tensor.expand(8, 5).size().tolist(), target)
self.assertEqual(tensor.expand(torch.LongStorage((8, 5))).size().tolist(), target)
target = template.size()
self.assertEqual(tensor.expand_as(template).size(), target)
self.assertEqual(tensor.expand(8, 5).size(), target)
self.assertEqual(tensor.expand(torch.Size([8, 5])).size(), target)
def test_repeat(self):
result = torch.Tensor()
tensor = torch.rand(8, 4)
size = (3, 1, 1)
sizeStorage = torch.LongStorage(size)
torchSize = torch.Size(size)
target = [3, 8, 4]
self.assertEqual(tensor.repeat(*size).size().tolist(), target, 'Error in repeat')
self.assertEqual(tensor.repeat(sizeStorage).size().tolist(), target, 'Error in repeat using LongStorage')
self.assertEqual(tensor.repeat(*size).size(), target, 'Error in repeat')
self.assertEqual(tensor.repeat(torchSize).size(), target, 'Error in repeat using torch.Size')
result = tensor.repeat(*size)
self.assertEqual(result.size().tolist(), target, 'Error in repeat using result')
result = tensor.repeat(sizeStorage)
self.assertEqual(result.size().tolist(), target, 'Error in repeat using result and LongStorage')
self.assertEqual(result.size(), target, 'Error in repeat using result')
result = tensor.repeat(torchSize)
self.assertEqual(result.size(), target, 'Error in repeat using result and torch.Size')
self.assertEqual((result.mean(0).view(8, 4)-tensor).abs().max(), 0, 'Error in repeat (not equal)')
def test_is_same_size(self):
@ -1983,6 +2132,21 @@ class TestTorch(TestCase):
"Tensors with no storages should not appear to be set "
"to each other")
def test_tensor_set(self):
t1 = torch.Tensor()
t2 = torch.Tensor(3, 4, 9, 10).uniform_()
t1.set_(t2)
self.assertEqual(t1.storage()._cdata, t2.storage()._cdata)
size = torch.Size([9, 3, 4, 10])
t1.set_(t2.storage(), 0, size)
self.assertEqual(t1.size(), size)
t1.set_(t2.storage(), 0, tuple(size))
self.assertEqual(t1.size(), size)
self.assertEqual(t1.stride(), (120, 40, 10, 1))
stride = (10, 360, 90, 1)
t1.set_(t2.storage(), 0, size, stride)
self.assertEqual(t1.stride(), stride)
def test_equal(self):
# Contiguous, 1D
t1 = torch.Tensor((3, 4, 9, 10))
@ -2014,15 +2178,6 @@ class TestTorch(TestCase):
self.assertTrue(torch.equal(s1, s3))
self.assertFalse(torch.equal(s1, s4))
def test_is_size(self):
t1 = torch.Tensor(3, 4, 5)
s1 = torch.LongStorage((3, 4, 5))
s2 = torch.LongStorage((5, 4, 3))
self.assertTrue(t1.is_size(s1))
self.assertFalse(t1.is_size(s2))
self.assertTrue(t1.is_size(t1.size()))
def test_element_size(self):
byte = torch.ByteStorage().element_size()
char = torch.CharStorage().element_size()
@ -2066,7 +2221,7 @@ class TestTorch(TestCase):
splits = tensor.split(split_size, dim)
start = 0
for target_size, split in zip(target_sizes, splits):
self.assertEqual(split.size().tolist(), target_size)
self.assertEqual(split.size(), target_size)
self.assertEqual(tensor.narrow(dim, start, target_size[dim]), split, 0)
start = start + target_size[dim]
@ -2078,7 +2233,7 @@ class TestTorch(TestCase):
splits = tensor.chunk(num_chunks, dim)
start = 0
for target_size, split in zip(target_sizes, splits):
self.assertEqual(split.size().tolist(), target_size)
self.assertEqual(split.size(), target_size)
self.assertEqual(tensor.narrow(dim, start, target_size[dim]), split, 0)
start = start + target_size[dim]
@ -2110,7 +2265,7 @@ class TestTorch(TestCase):
x = torch.Tensor(*orig).fill_(0)
new = list(map(lambda x: x - 1, x.permute(*perm).size()))
self.assertEqual(perm, new)
self.assertEqual(x.size().tolist(), orig)
self.assertEqual(x.size(), orig)
def test_storageview(self):
s1 = torch.LongStorage((3, 4, 5))
@ -2137,22 +2292,25 @@ class TestTorch(TestCase):
]
shapes = [
torch.LongStorage((12,)),
torch.LongStorage((12, 1)),
torch.LongStorage((1, 12)),
torch.LongStorage((6, 2)),
torch.LongStorage((3, 2, 2)),
torch.Size((12,)),
torch.Size((12, 1)),
torch.Size((1, 12)),
torch.Size((6, 2)),
torch.Size((3, 2, 2)),
]
for t in types:
tensor = torch.rand(num_src).mul(2).floor().type(t)
while True:
tensor = torch.rand(num_src).mul(2).floor().type(t)
if tensor.sum() > 0:
break
for shape in shapes:
tensor = tensor.clone().resize_(shape)
dst1 = torch.nonzero(tensor)
dst2 = tensor.nonzero()
dst3 = torch.LongTensor()
torch.nonzero(dst3, tensor)
if shape.size() == 1:
if len(shape) == 1:
dst = []
for i in range(num_src):
if tensor[i] != 0:
@ -2161,12 +2319,12 @@ class TestTorch(TestCase):
self.assertEqual(dst1.select(1, 0), torch.LongTensor(dst), 0)
self.assertEqual(dst2.select(1, 0), torch.LongTensor(dst), 0)
self.assertEqual(dst3.select(1, 0), torch.LongTensor(dst), 0)
elif shape.size() == 2:
elif len(shape) == 2:
# This test will allow through some False positives. It only checks
# that the elements flagged positive are indeed non-zero.
for i in range(dst1.size(0)):
self.assertNotEqual(tensor[dst1[i,0], dst1[i,1]], 0)
elif shape.size() == 3:
elif len(shape) == 3:
# This test will allow through some False positives. It only checks
# that the elements flagged positive are indeed non-zero.
for i in range(dst1.size(0)):
@ -2231,22 +2389,57 @@ class TestTorch(TestCase):
b = [a[i % 2] for i in range(4)]
b += [a[0].storage()]
b += [a[0].storage()[1:4]]
with tempfile.NamedTemporaryFile() as f:
torch.save(b, f)
f.seek(0)
c = torch.load(f)
self.assertEqual(b, c, 0)
self.assertTrue(isinstance(c[0], torch.FloatTensor))
self.assertTrue(isinstance(c[1], torch.FloatTensor))
self.assertTrue(isinstance(c[2], torch.FloatTensor))
self.assertTrue(isinstance(c[3], torch.FloatTensor))
self.assertTrue(isinstance(c[4], torch.FloatStorage))
c[0].fill_(10)
self.assertEqual(c[0], c[2], 0)
self.assertEqual(c[4], torch.FloatStorage(25).fill_(10), 0)
c[1].fill_(20)
self.assertEqual(c[1], c[3], 0)
self.assertEqual(c[4], c[5][1:4], 0)
for use_name in (False, True):
with tempfile.NamedTemporaryFile() as f:
handle = f if not use_name else f.name
torch.save(b, handle)
f.seek(0)
c = torch.load(handle)
self.assertEqual(b, c, 0)
self.assertTrue(isinstance(c[0], torch.FloatTensor))
self.assertTrue(isinstance(c[1], torch.FloatTensor))
self.assertTrue(isinstance(c[2], torch.FloatTensor))
self.assertTrue(isinstance(c[3], torch.FloatTensor))
self.assertTrue(isinstance(c[4], torch.FloatStorage))
c[0].fill_(10)
self.assertEqual(c[0], c[2], 0)
self.assertEqual(c[4], torch.FloatStorage(25).fill_(10), 0)
c[1].fill_(20)
self.assertEqual(c[1], c[3], 0)
self.assertEqual(c[4], c[5][1:4], 0)
def test_serialization_container(self):
def import_module(name, filename):
if sys.version_info >= (3, 5):
import importlib.util
spec = importlib.util.spec_from_file_location(name, filename)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
else:
import imp
module = imp.load_source(name, filename)
sys.modules[module.__name__] = module
return module
with tempfile.NamedTemporaryFile() as checkpoint:
module = import_module('tmpmodule', 'data/network1.py')
torch.save(module.Net(), checkpoint)
# First check that the checkpoint can be loaded without warnings
checkpoint.seek(0)
with warnings.catch_warnings(record=True) as w:
loaded = torch.load(checkpoint)
self.assertTrue(isinstance(loaded, module.Net))
self.assertEqual(len(w), 0)
# Replace the module with different source
module = import_module('tmpmodule', 'data/network2.py')
checkpoint.seek(0)
with warnings.catch_warnings(record=True) as w:
loaded = torch.load(checkpoint)
self.assertTrue(isinstance(loaded, module.Net))
self.assertEqual(len(w), 1)
self.assertEqual(w[0].category.__name__, 'SourceChangeWarning')
def test_from_buffer(self):
a = bytearray([1, 2, 3, 4])
@ -2264,10 +2457,14 @@ class TestTorch(TestCase):
def test_print(self):
for t in torch._tensor_classes:
if t.is_cuda and not torch.cuda.is_available():
continue
obj = t(100, 100).fill_(1)
obj.__repr__()
str(obj)
for t in torch._storage_classes:
if t.is_cuda and not torch.cuda.is_available():
continue
obj = t(100).fill_(1)
obj.__repr__()
str(obj)
@ -2290,7 +2487,115 @@ class TestTorch(TestCase):
y = x.clone().unsqueeze_(2)
self.assertEqual(y, x.contiguous().view(2, 4, 1))
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_pin_memory(self):
x = torch.randn(3, 5)
self.assertFalse(x.is_pinned())
pinned = x.pin_memory()
self.assertTrue(pinned.is_pinned())
self.assertEqual(pinned, x)
self.assertNotEqual(pinned.data_ptr(), x.data_ptr())
@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
def test_toNumpy(self):
types = [
'torch.ByteTensor',
'torch.IntTensor',
'torch.FloatTensor',
'torch.DoubleTensor',
'torch.LongTensor',
]
for tp in types:
# 1D
sz = 10
x = torch.randn(sz).mul(255).type(tp)
y = x.numpy()
for i in range(sz):
self.assertEqual(x[i], y[i])
# 1D > 0 storage offset
xm = torch.randn(sz * 2).mul(255).type(tp)
x = xm.narrow(0, sz-1, sz)
self.assertTrue(x.storage_offset() > 0)
y = x.numpy()
for i in range(sz):
self.assertEqual(x[i], y[i])
def check2d(x, y):
for i in range(sz1):
for j in range(sz2):
self.assertEqual(x[i][j], y[i][j])
# empty
x = torch.Tensor().type(tp)
y = x.numpy()
self.assertEqual(y.size, 0)
# contiguous 2D
sz1 = 3
sz2 = 5
x = torch.randn(sz1, sz2).mul(255).type(tp)
y = x.numpy()
check2d(x, y)
# with storage offset
xm = torch.randn(sz1 * 2, sz2).mul(255).type(tp)
x = xm.narrow(0, sz1-1, sz1)
y = x.numpy()
self.assertTrue(x.storage_offset() > 0)
check2d(x, y)
# non-contiguous 2D
x = torch.randn(sz2, sz1).t().mul(255).type(tp)
y = x.numpy()
check2d(x, y)
# with storage offset
xm = torch.randn(sz2 * 2, sz1).mul(255).type(tp)
x = xm.narrow(0, sz2-1, sz2).t()
y = x.numpy()
self.assertTrue(x.storage_offset() > 0)
check2d(x, y)
# non-contiguous 2D with holes
xm = torch.randn(sz2 * 2, sz1 * 2).mul(255).type(tp)
x = xm.narrow(0, sz2-1, sz2).narrow(1, sz1-1, sz1).t()
y = x.numpy()
self.assertTrue(x.storage_offset() > 0)
check2d(x, y)
# check writeable
x = torch.randn(3, 4).mul(255).type(tp)
y = x.numpy()
self.assertTrue(y.flags.writeable)
y[0][1] = 3
self.assertTrue(x[0][1] == 3)
y = x.t().numpy()
self.assertTrue(y.flags.writeable)
y[0][1] = 3
self.assertTrue(x[0][1] == 3)
@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
def test_from_numpy(self):
dtypes = [
np.double,
np.float,
np.int64,
np.int32,
np.uint8
]
for dtype in dtypes:
array = np.array([1, 2, 3, 4], dtype=dtype)
self.assertEqual(torch.from_numpy(array), torch.Tensor([1, 2, 3, 4]))
@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
def test_numpy_index(self):
i = np.int32([0, 1, 2])
x = torch.randn(5, 5)
for idx in i:
self.assertFalse(isinstance(idx, int))
self.assertEqual(x[idx], x[int(idx)])
if __name__ == '__main__':
unittest.main()


@ -5,15 +5,16 @@ import shutil
import random
import tempfile
import unittest
import torch
import torch.cuda
import sys
import traceback
import torch
import torch.cuda
from torch.autograd import Variable
from torch.utils.trainer import Trainer
from torch.utils.trainer.plugins import *
from torch.utils.trainer.plugins.plugin import Plugin
from torch.utils.data import *
HAS_CUDA = torch.cuda.is_available()
from common import TestCase
@ -56,7 +57,7 @@ class SimplePlugin(Plugin):
class ModelMock(object):
def __init__(self):
self.num_calls = 0
self.output = Variable(torch.ones(1, 1))
self.output = Variable(torch.ones(1, 1), requires_grad=True)
def __call__(self, i):
self.num_calls += 1
@ -81,12 +82,14 @@ class OptimizerMock(object):
self.num_evals = 0
def step(self, closure):
for i in range(random.randint(1, self.max_evals)):
for i in range(random.randint(self.min_evals, self.max_evals)):
loss = closure()
self.num_evals += 1
loss.backward()
self.num_steps += 1
def zero_grad(self):
pass
class DatasetMock(object):
def __iter__(self):
@ -114,8 +117,9 @@ class TestTrainer(TestCase):
]
def setUp(self):
self.trainer = Trainer(ModelMock(), CriterionMock(), OptimizerMock(),
DatasetMock())
self.optimizer = OptimizerMock()
self.trainer = Trainer(ModelMock(), CriterionMock(),
self.optimizer, DatasetMock())
self.num_epochs = 3
self.dataset_size = len(self.trainer.dataset)
self.num_iters = self.num_epochs * self.dataset_size
@ -170,122 +174,10 @@ class TestTrainer(TestCase):
def test_model_gradient(self):
self.trainer.run(epochs=self.num_epochs)
output_var = self.trainer.model.output
expected_grad = torch.ones(1, 1) * 2 * self.num_iters
expected_grad = torch.ones(1, 1) * 2 * self.optimizer.num_evals
self.assertEqual(output_var.grad, expected_grad)
class TestTensorDataset(TestCase):
def test_len(self):
source = TensorDataset(torch.randn(15, 10, 2, 3, 4, 5), torch.randperm(15))
self.assertEqual(len(source), 15)
def test_getitem(self):
t = torch.randn(15, 10, 2, 3, 4, 5)
l = torch.randn(15, 10)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i], source[i][0])
self.assertEqual(l[i], source[i][1])
def test_getitem_1d(self):
t = torch.randn(15)
l = torch.randn(15)
source = TensorDataset(t, l)
for i in range(15):
self.assertEqual(t[i:i+1], source[i][0])
self.assertEqual(l[i:i+1], source[i][1])
class ErrorDataset(Dataset):
def __init__(self, size):
self.size = size
def __len__(self):
return self.size
class TestDataLoader(TestCase):
def setUp(self):
self.data = torch.randn(100, 2, 3, 5)
self.labels = torch.randperm(50).repeat(2)
self.dataset = TensorDataset(self.data, self.labels)
def _test_sequential(self, loader):
batch_size = loader.batch_size
for i, (sample, target) in enumerate(loader):
idx = i * batch_size
self.assertEqual(sample, self.data[idx:idx+batch_size])
self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1))
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
def _test_shuffle(self, loader):
found_data = {i: 0 for i in range(self.data.size(0))}
found_labels = {i: 0 for i in range(self.labels.size(0))}
batch_size = loader.batch_size
for i, (batch_samples, batch_targets) in enumerate(loader):
for sample, target in zip(batch_samples, batch_targets):
for data_point_idx, data_point in enumerate(self.data):
if data_point.eq(sample).all():
self.assertFalse(found_data[data_point_idx])
found_data[data_point_idx] += 1
break
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
found_labels[data_point_idx] += 1
self.assertEqual(sum(found_data.values()), (i+1) * batch_size)
self.assertEqual(sum(found_labels.values()), (i+1) * batch_size)
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
def _test_error(self, loader):
it = iter(loader)
errors = 0
while True:
try:
it.next()
except NotImplementedError:
msg = "".join(traceback.format_exception(*sys.exc_info()))
self.assertTrue("_processBatch" in msg)
errors += 1
except StopIteration:
self.assertEqual(errors,
math.ceil(float(len(loader.dataset))/loader.batch_size))
return
def test_sequential(self):
self._test_sequential(DataLoader(self.dataset))
def test_sequential_batch(self):
self._test_sequential(DataLoader(self.dataset, batch_size=2))
def test_shuffle(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True))
def test_shuffle_batch(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))
def test_sequential_workers(self):
# still use test shuffle here because the workers may shuffle the order
self._test_shuffle(DataLoader(self.dataset, num_workers=4))
def test_sequential_batch_workers(self):
# still use test shuffle here because the workers may shuffle the order
self._test_shuffle(DataLoader(self.dataset, batch_size=2, num_workers=4))
def test_shuffle_workers(self):
self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))
def test_shuffle_batch_workers(self):
self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))
def test_error(self):
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
def test_error_workers(self):
self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
test_dir = os.path.abspath(os.path.dirname(str(__file__)))
class TestFFI(TestCase):
@ -326,7 +218,7 @@ class TestFFI(TestCase):
self.assertRaises(torch.FatalError,
lambda: cpulib.bad_func(tensor, 2, 1.5))
@unittest.skipIf(not HAS_CFFI, "ffi tests require cffi package")
@unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
def test_gpu(self):
compile_extension(
name='gpulib',
@ -355,4 +247,3 @@ class TestFFI(TestCase):
if __name__ == '__main__':
unittest.main()


@ -3,7 +3,7 @@ import yaml
from string import Template
from copy import deepcopy
from .plugins import ArgcountChecker, OptionalArguments, ArgumentReferences, \
BeforeCall, ConstantArguments, ReturnArguments, GILRelease
BeforeAfterCall, ConstantArguments, ReturnArguments, GILRelease
class cwrap(object):
@ -26,7 +26,7 @@ class cwrap(object):
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($arg_unpack);")
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeCall, ReturnArguments, GILRelease]
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
def __init__(self, source, destination=None, plugins=[], default_plugins=True):
if destination is None:
@ -40,6 +40,7 @@ class cwrap(object):
for plugin in self.plugins:
plugin.initialize(self)
self.base_path = os.path.dirname(os.path.abspath(source))
with open(source, 'r') as f:
declarations = f.read()
@ -55,8 +56,10 @@ class cwrap(object):
declaration_lines = []
output = []
in_declaration = False
i = 0
for line in lines:
while i < len(lines):
line = lines[i]
if line == '[[':
declaration_lines = []
in_declaration = True
@ -79,8 +82,15 @@ class cwrap(object):
output.append(wrapper)
elif in_declaration:
declaration_lines.append(line)
elif '!!inc ' == line[:6]:
fname = os.path.join(self.base_path, line[6:].strip())
with open(fname, 'r') as f:
included = f.read().split('\n')
# insert it into lines at position i+1
lines[i+1:i+1] = included
else:
output.append(line)
i += 1
return '\n'.join(output)
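# Hedged illustration of the new `!!inc` directive handled above; the file name is
# made up for this sketch and not taken from the diff:
#   !!inc common_declarations.cwrap
# A line like this outside a [[ ... ]] declaration splices the contents of
# <directory of the source file>/common_declarations.cwrap into `lines` at the
# current position, so the included declarations are processed on later iterations.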
@ -138,7 +148,13 @@ class cwrap(object):
return self.search_plugins('get_wrapper_template', (declaration,), lambda _: None)
def get_arg_accessor(self, arg, option):
return self.search_plugins('get_arg_accessor', (arg, option), lambda arg,_: 'PyTuple_GET_ITEM(args, {})'.format(arg['idx']))
def wrap_accessor(arg, _):
if arg.get('idx') is None:
raise RuntimeError("Missing accessor for '{} {}'".format(
arg['type'], arg['name']))
return 'PyTuple_GET_ITEM(args, {})'.format(arg['idx'])
return self.search_plugins('get_arg_accessor', (arg, option), wrap_accessor)
def generate_wrapper(self, declaration):
wrapper = ''
@ -153,7 +169,12 @@ class cwrap(object):
result = []
for arg in arguments:
accessor = self.get_arg_accessor(arg, option)
res = getattr(self, base_fn_name)(arg, option).substitute(arg=accessor)
tmpl = getattr(self, base_fn_name)(arg, option)
if tmpl is None:
fn = 'check' if base_fn_name == 'get_type_check' else 'unpack'
raise RuntimeError("Missing type {} for '{} {}'".format(
fn, arg['type'], arg['name']))
res = tmpl.substitute(arg=accessor, idx=arg.get('idx'))
for plugin in self.plugins:
res = getattr(plugin, plugin_fn_name)(res, arg, accessor)
result.append(res)


@ -0,0 +1,27 @@
from . import CWrapPlugin
from string import Template
class BeforeAfterCall(CWrapPlugin):
def initialize(self, cwrap):
self.cwrap = cwrap
def insert_snippet(self, template, option, offset, name):
prepend_str = option.get(name)
if prepend_str is None:
return
if '$' in prepend_str:
before_call_template = Template(option[name])
args = {'arg' + str(i): self.cwrap.get_arg_accessor(arg, option) for i, arg
in enumerate(option['arguments'])}
prepend_str = before_call_template.substitute(args)
template.insert(offset, prepend_str)
def process_option_code_template(self, template, option):
if option.get('before_call') or option.get('after_call'):
call_idx = template.index('$call')
self.insert_snippet(template, option, call_idx, 'before_call')
# call position might have changed
call_idx = template.index('$call')
self.insert_snippet(template, option, call_idx+1, 'after_call')
return template


@ -1,19 +0,0 @@
from . import CWrapPlugin
from string import Template
class BeforeCall(CWrapPlugin):
def initialize(self, cwrap):
self.cwrap = cwrap
def process_option_code_template(self, template, option):
if option.get('before_call', False):
call_idx = template.index('$call')
prepend_str = option['before_call']
if '$' in prepend_str:
before_call_template = Template(option['before_call'])
args = {'arg' + str(i): self.cwrap.get_arg_accessor(arg, option) for i, arg
in enumerate(option['arguments'])}
prepend_str = before_call_template.substitute(args)
template.insert(call_idx, prepend_str)
return template


@ -0,0 +1,19 @@
from . import CWrapPlugin
from string import Template
class BoolOption(CWrapPlugin):
UNPACK_TEMPLATE = Template('$arg == Py_True ? $if_true : $if_false')
def is_bool_option(self, arg):
return arg['type'] == 'bool' and 'if_true' in arg and 'if_false' in arg
def get_type_check(self, arg, option):
if self.is_bool_option(arg):
return Template('PyBool_Check($arg)')
def get_type_unpack(self, arg, option):
if self.is_bool_option(arg):
return Template(self.UNPACK_TEMPLATE.safe_substitute(
if_true=arg['if_true'], if_false=arg['if_false']))
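# Sketch of the kind of argument dict BoolOption matches (values are hypothetical,
# not taken from this diff):
#   arg = {'name': 'upper', 'type': 'bool', 'if_true': 1, 'if_false': 0}
# The type check emits PyBool_Check($arg) and the unpack expands to
# `$arg == Py_True ? 1 : 0`, so a Python bool selects between two C-side values.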


@ -0,0 +1,158 @@
from string import Template
from copy import deepcopy
from . import CWrapPlugin
from itertools import product
class CuDNNPlugin(CWrapPlugin):
TYPE_UNPACK = {
'THTensor*': Template('((THPVoidTensor*)$arg)->cdata'),
'int': Template('THPUtils_unpackLong($arg)'),
'cudnnDataType_t': Template('$arg'),
'cudnnHandle_t': Template('$arg'),
'Convolution*': Template('(Convolution*)THPWrapper_get($arg)'),
'bool': Template('$arg == Py_True'),
}
TYPE_CHECK = {
'Convolution*': Template('THPWrapper_check($arg)'),
'THTensor*': Template('(PyObject*)Py_TYPE($arg) == tensorClass'),
'int': Template('THPUtils_checkLong($arg)'),
'bool': Template('PyBool_Check($arg)'),
}
RETURN_WRAPPER = {
'Convolution*': Template('return THPWrapper_New($result, [](void* arg) { delete (Convolution*)arg; });'),
}
METHODS_DECLARATION = Template("""
static PyMethodDef _THCUDNN_methods[] = {
$methods
{NULL}
};
PyMethodDef* THCUDNN_methods()
{
return _THCUDNN_methods;
}
""")
WRAPPER_TEMPLATE = Template("""\
static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
int __tuplecount = args ? PyTuple_Size(args) : 0;
int __dictcount = kwargs ? PyDict_Size(kwargs) : 0;
int __argcount = __tuplecount + __dictcount;
PyObject* tensorClass = getTensorClass(args);
THCPAutoGPU __autogpu_guard = THCPAutoGPU(args);
$options
}
THPUtils_invalidArguments(args, "$readable_name", $num_options, $expected_args);
return NULL;
END_HANDLE_TH_ERRORS
}
""")
RELEASE_ARG = Template("_${name}_guard.release();")
TYPE_NAMES = {
'THTensor*': '" THPTensorStr "',
'long': 'int',
'bool': 'bool',
'int': 'int',
}
def __init__(self):
self.declarations = []
def get_type_unpack(self, arg, option):
return self.TYPE_UNPACK.get(arg['type'], None)
def get_type_check(self, arg, option):
return self.TYPE_CHECK.get(arg['type'], None)
def get_wrapper_template(self, declaration):
arg_desc = []
for option in declaration['options']:
option_desc = [self.TYPE_NAMES.get(arg['type'], arg['type']) + ' ' + arg['name']
for arg in option['arguments']
if not arg.get('ignore_check', False)]
# TODO: this should probably go to THPLongArgsPlugin
if option_desc:
arg_desc.append('({})'.format(', '.join(option_desc)))
else:
arg_desc.append('no arguments')
arg_desc.sort(key=len)
arg_desc = ['"' + desc + '"' for desc in arg_desc]
arg_str = ', '.join(arg_desc)
readable_name = declaration['python_name']
return Template(self.WRAPPER_TEMPLATE.safe_substitute(
readable_name=readable_name, num_options=len(arg_desc),
expected_args=arg_str))
def get_return_wrapper(self, option):
return self.RETURN_WRAPPER.get(option['return'], None)
def get_arg_accessor(self, arg, option):
name = arg['name']
if name == 'self':
return 'self'
elif name == 'dataType':
return 'getCudnnDataType(tensorClass)'
elif name == 'handle':
return 'getCudnnHandle()'
def process_declarations(self, declarations):
for declaration in declarations:
declaration.setdefault('python_name', '_{}'.format(declaration['name']))
declaration['name'] = 'THCUDNN_{}'.format(declaration['name'])
self.declarations.append(declaration)
for option in declaration['options']:
for arg in option['arguments']:
if arg['name'] in ['self', 'state', 'dataType', 'handle']:
arg['ignore_check'] = True
declaration['options'] = self.filter_unique_options(declaration['options'])
return declarations
def filter_unique_options(self, options):
def signature(option):
return '#'.join(arg['type'] for arg in option['arguments'] if not 'ignore_check' in arg or not arg['ignore_check'])
seen_signatures = set()
unique = []
for option in options:
sig = signature(option)
if sig not in seen_signatures:
unique.append(option)
seen_signatures.add(sig)
return unique
def preprocessor_guard(self, code, condition):
return '#if ' + condition + '\n' + code + '#endif\n'
def process_wrapper(self, code, declaration):
if 'defined_if' in declaration:
return self.preprocessor_guard(code, declaration['defined_if'])
return code
def process_all_unpacks(self, code, option):
return 'state, ' + code
def declare_methods(self):
methods = ''
for declaration in self.declarations:
extra_flags = ' | ' + declaration.get('method_flags') if 'method_flags' in declaration else ''
if not declaration.get('only_register'):
extra_flags += ' | METH_KEYWORDS'
entry = Template(' {"$python_name", (PyCFunction)$name, METH_VARARGS$extra_flags, NULL},\n').substitute(
python_name=declaration['python_name'], name=declaration['name'], extra_flags=extra_flags
)
if 'defined_if' in declaration:
entry = self.preprocessor_guard(entry, declaration['defined_if'])
methods += entry
return self.METHODS_DECLARATION.substitute(methods=methods)
def process_full_file(self, code):
return code + self.declare_methods()


@ -0,0 +1,50 @@
from . import CWrapPlugin
from string import Template
class KwargsPlugin(CWrapPlugin):
ACCESSOR_TEMPLATE = Template('(__tuplecount > $idx ? PyTuple_GET_ITEM(args, $idx) : __kw_$name)')
CHECK_TEMPLATE = Template('(__tuplecount > $idx || __kw_$name) && $code')
WRAPPER_TEMPLATE = Template("""
$declarations
if (kwargs) {
$lookups
}
""")
def process_declarations(self, declarations):
# We don't have access to declaration or options in get_arg_accessor
# and process_single_check, so we have to push the flag down to
# the args.
for declaration in declarations:
if declaration.get('no_kwargs'):
for option in declaration['options']:
for arg in option['arguments']:
arg['no_kwargs'] = True
return declarations
def get_arg_accessor(self, arg, option):
if not arg.get('no_kwargs'):
return self.ACCESSOR_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'])
def process_single_check(self, code, arg, arg_accessor):
if not arg.get('no_kwargs'):
return self.CHECK_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'], code=code)
return code
def process_wrapper(self, code, declaration):
if declaration.get('no_kwargs'):
return code
seen_args = set()
args = []
for option in declaration['options']:
for arg in option['arguments']:
name = arg['name']
if not arg.get('ignore_check') and name not in seen_args:
seen_args.add(name)
args.append(name)
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args])
lookups = '\n '.join(['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args])
start_idx = code.find('{') + 1
new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups)
return code[:start_idx] + new_code + code[start_idx:]
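# Hedged sketch of the preamble process_wrapper injects for a hypothetical argument
# named 'dim' (generated C, shown here for illustration only):
#   PyObject *__kw_dim = NULL;
#   if (kwargs) {
#     __kw_dim = PyDict_GetItemString(kwargs, "dim");
#   }
# Each accessor and type check then falls back to __kw_dim when too few positional
# arguments were given (see ACCESSOR_TEMPLATE and CHECK_TEMPLATE above).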


@ -30,8 +30,11 @@ class StandaloneExtension(CWrapPlugin):
'THDoubleTensor*': Template('THPDoubleTensor_CData((THPDoubleTensor*)$arg)'),
'THLongTensor*': Template('THPLongTensor_CData((THPLongTensor*)$arg)'),
'THIntTensor*': Template('THPIntTensor_CData((THPIntTensor*)$arg)'),
'THCudaHalfTensor*': Template('THCPHalfTensor_CData((THCPHalfTensor*)$arg)'),
'THCudaTensor*': Template('THCPFloatTensor_CData((THCPFloatTensor*)$arg)'),
'THCudaDoubleTensor*': Template('THCPDoubleTensor_CData((THCPDoubleTensor*)$arg)'),
'THCudaLongTensor*': Template('THCPLongTensor_CData((THCPLongTensor*)$arg)'),
'half': Template('THPHalfUtils_unpackReal($arg)'),
'float': Template('THPFloatUtils_unpackReal($arg)'),
'double': Template('THPDoubleUtils_unpackReal($arg)'),
'bool': Template('THPUtils_unpackLong($arg)'),
@ -46,9 +49,12 @@ class StandaloneExtension(CWrapPlugin):
'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'),
'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'),
'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'),
'THCudaHalfTensor*': Template('THCPHalfTensor_Check($arg)'),
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
'THCudaDoubleTensor*': Template('THCPDoubleTensor_Check($arg)'),
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
'float': Template('THPDoubleUtils_checkReal($arg)'),
'half': Template('THPHalfUtils_checkReal($arg)'),
'float': Template('THPFloatUtils_checkReal($arg)'),
'double': Template('THPDoubleUtils_checkReal($arg)'),
'bool': Template('THPUtils_checkLong($arg)'),
'int': Template('THPUtils_checkLong($arg)'),
@ -73,7 +79,9 @@ PyObject * $name(PyObject *_unused, PyObject *args)
TYPE_NAMES = {
'THGenerator*': 'Generator',
'THCudaHalfTensor*': 'torch.cuda.HalfTensor',
'THCudaTensor*': 'torch.cuda.FloatTensor',
'THCudaDoubleTensor*': 'torch.cuda.DoubleTensor',
'THCudaLongTensor*': 'torch.cuda.LongTensor',
'THDoubleTensor*': 'torch.DoubleTensor',
'THFloatTensor*': 'torch.FloatTensor',
@ -85,6 +93,7 @@ PyObject * $name(PyObject *_unused, PyObject *args)
'long': 'int',
'int': 'int',
'real': 'float',
'half': 'float',
'double': 'float',
'float': 'float',
'accreal': 'float',


@ -1,56 +0,0 @@
from string import Template
from . import CWrapPlugin
class THPLongArgsPlugin(CWrapPlugin):
PARSE_LONG_ARGS = Template("""\
THLongStoragePtr __long_args_guard;
try {
__long_args_guard = THPUtils_getLongStorage(args, $num_checked);
} catch (std::exception &e) {
goto invalid_arguments;
}
THLongStorage* __long_args = __long_args_guard.get();
""")
def get_arg_accessor(self, arg, option):
if 'long_args' in option and option['long_args'] and arg['name'] == 'long_args':
return '__long_args'
def get_type_unpack(self, arg, option):
if option.get('long_args', False) and arg['name'] == 'long_args':
return Template('$arg')
def process_declarations(self, declarations):
for declaration in declarations:
for option in declaration['options']:
if not 'long_args' in option or not option['long_args']:
continue
for arg in option['arguments']:
if arg['name'] == 'long_args':
arg['ignore_check'] = True
return declarations
def process_all_checks(self, code, option):
if 'long_args' in option and option['long_args']:
code = code.replace('__argcount ==', '__argcount >')
return code
def process_wrapper(self, code, declaration):
if any(map(lambda opt: opt.get('long_args'), declaration['options'])):
invalid_arguments_idx = code.find('THPUtils_invalidArguments')
newline_idx = code.rfind('\n', 0, invalid_arguments_idx)
code = code[:newline_idx] + '\ninvalid_arguments:' + code[newline_idx:]
return code
def process_option_code(self, code, option):
if 'long_args' in option and option['long_args']:
lines = code.split('\n')
end_checks = 0
for i, line in enumerate(lines):
if ') {' in line:
end_checks = i
break
lines = lines[:end_checks+1] + [self.PARSE_LONG_ARGS.substitute(num_checked=option['num_checked_args'])] + lines[end_checks+1:]
code = '\n'.join(lines)
return code


@ -2,6 +2,7 @@ from string import Template
from copy import deepcopy
from . import CWrapPlugin
from itertools import product
from collections import OrderedDict
class THPPlugin(CWrapPlugin):
@ -16,6 +17,8 @@ class THPPlugin(CWrapPlugin):
'THLongStorage*': Template('((THPLongStorage*)$arg)->cdata'),
'THStorage*': Template('((THPStorage*)$arg)->cdata'),
'THGenerator*': Template('((THPGenerator*)$arg)->cdata'),
'THSize*': Template('__size.get()'),
'THStride*': Template('__stride.get()'),
'void*': Template('THPUtils_unpackLong($arg)'),
'long': Template('THPUtils_unpackLong($arg)'),
'int': Template('THPUtils_unpackLong($arg)'),
@ -38,6 +41,8 @@ class THPPlugin(CWrapPlugin):
'THLongStorage*': Template('(PyObject*)Py_TYPE($arg) == THPLongStorageClass'),
'THStorage*': Template('(PyObject*)Py_TYPE($arg) == THPStorageClass'),
'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'),
'THSize*': Template('THPUtils_tryUnpackLongs($arg, __size)'),
'THStride*': Template('THPUtils_tryUnpackLongs($arg, __stride)'),
'void*': Template('THPUtils_checkLong($arg)'),
'long': Template('THPUtils_checkLong($arg)'),
'int': Template('THPUtils_checkLong($arg)'),
@ -49,6 +54,8 @@ class THPPlugin(CWrapPlugin):
'accreal': Template('THPUtils_(checkReal)($arg)'),
}
SIZE_VARARG_CHECK = Template('THPUtils_tryUnpackLongVarArgs(args, $idx, __size)')
RETURN_WRAPPER = {
'THTensor*': Template('return THPTensor_(New)($result);'),
'THLongStorage*': Template('return THPLongStorage_New($result);'),
@ -68,10 +75,14 @@ $methods
""")
WRAPPER_TEMPLATE = Template("""\
PyObject * $name(PyObject *self, PyObject *args)
PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
int __argcount = args ? PyTuple_Size(args) : 0;
int __tuplecount = args ? PyTuple_Size(args) : 0;
int __dictcount = kwargs ? PyDict_Size(kwargs) : 0;
int __argcount = __tuplecount + __dictcount;
$variables
$options
}
@ -81,65 +92,50 @@ PyObject * $name(PyObject *self, PyObject *args)
}
""")
ALLOCATE_TYPE = {
'THTensor*': Template("""\
THTensorPtr _th_$name = THTensor_(new)(LIBRARY_STATE_NOARGS);
THPTensorPtr _${name}_guard = (THPTensor*)THPTensor_(New)(_th_$name.get());
THPTensor* $name = _${name}_guard.get();
if (!$name)
return NULL;
_th_$name.release();
"""),
'THLongTensor*': Template("""\
THLongTensorPtr _th_$name = THLongTensor_new(LIBRARY_STATE_NOARGS);
THPLongTensorPtr _${name}_guard = (THPLongTensor*)THPLongTensor_New(_th_$name.get());
THPLongTensor* $name = _${name}_guard.get();
if (!$name)
return NULL;
_th_$name.release();
"""),
'THBoolTensor*': Template("""
#if IS_CUDA
THCByteTensorPtr _t_$name = THCudaByteTensor_new(LIBRARY_STATE_NOARGS);
THCPByteTensorPtr _${name}_guard = (THCPByteTensor*)THCPByteTensor_New(_t_$name);
THCPByteTensor *$name = _${name}_guard.get();
#else
THByteTensorPtr _t_$name = THByteTensor_new();
THPByteTensorPtr _${name}_guard = (THPByteTensor*)THPByteTensor_New(_t_$name);
THPByteTensor *$name = _${name}_guard.get();
#endif
if (!$name)
return NULL;
_t_$name.release();
"""),
'THIndexTensor*': Template("""
#if IS_CUDA
THCLongTensorPtr _t_$name = THCudaLongTensor_new(LIBRARY_STATE_NOARGS);
THCPLongTensorPtr _${name}_guard = (THCPLongTensor*)THCPLongTensor_New(_t_$name);
THCPLongTensor *$name = _${name}_guard.get();
#else
THLongTensorPtr _t_$name = THLongTensor_new();
THPLongTensorPtr _${name}_guard = (THPLongTensor*)THPLongTensor_New(_t_$name);
THPLongTensor *$name = _${name}_guard.get();
#endif
if (!$name)
return NULL;
_t_$name.release();
"""),
}
ALLOCATE_TMPL = Template("""\
THP${type}TensorPtr _${name}_guard = (THP${type}Tensor*) THP${type}Tensor_NewEmpty();
if (!_${name}_guard.get()) return NULL;
THP${type}Tensor* $name = _${name}_guard.get();
""")
RELEASE_ARG = Template("_${name}_guard.release();")
ALLOCATE_CUDA = Template("""\
#if IS_CUDA
${cuda}
#else
${cpu}
#endif
""")
def _allocate(typename, tmpl, cuda_tmpl=None):
code = tmpl.safe_substitute(type=typename)
if typename == '':
code = code.replace('NewEmpty', '(NewEmpty)')
if cuda_tmpl:
cuda_code = code.replace('THP', 'THCP')
code = cuda_tmpl.substitute(cuda=cuda_code, cpu=code)
return Template(code)
ALLOCATE_TYPE = {
'THTensor*': _allocate('', ALLOCATE_TMPL),
'THLongTensor*': _allocate('Long', ALLOCATE_TMPL),
'THIntTensor*': _allocate('Int', ALLOCATE_TMPL),
'THBoolTensor*': _allocate('Byte', ALLOCATE_TMPL, ALLOCATE_CUDA),
'THIndexTensor*': _allocate('Long', ALLOCATE_TMPL, ALLOCATE_CUDA),
}
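# For reference, a rough expansion of ALLOCATE_TMPL with type='Long', name='indices'
# (sketch only, not generated output from this diff):
#   THPLongTensorPtr _indices_guard = (THPLongTensor*) THPLongTensor_NewEmpty();
#   if (!_indices_guard.get()) return NULL;
#   THPLongTensor* indices = _indices_guard.get();
# For the plain 'THTensor*' entry the empty type name turns NewEmpty into (NewEmpty),
# and the CUDA-aware entries emit an `#if IS_CUDA` branch using the THCP classes
# with the CPU code in the `#else`.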
TYPE_NAMES = {
'THTensor*': '" THPTensorStr "',
'THStorage*': '" THPStorageStr "',
'THGenerator*': 'Generator',
'THLongStorage*': 'LongStorage',
'THLongTensor*': 'LongTensor',
'THBoolTensor*': 'ByteTensor',
'THIndexTensor*': 'LongTensor',
'THFloatTensor*': 'FloatTensor',
'THDoubleTensor*': 'DoubleTensor',
'THGenerator*': 'torch.Generator',
'THLongStorage*': '" THPModuleStr "LongStorage',
'THLongTensor*': '" THPModuleStr "LongTensor',
'THIntTensor*': '" THPModuleStr "IntTensor',
'THBoolTensor*': '" THPModuleStr "ByteTensor',
'THIndexTensor*': '" THPModuleStr "LongTensor',
'THFloatTensor*': '" THPModuleStr "FloatTensor',
'THDoubleTensor*': '" THPModuleStr "DoubleTensor',
'THSize*': 'torch.Size',
'THStride*': 'tuple',
'long': 'int',
'real': '" RealStr "',
'double': 'float',
@ -155,32 +151,44 @@ PyObject * $name(PyObject *self, PyObject *args)
return self.TYPE_UNPACK.get(arg['type'], None)
def get_type_check(self, arg, option):
if arg['type'] == 'THSize*' and arg.get('long_args', False):
return self.SIZE_VARARG_CHECK
return self.TYPE_CHECK.get(arg['type'], None)
# TODO: argument descriptions shouldn't be part of THP, but rather a general cwrap thing
def get_wrapper_template(self, declaration):
arg_desc = []
for option in declaration['options']:
option_desc = [self.TYPE_NAMES[arg['type']] + ' ' + arg['name']
for arg in option['arguments']
if not arg.get('ignore_check', False)]
# TODO: this should probably go to THPLongArgsPlugin
if option.get('long_args'):
option_desc.append('int ...')
if option_desc:
arg_desc.append('({})'.format(', '.join(option_desc)))
arg_desc = OrderedDict()
def format_arg(arg, var_args=False):
if var_args and arg.get('long_args', False):
return 'int ... ' + arg['name']
else:
arg_desc.append('no arguments')
arg_desc.sort(key=len)
return self.TYPE_NAMES[arg['type']] + ' ' + arg['name']
def format_args(args, var_args=False):
option_desc = [format_arg(arg, var_args)
for arg in args
if not arg.get('ignore_check', False)]
if option_desc:
return '({})'.format(', '.join(option_desc))
else:
return 'no arguments'
for option in declaration['options']:
arg_desc[format_args(option['arguments'], False)] = True
arg_desc[format_args(option['arguments'], True)] = True
arg_desc = sorted(list(arg_desc.keys()), key=len)
arg_desc = ['"' + desc + '"' for desc in arg_desc]
arg_str = ', '.join(arg_desc)
variables_str = '\n'.join(declaration.get('variables', []))
if 'stateless' in declaration['name']:
readable_name = 'torch.' + declaration['python_name']
else:
readable_name = declaration['python_name']
return Template(self.WRAPPER_TEMPLATE.safe_substitute(
readable_name=readable_name, num_options=len(arg_desc),
expected_args=arg_str))
expected_args=arg_str, variables=variables_str))
def get_return_wrapper(self, option):
return self.RETURN_WRAPPER.get(option['return'], None)
@ -195,10 +203,28 @@ PyObject * $name(PyObject *self, PyObject *args)
new_declarations = []
register_only = [d for d in declarations if d.get('only_register', False)]
declarations = [d for d in declarations if not d.get('only_register', False)]
def has_arg_type(declaration, type_name):
return any(arg['type'] == type_name
for option in declaration['options']
for arg in option['arguments'])
def has_long_args(declaration):
return any(arg.get('long_args', False)
for option in declaration['options']
for arg in option['arguments'])
for declaration in declarations:
if declaration.get('only_register', False):
continue
declaration.setdefault('python_name', declaration['name'])
declaration.setdefault('variables', [])
if has_arg_type(declaration, 'THSize*'):
declaration['variables'] += ['THLongStoragePtr __size;']
if has_arg_type(declaration, 'THStride*'):
declaration['variables'] += ['THLongStoragePtr __stride;']
if has_long_args(declaration):
declaration['no_kwargs'] = True
if declaration.get('with_stateless', False) or declaration.get('only_stateless', False):
stateless_declaration = self.make_stateless(deepcopy(declaration))
new_declarations.append(stateless_declaration)
@ -218,10 +244,14 @@ PyObject * $name(PyObject *self, PyObject *args)
# TODO: we can probably allow duplicate signatures once we implement
# keyword arguments
declaration['options'] = self.filter_unique_options(declaration['options'])
declarations = [d for d in declarations if not d.get('only_stateless', False)]
self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only))
self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only))
return declarations + new_declarations
all_declarations = declarations + new_declarations
return all_declarations
def make_stateless(self, declaration):
declaration['name'] = 'THPTensor_stateless_({})'.format(declaration['name'])
@ -262,9 +292,14 @@ PyObject * $name(PyObject *self, PyObject *args)
def declare_methods(self, stateless):
tensor_methods = ''
for declaration in (self.declarations if not stateless else self.stateless_declarations):
extra_flags = ' | ' + declaration.get('method_flags') if 'method_flags' in declaration else ''
entry = Template(' {"$python_name", (PyCFunction)$name, METH_VARARGS$extra_flags, NULL},\n').substitute(
python_name=declaration['python_name'], name=declaration['name'], extra_flags=extra_flags
flags = 'METH_VARARGS'
flags += ' | ' + declaration.get('method_flags') if 'method_flags' in declaration else ''
if not declaration.get('only_register'):
flags += ' | METH_KEYWORDS'
if declaration.get('override_method_flags'):
flags = declaration['override_method_flags']
entry = Template(' {"$python_name", (PyCFunction)$name, $flags, NULL},\n').substitute(
python_name=declaration['python_name'], name=declaration['name'], flags=flags
)
if 'defined_if' in declaration:
entry = self.preprocessor_guard(entry, declaration['defined_if'])
@ -287,6 +322,11 @@ PyObject * $name(PyObject *self, PyObject *args)
def process_all_unpacks(self, code, option):
return 'LIBRARY_STATE ' + code
def process_all_checks(self, code, option):
if any(arg.get('long_args', False) for arg in option['arguments']):
code = code.replace('__argcount ==', '__argcount >=')
return code
def process_option_code_template(self, template, option):
new_args = []
for arg in option['arguments']:


@ -52,8 +52,9 @@ from .NullableArguments import NullableArguments
from .OptionalArguments import OptionalArguments
from .ArgcountChecker import ArgcountChecker
from .ArgumentReferences import ArgumentReferences
from .BeforeCall import BeforeCall
from .BeforeAfterCall import BeforeAfterCall
from .ConstantArguments import ConstantArguments
from .ReturnArguments import ReturnArguments
from .GILRelease import GILRelease
from .AutoGPU import AutoGPU
from .CuDNNPlugin import CuDNNPlugin


@ -34,6 +34,7 @@ FUNCTION_TEMPLATE = Template("""\
COMMON_TRANSFORMS = {
'THIndex_t': 'long',
'THCIndex_t': 'long',
'THInteger_t': 'int',
}
COMMON_CPU_TRANSFORMS = {
@ -41,6 +42,10 @@ COMMON_CPU_TRANSFORMS = {
'THIndexTensor*': 'THLongTensor*',
'THIntegerTensor*': 'THIntTensor*',
}
COMMON_GPU_TRANSFORMS = {
'THCState*': 'void*',
'THCIndexTensor*': 'THCudaLongTensor*',
}
TYPE_TRANSFORMS = {
'Float': {
@ -51,15 +56,26 @@ TYPE_TRANSFORMS = {
'THTensor*': 'THDoubleTensor*',
'real': 'double',
},
'CudaHalf': {
'THCTensor*': 'THCudaHalfTensor*',
'real': 'half',
},
'Cuda': {
'THCState*': 'void*',
'THIndexTensor*': 'THCudaLongTensor*',
}
'THCTensor*': 'THCudaTensor*',
'real': 'float',
},
'CudaDouble': {
'THCTensor*': 'THCudaDoubleTensor*',
'real': 'double',
},
}
for t, transforms in TYPE_TRANSFORMS.items():
transforms.update(COMMON_TRANSFORMS)
TYPE_TRANSFORMS['Float'].update(COMMON_CPU_TRANSFORMS)
TYPE_TRANSFORMS['Double'].update(COMMON_CPU_TRANSFORMS)
for t in ['Float', 'Double']:
TYPE_TRANSFORMS[t].update(COMMON_CPU_TRANSFORMS)
for t in ['CudaHalf', 'Cuda', 'CudaDouble']:
TYPE_TRANSFORMS[t].update(COMMON_GPU_TRANSFORMS)
def wrap_function(name, type, arguments):
@ -102,11 +118,9 @@ def wrap_cunn():
wrapper = '#include <TH/TH.h>\n'
wrapper += '#include <THC/THC.h>\n\n\n'
cunn_functions = thnn_utils.parse_header(thnn_utils.THCUNN_H_PATH)
# Get rid of Cuda prefix
for function in cunn_functions:
function.name = function.name[4:]
for fn in cunn_functions:
wrapper += wrap_function(fn.name, 'Cuda', fn.arguments)
for t in ['CudaHalf', 'Cuda', 'CudaDouble']:
wrapper += wrap_function(fn.name, t, fn.arguments)
with open('torch/csrc/nn/THCUNN.cwrap', 'w') as f:
f.write(wrapper)
cwrap('torch/csrc/nn/THCUNN.cwrap', plugins=[


@ -81,6 +81,7 @@ def initial_seed():
from .serialization import save, load
from ._tensor_str import set_printoptions
################################################################################
# Define Storage and Tensor classes
@ -107,25 +108,46 @@ class ByteStorage(_C.ByteStorageBase, _StorageBase):
class DoubleTensor(_C.DoubleTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return DoubleStorage
class FloatTensor(_C.FloatTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return FloatStorage
class LongTensor(_C.LongTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return LongStorage
class IntTensor(_C.IntTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return IntStorage
class ShortTensor(_C.ShortTensorBase, _TensorBase):
def is_signed(self):
return True
@classmethod
def storage_type(cls):
return ShortStorage
class CharTensor(_C.CharTensorBase, _TensorBase):
def is_signed(self):
# TODO
return False
@classmethod
def storage_type(cls):
return CharStorage
class ByteTensor(_C.ByteTensorBase, _TensorBase):
def is_signed(self):
return False
@classmethod
def storage_type(cls):
return ByteStorage
_tensor_classes = set()
@ -184,4 +206,3 @@ del IntTensorBase
del ShortTensorBase
del CharTensorBase
del ByteTensorBase


@ -3,13 +3,71 @@ import torch
from functools import reduce
from ._utils import _range
SCALE_FORMAT = '{:.5f} *\n'
class __PrinterOptions(object):
precision = 4
threshold = 1000
edgeitems = 3
linewidth = 80
def _number_format(storage):
min_sz = 0
double_storage = torch.DoubleStorage(storage.size()).copy_(storage)
tensor = torch.DoubleTensor(double_storage).abs()
PRINT_OPTS = __PrinterOptions()
SCALE_FORMAT = '{:.5e} *\n'
# We could use **kwargs, but this will give better docs
def set_printoptions(
precision=None,
threshold=None,
edgeitems=None,
linewidth=None,
profile=None,
):
"""Set options for printing. Items shamelessly taken from Numpy
Args:
precision: Number of digits of precision for floating point output
(default 4).
threshold: Total number of array elements which trigger summarization
rather than full repr (default 1000).
edgeitems: Number of array items in summary at beginning and end of
each dimension (default 3).
linewidth: The number of characters per line for the purpose of
inserting line breaks (default 80). Thresholded matrices will
ignore this parameter.
profile: Sane defaults for pretty printing. Can be one of "default",
"short" or "full"; any of the options given above override the profile.
"""
if profile is not None:
if profile == "default":
PRINT_OPTS.precision = 4
PRINT_OPTS.threshold = 1000
PRINT_OPTS.edgeitems = 3
PRINT_OPTS.linewidth = 80
elif profile == "short":
PRINT_OPTS.precision = 2
PRINT_OPTS.threshold = 1000
PRINT_OPTS.edgeitems = 2
PRINT_OPTS.linewidth = 80
elif profile == "full":
PRINT_OPTS.precision = 4
PRINT_OPTS.threshold = float('inf')
PRINT_OPTS.edgeitems = 3
PRINT_OPTS.linewidth = 80
if precision is not None:
PRINT_OPTS.precision = precision
if threshold is not None:
PRINT_OPTS.threshold = threshold
if edgeitems is not None:
PRINT_OPTS.edgeitems = edgeitems
if linewidth is not None:
PRINT_OPTS.linewidth = linewidth
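# A minimal usage sketch, assuming the function is re-exported as
# torch.set_printoptions (as in the torch/__init__.py hunk above); explicit
# keyword arguments override whatever the chosen profile sets:
#
#   import torch
#   torch.set_printoptions(profile='short')   # precision=2, edgeitems=2
#   torch.set_printoptions(precision=6)       # raise precision, keep the rest
#   print(torch.randn(2000))                  # numel >= threshold, so the repr is summarized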
def _number_format(tensor, min_sz=-1):
min_sz = max(min_sz, 2)
tensor = torch.DoubleTensor(tensor.nelement()).copy_(tensor).abs_()
pos_inf_mask = tensor.eq(float('inf'))
neg_inf_mask = tensor.eq(float('-inf'))
@ -21,7 +79,7 @@ def _number_format(storage):
example_value = tensor[invalid_value_mask.eq(0)][0]
tensor[invalid_value_mask] = example_value
if invalid_value_mask.any():
min_sz = 3
min_sz = max(min_sz, 3)
int_mode = True
# TODO: use fmod?
@ -43,22 +101,23 @@ def _number_format(storage):
scale = 1
exp_max = int(exp_max)
prec = PRINT_OPTS.precision
if int_mode:
if exp_max > 9:
format = '{:11.4e}'
sz = max(min_sz, 11)
if exp_max > prec + 1:
format = '{{:11.{}e}}'.format(prec)
sz = max(min_sz, 7 + prec)
else:
sz = max(min_sz, exp_max + 1)
format = '{:' + str(sz) + '.0f}'
else:
if exp_max - exp_min > 4:
sz = 11
if exp_max - exp_min > prec:
sz = 7 + prec
if abs(exp_max) > 99 or abs(exp_min) > 99:
sz = sz + 1
sz = max(min_sz, sz)
format = '{:' + str(sz) + '.4e}'
format = '{{:{}.{}e}}'.format(sz, prec)
else:
if exp_max > 5 or exp_max < 0:
if exp_max > prec + 1 or exp_max < 0:
sz = max(min_sz, 7)
scale = math.pow(10, exp_max-1)
else:
@ -67,61 +126,157 @@ def _number_format(storage):
else:
sz = exp_max + 6
sz = max(min_sz, sz)
format = '{:' + str(sz) + '.4f}'
format = '{{:{}.{}f}}'.format(sz, prec)
return format, scale, sz
def _tensor_str(self):
counter_dim = self.ndimension()-2
n = PRINT_OPTS.edgeitems
has_hdots = self.size()[-1] > 2*n
has_vdots = self.size()[-2] > 2*n
print_full_mat = not has_hdots and not has_vdots
formatter = _number_format(self, min_sz=3 if not print_full_mat else 0)
print_dots = self.numel() >= PRINT_OPTS.threshold
dim_sz = max(2, max(len(str(x)) for x in self.size()))
dim_fmt = "{:^" + str(dim_sz) + "}"
dot_fmt = u"{:^" + str(dim_sz+1) + "}"
counter_dim = self.ndimension() - 2
counter = torch.LongStorage(counter_dim).fill_(0)
counter[0] = -1
counter[counter.size()-1] = -1
finished = False
strt = ''
while True:
for i in _range(counter_dim):
nrestarted = [False for i in counter]
nskipped = [False for i in counter]
for i in _range(counter_dim - 1, -1, -1):
counter[i] += 1
if print_dots and counter[i] == n and self.size(i) > 2*n:
counter[i] = self.size(i) - n
nskipped[i] = True
if counter[i] == self.size(i):
if i == counter_dim-1:
if i == 0:
finished = True
counter[i] = 0
nrestarted[i] = True
else:
break
if finished:
break
elif print_dots:
if any(nskipped):
for hdot in nskipped:
strt += dot_fmt.format('...') if hdot \
else dot_fmt.format('')
strt += '\n'
if any(nrestarted):
strt += ' '
for vdot in nrestarted:
strt += dot_fmt.format(u'\u22EE' if vdot else '')
strt += '\n'
if strt != '':
strt += '\n'
strt += '({},.,.) = \n'.format(','.join(str(i) for i in counter))
submatrix = reduce(lambda t,i: t.select(0, i), counter, self)
strt += _matrix_str(submatrix, ' ')
strt += '\n'
strt += '({},.,.) = \n'.format(
','.join(dim_fmt.format(i) for i in counter))
submatrix = reduce(lambda t, i: t.select(0, i), counter, self)
strt += _matrix_str(submatrix, ' ', formatter, print_dots)
return strt
def _matrix_str(self, indent=''):
fmt, scale, sz = _number_format(self.storage())
nColumnPerLine = int(math.floor((80-len(indent))/(sz+1)))
def __repr_row(row, indent, fmt, scale, sz, truncate=None):
if truncate is not None:
dotfmt = " {:^5} "
return (indent +
' '.join(fmt.format(val/scale) for val in row[:truncate]) +
dotfmt.format('...') +
' '.join(fmt.format(val/scale) for val in row[-truncate:]) +
'\n')
else:
return indent + ' '.join(fmt.format(val/scale) for val in row) + '\n'
def _matrix_str(self, indent='', formatter=None, force_truncate=False):
n = PRINT_OPTS.edgeitems
has_hdots = self.size(1) > 2*n
has_vdots = self.size(0) > 2*n
print_full_mat = not has_hdots and not has_vdots
if formatter is None:
fmt, scale, sz = _number_format(self,
min_sz=5 if not print_full_mat else 0)
else:
fmt, scale, sz = formatter
nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth-len(indent))/(sz+1)))
strt = ''
firstColumn = 0
while firstColumn < self.size(1):
lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1)-1)
if nColumnPerLine < self.size(1):
strt += '\n' if firstColumn != 1 else ''
strt += 'Columns {} to {} \n{}'.format(firstColumn, lastColumn, indent)
if not force_truncate and \
(self.numel() < PRINT_OPTS.threshold or print_full_mat):
while firstColumn < self.size(1):
lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1)-1)
if nColumnPerLine < self.size(1):
strt += '\n' if firstColumn != 1 else ''
strt += 'Columns {} to {} \n{}'.format(
firstColumn, lastColumn, indent)
if scale != 1:
strt += SCALE_FORMAT.format(scale)
for l in _range(self.size(0)):
strt += indent + (' ' if scale != 1 else '')
row_slice = self[l, firstColumn:lastColumn+1]
strt += ' '.join(fmt.format(val/scale) for val in row_slice)
strt += '\n'
firstColumn = lastColumn + 1
else:
if scale != 1:
strt += SCALE_FORMAT.format(scale)
for l in _range(self.size(0)):
strt += indent + (' ' if scale != 1 else '')
row_slice = self[l, firstColumn:lastColumn+1]
strt += ' '.join(fmt.format(val/scale) for val in row_slice) + '\n'
firstColumn = lastColumn + 1
if has_vdots and has_hdots:
vdotfmt = "{:^" + str((sz+1)*n-1) + "}"
ddotfmt = u"{:^5}"
for row in self[:n]:
strt += __repr_row(row, indent, fmt, scale, sz, n)
strt += indent + ' '.join([vdotfmt.format('...'),
ddotfmt.format(u'\u22F1'),
vdotfmt.format('...')]) + "\n"
for row in self[-n:]:
strt += __repr_row(row, indent, fmt, scale, sz, n)
elif not has_vdots and has_hdots:
for row in self:
strt += __repr_row(row, indent, fmt, scale, sz, n)
elif has_vdots and not has_hdots:
vdotfmt = u"{:^" + \
str(len(__repr_row(self[0], '', fmt, scale, sz))) + \
"}\n"
for row in self[:n]:
strt += __repr_row(row, indent, fmt, scale, sz)
strt += vdotfmt.format(u'\u22EE')
for row in self[-n:]:
strt += __repr_row(row, indent, fmt, scale, sz)
else:
for row in self:
strt += __repr_row(row, indent, fmt, scale, sz)
return strt
def _vector_str(tensor):
fmt, scale, _ = _number_format(tensor.storage())
def _vector_str(self):
fmt, scale, sz = _number_format(self)
strt = ''
ident = ''
n = PRINT_OPTS.edgeitems
dotfmt = u"{:^" + str(sz) + "}\n"
if scale != 1:
strt += SCALE_FORMAT.format(scale)
return '\n'.join(fmt.format(val/scale) for val in tensor) + '\n'
ident = ' '
if self.numel() < PRINT_OPTS.threshold:
return (strt +
'\n'.join(ident + fmt.format(val/scale) for val in self) +
'\n')
else:
return (strt +
'\n'.join(ident + fmt.format(val/scale) for val in self[:n]) +
'\n' + (ident + dotfmt.format(u"\u22EE")) +
'\n'.join(ident + fmt.format(val/scale) for val in self[-n:]) +
'\n')
def _str(self):
@ -135,6 +290,9 @@ def _str(self):
strt = _tensor_str(self)
size_str = 'x'.join(str(size) for size in self.size())
strt += '[{} of size {}]\n'.format(torch.typename(self), size_str)
device_str = '' if not self.is_cuda else \
' (GPU {})'.format(self.get_device())
strt += '[{} of size {}{}]\n'.format(torch.typename(self),
size_str, device_str)
return '\n' + strt

View File

@ -46,8 +46,6 @@ type2backend = Backends()
_thnn_headers = parse_header(THNN_H_PATH)
_thcunn_headers = parse_header(THCUNN_H_PATH)
for function in _thcunn_headers:
function.name = function.name[4:]
for t in ['Float', 'Double']:
backend = Backend(t, 'torch._thnn._THNN', _thnn_headers)

View File

@ -90,7 +90,7 @@ def parse_header(path):
fn_name = fn_name[:-1]
generic_functions.append(Function(fn_name))
elif l:
t, name = l.split(' ')
t, name = l.split()
if '*' in name:
t = t + '*'
name = name[1:]

View File

@ -1,3 +1,4 @@
import torch
def _type(self, new_type=None, async=False):
if new_type is None:
@ -10,10 +11,7 @@ def _type(self, new_type=None, async=False):
return new_type(self.size()).copy_(self, async)
def _cuda(self, idx=None, async=False):
import torch.cuda
# This already is a CUDA tensor.
# Let's check if it needs to be transferred to another GPU.
if hasattr(self, 'get_device'):
if self.is_cuda:
target_device = idx if idx else torch.cuda.current_device()
if self.get_device() != target_device:
with torch.cuda.device(target_device):

View File

@ -1,3 +1,6 @@
import torch
from .variable import Variable
from .function import Function
from .function import Function, NestedIOFunction
assert torch._C._autograd_init()

View File

@ -1,52 +1,40 @@
from collections import Counter, deque
from .variable import Variable
from collections import deque, defaultdict
from torch._C import _ImperativeEngine as ImperativeEngine
class ExecutionEngine(object):
def __init__(self):
pass
class BasicEngine(object):
def _compute_dependencies(self, function):
dependencies = {}
dependencies = defaultdict(int)
seen = {function}
queue = [function]
while len(queue) > 0:
fn = queue.pop()
for prev_fn, arg_id in fn.previous_functions:
if isinstance(prev_fn, Variable):
for prev_fn, output_nr in fn.previous_functions:
if not prev_fn.requires_grad or isinstance(prev_fn, Variable):
continue
if prev_fn not in dependencies:
dependencies[prev_fn] = [Counter() for _ in prev_fn.output_ids]
output_idx = prev_fn.output_ids[arg_id]
dependencies[prev_fn][output_idx][fn] += 1
dependencies[prev_fn] += 1
if prev_fn not in seen:
queue.append(prev_fn)
seen.add(prev_fn)
return dependencies
def _free_backward_dependency(self, dependencies, prev_fn, fn, arg_id):
deps = dependencies[prev_fn]
output_idx = prev_fn.output_ids[arg_id]
output_deps = deps[output_idx]
output_deps[fn] -= 1
if output_deps[fn] == 0:
del output_deps[fn]
return output_idx
def _is_ready_for_backward(self, dependencies, function):
for deps in dependencies[function]:
if len(deps) > 0:
return False
return True
def _free_backward_dependency(self, dependencies, prev_fn):
dependencies[prev_fn] -= 1
if dependencies[prev_fn] == 0:
del dependencies[prev_fn]
return True
return False
def _add_grad(self, need_copy, prev_grad, output_nr, d_prev_fn):
copy_id = (id(prev_grad), output_nr)
if not prev_grad[output_nr]:
prev_grad[output_nr] = d_prev_fn
need_copy.add(d_prev_fn)
need_copy.add(copy_id)
else:
grad_tensor = prev_grad[output_nr]
if grad_tensor in need_copy:
need_copy.remove(grad_tensor)
if copy_id in need_copy:
need_copy.remove(copy_id)
grad_tensor = grad_tensor.clone()
prev_grad[output_nr] = grad_tensor
grad_tensor.add_(d_prev_fn)
@ -56,7 +44,9 @@ class ExecutionEngine(object):
variable._do_backward((grad,), retain_variables)
return
ready = deque([(variable.creator, (grad,))])
initial_grad = [None for _ in range(variable.creator.num_outputs)]
initial_grad[variable.output_nr] = grad
ready = deque([(variable.creator, initial_grad)])
not_ready = {}
need_copy = set()
@ -64,30 +54,35 @@ class ExecutionEngine(object):
while len(ready) > 0:
fn, grad = ready.pop()
# TODO: double-buffering
grad_input = fn._do_backward(grad, retain_variables)
for (prev_fn, arg_id), d_prev_fn in zip(fn.previous_functions, grad_input):
grad_input = fn._do_backward(tuple(grad), retain_variables)
for (prev_fn, output_nr), d_prev_fn in zip(fn.previous_functions, grad_input):
if not prev_fn.requires_grad:
# TODO: check that d_prev_fn is None and warn otherwise
continue
if isinstance(prev_fn, Variable):
prev_fn._do_backward((d_prev_fn,), retain_variables)
continue
output_nr = self._free_backward_dependency(dependencies, prev_fn, fn, arg_id)
is_ready = self._is_ready_for_backward(dependencies, prev_fn)
is_ready = self._free_backward_dependency(dependencies, prev_fn)
if is_ready:
if prev_fn in not_ready:
prev_grad = not_ready[prev_fn]
self._add_grad(need_copy, prev_grad, output_nr, d_prev_fn)
else:
assert output_nr == 0
if prev_fn.num_outputs != 1:
raise RuntimeError("one of the function outputs "
"wasn't used - this is an error not, but "
"it's going to be fixed soon")
prev_grad = (d_prev_fn,)
ready.appendleft((prev_fn, prev_grad))
else:
if prev_fn in not_ready:
prev_grad = not_ready[prev_fn]
else:
prev_grad = [None for _ in prev_fn.output_ids]
prev_grad = [None for _ in range(prev_fn.num_outputs)]
self._add_grad(need_copy, prev_grad, output_nr, d_prev_fn)
not_ready[prev_fn] = prev_grad
from .variable import Variable

View File

@ -1,114 +1,33 @@
import torch
import torch._C as _C
from collections import OrderedDict
from itertools import chain
from .variable import Variable
class Function(object):
class Function(_C._FunctionBase):
def __init__(self):
self.previous_functions = None
self.output_ids = None
self.needs_input_grad = None
self.saved_variables = None
self.to_save = None
self.non_differentiable = None
self.backward_hooks = OrderedDict()
def __call__(self, *input):
return self._do_forward(*input)
__call__ = _C._FunctionBase._do_forward
def save_for_backward(self, *tensors):
self.to_save = tensors
def mark_dirty(self, *args):
dirty_set = set(args)
for var in self.input:
if var.data in dirty_set:
var.mark_dirty()
self.dirty_tensors = args
def mark_shared_storage(self, *pairs):
self.shared_pairs = pairs
def mark_non_differentiable(self, *args):
self.non_differentiable = set(args)
@property
def saved_tensors(self):
return tuple(arg.data for arg in self.saved_variables)
def _do_forward(self, *input):
for i in input:
if not isinstance(i, Variable):
raise RuntimeError("expected a Variable argument, but got " +
type(i).__name__)
unpacked_input = tuple(arg.data for arg in input)
is_volatile = any(arg.volatile for arg in input)
# Save the input, so _save_for_backward can access it
self.input = input
if not is_volatile:
self.needs_input_grad = tuple(arg.requires_grad for arg in input)
self.requires_grad = any(self.needs_input_grad)
self.previous_functions = [(arg.creator or arg, id(arg)) for arg in input]
raw_output = self.forward(*unpacked_input)
if not isinstance(raw_output, tuple):
raw_output = (raw_output,)
if is_volatile:
output = tuple(Variable(tensor, volatile=True)
for tensor in raw_output)
else:
output = tuple(Variable(tensor, self, requires_grad=self.requires_grad)
for tensor in raw_output)
self.output_ids = {id(var): i for i, var in enumerate(output)}
if self.to_save:
# output has to be chained after input, so if the same tensor
# appears both in the input and output (happens for in-place
# function), we save the clean output variable.
#
# Some variables might have been changed in-place, so accessing
# their .data will throw. If they also occur in the output
# these references will be overwritten by clean variables,
# if not, they'll raise an error on backward.
t2var = {var._data: var for var in chain(input, output)}
self.saved_variables = tuple(t2var[t] for t in self.to_save)
del self.to_save
if self.non_differentiable is not None:
for var in output:
if var.data in self.non_differentiable:
var.requires_grad = False
del self.input # Remove unnecessary references to input
del self.non_differentiable # and output
if len(output) == 1:
output = output[0]
return output
def _do_backward(self, grad_output, retain_variables):
if not hasattr(self, 'saved_variables'):
raise RuntimeError("Trying to backward through the graph second "
"time, but the buffers have already been freed. Please "
"specify retain_variables=True when calling backward for "
"the first time.")
grad_input = self.backward(*grad_output)
if not isinstance(grad_input, tuple):
grad_input = (grad_input,)
assert len(grad_input) == len(self.previous_functions), \
self.__class__.__name__ + ' returned an invalid number of gradient tensors'
self._call_hooks(grad_input, grad_output)
if not retain_variables:
del self.saved_variables
return grad_input
def _call_hooks(self, grad_input, grad_output):
for hook in self.backward_hooks.values():
hook(grad_input, grad_output)
self.non_differentiable = args
def register_hook(self, name, hook):
self.backward_hooks = self.backward_hooks or OrderedDict()
assert name not in self.backward_hooks, \
"Trying to register a second hook with name {}".format(name)
self.backward_hooks[name] = hook
def remove_hook(self, name):
assert name in self.backward_hooks, \
assert self.backward_hooks and name in self.backward_hooks, \
"Trying to remove an inexistent hook with name {}".format(name)
del self.backward_hooks[name]
@ -124,3 +43,87 @@ class InplaceFunction(Function):
def __init__(self, inplace=False):
super(InplaceFunction, self).__init__()
self.inplace = inplace
def _nested_map(condition, fn):
def _map(obj):
if condition(obj):
return fn(obj)
elif obj is None:
return None
elif isinstance(obj, (list, tuple)):
return type(obj)(_map(x) for x in obj)
else:
raise ValueError("NestedIOFunction doesn't know how to process "
"an input object of type " + torch.typename(obj))
return _map
def _iter_filter(condition):
def _iter(obj):
if condition(obj):
yield obj
elif obj is None:
return
elif isinstance(obj, (list, tuple)):
for o in obj:
for var in _iter(o):
yield var
else:
raise ValueError("NestedIOFunction doesn't know how to process "
"an input object of type " + torch.typename(obj))
return _iter
_iter_variables = _iter_filter(lambda o: isinstance(o, torch.autograd.Variable))
_iter_tensors = _iter_filter(torch.is_tensor)
_iter_None_tensors = _iter_filter(lambda o: o is None or torch.is_tensor(o))
_map_variable_tensor = _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: o.data)
def _map_tensor_fromiter(itr):
return _nested_map(lambda o: torch.is_tensor(o), lambda o: next(itr))
class NestedIOFunction(Function):
def _do_forward(self, *input):
self._nested_input = input
flat_input = tuple(_iter_variables(input))
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
nested_output = self._nested_output
nested_variables = _map_tensor_fromiter(iter(flat_output))(self._nested_output)
return nested_variables
def backward(self, *gradients):
nested_gradients = _map_tensor_fromiter(iter(gradients))(self._nested_output)
del self._nested_output
result = self.backward_extended(*nested_gradients)
del self._to_save_nested
return tuple(_iter_None_tensors(result))
__call__ = _do_forward
def forward(self, *args):
nested_tensors = _map_variable_tensor(self._nested_input)
result = self.forward_extended(*nested_tensors)
del self._nested_input
self._nested_output = result
return tuple(_iter_tensors(result))
def save_for_backward(self, *args):
self.to_save = tuple(_iter_tensors(args))
self._to_save_nested = args
@property
def saved_tensors(self):
flat_tensors = super(NestedIOFunction, self).saved_tensors
return _map_tensor_fromiter(iter(flat_tensors))(self._to_save_nested)
def mark_dirty(self, *args, **kwargs):
self.dirty_tensors = tuple(_iter_tensors((args, kwargs)))
def mark_non_differentiable(self, *args, **kwargs):
self.non_differentiable = tuple(_iter_tensors((args, kwargs)))
def forward_extended(self, *input):
raise NotImplementedError
def backward_extended(self, *grad_output):
raise NotImplementedError
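# A hypothetical subclass, sketched only to illustrate the extension points
# above (names like AddPair are illustrative, not part of the codebase):
# forward_extended / backward_extended see nested structures of tensors, while
# the flattening into plain tuples is handled by NestedIOFunction itself.
#
#   class AddPair(NestedIOFunction):
#       def forward_extended(self, pair):
#           a, b = pair
#           self.save_for_backward(a, b)
#           return a + b
#
#       def backward_extended(self, grad_output):
#           # both inputs receive the same gradient
#           return (grad_output, grad_output)
#
#   out = AddPair()((var_a, var_b))   # var_a, var_b: Variables of equal size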

View File

@ -1,6 +1,5 @@
from itertools import repeat
from ..variable import Variable
from ..function import Function, InplaceFunction
@ -345,7 +344,6 @@ class Addcmul(InplaceFunction):
self.scale = scale
def forward(self, add_tensor, mul_tensor1, mul_tensor2):
self.add_tensor_size = add_tensor.size().tolist()
self.save_for_backward(mul_tensor1, mul_tensor2)
if self.inplace:
return add_tensor.addcmul_(self.scale, mul_tensor1, mul_tensor2)
@ -375,7 +373,6 @@ class Addcdiv(InplaceFunction):
self.scale = scale
def forward(self, add_tensor, div_tensor1, div_tensor2):
self.add_tensor_size = add_tensor.size().tolist()
self.save_for_backward(div_tensor1, div_tensor2)
if self.inplace:
return add_tensor.addcdiv_(self.scale, div_tensor1, div_tensor2)

View File

@ -9,7 +9,7 @@ class _DimReduceFunction(Function):
self.dim = dim
def forward(self, input):
self.input_size = input.size().tolist()
self.input_size = input.size()
fn = getattr(input, self.fn_name)
if self.dim is None:
return input.new((fn(),))
@ -22,7 +22,7 @@ class Sum(_DimReduceFunction):
def backward(self, grad_output):
if self.dim is None:
return grad_output.new(*self.input_size).fill_(grad_output[0])
return grad_output.new(self.input_size).fill_(grad_output[0])
else:
repeats = [1 for _ in self.input_size]
repeats[self.dim] = self.input_size[self.dim]
@ -32,7 +32,7 @@ class Sum(_DimReduceFunction):
class Prod(_DimReduceFunction):
def forward(self, input):
self.input_size = input.size().tolist()
self.input_size = input.size()
if self.dim is None:
self.result = input.prod()
self.save_for_backward(input)
@ -45,7 +45,7 @@ class Prod(_DimReduceFunction):
def backward(self, grad_output):
if self.dim is None:
input, = self.saved_tensors
grad_input = grad_output.new(*self.input_size).fill_(self.result)
grad_input = grad_output.new(self.input_size).fill_(self.result)
return grad_input.div(input)
else:
input, output = self.saved_tensors
@ -81,7 +81,7 @@ class _SelectionFunction(Function):
def forward(self, input):
fn = getattr(input, type(self).__name__.lower())
self.input_size = input.size().tolist()
self.input_size = input.size()
if self.dim is None and self.has_all_reduce:
value = fn(*self.additional_args)
self.indices = tuple(input.eq(value).nonzero()[0])
@ -142,7 +142,6 @@ class Norm(Function):
self.dim = dim
def forward(self, input):
self.input_size = input.size().tolist()
if self.dim is None:
self.norm = input.norm(self.norm_type)
self.save_for_backward(input)

View File

@ -3,26 +3,72 @@ import torch
from torch._utils import _accumulate
from ..function import Function, InplaceFunction
from ..variable import Variable
class Index(Function):
def __init__(self, *index):
def __init__(self, index):
super(Index, self).__init__()
self.index = index
def forward(self, i):
self.input_size = i.size()
return i[self.index]
result = i.index(self.index)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
# TODO: this won't have to be zeroed
grad_input = grad_output.new(self.input_size).zero_()
grad_input[self.index].copy_(grad_output)
grad_input.index(self.index).copy_(grad_output)
return grad_input
class SetItem(InplaceFunction):
def __init__(self, index, value=None):
super(SetItem, self).__init__(True)
self.index = index
self.value = value
def forward(self, i, value=None):
self.mark_dirty(i)
if value is None:
value = self.value
i.set_index(self.index, value)
return i
def backward(self, grad_output):
if self.value is None:
grad_input = grad_output.clone()
grad_input.set_index(self.index, 0)
grad_value = grad_output.index(self.index).clone()
return grad_input, grad_value
else:
grad_input = grad_output.clone()
grad_input.set_index(self.index, 0)
return grad_input
class NoGrad(Function):
def forward(self, i):
result = i.new(i)
self.mark_non_differentiable(result)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
assert False, "backward of NoGrad should never be called"
def _do_forward(self, *args, **kwargs):
result = super(NoGrad, self)._do_forward(*args, **kwargs)
self.requires_grad = False
return result
__call__ = _do_forward
class Transpose(Function):
def __init__(self, *dims):
@ -31,7 +77,9 @@ class Transpose(Function):
self.dims = dims
def forward(self, i):
return i.transpose(*self.dims)
result = i.transpose(*self.dims)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
return grad_output.transpose(*self.dims)
@ -45,7 +93,9 @@ class View(Function):
def forward(self, i):
self.input_size = i.size()
return i.view(*self.sizes)
result = i.view(*self.sizes)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
# TODO: not sure if this clone is necessary
@ -62,7 +112,9 @@ class Expand(Function):
self.expanded_dims = [dim for dim, (expanded, original)
in enumerate(zip(self.sizes, i.size()))
if expanded != original]
return i.expand(*self.sizes)
result = i.expand(*self.sizes)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
grad_input = grad_output
@ -88,20 +140,24 @@ class Type(Function):
class CudaTransfer(Function):
def __init__(self, device_id=None):
def __init__(self, device_id=None, async=False):
super(CudaTransfer, self).__init__()
self.device_id = device_id
self.async = async
def forward(self, i):
self.source_device = -1 if not i.is_cuda else i.get_device()
self.source_was_cuda = i.is_cuda
if self.device_id:
return i.cuda(self.device_id)
return i.cuda(self.device_id, async=self.async)
else:
return i.cuda()
return i.cuda(async=self.async)
def backward(self, grad_output):
if self.source_device != -1:
return grad_output.cuda(self.source_device)
elif self.source_was_cuda:
return grad_output
else:
return grad_output.cpu()
@ -116,7 +172,9 @@ class Permute(Function):
self.rev_dim_indices[dim_idx] = i
def forward(self, i):
return i.permute(*self.dim_indices)
result = i.permute(*self.dim_indices)
self.mark_shared_storage((i, result))
return result
def backward(self, grad_output):
return grad_output.permute(*self.rev_dim_indices)
@ -266,7 +324,9 @@ class Resize(Function):
'x'.join(map(str, self.sizes)), self.numel,
'x'.join(map(str, tensor.size())), tensor.numel()))
self.input_sizes = tensor.size()
return tensor.new(tensor).resize_(*self.sizes)
result = tensor.new(tensor).resize_(*self.sizes)
self.mark_shared_storage((tensor, result))
return result
def backward(self, grad_output):
assert grad_output.numel() == self.numel
@ -292,9 +352,11 @@ class Squeeze(Function):
self.input_size = input.size()
self.numel = input.numel()
if self.dim is not None:
return input.squeeze(self.dim)
result = input.squeeze(self.dim)
else:
return input.squeeze()
result = input.squeeze()
self.mark_shared_storage((input, result))
return result
def backward(self, grad_output):
assert grad_output.numel() == self.numel
@ -308,7 +370,9 @@ class Unsqueeze(Function):
self.dim = dim
def forward(self, input):
return input.unsqueeze(self.dim)
result = input.unsqueeze(self.dim)
self.mark_shared_storage((input, result))
return result
def backward(self, grad_output):
return grad_output.squeeze(self.dim)
@ -332,7 +396,7 @@ class MaskedCopy(InplaceFunction):
if self.needs_input_grad[0]:
grad_tensor1 = grad_output.clone().masked_fill_(mask, 0)
if self.needs_input_grad[2]:
grad_tensor2 = grad_output.clone().masked_fill_(mask.eq(0), 0)
grad_tensor2 = grad_output.masked_select(mask)
return grad_tensor1, None, grad_tensor2
@ -434,7 +498,29 @@ class Topk(_MultiSelectionFunction):
return super(Topk, self).forward(input)
# TODO: chunk
class Chunk(Function):
def __init__(self, num_chunks, dim=0):
super(Chunk, self).__init__()
self.num_chunks = num_chunks
self.dim = dim
def forward(self, i):
self.input_size = i.size()
result = i.chunk(self.num_chunks, self.dim)
self.mark_shared_storage(*((i, chunk) for chunk in result))
return result
def backward(self, *grad_output):
grad_input = grad_output[0].new(self.input_size)
offset = 0
for grad in grad_output:
grad_size = grad.size(self.dim)
grad_input.narrow(self.dim, offset, grad_size).copy_(grad)
offset += grad_size
return grad_input
# TODO: gather
# TODO: kthvalue
# TODO: repeat

View File

@ -1,9 +1,12 @@
import torch._C as _C
from collections import OrderedDict
from .functions import *
class Variable(object):
_fallthrough_methods = [
class Variable(_C._VariableBase):
_fallthrough_methods = {
'size',
'stride',
'nelement',
@ -12,22 +15,12 @@ class Variable(object):
'is_contiguous',
'is_same_size',
'is_set_to',
'is_size',
'is_signed',
'numel',
'dim',
'get_device',
'is_cuda',
]
def __init__(self, tensor, creator=None, volatile=False, requires_grad=True):
self.creator = creator
self.volatile = volatile
self.dirty = False
self.requires_grad = (not volatile) and requires_grad
self._data = tensor
self._grad = None
self.backward_hooks = OrderedDict()
}
@property
def grad(self):
@ -37,14 +30,21 @@ class Variable(object):
return self._grad
@property
def data(self):
if self.dirty:
raise RuntimeError('Accessing data of a dirty variable!')
return self._data
def requires_grad(self):
return self._requires_grad
def mark_dirty(self):
self.dirty = True
self._data = None
@requires_grad.setter
def requires_grad(self, value):
if self.creator is not None:
if value is False:
hint = (" If you want to use a computed variable in a subgraph "
"that doesn't require differentiation use "
"var_no_grad = var.no_grad().")
else:
hint = ''
raise RuntimeError("you can only change requires_grad flags of "
"leaf variables." + hint)
self._requires_grad = value
def __getattr__(self, name):
if name in self._fallthrough_methods:
@ -52,11 +52,26 @@ class Variable(object):
raise AttributeError(name)
def __getitem__(self, key):
if isinstance(key, Variable) and isinstance(key.data, torch.ByteTensor):
if (isinstance(key, Variable) and
type(key.data).__name__ == 'ByteTensor'):
return MaskedSelect()(self, key)
return Index(key)(self)
# TODO: setitem
def __setitem__(self, key, value):
if (isinstance(key, Variable) and
type(key.data).__name__ == 'ByteTensor'):
if isinstance(value, Variable):
return MaskedCopy(inplace=True)(self, key, value)
else:
return MaskedFill(value, inplace=True)(self, key)
else:
if isinstance(value, Variable):
return SetItem(key)(self, value)
else:
return SetItem(key, value)(self)
def __iter__(self):
return iter(map(lambda i: self[i], range(self.size(0))))
def __deepcopy__(self, memo):
if self.creator is None:
@ -77,23 +92,17 @@ class Variable(object):
self._execution_engine.run_backward(self, gradient, retain_variables)
def __repr__(self):
if self.dirty:
return 'Variable used in an in-place operation'
return 'Variable containing:' + self.data.__repr__()
def _call_hooks(self, grad_output):
for hook in self.backward_hooks.values():
hook(grad_output)
def register_hook(self, name, hook):
if self.volatile:
raise RuntimeError('registering hook on a volatile variable')
if not self.requires_grad:
raise RuntimeError("registering hook on a variable that doesn't require gradient")
if self.creator is not None:
idx = self.creator.output_ids[id(self)]
self.creator.register_hook(name, lambda gi, go: hook(go[idx]))
self.creator.register_hook(name, lambda gi, go: hook(go[self.output_nr]))
else:
self.backward_hooks = self.backward_hooks or OrderedDict()
assert name not in self.backward_hooks, \
"Trying to register a second hook with name {}".format(name)
self.backward_hooks[name] = hook
@ -104,19 +113,25 @@ class Variable(object):
if self.creator is not None:
self.creator.remove_hook(name)
else:
assert name in self.backward_hooks, \
assert self.backward_hooks and name in self.backward_hooks, \
"Trying to remove an inexistent hook with name {}".format(name)
del self.backward_hooks[name]
def _do_backward(self, grad_output, retain_variables):
assert len(grad_output) == 1
assert not self.dirty
self._call_hooks(grad_output[0])
assert self._version == 0 and self.creator is None, \
"leaf variable was used in an inplace operation"
if self.backward_hooks:
for hook in self.backward_hooks.values():
hook(grad_output[0])
self.grad.add_(grad_output[0])
return tuple()
def no_grad(self):
return NoGrad()(self)
def contiguous(self):
self._data = self.data.contiguous()
self.data = self.data.contiguous()
return self
def clone(self):
@ -131,8 +146,8 @@ class Variable(object):
module = torch._import_dotted_name(self.data.__module__)
return getattr(module, name)
def cuda(self, device_id=None):
return CudaTransfer(device_id)(self)
def cuda(self, device_id=None, async=False):
return CudaTransfer(device_id, async)(self)
def cpu(self):
return self.type(getattr(torch, type(self.data).__name__))
@ -206,7 +221,7 @@ class Variable(object):
def div_(self, other):
if not isinstance(other, Variable) and not torch.is_tensor(other):
return DivConstant(other, inplace=True)(self)
raise RuntimeError("mul_ only supports scalar multiplication")
raise RuntimeError("div_ only supports scalar multiplication")
def pow(self, other):
if isinstance(other, Variable):
@ -430,7 +445,7 @@ class Variable(object):
def addr(self, *args):
return self._blas(Addr, args, False)
def addr(self, *args):
def addr_(self, *args):
return self._blas(Addr, args, True)
def dot(self, other):
@ -504,18 +519,18 @@ class Variable(object):
def transpose(self, dim1, dim2):
return Transpose(dim1, dim2)(self)
def cat(self, iterable, dim=0):
return Concat(dim)(*iterable)
def select(self, dim, _index):
index = tuple(slice(None, None) for _ in range(dim)) + (_index,)
return Index(*index)(self)
return Index(index)(self)
def narrow(self, dim, start_index, length):
index = tuple(slice(None, None) for _ in range(dim)) + \
(slice(start_index, start_index+length),)
return Index(*index)(self)
return Index(index)(self)
def chunk(self, num_chunks, dim=0):
return Chunk(num_chunks, dim)(self)
def squeeze(self, dim=None):
return Squeeze(dim)(self)
@ -566,8 +581,59 @@ class Variable(object):
def __neg__(self):
return Negate()(self)
class _torch(object):
from .functions import *
from .engine import ExecutionEngine
@staticmethod
def cat(iterable, dim=0):
return Concat(dim)(*iterable)
Variable._execution_engine = ExecutionEngine()
@staticmethod
def _blas(cls, args, inplace):
num_args = len(args)
alpha = beta = 1
if num_args > 5:
raise RuntimeError("too many args")
if num_args == 5:
alpha, beta = args[0], args[2]
tensors = args[1:2] + args[3:]
elif num_args == 4:
alpha = args[0]
tensors = args[1:]
else:
tensors = args
return cls(alpha, beta, inplace)(*tensors)
@classmethod
def addmm(cls, *args):
return cls._blas(Addmm, args, False)
@classmethod
def addbmm(cls, *args):
return cls._blas(Addbmm, args, False)
@classmethod
def baddbmm(cls, *args):
return cls._blas(Baddbmm, args, False)
@classmethod
def addmv(cls, *args):
return cls._blas(Addmv, args, False)
@classmethod
def addr(cls, *args):
return cls._blas(Addr, args, False)
for method in dir(Variable):
# This will also wrap some methods that normally aren't part of the
# functional interface, but we don't care, as they won't ever be used
if method.startswith('_') or method.endswith('_'):
continue
if hasattr(Variable._torch, method):
continue
as_static = staticmethod(getattr(Variable, method))
setattr(Variable._torch, method, as_static)
from .engine import ImperativeEngine
Variable._execution_engine = ImperativeEngine()

View File

@ -5,23 +5,25 @@ import os.path as path
lib = None
# TODO: fix libname for OSX / Windows
# TODO: just load 5.1, not 5.1.3
# TODO: dynamic version checks via cudnnGetVersion
libname = 'libcudnn.so.5.1.3'
# TODO: load 5.1.3 if using CUDA 7.5 and 5.1.5 if using CUDA 8.0
thisdir = path.dirname(__file__)
libpaths = ['', path.join(thisdir, '../../lib')]
libnames = ['libcudnn.so.5.1.5', 'libcudnn.so.5.1.3']
def _loadlib():
global lib
loaded = False
for libpath in libpaths:
try:
lib = ctypes.cdll.LoadLibrary(path.join(libpath, libname))
loaded = True
for libname in libnames:
try:
lib = ctypes.cdll.LoadLibrary(path.join(libpath, libname))
loaded = True
break
except OSError:
continue
if loaded:
break
except OSError:
continue
if loaded:
lib.cudnnGetErrorString.restype = ctypes.c_char_p
else:
@ -41,6 +43,13 @@ def is_acceptable(tensor):
return False
return True
__cudnn_version = []
def version():
if not lib:
raise RuntimeError("cuDNN not initialized")
if len(__cudnn_version) == 0:
__cudnn_version.append(lib.cudnnGetVersion())
return __cudnn_version[0]
_handles = {}
@ -70,6 +79,13 @@ CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT = 2
CUDNN_TENSOR_NCHW = 0
CUDNN_TENSOR_NHWC = 1
CUDNN_RNN_RELU = 0
CUDNN_RNN_TANH = 1
CUDNN_LSTM = 2
CUDNN_GRU = 3
CUDNN_LINEAR_INPUT = 0
CUDNN_SKIP_INPUT = 1
class CuDNNHandle:
def __init__(self):
@ -86,14 +102,16 @@ class CuDNNError(RuntimeError):
msg = '{}: {}'.format(status, get_error_string(status))
super(CuDNNError, self).__init__(msg)
class TensorDescriptor:
class TensorDescriptor(object):
def __init__(self):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateTensorDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
def __del__(self):
check_error(lib.cudnnDestroyTensorDescriptor(self))
check_error(lib.cudnnDestroyTensorDescriptor(self._as_parameter_))
del self._as_parameter_
def set(self, tensor):
self._type = tensor.type()
@ -106,14 +124,44 @@ class TensorDescriptor:
def as_tuple(self):
return (self._type, tuple(self._size), tuple(self._stride))
class ConvolutionDescriptor:
class TensorDescriptorArray(object):
def __init__(self, N):
self.ptrs = (ctypes.c_void_p * N)()
for i in range(N):
ptr = ctypes.byref(self.ptrs, i * ctypes.sizeof(ctypes.c_void_p))
check_error(lib.cudnnCreateTensorDescriptor(ptr))
self._as_parameter_ = self.ptrs
def __del__(self):
for ptr in self.ptrs:
check_error(lib.cudnnDestroyTensorDescriptor(ctypes.c_void_p(ptr)))
def __getitem__(self, key):
return ctypes.c_void_p(self.ptrs[key])
def set(self, tensor):
self._type = tensor.type()
self._size = tensor.size()
self._stride = tensor.stride()
for ptr in self.ptrs:
check_error(lib.cudnnSetTensorNdDescriptor(
ctypes.c_void_p(ptr), _typemap[tensor.type()], tensor.dim(),
int_array(tensor.size()), int_array(tensor.stride())))
def as_tuple(self):
return (self._type, tuple(self._size), tuple(self._stride))
class ConvolutionDescriptor(object):
def __init__(self):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateConvolutionDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
def __del__(self):
check_error(lib.cudnnDestroyConvolutionDescriptor(self))
check_error(lib.cudnnDestroyConvolutionDescriptor(self._as_parameter_))
del self._as_parameter_
def set(self, typename, pad, stride):
self._pad = pad
@ -126,24 +174,75 @@ class ConvolutionDescriptor:
def as_tuple(self):
return (self._pad, self._stride)
class FilterDescriptor:
class FilterDescriptor(object):
def __init__(self):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateFilterDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
def __del__(self):
check_error(lib.cudnnDestroyFilterDescriptor(self))
check_error(lib.cudnnDestroyFilterDescriptor(self._as_parameter_))
del self._as_parameter_
def set(self, weight):
self._size = weight.size()
datatype = _typemap[weight.type()]
check_error(lib.cudnnSetFilterNdDescriptor(
self, datatype, CUDNN_TENSOR_NCHW, 4, int_array(weight.size())))
self, datatype, CUDNN_TENSOR_NCHW, weight.ndimension(), int_array(weight.size())))
def as_tuple(self):
return tuple(self._size)
class DropoutDescriptor(object):
def __init__(self, handle, dropout, seed):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
dropout_states_size = ctypes.c_long()
check_error(lib.cudnnDropoutGetStatesSize(
handle,
ctypes.byref(dropout_states_size)))
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
check_error(lib.cudnnSetDropoutDescriptor(
self,
handle,
ctypes.c_float(dropout),
ctypes.c_void_p(self.state.data_ptr()),
ctypes.c_size_t(self.state.size(0)),
ctypes.c_ulonglong(seed),
))
def __del__(self):
check_error(lib.cudnnDestroyDropoutDescriptor(self))
class RNNDescriptor(object):
def __init__(self, hidden_size, num_layers, dropout_desc, input_mode,
bidirectional, mode, datatype):
ptr = ctypes.c_void_p()
check_error(lib.cudnnCreateRNNDescriptor(ctypes.byref(ptr)))
self._as_parameter_ = ptr
check_error(lib.cudnnSetRNNDescriptor(
self,
hidden_size,
num_layers,
dropout_desc,
input_mode,
bidirectional,
mode,
datatype
))
def __del__(self):
check_error(lib.cudnnDestroyRNNDescriptor(self))
class ConvolutionAlgoPerf(ctypes.Structure):
_fields_ = [
("algo", ctypes.c_int),
@ -175,6 +274,12 @@ _typemap = {
'torch.cuda.DoubleTensor': CUDNN_DATA_DOUBLE,
}
_sizeofmap = {
CUDNN_DATA_HALF : 2,
CUDNN_DATA_FLOAT : 4,
CUDNN_DATA_DOUBLE : 8,
}
def c_type(tensor):
if isinstance(tensor, torch.cuda.HalfTensor):
return ctypes.c_float
@ -189,8 +294,11 @@ def int_array(itr):
array_type = ctypes.c_int * len(itr)
return array_type(*itr)
def descriptor(tensor):
descriptor = TensorDescriptor()
def descriptor(tensor, N=None):
if N is not None:
descriptor = TensorDescriptorArray(N)
else:
descriptor = TensorDescriptor()
if tensor.dim() == 2:
tensor = tensor.view(tensor.size(0), tensor.size(1), 1, 1)
elif tensor.dim() == 3:

View File

@ -1,135 +0,0 @@
import torch.cuda
import torch.backends.cudnn as cudnn
import ctypes
def forward(fn, input, weight, bias, output):
with torch.cuda.device_of(input):
handle = cudnn.get_handle()
out_channels, in_channels = weight.size(0), weight.size(1)
inslice = input.narrow(1, 0, in_channels // fn.groups)
outslice = output.narrow(1, 0, out_channels // fn.groups)
weight_slice = (
weight.narrow(0, 0, out_channels // fn.groups)
.narrow(1, 0, in_channels // fn.groups)
)
fn.input_offset = inslice[0].numel() * input.element_size()
fn.output_offset = outslice[0].numel() * output.element_size()
fn.weight_offset = weight_slice.numel() * weight.element_size()
fn.idesc = cudnn.descriptor(inslice)
fn.odesc = cudnn.descriptor(outslice)
fn.odesc_bias = cudnn.descriptor(output)
fn.wdesc = cudnn.FilterDescriptor()
fn.wdesc.set(weight_slice)
fn.conv_desc = cudnn.ConvolutionDescriptor()
fn.conv_desc.set(weight.type(), fn.pad, fn.stride)
fwd_alg = cudnn.convolution_forward_algorithm(
fn.idesc, fn.wdesc, fn.conv_desc, fn.odesc)
workspace_size = ctypes.c_size_t()
cudnn.convolution_forward_workspace_size(
cudnn.get_handle(), fn.idesc, fn.wdesc, fn.conv_desc,
fn.odesc, fwd_alg, ctypes.byref(workspace_size))
workspace = torch.cuda.ByteStorage(workspace_size.value)
alpha = cudnn.c_type(input)(1)
beta = cudnn.c_type(output)(0)
for g in range(fn.groups):
input_ptr = ctypes.c_void_p(input.data_ptr() + g * fn.input_offset)
weight_ptr = ctypes.c_void_p(weight.data_ptr() + g * fn.weight_offset)
output_ptr = ctypes.c_void_p(output.data_ptr() + g * fn.output_offset)
workspace_ptr = ctypes.c_void_p(workspace.data_ptr())
cudnn.convolution_forward(
handle, ctypes.byref(alpha), fn.idesc, input_ptr, fn.wdesc,
weight_ptr, fn.conv_desc, fwd_alg, workspace_ptr,
workspace_size, ctypes.byref(beta), fn.odesc, output_ptr)
if bias is not None:
alpha = cudnn.c_type(input)(1)
beta = cudnn.c_type(output)(1)
fn.bias_desc = cudnn.descriptor(bias.view(1, bias.size(0), 1, 1))
cudnn.add_tensor(
handle, ctypes.byref(alpha), fn.bias_desc,
ctypes.c_void_p(bias.data_ptr()), ctypes.byref(beta),
fn.odesc_bias, ctypes.c_void_p(output.data_ptr()))
return output
def backward_data(fn, grad_output, input, weight):
with torch.cuda.device_of(input):
handle = cudnn.get_handle()
grad_input = input.new().resize_as_(input)
bwd_data_alg = cudnn.convolution_backward_data_algorithm(
fn.wdesc, fn.odesc, fn.conv_desc, fn.idesc)
workspace_size = ctypes.c_size_t()
cudnn.convolution_backward_data_workspace_size(
handle, fn.wdesc, fn.odesc, fn.conv_desc, fn.idesc,
bwd_data_alg, ctypes.byref(workspace_size))
workspace = torch.cuda.ByteStorage(workspace_size.value)
alpha = cudnn.c_type(input)(1)
beta = cudnn.c_type(input)(0)
for g in range(fn.groups):
cudnn.convolution_backward_data(
handle, ctypes.byref(alpha), fn.wdesc,
ctypes.c_void_p(weight.data_ptr() + g * fn.weight_offset),
fn.odesc,
ctypes.c_void_p(grad_output.data_ptr() + g * fn.output_offset),
fn.conv_desc, bwd_data_alg, ctypes.c_void_p(workspace.data_ptr()),
workspace_size, ctypes.byref(beta), fn.idesc,
ctypes.c_void_p(grad_input.data_ptr() + g * fn.input_offset))
return grad_input
def backward_filter(fn, grad_output, input, weight):
with torch.cuda.device_of(input):
handle = cudnn.get_handle()
grad_weight = weight.new().resize_as_(weight)
bwd_filter_alg = cudnn.convolution_backward_filter_algorithm(
fn.idesc, fn.odesc, fn.conv_desc, fn.wdesc)
workspace_size = ctypes.c_size_t()
cudnn.convolution_backward_filter_workspace_size(
handle, fn.idesc, fn.odesc, fn.conv_desc, fn.wdesc,
bwd_filter_alg, ctypes.byref(workspace_size))
workspace = torch.cuda.ByteStorage(workspace_size.value)
alpha = cudnn.c_type(input)(1)
beta = cudnn.c_type(input)(0)
for g in range(fn.groups):
cudnn.convolution_backward_filter(
handle, ctypes.byref(alpha), fn.idesc,
ctypes.c_void_p(input.data_ptr() + g * fn.input_offset),
fn.odesc,
ctypes.c_void_p(grad_output.data_ptr() + g * fn.output_offset),
fn.conv_desc, bwd_filter_alg,
ctypes.c_void_p(workspace.data_ptr()), workspace_size,
ctypes.byref(beta), fn.wdesc,
ctypes.c_void_p(grad_weight.data_ptr() + g * fn.weight_offset))
return grad_weight
def backward_bias(fn, grad_output, bias):
with torch.cuda.device_of(grad_output):
grad_bias = bias.new().resize_as_(bias)
alpha = cudnn.c_type(grad_output)(1)
beta = cudnn.c_type(grad_output)(0)
cudnn.convolution_backward_bias(
cudnn.get_handle(), ctypes.byref(alpha), fn.odesc_bias,
ctypes.c_void_p(grad_output.data_ptr()), ctypes.byref(beta),
fn.bias_desc, ctypes.c_void_p(grad_bias.data_ptr()))
return grad_bias

torch/backends/cudnn/rnn.py (new file, 414 lines)
View File

@ -0,0 +1,414 @@
import torch.cuda
import torch.backends.cudnn as cudnn
from torch.backends.cudnn import check_error
import ctypes
def get_cudnn_mode(mode):
if mode == 'RNN_RELU':
return cudnn.CUDNN_RNN_RELU
elif mode == 'RNN_TANH':
return cudnn.CUDNN_RNN_TANH
elif mode == 'LSTM':
return cudnn.CUDNN_LSTM
elif mode == 'GRU':
return cudnn.CUDNN_GRU
else:
raise Exception("Unknown mode: {}".format(mode))
def init_dropout_descriptor(fn, handle):
return cudnn.DropoutDescriptor(
handle,
fn.dropout,
fn.seed
)
def init_rnn_descriptor(fn):
return cudnn.RNNDescriptor(
fn.hidden_size,
fn.num_layers,
fn.dropout_desc,
fn.input_mode,
fn.bidirectional,
fn.mode,
fn.datatype
)
def init_weight_descriptor(fn, weight):
w_desc = cudnn.FilterDescriptor()
w_view = weight.view(-1, 1, 1) # seems that filters require >=3 dimensions
w_desc.set(w_view)
return w_desc
def _input_size(fn):
return (fn.seq_length, fn.mini_batch, fn.input_size)
def _hidden_size(fn):
return (fn.num_layers * fn.num_directions, fn.mini_batch, fn.hidden_size)
def _output_size(fn):
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
def get_num_weights(handle, rnn_desc, x_desc, datatype):
weight_size = ctypes.c_long()
check_error(cudnn.lib.cudnnGetRNNParamsSize(
handle,
rnn_desc,
x_desc,
ctypes.byref(weight_size),
datatype
))
elem_size = cudnn._sizeofmap[datatype]
assert(weight_size.value % elem_size == 0)
return weight_size.value // elem_size
def get_parameters(fn, handle, weight_buf):
"""Returns weight and bias tensors for each layer of the RNN. These tensors
are views on the underlying weight buffer allocated by CuDNN.
Note: for LSTM and GRU, which have multiple parameters of each type (4 and 3, respectively),
these parameters are concatenated along the first dimension.
These parameters are returned in a consistent order by CuDNN:
(reset, forget, cell, output) for LSTM
(reset, input, new) for GRU
Args:
fn: The RNN function object holding the RNN state
handle: a CuDNN handle
weight_buf: a 1D tensor containing the CuDNN-allocated weight (or grad_weight) buffer
Returns:
parameters: [(weight_ih, weight_hh, bias_ih, bias_hh)*], with length equal to the num_layers.
"""
cudnn_methods = [
cudnn.lib.cudnnGetRNNLinLayerMatrixParams,
cudnn.lib.cudnnGetRNNLinLayerBiasParams
]
params = []
num_linear_layers = _num_linear_layers(fn)
num_layers = fn.num_directions * fn.num_layers
for layer in range(num_layers):
layer_params = []
for cudnn_method in cudnn_methods:
for linear_id in range(num_linear_layers):
lin_layer_mat_desc = cudnn.FilterDescriptor()
matrix_pointer = ctypes.c_void_p()
check_error(cudnn_method(
handle,
fn.rnn_desc,
layer,
fn.x_descs[0],
fn.w_desc,
ctypes.c_void_p(weight_buf.data_ptr()),
linear_id,
lin_layer_mat_desc,
ctypes.byref(matrix_pointer)))
data_type = ctypes.c_int()
format = ctypes.c_int()
nb_dims = ctypes.c_int()
min_dim = 3
filter_dim_a = torch.IntTensor(min_dim)
check_error(cudnn.lib.cudnnGetFilterNdDescriptor(
lin_layer_mat_desc,
min_dim,
ctypes.byref(data_type),
ctypes.byref(format),
ctypes.byref(nb_dims),
ctypes.c_void_p(filter_dim_a.data_ptr())))
filter_dim_a.resize_(nb_dims.value)
elem_size = cudnn._sizeofmap[fn.datatype]
offset_bytes = (matrix_pointer.value - weight_buf.data_ptr())
assert(offset_bytes % elem_size == 0)
offset = offset_bytes // elem_size
# for all the RNN types provided by CUDNN, all the ih weights
# are the same size and are allocated in a contiguous chunk
# (same for the hh weights, and the ih and hh biases).
# Since we're storing all the weights in a single tensor anyway,
# might as well merge the CUDNN ones into a single tensor as well
if linear_id == 0 or linear_id == num_linear_layers / 2:
assert(filter_dim_a.prod() == filter_dim_a[0])
param = fn.weight_buf.new().set_(
weight_buf.storage(), offset,
filter_dim_a[0] * num_linear_layers // 2, filter_dim_a[2])
layer_params.append(param)
else:
assert(cur_offset == offset)
cur_offset = offset + filter_dim_a[0]
params.append(layer_params)
return params
def _copyParams(params_from, params_to):
for layer_params_from, layer_params_to in zip(params_from, params_to):
for param_from, param_to in zip(layer_params_from, layer_params_to):
assert(param_from.type() == param_to.type())
param_to.copy_(param_from)
def forward(fn, input, hx, weight, output, hy):
with torch.cuda.device_of(input):
lib = cudnn.lib
handle = cudnn.get_handle()
fn.datatype = cudnn._typemap[input.type()]
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
hy, cy = hy
else:
cx, cy = None, None
if fn.batch_first:
input = input.transpose(0, 1)
if input.dim() != 3:
raise RuntimeError(
'input must have 3 dimensions, got {}'.format(input.dim()))
if fn.input_size != input.size(2):
raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
fn.input_size, input.size(2)
))
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v5.1 and above')
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
x = input.contiguous()
output.resize_(*output_size)
hy.resize_(*hidden_size).zero_()
if cy:
cy.resize_(*hidden_size).zero_()
y = output
# init descriptors
fn.dropout_desc = init_dropout_descriptor(fn, handle)
fn.rnn_desc = init_rnn_descriptor(fn)
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
fn.hx_desc = cudnn.descriptor(hx)
fn.hy_desc = cudnn.descriptor(hx)
fn.cx_desc = cudnn.descriptor(cx) if cx else None
fn.cy_desc = cudnn.descriptor(cx) if cx else None
# create the weight buffer and copy the weights into it
num_weights = get_num_weights(
handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
fn.weight_buf = input.new(num_weights)
fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
w = fn.weight_buf
# this zero might not seem necessary, but it is in the case
# where biases are disabled; then they won't be copied and must be zero'd.
# Alternatively, _copyParams could be written more carefully.
w.zero_()
params = get_parameters(fn, handle, w)
_copyParams(weight, params)
if tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected hidden size {}, got {}'.format(
hidden_size, tuple(hx.size())))
if cx and tuple(cx.size()) != hidden_size:
raise RuntimeError('Expected cell size {}, got {}'.format(
hidden_size, tuple(cx.size())))
workspace_size = ctypes.c_long()
check_error(lib.cudnnGetRNNWorkspaceSize(
handle,
fn.rnn_desc,
fn.seq_length,
fn.x_descs,
ctypes.byref(workspace_size)
))
fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
if fn.train:
reserve_size = ctypes.c_long()
check_error(lib.cudnnGetRNNTrainingReserveSize(
handle,
fn.rnn_desc,
fn.seq_length,
fn.x_descs,
ctypes.byref(reserve_size)
))
fn.reserve = torch.cuda.ByteTensor(reserve_size.value)
check_error(lib.cudnnRNNForwardTraining(
handle,
fn.rnn_desc,
fn.seq_length,
fn.x_descs, ctypes.c_void_p(x.data_ptr()),
fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx else None,
fn.w_desc, ctypes.c_void_p(w.data_ptr()),
fn.y_descs, ctypes.c_void_p(y.data_ptr()),
fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx else None,
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
))
else: # inference
check_error(lib.cudnnRNNForwardInference(
handle,
fn.rnn_desc,
fn.seq_length,
fn.x_descs, ctypes.c_void_p(x.data_ptr()),
fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx else None,
fn.w_desc, ctypes.c_void_p(w.data_ptr()),
fn.y_descs, ctypes.c_void_p(y.data_ptr()),
fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx else None,
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0)
))
if fn.batch_first:
output = output.transpose(0, 1)
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
with torch.cuda.device_of(input):
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
grad_hx, grad_cx = grad_hx
grad_hy, grad_cy = grad_hy
else:
cx, grad_cx, grad_cy = None, None, None
if fn.batch_first:
input = input.transpose(0, 1)
grad_output = grad_output.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
hidden_size = _hidden_size(fn)
output_size = _output_size(fn)
x = input.contiguous()
dy = grad_output.contiguous()
y = output
w = fn.weight_buf
dx = grad_input.resize_as_(input)
dhy = grad_hy.resize_(*hidden_size)
dcy = grad_cy.resize_(*hidden_size) if grad_cy else None
dhx = grad_hx.resize_(*hidden_size)
dcx = grad_cx.resize_(*hidden_size) if grad_cx else None
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if not fn.train:
raise RuntimeError('backward_grad can only be called when training!')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if tuple(output.size()) != _output_size(fn):
raise RuntimeError('Expected output size {}, got {}'.format(
output_size, output.size()))
if hx and tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected hidden size {}, got {}'.format(
hidden_size, hx.size()))
if cx and tuple(cx.size()) != hidden_size:
raise RuntimeError('Expected cell size {}, got {}'.format(
hidden_size, cx.size()))
if dhy and tuple(dhy.size()) != hidden_size:
raise RuntimeError('Expected d_hidden size {}, got {}'.format(
hidden_size, dhy.size()))
if dcy and tuple(dcy.size()) != hidden_size:
raise RuntimeError('Expected d_cell size {}, got {}'.format(
hidden_size, dcy.size()))
check_error(cudnn.lib.cudnnRNNBackwardData(
handle,
fn.rnn_desc,
fn.seq_length,
fn.y_descs, ctypes.c_void_p(y.data_ptr()),
fn.y_descs, ctypes.c_void_p(dy.data_ptr()),
fn.hy_desc, ctypes.c_void_p(dhy.data_ptr()),
fn.cy_desc, ctypes.c_void_p(dcy.data_ptr()) if cx else None,
fn.w_desc, ctypes.c_void_p(w.data_ptr()),
fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx else None,
fn.x_descs, ctypes.c_void_p(dx.data_ptr()),
fn.hx_desc, ctypes.c_void_p(dhx.data_ptr()),
fn.cx_desc, ctypes.c_void_p(dcx.data_ptr()) if cx else None,
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
))
if fn.batch_first:
grad_input = grad_input.transpose(0, 1)
def _num_linear_layers(fn):
if fn.mode == cudnn.CUDNN_LSTM:
return 8
elif fn.mode == cudnn.CUDNN_GRU:
return 6
elif fn.mode == cudnn.CUDNN_RNN_RELU:
return 2
elif fn.mode == cudnn.CUDNN_RNN_TANH:
return 2
else:
raise RuntimeError('Unknown mode: {}'.format(fn.mode))
def backward_weight(fn, input, hx, output, weight, grad_weight):
with torch.cuda.device_of(input):
handle = cudnn.get_handle()
if fn.mode == cudnn.CUDNN_LSTM:
hx, cx = hx
else:
cx = None
if fn.batch_first:
input = input.transpose(0, 1)
output = output.transpose(0, 1)
input_size = _input_size(fn)
hidden_size = _hidden_size(fn)
if not fn.train:
raise RuntimeError('backward_weight can only be called when training!')
if fn.dropout != 0 and cudnn.version() < 5103:
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
if tuple(input.size()) != input_size:
raise RuntimeError('Expected input size {}, got {}'.format(
input_size, tuple(input.size())))
if tuple(hx.size()) != hidden_size:
raise RuntimeError('Expected hidden size {}, got {}'.format(
hidden_size, hx.size()))
x = input.contiguous()
y = output
dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()
check_error(cudnn.lib.cudnnRNNBackwardWeights(
handle,
fn.rnn_desc,
fn.seq_length,
fn.x_descs, ctypes.c_void_p(x.data_ptr()),
fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
fn.y_descs, ctypes.c_void_p(y.data_ptr()),
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
))
# copy the weights from the weight_buf into grad_weight
grad_params = get_parameters(fn, handle, dw)
_copyParams(grad_params, grad_weight)
return grad_weight
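A minimal usage sketch (not part of the diff), showing the intended call order for these cuDNN RNN helpers: the forward pass must run first so that fn.workspace and fn.reserve are filled, then backward_grad (cudnnRNNBackwardData), then backward_weight (cudnnRNNBackwardWeights). The forward(...) signature is assumed here; only the backward_grad/backward_weight signatures appear above.

def rnn_backward_sketch(fn, input, hx, weight, output, grad_output, grad_hy,
                        grad_input, grad_hx, grad_weight):
    # 1) forward(fn, input, hx, weight, output, hy)  # assumed signature, defined earlier in this file
    # 2) data gradients: consumes the reserve buffer saved by the forward pass
    backward_grad(fn, input, hx, weight, output,
                  grad_output, grad_hy, grad_input, grad_hx)
    # 3) weight gradients: reuses the same workspace/reserve buffers
    return backward_weight(fn, input, hx, output, weight, grad_weight)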

View File

@ -45,11 +45,10 @@ static PyObject * THPGenerator_getState(THPGenerator *self)
{
HANDLE_TH_ERRORS
THGenerator *generator = self->cdata;
THByteTensorPtr _t = THByteTensor_new();
THByteTensor_getRNGState(generator, _t.get());
PyObject *_ret = THPByteTensor_New(_t.get());
_t.release();
return _ret;
THPByteTensorPtr res = (THPByteTensor *)THPByteTensor_NewEmpty();
if (!res) return NULL;
THByteTensor_getRNGState(generator, res->cdata);
return (PyObject *)res.release();
END_HANDLE_TH_ERRORS
}
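The hunk above makes the generator-state getter allocate the result ByteTensor through THPByteTensor_NewEmpty and fill it in place. On the Python side this surfaces (assuming the torch.get_rng_state/set_rng_state wrappers present in this snapshot) roughly as:

import torch
torch.manual_seed(0)
state = torch.get_rng_state()   # ByteTensor snapshot of the CPU RNG state
a = torch.rand(3)
torch.set_rng_state(state)      # restore the snapshot
b = torch.rand(3)               # b reproduces a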

View File

@ -7,6 +7,10 @@
#include <libshm.h>
#include <TH/TH.h>
#ifdef WITH_CUDNN
#include "cudnn/Module.h"
#endif
#define WITH_NUMPY_IMPORT_ARRAY
#include "THP.h"
@ -141,24 +145,15 @@ static PyObject * THPModule_initExtension(PyObject *self, PyObject *shm_manager_
static PyObject * THPModule_getNumThreads(PyObject *module)
{
#ifdef _OPENMP
return PyLong_FromLong(omp_get_max_threads());
#else
return PyLong_FromLong(1);
#endif
return PyLong_FromLong(THGetNumThreads());
}
static PyObject * THPModule_setNumThreads(PyObject *module, PyObject *arg)
{
THPUtils_assert(THPUtils_checkLong(arg), "set_num_threads expects an int, "
"but got %s", THPUtils_typename(arg));
#ifdef _OPENMP
omp_set_num_threads(THPUtils_unpackLong(arg));
#else
PyErr_WarnEx(PyExc_RuntimeWarning, "set_num_threads is a no-op - torch was "
"compiled without OpenMP support", 1);
#endif
return 0;
THSetNumThreads((int)THPUtils_unpackLong(arg));
Py_RETURN_NONE;
}
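With this change the thread-count bindings delegate to TH's THGetNumThreads/THSetNumThreads instead of calling OpenMP directly. A quick usage sketch of the Python entry points registered below:

import torch
n = torch.get_num_threads()          # now backed by THGetNumThreads()
torch.set_num_threads(max(1, n // 2))
print(torch.get_num_threads())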
bool THPModule_isTensor(PyObject *obj)
@ -268,26 +263,64 @@ PyObject * THPModule_setDefaultTensorType(PyObject *_unused, PyObject *type)
Py_RETURN_NONE;
}
PyObject * THPModule_fromNumpy(PyObject *_unused, PyObject *array)
{
#ifndef WITH_NUMPY
THPUtils_setError("torch was compiled without numpy support");
return NULL;
#else
THPUtils_assert(PyArray_Check(array), "from_numpy expects an np.ndarray "
"but got %s", THPUtils_typename(array));
int type = PyArray_TYPE((PyArrayObject*)array);
if (type == NPY_DOUBLE) {
return PyObject_CallFunctionObjArgs(THPDoubleTensorClass, array, NULL);
} else if (type == NPY_FLOAT) {
return PyObject_CallFunctionObjArgs(THPFloatTensorClass, array, NULL);
} else if (type == NPY_INT64) {
return PyObject_CallFunctionObjArgs(THPLongTensorClass, array, NULL);
} else if (type == NPY_INT32) {
return PyObject_CallFunctionObjArgs(THPIntTensorClass, array, NULL);
} else if (type == NPY_UINT8) {
return PyObject_CallFunctionObjArgs(THPByteTensorClass, array, NULL);
}
THPUtils_setError("can't convert a given np.ndarray to a tensor - it has an "
"invalid type. The only supported types are: double, float, int64, "
"int32, and uint8.");
return NULL;
#endif
}
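A small sketch of the new from_numpy binding as exposed to Python; per the type switch above, only float64, float32, int64, int32 and uint8 arrays are accepted in this snapshot:

import numpy as np
import torch
arr = np.arange(6, dtype=np.float32).reshape(2, 3)
t = torch.from_numpy(arr)            # dispatches to THPFloatTensorClass
# an unsupported dtype such as np.float16 would hit the "invalid type" error above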
#define IMPLEMENT_STATELESS(name) \
static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *args) \
static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *args, PyObject *kwargs) \
{ \
PyObject *tensor = THPDefaultTensorClass; \
PyObject *key, *value; \
Py_ssize_t pos = 0; \
for (int i = 0; i < PyTuple_Size(args); i++) { \
PyObject *item = PyTuple_GET_ITEM(args, i); \
if (THPModule_isTensor(item)) { \
if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
tensor = item; \
break; \
goto dispatch; \
} \
} \
if (kwargs) { \
while (PyDict_Next(kwargs, &pos, &key, &value)) { \
if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
tensor = value; \
goto dispatch; \
} \
} \
} \
\
PyObject *methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
THPUtils_assert(methods, "Type %s doesn't implement statless methods", \
Py_TYPE(tensor)->tp_name); \
PyObject *method = PyObject_GetAttrString(methods, #name); \
dispatch: \
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
THPUtils_assert(methods, "Type %s doesn't implement stateless methods", \
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
THPObjectPtr method = PyObject_GetAttrString(methods, #name); \
THPUtils_assert(method, "Type %s doesn't implement stateless method " #name, \
Py_TYPE(tensor)->tp_name); \
return PyObject_Call(method, args, NULL); \
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
return PyObject_Call(method, args, kwargs); \
}
IMPLEMENT_STATELESS(sigmoid)
@ -369,7 +402,6 @@ IMPLEMENT_STATELESS(reshape)
IMPLEMENT_STATELESS(zeros)
IMPLEMENT_STATELESS(ones)
IMPLEMENT_STATELESS(index_select)
IMPLEMENT_STATELESS(narrow)
IMPLEMENT_STATELESS(addmm)
IMPLEMENT_STATELESS(addmv)
IMPLEMENT_STATELESS(addr)
@ -401,30 +433,56 @@ IMPLEMENT_STATELESS(randn)
IMPLEMENT_STATELESS(all)
IMPLEMENT_STATELESS(any)
IMPLEMENT_STATELESS(masked_select)
IMPLEMENT_STATELESS(gesv)
IMPLEMENT_STATELESS(gels)
IMPLEMENT_STATELESS(trtrs)
IMPLEMENT_STATELESS(symeig)
IMPLEMENT_STATELESS(eig)
IMPLEMENT_STATELESS(svd)
IMPLEMENT_STATELESS(inverse)
IMPLEMENT_STATELESS(potrf)
IMPLEMENT_STATELESS(potrs)
IMPLEMENT_STATELESS(potri)
IMPLEMENT_STATELESS(pstrf)
IMPLEMENT_STATELESS(qr)
IMPLEMENT_STATELESS(geqrf)
IMPLEMENT_STATELESS(orgqr)
IMPLEMENT_STATELESS(ormqr)
#undef IMPLEMENT_STATELESS
// For logical functions a reverse type search is required (if the first argument
// is a ByteTensor (result), it shouldn't pick its version).
#define IMPLEMENT_STATELESS_REVERSED(name) \
static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *args) \
static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *args, PyObject *kwargs) \
{ \
PyObject *tensor = THPDefaultTensorClass; \
PyObject *key, *value; \
Py_ssize_t pos = 0; \
for (int i = PyTuple_Size(args)-1; i >= 0; i--) { \
PyObject *item = PyTuple_GET_ITEM(args, i); \
if (THPModule_isTensor(item)) { \
if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
tensor = item; \
break; \
goto dispatch; \
} \
} \
if (kwargs) { \
while (PyDict_Next(kwargs, &pos, &key, &value)) { \
if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
tensor = value; \
goto dispatch; \
} \
} \
} \
\
PyObject *methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
THPUtils_assert(methods, "Type %s doesn't implement statless methods", \
Py_TYPE(tensor)->tp_name); \
PyObject *method = PyObject_GetAttrString(methods, #name); \
dispatch: \
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
THPUtils_assert(methods, "Type %s doesn't implement stateless methods", \
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
THPObjectPtr method = PyObject_GetAttrString(methods, #name); \
THPUtils_assert(method, "Type %s doesn't implement stateless method " #name, \
Py_TYPE(tensor)->tp_name); \
return PyObject_Call(method, args, NULL); \
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
return PyObject_Call(method, args, kwargs); \
}
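A rough Python rendering (illustration only, not the binding itself) of what the reversed macro does: scan positional arguments right-to-left so that an optional leading ByteTensor result argument does not decide the dispatch type, then fall back to keyword values. torch.is_tensor is used here as a stand-in for THPModule_isTensor, and the Variable check is folded into the same predicate.

import torch

def _dispatch_tensor_reversed(args, kwargs, default_cls):
    # mirrors IMPLEMENT_STATELESS_REVERSED: last tensor-like positional wins
    for item in reversed(args):
        if torch.is_tensor(item):    # the C macro also accepts Variables here
            return item
    for value in (kwargs or {}).values():
        if torch.is_tensor(value):
            return value
    return default_cls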
IMPLEMENT_STATELESS_REVERSED(gt)
@ -447,41 +505,39 @@ static PyObject * THPModule_nonzero(PyObject *_unused, PyObject *args)
else if (PyTuple_Size(args) == 2)
tensor = PyTuple_GET_ITEM(args, 1);
PyObject *methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
THPUtils_assert(methods, "Type %s doesn't implement statless methods",
Py_TYPE(tensor)->tp_name);
PyObject *method = PyObject_GetAttrString(methods, "nonzero");
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
THPUtils_assert(methods, "Type %s doesn't implement stateless methods",
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
THPObjectPtr method = PyObject_GetAttrString(methods, "nonzero");
THPUtils_assert(method, "Type %s doesn't implement stateless method nonzero",
Py_TYPE(tensor)->tp_name);
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
return PyObject_Call(method, args, NULL);
}
// In nonzero, the first argument might be a LongTensor that will be used
// for indices output, so we should pick a function based on second
// tensor's type.
static PyObject * THPModule_cat(PyObject *_unused, PyObject *args)
{
PyObject *tensor = THPDefaultTensorClass;
THPObjectPtr iterator;
THPObjectPtr item;
if (args && PyTuple_Size(args) > 0) {
if (THPModule_isTensor(PyTuple_GET_ITEM(args, 0))) {
tensor = PyTuple_GET_ITEM(args, 0);
} else if ((iterator = PyObject_GetIter(PyTuple_GET_ITEM(args, 0)))) {
PyObject *first_arg = PyTuple_GET_ITEM(args, 0);
if (THPModule_isTensor(first_arg)) {
tensor = first_arg;
} else if ((iterator = PyObject_GetIter(first_arg))) {
item = PyIter_Next(iterator);
if (item && THPModule_isTensor(item)) {
if (item && (THPModule_isTensor(item) || THPVariable_Check(item))) {
tensor = item;
}
}
PyErr_Clear();
}
PyObject *methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
THPUtils_assert(methods, "Type %s doesn't implement statless methods",
Py_TYPE(tensor)->tp_name);
PyObject *method = PyObject_GetAttrString(methods, "cat");
THPUtils_assert(method, "Type %s doesn't implement stateless method nonzero",
Py_TYPE(tensor)->tp_name);
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
THPObjectPtr method = PyObject_GetAttrString(methods, "cat");
THPUtils_assert(method, "Type %s doesn't implement stateless method cat",
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
return PyObject_Call(method, args, NULL);
}
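THPModule_cat resolves its stateless implementation from the first positional tensor, or from the first element of the iterable it is given, so all following inputs are handled by that element's type. A small usage sketch:

import torch
a = torch.FloatTensor([[1, 2]])
b = torch.FloatTensor([[3, 4]])
c = torch.cat([a, b], 0)     # dispatch decided by `a`, the first item of the list
print(c.size())              # 2x2 FloatTensor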
@ -510,6 +566,8 @@ extern PyObject * THCPModule_initExtension(PyObject *self);
extern PyObject * THCPModule_setDevice_wrap(PyObject *self, PyObject *arg);
extern PyObject * THCPModule_getDevice_wrap(PyObject *self);
extern PyObject * THCPModule_getDeviceCount_wrap(PyObject *self);
extern PyObject * THCPModule_getCurrentStream_wrap(PyObject *self);
extern PyObject * THCPModule_setStream_wrap(PyObject *self, PyObject *stream);
extern PyObject * THCPModule_getDriverVersion(PyObject *self);
extern PyObject * THCPModule_isDriverSufficient(PyObject *self);
extern PyObject * THCPModule_getRNGState(PyObject *_unused);
@ -519,15 +577,21 @@ extern PyObject * THCPModule_manualSeedAll(PyObject *_unused, PyObject *seed);
extern PyObject * THCPModule_seed(PyObject *_unused);
extern PyObject * THCPModule_seedAll(PyObject *_unused);
extern PyObject * THCPModule_initialSeed(PyObject *_unused);
extern PyObject * THCPModule_cudaHostAllocator(PyObject *_unused);
extern PyObject * THCPModule_cudaSynchronize(PyObject *_unused);
extern PyObject * THCPModule_getLibPath(PyObject *_unused);
#endif
static PyMethodDef TorchMethods[] = {
{"_initExtension", (PyCFunction)THPModule_initExtension, METH_O, NULL},
{"_autograd_init", (PyCFunction)THPAutograd_initExtension, METH_NOARGS, NULL},
#ifdef WITH_CUDA
{"_cuda_init", (PyCFunction)THCPModule_initExtension, METH_NOARGS, NULL},
{"_cuda_setDevice", (PyCFunction)THCPModule_setDevice_wrap, METH_O, NULL},
{"_cuda_getDevice", (PyCFunction)THCPModule_getDevice_wrap, METH_NOARGS, NULL},
{"_cuda_getDeviceCount", (PyCFunction)THCPModule_getDeviceCount_wrap, METH_NOARGS, NULL},
{"_cuda_getCurrentStream", (PyCFunction)THCPModule_getCurrentStream_wrap, METH_NOARGS, NULL},
{"_cuda_setStream", (PyCFunction)THCPModule_setStream_wrap, METH_O, NULL},
{"_cuda_isDriverSufficient", (PyCFunction)THCPModule_isDriverSufficient, METH_NOARGS, NULL},
{"_cuda_getDriverVersion", (PyCFunction)THCPModule_getDriverVersion, METH_NOARGS, NULL},
{"_cuda_getRNGState", (PyCFunction)THCPModule_getRNGState, METH_NOARGS, NULL},
@ -537,6 +601,9 @@ static PyMethodDef TorchMethods[] = {
{"_cuda_seed", (PyCFunction)THCPModule_seed, METH_NOARGS, NULL},
{"_cuda_seedAll", (PyCFunction)THCPModule_seedAll, METH_NOARGS, NULL},
{"_cuda_initialSeed", (PyCFunction)THCPModule_initialSeed, METH_NOARGS, NULL},
{"_cuda_cudaHostAllocator", (PyCFunction)THCPModule_cudaHostAllocator, METH_NOARGS, NULL},
{"_cuda_synchronize", (PyCFunction)THCPModule_cudaSynchronize, METH_NOARGS, NULL},
{"_cuda_getLibPath", (PyCFunction)THCPModule_getLibPath, METH_NOARGS, NULL},
#endif
{"_safe_call", (PyCFunction)THPModule_safeCall, METH_VARARGS | METH_KEYWORDS, NULL},
{"_sendfd", (PyCFunction)THPModule_sendfd, METH_VARARGS, NULL},
@ -548,140 +615,145 @@ static PyMethodDef TorchMethods[] = {
{"_storageCopyAsync", (PyCFunction)THPModule_storage_asyncCopyWrapper, METH_VARARGS, NULL},
{"get_num_threads", (PyCFunction)THPModule_getNumThreads, METH_NOARGS, NULL},
{"set_num_threads", (PyCFunction)THPModule_setNumThreads, METH_O, NULL},
{"from_numpy", (PyCFunction)THPModule_fromNumpy, METH_O, NULL},
{"sigmoid", (PyCFunction)THPModule_sigmoid, METH_VARARGS, NULL},
{"log", (PyCFunction)THPModule_log, METH_VARARGS, NULL},
{"log1p", (PyCFunction)THPModule_log1p, METH_VARARGS, NULL},
{"exp", (PyCFunction)THPModule_exp, METH_VARARGS, NULL},
{"cos", (PyCFunction)THPModule_cos, METH_VARARGS, NULL},
{"acos", (PyCFunction)THPModule_acos, METH_VARARGS, NULL},
{"cosh", (PyCFunction)THPModule_cosh, METH_VARARGS, NULL},
{"sin", (PyCFunction)THPModule_sin, METH_VARARGS, NULL},
{"asin", (PyCFunction)THPModule_asin, METH_VARARGS, NULL},
{"sinh", (PyCFunction)THPModule_sinh, METH_VARARGS, NULL},
{"tan", (PyCFunction)THPModule_tan, METH_VARARGS, NULL},
{"atan", (PyCFunction)THPModule_atan, METH_VARARGS, NULL},
{"tanh", (PyCFunction)THPModule_tanh, METH_VARARGS, NULL},
{"sqrt", (PyCFunction)THPModule_sqrt, METH_VARARGS, NULL},
{"rsqrt", (PyCFunction)THPModule_rsqrt, METH_VARARGS, NULL},
{"ceil", (PyCFunction)THPModule_ceil, METH_VARARGS, NULL},
{"floor", (PyCFunction)THPModule_floor, METH_VARARGS, NULL},
{"round", (PyCFunction)THPModule_round, METH_VARARGS, NULL},
{"abs", (PyCFunction)THPModule_abs, METH_VARARGS, NULL},
{"trunc", (PyCFunction)THPModule_trunc, METH_VARARGS, NULL},
{"frac", (PyCFunction)THPModule_frac, METH_VARARGS, NULL},
{"mean", (PyCFunction)THPModule_mean, METH_VARARGS, NULL},
{"std", (PyCFunction)THPModule_std, METH_VARARGS, NULL},
{"var", (PyCFunction)THPModule_var, METH_VARARGS, NULL},
{"norm", (PyCFunction)THPModule_norm, METH_VARARGS, NULL},
{"cinv", (PyCFunction)THPModule_cinv, METH_VARARGS, NULL},
{"neg", (PyCFunction)THPModule_neg, METH_VARARGS, NULL},
{"add", (PyCFunction)THPModule_add, METH_VARARGS, NULL},
{"csub", (PyCFunction)THPModule_csub, METH_VARARGS, NULL},
{"mul", (PyCFunction)THPModule_mul, METH_VARARGS, NULL},
{"div", (PyCFunction)THPModule_div, METH_VARARGS, NULL},
{"fmod", (PyCFunction)THPModule_fmod, METH_VARARGS, NULL},
{"mod", (PyCFunction)THPModule_fmod, METH_VARARGS, NULL},
{"cmul", (PyCFunction)THPModule_cmul, METH_VARARGS, NULL},
{"cdiv", (PyCFunction)THPModule_cdiv, METH_VARARGS, NULL},
{"cfmod", (PyCFunction)THPModule_cfmod, METH_VARARGS, NULL},
{"cmod", (PyCFunction)THPModule_cfmod, METH_VARARGS, NULL},
{"min", (PyCFunction)THPModule_min, METH_VARARGS, NULL},
{"max", (PyCFunction)THPModule_max, METH_VARARGS, NULL},
{"cmax", (PyCFunction)THPModule_cmax, METH_VARARGS, NULL},
{"cmin", (PyCFunction)THPModule_cmin, METH_VARARGS, NULL},
{"cpow", (PyCFunction)THPModule_cpow, METH_VARARGS, NULL},
{"dot", (PyCFunction)THPModule_dot, METH_VARARGS, NULL},
{"sum", (PyCFunction)THPModule_sum, METH_VARARGS, NULL},
{"prod", (PyCFunction)THPModule_prod, METH_VARARGS, NULL},
{"remainder", (PyCFunction)THPModule_remainder, METH_VARARGS, NULL},
{"cremainder", (PyCFunction)THPModule_cremainder, METH_VARARGS, NULL},
{"cumsum", (PyCFunction)THPModule_cumsum, METH_VARARGS, NULL},
{"cumprod", (PyCFunction)THPModule_cumprod, METH_VARARGS, NULL},
{"clamp", (PyCFunction)THPModule_clamp, METH_VARARGS, NULL},
{"equal", (PyCFunction)THPModule_equal, METH_VARARGS, NULL},
{"eye", (PyCFunction)THPModule_eye, METH_VARARGS, NULL},
{"fill", (PyCFunction)THPModule_fill, METH_VARARGS, NULL},
{"diag", (PyCFunction)THPModule_diag, METH_VARARGS, NULL},
{"numel", (PyCFunction)THPModule_numel, METH_VARARGS, NULL},
{"sign", (PyCFunction)THPModule_sign, METH_VARARGS, NULL},
{"trace", (PyCFunction)THPModule_trace, METH_VARARGS, NULL},
{"tril", (PyCFunction)THPModule_tril, METH_VARARGS, NULL},
{"triu", (PyCFunction)THPModule_triu, METH_VARARGS, NULL},
{"zero", (PyCFunction)THPModule_zero, METH_VARARGS, NULL},
{"gt", (PyCFunction)THPModule_gt, METH_VARARGS, NULL},
{"lt", (PyCFunction)THPModule_lt, METH_VARARGS, NULL},
{"ge", (PyCFunction)THPModule_ge, METH_VARARGS, NULL},
{"le", (PyCFunction)THPModule_le, METH_VARARGS, NULL},
{"eq", (PyCFunction)THPModule_eq, METH_VARARGS, NULL},
{"ne", (PyCFunction)THPModule_ne, METH_VARARGS, NULL},
{"kthvalue", (PyCFunction)THPModule_kthvalue, METH_VARARGS, NULL},
{"mode", (PyCFunction)THPModule_mode, METH_VARARGS, NULL},
{"median", (PyCFunction)THPModule_median, METH_VARARGS, NULL},
{"cross", (PyCFunction)THPModule_cross, METH_VARARGS, NULL},
{"sort", (PyCFunction)THPModule_sort, METH_VARARGS, NULL},
{"topk", (PyCFunction)THPModule_topk, METH_VARARGS, NULL},
{"t", (PyCFunction)THPModule_t, METH_VARARGS, NULL},
{"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS, NULL},
{"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS, NULL},
{"sigmoid", (PyCFunction)THPModule_sigmoid, METH_VARARGS | METH_KEYWORDS, NULL},
{"log", (PyCFunction)THPModule_log, METH_VARARGS | METH_KEYWORDS, NULL},
{"log1p", (PyCFunction)THPModule_log1p, METH_VARARGS | METH_KEYWORDS, NULL},
{"exp", (PyCFunction)THPModule_exp, METH_VARARGS | METH_KEYWORDS, NULL},
{"cos", (PyCFunction)THPModule_cos, METH_VARARGS | METH_KEYWORDS, NULL},
{"acos", (PyCFunction)THPModule_acos, METH_VARARGS | METH_KEYWORDS, NULL},
{"cosh", (PyCFunction)THPModule_cosh, METH_VARARGS | METH_KEYWORDS, NULL},
{"sin", (PyCFunction)THPModule_sin, METH_VARARGS | METH_KEYWORDS, NULL},
{"asin", (PyCFunction)THPModule_asin, METH_VARARGS | METH_KEYWORDS, NULL},
{"sinh", (PyCFunction)THPModule_sinh, METH_VARARGS | METH_KEYWORDS, NULL},
{"tan", (PyCFunction)THPModule_tan, METH_VARARGS | METH_KEYWORDS, NULL},
{"atan", (PyCFunction)THPModule_atan, METH_VARARGS | METH_KEYWORDS, NULL},
{"tanh", (PyCFunction)THPModule_tanh, METH_VARARGS | METH_KEYWORDS, NULL},
{"sqrt", (PyCFunction)THPModule_sqrt, METH_VARARGS | METH_KEYWORDS, NULL},
{"rsqrt", (PyCFunction)THPModule_rsqrt, METH_VARARGS | METH_KEYWORDS, NULL},
{"ceil", (PyCFunction)THPModule_ceil, METH_VARARGS | METH_KEYWORDS, NULL},
{"floor", (PyCFunction)THPModule_floor, METH_VARARGS | METH_KEYWORDS, NULL},
{"round", (PyCFunction)THPModule_round, METH_VARARGS | METH_KEYWORDS, NULL},
{"abs", (PyCFunction)THPModule_abs, METH_VARARGS | METH_KEYWORDS, NULL},
{"trunc", (PyCFunction)THPModule_trunc, METH_VARARGS | METH_KEYWORDS, NULL},
{"frac", (PyCFunction)THPModule_frac, METH_VARARGS | METH_KEYWORDS, NULL},
{"mean", (PyCFunction)THPModule_mean, METH_VARARGS | METH_KEYWORDS, NULL},
{"std", (PyCFunction)THPModule_std, METH_VARARGS | METH_KEYWORDS, NULL},
{"var", (PyCFunction)THPModule_var, METH_VARARGS | METH_KEYWORDS, NULL},
{"norm", (PyCFunction)THPModule_norm, METH_VARARGS | METH_KEYWORDS, NULL},
{"cinv", (PyCFunction)THPModule_cinv, METH_VARARGS | METH_KEYWORDS, NULL},
{"neg", (PyCFunction)THPModule_neg, METH_VARARGS | METH_KEYWORDS, NULL},
{"add", (PyCFunction)THPModule_add, METH_VARARGS | METH_KEYWORDS, NULL},
{"csub", (PyCFunction)THPModule_csub, METH_VARARGS | METH_KEYWORDS, NULL},
{"mul", (PyCFunction)THPModule_mul, METH_VARARGS | METH_KEYWORDS, NULL},
{"div", (PyCFunction)THPModule_div, METH_VARARGS | METH_KEYWORDS, NULL},
{"fmod", (PyCFunction)THPModule_fmod, METH_VARARGS | METH_KEYWORDS, NULL},
{"mod", (PyCFunction)THPModule_fmod, METH_VARARGS | METH_KEYWORDS, NULL},
{"cmul", (PyCFunction)THPModule_cmul, METH_VARARGS | METH_KEYWORDS, NULL},
{"cdiv", (PyCFunction)THPModule_cdiv, METH_VARARGS | METH_KEYWORDS, NULL},
{"cfmod", (PyCFunction)THPModule_cfmod, METH_VARARGS | METH_KEYWORDS, NULL},
{"cmod", (PyCFunction)THPModule_cfmod, METH_VARARGS | METH_KEYWORDS, NULL},
{"min", (PyCFunction)THPModule_min, METH_VARARGS | METH_KEYWORDS, NULL},
{"max", (PyCFunction)THPModule_max, METH_VARARGS | METH_KEYWORDS, NULL},
{"cmax", (PyCFunction)THPModule_cmax, METH_VARARGS | METH_KEYWORDS, NULL},
{"cmin", (PyCFunction)THPModule_cmin, METH_VARARGS | METH_KEYWORDS, NULL},
{"cpow", (PyCFunction)THPModule_cpow, METH_VARARGS | METH_KEYWORDS, NULL},
{"dot", (PyCFunction)THPModule_dot, METH_VARARGS | METH_KEYWORDS, NULL},
{"sum", (PyCFunction)THPModule_sum, METH_VARARGS | METH_KEYWORDS, NULL},
{"prod", (PyCFunction)THPModule_prod, METH_VARARGS | METH_KEYWORDS, NULL},
{"remainder", (PyCFunction)THPModule_remainder, METH_VARARGS | METH_KEYWORDS, NULL},
{"cremainder", (PyCFunction)THPModule_cremainder, METH_VARARGS | METH_KEYWORDS, NULL},
{"cumsum", (PyCFunction)THPModule_cumsum, METH_VARARGS | METH_KEYWORDS, NULL},
{"cumprod", (PyCFunction)THPModule_cumprod, METH_VARARGS | METH_KEYWORDS, NULL},
{"clamp", (PyCFunction)THPModule_clamp, METH_VARARGS | METH_KEYWORDS, NULL},
{"equal", (PyCFunction)THPModule_equal, METH_VARARGS | METH_KEYWORDS, NULL},
{"eye", (PyCFunction)THPModule_eye, METH_VARARGS | METH_KEYWORDS, NULL},
{"fill", (PyCFunction)THPModule_fill, METH_VARARGS | METH_KEYWORDS, NULL},
{"diag", (PyCFunction)THPModule_diag, METH_VARARGS | METH_KEYWORDS, NULL},
{"numel", (PyCFunction)THPModule_numel, METH_VARARGS | METH_KEYWORDS, NULL},
{"sign", (PyCFunction)THPModule_sign, METH_VARARGS | METH_KEYWORDS, NULL},
{"trace", (PyCFunction)THPModule_trace, METH_VARARGS | METH_KEYWORDS, NULL},
{"tril", (PyCFunction)THPModule_tril, METH_VARARGS | METH_KEYWORDS, NULL},
{"triu", (PyCFunction)THPModule_triu, METH_VARARGS | METH_KEYWORDS, NULL},
{"zero", (PyCFunction)THPModule_zero, METH_VARARGS | METH_KEYWORDS, NULL},
{"gt", (PyCFunction)THPModule_gt, METH_VARARGS | METH_KEYWORDS, NULL},
{"lt", (PyCFunction)THPModule_lt, METH_VARARGS | METH_KEYWORDS, NULL},
{"ge", (PyCFunction)THPModule_ge, METH_VARARGS | METH_KEYWORDS, NULL},
{"le", (PyCFunction)THPModule_le, METH_VARARGS | METH_KEYWORDS, NULL},
{"eq", (PyCFunction)THPModule_eq, METH_VARARGS | METH_KEYWORDS, NULL},
{"ne", (PyCFunction)THPModule_ne, METH_VARARGS | METH_KEYWORDS, NULL},
{"kthvalue", (PyCFunction)THPModule_kthvalue, METH_VARARGS | METH_KEYWORDS, NULL},
{"mode", (PyCFunction)THPModule_mode, METH_VARARGS | METH_KEYWORDS, NULL},
{"median", (PyCFunction)THPModule_median, METH_VARARGS | METH_KEYWORDS, NULL},
{"cross", (PyCFunction)THPModule_cross, METH_VARARGS | METH_KEYWORDS, NULL},
{"sort", (PyCFunction)THPModule_sort, METH_VARARGS | METH_KEYWORDS, NULL},
{"topk", (PyCFunction)THPModule_topk, METH_VARARGS | METH_KEYWORDS, NULL},
{"t", (PyCFunction)THPModule_t, METH_VARARGS | METH_KEYWORDS, NULL},
{"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS | METH_KEYWORDS, NULL},
{"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS | METH_KEYWORDS, NULL},
{"nonzero", (PyCFunction)THPModule_nonzero, METH_VARARGS, NULL},
{"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS, NULL},
{"dist", (PyCFunction)THPModule_dist, METH_VARARGS, NULL},
{"linspace", (PyCFunction)THPModule_linspace, METH_VARARGS, NULL},
{"logspace", (PyCFunction)THPModule_logspace, METH_VARARGS, NULL},
{"histc", (PyCFunction)THPModule_histc, METH_VARARGS, NULL},
{"atan2", (PyCFunction)THPModule_atan2, METH_VARARGS, NULL},
{"pow", (PyCFunction)THPModule_pow, METH_VARARGS, NULL},
{"lerp", (PyCFunction)THPModule_lerp, METH_VARARGS, NULL},
{"reshape", (PyCFunction)THPModule_reshape, METH_VARARGS, NULL},
{"zeros", (PyCFunction)THPModule_zeros, METH_VARARGS, NULL},
{"ones", (PyCFunction)THPModule_ones, METH_VARARGS, NULL},
{"index_select", (PyCFunction)THPModule_index_select, METH_VARARGS, NULL},
{"narrow", (PyCFunction)THPModule_narrow, METH_VARARGS, NULL},
{"addmm", (PyCFunction)THPModule_addmm, METH_VARARGS, NULL},
{"addmv", (PyCFunction)THPModule_addmv, METH_VARARGS, NULL},
{"addr", (PyCFunction)THPModule_addr, METH_VARARGS, NULL},
{"ger", (PyCFunction)THPModule_ger, METH_VARARGS, NULL},
{"mv", (PyCFunction)THPModule_mv, METH_VARARGS, NULL},
{"addbmm", (PyCFunction)THPModule_addbmm, METH_VARARGS, NULL},
{"baddbmm", (PyCFunction)THPModule_baddbmm, METH_VARARGS, NULL},
{"addcmul", (PyCFunction)THPModule_addcmul, METH_VARARGS, NULL},
{"addcdiv", (PyCFunction)THPModule_addcdiv, METH_VARARGS, NULL},
{"mm", (PyCFunction)THPModule_mm, METH_VARARGS, NULL},
{"bmm", (PyCFunction)THPModule_bmm, METH_VARARGS, NULL},
{"multinomial", (PyCFunction)THPModule_multinomial, METH_VARARGS, NULL},
{"uniform", (PyCFunction)THPModule_uniform, METH_VARARGS, NULL},
{"normal", (PyCFunction)THPModule_normal, METH_VARARGS, NULL},
{"cauchy", (PyCFunction)THPModule_cauchy, METH_VARARGS, NULL},
{"log_normal", (PyCFunction)THPModule_log_normal, METH_VARARGS, NULL},
{"exponential", (PyCFunction)THPModule_exponential, METH_VARARGS, NULL},
{"random", (PyCFunction)THPModule_random, METH_VARARGS, NULL},
{"geometric", (PyCFunction)THPModule_geometric, METH_VARARGS, NULL},
{"bernoulli", (PyCFunction)THPModule_bernoulli, METH_VARARGS, NULL},
{"rand", (PyCFunction)THPModule_rand, METH_VARARGS, NULL},
{"randn", (PyCFunction)THPModule_randn, METH_VARARGS, NULL},
{"randperm", (PyCFunction)THPModule_randperm, METH_VARARGS, NULL},
{"unfold", (PyCFunction)THPModule_unfold, METH_VARARGS, NULL},
{"range", (PyCFunction)THPModule_range, METH_VARARGS, NULL},
{"gather", (PyCFunction)THPModule_gather, METH_VARARGS, NULL},
{"scatter", (PyCFunction)THPModule_scatter, METH_VARARGS, NULL},
{"all", (PyCFunction)THPModule_all, METH_VARARGS, NULL},
{"any", (PyCFunction)THPModule_any, METH_VARARGS, NULL},
{"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS | METH_KEYWORDS, NULL},
{"dist", (PyCFunction)THPModule_dist, METH_VARARGS | METH_KEYWORDS, NULL},
{"linspace", (PyCFunction)THPModule_linspace, METH_VARARGS | METH_KEYWORDS, NULL},
{"logspace", (PyCFunction)THPModule_logspace, METH_VARARGS | METH_KEYWORDS, NULL},
{"histc", (PyCFunction)THPModule_histc, METH_VARARGS | METH_KEYWORDS, NULL},
{"atan2", (PyCFunction)THPModule_atan2, METH_VARARGS | METH_KEYWORDS, NULL},
{"pow", (PyCFunction)THPModule_pow, METH_VARARGS | METH_KEYWORDS, NULL},
{"lerp", (PyCFunction)THPModule_lerp, METH_VARARGS | METH_KEYWORDS, NULL},
{"reshape", (PyCFunction)THPModule_reshape, METH_VARARGS | METH_KEYWORDS, NULL},
{"zeros", (PyCFunction)THPModule_zeros, METH_VARARGS | METH_KEYWORDS, NULL},
{"ones", (PyCFunction)THPModule_ones, METH_VARARGS | METH_KEYWORDS, NULL},
{"index_select", (PyCFunction)THPModule_index_select, METH_VARARGS | METH_KEYWORDS, NULL},
{"addmm", (PyCFunction)THPModule_addmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"addmv", (PyCFunction)THPModule_addmv, METH_VARARGS | METH_KEYWORDS, NULL},
{"addr", (PyCFunction)THPModule_addr, METH_VARARGS | METH_KEYWORDS, NULL},
{"ger", (PyCFunction)THPModule_ger, METH_VARARGS | METH_KEYWORDS, NULL},
{"mv", (PyCFunction)THPModule_mv, METH_VARARGS | METH_KEYWORDS, NULL},
{"addbmm", (PyCFunction)THPModule_addbmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"baddbmm", (PyCFunction)THPModule_baddbmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"addcmul", (PyCFunction)THPModule_addcmul, METH_VARARGS | METH_KEYWORDS, NULL},
{"addcdiv", (PyCFunction)THPModule_addcdiv, METH_VARARGS | METH_KEYWORDS, NULL},
{"mm", (PyCFunction)THPModule_mm, METH_VARARGS | METH_KEYWORDS, NULL},
{"bmm", (PyCFunction)THPModule_bmm, METH_VARARGS | METH_KEYWORDS, NULL},
{"multinomial", (PyCFunction)THPModule_multinomial, METH_VARARGS | METH_KEYWORDS, NULL},
{"uniform", (PyCFunction)THPModule_uniform, METH_VARARGS | METH_KEYWORDS, NULL},
{"normal", (PyCFunction)THPModule_normal, METH_VARARGS | METH_KEYWORDS, NULL},
{"cauchy", (PyCFunction)THPModule_cauchy, METH_VARARGS | METH_KEYWORDS, NULL},
{"log_normal", (PyCFunction)THPModule_log_normal, METH_VARARGS | METH_KEYWORDS, NULL},
{"exponential", (PyCFunction)THPModule_exponential, METH_VARARGS | METH_KEYWORDS, NULL},
{"random", (PyCFunction)THPModule_random, METH_VARARGS | METH_KEYWORDS, NULL},
{"geometric", (PyCFunction)THPModule_geometric, METH_VARARGS | METH_KEYWORDS, NULL},
{"bernoulli", (PyCFunction)THPModule_bernoulli, METH_VARARGS | METH_KEYWORDS, NULL},
{"rand", (PyCFunction)THPModule_rand, METH_VARARGS | METH_KEYWORDS, NULL},
{"randn", (PyCFunction)THPModule_randn, METH_VARARGS | METH_KEYWORDS, NULL},
{"randperm", (PyCFunction)THPModule_randperm, METH_VARARGS | METH_KEYWORDS, NULL},
{"unfold", (PyCFunction)THPModule_unfold, METH_VARARGS | METH_KEYWORDS, NULL},
{"range", (PyCFunction)THPModule_range, METH_VARARGS | METH_KEYWORDS, NULL},
{"gather", (PyCFunction)THPModule_gather, METH_VARARGS | METH_KEYWORDS, NULL},
{"scatter", (PyCFunction)THPModule_scatter, METH_VARARGS | METH_KEYWORDS, NULL},
{"all", (PyCFunction)THPModule_all, METH_VARARGS | METH_KEYWORDS, NULL},
{"any", (PyCFunction)THPModule_any, METH_VARARGS | METH_KEYWORDS, NULL},
{"cat", (PyCFunction)THPModule_cat, METH_VARARGS, NULL},
{"masked_select", (PyCFunction)THPModule_masked_select, METH_VARARGS, NULL},
{"masked_select", (PyCFunction)THPModule_masked_select, METH_VARARGS | METH_KEYWORDS, NULL},
{"gesv", (PyCFunction)THPModule_gesv, METH_VARARGS | METH_KEYWORDS, NULL},
{"gels", (PyCFunction)THPModule_gels, METH_VARARGS | METH_KEYWORDS, NULL},
{"trtrs", (PyCFunction)THPModule_trtrs, METH_VARARGS | METH_KEYWORDS, NULL},
{"symeig", (PyCFunction)THPModule_symeig, METH_VARARGS | METH_KEYWORDS, NULL},
{"eig", (PyCFunction)THPModule_eig, METH_VARARGS | METH_KEYWORDS, NULL},
{"svd", (PyCFunction)THPModule_svd, METH_VARARGS | METH_KEYWORDS, NULL},
{"inverse", (PyCFunction)THPModule_inverse, METH_VARARGS | METH_KEYWORDS, NULL},
{"potrf", (PyCFunction)THPModule_potrf, METH_VARARGS | METH_KEYWORDS, NULL},
{"potrs", (PyCFunction)THPModule_potrs, METH_VARARGS | METH_KEYWORDS, NULL},
{"potri", (PyCFunction)THPModule_potri, METH_VARARGS | METH_KEYWORDS, NULL},
{"pstrf", (PyCFunction)THPModule_pstrf, METH_VARARGS | METH_KEYWORDS, NULL},
{"qr", (PyCFunction)THPModule_qr, METH_VARARGS | METH_KEYWORDS, NULL},
{"geqrf", (PyCFunction)THPModule_geqrf, METH_VARARGS | METH_KEYWORDS, NULL},
{"orgqr", (PyCFunction)THPModule_orgqr, METH_VARARGS | METH_KEYWORDS, NULL},
{"ormqr", (PyCFunction)THPModule_ormqr, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}
};
#if PY_MAJOR_VERSION != 2
static struct PyModuleDef torchmodule = {
PyModuleDef_HEAD_INIT,
"torch._C",
NULL,
-1,
TorchMethods
};
#endif
static void errorHandler(const char *msg, void *data)
{
throw THException(msg);
@ -716,6 +788,10 @@ bool THCPShortTensor_init(PyObject *module);
bool THCPCharTensor_init(PyObject *module);
bool THCPByteTensor_init(PyObject *module);
bool THCPStream_init(PyObject *module);
static std::vector<PyMethodDef> methods;
#if PY_MAJOR_VERSION == 2
PyMODINIT_FUNC init_C()
#else
@ -729,13 +805,29 @@ PyMODINIT_FUNC PyInit__C()
#define ASSERT_TRUE(cmd) if (!(cmd)) return NULL
#endif
THPUtils_addPyMethodDefs(methods, TorchMethods);
#ifdef WITH_CUDNN
THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
#endif
#if PY_MAJOR_VERSION == 2
ASSERT_TRUE(module = Py_InitModule("torch._C", TorchMethods));
ASSERT_TRUE(module = Py_InitModule("torch._C", methods.data()));
#else
static struct PyModuleDef torchmodule = {
PyModuleDef_HEAD_INIT,
"torch._C",
NULL,
-1,
methods.data()
};
ASSERT_TRUE(module = PyModule_Create(&torchmodule));
#endif
ASSERT_TRUE(THPGenerator_init(module));
ASSERT_TRUE(THPException_init(module));
ASSERT_TRUE(THPSize_init(module));
ASSERT_TRUE(THPVariable_initModule(module));
ASSERT_TRUE(THPFunction_initModule(module));
ASSERT_TRUE(THPEngine_initModule(module));
ASSERT_TRUE(THPDoubleStorage_init(module));
ASSERT_TRUE(THPFloatStorage_init(module));
@ -776,6 +868,12 @@ PyMODINIT_FUNC PyInit__C()
ASSERT_TRUE(THCPShortTensor_init(module));
ASSERT_TRUE(THCPCharTensor_init(module));
ASSERT_TRUE(THCPByteTensor_init(module));
ASSERT_TRUE(THCPStream_init(module));
#endif
#ifdef WITH_CUDNN
ASSERT_TRUE(THCUDNNModule_initModule(module));
#endif
THPDefaultGenerator = (THPGenerator*)THPGenerator_New();

View File

@ -8,6 +8,7 @@ extern THPGenerator *THPDefaultGenerator;
#ifdef _THP_CORE
bool THPModule_tensorCopy(PyObject *dst, PyObject *src);
bool THPModule_isTensor(PyObject *obj);
#endif
#endif

torch/csrc/Size.cpp Normal file (107 lines added)

@ -0,0 +1,107 @@
#include "Size.h"
#include <string>
#include "THP.h"
PyObject* THPSizeClass = NULL;
struct THPSize {
PyTupleObject tuple;
};
PyObject * THPSize_New(int dim, long *sizes)
{
PyTypeObject* type = (PyTypeObject*)THPSizeClass;
PyObject* self = type->tp_alloc(type, dim);
if (!self) {
return NULL;
}
for (int i = 0; i < dim; ++i) {
PyTuple_SET_ITEM(self, i, PyLong_FromLong(sizes[i]));
}
return self;
}
static PyObject * THPSize_pynew(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
PyObject *self = PyTuple_Type.tp_new(type, args, kwargs);
if (self) {
for (Py_ssize_t i = 0; i < PyTuple_Size(self); ++i) {
PyObject *item = PyTuple_GET_ITEM(self, i);
if (!THPUtils_checkLong(item)) {
Py_DECREF(self);
return PyErr_Format(PyExc_TypeError, "torch.Size() takes an iterable of 'int' (item %zd is '%s')",
i, Py_TYPE(item)->tp_name);
}
}
}
return self;
}
static PyObject * THPSize_repr(THPSize *self)
{
std::string repr("torch.Size([");
for (Py_ssize_t i = 0; i < PyTuple_Size((PyObject*)self); ++i) {
if (i != 0) {
repr += ", ";
}
repr += std::to_string(PyLong_AsLong(PyTuple_GET_ITEM(self, i)));
}
repr += "])";
#if PY_MAJOR_VERSION == 2
return PyString_FromString(repr.c_str());
#else
return PyUnicode_FromString(repr.c_str());
#endif
}
PyTypeObject THPSizeType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch.Size", /* tp_name */
sizeof(THPSize), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)THPSize_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
&PyTuple_Type, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPSize_pynew, /* tp_new */
};
bool THPSize_init(PyObject *module)
{
THPSizeClass = (PyObject*)&THPSizeType;
if (PyType_Ready(&THPSizeType) < 0)
return false;
Py_INCREF(&THPSizeType);
PyModule_AddObject(module, "Size", (PyObject *)&THPSizeType);
return true;
}
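Because THPSizeType sets tp_base to &PyTuple_Type, torch.Size behaves as an ordinary tuple subclass whose constructor insists on integer items. A quick sketch of the resulting Python behaviour:

import torch
s = torch.Size([2, 3, 5])
assert isinstance(s, tuple)      # tuple subclass (tp_base = &PyTuple_Type)
print(s)                         # torch.Size([2, 3, 5]) per THPSize_repr
print(s[1:], len(s))             # plain tuple behaviour: (3, 5) 3
# torch.Size([2, 'x'])           # would raise TypeError in THPSize_pynew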

torch/csrc/Size.h Normal file (16 lines added)

@ -0,0 +1,16 @@
#ifndef THP_SIZE_INC
#define THP_SIZE_INC
#include <Python.h>
extern PyObject *THPSizeClass;
#define THPSize_Check(obj) ((PyObject*)Py_TYPE(obj) == THPSizeClass)
PyObject * THPSize_New(int dim, long *sizes);
#ifdef _THP_CORE
bool THPSize_init(PyObject *module);
#endif
#endif

View File

@ -5,7 +5,6 @@
#include <TH/TH.h>
#include <libshm.h>
#include "THP.h"
#include "byte_order.h"
#include "generic/Storage.cpp"
#include <TH/THGenerateAllTypes.h>

View File

@ -23,12 +23,17 @@
#include "Generator.h"
#include "Storage.h"
#include "Tensor.h"
#include "Size.h"
#include "Module.h"
#include "Types.h"
#include "utils.h" // This requires defined Storage and Tensor types
#include "byte_order.h"
#ifdef _THP_CORE
#include "serialization.h"
#include "allocators.h"
#include "autograd/autograd.h"
#endif
#endif

torch/csrc/Types.h Normal file (40 lines added)

@ -0,0 +1,40 @@
#ifndef THP_TYPES_INC
#define THP_TYPES_INC
#include <Python.h>
#include <cstddef>
namespace torch {
typedef struct THVoidStorage
{
void *data;
ptrdiff_t size;
int refcount;
char flag;
void *allocator;
void *allocatorContext;
THVoidStorage *view;
} THVoidStorage;
typedef struct THVoidTensor
{
long *size;
long *stride;
int nDimension;
THVoidStorage *storage;
ptrdiff_t storageOffset;
int refcount;
char flag;
} THVoidTensor;
struct THPVoidTensor {
PyObject_HEAD
THVoidTensor *cdata;
char device_type;
char data_type;
};
} // namespace torch
#endif

View File

@ -0,0 +1,10 @@
#ifndef THP_AUTOGRAD_H
#define THP_AUTOGRAD_H
PyObject * THPAutograd_initExtension(PyObject *_unused);
#include "variable.h"
#include "function.h"
#include "engine.h"
#endif

View File

@ -0,0 +1,285 @@
#include <Python.h>
#include <structmember.h>
#include <vector>
#include <unordered_map>
#include <deque>
#include <set>
#include "THP.h"
PyObject *THPEngineClass = NULL;
// used for topological sort
using dependencies_type = std::unordered_map<THPFunction *, int>;
// stores gradient buffers
using grad_list_type = std::vector<THPObjectPtr>;
// used for need_copy set (to ensure correct gradient buffering)
using buffer_set_type = std::set<std::pair<size_t, int>>;
// gradient buffer - a list of gradient tensors + id
struct grad_buffer_type: public grad_list_type {
template<typename... Args>
grad_buffer_type(size_t buffer_id, Args&&... args):
grad_list_type(std::forward<Args>(args)...),
buffer_id(buffer_id) {};
grad_buffer_type(grad_buffer_type &&other):
grad_list_type(std::move(other)),
buffer_id(other.buffer_id) {};
grad_buffer_type& operator=(grad_buffer_type &&other) {
grad_list_type::operator=(std::move(other));
buffer_id = other.buffer_id;
return *this;
};
size_t buffer_id;
};
// Computes graph dependencies (using a super simple topological sort)
dependencies_type THPEngine_compute_dependencies(THPFunction *function)
{
dependencies_type dependencies;
std::set<THPFunction *> seen;
std::vector<THPFunction *> queue = {function};
while (queue.size() > 0) {
THPFunction *fn = queue.back(); queue.pop_back();
for (int i = 0; i < fn->num_inputs; i++) {
THPFunction *prev_fn = (THPFunction*)fn->previous_functions[i].get();
// We can ignore variables (their backprop is called every time we have
// gradient ready) and functions that don't require gradient.
if (THPVariable_Check((PyObject*)prev_fn) || !prev_fn->requires_grad)
continue;
dependencies[prev_fn] += 1;
if (seen.count(prev_fn) == 0) {
seen.insert(prev_fn);
queue.push_back(prev_fn);
}
}
}
return dependencies;
}
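The dependency map above is just a per-function count of how many consumers still have to hand it a gradient. A rough Python rendering of the same traversal (illustration only; previous_functions, requires_grad and is_variable are stand-ins for the C++ fields and THPVariable_Check):

def compute_dependencies(root):
    # count, for every reachable function, how many downstream functions
    # will feed gradients into it during backward
    dependencies, seen, queue = {}, {root}, [root]
    while queue:
        fn = queue.pop()
        for prev_fn, _output_nr in fn.previous_functions:
            if is_variable(prev_fn) or not prev_fn.requires_grad:
                continue              # variables and no-grad functions are skipped
            dependencies[prev_fn] = dependencies.get(prev_fn, 0) + 1
            if prev_fn not in seen:
                seen.add(prev_fn)
                queue.append(prev_fn)
    return dependencies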
// Frees backward dependency and returns true if prev_fn is ready for backward
bool THPEngine_free_backward_dependency(dependencies_type &dependencies,
THPFunction *prev_fn)
{
if (--dependencies[prev_fn] == 0) {
dependencies.erase(prev_fn);
return true;
}
return false;
}
// Accumulates d_prev_fn gradient tensor into output_idx position of prev_grad buffer
bool THPEngine_add_grad(buffer_set_type &need_copy, grad_buffer_type &prev_grad,
int output_nr, PyObject *d_prev_fn)
{
// TODO: we should probably clean up need_copy, because most tensors will
// probably never hit the else clause
auto set_key = std::make_pair(prev_grad.buffer_id, output_nr);
if (!prev_grad[output_nr]) {
Py_INCREF(d_prev_fn);
prev_grad[output_nr] = d_prev_fn;
need_copy.insert(set_key);
} else {
PyObject *grad_tensor = prev_grad[output_nr];
if (need_copy.count(set_key) != 0) {
grad_tensor = PyObject_CallMethod(grad_tensor, "clone", "");
if (!grad_tensor)
return false;
need_copy.erase(set_key);
prev_grad[output_nr] = grad_tensor;
}
THPObjectPtr result = PyObject_CallMethod(grad_tensor, "add_", "O", d_prev_fn);
if (!result)
return false;
}
return true;
}
// Main backward function
PyObject *THPEngine_run_backward(THPEngine *self, PyObject *args, PyObject *kwargs)
{
THPVariable *variable = NULL;
PyObject *grad_variable = NULL;
unsigned char retain_variables = 0;
size_t next_buf_id = 0;
const char *accepted_kwargs[] = {"variable", "grad_variable",
"retain_variables", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOb", (char**)accepted_kwargs,
&variable, &grad_variable, &retain_variables))
return NULL;
PyObject *retain_variables_obj = retain_variables ? Py_True : Py_False;
// If someone calls .backward() on a leaf, it's simple...
if (variable->creator == NULL) {
THPObjectPtr result = PyObject_CallMethod((PyObject*)variable,
"_do_backward", "(O)O", grad_variable, retain_variables_obj);
Py_RETURN_NONE;
}
std::deque<std::pair<THPFunction *, grad_buffer_type>> ready;
std::unordered_map<THPFunction *, grad_buffer_type> not_ready;
buffer_set_type need_copy;
// Initialize the queue
grad_buffer_type buf(next_buf_id++, ((THPFunction*)variable->creator)->num_outputs);
Py_INCREF(grad_variable);
buf[variable->output_nr] = grad_variable;
ready.emplace_front((THPFunction*)variable->creator, std::move(buf));
dependencies_type dependencies = THPEngine_compute_dependencies((THPFunction*)variable->creator);
while (ready.size() > 0) {
std::pair<THPFunction *, grad_buffer_type> ready_pair =
std::move(ready.back()); ready.pop_back();
THPFunction *fn = ready_pair.first;
grad_buffer_type &fn_grad_buffer = ready_pair.second;
// Prepare a tuple for a call to _do_backward
THPObjectPtr grad_tuple = PyTuple_New(fn_grad_buffer.size());
if (!grad_tuple) return NULL;
for (unsigned int i = 0; i < fn_grad_buffer.size(); i++) {
PyObject *_grad;
if (fn_grad_buffer[i]) {
_grad = fn_grad_buffer[i].release();
} else {
_grad = Py_None;
Py_INCREF(_grad);
}
PyTuple_SET_ITEM(grad_tuple.get(), i, _grad);
}
// Call _do_backward and make sure grad_input is sound
THPObjectPtr grad_input = PyObject_CallMethod((PyObject*)fn, "_do_backward",
"OO", grad_tuple.get(), retain_variables_obj);
if (!grad_input)
return NULL;
THPUtils_assert(PyTuple_Check(grad_input), "error, _do_backward should "
"return a tuple, but got %s", THPUtils_typename(grad_input));
int num_grads = PyTuple_GET_SIZE(grad_input.get());
// Process tensors inside grad_input
for (int i = 0; i < num_grads; i++) {
PyObject *prev_obj = fn->previous_functions[i].get();
PyObject *grad_prev = PyTuple_GET_ITEM(grad_input.get(), i);
// A shortcut for variables - there's no need to buffer gradients for them
// as their _do_backward is super fast (and we can save memory).
// TODO: this might call leaf variable hooks multiple times
if (THPVariable_Check(prev_obj)) {
THPVariable *prev_var = (THPVariable*)prev_obj;
if (prev_var->requires_grad) {
THPObjectPtr ret = PyObject_CallMethod(prev_obj, "_do_backward",
"(O)O", grad_prev, retain_variables_obj);
if (!ret) return NULL;
}
continue;
}
// No need to do any work for functions that don't require gradients
THPFunction *prev_fn = (THPFunction*)prev_obj;
if (!prev_fn->requires_grad)
continue;
// Check if the function is ready for backward and see if it has any
// buffers allocated
int output_idx = fn->previous_functions[i].output_nr;
bool is_ready = THPEngine_free_backward_dependency(dependencies, prev_fn);
auto not_ready_it = not_ready.find(prev_fn);
if (is_ready) {
// this is only a temporary, so no need for a correct id
grad_buffer_type prev_buffer(-1);
if (not_ready_it == not_ready.end()) {
// The function is ready and no buffers have been allocated for it.
prev_buffer = grad_buffer_type(next_buf_id++, prev_fn->num_outputs);
Py_INCREF(grad_prev);
prev_buffer[output_idx] = grad_prev;
} else {
// The function is ready and it already has a buffer allocated.
prev_buffer = std::move(not_ready_it->second);
not_ready.erase(not_ready_it);
if (!THPEngine_add_grad(need_copy, prev_buffer, output_idx, grad_prev))
return NULL;
}
// Put the function into the ready queue.
ready.emplace_front(prev_fn, std::move(prev_buffer));
} else {
// Allocate a buffer if necessary
if (not_ready_it == not_ready.end()) {
int num_prev_fn_outputs = prev_fn->num_outputs;
std::tie(not_ready_it, std::ignore) =
not_ready.emplace(prev_fn, grad_buffer_type(next_buf_id++, num_prev_fn_outputs));
}
// Accumulate the gradient into the buffer
grad_buffer_type &grad_buffer = not_ready_it->second;
if (!THPEngine_add_grad(need_copy, grad_buffer, output_idx, grad_prev))
return NULL;
}
}
}
Py_RETURN_NONE;
}
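A compressed, approximate Python rendering of the scheduling loop above, under the same stand-in names as the dependency sketch earlier: functions sit in not_ready accumulating incoming gradients until their dependency count reaches zero, at which point they move to the ready queue and have _do_backward called. The variable shortcut and the need_copy clone-on-write buffering are deliberately elided here.

def run_backward(root_fn, root_grads, dependencies):
    ready, not_ready = [(root_fn, root_grads)], {}
    while ready:
        fn, grad_buffer = ready.pop()
        grad_inputs = fn._do_backward(tuple(grad_buffer), True)
        for (prev_fn, output_nr), g in zip(fn.previous_functions, grad_inputs):
            if is_variable(prev_fn) or not prev_fn.requires_grad:
                continue
            buf = not_ready.setdefault(prev_fn, [None] * prev_fn.num_outputs)
            buf[output_nr] = g if buf[output_nr] is None else buf[output_nr] + g
            dependencies[prev_fn] -= 1
            if dependencies[prev_fn] == 0:       # all consumers processed
                ready.append((prev_fn, not_ready.pop(prev_fn)))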
PyObject *THPEngine_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
return type->tp_alloc(type, 0);
}
static struct PyMethodDef THPEngine_methods[] = {
{(char*)"run_backward", (PyCFunction)THPEngine_run_backward, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL}
};
PyTypeObject THPEngineType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._EngineBase", /* tp_name */
sizeof(THPEngine), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPEngine_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPEngine_new /* tp_new */
};
bool THPEngine_initModule(PyObject *module)
{
if (PyType_Ready(&THPEngineType) < 0)
return false;
Py_INCREF(&THPEngineType);
PyModule_AddObject(module, "_ImperativeEngine", (PyObject *)&THPEngineType);
return true;
}

View File

@ -0,0 +1,10 @@
#ifndef THP_ENGINE_H
#define THP_ENGINE_H
struct THPEngine {
PyObject_HEAD
};
bool THPEngine_initModule(PyObject *module);
#endif

View File

@ -0,0 +1,606 @@
#include <Python.h>
#include <structmember.h>
#include <unordered_map>
#include "THP.h"
#ifdef WITH_CUDA
#include "cuda/AutoGPU.h"
#endif
PyObject *THPFunctionClass = NULL;
static void THPFunction_dealloc(THPFunction* self)
{
self->num_inputs = 0;
self->num_outputs = 0;
Py_XDECREF(self->needs_input_grad);
Py_XDECREF(self->saved_variables);
Py_XDECREF(self->backward_hooks);
Py_XDECREF(self->to_save);
Py_XDECREF(self->shared_pairs);
Py_XDECREF(self->non_differentiable);
Py_XDECREF(self->dirty_tensors);
THPFunctionPtr *previous_functions = self->previous_functions;
self->previous_functions = NULL;
delete[] previous_functions;
delete self->output_info;
Py_TYPE(self)->tp_free((PyObject*)self);
}
// Traverse and clear are required for supporting Python's GC cycle handling.
static int THPFunction_traverse(THPFunction *self, visitproc visit, void *arg)
{
Py_VISIT(self->needs_input_grad);
Py_VISIT(self->saved_variables);
Py_VISIT(self->backward_hooks);
for (int i = 0; i < self->num_inputs; i++)
Py_VISIT(self->previous_functions[i].get());
Py_VISIT(self->to_save);
Py_VISIT(self->shared_pairs);
Py_VISIT(self->non_differentiable);
Py_VISIT(self->dirty_tensors);
return 0;
}
static int THPFunction_clear(THPFunction *self)
{
self->num_inputs = 0;
self->num_outputs = 0;
Py_CLEAR(self->needs_input_grad);
Py_CLEAR(self->saved_variables);
Py_CLEAR(self->backward_hooks);
Py_CLEAR(self->to_save);
Py_CLEAR(self->shared_pairs);
Py_CLEAR(self->non_differentiable);
Py_CLEAR(self->dirty_tensors);
THPFunctionPtr *previous_functions = self->previous_functions;
self->previous_functions = NULL;
delete[] previous_functions;
return 0;
}
PyObject *THPFunction_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
THPFunction *self = (THPFunction*)type->tp_alloc(type, 0);
if (!self)
return NULL;
// Python zero-initializes the object memory, so there's no need to initialize
// most fields
self->num_outputs = -1;
return (PyObject*)self;
}
using t2var_type = std::unordered_map<PyObject *, THPVariable *>;
static bool _mark_dirty(THPFunction *self, t2var_type &t2var)
{
// Increase versions of modified tensors
if (self->dirty_tensors) {
THPUtils_assertRet(false, PyTuple_Check(self->dirty_tensors), "autograd "
"internal error: dirty_tensors attribute is expected to be a tuple "
"but is %s", THPUtils_typename(self->dirty_tensors));
Py_ssize_t num_dirty = PyTuple_GET_SIZE(self->dirty_tensors);
for (int i = 0; i < num_dirty; i++) {
PyObject *tensor = PyTuple_GET_ITEM(self->dirty_tensors, i);
THPVariable *variable;
try {
variable = t2var.at(tensor);
} catch (std::out_of_range &e) {
THPUtils_assertRet(false, THPModule_isTensor(tensor), "mark_dirty can "
"only accept tensors, but argument %d is of type %s", i,
THPUtils_typename(tensor));
THPUtils_setError("mark_dirty only accepts input tensors, but "
"argument %d isn't one", i);
return false;
}
auto &v_counter = *variable->version_counter;
THPUtils_assert(v_counter.refcnt() == 1, "in-place operations can be "
"only used on variables that don't share storage with any other "
"variables, but detected that there are %d objects sharing it",
v_counter.refcnt());
v_counter++;
}
// We're not going to ever need this so let's remove references now
Py_DECREF(self->dirty_tensors);
self->dirty_tensors = NULL;
}
return true;
}
static bool _wrap_outputs(THPFunction *self, t2var_type &t2var,
PyObject *raw_output, PyObject *outputs)
{
// Wrap outputs in Variables
Py_ssize_t num_outputs = PyTuple_GET_SIZE(raw_output);
self->output_info = new std::vector<output_info_type>(num_outputs);
auto &output_info = *self->output_info;
for (int i = 0; i < num_outputs; i++) {
PyObject *output = PyTuple_GET_ITEM(raw_output, i);
THPVariable *output_var;
auto it = t2var.find(output);
if (it == t2var.end()) {
// A completely new tensor - just wrap it and continue
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self, self->requires_grad);
} else {
// If one of the outputs was also an input tensor it's a bit more complicated.
THPVariable *input_var = it->second;
if (input_var->creator) {
// If it's not a leaf we want to move it in the graph so backprop
// will be computed correctly:
// creator <- variable <- self ==> creator <- self <- variable
Py_INCREF(input_var);
output_var = input_var;
Py_DECREF(input_var->creator);
Py_INCREF(self);
input_var->creator = (PyObject*)self;
} else {
// If it's a leaf it's not as simple. Leaves will raise an error in
// backward if they've been changed, or they're no longer leaves. In
// some cases (e.g. broadcast) it's perfectly valid to return the same
// tensor untouched, so instead of moving it we're going to create a
// copy and join their version counters. This works for broadcast,
// and if the use wasn't valid we'll still detect an error, because
// the leaf will have a version != 0.
output_var = (THPVariable*)THPVariable_New(output, (PyObject*)self, self->requires_grad);
if (!output_var) return false;
output_var->version_counter->join_with(*input_var->version_counter);
}
}
if (!output_var)
return false;
torch::THPVoidTensor *output_obj = (torch::THPVoidTensor*)output_var->data;
torch::THVoidTensor *output_tensor = output_obj->cdata;
long ndim = output_tensor->nDimension;
int device_id = -1;
THPObjectPtr is_cuda = PyObject_GetAttrString(output_var->data, "is_cuda");
if (is_cuda.get() == Py_True) {
THPObjectPtr device_id_obj = PyObject_CallMethod(output_var->data,
"get_device", "");
THPUtils_assertRet(false, THPUtils_checkLong(device_id_obj), "get_device "
"should return an int, but got %s", THPUtils_typename(device_id_obj));
device_id = THPUtils_unpackLong(device_id_obj);
}
output_info[i] = std::make_tuple(
(PyObject*)Py_TYPE(output_var->data),
device_id,
std::vector<long>(output_tensor->size, output_tensor->size + ndim)
);
t2var[output] = output_var;
output_var->output_nr = i;
PyTuple_SET_ITEM(outputs, i, (PyObject*)output_var);
}
return true;
}
static bool _save_variables(THPFunction*self, t2var_type &t2var)
{
// TODO: this can be stored without using python types
if (self->to_save) {
THPUtils_assertRet(false, PyTuple_Check(self->to_save), "autograd internal "
"error: to_save attribute is expected to be a tuple but is %s",
THPUtils_typename(self->to_save));
Py_ssize_t num_saved = PyTuple_GET_SIZE(self->to_save);
self->saved_variables = PyTuple_New(num_saved);
if (!self->saved_variables) return false;
for (int i = 0; i < num_saved; i++) {
PyObject *tensor = PyTuple_GET_ITEM(self->to_save, i);
THPVariable *variable;
try {
variable = t2var.at(tensor);
} catch(std::out_of_range &e) {
THPUtils_assertRet(false, THPModule_isTensor(tensor),
"save_for_backward can only save tensors, but argument %d is of "
"type %s", i, THPUtils_typename(tensor));
THPUtils_setError("save_for_backward can only save input or output "
"tensors, but argument %d doesn't satisfy this condition", i);
return false;
}
PyObject *tuple = PyTuple_New(2);
if (!tuple)
return false;
Py_INCREF(variable);
PyTuple_SET_ITEM(tuple, 0, (PyObject*)variable);
PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong(**variable->version_counter));
PyTuple_SET_ITEM(self->saved_variables, i, tuple);
}
// Free .to_save
Py_DECREF(self->to_save);
self->to_save = NULL;
}
return true;
}
static bool _join_version_counters(THPFunction *self, t2var_type &t2var)
{
if (self->shared_pairs) {
THPUtils_assertRet(false, PyTuple_Check(self->shared_pairs), "autograd internal "
"error: shared_pairs attribute is expected to be a tuple but is %s",
THPUtils_typename(self->shared_pairs));
Py_ssize_t num_shared = PyTuple_GET_SIZE(self->shared_pairs);
for (int i = 0; i < num_shared; i++) {
PyObject *shared_tuple = PyTuple_GET_ITEM(self->shared_pairs, i);
THPUtils_assertRet(false, PyTuple_Check(shared_tuple), "mark_shared_storages "
"accepts a number of pairs, but one of the arguments is of type %s",
THPUtils_typename(shared_tuple));
THPUtils_assertRet(false, PyTuple_GET_SIZE(shared_tuple) == 2,
"mark_shared_storages accepts pairs, but argument %d is a tuple of "
"%d elements", i, PyTuple_GET_SIZE(shared_tuple));
// Now we're sure it's really a pair!
THPVariable *v1, *v2;
try {
v1 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 0));
v2 = t2var.at(PyTuple_GET_ITEM(shared_tuple, 1));
} catch(std::out_of_range &e) {
// One of the tuple items wasn't present in t2var, so there are two cases:
// 1. it's not a tensor
// 2. it's not an input nor an output
PyObject *t1 = PyTuple_GET_ITEM(shared_tuple, 0);
PyObject *t2 = PyTuple_GET_ITEM(shared_tuple, 1);
THPUtils_assertRet(false, THPModule_isTensor(t1) && THPModule_isTensor(t2),
"mark_shared_storages accepts pairs of tensors, but one of them "
"contains %s and %s", THPUtils_typename(t1), THPUtils_typename(t2));
THPUtils_setError("mark_shared_storages only accepts pairs of input "
"and output tensors, but argument %d doesn't satify this "
"condition", i);
return false;
}
v2->version_counter->join_with(*v1->version_counter);
}
// Free .shared_pairs
Py_DECREF(self->shared_pairs);
self->shared_pairs = NULL;
}
return true;
}
static bool _mark_non_differentiable(THPFunction *self, t2var_type &t2var)
{
if (self->non_differentiable) {
THPUtils_assertRet(false, PyTuple_Check(self->non_differentiable), "autograd "
"internal error: non_differentiable attribute is expected to be a "
"tuple but is %s", THPUtils_typename(self->non_differentiable));
Py_ssize_t num_nondiff = PyTuple_GET_SIZE(self->non_differentiable);
for (int i = 0; i < num_nondiff; i++) {
PyObject *t = PyTuple_GET_ITEM(self->non_differentiable, i);
THPVariable *var;
try {
var = t2var.at(t);
THPUtils_assertRet(false, var->creator == (PyObject*)self,
"mark_non_differentiable only accepts output tensors, but "
"argument %d isn't an output", i);
} catch (std::out_of_range &e) {
THPUtils_assertRet(false, THPModule_isTensor(t), "mark_non_differentiable "
"only accepts tensor arguments, but got %s", THPUtils_typename(t));
THPUtils_setError("mark_non_differentiable only accepts function "
"outputs");
return false;
}
var->requires_grad = 0;
}
Py_DECREF(self->non_differentiable);
self->non_differentiable = NULL;
}
return true;
}
PyObject *THPFunction_do_forward(THPFunction *self, PyObject *inputs)
{
Py_ssize_t num_inputs = inputs ? PyTuple_GET_SIZE(inputs) : 0;
// Unpack inputs and check if they require gradients or are volatile
THPObjectPtr unpacked_inputs = PyTuple_New(num_inputs);
self->needs_input_grad = PyTuple_New(num_inputs);
self->requires_grad = false;
bool is_volatile = false;
for (int i = 0; i < num_inputs; i++) {
PyObject *input = PyTuple_GET_ITEM(inputs, i);
THPUtils_assert(THPVariable_Check(input), "expected a Variable argument, "
"but got %s", THPUtils_typename(input));
THPVariable *variable = (THPVariable*)input;
// Unpack the variable - SET_ITEM steals a reference so INCREF it
Py_INCREF(variable->data);
PyTuple_SET_ITEM(unpacked_inputs.get(), i, variable->data);
// We can't move this to C, because it's going to be accessed from user code.
PyTuple_SET_ITEM(self->needs_input_grad, i, PyBool_FromLong(variable->requires_grad));
is_volatile = is_volatile || variable->is_volatile;
self->requires_grad = self->requires_grad || variable->requires_grad;
}
// Now we're ready to call a forward (implemented in Python)
THPObjectPtr forward_fn = PyObject_GetAttrString((PyObject*)self, "forward");
THPUtils_assert(forward_fn.get(), "function %s doesn't implement a required "
"'forward' method", THPUtils_typename((PyObject*)self));
THPObjectPtr raw_output = PyObject_CallObject(forward_fn, unpacked_inputs);
if (!raw_output)
return NULL;
// Wrap output in a tuple, if it's not one already
if (!PyTuple_Check(raw_output.get())) {
PyObject *tuple = PyTuple_New(1);
if (!tuple)
return NULL;
PyTuple_SET_ITEM(tuple, 0, raw_output.release());
raw_output = tuple;
}
int num_outputs = PyTuple_GET_SIZE(raw_output.get());
THPObjectPtr outputs = PyTuple_New(num_outputs);
if (!outputs)
return NULL;
if (is_volatile) {
// If one of the inputs is volatile, let's take a fast path - we want to
// minimize the overhead of inference
for (int i = 0; i < num_outputs; i++) {
PyObject *output = PyTuple_GET_ITEM(raw_output.get(), i);
THPVariable *output_var = (THPVariable*)THPVariable_NewVolatile(output);
if (!output_var)
return NULL;
output_var->output_nr = i;
PyTuple_SET_ITEM(outputs.get(), i, (PyObject*)output_var);
}
} else {
// We're not volatile, so there's a lot of bookkeeping to do...
self->num_inputs = num_inputs;
self->num_outputs = num_outputs;
t2var_type t2var;
// Save previous functions and initialize t2var map
self->previous_functions = new THPFunctionPtr[num_inputs];
for (int i = 0; i < num_inputs; i++) {
THPVariable *input_var = (THPVariable*)PyTuple_GET_ITEM(inputs, i);
t2var.emplace(input_var->data, input_var);
// Save the previous function in a helper class (that has a smart pointer to
// the object and remembers which output we used).
PyObject *prev_fn = input_var->creator ? input_var->creator : (PyObject*)input_var;
Py_INCREF(prev_fn);
self->previous_functions[i] = THPFunctionPtr(prev_fn, input_var->output_nr);
}
if (!_mark_dirty(self, t2var))
return NULL;
if (!_wrap_outputs(self, t2var, raw_output, outputs))
return NULL;
if (!_join_version_counters(self, t2var))
return NULL;
if (!_save_variables(self, t2var))
return NULL;
if (!_mark_non_differentiable(self, t2var))
return NULL;
}
if (num_outputs == 1) {
PyObject *output = PyTuple_GET_ITEM(outputs.get(), 0);
Py_INCREF(output);
return output;
}
return outputs.release();
}
PyObject * THPFunction_do_backward(THPFunction *self, PyObject *args)
{
Py_ssize_t num_args = args ? PyTuple_GET_SIZE(args) : 0;
THPUtils_assert(num_args == 2, "_do_backward expects exactly two arguments");
PyObject *raw_grad_output = PyTuple_GET_ITEM(args, 0);
PyObject *retain_variables = PyTuple_GET_ITEM(args, 1);
if (!PyTuple_Check(raw_grad_output) || !PyBool_Check(retain_variables)) {
THPUtils_invalidArguments(args, "_do_backward", 1, "(tuple, bool)");
return NULL;
}
int num_grad_output = PyTuple_GET_SIZE(raw_grad_output);
THPObjectPtr grad_output = PyTuple_New(num_grad_output);
if (!grad_output) return NULL;
#ifdef WITH_CUDA
THCPAutoGPU gpu_guard(-1);
#endif
for (int i = 0; i < num_grad_output; i++) {
PyObject *grad = PyTuple_GET_ITEM(raw_grad_output, i);
// If there's no gradient we have to allocate a buffer ourselves
if (grad == Py_None) {
auto &info = (*self->output_info)[i];
PyObject *tensor_cls = std::get<0>(info);
#ifdef WITH_CUDA
gpu_guard.setDevice(std::get<1>(info));
#endif
std::vector<long> &sizes = std::get<2>(info);
THPObjectPtr grad_size = THPSize_New(sizes.size(), sizes.data());
THPObjectPtr new_grad = PyObject_CallFunctionObjArgs(tensor_cls, grad_size.get(), NULL);
if (!new_grad) return NULL;
THPObjectPtr result = PyObject_CallMethod(new_grad.get(), "zero_", "");
if (!result) return NULL;
grad = new_grad.release();
} else {
Py_INCREF(grad);
}
PyTuple_SET_ITEM(grad_output.get(), i, grad);
}
THPObjectPtr backward_fn = PyObject_GetAttrString((PyObject*)self, "backward");
THPUtils_assert(backward_fn.get(), "function %s doesn't implement a required "
"'backward' method", THPUtils_typename((PyObject*)self));
THPObjectPtr grad_input = PyObject_CallObject(backward_fn, grad_output.get());
if (!grad_input)
return NULL;
if (!PyTuple_Check(grad_input)) {
PyObject *grad_tuple = PyTuple_New(1);
if (!grad_tuple)
return NULL;
PyTuple_SET_ITEM(grad_tuple, 0, grad_input.release());
grad_input = grad_tuple;
}
int num_grads = PyTuple_GET_SIZE(grad_input.get());
int num_prev_fns = self->num_inputs;
THPUtils_assert(num_grads == num_prev_fns, "%s returned an invalid number of "
"gradient tensors (expected %d, but got %d)", THPUtils_typename(self),
num_prev_fns, num_grads);
if (self->backward_hooks) {
PyObject *key, *value;
Py_ssize_t pos = 0;
THPUtils_assert(PyDict_Check(self->backward_hooks), "backward_hooks "
"attribute has to be a dictionary");
while (PyDict_Next(self->backward_hooks, &pos, &key, &value)) {
THPObjectPtr result = PyObject_CallFunctionObjArgs(value,
grad_input.get(), grad_output.get(), NULL);
if (!result)
return NULL;
}
}
if (retain_variables == Py_False) {
Py_XDECREF(self->saved_variables);
self->saved_variables = NULL;
self->has_freed_buffers = 1;
}
return grad_input.release();
}
PyObject *THPFunction_saved_tensors(THPFunction *self, void *_unused)
{
THPUtils_assert(!self->has_freed_buffers, "Trying to backward through the "
"graph second time, but the buffers have already been freed. Please "
"specify retain_variables=True when calling backward for the first time.");
if (!self->saved_variables)
return PyTuple_New(0);
Py_ssize_t num_saved = PyTuple_GET_SIZE(self->saved_variables);
THPObjectPtr saved_tensors = PyTuple_New(num_saved);
if (!saved_tensors)
return NULL;
for (int i = 0; i < num_saved; i++) {
PyObject *tuple = PyTuple_GET_ITEM(self->saved_variables, i);
long expected_version = THPUtils_unpackLong(PyTuple_GET_ITEM(tuple, 1));
THPVariable *variable = (THPVariable*)PyTuple_GET_ITEM(tuple, 0);
int current_version = **variable->version_counter;
THPUtils_assert(expected_version == current_version, "one of the variables "
"needed for gradient computation has been modified by an "
"inplace operation");
Py_INCREF(variable->data);
PyTuple_SET_ITEM(saved_tensors.get(), i, variable->data);
}
return saved_tensors.release();
}
PyObject *THPFunction_previous_functions(THPFunction *self, void *_unused)
{
THPObjectPtr previous_functions = PyTuple_New(self->num_inputs);
if (!previous_functions)
return NULL;
for (int i = 0; i < self->num_inputs; i++) {
THPObjectPtr fn_tuple = PyTuple_New(2);
if (!fn_tuple)
return NULL;
Py_INCREF(self->previous_functions[i].get());
PyTuple_SET_ITEM(fn_tuple.get(), 0, self->previous_functions[i].get());
PyTuple_SET_ITEM(fn_tuple.get(), 1, PyInt_FromLong(self->previous_functions[i].output_nr));
PyTuple_SET_ITEM(previous_functions.get(), i, fn_tuple.release());
}
return previous_functions.release();
}
typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);
static struct PyGetSetDef THPFunction_properties[] = {
{"saved_tensors", (getter)THPFunction_saved_tensors, NULL, NULL, NULL},
{"previous_functions", (getter)THPFunction_previous_functions, NULL, NULL, NULL},
{NULL}
};
static struct PyMemberDef THPFunction_members[] = {
{(char*)"saved_variables", T_OBJECT, offsetof(THPFunction, saved_variables), 0, NULL},
{(char*)"backward_hooks", T_OBJECT, offsetof(THPFunction, backward_hooks), 0, NULL},
{(char*)"to_save", T_OBJECT, offsetof(THPFunction, to_save), 0, NULL},
{(char*)"shared_pairs", T_OBJECT, offsetof(THPFunction, shared_pairs), 0, NULL},
{(char*)"non_differentiable", T_OBJECT, offsetof(THPFunction, non_differentiable), 0, NULL},
{(char*)"dirty_tensors", T_OBJECT, offsetof(THPFunction, dirty_tensors), 0, NULL},
{(char*)"needs_input_grad", T_OBJECT, offsetof(THPFunction, needs_input_grad), 0, NULL},
{(char*)"requires_grad", T_BOOL, offsetof(THPFunction, requires_grad), 0, NULL},
{(char*)"num_inputs", T_INT, offsetof(THPFunction, num_inputs), 0, NULL},
{(char*)"num_outputs", T_INT, offsetof(THPFunction, num_outputs), 0, NULL},
{NULL}
};
static struct PyMethodDef THPFunction_methods[] = {
{(char*)"_do_forward", (PyCFunction)THPFunction_do_forward, METH_VARARGS, NULL},
{(char*)"_do_backward", (PyCFunction)THPFunction_do_backward, METH_VARARGS, NULL},
{NULL}
};
PyTypeObject THPFunctionType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._FunctionBase", /* tp_name */
sizeof(THPFunction), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPFunction_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
NULL, /* tp_doc */
(traverseproc)THPFunction_traverse, /* tp_traverse */
(inquiry)THPFunction_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPFunction_methods, /* tp_methods */
THPFunction_members, /* tp_members */
THPFunction_properties, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPFunction_new /* tp_new */
};
bool THPFunction_initModule(PyObject *module)
{
if (PyType_Ready(&THPFunctionType) < 0)
return false;
Py_INCREF(&THPFunctionType);
PyModule_AddObject(module, "_FunctionBase", (PyObject *)&THPFunctionType);
return true;
}
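A note on the reference counting used throughout THPFunction_do_forward and _do_backward above: PyTuple_SET_ITEM steals a reference to the item, while PyTuple_GET_ITEM only borrows one, which is why the code INCREFs variable->data before packing it into a tuple. The standalone sketch below (not part of the tree; the file name and the demo object are made up, but the CPython calls are the real C API) shows the effect on the refcount; build flags for embedding Python are omitted.

// refcount_demo.cpp - standalone sketch of the rule used in
// THPFunction_do_forward above: PyTuple_SET_ITEM steals a reference, so the
// caller INCREFs first if it wants to keep its own reference to the object.
#include <Python.h>
#include <cstdio>

int main() {
  Py_Initialize();
  PyObject *data = PyList_New(0);              // fresh object, refcount 1
  PyObject *tuple = PyTuple_New(1);
  Py_INCREF(data);                             // keep our reference alive...
  PyTuple_SET_ITEM(tuple, 0, data);            // ...because SET_ITEM steals one
  std::printf("after SET_ITEM: %zd\n", (Py_ssize_t)Py_REFCNT(data));   // 2
  Py_DECREF(tuple);                            // tuple drops its reference
  std::printf("after tuple freed: %zd\n", (Py_ssize_t)Py_REFCNT(data)); // 1
  Py_DECREF(data);                             // release our own reference
  Py_Finalize();
  return 0;
}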

View File

@ -0,0 +1,55 @@
#ifndef THP_FUNCTION_H
#define THP_FUNCTION_H
struct THPFunction;
struct THPFunctionPtr: public THPObjectPtr {
THPFunctionPtr(): THPObjectPtr(nullptr), output_nr(-1) {};
THPFunctionPtr(PyObject *fn, int output_nr):
THPObjectPtr(fn), output_nr(output_nr) {};
THPFunctionPtr(THPFunction *fn, int output_nr):
THPObjectPtr((PyObject*)fn), output_nr(output_nr) {};
THPFunctionPtr(THPFunctionPtr &&other):
THPObjectPtr(std::move(other)), output_nr(other.output_nr) {}
THPFunctionPtr& operator =(THPFunctionPtr &&other) {
output_nr = other.output_nr;
THPObjectPtr::operator=(std::move(other));
return *this;
}
int output_nr;
};
// (class, gpu id, sizes)
using output_info_type = std::tuple<PyObject *, int, std::vector<long>>;
struct THPFunction {
PyObject_HEAD
PyObject *needs_input_grad;
PyObject *saved_variables;
PyObject *backward_hooks;
PyObject *to_save;
PyObject *shared_pairs;
PyObject *non_differentiable;
PyObject *dirty_tensors;
THPFunctionPtr *previous_functions;
std::vector<output_info_type> *output_info;
int num_inputs;
int num_outputs;
char requires_grad;
char has_freed_buffers;
};
bool THPFunction_initModule(PyObject *module);
extern PyObject *THPFunctionClass;
#define THPFunction_Check(obj) PyObject_IsInstance(obj, THPFunctionClass)
#endif

View File

@ -0,0 +1,21 @@
#include <Python.h>
#include "THP.h"
PyObject * THPAutograd_initExtension(PyObject *_unused)
{
PyObject *autograd_module = PyImport_ImportModule("torch.autograd");
THPUtils_assert(autograd_module, "class loader couldn't access "
"torch.autograd module");
PyObject *autograd_dict = PyModule_GetDict(autograd_module);
THPVariableClass = PyMapping_GetItemString(autograd_dict, (char*)"Variable");
THPFunctionClass = PyMapping_GetItemString(autograd_dict, (char*)"Function");
THPUtils_assert(THPVariableClass, "couldn't find Variable class in "
"torch.autograd module");
THPUtils_assert(THPFunctionClass, "couldn't find Function class in "
"torch.autograd module");
Py_RETURN_TRUE;
}

View File

@ -0,0 +1,265 @@
#include <Python.h>
#include <structmember.h>
#include "THP.h"
PyObject *THPVariableClass = NULL;
constexpr size_t CACHE_SIZE = 100000;
static THPVariable *cached_variables[CACHE_SIZE];
static size_t num_cached;
// This helper steals a reference to data and creator
static inline THPVariable * pop_cache(PyObject *data, PyObject *creator, char requires_grad)
{
THPVariable *self = cached_variables[--num_cached];
PyObject_Init((PyObject*)self, Py_TYPE(self));
PyObject_GC_Track(self);
self->is_volatile = 0;
self->version_counter = new THPVariableVersion();
self->grad = NULL;
self->backward_hooks = NULL;
self->requires_grad = requires_grad;
self->data = data;
self->creator = creator;
return self;
}
// This function DOES NOT steal a reference to data
PyObject * THPVariable_NewVolatile(PyObject *data)
{
THPVariable *variable;
if (num_cached > 0) {
Py_INCREF(data);
variable = pop_cache(data, NULL, 0);
} else {
variable = (THPVariable*)PyObject_CallFunctionObjArgs(THPVariableClass, data, NULL);
}
((THPVariable*)variable)->is_volatile = 1;
return (PyObject*)variable;
}
// This function DOES NOT steal a reference to data and creator
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad)
{
if (num_cached > 0) {
Py_INCREF(data);
Py_INCREF(creator);
return (PyObject*)pop_cache(data, creator, requires_grad);
}
return PyObject_CallFunction(THPVariableClass, "OObb", data, creator, (char)0, requires_grad);
}
static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
{
Py_VISIT(self->creator);
Py_VISIT(self->data);
Py_VISIT(self->grad);
Py_VISIT(self->backward_hooks);
return 0;
}
static int THPVariable_clear(THPVariable *self)
{
Py_CLEAR(self->creator);
Py_CLEAR(self->data);
Py_CLEAR(self->grad);
Py_CLEAR(self->backward_hooks);
return 0;
}
static void THPVariable_dealloc(THPVariable* self)
{
Py_XDECREF(self->creator);
Py_XDECREF(self->data);
Py_XDECREF(self->grad);
Py_XDECREF(self->backward_hooks);
delete self->version_counter;
self->version_counter = nullptr;
// We don't want to cache any subclasses
if ((PyObject*)Py_TYPE(self) == THPVariableClass && num_cached < CACHE_SIZE) {
PyObject_GC_UnTrack(self);
cached_variables[num_cached++] = self;
// Variable class is defined in Python code, and as such has a
// Py_TPFLAGS_HEAPTYPE flag set, so python DECREFs the class at each
// object dealloc.
Py_INCREF(Py_TYPE(self));
} else {
Py_TYPE(self)->tp_free((PyObject*)self);
}
}
PyObject *THPVariable_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
THPVariable *self;
if ((PyObject*)type != THPVariableClass || num_cached == 0) {
self = (THPVariable*)type->tp_alloc(type, 0);
self->version_counter = new THPVariableVersion();
} else {
self = pop_cache(NULL, NULL, 0);
}
return (PyObject*)self;
}
int THPVariable_init(THPVariable *self, PyObject *args, PyObject *kwargs)
{
const char *accepted_args[] = {"data", "creator", "volatile", "requires_grad", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Obb", (char**)accepted_args,
&self->data, &self->creator, &self->is_volatile,
&self->requires_grad))
return -1;
Py_INCREF(self->data);
if (self->creator == Py_None)
self->creator = NULL;
Py_XINCREF(self->creator);
if ((self->creator && !THPFunction_Check(self->creator)) || !THPModule_isTensor(self->data))
return -1;
return 0;
}
PyObject * THPVariable_getstate(THPVariable *self)
{
THPUtils_assert(!self->creator, "serialization of non-leaf variables is not "
"implemented yet");
THPObjectPtr state = PyTuple_New(5);
if (!state)
return NULL;
Py_INCREF(self->data);
PyTuple_SET_ITEM(state.get(), 0, self->data);
PyObject *grad = self->grad ? self->grad : Py_None;
Py_INCREF(grad);
PyTuple_SET_ITEM(state.get(), 1, grad);
PyObject *backward_hooks = self->backward_hooks ? self->backward_hooks : Py_None;
Py_INCREF(backward_hooks);
PyTuple_SET_ITEM(state.get(), 2, backward_hooks);
PyTuple_SET_ITEM(state.get(), 3, PyBool_FromLong(self->requires_grad));
PyTuple_SET_ITEM(state.get(), 4, PyBool_FromLong(self->is_volatile));
return state.release();
}
PyObject * THPVariable_setstate(THPVariable *self, PyObject *state)
{
THPUtils_assert(!self->creator, "__setstate__ can only be called on leaf "
"variables");
THPUtils_assert(PyTuple_Check(state), "__setstate__ expects state to be a "
"tuple");
Py_ssize_t size = PyTuple_GET_SIZE(state);
THPUtils_assert(size == 5, "__setstate__ expects state tuple to have 5 "
"elements, but it has %d", size);
#define LOAD(NAME, IDX) \
Py_XDECREF(self->NAME); \
self->NAME = PyTuple_GET_ITEM(state, IDX) == Py_None ? NULL : PyTuple_GET_ITEM(state, IDX); \
Py_XINCREF(self->NAME);
THPUtils_assert(THPModule_isTensor(PyTuple_GET_ITEM(state, 0)), "first "
"element of variable state tuple has to be a tensor");
LOAD(data, 0);
LOAD(grad, 1);
LOAD(backward_hooks, 2);
#undef LOAD
PyObject *requires_grad_obj = PyTuple_GET_ITEM(state, 3);
PyObject *is_volatile_obj = PyTuple_GET_ITEM(state, 4);
THPUtils_assert(PyBool_Check(requires_grad_obj), "requires_grad "
"found in state was expected to be a bool, but got %s",
THPUtils_typename(requires_grad_obj));
THPUtils_assert(PyBool_Check(is_volatile_obj), "is_volatile "
"found in state was expected to be a bool, but got %s",
THPUtils_typename(is_volatile_obj));
self->requires_grad = requires_grad_obj == Py_True ? 1 : 0;
self->is_volatile = is_volatile_obj == Py_True ? 1 : 0;
Py_RETURN_NONE;
}
typedef PyObject *(*getter)(PyObject *, void *);
typedef int (*setter)(PyObject *, PyObject *, void *);
PyObject *THPVariable_get_version(THPVariable *self)
{
return PyInt_FromLong(**self->version_counter);
}
static struct PyGetSetDef THPVariable_properties[] = {
{"_version", (getter)THPVariable_get_version, NULL, NULL, NULL},
{NULL}
};
static struct PyMemberDef THPVariable_members[] = {
{(char*)"creator", T_OBJECT, offsetof(THPVariable, creator), 0, NULL},
{(char*)"data", T_OBJECT, offsetof(THPVariable, data), 0, NULL},
{(char*)"_grad", T_OBJECT, offsetof(THPVariable, grad), 0, NULL},
{(char*)"volatile", T_BOOL, offsetof(THPVariable, is_volatile), 0, NULL},
{(char*)"output_nr", T_INT, offsetof(THPVariable, output_nr), 0, NULL},
{(char*)"backward_hooks", T_OBJECT, offsetof(THPVariable, backward_hooks), 0, NULL},
{(char*)"_requires_grad", T_BOOL, offsetof(THPVariable, requires_grad), 0, NULL},
{NULL}
};
static struct PyMethodDef THPVariable_methods[] = {
{"__getstate__", (PyCFunction)THPVariable_getstate, METH_NOARGS, NULL},
{"__setstate__", (PyCFunction)THPVariable_setstate, METH_O, NULL},
{NULL}
};
PyTypeObject THPVariableType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._VariableBase", /* tp_name */
sizeof(THPVariable), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPVariable_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
NULL, /* tp_doc */
(traverseproc)THPVariable_traverse, /* tp_traverse */
(inquiry)THPVariable_clear, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPVariable_methods, /* tp_methods */
THPVariable_members, /* tp_members */
THPVariable_properties, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)THPVariable_init, /* tp_init */
0, /* tp_alloc */
THPVariable_new /* tp_new */
};
bool THPVariable_initModule(PyObject *module)
{
if (PyType_Ready(&THPVariableType) < 0)
return false;
Py_INCREF(&THPVariableType);
PyModule_AddObject(module, "_VariableBase", (PyObject *)&THPVariableType);
return true;
}

View File

@ -0,0 +1,56 @@
#ifndef THP_VARIABLE_H
#define THP_VARIABLE_H
struct THPVariableVersion {
THPVariableVersion() {
version_block = new int[2];
version_block[0] = 0;
version_block[1] = 1;
};
int operator++(int) { return version_block[0]++; }
int operator*() { return *version_block; }
int refcnt() { return version_block[1]; }
void join_with(THPVariableVersion &other) {
cleanup();
version_block = other.version_block;
version_block[1]++;
}
void cleanup() {
if (--version_block[1])
return;
delete[] version_block;
version_block = nullptr;
}
~THPVariableVersion() { cleanup(); }
int *version_block;
};
struct THPVariable {
PyObject_HEAD
PyObject *creator;
PyObject *data;
PyObject *grad;
PyObject *backward_hooks;
THPVariableVersion *version_counter;
int output_nr;
char is_volatile;
char requires_grad;
};
bool THPVariable_initModule(PyObject *module);
extern PyObject *THPVariableClass;
PyObject * THPVariable_NewVolatile(PyObject *data);
PyObject * THPVariable_New(PyObject *data, PyObject *creator, char requires_grad);
#define THPVariable_Check(obj) \
(THPVariableClass && \
PyObject_IsInstance(obj, THPVariableClass))
#endif
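THPVariableVersion above is a tiny shared, refcounted version block: join_with makes two variables share one counter, so an in-place bump through either is visible to both. This is what _join_version_counters uses for mark_shared_storages, and what the saved_tensors getter checks against the recorded version. Below is a standalone sketch; the struct is copied here only so the example compiles on its own, and the main() scenario is illustrative.

// version_counter_demo.cpp - standalone sketch of the THPVariableVersion
// semantics defined above.
#include <cstdio>

struct THPVariableVersion {
  THPVariableVersion() {
    version_block = new int[2];
    version_block[0] = 0;   // version number
    version_block[1] = 1;   // refcount of the shared block
  }
  int operator++(int) { return version_block[0]++; }
  int operator*() { return *version_block; }
  void join_with(THPVariableVersion &other) {
    cleanup();
    version_block = other.version_block;
    version_block[1]++;
  }
  void cleanup() {
    if (--version_block[1]) return;
    delete[] version_block;
    version_block = nullptr;
  }
  ~THPVariableVersion() { cleanup(); }
  int *version_block;
};

int main() {
  THPVariableVersion a, b;
  b.join_with(a);            // b now shares a's version block (refcount 2)
  a++;                       // an "in-place op" on a bumps the shared counter
  std::printf("*a = %d, *b = %d\n", *a, *b);   // both print 1
  return 0;                  // destructors drop the refcount, block freed once
}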

View File

@ -1,5 +1,7 @@
#include "byte_order.h"
#include <string.h>
static inline uint16_t decodeUInt16LE(const uint8_t *data) {
return (data[0]<<0) | (data[1]<<8);
}
@ -79,3 +81,71 @@ void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order,
src += sizeof(double);
}
}
template<size_t size>
static void swapBytes(uint8_t *ptr)
{
uint8_t tmp;
for (size_t i = 0; i < size / 2; i++) {
tmp = ptr[i];
ptr[i] = ptr[size-1-i];
ptr[size-1-i] = tmp;
}
}
void THP_encodeInt16Buffer(uint8_t* dst, const int16_t* src, THPByteOrder order, size_t len)
{
memcpy(dst, src, sizeof(int16_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
swapBytes<sizeof(int16_t)>(dst);
dst += sizeof(int16_t);
}
}
}
void THP_encodeInt32Buffer(uint8_t* dst, const int32_t* src, THPByteOrder order, size_t len)
{
memcpy(dst, src, sizeof(int32_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
swapBytes<sizeof(int32_t)>(dst);
dst += sizeof(int32_t);
}
}
}
void THP_encodeInt64Buffer(uint8_t* dst, const int64_t* src, THPByteOrder order, size_t len)
{
memcpy(dst, src, sizeof(int64_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
swapBytes<sizeof(int64_t)>(dst);
dst += sizeof(int64_t);
}
}
}
void THP_encodeFloatBuffer(uint8_t* dst, const float* src, THPByteOrder order, size_t len)
{
memcpy(dst, src, sizeof(float) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
swapBytes<sizeof(float)>(dst);
dst += sizeof(float);
}
}
}
void THP_encodeDoubleBuffer(uint8_t* dst, const double* src, THPByteOrder order, size_t len)
{
memcpy(dst, src, sizeof(double) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
swapBytes<sizeof(double)>(dst);
dst += sizeof(double);
}
}
}
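The encode helpers above are a memcpy plus an in-place byte reversal of each element when the requested order differs from the native one. The standalone sketch below repeats the swap template (same name, copied only for self-containment; the test value is made up) and shows that swapping twice is a round trip.

// byteswap_demo.cpp - standalone sketch of the swap used by the THP_encode*
// functions above: reverse the bytes of each element in place.
#include <cstdint>
#include <cstdio>
#include <cstring>

template<size_t size>
static void swapBytes(uint8_t *ptr)
{
  for (size_t i = 0; i < size / 2; i++) {
    uint8_t tmp = ptr[i];
    ptr[i] = ptr[size - 1 - i];
    ptr[size - 1 - i] = tmp;
  }
}

int main() {
  int32_t value = 0x11223344;
  uint8_t buf[sizeof(int32_t)];
  std::memcpy(buf, &value, sizeof(value));
  swapBytes<sizeof(int32_t)>(buf);   // now the opposite endianness
  swapBytes<sizeof(int32_t)>(buf);   // swapping twice restores the original
  int32_t round_trip;
  std::memcpy(&round_trip, buf, sizeof(round_trip));
  std::printf("0x%08x -> 0x%08x\n", (unsigned)value, (unsigned)round_trip);
  return 0;
}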

View File

@ -1,3 +1,6 @@
#ifndef THP_BYTE_ORDER_H
#define THP_BYTE_ORDER_H
#include <stdint.h>
#include <stddef.h>
@ -7,8 +10,17 @@ enum THPByteOrder {
};
THPByteOrder THP_nativeByteOrder();
void THP_decodeInt16Buffer(int16_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt32Buffer(int32_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order, size_t len);
void THP_encodeInt16Buffer(uint8_t* dst, const int16_t* src, THPByteOrder order, size_t len);
void THP_encodeInt32Buffer(uint8_t* dst, const int32_t* src, THPByteOrder order, size_t len);
void THP_encodeInt64Buffer(uint8_t* dst, const int64_t* src, THPByteOrder order, size_t len);
void THP_encodeFloatBuffer(uint8_t* dst, const float* src, THPByteOrder order, size_t len);
void THP_encodeDoubleBuffer(uint8_t* dst, const double* src, THPByteOrder order, size_t len);
#endif

View File

@ -0,0 +1,61 @@
#include "AutoGPU.h"
#include "THCP.h"
#include <THC/THC.h>
THCPAutoGPU::THCPAutoGPU(int device_id) {
setDevice(device_id);
}
THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self) {
if (self && setObjDevice(self))
return;
if (!args)
return;
for (int i = 0; i < PyTuple_Size(args); i++) {
PyObject *arg = PyTuple_GET_ITEM(args, i);
if (setObjDevice(arg)) return;
}
}
bool THCPAutoGPU::setObjDevice(PyObject *obj) {
int new_device = -1;
PyObject *obj_type = (PyObject*)Py_TYPE(obj);
if (obj_type == THCPDoubleTensorClass) {
new_device = THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
} else if (obj_type == THCPFloatTensorClass) {
new_device = THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
} else if (obj_type == THCPHalfTensorClass) {
new_device = THCudaHalfTensor_getDevice(LIBRARY_STATE ((THCPHalfTensor*)obj)->cdata);
} else if (obj_type == THCPLongTensorClass) {
new_device = THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
} else if (obj_type == THCPIntTensorClass) {
new_device = THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
} else if (obj_type == THCPShortTensorClass) {
new_device = THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
} else if (obj_type == THCPCharTensorClass) {
new_device = THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
} else if (obj_type == THCPByteTensorClass) {
new_device = THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
}
return setDevice(new_device);
}
bool THCPAutoGPU::setDevice(int new_device) {
if (new_device == -1)
return false;
if (device == -1)
THCudaCheck(cudaGetDevice(&device));
if (new_device != device)
THCPModule_setDevice(new_device);
return true;
}
// This can throw... But if it does I have no idea how to recover.
THCPAutoGPU::~THCPAutoGPU() {
if (device != -1)
THCPModule_setDevice(device);
}
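THCPAutoGPU is a scope guard: it remembers the current device the first time it actually switches, and the destructor switches back. The standalone sketch below shows the same save/switch/restore pattern with a plain int standing in for cudaGetDevice/cudaSetDevice, so it runs without CUDA; DeviceGuard and current_device are made-up names for the illustration, not part of the real API.

// autogpu_pattern_demo.cpp - standalone sketch of the RAII save/restore
// pattern implemented by THCPAutoGPU above.
#include <cstdio>

static int current_device = 0;   // stand-in for the active CUDA device

class DeviceGuard {
public:
  explicit DeviceGuard(int new_device = -1) { setDevice(new_device); }
  ~DeviceGuard() {
    if (saved_device != -1)
      current_device = saved_device;   // restore on scope exit
  }
  bool setDevice(int new_device) {
    if (new_device == -1) return false;
    if (saved_device == -1)
      saved_device = current_device;   // remember the original device once
    if (new_device != current_device)
      current_device = new_device;
    return true;
  }
private:
  int saved_device = -1;
};

int main() {
  std::printf("before: %d\n", current_device);    // 0
  {
    DeviceGuard guard(3);                         // switch to "device 3"
    std::printf("inside: %d\n", current_device);  // 3
  }                                               // guard restores device 0
  std::printf("after:  %d\n", current_device);    // 0
  return 0;
}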

16
torch/csrc/cuda/AutoGPU.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef THCP_AUTOGPU_INC
#define THCP_AUTOGPU_INC
#include <Python.h>
class THCPAutoGPU {
public:
THCPAutoGPU(int device_id=-1);
THCPAutoGPU(PyObject *args, PyObject *self=NULL);
~THCPAutoGPU();
bool setObjDevice(PyObject *obj);
bool setDevice(int new_device);
int device = -1;
};
#endif

View File

@ -114,6 +114,24 @@ PyObject * THCPModule_getDeviceCount_wrap(PyObject *self)
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_getCurrentStream_wrap(PyObject *self)
{
HANDLE_TH_ERRORS
THCStream* stream = THCState_getStream(state);
return PyLong_FromVoidPtr(stream);
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_setStream_wrap(PyObject *self, PyObject *obj)
{
HANDLE_TH_ERRORS
THPUtils_assert(PyLong_Check(obj), "invalid stream");
THCStream* stream = (THCStream *)PyLong_AsVoidPtr(obj);
THCState_setStream(state, stream);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
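// Note: getCurrentStream/setStream above hand a THCStream* across the Python
// boundary as a plain integer via PyLong_FromVoidPtr / PyLong_AsVoidPtr. A
// standalone sketch of that pointer <-> int round trip follows (FakeStream and
// the file name are made up for the example; the embedding boilerplate is only
// for the demo).

// voidptr_demo.cpp
#include <Python.h>
#include <cstdio>

struct FakeStream { int device; };   // stand-in for THCStream

int main() {
  Py_Initialize();
  FakeStream stream{2};
  PyObject *handle = PyLong_FromVoidPtr(&stream);   // pointer -> Python int
  void *ptr = PyLong_AsVoidPtr(handle);             // Python int -> pointer
  std::printf("same pointer: %d, device: %d\n",
              ptr == &stream, static_cast<FakeStream*>(ptr)->device);
  Py_DECREF(handle);
  Py_Finalize();
  return 0;
}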
PyObject * THCPModule_isDriverSufficient(PyObject *self)
{
int count;
@ -140,11 +158,10 @@ PyObject * THCPModule_getDriverVersion(PyObject *self)
PyObject * THCPModule_getRNGState(PyObject *_unused)
{
HANDLE_TH_ERRORS
THByteTensorPtr _t = THByteTensor_new();
THCRandom_getRNGState(state, _t.get());
PyObject *_ret = THPByteTensor_New(_t.get());
_t.release();
return _ret;
THPByteTensorPtr res = (THPByteTensor *)THPByteTensor_NewEmpty();
if (!res) return NULL;
THCRandom_getRNGState(state, res->cdata);
return (PyObject *)res.release();
END_HANDLE_TH_ERRORS
}
@ -200,6 +217,35 @@ PyObject * THCPModule_initialSeed(PyObject *_unused)
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_cudaHostAllocator(PyObject *_unused)
{
HANDLE_TH_ERRORS
THAllocator* allocator = THCState_getCudaHostAllocator(state);
return PyLong_FromVoidPtr(allocator);
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_cudaSynchronize(PyObject *_unused)
{
HANDLE_TH_ERRORS
THCudaCheck(cudaDeviceSynchronize());
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
PyObject * THCPModule_getLibPath(PyObject *_unused)
{
#define _STR(x) #x
#define STR(x) _STR(x)
#if PY_MAJOR_VERSION == 2
return PyString_FromString(STR(CUDA_LIB_PATH));
#else
return PyUnicode_FromString(STR(CUDA_LIB_PATH));
#endif
#undef STR
#undef _STR
}
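// Note: getLibPath above relies on the classic two-level stringification
// trick: STR(x) passes x through _STR so the macro argument is expanded
// *before* it is stringified, which is what turns a -D CUDA_LIB_PATH=...
// definition from the build into a string literal. A standalone sketch
// (LIB_PATH is a made-up stand-in for CUDA_LIB_PATH):

// stringify_demo.cpp
#include <cstdio>

#define LIB_PATH /usr/local/cuda/lib64   // normally supplied as -DLIB_PATH=...
#define _STR(x) #x
#define STR(x) _STR(x)

int main() {
  std::printf("%s\n", STR(LIB_PATH));    // prints /usr/local/cuda/lib64
  std::printf("%s\n", _STR(LIB_PATH));   // single level: prints LIB_PATH
  return 0;
}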
////////////////////////////////////////////////////////////////////////////////
// Cuda module initialization
////////////////////////////////////////////////////////////////////////////////
@ -207,7 +253,7 @@ PyObject * THCPModule_initialSeed(PyObject *_unused)
bool THCPModule_initCuda(PyObject *module_dict) {
#define ASSERT_TRUE(cond) if (!(cond)) { return false; }
state = THCState_alloc();
THCCachingAllocator_init(THCState_getDeviceAllocator(state));
THCState_setDeviceAllocator(state, THCCachingAllocator_get());
THCudaInit(state);
#ifdef USE_MAGMA

View File

@ -9,9 +9,18 @@ void TH_CONCAT_3(_THPCopy_,THNAME,_copyShort)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyChar)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyByte)(PyObject *dst, PyObject *src);
#ifdef CUDA_HALF_TENSOR
#define THCP_COPY_CUDA_HALF(THNAME) \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)(PyObject *dst, PyObject *src);
#else
#define THCP_COPY_CUDA_HALF(THNAME)
#endif
#define DECLARE_CUDA_COPY(THNAME) \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaFloat)(PyObject *dst, PyObject *src); \
THCP_COPY_CUDA_HALF(THNAME) \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaLong)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)(PyObject *dst, PyObject *src); \
void TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)(PyObject *dst, PyObject *src); \
@ -28,6 +37,7 @@ DECLARE_CUDA_COPY(THByteTensor)
DECLARE_COPY(THCudaDoubleTensor)
DECLARE_COPY(THCudaTensor)
DECLARE_COPY(THCudaHalfTensor)
DECLARE_COPY(THCudaLongTensor)
DECLARE_COPY(THCudaIntTensor)
DECLARE_COPY(THCudaShortTensor)
@ -36,6 +46,7 @@ DECLARE_COPY(THCudaByteTensor)
DECLARE_CUDA_COPY(THCudaDoubleTensor)
DECLARE_CUDA_COPY(THCudaTensor)
DECLARE_CUDA_COPY(THCudaHalfTensor)
DECLARE_CUDA_COPY(THCudaLongTensor)
DECLARE_CUDA_COPY(THCudaIntTensor)
DECLARE_CUDA_COPY(THCudaShortTensor)
@ -52,6 +63,7 @@ DECLARE_CUDA_COPY(THByteStorage)
DECLARE_COPY(THCudaDoubleStorage)
DECLARE_COPY(THCudaStorage)
DECLARE_COPY(THCudaHalfStorage)
DECLARE_COPY(THCudaLongStorage)
DECLARE_COPY(THCudaIntStorage)
DECLARE_COPY(THCudaShortStorage)
@ -60,12 +72,14 @@ DECLARE_COPY(THCudaByteStorage)
DECLARE_CUDA_COPY(THCudaDoubleStorage)
DECLARE_CUDA_COPY(THCudaStorage)
DECLARE_CUDA_COPY(THCudaHalfStorage)
DECLARE_CUDA_COPY(THCudaLongStorage)
DECLARE_CUDA_COPY(THCudaIntStorage)
DECLARE_CUDA_COPY(THCudaShortStorage)
DECLARE_CUDA_COPY(THCudaCharStorage)
DECLARE_CUDA_COPY(THCudaByteStorage)
#undef DECLARE_COPY
#undef THCP_COPY_CUDA_HALF
#define DECLARE_ASYNC_COPY(TYPE) \
void TH_CONCAT_3(THCP,TYPE,Tensor_copyAsyncCPU)(PyObject *dst, PyObject *src); \
@ -98,7 +112,19 @@ extern PyObject *THPByteTensorClass;
static bool THCPModule_initCopy()
{
// TODO: half
#ifdef CUDA_HALF_TENSOR
#define HALF_TENSOR_GPU_CPU_COPY(TYPE, THNAME) \
tensor_copy_handlers.insert({{TYPE, THCPHalfTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#define HALF_TENSOR_GPU_GPU_COPY(TYPE, THNAME) \
tensor_copy_handlers.insert({{TYPE, THCPHalfTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#define HALF_TENSOR_GPU_GPU_COPY_ASYNC(TYPE, THNAME) \
tensor_async_copy_handlers.insert({{TYPE, THCPHalfTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#else
#define HALF_TENSOR_GPU_CPU_COPY(TYPE, THNAME)
#define HALF_TENSOR_GPU_GPU_COPY(TYPE, THNAME)
#define HALF_TENSOR_GPU_GPU_COPY_ASYNC(TYPE, THNAME)
#endif
#define INIT_TENSOR_GPU_CPU_COPY(TYPE, THNAME) \
tensor_copy_handlers.insert({{TYPE, THCPDoubleTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
tensor_copy_handlers.insert({{TYPE, THCPFloatTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaFloat)}); \
@ -106,7 +132,8 @@ static bool THCPModule_initCopy()
tensor_copy_handlers.insert({{TYPE, THCPIntTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)}); \
tensor_copy_handlers.insert({{TYPE, THCPShortTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
tensor_copy_handlers.insert({{TYPE, THCPCharTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
tensor_copy_handlers.insert({{TYPE, THCPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)});
tensor_copy_handlers.insert({{TYPE, THCPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_TENSOR_GPU_CPU_COPY(TYPE, THNAME)
#define INIT_TENSOR_GPU_GPU_COPY(TYPE, THNAME) \
tensor_copy_handlers.insert({{TYPE, THCPDoubleTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
@ -116,6 +143,7 @@ static bool THCPModule_initCopy()
tensor_copy_handlers.insert({{TYPE, THCPShortTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
tensor_copy_handlers.insert({{TYPE, THCPCharTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
tensor_copy_handlers.insert({{TYPE, THCPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_TENSOR_GPU_GPU_COPY(TYPE, THNAME) \
/* CUDA copy launches are always async */ \
tensor_async_copy_handlers.insert({{TYPE, THCPDoubleTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
tensor_async_copy_handlers.insert({{TYPE, THCPFloatTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaFloat)}); \
@ -123,7 +151,8 @@ static bool THCPModule_initCopy()
tensor_async_copy_handlers.insert({{TYPE, THCPIntTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)}); \
tensor_async_copy_handlers.insert({{TYPE, THCPShortTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
tensor_async_copy_handlers.insert({{TYPE, THCPCharTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
tensor_async_copy_handlers.insert({{TYPE, THCPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)});
tensor_async_copy_handlers.insert({{TYPE, THCPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_TENSOR_GPU_GPU_COPY_ASYNC(TYPE, THNAME)
#define INIT_TENSOR_CPU_GPU_COPY(TYPE, THNAME) \
tensor_copy_handlers.insert({{TYPE, THPDoubleTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyDouble)}); \
@ -132,7 +161,7 @@ static bool THCPModule_initCopy()
tensor_copy_handlers.insert({{TYPE, THPIntTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyInt)}); \
tensor_copy_handlers.insert({{TYPE, THPShortTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyShort)}); \
tensor_copy_handlers.insert({{TYPE, THPCharTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyChar)}); \
tensor_copy_handlers.insert({{TYPE, THPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyByte)});
tensor_copy_handlers.insert({{TYPE, THPByteTensorClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyByte)}); \
#define INIT_TENSOR_ASYNC_COPY(TYPE) \
tensor_async_copy_handlers.insert({{TH_CONCAT_3(THP,TYPE,TensorClass), TH_CONCAT_3(THCP,TYPE,TensorClass)}, TH_CONCAT_3(THP,TYPE,Tensor_copyAsyncGPU)}); \
@ -148,6 +177,7 @@ static bool THCPModule_initCopy()
INIT_TENSOR_GPU_GPU_COPY(THCPDoubleTensorClass, THCudaDoubleTensor);
INIT_TENSOR_GPU_GPU_COPY(THCPFloatTensorClass, THCudaTensor);
INIT_TENSOR_GPU_GPU_COPY(THCPHalfTensorClass, THCudaHalfTensor);
INIT_TENSOR_GPU_GPU_COPY(THCPLongTensorClass, THCudaLongTensor);
INIT_TENSOR_GPU_GPU_COPY(THCPIntTensorClass, THCudaIntTensor);
INIT_TENSOR_GPU_GPU_COPY(THCPShortTensorClass, THCudaShortTensor);
@ -156,6 +186,7 @@ static bool THCPModule_initCopy()
INIT_TENSOR_CPU_GPU_COPY(THCPDoubleTensorClass, THCudaDoubleTensor);
INIT_TENSOR_CPU_GPU_COPY(THCPFloatTensorClass, THCudaTensor);
INIT_TENSOR_CPU_GPU_COPY(THCPHalfTensorClass, THCudaHalfTensor);
INIT_TENSOR_CPU_GPU_COPY(THCPLongTensorClass, THCudaLongTensor);
INIT_TENSOR_CPU_GPU_COPY(THCPIntTensorClass, THCudaIntTensor);
INIT_TENSOR_CPU_GPU_COPY(THCPShortTensorClass, THCudaShortTensor);
@ -170,6 +201,20 @@ static bool THCPModule_initCopy()
INIT_TENSOR_ASYNC_COPY(Char)
INIT_TENSOR_ASYNC_COPY(Byte)
#ifdef CUDA_HALF_TENSOR
#define HALF_STORAGE_GPU_CPU_COPY(TYPE, THNAME) \
storage_copy_handlers.insert({{TYPE, THCPHalfStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#define HALF_STORAGE_GPU_GPU_COPY(TYPE, THNAME) \
storage_copy_handlers.insert({{TYPE, THCPHalfStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#define HALF_STORAGE_GPU_GPU_COPY_ASYNC(TYPE, THNAME) \
storage_async_copy_handlers.insert({{TYPE, THCPHalfStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaHalf)});
#else
#define HALF_STORAGE_GPU_CPU_COPY(TYPE, THNAME)
#define HALF_STORAGE_GPU_GPU_COPY(TYPE, THNAME)
#define HALF_STORAGE_GPU_GPU_COPY_ASYNC(TYPE, THNAME)
#endif
#define INIT_STORAGE_GPU_CPU_COPY(TYPE, THNAME) \
storage_copy_handlers.insert({{TYPE, THCPDoubleStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
storage_copy_handlers.insert({{TYPE, THCPFloatStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaFloat)}); \
@ -177,7 +222,8 @@ static bool THCPModule_initCopy()
storage_copy_handlers.insert({{TYPE, THCPIntStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)}); \
storage_copy_handlers.insert({{TYPE, THCPShortStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
storage_copy_handlers.insert({{TYPE, THCPCharStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
storage_copy_handlers.insert({{TYPE, THCPByteStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)});
storage_copy_handlers.insert({{TYPE, THCPByteStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_STORAGE_GPU_CPU_COPY(TYPE, THNAME)
#define INIT_STORAGE_GPU_GPU_COPY(TYPE, THNAME) \
storage_copy_handlers.insert({{TYPE, THCPDoubleStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
@ -186,7 +232,17 @@ static bool THCPModule_initCopy()
storage_copy_handlers.insert({{TYPE, THCPIntStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)}); \
storage_copy_handlers.insert({{TYPE, THCPShortStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
storage_copy_handlers.insert({{TYPE, THCPCharStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
storage_copy_handlers.insert({{TYPE, THCPByteStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)});
storage_copy_handlers.insert({{TYPE, THCPByteStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_STORAGE_GPU_GPU_COPY(TYPE, THNAME) \
/* CUDA copy launches are always async */ \
storage_async_copy_handlers.insert({{TYPE, THCPDoubleStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaDouble)}); \
storage_async_copy_handlers.insert({{TYPE, THCPFloatStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaFloat)}); \
storage_async_copy_handlers.insert({{TYPE, THCPLongStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaLong)}); \
storage_async_copy_handlers.insert({{TYPE, THCPIntStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaInt)}); \
storage_async_copy_handlers.insert({{TYPE, THCPShortStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaShort)}); \
storage_async_copy_handlers.insert({{TYPE, THCPCharStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaChar)}); \
storage_async_copy_handlers.insert({{TYPE, THCPByteStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyCudaByte)}); \
HALF_STORAGE_GPU_GPU_COPY_ASYNC(TYPE, THNAME)
#define INIT_STORAGE_CPU_GPU_COPY(TYPE, THNAME) \
storage_copy_handlers.insert({{TYPE, THPDoubleStorageClass}, TH_CONCAT_3(_THPCopy_,THNAME,_copyDouble)}); \
@ -207,6 +263,7 @@ static bool THCPModule_initCopy()
INIT_STORAGE_GPU_GPU_COPY(THCPDoubleStorageClass, THCudaDoubleStorage);
INIT_STORAGE_GPU_GPU_COPY(THCPFloatStorageClass, THCudaStorage);
INIT_STORAGE_GPU_GPU_COPY(THCPHalfStorageClass, THCudaHalfStorage);
INIT_STORAGE_GPU_GPU_COPY(THCPLongStorageClass, THCudaLongStorage);
INIT_STORAGE_GPU_GPU_COPY(THCPIntStorageClass, THCudaIntStorage);
INIT_STORAGE_GPU_GPU_COPY(THCPShortStorageClass, THCudaShortStorage);
@ -215,6 +272,7 @@ static bool THCPModule_initCopy()
INIT_STORAGE_CPU_GPU_COPY(THCPDoubleStorageClass, THCudaDoubleStorage);
INIT_STORAGE_CPU_GPU_COPY(THCPFloatStorageClass, THCudaStorage);
INIT_STORAGE_CPU_GPU_COPY(THCPHalfStorageClass, THCudaHalfStorage);
INIT_STORAGE_CPU_GPU_COPY(THCPLongStorageClass, THCudaLongStorage);
INIT_STORAGE_CPU_GPU_COPY(THCPIntStorageClass, THCudaIntStorage);
INIT_STORAGE_CPU_GPU_COPY(THCPShortStorageClass, THCudaShortStorage);
@ -229,4 +287,10 @@ static bool THCPModule_initCopy()
#undef INIT_STORAGE_GPU_CPU_COPY
#undef INIT_STORAGE_GPU_GPU_COPY
#undef INIT_STORAGE_CPU_GPU_COPY
#undef HALF_TENSOR_GPU_CPU_COPY
#undef HALF_TENSOR_GPU_GPU_COPY
#undef HALF_TENSOR_GPU_GPU_COPY_ASYNC
#undef HALF_STORAGE_GPU_CPU_COPY
#undef HALF_STORAGE_GPU_GPU_COPY
#undef HALF_STORAGE_GPU_GPU_COPY_ASYNC
}

110
torch/csrc/cuda/Stream.cpp Normal file
View File

@ -0,0 +1,110 @@
#include "Stream.h"
#include "THP.h"
#include "Module.h"
#include <structmember.h>
#include <cuda_runtime_api.h>
PyObject *THCPStreamClass = NULL;
static PyObject * THCPStream_pynew(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
int current_device;
THCudaCheck(cudaGetDevice(&current_device));
THPObjectPtr ptr = (PyObject *)type->tp_alloc(type, 0);
THCPStream* self = (THCPStream *)ptr.get();
THCStream* stream = NULL;
if (kwargs && PyDict_Size(kwargs) > 0) {
PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "_cdata");
if (cdata_ptr && PyDict_Size(kwargs) == 1 && THPUtils_checkLong(cdata_ptr)) {
stream = (THCStream*) PyLong_AsVoidPtr(cdata_ptr);
if (stream) {
THCStream_retain(stream);
}
} else {
THPUtils_setError("torch.cuda.Stream(): invalid keyword arguments");
return NULL;
}
} else {
stream = THCStream_new(cudaStreamNonBlocking);
}
self->cdata = stream;
self->device = stream ? stream->device : current_device;
self->cuda_stream = stream ? stream->stream : NULL;
return (PyObject *)ptr.release();
END_HANDLE_TH_ERRORS
}
static void THCPStream_dealloc(THCPStream* self)
{
THCStream_free(self->cdata);
Py_TYPE(self)->tp_free((PyObject*)self);
}
static struct PyMemberDef THCPStream_members[] = {
{(char*)"_cdata", T_ULONGLONG, offsetof(THCPStream, cdata), READONLY, NULL},
{(char*)"device", T_INT, offsetof(THCPStream, device), READONLY, NULL},
{(char*)"cuda_stream", T_ULONGLONG, offsetof(THCPStream, cuda_stream), READONLY, NULL},
{NULL}
};
static PyMethodDef THCPStream_methods[] = {
{NULL}
};
PyTypeObject THCPStreamType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._CudaStreamBase", /* tp_name */
sizeof(THCPStream), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THCPStream_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THCPStream_methods, /* tp_methods */
THCPStream_members, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THCPStream_pynew, /* tp_new */
};
bool THCPStream_init(PyObject *module)
{
THCPStreamClass = (PyObject*)&THCPStreamType;
if (PyType_Ready(&THCPStreamType) < 0)
return false;
Py_INCREF(&THCPStreamType);
PyModule_AddObject(module, "_CudaStreamBase", (PyObject *)&THCPStreamType);
return true;
}

17
torch/csrc/cuda/Stream.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef THCP_STREAM_INC
#define THCP_STREAM_INC
#include <Python.h>
#include <THC/THC.h>
struct THCPStream {
PyObject_HEAD
THCStream *cdata;
int device;
cudaStream_t cuda_stream;
};
bool THCPStream_init(PyObject *module);
#endif // THCP_STREAM_INC

View File

@ -3,12 +3,15 @@
#include <TH/TH.h>
#include <THC/THC.h>
#include <THC/THCHalf.h>
#include "torch/csrc/THP.h"
#include "serialization.h"
#include "AutoGPU.h"
#include "Module.h"
#include "Storage.h"
#include "Tensor.h"
#include "Stream.h"
#ifdef _THP_CORE
#include "utils.h"
#endif

View File

@ -10,50 +10,6 @@
#include "override_macros.h"
THCPAutoGPU::THCPAutoGPU(PyObject *args, PyObject *self) {
if (self && setDevice(self))
return;
if (!args)
return;
for (int i = 0; i < PyTuple_Size(args); i++) {
PyObject *arg = PyTuple_GET_ITEM(args, i);
if (setDevice(arg)) return;
}
}
bool THCPAutoGPU::setDevice(PyObject *obj) {
int new_device = -1;
PyObject *obj_type = (PyObject*)Py_TYPE(obj);
if (obj_type == THCPDoubleTensorClass) {
new_device = THCudaDoubleTensor_getDevice(LIBRARY_STATE ((THCPDoubleTensor*)obj)->cdata);
} else if (obj_type == THCPFloatTensorClass) {
new_device = THCudaTensor_getDevice(LIBRARY_STATE ((THCPFloatTensor*)obj)->cdata);
} else if (obj_type == THCPLongTensorClass) {
new_device = THCudaLongTensor_getDevice(LIBRARY_STATE ((THCPLongTensor*)obj)->cdata);
} else if (obj_type == THCPIntTensorClass) {
new_device = THCudaIntTensor_getDevice(LIBRARY_STATE ((THCPIntTensor*)obj)->cdata);
} else if (obj_type == THCPShortTensorClass) {
new_device = THCudaShortTensor_getDevice(LIBRARY_STATE ((THCPShortTensor*)obj)->cdata);
} else if (obj_type == THCPCharTensorClass) {
new_device = THCudaCharTensor_getDevice(LIBRARY_STATE ((THCPCharTensor*)obj)->cdata);
} else if (obj_type == THCPByteTensorClass) {
new_device = THCudaByteTensor_getDevice(LIBRARY_STATE ((THCPByteTensor*)obj)->cdata);
}
if (new_device != -1) {
THCudaCheck(cudaGetDevice(&device));
THCPModule_setDevice(new_device);
return true;
}
return false;
}
// This can throw... But if it does I have no idea how to recover.
THCPAutoGPU::~THCPAutoGPU() {
if (device != -1)
THCPModule_setDevice(device);
}
#define THC_GENERIC_FILE "torch/csrc/generic/Tensor.cpp"
#include <THC/THCGenerateAllTypes.h>

View File

@ -1,14 +1,6 @@
#ifndef THCP_TENSOR_INC
#define THCP_TENSOR_INC
class THCPAutoGPU {
public:
THCPAutoGPU(PyObject *args, PyObject *self=NULL);
~THCPAutoGPU();
bool setDevice(PyObject *obj);
int device = -1;
};
#define THCPTensor TH_CONCAT_3(THCP,Real,Tensor)
#define THCPTensorStr TH_CONCAT_STRING_3(torch.cuda.,Real,Tensor)
#define THCPTensorClass TH_CONCAT_3(THCP,Real,TensorClass)
@ -16,7 +8,7 @@ public:
#define THCPDoubleTensor_Check(obj) PyObject_IsInstance(obj, THCPDoubleTensorClass)
#define THCPFloatTensor_Check(obj) PyObject_IsInstance(obj, THCPFloatTensorClass)
#define THCPHalfTensor_Check(obj) PyObject_IsInstance(obj, THCPHalfTensorClass)
#define THCPLongTensor_Check(obj) PyObject_IsInstance(obj, THCPLongTensorClass)
#define THCPIntTensor_Check(obj) PyObject_IsInstance(obj, THCPIntTensorClass)
#define THCPShortTensor_Check(obj) PyObject_IsInstance(obj, THCPShortTensorClass)
@ -25,6 +17,7 @@ public:
#define THCPDoubleTensor_CData(obj) (obj)->cdata
#define THCPFloatTensor_CData(obj) (obj)->cdata
#define THCPHalfTensor_CData(obj) (obj)->cdata
#define THCPLongTensor_CData(obj) (obj)->cdata
#define THCPIntTensor_CData(obj) (obj)->cdata
#define THCPShortTensor_CData(obj) (obj)->cdata

View File

@ -32,3 +32,7 @@
#define LIBRARY_STATE state,
#define TH_GENERIC_FILE THC_GENERIC_FILE
#define THHostTensor TH_CONCAT_3(TH,Real,Tensor)
#define THHostTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
#define THHostStorage TH_CONCAT_3(TH,Real,Storage)
#define THHostStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)

View File

@ -30,3 +30,7 @@
#undef THTensorPtr
#undef THPTensorPtr
#undef THHostTensor
#undef THHostTensor_
#undef THHostStorage
#undef THHostStorage_

414
torch/csrc/cudnn/Conv.cpp Normal file
View File

@ -0,0 +1,414 @@
#include "Conv.h"
#include "THC/THC.h"
#include <cudnn.h>
#include <stdint.h>
#include <memory>
#include <unordered_map>
#include <functional>
#include <mutex>
namespace torch { namespace cudnn {
namespace {
union Constant
{
float f;
double d;
Constant(cudnnDataType_t dataType, double value) {
if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) {
f = (float) value;
} else {
d = value;
}
}
};
void setTensorDescriptor(TensorDescriptor& desc, cudnnDataType_t dataType, THVoidTensor* tensor, int groups)
{
int inputSize[4];
int inputStride[4];
for (int i = 0; i < 4; ++i) {
inputSize[i] = (int) tensor->size[i];
inputStride[i] = (int) tensor->stride[i];
}
inputSize[1] /= groups;
desc.set(dataType, 4, inputSize, inputStride);
}
void setWeightDescriptor(FilterDescriptor& desc, cudnnDataType_t dataType, THVoidTensor* weight, int groups)
{
int inputSize[4] = { 1, 1, 1, 1 };
for (int i = 0; i < 4; ++i) {
inputSize[i] = (int) weight->size[i];
}
inputSize[0] /= groups;
inputSize[1] /= groups;
desc.set(dataType, inputSize);
}
struct ParamsHash {
std::size_t operator()(const ConvolutionParams& params) const {
auto ptr = reinterpret_cast<const uint8_t*>(&params);
uint32_t value = 0x811C9DC5;
for (int i = 0; i < (int)sizeof(ConvolutionParams); ++i) {
value ^= ptr[i];
value *= 0x01000193;
}
return (size_t)value;
}
};
struct ParamsEqual {
bool operator()(const ConvolutionParams& a, const ConvolutionParams& b) const {
auto ptr1 = reinterpret_cast<const uint8_t*>(&a);
auto ptr2 = reinterpret_cast<const uint8_t*>(&b);
return memcmp(ptr1, ptr2, sizeof(ConvolutionParams)) == 0;
}
};
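// Note: ParamsHash and ParamsEqual above treat ConvolutionParams as a flat
// byte array - FNV-1a over the raw bytes for hashing, memcmp for equality.
// That is only well defined because ConvolutionParams is POD (see the
// static_assert below) and the Convolution constructor memsets it first, so
// any padding bytes are zeroed. A standalone sketch of the same byte-wise
// FNV-1a hash follows; the Params struct and file name are made up for the
// example.

// params_hash_demo.cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

struct Params { int pad[2]; int stride[2]; int groups; };

static std::size_t fnv1a(const void* p, std::size_t n) {
  auto ptr = reinterpret_cast<const uint8_t*>(p);
  uint32_t value = 0x811C9DC5;          // FNV offset basis
  for (std::size_t i = 0; i < n; ++i) {
    value ^= ptr[i];
    value *= 0x01000193;                // FNV prime
  }
  return (std::size_t)value;
}

int main() {
  Params a, b;
  std::memset(&a, 0, sizeof(a));        // zero everything first, like the
  std::memset(&b, 0, sizeof(b));        // Convolution constructor does
  a.pad[0] = b.pad[0] = 1;
  a.stride[0] = b.stride[0] = 2;
  a.groups = b.groups = 1;
  std::printf("hash equal: %d\n",
              fnv1a(&a, sizeof(a)) == fnv1a(&b, sizeof(b)));   // 1
  std::printf("memcmp equal: %d\n",
              std::memcmp(&a, &b, sizeof(a)) == 0);            // 1
  return 0;
}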
template <typename T>
struct BenchmarkCache {
std::mutex mutex;
std::unordered_map<ConvolutionParams, T, ParamsHash, ParamsEqual> map;
bool find(const ConvolutionParams& params, T& results) {
std::lock_guard<std::mutex> guard(mutex);
auto it = map.find(params);
if (it == map.end()) {
return false;
}
results = it->second;
return true;
}
void insert(const ConvolutionParams& params, const T& results) {
std::lock_guard<std::mutex> guard(mutex);
map[params] = results;
}
};
BenchmarkCache<cudnnConvolutionFwdAlgo_t> fwd_algos;
BenchmarkCache<cudnnConvolutionBwdDataAlgo_t> bwd_data_algos;
BenchmarkCache<cudnnConvolutionBwdFilterAlgo_t> bwd_filter_algos;
struct Workspace {
void* data;
THCState* state;
Workspace(THCState* state, size_t size) : data(NULL), state(state) {
CUDA_CHECK(THCudaMalloc(state, &data, size));
}
~Workspace() {
THCudaFree(state, data);
}
};
cudnnConvolutionFwdAlgo_t chooseForwardAlgorithm(
cudnnHandle_t handle, const Convolution& conv, bool benchmark)
{
cudnnConvolutionFwdAlgo_t algo;
if (benchmark) {
if (fwd_algos.find(conv.params, algo)) {
return algo;
}
int algoCount;
cudnnConvolutionFwdAlgoPerf_t perfResults;
CHECK(cudnnFindConvolutionForwardAlgorithm(handle, conv.idesc.desc,
conv.wdesc.desc, conv.cdesc.desc, conv.odesc.desc, 1, &algoCount, &perfResults));
fwd_algos.insert(conv.params, perfResults.algo);
return perfResults.algo;
}
cudnnConvolutionFwdPreference_t pref = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
CHECK(cudnnGetConvolutionForwardAlgorithm(handle, conv.idesc.desc,
conv.wdesc.desc, conv.cdesc.desc, conv.odesc.desc, pref, 0, &algo));
return algo;
}
cudnnConvolutionBwdDataAlgo_t chooseBackwardDataAlgorithm(
cudnnHandle_t handle, const Convolution& conv, bool benchmark)
{
cudnnConvolutionBwdDataAlgo_t algo;
if (benchmark) {
if (bwd_data_algos.find(conv.params, algo)) {
return algo;
}
int algoCount;
cudnnConvolutionBwdDataAlgoPerf_t perfResults;
CHECK(cudnnFindConvolutionBackwardDataAlgorithm(handle, conv.wdesc.desc,
conv.odesc.desc, conv.cdesc.desc, conv.idesc.desc, 1, &algoCount, &perfResults));
bwd_data_algos.insert(conv.params, perfResults.algo);
return perfResults.algo;
}
cudnnConvolutionBwdDataPreference_t pref = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle, conv.wdesc.desc,
conv.odesc.desc, conv.cdesc.desc, conv.idesc.desc, pref, 0, &algo));
return algo;
}
cudnnConvolutionBwdFilterAlgo_t chooseBackwardFilterAlgorithm(
cudnnHandle_t handle, const Convolution& conv, bool benchmark)
{
cudnnConvolutionBwdFilterAlgo_t algo;
if (benchmark) {
if (bwd_filter_algos.find(conv.params, algo)) {
return algo;
}
int algoCount;
cudnnConvolutionBwdFilterAlgoPerf_t perfResults;
CHECK(cudnnFindConvolutionBackwardFilterAlgorithm(handle, conv.idesc.desc,
conv.odesc.desc, conv.cdesc.desc, conv.wdesc.desc, 1, &algoCount, &perfResults));
bwd_filter_algos.insert(conv.params, perfResults.algo);
return perfResults.algo;
}
cudnnConvolutionBwdFilterPreference_t pref = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle, conv.idesc.desc,
conv.odesc.desc, conv.cdesc.desc, conv.wdesc.desc, pref, 0, &algo));
return algo;
}
int dataSize(cudnnDataType_t dataType)
{
switch (dataType) {
case CUDNN_DATA_HALF: return 2;
case CUDNN_DATA_FLOAT: return 4;
default: return 8;
}
}
void* tensorPointer(cudnnDataType_t dataType, THVoidTensor* tensor, int groupIdx, int groups)
{
int elementSize = dataSize(dataType);
char* ptr = (char*) tensor->storage->data;
ptr += elementSize * tensor->storageOffset;
if (groupIdx > 0) {
long size = 1;
for (int i = 0; i < 4; ++i) {
size *= tensor->size[i];
}
ptr += elementSize * size * groupIdx / groups;
}
return ptr;
}
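// Note: tensorPointer above derives each group's base pointer from arithmetic
// on a contiguous layout: elementSize * (total element count) * groupIdx /
// groups, added after the storage offset. A standalone sketch of that offset
// computation on a plain buffer (the shape and element size are made up for
// the example):

// group_offset_demo.cpp
#include <cstdio>

int main() {
  const long size[4] = {8, 6, 3, 3};    // N, C, H, W, assumed contiguous
  const int groups = 2;
  const int elementSize = 4;            // e.g. float

  long total = 1;
  for (int i = 0; i < 4; ++i) total *= size[i];

  // Byte offset added to the base pointer for group g, as in tensorPointer.
  for (int g = 0; g < groups; ++g) {
    long offset = (long)elementSize * total * g / groups;
    std::printf("group %d: byte offset %ld\n", g, offset);
  }
  return 0;
}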
}
static_assert(std::is_pod<ConvolutionParams>::value, "ConvolutionParams not POD");
Convolution::Convolution(
cudnnDataType_t dataType, THVoidTensor* input, THVoidTensor* weight,
THVoidTensor* bias, THVoidTensor* output, int pad[2], int stride[2],
int groups, bool transposed)
: idesc(), odesc(), odesc_bias(), bdesc(), wdesc(), cdesc(), groups(groups)
, transposed(transposed)
{
memset(&params, 0, sizeof(ConvolutionParams));
params.dataType = dataType;
for (int i = 0; i < 4; ++i) {
params.input_size[i] = (int) input->size[i];
params.input_stride[i] = (int) input->stride[i];
params.weight_size[i] = (int) weight->size[i];
}
for (int i = 0; i < 2; ++i) {
params.pad[i] = pad[i];
params.stride[i] = stride[i];
}
params.groups = groups;
setTensorDescriptor(idesc, dataType, input, groups);
setTensorDescriptor(odesc, dataType, output, groups);
if (!transposed)
setTensorDescriptor(odesc_bias, dataType, output, 1);
else
setTensorDescriptor(odesc_bias, dataType, input, 1);
setWeightDescriptor(wdesc, dataType, weight, groups);
cdesc.set(dataType, pad, stride);
}
Convolution* cudnn_convolution_init(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool transposed)
{
int pad[2] = {padH, padW};
int stride[2] = {dH, dW};
return new Convolution(dataType, input, weight, bias, output, pad,
stride, groups, transposed);
}
void cudnn_convolution_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* output,
Convolution* info, bool benchmark)
{
int groups = info->groups;
TensorDescriptor& idesc = info->idesc;
TensorDescriptor& odesc = info->odesc;
FilterDescriptor& wdesc = info->wdesc;
ConvolutionDescriptor& cdesc = info->cdesc;
cudnnConvolutionFwdAlgo_t fwdAlg = chooseForwardAlgorithm(handle, *info, benchmark);
size_t workspaceSize;
CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle, idesc.desc, wdesc.desc,
cdesc.desc, odesc.desc, fwdAlg, &workspaceSize));
Workspace workspace(state, workspaceSize);
Constant one(dataType, 1);
Constant zero(dataType, 0);
for (int i = 0; i < groups; ++i) {
void* input_ptr = tensorPointer(dataType, input, i, groups);
void* output_ptr = tensorPointer(dataType, output, i, groups);
void* weight_ptr = tensorPointer(dataType, weight, i, groups);
CHECK(cudnnConvolutionForward(
handle, &one, idesc.desc, input_ptr, wdesc.desc,
weight_ptr, cdesc.desc, fwdAlg, workspace.data,
workspaceSize, &zero, odesc.desc, output_ptr));
}
}
void cudnn_convolution_add_bias(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* bias, THVoidTensor* output,
Convolution* info)
{
TensorDescriptor& odesc_bias = info->odesc_bias;
TensorDescriptor& bdesc = info->bdesc;
int size[4] = { 1, (int)bias->size[0], 1, 1 };
int stride[4] = { 1, (int)bias->stride[0], 1, 1};
bdesc.set(dataType, 4, size, stride);
void* bias_ptr = tensorPointer(dataType, bias, 0, 1);
void* output_ptr = tensorPointer(dataType, output, 0, 1);
Constant one(dataType, 1);
CHECK(cudnnAddTensor(handle, &one, bdesc.desc, bias_ptr, &one,
odesc_bias.desc, output_ptr));
}
void cudnn_convolution_backward_data(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* gradInput, THVoidTensor* weight,
Convolution* info, bool benchmark)
{
TensorDescriptor& idesc = info->idesc;
TensorDescriptor& odesc = info->odesc;
FilterDescriptor& wdesc = info->wdesc;
ConvolutionDescriptor& cdesc = info->cdesc;
int groups = info->params.groups;
cudnnConvolutionBwdDataAlgo_t bwdDataAlg =
chooseBackwardDataAlgorithm(handle, *info, benchmark);
size_t workspaceSize;
CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle, wdesc.desc,
odesc.desc, cdesc.desc, idesc.desc, bwdDataAlg, &workspaceSize));
Workspace workspace(state, workspaceSize);
Constant one(dataType, 1);
Constant zero(dataType, 0);
for (int i = 0; i < groups; ++i) {
void* gradInput_ptr = tensorPointer(dataType, gradInput, i, groups);
void* gradOutput_ptr = tensorPointer(dataType, gradOutput, i, groups);
void* weight_ptr = tensorPointer(dataType, weight, i, groups);
CHECK(cudnnConvolutionBackwardData(
handle, &one, wdesc.desc, weight_ptr, odesc.desc, gradOutput_ptr,
cdesc.desc, bwdDataAlg, workspace.data, workspaceSize, &zero,
idesc.desc, gradInput_ptr));
}
}
void cudnn_convolution_backward_filter(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* input, THVoidTensor* gradWeight,
Convolution* info, bool benchmark)
{
TensorDescriptor& idesc = info->idesc;
TensorDescriptor& odesc = info->odesc;
FilterDescriptor& wdesc = info->wdesc;
ConvolutionDescriptor& cdesc = info->cdesc;
int groups = info->params.groups;
cudnnConvolutionBwdFilterAlgo_t bwdFilterAlg =
chooseBackwardFilterAlgorithm(handle, *info, benchmark);
size_t workspaceSize;
CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle, idesc.desc,
odesc.desc, cdesc.desc, wdesc.desc, bwdFilterAlg, &workspaceSize));
Workspace workspace(state, workspaceSize);
Constant one(dataType, 1);
Constant zero(dataType, 0);
for (int i = 0; i < groups; ++i) {
void* input_ptr = tensorPointer(dataType, input, i, groups);
void* gradOutput_ptr = tensorPointer(dataType, gradOutput, i, groups);
void* gradWeight_ptr = tensorPointer(dataType, gradWeight, i, groups);
if (info->transposed) {
std::swap(input_ptr, gradOutput_ptr);
}
CHECK(cudnnConvolutionBackwardFilter(
handle, &one, idesc.desc, input_ptr, odesc.desc, gradOutput_ptr,
cdesc.desc, bwdFilterAlg, workspace.data, workspaceSize, &zero,
wdesc.desc, gradWeight_ptr));
}
}
void cudnn_convolution_backward_bias(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* gradBias, Convolution* info)
{
TensorDescriptor& bdesc = info->bdesc;
TensorDescriptor& odesc_bias = info->odesc_bias;
Constant one(dataType, 1);
Constant zero(dataType, 0);
void* gradOutput_ptr = tensorPointer(dataType, gradOutput, 0, 1);
void* gradBias_ptr = tensorPointer(dataType, gradBias, 0, 1);
CHECK(cudnnConvolutionBackwardBias(
handle, &one, odesc_bias.desc, gradOutput_ptr, &zero, bdesc.desc,
gradBias_ptr));
}
Convolution* cudnn_convolution_full_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool benchmark)
{
std::unique_ptr<Convolution> info(cudnn_convolution_init(
state, handle, dataType, input, weight, bias, output, padH, padW,
dH, dW, groups, false));
cudnn_convolution_forward(state, handle, dataType, input, weight, output,
info.get(), benchmark);
if (bias) {
cudnn_convolution_add_bias(
state, handle, dataType, bias, output, info.get());
}
return info.release();
}
Convolution* cudnn_convolution_transpose_full_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool benchmark)
{
std::unique_ptr<Convolution> info(cudnn_convolution_init(
state, handle, dataType, output, weight, bias, input, padH, padW,
dH, dW, groups, true));
cudnn_convolution_backward_data(state, handle, dataType, input, output,
weight, info.get(), benchmark);
if (bias) {
cudnn_convolution_add_bias(
state, handle, dataType, bias, output, info.get());
}
return info.release();
}
}} // namespace
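Taken together, these entry points are meant to be called in a fixed order: build the Convolution descriptor cache once, run the per-group forward kernel, add the bias, and later reuse the same cache for the three backward passes. The sketch below is not part of the diff; it assumes the caller already owns a valid THCState, cuDNN handle and correctly laid out THVoidTensor arguments, and the run_forward/run_backward names are purely illustrative.

#include "torch/csrc/cudnn/Conv.h"

namespace torch { namespace cudnn {

// Illustrative only: one full forward pass, then the matching backward calls
// reusing the Convolution descriptor cache returned by the forward.
Convolution* run_forward(THCState* state, cudnnHandle_t handle,
                         THVoidTensor* input, THVoidTensor* weight,
                         THVoidTensor* bias, THVoidTensor* output)
{
  // padH = padW = 1, dH = dW = 1, groups = 1, benchmark = false (assumed values)
  return cudnn_convolution_full_forward(
      state, handle, CUDNN_DATA_FLOAT, input, weight, bias, output,
      1, 1, 1, 1, 1, false);
}

void run_backward(THCState* state, cudnnHandle_t handle, Convolution* info,
                  THVoidTensor* gradOutput, THVoidTensor* gradInput,
                  THVoidTensor* input, THVoidTensor* weight,
                  THVoidTensor* gradWeight, THVoidTensor* gradBias)
{
  cudnn_convolution_backward_data(
      state, handle, CUDNN_DATA_FLOAT, gradOutput, gradInput, weight, info, false);
  cudnn_convolution_backward_filter(
      state, handle, CUDNN_DATA_FLOAT, gradOutput, input, gradWeight, info, false);
  cudnn_convolution_backward_bias(
      state, handle, CUDNN_DATA_FLOAT, gradOutput, gradBias, info);
}

}} // namespace torch::cudnn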

torch/csrc/cudnn/Conv.h (new file)

@@ -0,0 +1,89 @@
#ifndef THP_CUDNN_CONV_INC
#define THP_CUDNN_CONV_INC
#include <cudnn.h>
#include "THC/THC.h"
#include "../Types.h"
#include "Descriptors.h"
namespace torch { namespace cudnn {
struct ConvolutionParams
{
cudnnDataType_t dataType;
int input_size[4];
int input_stride[4];
int weight_size[4];
int pad[2];
int stride[2];
int groups;
};
struct Convolution
{
ConvolutionParams params;
TensorDescriptor idesc;
TensorDescriptor odesc;
TensorDescriptor odesc_bias;
TensorDescriptor bdesc;
FilterDescriptor wdesc;
ConvolutionDescriptor cdesc;
int groups;
bool transposed;
// WARNING: if transposed == true, then idesc and odesc are swapped!
// WARNING2: WARNING does not apply to odesc_bias :)
// This allows for reusing the function code (with a small exception in
// backward_filter)
Convolution(
cudnnDataType_t dataType, THVoidTensor* input, THVoidTensor* weight,
THVoidTensor* bias, THVoidTensor* output, int pad[2], int stride[2],
int groups, bool transposed);
};
Convolution* cudnn_convolution_init(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool transposed);
void cudnn_convolution_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* output,
Convolution* info, bool benchmark);
void cudnn_convolution_add_bias(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* bias, THVoidTensor* output,
Convolution* info);
void cudnn_convolution_backward_data(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* gradInput, THVoidTensor* weight,
Convolution* info, bool benchmark);
void cudnn_convolution_backward_filter(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* input, THVoidTensor* gradWeight,
Convolution* info, bool benchmark);
void cudnn_convolution_backward_bias(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* gradOutput, THVoidTensor* gradBias, Convolution* info);
// Helpers that make it possible to queue initialization, the conv kernel and
// bias addition without reacquiring the GIL in between.
Convolution* cudnn_convolution_full_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool benchmark);
Convolution* cudnn_convolution_transpose_full_forward(
THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias, THVoidTensor* output,
int padH, int padW, int dH, int dW, int groups, bool benchmark);
}} // namespace torch::cudnn
#endif
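The comment on the "full forward" helpers points at their purpose: the Python binding can release the GIL once, run descriptor initialization, the convolution kernel and the bias addition back to back, and only reacquire the GIL afterwards. A minimal sketch of that call pattern, assuming a hypothetical binding body (error handling elided; a thrown cudnn_exception would need to be caught before re-taking the GIL):

#include <Python.h>
#include "torch/csrc/cudnn/Conv.h"

// Hypothetical binding step: everything between the ALLOW_THREADS markers runs
// without the GIL, so init + conv kernel + bias addition are queued without
// reacquiring it in between.
static torch::cudnn::Convolution* forward_without_gil(
    THCState* state, cudnnHandle_t handle, cudnnDataType_t dataType,
    THVoidTensor* input, THVoidTensor* weight, THVoidTensor* bias,
    THVoidTensor* output, int padH, int padW, int dH, int dW, int groups)
{
  torch::cudnn::Convolution* info = NULL;
  Py_BEGIN_ALLOW_THREADS
  info = torch::cudnn::cudnn_convolution_full_forward(
      state, handle, dataType, input, weight, bias, output,
      padH, padW, dH, dW, groups, /*benchmark=*/false);
  Py_END_ALLOW_THREADS
  return info;
}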

@@ -0,0 +1,101 @@
#include <Python.h>
#include <functional>
static PyObject* THPWrapperClass = NULL;
struct THPWrapper {
PyObject_HEAD
void *data;
void (*destructor)(void*);
};
PyObject * THPWrapper_New(void *data, void (*destructor)(void*))
{
PyObject *args = PyTuple_New(0);
if (!args) {
return NULL;
}
PyObject *result = PyObject_Call(THPWrapperClass, args, NULL);
if (result) {
THPWrapper* wrapper = (THPWrapper*) result;
wrapper->data = data;
wrapper->destructor = destructor;
}
Py_DECREF(args);
return result;
}
bool THPWrapper_check(PyObject * obj)
{
return (PyObject*)Py_TYPE(obj) == THPWrapperClass;
}
void * THPWrapper_get(PyObject * obj)
{
return ((THPWrapper*)obj)->data;
}
static PyObject * THPWrapper_pynew(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
PyObject* self = type->tp_alloc(type, 0);
THPWrapper* wrapper = (THPWrapper*) self;
wrapper->data = NULL;
wrapper->destructor = NULL;
return self;
}
static void THPWrapper_dealloc(THPWrapper* self)
{
self->destructor(self->data);
Py_TYPE(self)->tp_free((PyObject*)self);
}
PyTypeObject THPWrapperType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C._CppWrapper", /* tp_name */
sizeof(THPWrapper), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPWrapper_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPWrapper_pynew, /* tp_new */
};
bool THPWrapper_init(PyObject *module)
{
THPWrapperClass = (PyObject*)&THPWrapperType;
if (PyType_Ready(&THPWrapperType) < 0)
return false;
Py_INCREF(&THPWrapperType);
return true;
}
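THPWrapper is the piece that lets code like the cuDNN bindings hand an opaque C++ object back to Python, with a plain C destructor invoked when the Python object is garbage collected. A small usage sketch, not part of the diff, assuming THPWrapper_init has already run during module initialization; the free_convolution and wrap_convolution names are illustrative.

#include <Python.h>
#include "torch/csrc/cudnn/Conv.h"

// Prototype as declared in the CppWrapper header below.
PyObject * THPWrapper_New(void *data, void (*destructor)(void*));

// THPWrapper stores a plain function pointer, so the Convolution* is deleted
// through a small trampoline.
static void free_convolution(void* ptr)
{
  delete static_cast<torch::cudnn::Convolution*>(ptr);
}

// Wrap a freshly created Convolution so that Python owns its lifetime.
static PyObject* wrap_convolution(torch::cudnn::Convolution* info)
{
  PyObject* obj = THPWrapper_New(info, &free_convolution);
  if (!obj) {
    delete info;  // wrapper allocation failed; don't leak the C++ object
  }
  return obj;
}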

@@ -0,0 +1,16 @@
#ifndef THP_CUDNN_CPP_WRAPPER_INC
#define THP_CUDNN_CPP_WRAPPER_INC
#include <functional>
/**
 * Python wrapper around an arbitrary opaque C++ class
*/
bool THPWrapper_init(PyObject *module);
PyObject * THPWrapper_New(void *data, void (*destructor)(void*));
void * THPWrapper_get(PyObject * obj);
bool THPWrapper_check(PyObject * obj);
#endif
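Going the other way, a binding first checks that it actually received a _CppWrapper instance and only then pulls the raw pointer back out. A hedged sketch; unwrap_or_null is a hypothetical helper, not part of the diff:

#include <Python.h>

// Prototypes from the CppWrapper header above.
bool THPWrapper_check(PyObject * obj);
void * THPWrapper_get(PyObject * obj);

// Hypothetical binding step: recover the opaque pointer stored by
// THPWrapper_New, or set a Python TypeError and return NULL.
static void* unwrap_or_null(PyObject* arg)
{
  if (!THPWrapper_check(arg)) {
    PyErr_SetString(PyExc_TypeError, "expected a torch._C._CppWrapper object");
    return NULL;
  }
  return THPWrapper_get(arg);
}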

@@ -0,0 +1,74 @@
#ifndef THP_CUDNN_DESCRIPTORS_INC
#define THP_CUDNN_DESCRIPTORS_INC
#include "Exceptions.h"
#include <cudnn.h>
namespace torch { namespace cudnn {
struct TensorDescriptor
{
cudnnTensorDescriptor_t desc;
TensorDescriptor() : desc(NULL) {
CHECK(cudnnCreateTensorDescriptor(&desc));
}
TensorDescriptor(const TensorDescriptor&) = delete;
TensorDescriptor(TensorDescriptor&& ref)
{
desc = ref.desc;
ref.desc = NULL;
}
~TensorDescriptor() {
cudnnDestroyTensorDescriptor(desc);
}
void set(cudnnDataType_t dataType, int dim, int* size, int* stride) {
CHECK(cudnnSetTensorNdDescriptor(desc, dataType, dim, size, stride));
}
};
struct FilterDescriptor
{
cudnnFilterDescriptor_t desc;
FilterDescriptor() : desc(NULL) {
CHECK(cudnnCreateFilterDescriptor(&desc));
}
FilterDescriptor(const FilterDescriptor&) = delete;
FilterDescriptor(FilterDescriptor&& ref)
{
desc = ref.desc;
ref.desc = NULL;
}
~FilterDescriptor() {
cudnnDestroyFilterDescriptor(desc);
}
void set(cudnnDataType_t dataType, int* size) {
CHECK(cudnnSetFilterNdDescriptor(desc, dataType, CUDNN_TENSOR_NCHW, 4, size));
}
};
struct ConvolutionDescriptor
{
cudnnConvolutionDescriptor_t desc;
ConvolutionDescriptor() : desc(NULL) {
CHECK(cudnnCreateConvolutionDescriptor(&desc));
}
ConvolutionDescriptor(const ConvolutionDescriptor&) = delete;
ConvolutionDescriptor(ConvolutionDescriptor&& ref)
{
desc = ref.desc;
ref.desc = NULL;
}
~ConvolutionDescriptor() {
cudnnDestroyConvolutionDescriptor(desc);
}
void set(cudnnDataType_t dataType, int* pad, int* stride) {
int upscale[2] = {1, 1};
CHECK(cudnnSetConvolutionNdDescriptor(desc, 2, pad, stride, upscale,
CUDNN_CROSS_CORRELATION, dataType));
}
};
}} // namespace
#endif
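Each descriptor type here is a small RAII wrapper: the cuDNN descriptor is created in the constructor, configured through set(), and released in the destructor, with copying disabled and moves transferring ownership. A minimal sketch of describing a contiguous NCHW float tensor (the sizes and the relative include path are assumptions):

#include "Descriptors.h"

// Describe a contiguous 2 x 3 x 8 x 8 float tensor in NCHW layout.
void describe_example_tensor()
{
  using torch::cudnn::TensorDescriptor;
  int size[4]   = { 2, 3, 8, 8 };
  int stride[4] = { 3 * 8 * 8, 8 * 8, 8, 1 };   // contiguous NCHW strides
  TensorDescriptor desc;                        // cudnnCreateTensorDescriptor
  desc.set(CUDNN_DATA_FLOAT, 4, size, stride);  // cudnnSetTensorNdDescriptor
}                                               // descriptor destroyed here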

@@ -0,0 +1,32 @@
#ifndef THP_CUDNN_EXCEPTIONS_INC
#define THP_CUDNN_EXCEPTIONS_INC
#include <cudnn.h>
#include <stdexcept>
namespace torch { namespace cudnn {
class cudnn_exception : public std::runtime_error {
public:
cudnnStatus_t status;
cudnn_exception(cudnnStatus_t status, const char* msg) : std::runtime_error(msg), status(status) {
}
};
inline void CHECK(cudnnStatus_t status)
{
if (status != CUDNN_STATUS_SUCCESS) {
throw cudnn_exception(status, cudnnGetErrorString(status));
}
}
inline void CUDA_CHECK(cudaError_t error)
{
if (error) {
throw std::runtime_error("CUDA error");
}
}
}} // namespace torch::cudnn
#endif
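CHECK converts every non-success cuDNN status into a cudnn_exception that carries both the raw status code and cuDNN's own error string, so callers can catch a single exception type regardless of which call failed. A small sketch of the intended pattern, not part of the diff (include path assumed):

#include <cstdio>
#include "Exceptions.h"

void checked_cudnn_calls()
{
  cudnnHandle_t handle;
  try {
    // Any cuDNN call can be wrapped; a failure throws cudnn_exception.
    torch::cudnn::CHECK(cudnnCreate(&handle));
    torch::cudnn::CHECK(cudnnDestroy(handle));
  } catch (const torch::cudnn::cudnn_exception& e) {
    std::fprintf(stderr, "cuDNN error %d: %s\n", (int) e.status, e.what());
  }
}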

Some files were not shown because too many files have changed in this diff.