mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-25 08:11:06 +08:00
Compare commits
574 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b13b7010b9 | |||
| 5c79046d39 | |||
| 30fd222b80 | |||
| 761eef1f19 | |||
| 8aa1cefed8 | |||
| 0d908d813b | |||
| 1c391f6f93 | |||
| be146fd721 | |||
| 2979f4b989 | |||
| 22b3600f19 | |||
| 215813d7ac | |||
| dc7695a47a | |||
| 032a65edff | |||
| e4b4e515cd | |||
| 4b1f5f4bd6 | |||
| afd576ec0e | |||
| 95aa2af377 | |||
| 6774d39c96 | |||
| 567faedc59 | |||
| 3eab8a71e2 | |||
| 2fd4d088ff | |||
| 5d274cd499 | |||
| 8051dec608 | |||
| f2c1071c33 | |||
| bb71117ecc | |||
| d25433a099 | |||
| 7dd45490f8 | |||
| bf632544e6 | |||
| 282402d4f3 | |||
| cce03074f5 | |||
| f2f63773d8 | |||
| 84aa41824c | |||
| 25c8a117af | |||
| ae122707b5 | |||
| b4fe5ad641 | |||
| 5a761dbe65 | |||
| dd893391d5 | |||
| e8196f990d | |||
| 269b77a1b2 | |||
| 476d85dd3f | |||
| 63f6c0d692 | |||
| b546fa3fcd | |||
| 1d656b6769 | |||
| 3acbbb30f2 | |||
| 52911f9e47 | |||
| a65e0f488c | |||
| 8dc5d2a22e | |||
| bb353ccc17 | |||
| ced0054a9e | |||
| 68ee5ede29 | |||
| 4df98e2927 | |||
| 6ccac5ce28 | |||
| 3865606299 | |||
| d3334db627 | |||
| 50f5a4dd18 | |||
| b60936b9ae | |||
| 2d750b9da5 | |||
| ca376d4584 | |||
| ef183a1d23 | |||
| f4d8944973 | |||
| 6b7aef63ac | |||
| b3ab4b1094 | |||
| 1e8cb82a2d | |||
| dd399a8d68 | |||
| faac0f5c25 | |||
| c36f47bd1e | |||
| 3d1888cd95 | |||
| 97a82a3018 | |||
| 5cd313ed23 | |||
| b414494035 | |||
| c10efc646e | |||
| 348531ad8d | |||
| 714b2b8bf6 | |||
| fe4bd5066b | |||
| e17d84d38e | |||
| b9aef6bc03 | |||
| 0056b08834 | |||
| bd0df61bb5 | |||
| d9678c2e34 | |||
| b3c0aa3b7d | |||
| 77fbc12f23 | |||
| 7e46eb1613 | |||
| 821656d2d8 | |||
| 86e40ed875 | |||
| b9379cfab7 | |||
| f0b75c4aa4 | |||
| 7654b3f49e | |||
| 37ebbc2809 | |||
| 29ddbc3e37 | |||
| 16a133ed9a | |||
| c4d1318662 | |||
| 379ae6d865 | |||
| 24376ff9d3 | |||
| be6322e4b5 | |||
| 62063b2f62 | |||
| 13b1580613 | |||
| e50a1f19b3 | |||
| e86db387ba | |||
| 704ee3ca68 | |||
| 9004652c7b | |||
| aca6ce984c | |||
| ed8773f7bd | |||
| 48f48b6ff2 | |||
| 615b27eadf | |||
| 170d790b66 | |||
| e216f557fd | |||
| 997312c233 | |||
| d602b3a834 | |||
| f531d98341 | |||
| 6bdd5ecaf5 | |||
| bfbde9d6eb | |||
| b9c816a796 | |||
| 2f5c215d34 | |||
| 01650ac9de | |||
| ce536aa355 | |||
| fc0af33a18 | |||
| c7c4778af6 | |||
| 73a65cd29f | |||
| b785ed0ac0 | |||
| b2d077d81d | |||
| b1c2714ad5 | |||
| a462edd0f6 | |||
| c2425fc9a1 | |||
| fbcedf2da2 | |||
| 3d95e13b33 | |||
| 228e1a8696 | |||
| 3fa8a3ff46 | |||
| 4647f753bc | |||
| 7ba5e7cea1 | |||
| 9b626a8047 | |||
| bd0e9a73c7 | |||
| 2b1cd919ce | |||
| 8e46a15605 | |||
| 15a9fbdedb | |||
| 6336300880 | |||
| 5073132837 | |||
| 65b66264d4 | |||
| 0f872ed02f | |||
| 761d6799be | |||
| 0d179aa8db | |||
| 5b171ad7c2 | |||
| ac9245aeb3 | |||
| 60736bdf99 | |||
| 7d58765cee | |||
| 76f7d749e4 | |||
| 0b7374eb44 | |||
| 6fff764155 | |||
| 8ced72ccb8 | |||
| b1ae7f90d5 | |||
| 8b61ee522e | |||
| 76ca3eb191 | |||
| fea50a51ee | |||
| 51e589ed73 | |||
| 2e87643761 | |||
| ba9a85f271 | |||
| 0714d7a3ca | |||
| 34ce58c909 | |||
| c238ee3681 | |||
| f5338a1fb8 | |||
| d96ad41191 | |||
| f17cfe4293 | |||
| aec182ae72 | |||
| c93c884ee2 | |||
| c42a2d4d24 | |||
| f89252c336 | |||
| 490c15fae9 | |||
| f2d72ba10f | |||
| 2108b42b92 | |||
| bae8df62d3 | |||
| 98775b6bb4 | |||
| b7cc2a501f | |||
| 0720ba53b3 | |||
| ff5fa11129 | |||
| 5e7f5db332 | |||
| b5f7592140 | |||
| f366e5fc81 | |||
| 48f087f6ce | |||
| 7ad948ffa9 | |||
| 3277d83648 | |||
| 1487278fdf | |||
| 977630bc15 | |||
| 12efd53dba | |||
| 37e05485d9 | |||
| c76770f40e | |||
| da725830c2 | |||
| fc6fcf23f7 | |||
| b190f1b5bc | |||
| dfca8dfdc5 | |||
| b46d5e0b04 | |||
| f19a11a306 | |||
| cfcf69703f | |||
| e22b8e0d17 | |||
| fbfba6bdca | |||
| 3cc89afde6 | |||
| 1e4aee057c | |||
| 8dfcf7e35a | |||
| 76de151ddd | |||
| 2676cc46c2 | |||
| 1bf7bc9768 | |||
| 3c41c9fe46 | |||
| 6ff7750364 | |||
| 4d25c3d048 | |||
| 267b7ade50 | |||
| 80429ad9f7 | |||
| 5ca6516ecb | |||
| 67f94557ff | |||
| 61bd5a0643 | |||
| 748d011c8b | |||
| 5d5cfe2e57 | |||
| 7cbe255296 | |||
| 4ef303698c | |||
| 83e8b3f6c3 | |||
| 502ebed796 | |||
| 68ff58d771 | |||
| 969c1602e6 | |||
| 5e1d6a3691 | |||
| 533cfc0381 | |||
| 2b23712dc3 | |||
| 88275da5e8 | |||
| bd7a5ad6f0 | |||
| 1f6f82dbcf | |||
| 1f8939937a | |||
| b3d41a5f96 | |||
| fec2d493a9 | |||
| 86ee75f63f | |||
| 31941918cf | |||
| 19a65d2bea | |||
| 819d4b2b83 | |||
| b87c113cf4 | |||
| b25182971f | |||
| 1ee2c47e37 | |||
| 2dc563f1f1 | |||
| 15ba71a275 | |||
| e5b3fc49d6 | |||
| ae1766951d | |||
| 02d08dafd9 | |||
| 13a5090695 | |||
| 8e32e4c04c | |||
| cf991310c3 | |||
| 938706099e | |||
| 3330287dc7 | |||
| 38c8520adf | |||
| 492e1746af | |||
| 91a8109cfd | |||
| 161490d34a | |||
| 9c302852eb | |||
| 8654fcfd60 | |||
| b3d527d9a0 | |||
| 4d495218c9 | |||
| 13a041284c | |||
| c60c1a003d | |||
| 97add1a5ea | |||
| ca02930e47 | |||
| 20d5e95077 | |||
| eb4a7dc11d | |||
| f722498b72 | |||
| aadfb6fe83 | |||
| 6c273594c9 | |||
| e475c82fa1 | |||
| 0c2e6665df | |||
| 6295e6e94b | |||
| 670a4aa708 | |||
| 1bdc2e64ed | |||
| c587be1e50 | |||
| bd481596f5 | |||
| a504d56b43 | |||
| 91c4dfccea | |||
| 27f618c44d | |||
| a14482a1df | |||
| aa50c5734b | |||
| 293001a4fe | |||
| 638cfdf150 | |||
| 5f80a14525 | |||
| 1342fd3975 | |||
| 8d4af38489 | |||
| 575a064e66 | |||
| 3ab21a3c4f | |||
| 2f592e6c7d | |||
| 5661ffb766 | |||
| 9b74503daa | |||
| 24848f1cd8 | |||
| a31a07ede9 | |||
| c8c4c9b23d | |||
| e1ed9303f0 | |||
| a43aab13c2 | |||
| c698b4a45e | |||
| c6a0ffab50 | |||
| 8ba7cc30d1 | |||
| 61bf08ca24 | |||
| 6ada3c0c16 | |||
| 60061fbe79 | |||
| 46e7042add | |||
| d0c182773b | |||
| b6f60585b5 | |||
| 4b0e3ee219 | |||
| 838842d4b2 | |||
| e71cf20192 | |||
| adb4cb2b5b | |||
| 6073f9b46c | |||
| 8e8022b735 | |||
| da82d2dd70 | |||
| 82176473a5 | |||
| 2d269a9a72 | |||
| 240372a991 | |||
| 5b10411c8c | |||
| 4c474a9939 | |||
| 7ea6ae57c8 | |||
| 42633f8986 | |||
| 84248690a9 | |||
| 53409ca0fb | |||
| c2c1710047 | |||
| 876202503f | |||
| 946a7d9bc3 | |||
| 608bcd3b15 | |||
| 632b02a477 | |||
| 0db9c63300 | |||
| 873ed4e6b6 | |||
| 01bd43037d | |||
| 68c9e3f232 | |||
| a25c8555eb | |||
| dfd1dff383 | |||
| 8f391d4d51 | |||
| 2a6b7685ae | |||
| eb9573107d | |||
| ee43cd7adc | |||
| 4ca26fbc1b | |||
| c165226325 | |||
| 49295ebe54 | |||
| 455038e470 | |||
| ca7f02ea0c | |||
| 04aba1caec | |||
| f6c1bbfa48 | |||
| 4e2c8c6db5 | |||
| c26b9c0a5e | |||
| aaf41c61a6 | |||
| dd844f741b | |||
| 7117a9012e | |||
| 1bdc28161a | |||
| 5e150caf38 | |||
| c0c62d099a | |||
| b9ece39685 | |||
| 15ef008877 | |||
| b14d6318f8 | |||
| 7c44506441 | |||
| 937ba581d7 | |||
| 2ae54f1194 | |||
| a217fefee1 | |||
| 34b7fed802 | |||
| 5221745c21 | |||
| 000ca44b16 | |||
| 8f3d44033b | |||
| 7cc14c595a | |||
| 797544c47a | |||
| 0426f2f3ec | |||
| 336eeee895 | |||
| 593f867e3e | |||
| 385913be1c | |||
| 6aaa14f5fe | |||
| 07f5b21ef1 | |||
| e454870396 | |||
| 2822013437 | |||
| 72c1982734 | |||
| 0de2ea305a | |||
| d899385a3d | |||
| c6d6cbe8a6 | |||
| 85e82e85d8 | |||
| a1534cc37d | |||
| 8c8dc791ef | |||
| 63edca44f2 | |||
| 8d90ab2d9b | |||
| bd5303010d | |||
| 16d2c3d7b3 | |||
| 407a92dc26 | |||
| 0a893abc7b | |||
| 34fa5e0dc7 | |||
| 712686ce91 | |||
| 518864a7e0 | |||
| 750fb5cc73 | |||
| 0f4749907a | |||
| bd2dc63ef6 | |||
| 19a8795450 | |||
| d9dccfdd71 | |||
| 7547a06c4f | |||
| 8929b75795 | |||
| 4d37ef878c | |||
| 126e77d5c6 | |||
| 53eec78bea | |||
| a4edaec81a | |||
| 92481b59d3 | |||
| 6c77fa9121 | |||
| aeb7a72620 | |||
| 73d232ee45 | |||
| c0c65bf915 | |||
| f6cee952af | |||
| e74184f679 | |||
| 3884d36176 | |||
| e7c6886a00 | |||
| ed8e92f63d | |||
| fb97df5d65 | |||
| e9b05c71b4 | |||
| 7926324385 | |||
| 1527b37c26 | |||
| de4659659b | |||
| a96a8c8336 | |||
| 691aa19b88 | |||
| 6b07dc9e22 | |||
| 8aa259b52b | |||
| ac9312e9f8 | |||
| 91a17b702b | |||
| c54597e0b2 | |||
| a9785bba44 | |||
| 833b8cbc7a | |||
| 75aeb16e05 | |||
| fc354a0d6e | |||
| 262611fcd3 | |||
| b8a34f3033 | |||
| 10bb6bb9b8 | |||
| 3c9ef69c37 | |||
| dee987d6ee | |||
| 138f254ec1 | |||
| c7c8aaa7f0 | |||
| d0db624e02 | |||
| e3e7b76310 | |||
| dad02bceb9 | |||
| b195285879 | |||
| 8f3da5b51d | |||
| 825e919eb8 | |||
| acb0ce8885 | |||
| 72089c9c36 | |||
| cf2f158fec | |||
| 41ddc2a786 | |||
| e4886f6589 | |||
| 6470b5bd21 | |||
| 44196955e2 | |||
| f08ec1394d | |||
| f8fb25e0a2 | |||
| 6a0c66752f | |||
| a1bd4efb08 | |||
| b43ce05268 | |||
| 80e56cfda9 | |||
| 24701fc5a7 | |||
| f78a266d99 | |||
| f096fb6859 | |||
| a3e11d606b | |||
| 79232c24e2 | |||
| 15d9d499ab | |||
| 962084c8e8 | |||
| 7518b1eefb | |||
| 8215d7a4ba | |||
| 5aaa220d84 | |||
| 12c16ab9bc | |||
| 76520512e7 | |||
| 66de965882 | |||
| 10d32fb0b7 | |||
| e72c9b6e4a | |||
| ac1f68127a | |||
| 60d1852c7b | |||
| d53eb521fc | |||
| 9808932f10 | |||
| ea876eb6d5 | |||
| 0a45864866 | |||
| 2560b39796 | |||
| 21afa4c88b | |||
| 9fc3c5e4d2 | |||
| 3e3501c98d | |||
| 5e6fcd02b5 | |||
| d46ebcfadf | |||
| 41480c8cf2 | |||
| 236890d902 | |||
| 55632d81d2 | |||
| 0b276d622e | |||
| c81491b37d | |||
| 42e189425f | |||
| 3cfa0d7199 | |||
| 7c9e088661 | |||
| e78aa4bb84 | |||
| f8e94d0d8b | |||
| ebe6f40fce | |||
| 5fb37efb46 | |||
| 4f47855873 | |||
| 52ae6f682f | |||
| c35f58f97b | |||
| 659b2f3154 | |||
| 5ea05cfb96 | |||
| dc9a5b7d2f | |||
| f7ab5a128a | |||
| 368cbe615d | |||
| d4c9a3782b | |||
| 172dca5e8b | |||
| 818bf0c408 | |||
| 03dcf8a83b | |||
| 604f607fd1 | |||
| 956d946c25 | |||
| 970caaa621 | |||
| 00a5980cdf | |||
| e24eee04f0 | |||
| f1b3af4ee2 | |||
| fb2d28f477 | |||
| 3a704ff725 | |||
| 0180e638e5 | |||
| 95c6ae04fb | |||
| 27c4c6e0af | |||
| da17414b3f | |||
| be2b27a747 | |||
| aec2c8f752 | |||
| 13e34b4679 | |||
| 57373c7c29 | |||
| 79f5bf84e5 | |||
| 3ed720079e | |||
| e7c1e6a8e3 | |||
| f1d0d73ed7 | |||
| 9c411513bf | |||
| ce78bc898b | |||
| 887002e932 | |||
| 31dea5ff23 | |||
| ec4602a973 | |||
| a38749d15f | |||
| 6ee77b4edd | |||
| 343d65db91 | |||
| 6328981fcf | |||
| a90913105c | |||
| 9368596059 | |||
| 80ed795ff1 | |||
| a2938e3d11 | |||
| 2ad967dbe4 | |||
| 7415c090ac | |||
| a1fa995044 | |||
| 3c2ecc6b15 | |||
| fa1516d319 | |||
| 5e26f49db4 | |||
| 7694f65120 | |||
| b5ebf68df1 | |||
| aa46055274 | |||
| 2cad802b68 | |||
| 2d01f384f1 | |||
| f8d4f980b3 | |||
| 4f5a6c366e | |||
| ecfcf39f30 | |||
| 3975a2676e | |||
| 138ee75a3b | |||
| 0048f228cb | |||
| 2748b920ab | |||
| a92a2312d4 | |||
| 945ce5cdb0 | |||
| b39de2cbbe | |||
| 49a555e0f5 | |||
| ce13900148 | |||
| 4c77ad6ee4 | |||
| 0bc4246425 | |||
| c45ff2efe6 | |||
| 99b520cc5d | |||
| e05607aee1 | |||
| a360ba1734 | |||
| c661b963b9 | |||
| e374dc1696 | |||
| 116e0c7f38 | |||
| 45596d5289 | |||
| 342e7b873d | |||
| 00410c4496 | |||
| 8b9276bbee | |||
| 3238786ea1 | |||
| 07ebbcbcb3 | |||
| ca555abcf9 | |||
| 63893c3fa2 | |||
| f8ae34706e | |||
| 7179002bfb | |||
| 43b5be1d78 | |||
| b5f6fdb814 | |||
| a69d819901 | |||
| fef2b1526d | |||
| 3719994c96 | |||
| 4461ae8090 | |||
| 2b948c42cd | |||
| b2ae054410 |
6
.gitignore
vendored
6
.gitignore
vendored
@ -2,6 +2,7 @@ build/
|
||||
dist/
|
||||
torch.egg-info/
|
||||
*/**/__pycache__
|
||||
torch/version.py
|
||||
torch/csrc/generic/TensorMethods.cpp
|
||||
torch/lib/*.so*
|
||||
torch/lib/*.dylib*
|
||||
@ -15,8 +16,12 @@ torch/csrc/nn/THNN.cwrap
|
||||
torch/csrc/nn/THNN.cpp
|
||||
torch/csrc/nn/THCUNN.cwrap
|
||||
torch/csrc/nn/THCUNN.cpp
|
||||
torch/csrc/nn/THNN_generic.cwrap
|
||||
torch/csrc/nn/THNN_generic.cpp
|
||||
torch/csrc/nn/THNN_generic.h
|
||||
docs/src/**/*
|
||||
test/data/legacy_modules.t7
|
||||
test/data/gpu_tensors.pt
|
||||
test/htmlcov
|
||||
test/.coverage
|
||||
*/*.pyc
|
||||
@ -27,3 +32,4 @@ test/.coverage
|
||||
*/*.so*
|
||||
*/**/*.so*
|
||||
*/**/*.dylib*
|
||||
test/data/legacy_serialized.pt
|
||||
|
||||
26
.travis.yml
26
.travis.yml
@ -4,16 +4,26 @@ python:
|
||||
- 2.7.8
|
||||
- 2.7
|
||||
- 3.5
|
||||
- 3.6
|
||||
- nightly
|
||||
|
||||
cache:
|
||||
- ccache
|
||||
- directories:
|
||||
- $HOME/.ccache
|
||||
|
||||
install:
|
||||
- export CC="gcc-4.8"
|
||||
- export CXX="g++-4.8"
|
||||
- travis_retry pip install -r requirements.txt
|
||||
- travis_retry pip install .
|
||||
- unset CCACHE_DISABLE
|
||||
- export CCACHE_DIR=$HOME/.ccache
|
||||
- export CC="ccache gcc-4.8"
|
||||
- export CXX="ccache g++-4.8"
|
||||
- ccache --show-stats
|
||||
- travis_retry pip install --upgrade pip setuptools wheel
|
||||
- travis_retry pip install -r requirements.txt --only-binary=scipy
|
||||
- python setup.py install
|
||||
|
||||
script:
|
||||
- ./test/run_test.sh
|
||||
- OMP_NUM_THREADS=2 ./test/run_test.sh
|
||||
|
||||
addons:
|
||||
apt:
|
||||
@ -30,3 +40,9 @@ sudo: false
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
include:
|
||||
env: LINT_CHECK
|
||||
python: "2.7"
|
||||
addons: true
|
||||
install: pip install flake8
|
||||
script: flake8
|
||||
|
||||
74
CONTRIBUTING.md
Normal file
74
CONTRIBUTING.md
Normal file
@ -0,0 +1,74 @@
|
||||
## Contributing to PyTorch
|
||||
|
||||
If you are interested in contributing to PyTorch, your contributions will fall
|
||||
into two categories:
|
||||
1. You want to propose a new Feature and implement it
|
||||
- post about your intended feature, and we shall discuss the design and
|
||||
implementation. Once we agree that the plan looks good, go ahead and implement it.
|
||||
2. You want to implement a feature or bug-fix for an outstanding issue
|
||||
- Look at the outstanding issues here: https://github.com/pytorch/pytorch/issues
|
||||
- Especially look at the Low Priority and Medium Priority issues
|
||||
- Pick an issue and comment on it to say that you want to work on it
|
||||
- If you need more context on a particular issue, please ask and we shall provide.
|
||||
|
||||
Once you finish implementing a feature or bugfix, please send a Pull Request to
|
||||
https://github.com/pytorch/pytorch
|
||||
|
||||
If you are not familiar with creating a Pull Request, here are some guides:
|
||||
- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request
|
||||
- https://help.github.com/articles/creating-a-pull-request/
|
||||
|
||||
|
||||
## Developing locally with PyTorch
|
||||
|
||||
To locally develop with PyTorch, here are some tips:
|
||||
|
||||
1. Uninstall all existing pytorch installs
|
||||
```
|
||||
conda uninstall pytorch
|
||||
pip uninstall torch
|
||||
pip uninstall torch # run this command twice
|
||||
```
|
||||
|
||||
2. Locally clone a copy of PyTorch from source:
|
||||
|
||||
```
|
||||
git clone https://github.com/pytorch/pytorch
|
||||
cd pytorch
|
||||
```
|
||||
|
||||
3. Install PyTorch in `build develop` mode:
|
||||
|
||||
A full set of instructions for installing PyTorch from source is here:
|
||||
https://github.com/pytorch/pytorch#from-source
|
||||
|
||||
The change you have to make is to replace
|
||||
|
||||
`python setup.py install`
|
||||
|
||||
with
|
||||
|
||||
```
|
||||
python setup.py build develop
|
||||
```
|
||||
|
||||
This is especially useful if you are only changing Python files.
|
||||
|
||||
This mode will symlink the python files from the current local source tree into the
|
||||
python install.
|
||||
|
||||
Hence, if you modify a python file, you do not need to reinstall pytorch again and again.
|
||||
|
||||
For example:
|
||||
- Install local pytorch in `build develop` mode
|
||||
- modify your python file torch/__init__.py (for example)
|
||||
- test functionality
|
||||
- modify your python file torch/__init__.py
|
||||
- test functionality
|
||||
- modify your python file torch/__init__.py
|
||||
- test functionality
|
||||
|
||||
You do not need to repeatedly install after modifying python files.
|
||||
|
||||
|
||||
Hope this helps, and thanks for considering contributing.
|
||||
38
Dockerfile
Normal file
38
Dockerfile
Normal file
@ -0,0 +1,38 @@
|
||||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
|
||||
|
||||
ENV CUDNN_VERSION 6.0.20
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
git \
|
||||
curl \
|
||||
ca-certificates \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
libcudnn6=$CUDNN_VERSION-1+cuda8.0 \
|
||||
libcudnn6-dev=$CUDNN_VERSION-1+cuda8.0 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
|
||||
chmod +x ~/miniconda.sh && \
|
||||
~/miniconda.sh -b -p /opt/conda && \
|
||||
rm ~/miniconda.sh && \
|
||||
/opt/conda/bin/conda install conda-build && \
|
||||
/opt/conda/bin/conda create -y --name pytorch-py35 python=3.5.2 numpy scipy ipython mkl&& \
|
||||
/opt/conda/bin/conda clean -ya
|
||||
ENV PATH /opt/conda/envs/pytorch-py35/bin:$PATH
|
||||
RUN conda install --name pytorch-py35 -c soumith magma-cuda80
|
||||
# This must be done before pip so that requirements.txt is available
|
||||
WORKDIR /opt/pytorch
|
||||
COPY . .
|
||||
|
||||
RUN cat requirements.txt | xargs -n1 pip install --no-cache-dir && \
|
||||
TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
||||
CMAKE_LIBRARY_PATH=/opt/conda/envs/pytorch-py35/lib \
|
||||
CMAKE_INCLUDE_PATH=/opt/conda/envs/pytorch-py35/include \
|
||||
pip install -v .
|
||||
|
||||
WORKDIR /workspace
|
||||
RUN chmod -R a+w /workspace
|
||||
93
README.md
93
README.md
@ -14,31 +14,48 @@ We are in an early-release Beta. Expect some adventures and rough edges.
|
||||
- [Installation](#installation)
|
||||
- [Binaries](#binaries)
|
||||
- [From source](#from-source)
|
||||
- [Docker image](#docker-image)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Communication](#communication)
|
||||
- [Releases and Contributing](#releases-and-contributing)
|
||||
- [The Team](#the-team)
|
||||
|
||||
| Python | **`Linux CPU`** | **`Linux GPU`** |
|
||||
|--------|--------------------|------------------|
|
||||
| 2.7.8 | [](https://travis-ci.com/apaszke/pytorch) | |
|
||||
| 2.7 | [](https://travis-ci.com/apaszke/pytorch) | [](https://build.pytorch.org/job/pytorch-master-py2) |
|
||||
| 3.5 | [](https://travis-ci.com/apaszke/pytorch) | [](https://build.pytorch.org/job/pytorch-master-py3) |
|
||||
| Nightly| [](https://travis-ci.com/apaszke/pytorch) | |
|
||||
| System | Python | Status |
|
||||
| --- | --- | --- |
|
||||
| Linux CPU | 2.7.8, 2.7, 3.5, nightly | [](https://travis-ci.org/pytorch/pytorch) |
|
||||
| Linux GPU | 2.7 | [](https://build.pytorch.org/job/pytorch-master-py2) |
|
||||
| Linux GPU | 3.5 | [](https://build.pytorch.org/job/pytorch-master-py3) |
|
||||
|
||||
## More about PyTorch
|
||||
|
||||
At a granular level, PyTorch is a library that consists of the following components:
|
||||
|
||||
| \_ | \_ |
|
||||
| ------------------------ | --- |
|
||||
| torch | a Tensor library like NumPy, with strong GPU support |
|
||||
| torch.autograd | a tape based automatic differentiation library that supports all differentiable Tensor operations in torch |
|
||||
| torch.nn | a neural networks library deeply integrated with autograd designed for maximum flexibility |
|
||||
| torch.optim | an optimization package to be used with torch.nn with standard optimization methods such as SGD, RMSProp, LBFGS, Adam etc. |
|
||||
| torch.multiprocessing | python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. |
|
||||
| torch.utils | DataLoader, Trainer and other utility functions for convenience |
|
||||
| torch.legacy(.nn/.optim) | legacy code that has been ported over from torch for backward compatibility reasons |
|
||||
<table>
|
||||
<tr>
|
||||
<td><b> torch </b></td>
|
||||
<td> a Tensor library like NumPy, with strong GPU support </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> torch.autograd </b></td>
|
||||
<td> a tape based automatic differentiation library that supports all differentiable Tensor operations in torch </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> torch.nn </b></td>
|
||||
<td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> torch.multiprocessing </b></td>
|
||||
<td> python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and hogwild training. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> torch.utils </b></td>
|
||||
<td> DataLoader, Trainer and other utility functions for convenience </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b> torch.legacy(.nn/.optim) </b></td>
|
||||
<td> legacy code that has been ported over from torch for backward compatibility reasons </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Usually one uses PyTorch either as:
|
||||
|
||||
@ -101,7 +118,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
|
||||
|
||||
PyTorch has minimal framework overhead. We integrate acceleration libraries
|
||||
such as Intel MKL and NVIDIA (CuDNN, NCCL) to maximize speed.
|
||||
At the core, it's CPU and GPU Tensor and Neural Network backends
|
||||
At the core, its CPU and GPU Tensor and Neural Network backends
|
||||
(TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API.
|
||||
They are mature and have been tested for years.
|
||||
|
||||
@ -118,41 +135,52 @@ Writing new neural network modules, or interfacing with PyTorch's Tensor API was
|
||||
and with minimal abstractions.
|
||||
|
||||
You can write new neural network layers in Python using the torch API
|
||||
[or your favorite numpy based libraries such as SciPy](https://github.com/pytorch/tutorials/blob/master/Creating%20extensions%20using%20numpy%20and%20scipy.ipynb).
|
||||
[or your favorite numpy based libraries such as SciPy](http://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html).
|
||||
|
||||
If you want to write your layers in C/C++, we provide an extension API based on
|
||||
[cffi](http://cffi.readthedocs.io/en/latest/) that is efficient and with minimal boilerplate.
|
||||
There is no wrapper code that needs to be written. [You can see an example here](https://github.com/pytorch/extension-ffi).
|
||||
There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/c_extension.html) and [an example here](https://github.com/pytorch/extension-ffi).
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
### Binaries
|
||||
- Anaconda
|
||||
```bash
|
||||
conda install pytorch torchvision -c soumith
|
||||
```
|
||||
Commands to install from binaries via Conda or pip wheels are on our website:
|
||||
|
||||
[http://pytorch.org](http://pytorch.org)
|
||||
|
||||
### From source
|
||||
|
||||
Instructions for an Anaconda environment.
|
||||
If you are installing from source, we highly recommend installing an [Anaconda](https://www.continuum.io/downloads) environment.
|
||||
You will get a high-quality BLAS library (MKL) and you get a controlled compiler version regardless of your Linux distro.
|
||||
|
||||
Once you have [anaconda](https://www.continuum.io/downloads) installed, here are the instructions.
|
||||
|
||||
If you want to compile with CUDA support, install
|
||||
- [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) 7.5 or above
|
||||
- [NVIDIA CuDNN](https://developer.nvidia.com/cudnn) v5.x
|
||||
|
||||
If you want to disable CUDA support, export environment variable `NO_CUDA=1`.
|
||||
|
||||
#### Install optional dependencies
|
||||
|
||||
On Linux
|
||||
```bash
|
||||
export CMAKE_PREFIX_PATH=[anaconda root directory]
|
||||
|
||||
# Install basic dependencies
|
||||
conda install numpy mkl setuptools cmake gcc cffi
|
||||
|
||||
# On Linux, add LAPACK support for the GPU
|
||||
# Add LAPACK support for the GPU
|
||||
conda install -c soumith magma-cuda75 # or magma-cuda80 if CUDA 8.0
|
||||
```
|
||||
|
||||
On OSX
|
||||
```bash
|
||||
export CMAKE_PREFIX_PATH=[anaconda root directory]
|
||||
conda install numpy setuptools cmake cffi
|
||||
```
|
||||
|
||||
#### Install PyTorch
|
||||
```bash
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.9 # if OSX
|
||||
@ -160,10 +188,25 @@ pip install -r requirements.txt
|
||||
python setup.py install
|
||||
```
|
||||
|
||||
### Docker image
|
||||
|
||||
A Dockerfile is supplied to build images with CUDA support and cuDNN v6. Build as usual:
|
||||
```
|
||||
docker build -t pytorch-cudnnv6 .
|
||||
```
|
||||
and run with nvidia-docker:
|
||||
```
|
||||
nvidia-docker run --rm -ti --ipc=host pytorch-cudnnv6
|
||||
```
|
||||
Please note that pytorch uses shared memory to share data between processes, so if torch multiprocessing is used (e.g.
|
||||
for multithreaded data loaders) the default shared memory segment size that the container runs with is not enough, and you
|
||||
should increase shared memory size either with --ipc=host or --shm-size command line options to nvidia-docker run.
|
||||
|
||||
|
||||
## Getting Started
|
||||
|
||||
Three pointers to get you started:
|
||||
- [Tutorials: notebooks to get you started with understanding and using PyTorch](https://github.com/pytorch/tutorials)
|
||||
- [Tutorials: get you started with understanding and using PyTorch](http://pytorch.org/tutorials/)
|
||||
- [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples)
|
||||
- The API Reference: [http://pytorch.org/docs/](http://pytorch.org/docs/)
|
||||
|
||||
|
||||
@ -63,11 +63,16 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
|
||||
"}\n")
|
||||
|
||||
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
|
||||
"-ccbin" ${CMAKE_CXX_COMPILER}
|
||||
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
|
||||
RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if(nvcc_res EQUAL 0)
|
||||
# only keep the last line of nvcc_out
|
||||
STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
|
||||
STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
|
||||
list(GET nvcc_out -1 nvcc_out)
|
||||
string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
|
||||
set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
|
||||
endif()
|
||||
@ -116,13 +121,13 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
|
||||
set(add_ptx TRUE)
|
||||
set(arch_name ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
if(arch_name MATCHES "([0-9]\\.[0-9])$")
|
||||
if(arch_name MATCHES "(^[0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
|
||||
set(arch_bin ${CMAKE_MATCH_1})
|
||||
set(arch_ptx ${arch_bin})
|
||||
else()
|
||||
# Look for it in our list of known architectures
|
||||
if(${arch_name} STREQUAL "Fermi")
|
||||
set(arch_bin 2.0 "2.1(2.0)")
|
||||
set(arch_bin "2.0 2.1(2.0)")
|
||||
elseif(${arch_name} STREQUAL "Kepler+Tegra")
|
||||
set(arch_bin 3.2)
|
||||
elseif(${arch_name} STREQUAL "Kepler+Tesla")
|
||||
@ -173,11 +178,11 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
|
||||
# Tell NVCC to add binaries for the specified GPUs
|
||||
foreach(arch ${cuda_arch_bin})
|
||||
if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
|
||||
# User explicitly specified PTX for the concrete BIN
|
||||
# User explicitly specified ARCH for the concrete CODE
|
||||
list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
|
||||
list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
|
||||
else()
|
||||
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
|
||||
# User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
|
||||
list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
|
||||
list(APPEND nvcc_archs_readable sm_${arch})
|
||||
endif()
|
||||
|
||||
@ -74,9 +74,11 @@ author = 'Torch Contributors'
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.1.6'
|
||||
# TODO: change to [:2] at v1.0
|
||||
version = '.'.join(torch.__version__.split('+')[0].split('.')[:3])
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '0.1.6'
|
||||
# TODO: verify this works as expected
|
||||
release = torch.__version__.split('+')[0]
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
@ -201,12 +203,13 @@ from docutils import nodes
|
||||
from sphinx.util.docfields import TypedField
|
||||
from sphinx import addnodes
|
||||
|
||||
|
||||
def patched_make_field(self, types, domain, items):
|
||||
# type: (List, unicode, Tuple) -> nodes.field
|
||||
def handle_item(fieldarg, content):
|
||||
par = nodes.paragraph()
|
||||
par += addnodes.literal_strong('', fieldarg) # Patch: this line added
|
||||
#par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
|
||||
# par.extend(self.make_xrefs(self.rolename, domain, fieldarg,
|
||||
# addnodes.literal_strong))
|
||||
if fieldarg in types:
|
||||
par += nodes.Text(' (')
|
||||
|
||||
@ -5,3 +5,8 @@ torch.utils.data
|
||||
.. autoclass:: Dataset
|
||||
.. autoclass:: TensorDataset
|
||||
.. autoclass:: DataLoader
|
||||
.. autoclass:: torch.utils.data.sampler.Sampler
|
||||
.. autoclass:: torch.utils.data.sampler.SequentialSampler
|
||||
.. autoclass:: torch.utils.data.sampler.RandomSampler
|
||||
.. autoclass:: torch.utils.data.sampler.SubsetRandomSampler
|
||||
.. autoclass:: torch.utils.data.sampler.WeightedRandomSampler
|
||||
|
||||
@ -7,6 +7,12 @@ torch.nn
|
||||
.. automodule:: torch.nn
|
||||
.. currentmodule:: torch.nn
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
.. autoclass:: Parameter
|
||||
:members:
|
||||
|
||||
Containers
|
||||
----------------------------------
|
||||
|
||||
@ -16,6 +22,24 @@ Containers
|
||||
.. autoclass:: Module
|
||||
:members:
|
||||
|
||||
:hidden:`Sequential`
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: Sequential
|
||||
:members:
|
||||
|
||||
:hidden:`ModuleList`
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: ModuleList
|
||||
:members:
|
||||
|
||||
:hidden:`ParameterList`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: ParameterList
|
||||
:members:
|
||||
|
||||
Convolution Layers
|
||||
----------------------------------
|
||||
|
||||
@ -126,6 +150,31 @@ Pooling Layers
|
||||
.. autoclass:: LPPool2d
|
||||
:members:
|
||||
|
||||
:hidden:`AdaptiveMaxPool1d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: AdaptiveMaxPool1d
|
||||
:members:
|
||||
|
||||
:hidden:`AdaptiveMaxPool2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: AdaptiveMaxPool2d
|
||||
:members:
|
||||
|
||||
:hidden:`AdaptiveAvgPool1d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: AdaptiveAvgPool1d
|
||||
:members:
|
||||
|
||||
:hidden:`AdaptiveAvgPool2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: AdaptiveAvgPool2d
|
||||
:members:
|
||||
|
||||
|
||||
Non-linear Activations
|
||||
----------------------------------
|
||||
|
||||
@ -334,6 +383,15 @@ Sparse layers
|
||||
.. autoclass:: Embedding
|
||||
:members:
|
||||
|
||||
Distance functions
|
||||
----------------------------------
|
||||
|
||||
:hidden:`PairwiseDistance`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: PairwiseDistance
|
||||
:members:
|
||||
|
||||
|
||||
Loss functions
|
||||
----------------------------------
|
||||
@ -362,6 +420,12 @@ Loss functions
|
||||
.. autoclass:: NLLLoss
|
||||
:members:
|
||||
|
||||
:hidden:`NLLLoss2d`
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: NLLLoss2d
|
||||
:members:
|
||||
|
||||
:hidden:`KLDivLoss`
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@ -432,6 +496,19 @@ Vision layers
|
||||
.. autoclass:: PixelShuffle
|
||||
:members:
|
||||
|
||||
:hidden:`UpsamplingNearest2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: UpsamplingNearest2d
|
||||
:members:
|
||||
|
||||
:hidden:`UpsamplingBilinear2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: UpsamplingBilinear2d
|
||||
:members:
|
||||
|
||||
|
||||
Multi-GPU layers
|
||||
----------------
|
||||
|
||||
@ -441,6 +518,36 @@ Multi-GPU layers
|
||||
.. autoclass:: DataParallel
|
||||
:members:
|
||||
|
||||
|
||||
Utilities
|
||||
---------
|
||||
|
||||
:hidden:`clip_grad_norm`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: torch.nn.utils.clip_grad_norm
|
||||
|
||||
|
||||
.. currentmodule:: torch.nn.utils.rnn
|
||||
|
||||
:hidden:`PackedSequence`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: torch.nn.utils.rnn.PackedSequence
|
||||
|
||||
|
||||
:hidden:`pack_padded_sequence`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: torch.nn.utils.rnn.pack_padded_sequence
|
||||
|
||||
|
||||
:hidden:`pad_packed_sequence`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: torch.nn.utils.rnn.pad_packed_sequence
|
||||
|
||||
|
||||
torch.nn.functional
|
||||
===================
|
||||
|
||||
@ -532,6 +639,27 @@ Pooling functions
|
||||
|
||||
.. autofunction:: lp_pool2d
|
||||
|
||||
:hidden:`adaptive_max_pool1d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: adaptive_max_pool1d
|
||||
|
||||
:hidden:`adaptive_max_pool2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: adaptive_max_pool2d
|
||||
|
||||
:hidden:`adaptive_avg_pool1d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: adaptive_avg_pool1d
|
||||
|
||||
:hidden:`adaptive_avg_pool2d`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: adaptive_avg_pool2d
|
||||
|
||||
|
||||
Non-linear activation functions
|
||||
-------------------------------
|
||||
|
||||
@ -655,6 +783,15 @@ Dropout functions
|
||||
|
||||
.. autofunction:: dropout
|
||||
|
||||
Distance functions
|
||||
----------------------------------
|
||||
|
||||
:hidden:`pairwise_distance`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: pairwise_distance
|
||||
|
||||
|
||||
Loss functions
|
||||
--------------
|
||||
|
||||
@ -691,3 +828,22 @@ Vision functions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: pixel_shuffle
|
||||
|
||||
:hidden:`pad`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autofunction:: pad
|
||||
|
||||
torch.nn.init
|
||||
=============
|
||||
|
||||
.. currentmodule:: torch.nn.init
|
||||
.. autofunction:: uniform
|
||||
.. autofunction:: normal
|
||||
.. autofunction:: constant
|
||||
.. autofunction:: xavier_uniform
|
||||
.. autofunction:: xavier_normal
|
||||
.. autofunction:: kaiming_uniform
|
||||
.. autofunction:: kaiming_normal
|
||||
.. autofunction:: orthogonal
|
||||
.. autofunction:: sparse
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
.. _cuda-semantics:
|
||||
|
||||
CUDA semantics
|
||||
==============
|
||||
|
||||
@ -29,12 +31,15 @@ Below you can find a small example showcasing this::
|
||||
b = torch.FloatTensor(1).cuda()
|
||||
# a.get_device() == b.get_device() == 1
|
||||
|
||||
c = a + b
|
||||
# c.get_device() == 1
|
||||
|
||||
z = x + y
|
||||
# z.get_device() == 1
|
||||
# z.get_device() == 0
|
||||
|
||||
# even within a context, you can give a GPU id to the .cuda call
|
||||
c = torch.randn(2).cuda(2)
|
||||
# c.get_device() == 2
|
||||
d = torch.randn(2).cuda(2)
|
||||
# d.get_device() == 2
|
||||
|
||||
Best practices
|
||||
--------------
|
||||
@ -57,4 +62,22 @@ Just pass an additional ``async=True`` argument to a :meth:`~torch.Tensor.cuda`
|
||||
call. This can be used to overlap data transfers with computation.
|
||||
|
||||
You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
|
||||
pinned memory by passing ``pinned=True`` to its constructor.
|
||||
pinned memory by passing ``pin_memory=True`` to its constructor.
|
||||
|
||||
.. _cuda-nn-dataparallel-instead:
|
||||
|
||||
Use nn.DataParallel instead of multiprocessing
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Most use cases involving batched input and multiple GPUs should default to using
|
||||
:class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL,
|
||||
a single python process can saturate multiple GPUs.
|
||||
|
||||
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
|
||||
However, this is a known issue that is under active development. As always,
|
||||
test your use case.
|
||||
|
||||
There are significant caveats to using CUDA models with
|
||||
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
|
||||
requirements exactly, it is likely that your program will have incorrect or
|
||||
undefined behavior.
|
||||
|
||||
@ -86,6 +86,19 @@ small helper functions::
|
||||
# return it.
|
||||
return Linear()(input, weight, bias)
|
||||
|
||||
You probably want to check if the backward method you implemented actually
|
||||
computes the derivatives of your function. It is possible by comparing with
|
||||
numerical approximations using small finite differences::
|
||||
|
||||
from torch.autograd import gradcheck
|
||||
|
||||
# gradcheck takes a tuple of tensors as input, checks whether your gradients
|
||||
# evaluated with these tensors are close enough to numerical
|
||||
# approximations and returns True if they all verify this condition.
|
||||
input = (Variable(torch.randn(20,20).double(), requires_grad=True),)
|
||||
test = gradcheck.gradcheck(Linear(), input, eps=1e-6, atol=1e-4)
|
||||
print(test)
|
||||
|
||||
Extending :mod:`torch.nn`
|
||||
-------------------------
|
||||
|
||||
@ -132,7 +145,7 @@ This is how a ``Linear`` module can be implemented::
|
||||
# nn.Parameters can never be volatile and, unlike Variables,
|
||||
# they require gradients by default.
|
||||
self.weight = nn.Parameter(torch.Tensor(input_features, output_features))
|
||||
if bias is not None:
|
||||
if bias:
|
||||
self.bias = nn.Parameter(torch.Tensor(output_features))
|
||||
else:
|
||||
# You should always register all possible parameters, but the
|
||||
|
||||
@ -33,6 +33,8 @@ by the CUDA runtime.
|
||||
kinds of data should be done with care. Note that this restriction doesn't
|
||||
apply to shared CPU memory.
|
||||
|
||||
See also: :ref:`cuda-nn-dataparallel-instead`
|
||||
|
||||
|
||||
Best practices and tips
|
||||
-----------------------
|
||||
@ -100,11 +102,6 @@ example below as well::
|
||||
from model import MyModel
|
||||
|
||||
def train(model):
|
||||
# This for loop will break sharing of gradient buffers. It's not
|
||||
# necessary but it reduces the contention, and has a small memory cost
|
||||
# (equal to the total size of parameters).
|
||||
for param in model.parameters():
|
||||
param.grad.data = param.grad.data.clone()
|
||||
# Construct data_loader, optimizer, etc.
|
||||
for data, labels in data_loader:
|
||||
optimizer.zero_grad()
|
||||
|
||||
34
docs/source/notes/serialization.rst
Normal file
34
docs/source/notes/serialization.rst
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
Serialization semantics
|
||||
=======================
|
||||
|
||||
Best practices
|
||||
--------------
|
||||
|
||||
.. _recommend-saving-models:
|
||||
|
||||
Recommended approach for saving a model
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
There are two main approaches for serializing and restoring a model.
|
||||
|
||||
The first (recommended) saves and loads only the model parameters::
|
||||
|
||||
torch.save(the_model.state_dict(), PATH)
|
||||
|
||||
Then later::
|
||||
|
||||
the_model = TheModelClass(*args, **kwargs)
|
||||
the_model.load_state_dict(torch.load(PATH))
|
||||
|
||||
The second saves and loads the entire model::
|
||||
|
||||
torch.save(the_model, PATH)
|
||||
|
||||
Then later::
|
||||
|
||||
the_model = torch.load(PATH)
|
||||
|
||||
However, in this case, the serialized data is bound to the specific classes
|
||||
and the exact directory structure used, so it can break in various ways when
|
||||
used in other projects, or after some serious refactors.
|
||||
@ -106,6 +106,8 @@ Algorithms
|
||||
:members:
|
||||
.. autoclass:: ASGD
|
||||
:members:
|
||||
.. autoclass:: LBFGS
|
||||
:members:
|
||||
.. autoclass:: RMSprop
|
||||
:members:
|
||||
.. autoclass:: Rprop
|
||||
|
||||
@ -14,8 +14,8 @@ Data type CPU tensor GPU tensor
|
||||
32-bit floating point :class:`torch.FloatTensor` :class:`torch.cuda.FloatTensor`
|
||||
64-bit floating point :class:`torch.DoubleTensor` :class:`torch.cuda.DoubleTensor`
|
||||
16-bit floating point N/A :class:`torch.cuda.HalfTensor`
|
||||
8-bit integer (signed) :class:`torch.ByteTensor` :class:`torch.cuda.ByteTensor`
|
||||
8-bit integer (unsigned) :class:`torch.CharTensor` :class:`torch.cuda.CharTensor`
|
||||
8-bit integer (unsigned) :class:`torch.ByteTensor` :class:`torch.cuda.ByteTensor`
|
||||
8-bit integer (signed) :class:`torch.CharTensor` :class:`torch.cuda.CharTensor`
|
||||
16-bit integer (signed) :class:`torch.ShortTensor` :class:`torch.cuda.ShortTensor`
|
||||
32-bit integer (signed) :class:`torch.IntTensor` :class:`torch.cuda.IntTensor`
|
||||
64-bit integer (signed) :class:`torch.LongTensor` :class:`torch.cuda.LongTensor`
|
||||
@ -251,7 +251,6 @@ view of a storage and defines numeric operations on it.
|
||||
.. automethod:: scatter_
|
||||
.. automethod:: select
|
||||
.. automethod:: set_
|
||||
.. automethod:: set_index
|
||||
.. automethod:: share_memory_
|
||||
.. automethod:: short
|
||||
.. automethod:: sigmoid
|
||||
|
||||
@ -8,6 +8,7 @@ Tensors
|
||||
.. autofunction:: is_storage
|
||||
.. autofunction:: set_default_tensor_type
|
||||
.. autofunction:: numel
|
||||
.. autofunction:: set_printoptions
|
||||
|
||||
|
||||
Creation Ops
|
||||
@ -37,6 +38,8 @@ Indexing, Slicing, Joining, Mutating Ops
|
||||
.. autofunction:: stack
|
||||
.. autofunction:: t
|
||||
.. autofunction:: transpose
|
||||
.. autofunction:: unbind
|
||||
.. autofunction:: unsqueeze
|
||||
|
||||
|
||||
Random sampling
|
||||
@ -157,6 +160,8 @@ BLAS and LAPACK Operations
|
||||
.. autofunction:: addr
|
||||
.. autofunction:: baddbmm
|
||||
.. autofunction:: bmm
|
||||
.. autofunction:: btrifact
|
||||
.. autofunction:: btrisolve
|
||||
.. autofunction:: dot
|
||||
.. autofunction:: eig
|
||||
.. autofunction:: gels
|
||||
|
||||
@ -3,11 +3,13 @@ torchvision.datasets
|
||||
|
||||
The following dataset loaders are available:
|
||||
|
||||
- `MNIST`_
|
||||
- `COCO (Captioning and Detection)`_
|
||||
- `LSUN Classification`_
|
||||
- `ImageFolder`_
|
||||
- `Imagenet-12`_
|
||||
- `CIFAR10 and CIFAR100`_
|
||||
- `STL10`_
|
||||
|
||||
Datasets have the API:
|
||||
|
||||
@ -33,6 +35,15 @@ but they all take the keyword args:
|
||||
transforms it. For example, take in the caption string and return a
|
||||
tensor of word indices.
|
||||
|
||||
MNIST
|
||||
~~~~~
|
||||
|
||||
``dset.MNIST(root, train=True, transform=None, target_transform=None, download=False)``
|
||||
|
||||
- ``root`` : root directory of dataset where ``processed/training.pt`` and ``processed/test.pt`` exist.
|
||||
- ``train`` : ``True`` = Training set, ``False`` = Test set
|
||||
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in root directory. If the dataset is already downloaded, place the processed dataset (function available in mnist.py) in the ``processed`` folder.
|
||||
|
||||
COCO
|
||||
~~~~
|
||||
|
||||
@ -82,11 +93,42 @@ LSUN
|
||||
``dset.LSUN(db_path, classes='train', [transform, target_transform])``
|
||||
|
||||
- db\_path = root directory for the database files
|
||||
- classes =
|
||||
- ‘train’ - all categories, training set
|
||||
- ‘val’ - all categories, validation set
|
||||
- ‘test’ - all categories, test set
|
||||
- [‘bedroom\_train’, ‘church\_train’, …] : a list of categories to load
|
||||
- ``classes`` = ``‘train’`` (all categories, training set), ``‘val’`` (all categories, validation set), ``‘test’`` (all categories, test set)
|
||||
- [``‘bedroom\_train’``, ``‘church\_train’``, …] : a list of categories to load
|
||||
|
||||
ImageFolder
|
||||
~~~~~~~~~~~
|
||||
|
||||
A generic data loader where the images are arranged in this way:
|
||||
|
||||
::
|
||||
|
||||
root/dog/xxx.png
|
||||
root/dog/xxy.png
|
||||
root/dog/xxz.png
|
||||
|
||||
root/cat/123.png
|
||||
root/cat/nsdf3.png
|
||||
root/cat/asd932_.png
|
||||
|
||||
``dset.ImageFolder(root="root folder path", [transform, target_transform])``
|
||||
|
||||
It has the members:
|
||||
|
||||
- ``self.classes`` - The class names as a list
|
||||
- ``self.class_to_idx`` - Corresponding class indices
|
||||
- ``self.imgs`` - The list of (image path, class-index) tuples
|
||||
|
||||
Imagenet-12
|
||||
~~~~~~~~~~~
|
||||
|
||||
This is simply implemented with an ImageFolder dataset.
|
||||
|
||||
The data is preprocessed `as described
|
||||
here <https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset>`__
|
||||
|
||||
`Here is an
|
||||
example <https://github.com/pytorch/examples/blob/27e2a46c1d1505324032b1d94fc6ce24d5b67e97/imagenet/main.py#L48-L62>`__.
|
||||
|
||||
CIFAR
|
||||
~~~~~
|
||||
@ -99,11 +141,22 @@ CIFAR
|
||||
``cifar-10-batches-py``
|
||||
- ``train`` : ``True`` = Training set, ``False`` = Test set
|
||||
- ``download`` : ``True`` = downloads the dataset from the internet and
|
||||
puts it in root directory. If dataset already downloaded, do
|
||||
puts it in root directory. If dataset already downloaded, doesn't do anything.
|
||||
|
||||
STL10
|
||||
~~~~~
|
||||
|
||||
``dset.STL10(root, split='train', transform=None, target_transform=None, download=False)``
|
||||
|
||||
- ``root`` : root directory of dataset where there is folder ``stl10_binary``
|
||||
- ``split`` : ``'train'`` = Training set, ``'test'`` = Test set, ``'unlabeled'`` = Unlabeled set, ``'train+unlabeled'`` = Training + Unlabeled set (missing label marked as ``-1``)
|
||||
- ``download`` : ``True`` = downloads the dataset from the internet and puts it in root directory. If dataset already downloaded, doesn't do anything.
|
||||
|
||||
.. _MNIST: #mnist
|
||||
.. _COCO (Captioning and Detection): #coco
|
||||
.. _LSUN Classification: #lsun
|
||||
.. _ImageFolder: #imagefolder
|
||||
.. _Imagenet-12: #imagenet-12
|
||||
.. _CIFAR10 and CIFAR100: #cifar
|
||||
.. _STL10: #stl10
|
||||
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI
|
||||
@ -33,7 +33,7 @@ Conversion Transforms
|
||||
|
||||
.. autoclass:: ToPILImage
|
||||
|
||||
Generic Transofrms
|
||||
Generic Transforms
|
||||
------------------
|
||||
|
||||
.. autoclass:: Lambda
|
||||
|
||||
150
setup.py
150
setup.py
@ -1,6 +1,9 @@
|
||||
from setuptools import setup, Extension, distutils, Command, find_packages
|
||||
import setuptools.command.build_ext
|
||||
import setuptools.command.install
|
||||
import setuptools.command.develop
|
||||
import setuptools.command.build_py
|
||||
import distutils.unixccompiler
|
||||
import distutils.command.build
|
||||
import distutils.command.clean
|
||||
import platform
|
||||
@ -13,18 +16,28 @@ from tools.setup_helpers.env import check_env_flag
|
||||
from tools.setup_helpers.cuda import WITH_CUDA, CUDA_HOME
|
||||
from tools.setup_helpers.cudnn import WITH_CUDNN, CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR
|
||||
DEBUG = check_env_flag('DEBUG')
|
||||
WITH_DISTRIBUTED = check_env_flag('WITH_DISTRIBUTED')
|
||||
WITH_DISTRIBUTED_MW = WITH_DISTRIBUTED and check_env_flag('WITH_DISTRIBUTED_MW')
|
||||
WITH_NCCL = WITH_CUDA and platform.system() != 'Darwin'
|
||||
SYSTEM_NCCL = False
|
||||
|
||||
################################################################################
|
||||
# Monkey-patch setuptools to compile in parallel
|
||||
################################################################################
|
||||
original_link = distutils.unixccompiler.UnixCCompiler.link
|
||||
|
||||
def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None):
|
||||
|
||||
def parallelCCompile(self, sources, output_dir=None, macros=None,
|
||||
include_dirs=None, debug=0, extra_preargs=None,
|
||||
extra_postargs=None, depends=None):
|
||||
# those lines are copied from distutils.ccompiler.CCompiler directly
|
||||
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs)
|
||||
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
|
||||
output_dir, macros, include_dirs, sources, depends, extra_postargs)
|
||||
cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
|
||||
|
||||
# compile using a thread pool
|
||||
import multiprocessing.pool
|
||||
|
||||
def _single_compile(obj):
|
||||
src, ext = build[obj]
|
||||
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
|
||||
@ -33,12 +46,23 @@ def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=N
|
||||
|
||||
return objects
|
||||
|
||||
|
||||
def patched_link(self, *args, **kwargs):
|
||||
_cxx = self.compiler_cxx
|
||||
self.compiler_cxx = None
|
||||
result = original_link(self, *args, **kwargs)
|
||||
self.compiler_cxx = _cxx
|
||||
return result
|
||||
|
||||
|
||||
distutils.ccompiler.CCompiler.compile = parallelCCompile
|
||||
distutils.unixccompiler.UnixCCompiler.link = patched_link
|
||||
|
||||
################################################################################
|
||||
# Custom build commands
|
||||
################################################################################
|
||||
|
||||
|
||||
class build_deps(Command):
|
||||
user_options = []
|
||||
|
||||
@ -53,6 +77,10 @@ class build_deps(Command):
|
||||
build_all_cmd = ['bash', 'torch/lib/build_all.sh']
|
||||
if WITH_CUDA:
|
||||
build_all_cmd += ['--with-cuda']
|
||||
if WITH_NCCL and not SYSTEM_NCCL:
|
||||
build_all_cmd += ['--with-nccl']
|
||||
if WITH_DISTRIBUTED:
|
||||
build_all_cmd += ['--with-distributed']
|
||||
if subprocess.call(build_all_cmd) != 0:
|
||||
sys.exit(1)
|
||||
generate_nn_wrappers()
|
||||
@ -72,7 +100,30 @@ class build_module(Command):
|
||||
self.run_command('build_ext')
|
||||
|
||||
|
||||
class build_py(setuptools.command.build_py.build_py):
|
||||
|
||||
def run(self):
|
||||
self.create_version_file()
|
||||
setuptools.command.build_py.build_py.run(self)
|
||||
|
||||
@staticmethod
|
||||
def create_version_file():
|
||||
global version, cwd
|
||||
print('-- Building version ' + version)
|
||||
version_path = os.path.join(cwd, 'torch', 'version.py')
|
||||
with open(version_path, 'w') as f:
|
||||
f.write("__version__ = '{}'\n".format(version))
|
||||
|
||||
|
||||
class develop(setuptools.command.develop.develop):
|
||||
|
||||
def run(self):
|
||||
build_py.create_version_file()
|
||||
setuptools.command.develop.develop.run(self)
|
||||
|
||||
|
||||
class build_ext(setuptools.command.build_ext.build_ext):
|
||||
|
||||
def run(self):
|
||||
# Print build options
|
||||
if WITH_NUMPY:
|
||||
@ -87,6 +138,12 @@ class build_ext(setuptools.command.build_ext.build_ext):
|
||||
print('-- Detected CUDA at ' + CUDA_HOME)
|
||||
else:
|
||||
print('-- Not using CUDA')
|
||||
if WITH_NCCL and SYSTEM_NCCL:
|
||||
print('-- Using system provided NCCL library')
|
||||
elif WITH_NCCL:
|
||||
print('-- Building NCCL library')
|
||||
else:
|
||||
print('-- Not using NCCL')
|
||||
|
||||
# cwrap depends on pyyaml, so we can't import it earlier
|
||||
from tools.cwrap import cwrap
|
||||
@ -116,6 +173,7 @@ class build(distutils.command.build.build):
|
||||
|
||||
|
||||
class install(setuptools.command.install.install):
|
||||
|
||||
def run(self):
|
||||
if not self.skip_build:
|
||||
self.run_command('build_deps')
|
||||
@ -123,6 +181,7 @@ class install(setuptools.command.install.install):
|
||||
|
||||
|
||||
class clean(distutils.command.clean.clean):
|
||||
|
||||
def run(self):
|
||||
import glob
|
||||
with open('.gitignore', 'r') as f:
|
||||
@ -138,12 +197,12 @@ class clean(distutils.command.clean.clean):
|
||||
distutils.command.clean.clean.run(self)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# Configure compile flags
|
||||
################################################################################
|
||||
|
||||
include_dirs = []
|
||||
library_dirs = []
|
||||
extra_link_args = []
|
||||
extra_compile_args = ['-std=c++11', '-Wno-write-strings']
|
||||
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
|
||||
@ -161,9 +220,10 @@ include_dirs += [
|
||||
tmp_install_path + "/include",
|
||||
tmp_install_path + "/include/TH",
|
||||
tmp_install_path + "/include/THPP",
|
||||
tmp_install_path + "/include/THNN",
|
||||
]
|
||||
|
||||
extra_link_args.append('-L' + lib_path)
|
||||
library_dirs.append(lib_path)
|
||||
|
||||
# we specify exact lib names to avoid conflict with lua-torch installs
|
||||
TH_LIB = os.path.join(lib_path, 'libTH.so.1')
|
||||
@ -173,6 +233,8 @@ THCS_LIB = os.path.join(lib_path, 'libTHCS.so.1')
|
||||
THNN_LIB = os.path.join(lib_path, 'libTHNN.so.1')
|
||||
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.so.1')
|
||||
THPP_LIB = os.path.join(lib_path, 'libTHPP.so.1')
|
||||
THD_LIB = os.path.join(lib_path, 'libTHD.so.1')
|
||||
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')
|
||||
if platform.system() == 'Darwin':
|
||||
TH_LIB = os.path.join(lib_path, 'libTH.1.dylib')
|
||||
THS_LIB = os.path.join(lib_path, 'libTHS.1.dylib')
|
||||
@ -181,25 +243,44 @@ if platform.system() == 'Darwin':
|
||||
THNN_LIB = os.path.join(lib_path, 'libTHNN.1.dylib')
|
||||
THCUNN_LIB = os.path.join(lib_path, 'libTHCUNN.1.dylib')
|
||||
THPP_LIB = os.path.join(lib_path, 'libTHPP.1.dylib')
|
||||
THD_LIB = os.path.join(lib_path, 'libTHD.1.dylib')
|
||||
NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')
|
||||
|
||||
if WITH_NCCL and subprocess.call('ldconfig -p | grep libnccl >/dev/null', shell=True) == 0:
|
||||
SYSTEM_NCCL = True
|
||||
|
||||
main_compile_args = ['-D_THP_CORE']
|
||||
main_libraries = ['shm']
|
||||
main_link_args = [TH_LIB, THS_LIB, THPP_LIB]
|
||||
main_link_args = [TH_LIB, THS_LIB, THPP_LIB, THNN_LIB]
|
||||
main_sources = [
|
||||
"torch/csrc/PtrWrapper.cpp",
|
||||
"torch/csrc/Module.cpp",
|
||||
"torch/csrc/Generator.cpp",
|
||||
"torch/csrc/Size.cpp",
|
||||
"torch/csrc/Exceptions.cpp",
|
||||
"torch/csrc/Tensor.cpp",
|
||||
"torch/csrc/Storage.cpp",
|
||||
"torch/csrc/DynamicTypes.cpp",
|
||||
"torch/csrc/byte_order.cpp",
|
||||
"torch/csrc/utils.cpp",
|
||||
"torch/csrc/utils/object_ptr.cpp",
|
||||
"torch/csrc/utils/tuple_parser.cpp",
|
||||
"torch/csrc/allocators.cpp",
|
||||
"torch/csrc/serialization.cpp",
|
||||
"torch/csrc/autograd/init.cpp",
|
||||
"torch/csrc/autograd/variable.cpp",
|
||||
"torch/csrc/autograd/function.cpp",
|
||||
"torch/csrc/autograd/engine.cpp",
|
||||
"torch/csrc/autograd/function.cpp",
|
||||
"torch/csrc/autograd/variable.cpp",
|
||||
"torch/csrc/autograd/grad_buffer.cpp",
|
||||
"torch/csrc/autograd/python_function.cpp",
|
||||
"torch/csrc/autograd/python_cpp_function.cpp",
|
||||
"torch/csrc/autograd/python_variable.cpp",
|
||||
"torch/csrc/autograd/python_engine.cpp",
|
||||
"torch/csrc/autograd/python_hook.cpp",
|
||||
"torch/csrc/autograd/functions/batch_normalization.cpp",
|
||||
"torch/csrc/autograd/functions/convolution.cpp",
|
||||
"torch/csrc/autograd/functions/init.cpp",
|
||||
"torch/csrc/nn/THNN_generic.cpp",
|
||||
]
|
||||
|
||||
try:
|
||||
@ -210,6 +291,20 @@ try:
|
||||
except ImportError:
|
||||
WITH_NUMPY = False
|
||||
|
||||
if WITH_DISTRIBUTED:
|
||||
extra_compile_args += ['-DWITH_DISTRIBUTED']
|
||||
main_sources += [
|
||||
"torch/csrc/distributed/Module.cpp",
|
||||
"torch/csrc/distributed/utils.cpp",
|
||||
]
|
||||
if WITH_DISTRIBUTED_MW:
|
||||
main_sources += [
|
||||
"torch/csrc/distributed/Tensor.cpp",
|
||||
"torch/csrc/distributed/Storage.cpp",
|
||||
]
|
||||
include_dirs += [tmp_install_path + "/include/THD"]
|
||||
main_link_args += [THD_LIB]
|
||||
|
||||
if WITH_CUDA:
|
||||
cuda_lib_dirs = ['lib64', 'lib']
|
||||
cuda_include_path = os.path.join(CUDA_HOME, 'include')
|
||||
@ -218,11 +313,13 @@ if WITH_CUDA:
|
||||
if os.path.exists(cuda_lib_path):
|
||||
break
|
||||
include_dirs.append(cuda_include_path)
|
||||
extra_link_args.append('-L' + cuda_lib_path)
|
||||
include_dirs.append(tmp_install_path + "/include/THCUNN")
|
||||
library_dirs.append(cuda_lib_path)
|
||||
extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)
|
||||
extra_compile_args += ['-DWITH_CUDA']
|
||||
extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
|
||||
main_link_args += [THC_LIB, THCS_LIB]
|
||||
main_libraries += ['cudart']
|
||||
main_link_args += [THC_LIB, THCS_LIB, THCUNN_LIB]
|
||||
main_sources += [
|
||||
"torch/csrc/cuda/Module.cpp",
|
||||
"torch/csrc/cuda/Storage.cpp",
|
||||
@ -233,18 +330,23 @@ if WITH_CUDA:
|
||||
"torch/csrc/cuda/serialization.cpp",
|
||||
]
|
||||
|
||||
if WITH_NCCL:
|
||||
if SYSTEM_NCCL:
|
||||
main_libraries += ['nccl']
|
||||
else:
|
||||
main_link_args += [NCCL_LIB]
|
||||
extra_compile_args += ['-DWITH_NCCL']
|
||||
|
||||
if WITH_CUDNN:
|
||||
main_libraries += ['cudnn']
|
||||
include_dirs.append(CUDNN_INCLUDE_DIR)
|
||||
extra_link_args.append('-L' + CUDNN_LIB_DIR)
|
||||
library_dirs.append(CUDNN_LIB_DIR)
|
||||
main_sources += [
|
||||
"torch/csrc/cudnn/Module.cpp",
|
||||
"torch/csrc/cudnn/BatchNorm.cpp",
|
||||
"torch/csrc/cudnn/Conv.cpp",
|
||||
"torch/csrc/cudnn/cuDNN.cpp",
|
||||
"torch/csrc/cudnn/Types.cpp",
|
||||
"torch/csrc/cudnn/Handles.cpp",
|
||||
"torch/csrc/cudnn/CppWrapper.cpp",
|
||||
]
|
||||
extra_compile_args += ['-DWITH_CUDNN']
|
||||
|
||||
@ -272,14 +374,15 @@ C = Extension("torch._C",
|
||||
language='c++',
|
||||
extra_compile_args=main_compile_args + extra_compile_args,
|
||||
include_dirs=include_dirs,
|
||||
library_dirs=library_dirs,
|
||||
extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
|
||||
)
|
||||
)
|
||||
extensions.append(C)
|
||||
|
||||
DL = Extension("torch._dl",
|
||||
sources=["torch/csrc/dl.c"],
|
||||
language='c',
|
||||
)
|
||||
)
|
||||
extensions.append(DL)
|
||||
|
||||
THNN = Extension("torch._thnn._THNN",
|
||||
@ -292,7 +395,7 @@ THNN = Extension("torch._thnn._THNN",
|
||||
THNN_LIB,
|
||||
make_relative_rpath('../lib'),
|
||||
]
|
||||
)
|
||||
)
|
||||
extensions.append(THNN)
|
||||
|
||||
if WITH_CUDA:
|
||||
@ -310,18 +413,29 @@ if WITH_CUDA:
|
||||
)
|
||||
extensions.append(THCUNN)
|
||||
|
||||
version="0.1"
|
||||
version = '0.1.11'
|
||||
if os.getenv('PYTORCH_BUILD_VERSION'):
|
||||
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
|
||||
version = os.getenv('PYTORCH_BUILD_VERSION') \
|
||||
+ '_' + os.getenv('PYTORCH_BUILD_NUMBER')
|
||||
else:
|
||||
try:
|
||||
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
|
||||
version += '+' + sha[:7]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
|
||||
setup(name="torch", version=version,
|
||||
description="Tensors and Dynamic neural networks in Python with strong GPU acceleration",
|
||||
ext_modules=extensions,
|
||||
cmdclass = {
|
||||
cmdclass={
|
||||
'build': build,
|
||||
'build_py': build_py,
|
||||
'build_ext': build_ext,
|
||||
'build_deps': build_deps,
|
||||
'build_module': build_module,
|
||||
'develop': develop,
|
||||
'install': install,
|
||||
'clean': clean,
|
||||
},
|
||||
@ -333,4 +447,4 @@ setup(name="torch", version=version,
|
||||
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
|
||||
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
|
||||
install_requires=['pyyaml'],
|
||||
)
|
||||
)
|
||||
|
||||
130
test/common.py
130
test/common.py
@ -1,17 +1,29 @@
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import unittest
|
||||
import contextlib
|
||||
from functools import wraps
|
||||
from itertools import product
|
||||
from copy import deepcopy
|
||||
|
||||
import torch
|
||||
import torch.cuda
|
||||
from torch.autograd import Variable, Function
|
||||
from torch.autograd import Variable
|
||||
|
||||
|
||||
torch.set_default_tensor_type('torch.DoubleTensor')
|
||||
torch.manual_seed(123)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.manual_seed_all(123)
|
||||
|
||||
|
||||
def run_tests():
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
parser.add_argument('--seed', type=int, default=123)
|
||||
args, remaining = parser.parse_known_args()
|
||||
torch.manual_seed(args.seed)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.manual_seed_all(args.seed)
|
||||
remaining = [sys.argv[0]] + remaining
|
||||
unittest.main(argv=remaining)
|
||||
|
||||
|
||||
TEST_NUMPY = True
|
||||
@ -20,6 +32,25 @@ try:
|
||||
except ImportError:
|
||||
TEST_NUMPY = False
|
||||
|
||||
TEST_SCIPY = True
|
||||
try:
|
||||
import scipy
|
||||
except ImportError:
|
||||
TEST_SCIPY = False
|
||||
|
||||
|
||||
def skipIfNoLapack(fn):
|
||||
@wraps(fn)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
fn(*args, **kwargs)
|
||||
except Exception as e:
|
||||
if 'Lapack library not found' in e.args[0]:
|
||||
raise unittest.SkipTest('Compiled without Lapack')
|
||||
raise
|
||||
return wrapper
|
||||
|
||||
|
||||
def get_cpu_type(t):
|
||||
assert t.__module__ == 'torch.cuda'
|
||||
return getattr(torch, t.__class__.__name__)
|
||||
@ -87,11 +118,18 @@ class TestCase(unittest.TestCase):
|
||||
y = y.data
|
||||
|
||||
if torch.is_tensor(x) and torch.is_tensor(y):
|
||||
def assertTensorsEqual(a, b):
|
||||
max_err = 0
|
||||
super(TestCase, self).assertEqual(x.size(), y.size())
|
||||
for index in iter_indices(x):
|
||||
max_err = max(max_err, abs(x[index] - y[index]))
|
||||
super(TestCase, self).assertEqual(a.size(), b.size())
|
||||
for index in iter_indices(a):
|
||||
max_err = max(max_err, abs(a[index] - b[index]))
|
||||
self.assertLessEqual(max_err, prec, message)
|
||||
self.assertEqual(x.is_sparse, y.is_sparse, message)
|
||||
if x.is_sparse:
|
||||
assertTensorsEqual(x.indices(), y.indices())
|
||||
assertTensorsEqual(x.values(), y.values())
|
||||
else:
|
||||
assertTensorsEqual(x, y)
|
||||
elif type(x) == str and type(y) == str:
|
||||
super(TestCase, self).assertEqual(x, y)
|
||||
elif is_iterable(x) and is_iterable(y):
|
||||
@ -139,65 +177,23 @@ class TestCase(unittest.TestCase):
|
||||
raise AssertionError("object not found in iterable")
|
||||
|
||||
|
||||
def make_jacobian(input, num_out):
|
||||
if isinstance(input, Variable) and not input.requires_grad:
|
||||
return None
|
||||
if torch.is_tensor(input) or isinstance(input, Variable):
|
||||
return torch.zeros(input.nelement(), num_out)
|
||||
def download_file(url, path, binary=True):
|
||||
if sys.version_info < (3,):
|
||||
import urllib2
|
||||
request = urllib2
|
||||
error = urllib2
|
||||
else:
|
||||
return type(input)(filter(lambda x: x is not None,
|
||||
(make_jacobian(elem, num_out) for elem in input)))
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
request = urllib.request
|
||||
error = urllib.error
|
||||
|
||||
|
||||
def iter_tensors(x, only_requiring_grad=False):
|
||||
if torch.is_tensor(x):
|
||||
yield x
|
||||
elif isinstance(x, Variable):
|
||||
if x.requires_grad or not only_requiring_grad:
|
||||
yield x.data
|
||||
else:
|
||||
for elem in x:
|
||||
for result in iter_tensors(elem, only_requiring_grad):
|
||||
yield result
|
||||
|
||||
|
||||
def contiguous(input):
|
||||
if torch.is_tensor(input):
|
||||
return input.contiguous()
|
||||
elif isinstance(input, Variable):
|
||||
return input.contiguous()
|
||||
else:
|
||||
return type(input)(contiguous(e) for e in input)
|
||||
|
||||
|
||||
def get_numerical_jacobian(fn, input, target):
|
||||
perturbation = 1e-6
|
||||
# To be able to use .view(-1) input must be contiguous
|
||||
input = contiguous(input)
|
||||
output_size = fn(input).numel()
|
||||
jacobian = make_jacobian(target, output_size)
|
||||
|
||||
# It's much easier to iterate over flattened lists of tensors.
|
||||
# These are reference to the same objects in jacobian, so any changes
|
||||
# will be reflected in it as well.
|
||||
x_tensors = [t for t in iter_tensors(target, True)]
|
||||
j_tensors = [t for t in iter_tensors(jacobian)]
|
||||
|
||||
outa = torch.DoubleTensor(output_size)
|
||||
outb = torch.DoubleTensor(output_size)
|
||||
|
||||
# TODO: compare structure
|
||||
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
|
||||
flat_tensor = x_tensor.view(-1)
|
||||
for i in range(flat_tensor.nelement()):
|
||||
orig = flat_tensor[i]
|
||||
flat_tensor[i] = orig - perturbation
|
||||
outa.copy_(fn(input))
|
||||
flat_tensor[i] = orig + perturbation
|
||||
outb.copy_(fn(input))
|
||||
flat_tensor[i] = orig
|
||||
|
||||
outb.add_(-1,outa).div_(2*perturbation)
|
||||
d_tensor[i] = outb
|
||||
|
||||
return jacobian
|
||||
if os.path.exists(path):
|
||||
return True
|
||||
try:
|
||||
data = request.urlopen(url, timeout=15).read()
|
||||
with open(path, 'wb' if binary else 'w') as f:
|
||||
f.write(data)
|
||||
return True
|
||||
except error.URLError as e:
|
||||
return False
|
||||
|
||||
@ -2,11 +2,13 @@ import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from copy import deepcopy
|
||||
from itertools import product
|
||||
|
||||
import torch
|
||||
import torch.cuda
|
||||
from torch.autograd import Variable
|
||||
from common import TestCase, to_gpu, get_numerical_jacobian, iter_tensors, contiguous
|
||||
from common import TestCase, to_gpu, freeze_rng_state
|
||||
from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors, contiguous
|
||||
import torch.backends.cudnn
|
||||
|
||||
# tarfile module tries to obtain a file object name in python 3.3
|
||||
@ -18,6 +20,7 @@ else:
|
||||
TEST_CUDA = torch.cuda.is_available()
|
||||
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
|
||||
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.cuda.FloatTensor(1))
|
||||
TEST_CUDNN_VERSION = TEST_CUDNN and torch.backends.cudnn.version()
|
||||
PRECISION = 1e-5
|
||||
|
||||
module_tests = [
|
||||
@ -25,14 +28,14 @@ module_tests = [
|
||||
module_name='Linear',
|
||||
constructor_args=(10, 8),
|
||||
input_size=(4, 10),
|
||||
reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
|
||||
reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
|
||||
),
|
||||
dict(
|
||||
module_name='Linear',
|
||||
constructor_args=(10, 8, False),
|
||||
input_size=(4, 10),
|
||||
desc='no_bias',
|
||||
reference_fn=lambda i,p: torch.mm(i, p[0].t())
|
||||
reference_fn=lambda i, p: torch.mm(i, p[0].t())
|
||||
),
|
||||
dict(
|
||||
module_name='Threshold',
|
||||
@ -72,7 +75,7 @@ module_tests = [
|
||||
dict(
|
||||
module_name='Hardtanh',
|
||||
input_size=(3, 2, 5),
|
||||
reference_fn=lambda i,_: i.clamp(-1, 1)
|
||||
reference_fn=lambda i, _: i.clamp(-1, 1)
|
||||
),
|
||||
dict(
|
||||
module_name='Sigmoid',
|
||||
@ -85,17 +88,23 @@ module_tests = [
|
||||
dict(
|
||||
module_name='Softmax',
|
||||
input_size=(10, 20),
|
||||
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
|
||||
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20))
|
||||
),
|
||||
dict(
|
||||
module_name='Softmax2d',
|
||||
input_size=(1, 3, 10, 20),
|
||||
reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i))
|
||||
reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i))
|
||||
),
|
||||
dict(
|
||||
module_name='LogSoftmax',
|
||||
input_size=(10, 20),
|
||||
reference_fn=lambda i,_: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
|
||||
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_()
|
||||
),
|
||||
dict(
|
||||
module_name='LogSoftmax',
|
||||
input_size=(1, 3, 10, 20),
|
||||
reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand_as(i)).log_(),
|
||||
desc='multiparam'
|
||||
),
|
||||
dict(
|
||||
module_name='ELU',
|
||||
@ -124,18 +133,18 @@ module_tests = [
|
||||
dict(
|
||||
module_name='LogSigmoid',
|
||||
input_size=(2, 3, 4),
|
||||
reference_fn=lambda i,_: i.sigmoid().log()
|
||||
reference_fn=lambda i, _: i.sigmoid().log()
|
||||
),
|
||||
dict(
|
||||
module_name='Softplus',
|
||||
input_size=(10, 20),
|
||||
reference_fn=lambda i,_: torch.log(1 + torch.exp(i))
|
||||
reference_fn=lambda i, _: torch.log(1 + torch.exp(i))
|
||||
),
|
||||
dict(
|
||||
module_name='Softplus',
|
||||
constructor_args=(2,),
|
||||
input_size=(10, 20),
|
||||
reference_fn=lambda i,_: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
|
||||
reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)),
|
||||
desc='beta'
|
||||
),
|
||||
dict(
|
||||
@ -155,18 +164,47 @@ module_tests = [
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
input_size=(2, 3, 4, 5)
|
||||
input_size=(2, 3, 4),
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
desc='1d',
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
constructor_args=(3,),
|
||||
input_size=(2, 3, 4),
|
||||
desc='1d_multiparam',
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
input_size=(2, 3, 4, 5),
|
||||
desc='2d',
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
constructor_args=(3,),
|
||||
input_size=(2, 3, 4, 5),
|
||||
desc='multiparam'
|
||||
desc='2d_multiparam',
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
input_size=(2, 3, 4, 5, 6),
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
desc='3d',
|
||||
),
|
||||
dict(
|
||||
module_name='PReLU',
|
||||
constructor_args=(3,),
|
||||
input_size=(2, 3, 4, 5, 6),
|
||||
desc='3d_multiparam',
|
||||
reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
|
||||
),
|
||||
dict(
|
||||
module_name='Softsign',
|
||||
input_size=(3, 2, 5),
|
||||
reference_fn=lambda i,_: i.div(1 + torch.abs(i))
|
||||
reference_fn=lambda i, _: i.div(1 + torch.abs(i))
|
||||
),
|
||||
dict(
|
||||
module_name='Softmin',
|
||||
@ -183,8 +221,8 @@ criterion_tests = [
|
||||
dict(module_name='L1Loss',
|
||||
input_size=(2, 3, 4),
|
||||
target=torch.randn(2, 3, 4),
|
||||
reference_fn=lambda i,t,_: 1./i.numel() * \
|
||||
sum((a-b).abs().sum() for a,b in zip(i, t))
|
||||
reference_fn=lambda i, t, _: 1. / i.numel() *
|
||||
sum((a - b).abs().sum() for a, b in zip(i, t))
|
||||
),
|
||||
dict(
|
||||
module_name='NLLLoss',
|
||||
@ -207,7 +245,7 @@ criterion_tests = [
|
||||
module_name='MSELoss',
|
||||
input=torch.randn(2, 3, 4, 5),
|
||||
target=torch.randn(2, 3, 4, 5),
|
||||
reference_fn=lambda i,t,_: (i-t).abs().pow(2).sum() / i.numel()
|
||||
reference_fn=lambda i, t, _: (i - t).abs().pow(2).sum() / i.numel()
|
||||
),
|
||||
dict(
|
||||
module_name='BCELoss',
|
||||
@ -238,6 +276,13 @@ criterion_tests = [
|
||||
input_size=(2, 3, 5, 5),
|
||||
target=torch.rand(2, 5, 5).mul(3).floor().long()
|
||||
),
|
||||
dict(
|
||||
module_name='NLLLoss2d',
|
||||
constructor_args=(torch.rand(3),),
|
||||
input_size=(2, 3, 5, 5),
|
||||
target=torch.rand(2, 5, 5).mul(3).floor().long(),
|
||||
desc='weights'
|
||||
),
|
||||
dict(
|
||||
module_name='HingeEmbeddingLoss',
|
||||
input=torch.rand(10),
|
||||
@ -321,14 +366,18 @@ class NNTestCase(TestCase):
|
||||
|
||||
def _flatten_tensors(self, x):
|
||||
if torch.is_tensor(x):
|
||||
if x.is_sparse:
|
||||
return x.to_dense().view(-1)
|
||||
else:
|
||||
return x.view(-1)
|
||||
elif isinstance(x, Variable):
|
||||
return x.data.view(-1)
|
||||
return self._flatten_tensors(x.data)
|
||||
else:
|
||||
return tuple(self._flatten_tensors(a) for a in x)
|
||||
|
||||
def _zero_grad_input(self, input):
|
||||
if isinstance(input, Variable):
|
||||
if input.requires_grad and input.grad is not None:
|
||||
input.grad.data.zero_()
|
||||
elif torch.is_tensor(input):
|
||||
return
|
||||
@ -364,9 +413,9 @@ class NNTestCase(TestCase):
|
||||
|
||||
if jacobian_input:
|
||||
for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)):
|
||||
jacobian_x[:,i] = d_x
|
||||
jacobian_x[:, i] = d_x
|
||||
if jacobian_parameters:
|
||||
jacobian_param[:,i] = torch.cat(self._flatten_tensors(d_param), 0)
|
||||
jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
|
||||
|
||||
res = tuple()
|
||||
if jacobian_input:
|
||||
@ -393,9 +442,9 @@ class NNTestCase(TestCase):
|
||||
# TODO: enable non-contig tests
|
||||
input = contiguous(input)
|
||||
if jacobian_input:
|
||||
res += get_numerical_jacobian(fw, input, input),
|
||||
res += get_numerical_jacobian(fw, input, input, eps=1e-6),
|
||||
if jacobian_parameters:
|
||||
res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
|
||||
res += torch.cat(list(get_numerical_jacobian(fw, input, p, eps=1e-6) for p in param), 0),
|
||||
return res
|
||||
|
||||
def check_jacobian(self, module, input, jacobian_input=True):
|
||||
@ -427,7 +476,7 @@ class NNTestCase(TestCase):
|
||||
fx1 = self._forward_criterion(criterion, input, target)
|
||||
x[i] = original - eps
|
||||
fx2 = self._forward_criterion(criterion, input, target)
|
||||
deriv = (fx1 - fx2) / (2.*eps)
|
||||
deriv = (fx1 - fx2) / (2. * eps)
|
||||
d_x[i] = deriv
|
||||
x[i] = original
|
||||
|
||||
@ -441,6 +490,7 @@ class NNTestCase(TestCase):
|
||||
|
||||
|
||||
class TestBase(object):
|
||||
|
||||
def __init__(self, constructor, constructor_args=tuple(), input_size=None,
|
||||
input=None, desc='', reference_fn=None, fullname=None, **kwargs):
|
||||
if input_size is None and input is None:
|
||||
@ -490,6 +540,7 @@ class TestBase(object):
|
||||
|
||||
|
||||
class ModuleTest(TestBase):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(ModuleTest, self).__init__(*args, **kwargs)
|
||||
self.jacobian_input = kwargs.get('jacobian_input', True)
|
||||
@ -507,6 +558,8 @@ class ModuleTest(TestBase):
|
||||
expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
|
||||
test_case.assertEqual(out, expected_out)
|
||||
|
||||
self.test_noncontig(test_case, module, input)
|
||||
|
||||
# TODO: do this with in-memory files as soon as torch.save will support it
|
||||
with TemporaryFile() as f:
|
||||
test_case._forward(module, input)
|
||||
@ -517,6 +570,51 @@ class ModuleTest(TestBase):
|
||||
|
||||
self._do_test(test_case, module, input)
|
||||
|
||||
def noncontiguize(self, obj):
|
||||
if isinstance(obj, list):
|
||||
return [self.noncontiguize(o) for o in obj]
|
||||
tensor = obj.data if isinstance(obj, Variable) else obj
|
||||
ndim = tensor.dim()
|
||||
noncontig = torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
|
||||
assert noncontig.numel() == 1 or not noncontig.is_contiguous()
|
||||
if isinstance(obj, Variable):
|
||||
return Variable(noncontig, requires_grad=obj.requires_grad)
|
||||
return noncontig
|
||||
|
||||
def test_noncontig(self, test_case, module, input):
|
||||
test_case._zero_grad_parameters(module)
|
||||
test_case._zero_grad_input(input)
|
||||
with freeze_rng_state():
|
||||
output = test_case._forward(module, input)
|
||||
grad_output = output
|
||||
if isinstance(grad_output, Variable):
|
||||
grad_output = grad_output.data.clone()
|
||||
else:
|
||||
grad_output = grad_output.clone()
|
||||
output = output.clone()
|
||||
grad_output.normal_()
|
||||
d_input = deepcopy(test_case._backward(module, input, output, grad_output))
|
||||
d_param = deepcopy(test_case._get_parameters(module)[1])
|
||||
|
||||
nc_input = self.noncontiguize(input)
|
||||
nc_grad_output = self.noncontiguize(grad_output)
|
||||
for contig_i, contig_g in product((True, False), repeat=2):
|
||||
i = input if contig_i else nc_input
|
||||
go = grad_output if contig_g else nc_grad_output
|
||||
test_case._zero_grad_parameters(module)
|
||||
test_case._zero_grad_input(i)
|
||||
with freeze_rng_state():
|
||||
try:
|
||||
out = test_case._forward(module, i)
|
||||
except Exception:
|
||||
# Some modules will fail because of non contiguous inputs and we're ok with that
|
||||
continue
|
||||
grad = test_case._backward(module, i, out, go)
|
||||
|
||||
test_case.assertEqual(out, output)
|
||||
test_case.assertEqual(grad, d_input, 1e-4)
|
||||
test_case.assertEqual(test_case._get_parameters(module)[1], d_param)
|
||||
|
||||
def test_cuda(self, test_case):
|
||||
if not TEST_CUDA or not self.should_test_cuda:
|
||||
raise unittest.SkipTest('Excluded from CUDA tests')
|
||||
@ -527,8 +625,6 @@ class ModuleTest(TestBase):
|
||||
|
||||
cpu_module = self.constructor(*self.constructor_args)
|
||||
gpu_module = self.constructor(*self.constructor_args).float().cuda()
|
||||
test_case._zero_grad_parameters(cpu_module)
|
||||
test_case._zero_grad_parameters(gpu_module)
|
||||
cpu_param = test_case._get_parameters(cpu_module)
|
||||
gpu_param = test_case._get_parameters(gpu_module)
|
||||
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
|
||||
@ -538,6 +634,10 @@ class ModuleTest(TestBase):
|
||||
gpu_p = gpu_p.data
|
||||
gpu_p.copy_(cpu_p)
|
||||
|
||||
test_case._zero_grad_input(cpu_input)
|
||||
test_case._zero_grad_input(gpu_input)
|
||||
test_case._zero_grad_parameters(cpu_module)
|
||||
test_case._zero_grad_parameters(gpu_module)
|
||||
cpu_output = test_case._forward(cpu_module, cpu_input)
|
||||
gpu_output = test_case._forward(gpu_module, gpu_input)
|
||||
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
|
||||
@ -551,6 +651,8 @@ class ModuleTest(TestBase):
|
||||
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
|
||||
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
|
||||
test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
|
||||
|
||||
self.test_noncontig(test_case, gpu_module, gpu_input)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
# TODO: remove this after CUDA scatter_ is implemented
|
||||
@ -562,6 +664,7 @@ class ModuleTest(TestBase):
|
||||
|
||||
|
||||
class CriterionTest(TestBase):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CriterionTest, self).__init__(*args, **kwargs)
|
||||
self.target = self._get_target(kwargs['target'])
|
||||
@ -607,10 +710,10 @@ class CriterionTest(TestBase):
|
||||
|
||||
cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target)
|
||||
gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target)
|
||||
test_case.assertEqual(cpu_output, gpu_output, 2e-4)
|
||||
test_case.assertEqual(cpu_output, gpu_output, 4e-4)
|
||||
|
||||
cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target)
|
||||
gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target)
|
||||
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
|
||||
test_case.assertEqual(cpu_gradInput, gpu_gradInput, 4e-4)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
|
||||
@ -2,6 +2,7 @@ import torch.nn as nn
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.linear = nn.Linear(10, 20)
|
||||
|
||||
@ -2,6 +2,7 @@ import torch.nn as nn
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.linear = nn.Linear(10, 20)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import torch
|
||||
|
||||
|
||||
def check_error(desc, fn, *required_substrings):
|
||||
try:
|
||||
fn()
|
||||
@ -52,6 +53,7 @@ check_error('Invalid index type',
|
||||
lambda: torch.FloatStorage(10)['first item'],
|
||||
'str')
|
||||
|
||||
|
||||
def assign():
|
||||
torch.FloatStorage(10)[1:-1] = '1'
|
||||
check_error('Invalid value type',
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
|
||||
# th test.lua > lua.out
|
||||
th test.lua > lua.out
|
||||
python3 test.py > python.out
|
||||
|
||||
diff lua.out python.out >/dev/null 2>&1
|
||||
|
||||
5060
test/optim/lua.out
5060
test/optim/lua.out
File diff suppressed because it is too large
Load Diff
@ -1,39 +0,0 @@
|
||||
assert(arg[1])
|
||||
funcs = {
|
||||
'resizeAs', 'add', 'zero', 'mul', 'div', 'abs',
|
||||
'addcmul', 'addcdiv', 'copy', 'sqrt', 'fill',
|
||||
{'cmul', 'mul'},
|
||||
{'cdiv', 'div'},
|
||||
}
|
||||
for _, val in pairs(funcs) do
|
||||
local name, newname
|
||||
if type(val) == 'table' then
|
||||
name = val[1]
|
||||
newname = val[2]
|
||||
else
|
||||
name = val
|
||||
newname = val .. '_'
|
||||
end
|
||||
|
||||
command = "sed -i -r "
|
||||
.. "'/torch\\." .. name .. "\\(/b; " -- short-circuits
|
||||
.. "s/([a-zA-Z]*)\\." .. name .. "\\(" -- substitution
|
||||
.. "/"
|
||||
.. "\\1\\." .. newname .. "\\(/g' " .. arg[1]
|
||||
print(command)
|
||||
os.execute(command)
|
||||
command = "sed -i 's/math\\." .. newname
|
||||
.. "/math\\." .. name .. "/' " .. arg[1]
|
||||
print(command)
|
||||
os.execute(command)
|
||||
end
|
||||
|
||||
funcs = {
|
||||
{'torch\.cmul', 'torch\.mul'},
|
||||
{'torch\.cdiv', 'torch\.div'},
|
||||
}
|
||||
for _, val in pairs(funcs) do
|
||||
command = "sed -i 's/" .. val[1] .. "/" .. val[2] .. "/' " .. arg[1]
|
||||
print(command)
|
||||
os.execute(command)
|
||||
end
|
||||
33
test/optim/test.lua
Normal file
33
test/optim/test.lua
Normal file
@ -0,0 +1,33 @@
|
||||
local cjson = require 'cjson'
|
||||
require 'optim'
|
||||
|
||||
function rosenbrock(t)
|
||||
x, y = t[1], t[2]
|
||||
return (1 - x) ^ 2 + 100 * (y - x^2)^2
|
||||
end
|
||||
|
||||
function drosenbrock(t)
|
||||
x, y = t[1], t[2]
|
||||
return torch.DoubleTensor({-400 * x * (y - x^2) - 2 * (1 - x), 200 * x * (y - x^2)})
|
||||
end
|
||||
|
||||
local fd = io.open('tests.json', 'r')
|
||||
local tests = cjson.decode(fd:read('*a'))
|
||||
fd:close()
|
||||
|
||||
for i, test in ipairs(tests) do
|
||||
print(test.algorithm)
|
||||
algorithm = optim[test.algorithm]
|
||||
for i, config in ipairs(test.config) do
|
||||
print('================================================================================')
|
||||
params = torch.DoubleTensor({1.5, 1.5})
|
||||
for i = 1, 100 do
|
||||
function closure(x)
|
||||
return rosenbrock(x), drosenbrock(x)
|
||||
end
|
||||
algorithm(closure, params, config)
|
||||
print(string.format('%.8f\t%.8f', params[1], params[2]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@ -3,13 +3,15 @@ import torch
|
||||
import torch.legacy.optim as optim
|
||||
from pprint import pprint
|
||||
|
||||
|
||||
def rosenbrock(tensor):
|
||||
x, y = tensor
|
||||
return (1 - x)**2 + 100 * (y - x**2)**2
|
||||
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
|
||||
|
||||
|
||||
def drosenbrock(tensor):
|
||||
x, y = tensor
|
||||
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * x * (y - x**2)))
|
||||
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * x * (y - x ** 2)))
|
||||
|
||||
algorithms = {
|
||||
'adadelta': optim.adadelta,
|
||||
@ -22,6 +24,7 @@ algorithms = {
|
||||
'rmsprop': optim.rmsprop,
|
||||
'rprop': optim.rprop,
|
||||
'sgd': optim.sgd,
|
||||
'lbfgs': optim.lbfgs,
|
||||
}
|
||||
|
||||
with open('tests.json', 'r') as f:
|
||||
@ -35,4 +38,4 @@ for test in tests:
|
||||
params = torch.DoubleTensor((1.5, 1.5))
|
||||
for i in range(100):
|
||||
algorithm(lambda x: (rosenbrock(x), drosenbrock(x)), params, config)
|
||||
print('{:.12f}\t{:.12f}\t'.format(params[0], params[1]))
|
||||
print('{:.8f}\t{:.8f}\t'.format(params[0], params[1]))
|
||||
|
||||
@ -98,5 +98,12 @@
|
||||
{"learningRate": 1e-4, "nesterov": true, "momentum": 0.95, "dampening": 0},
|
||||
{"weightDecay": 0.2}
|
||||
]
|
||||
},
|
||||
{
|
||||
"algorithm": "lbfgs",
|
||||
"config": [
|
||||
{},
|
||||
{"learningRate": 1e-1}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -2,8 +2,17 @@
|
||||
set -e
|
||||
|
||||
PYCMD=${PYCMD:="python"}
|
||||
if [ "$1" == "coverage" ];
|
||||
then
|
||||
COVERAGE=0
|
||||
while [[ "$#" -gt 0 ]]; do
|
||||
case "$1" in
|
||||
-p|--python) PYCMD=$2; shift 2 ;;
|
||||
-c|--coverage) COVERAGE=1; shift 1;;
|
||||
--) shift; break ;;
|
||||
*) echo "Invalid argument: $1!" ; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ $COVERAGE -eq 1 ]]; then
|
||||
coverage erase
|
||||
PYCMD="coverage run --parallel-mode --source torch "
|
||||
echo "coverage flag found. Setting python command to: \"$PYCMD\""
|
||||
@ -12,42 +21,68 @@ fi
|
||||
pushd "$(dirname "$0")"
|
||||
|
||||
echo "Running torch tests"
|
||||
$PYCMD test_torch.py
|
||||
$PYCMD test_torch.py $@
|
||||
|
||||
echo "Running autograd tests"
|
||||
$PYCMD test_autograd.py
|
||||
$PYCMD test_autograd.py $@
|
||||
|
||||
echo "Running sparse tests"
|
||||
$PYCMD test_sparse.py
|
||||
$PYCMD test_sparse.py $@
|
||||
|
||||
echo "Running nn tests"
|
||||
$PYCMD test_nn.py
|
||||
$PYCMD test_nn.py $@
|
||||
|
||||
echo "Running legacy nn tests"
|
||||
$PYCMD test_legacy_nn.py
|
||||
$PYCMD test_legacy_nn.py $@
|
||||
|
||||
echo "Running optim tests"
|
||||
$PYCMD test_optim.py
|
||||
$PYCMD test_optim.py $@
|
||||
|
||||
echo "Running multiprocessing tests"
|
||||
$PYCMD test_multiprocessing.py
|
||||
MULTIPROCESSING_METHOD=spawn $PYCMD test_multiprocessing.py
|
||||
MULTIPROCESSING_METHOD=forkserver $PYCMD test_multiprocessing.py
|
||||
$PYCMD test_multiprocessing.py $@
|
||||
MULTIPROCESSING_METHOD=spawn $PYCMD test_multiprocessing.py $@
|
||||
MULTIPROCESSING_METHOD=forkserver $PYCMD test_multiprocessing.py $@
|
||||
|
||||
echo "Running util tests"
|
||||
$PYCMD test_utils.py
|
||||
$PYCMD test_utils.py $@
|
||||
|
||||
echo "Running dataloader tests"
|
||||
$PYCMD test_dataloader.py
|
||||
$PYCMD test_dataloader.py $@
|
||||
|
||||
echo "Running cuda tests"
|
||||
$PYCMD test_cuda.py
|
||||
$PYCMD test_cuda.py $@
|
||||
|
||||
echo "Running NCCL tests"
|
||||
$PYCMD test_nccl.py
|
||||
$PYCMD test_nccl.py $@
|
||||
|
||||
if [ "$1" == "coverage" ];
|
||||
then
|
||||
################################################################################
|
||||
if [[ "$TEST_DISTRIBUTED" -eq 1 ]]; then
|
||||
distributed_set_up() {
|
||||
export TEMP_DIR="$(mktemp -d)"
|
||||
rm -rf "$TEMP_DIR/"*
|
||||
mkdir "$TEMP_DIR/barrier"
|
||||
mkdir "$TEMP_DIR/test_dir"
|
||||
}
|
||||
|
||||
distributed_tear_down() {
|
||||
rm -rf "$TEMP_DIR"
|
||||
}
|
||||
|
||||
trap distributed_tear_down EXIT SIGHUP SIGINT SIGTERM
|
||||
|
||||
echo "Running distributed tests for the TCP backend"
|
||||
distributed_set_up
|
||||
BACKEND=tcp WORLD_SIZE=3 $PYCMD ./test_distributed.py
|
||||
distributed_tear_down
|
||||
|
||||
echo "Running distributed tests for the MPI backend"
|
||||
distributed_set_up
|
||||
BACKEND=mpi mpiexec -n 3 $PYCMD ./test_distributed.py
|
||||
distributed_tear_down
|
||||
fi
|
||||
################################################################################
|
||||
|
||||
if [[ $COVERAGE -eq 1 ]]; then
|
||||
coverage combine
|
||||
coverage html
|
||||
fi
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -7,12 +7,15 @@ import torch
|
||||
import torch.cuda
|
||||
import torch.cuda.comm as comm
|
||||
|
||||
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state
|
||||
from test_torch import TestTorch
|
||||
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests
|
||||
|
||||
HAS_CUDA = True
|
||||
if not torch.cuda.is_available():
|
||||
print('CUDA not available, skipping tests')
|
||||
import sys
|
||||
sys.exit()
|
||||
TestCase = object # noqa: F811
|
||||
HAS_CUDA = False
|
||||
|
||||
|
||||
def is_floating(t):
|
||||
return type(t) in [torch.FloatTensor, torch.DoubleTensor,
|
||||
@ -33,6 +36,7 @@ float_types = [
|
||||
torch.DoubleTensor
|
||||
] # TODO: add half...
|
||||
|
||||
|
||||
def number(floating, integer, t):
|
||||
name = type(t).__name__
|
||||
if 'Double' in name or 'Float' in name or 'Half' in name:
|
||||
@ -44,181 +48,205 @@ def number(floating, integer, t):
|
||||
S = 10
|
||||
M = 50
|
||||
|
||||
|
||||
def make_tensor(t, *sizes):
|
||||
return t(*sizes).copy_(torch.randn(*sizes))
|
||||
|
||||
|
||||
def small_2d(t):
|
||||
return make_tensor(t, S, S)
|
||||
|
||||
|
||||
def small_2d_scaled(t, scale=10):
|
||||
return make_tensor(t, S, S).mul(scale)
|
||||
|
||||
|
||||
def small_2d_oneish(t):
|
||||
if is_floating(t):
|
||||
return make_tensor(t, S, S).clamp(min=0.99, max=1.01)
|
||||
else:
|
||||
return t(S, S).fill_(1)
|
||||
|
||||
|
||||
def small_3d(t):
|
||||
return make_tensor(t, S, S, S)
|
||||
|
||||
|
||||
def medium_1d(t):
|
||||
return make_tensor(t, M)
|
||||
|
||||
|
||||
def medium_2d(t):
|
||||
return make_tensor(t, M, M)
|
||||
|
||||
|
||||
def medium_2d_scaled(t, scale=10):
|
||||
return make_tensor(t, M, M).mul(scale)
|
||||
|
||||
|
||||
def small_3d_ones(t):
|
||||
return t(S, S, S).copy_(torch.ones(S, S, S))
|
||||
|
||||
|
||||
def small_3d_positive(t):
|
||||
min_val = 1e-3 if is_floating(t) else 2
|
||||
return make_tensor(t, S, S, S).clamp_(min_val, 120)
|
||||
|
||||
|
||||
def small_3d_unique(t):
|
||||
return t(S, S, S).copy_(torch.range(1, S*S*S))
|
||||
return t(S, S, S).copy_(torch.range(1, S * S * S))
|
||||
|
||||
|
||||
def small_1d_lapack(t):
|
||||
return t(1, 3).copy_(torch.range(1, 3).view(3))
|
||||
|
||||
|
||||
def small_2d_lapack(t):
|
||||
return t(3, 3).copy_(torch.range(1, 9).view(3, 3))
|
||||
|
||||
|
||||
def small_2d_lapack_skinny(t):
|
||||
return t(3, 4).copy_(torch.range(1, 12).view(3, 4))
|
||||
|
||||
|
||||
def small_2d_lapack_fat(t):
|
||||
return t(4, 3).copy_(torch.range(1, 12).view(4, 3))
|
||||
|
||||
|
||||
def new_t(*sizes):
    """Return a one-argument factory that builds a random tensor of shape ``sizes``.

    The returned callable takes a tensor type ``t`` and returns ``t(*sizes)``
    filled with ``torch.randn(*sizes)`` samples.
    """
    def tmp(t):
        samples = torch.randn(*sizes)
        return t(*sizes).copy_(samples)

    return tmp
|
||||
|
||||
tests = [
|
||||
('add', small_3d, lambda t: [number(3.14, 3, t)] ),
|
||||
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor' ),
|
||||
('sub', small_3d, lambda t: [number(3.14, 3, t)], ),
|
||||
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('mul', small_3d, lambda t: [number(3.14, 3, t)], ),
|
||||
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('div', small_3d, lambda t: [number(3.14, 3, t)], ),
|
||||
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('add', small_3d, lambda t: [number(3.14, 3, t)]),
|
||||
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'),
|
||||
('sub', small_3d, lambda t: [number(3.14, 3, t)],),
|
||||
('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('mul', small_3d, lambda t: [number(3.14, 3, t)],),
|
||||
('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('div', small_3d, lambda t: [number(3.14, 3, t)],),
|
||||
('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
|
||||
('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types),
|
||||
('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
|
||||
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
|
||||
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
|
||||
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
|
||||
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
|
||||
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ),
|
||||
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)], ),
|
||||
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar' ),
|
||||
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], ),
|
||||
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ),
|
||||
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], ),
|
||||
('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar' ),
|
||||
('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars' ),
|
||||
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ),
|
||||
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar' ),
|
||||
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars' ),
|
||||
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], ),
|
||||
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar' ),
|
||||
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars' ),
|
||||
('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
|
||||
('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
|
||||
('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)],),
|
||||
('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
|
||||
('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
|
||||
('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)],),
|
||||
('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t),
|
||||
small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar'),
|
||||
('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)],),
|
||||
('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
|
||||
('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)],),
|
||||
('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'),
|
||||
('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'),
|
||||
('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)],),
|
||||
('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'),
|
||||
('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'),
|
||||
('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)],),
|
||||
('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'),
|
||||
('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'),
|
||||
('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types),
|
||||
('fmod', small_3d, lambda t: [3], 'value' ),
|
||||
('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('chunk', medium_2d, lambda t: [4], ),
|
||||
('chunk', medium_2d, lambda t: [4, 1], 'dim' ),
|
||||
('clamp', medium_2d_scaled, lambda t: [-1, 5], ),
|
||||
('clone', medium_2d, lambda t: [], ),
|
||||
('contiguous', medium_2d, lambda t: [], ),
|
||||
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)], ),
|
||||
('cumprod', small_3d, lambda t: [1], ),
|
||||
('cumsum', small_3d, lambda t: [1], ),
|
||||
('dim', small_3d, lambda t: [], ),
|
||||
('dist', small_2d, lambda t: [small_2d(t)], ),
|
||||
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm' ),
|
||||
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm' ),
|
||||
('dot', medium_1d, lambda t: [medium_1d(t)], ),
|
||||
('element_size', medium_1d, lambda t: [], ),
|
||||
('eq', small_3d_ones, lambda t: [small_3d(t)], ),
|
||||
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
|
||||
('ne', small_3d_ones, lambda t: [small_3d(t)], ),
|
||||
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
|
||||
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ),
|
||||
('equal', small_3d_ones, lambda t: [small_3d(t)], ),
|
||||
('expand', new_t(M, 1, M), lambda t: [M, 4, M], ),
|
||||
('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)], ),
|
||||
('fill', medium_2d, lambda t: [number(3.14, 3, t)], ),
|
||||
('ge', medium_2d, lambda t: [medium_2d(t)], ),
|
||||
('le', medium_2d, lambda t: [medium_2d(t)], ),
|
||||
('gt', medium_2d, lambda t: [medium_2d(t)], ),
|
||||
('lt', medium_2d, lambda t: [medium_2d(t)], ),
|
||||
('is_contiguous', medium_2d, lambda t: [], ),
|
||||
('fmod', small_3d, lambda t: [3], 'value'),
|
||||
('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('chunk', medium_2d, lambda t: [4],),
|
||||
('chunk', medium_2d, lambda t: [4, 1], 'dim'),
|
||||
('clamp', medium_2d_scaled, lambda t: [-1, 5],),
|
||||
('clone', medium_2d, lambda t: [],),
|
||||
('contiguous', medium_2d, lambda t: [],),
|
||||
('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],),
|
||||
('cumprod', small_3d, lambda t: [1],),
|
||||
('cumsum', small_3d, lambda t: [1],),
|
||||
('dim', small_3d, lambda t: [],),
|
||||
('dist', small_2d, lambda t: [small_2d(t)],),
|
||||
('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
|
||||
('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'),
|
||||
('dot', medium_1d, lambda t: [medium_1d(t)],),
|
||||
('element_size', medium_1d, lambda t: [],),
|
||||
('eq', small_3d_ones, lambda t: [small_3d(t)],),
|
||||
('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
|
||||
('ne', small_3d_ones, lambda t: [small_3d(t)],),
|
||||
('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
|
||||
('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
|
||||
('equal', small_3d_ones, lambda t: [small_3d(t)],),
|
||||
('expand', new_t(M, 1, M), lambda t: [M, 4, M],),
|
||||
('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)],),
|
||||
('fill', medium_2d, lambda t: [number(3.14, 3, t)],),
|
||||
('ge', medium_2d, lambda t: [medium_2d(t)],),
|
||||
('le', medium_2d, lambda t: [medium_2d(t)],),
|
||||
('gt', medium_2d, lambda t: [medium_2d(t)],),
|
||||
('lt', medium_2d, lambda t: [medium_2d(t)],),
|
||||
('is_contiguous', medium_2d, lambda t: [],),
|
||||
# TODO: can't check negative case - GPU copy will be contiguous
|
||||
('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative' ),
|
||||
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive' ),
|
||||
('is_set_to', medium_2d, lambda t: [medium_2d(t)], ),
|
||||
('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative'),
|
||||
('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive'),
|
||||
('is_set_to', medium_2d, lambda t: [medium_2d(t)],),
|
||||
# TODO: positive case
|
||||
('kthvalue', small_3d_unique, lambda t: [3], ),
|
||||
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim' ),
|
||||
('lerp', small_3d, lambda t: [small_3d(t), 0.3], ),
|
||||
('max', small_3d_unique, lambda t: [], ),
|
||||
('max', small_3d_unique, lambda t: [1], 'dim' ),
|
||||
('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ),
|
||||
('min', small_3d_unique, lambda t: [], ),
|
||||
('min', small_3d_unique, lambda t: [1], 'dim' ),
|
||||
('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ),
|
||||
('mean', small_3d, lambda t: [], ),
|
||||
('mean', small_3d, lambda t: [1], 'dim' ),
|
||||
('mode', small_3d, lambda t: [], ),
|
||||
('mode', small_3d, lambda t: [1], 'dim' ),
|
||||
('remainder', small_3d, lambda t: [3], 'value' ),
|
||||
('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ),
|
||||
('std', small_3d, lambda t: [], ),
|
||||
('std', small_3d, lambda t: [1], 'dim' ),
|
||||
('var', small_3d, lambda t: [], ),
|
||||
('var', small_3d, lambda t: [1], 'dim' ),
|
||||
('ndimension', small_3d, lambda t: [], ),
|
||||
('nelement', small_3d, lambda t: [], ),
|
||||
('numel', small_3d, lambda t: [], ),
|
||||
('narrow', small_3d, lambda t: [1, 3, 2], ),
|
||||
('nonzero', small_3d, lambda t: [], ),
|
||||
('norm', small_3d, lambda t: [], ),
|
||||
('norm', small_3d, lambda t: [3], '3_norm' ),
|
||||
('norm', small_3d, lambda t: [3, 0], '3_norm_dim' ),
|
||||
('ones', small_3d, lambda t: [1, 2, 3, 4, 5], ),
|
||||
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0], ),
|
||||
('prod', small_3d, lambda t: [], ),
|
||||
('prod', small_3d, lambda t: [1], 'dim' ),
|
||||
('sum', small_2d, lambda t: [], ),
|
||||
('sum', small_3d, lambda t: [1], 'dim' ),
|
||||
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm' ),
|
||||
('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm' ),
|
||||
('repeat', small_2d, lambda t: [2, 2, 2], ),
|
||||
('size', new_t(1, 2, 3, 4), lambda t: [], ),
|
||||
('sort', small_3d_unique, lambda t: [], ),
|
||||
('sort', small_3d_unique, lambda t: [1], 'dim' ),
|
||||
('kthvalue', small_3d_unique, lambda t: [3],),
|
||||
('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
|
||||
('lerp', small_3d, lambda t: [small_3d(t), 0.3],),
|
||||
('max', small_3d_unique, lambda t: [],),
|
||||
('max', small_3d_unique, lambda t: [1], 'dim'),
|
||||
('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
|
||||
('min', small_3d_unique, lambda t: [],),
|
||||
('min', small_3d_unique, lambda t: [1], 'dim'),
|
||||
('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
|
||||
('mean', small_3d, lambda t: [],),
|
||||
('mean', small_3d, lambda t: [1], 'dim'),
|
||||
('mode', small_3d, lambda t: [],),
|
||||
('mode', small_3d, lambda t: [1], 'dim'),
|
||||
('remainder', small_3d, lambda t: [3], 'value'),
|
||||
('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
|
||||
('std', small_3d, lambda t: [],),
|
||||
('std', small_3d, lambda t: [1], 'dim'),
|
||||
('var', small_3d, lambda t: [],),
|
||||
('var', small_3d, lambda t: [1], 'dim'),
|
||||
('ndimension', small_3d, lambda t: [],),
|
||||
('nelement', small_3d, lambda t: [],),
|
||||
('numel', small_3d, lambda t: [],),
|
||||
('narrow', small_3d, lambda t: [1, 3, 2],),
|
||||
('nonzero', small_3d, lambda t: [],),
|
||||
('norm', small_3d, lambda t: [],),
|
||||
('norm', small_3d, lambda t: [3], '3_norm'),
|
||||
('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
|
||||
('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
|
||||
('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
|
||||
('prod', small_2d_oneish, lambda t: [],),
|
||||
('prod', small_3d, lambda t: [1], 'dim'),
|
||||
('sum', small_2d, lambda t: [],),
|
||||
('sum', small_3d, lambda t: [1], 'dim'),
|
||||
('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
|
||||
('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
|
||||
('repeat', small_2d, lambda t: [2, 2, 2],),
|
||||
('size', new_t(1, 2, 3, 4), lambda t: [],),
|
||||
('sort', small_3d_unique, lambda t: [],),
|
||||
('sort', small_3d_unique, lambda t: [1], 'dim'),
|
||||
('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
|
||||
('split', small_3d, lambda t: [2], ),
|
||||
('split', small_3d, lambda t: [2, 1], 'dim' ),
|
||||
('squeeze', new_t(1, 2, 1, 4), lambda t: [], ),
|
||||
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim' ),
|
||||
('t', new_t(1, 2), lambda t: [], ),
|
||||
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2], ),
|
||||
('to_list', small_3d, lambda t: [], ),
|
||||
('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort' ),
|
||||
('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort' ),
|
||||
('trace', medium_2d, lambda t: [], ),
|
||||
('tril', medium_2d, lambda t: [], ),
|
||||
('tril', medium_2d, lambda t: [2], 'positive' ),
|
||||
('tril', medium_2d, lambda t: [-2], 'negative' ),
|
||||
('triu', medium_2d, lambda t: [], ),
|
||||
('triu', medium_2d, lambda t: [2], 'positive' ),
|
||||
('triu', medium_2d, lambda t: [-2], 'negative' ),
|
||||
('view', small_3d, lambda t: [100, 10], ),
|
||||
('view_as', small_3d, lambda t: [t(100, 10)], ),
|
||||
('zero', small_3d, lambda t: [], ),
|
||||
('zeros', small_3d, lambda t: [1, 2, 3, 4], ),
|
||||
('split', small_3d, lambda t: [2],),
|
||||
('split', small_3d, lambda t: [2, 1], 'dim'),
|
||||
('squeeze', new_t(1, 2, 1, 4), lambda t: [],),
|
||||
('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
|
||||
('t', new_t(1, 2), lambda t: [],),
|
||||
('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
|
||||
('to_list', small_3d, lambda t: [],),
|
||||
('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort'),
|
||||
('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort'),
|
||||
('trace', medium_2d, lambda t: [],),
|
||||
('tril', medium_2d, lambda t: [],),
|
||||
('tril', medium_2d, lambda t: [2], 'positive'),
|
||||
('tril', medium_2d, lambda t: [-2], 'negative'),
|
||||
('triu', medium_2d, lambda t: [],),
|
||||
('triu', medium_2d, lambda t: [2], 'positive'),
|
||||
('triu', medium_2d, lambda t: [-2], 'negative'),
|
||||
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
|
||||
('view', small_3d, lambda t: [100, 10],),
|
||||
('view_as', small_3d, lambda t: [t(100, 10)],),
|
||||
('zero', small_3d, lambda t: [],),
|
||||
('zeros', small_3d, lambda t: [1, 2, 3, 4],),
|
||||
('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types),
|
||||
('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
|
||||
('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types),
|
||||
@ -275,6 +303,8 @@ for fn in simple_pointwise_float:
|
||||
tests.append((fn, small_3d, lambda t: [], None, float_types))
|
||||
|
||||
_cycles_per_ms = None
|
||||
|
||||
|
||||
def get_cycles_per_ms():
|
||||
"""Approximate number of cycles per millisecond for torch.cuda._sleep"""
|
||||
global _cycles_per_ms
|
||||
@ -288,6 +318,7 @@ def get_cycles_per_ms():
|
||||
_cycles_per_ms = 1000000 / start.elapsed_time(end)
|
||||
return _cycles_per_ms
|
||||
|
||||
|
||||
def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
|
||||
def tmp(self):
|
||||
cpu_tensor = tensor_constructor(t)
|
||||
@ -314,10 +345,11 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
|
||||
self.assertEqual(cpu_result, gpu_result, precision)
|
||||
return tmp
|
||||
|
||||
|
||||
class TestCuda(TestCase):
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_autogpu(self):
|
||||
if torch.cuda.device_count() > 1:
|
||||
x = torch.randn(5, 5).cuda()
|
||||
y = torch.randn(5, 5).cuda()
|
||||
self.assertEqual(x.get_device(), 0)
|
||||
@ -352,7 +384,7 @@ class TestCuda(TestCase):
|
||||
self.assertEqual(z.get_device(), 0)
|
||||
self.assertIs(z.cuda(0), z)
|
||||
|
||||
def test_serialization(self):
|
||||
def test_serialization_array_with_storage(self):
|
||||
x = torch.randn(5, 5).cuda()
|
||||
y = torch.IntTensor(2, 5).fill_(0).cuda()
|
||||
q = [x, y, x, y.storage()]
|
||||
@ -412,7 +444,7 @@ class TestCuda(TestCase):
|
||||
y_cuda = y.cuda(1)
|
||||
result = comm.reduce_add((x_cuda, y_cuda))
|
||||
self.assertEqual(result.get_device(), 0)
|
||||
self.assertEqual(result.cpu(), x+y)
|
||||
self.assertEqual(result.cpu(), x + y)
|
||||
|
||||
def _test_scatter(self, input, chunk_sizes=None, dim=0):
|
||||
if torch.cuda.device_count() < 2:
|
||||
@ -473,7 +505,7 @@ class TestCuda(TestCase):
|
||||
self._test_gather(1)
|
||||
|
||||
def test_from_sequence(self):
|
||||
seq = [list(range(i*4,i*4+4)) for i in range(5)]
|
||||
seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)]
|
||||
reference = torch.range(0, 19).resize_(5, 4)
|
||||
for t in types:
|
||||
cuda_type = get_gpu_type(t)
|
||||
@ -490,6 +522,13 @@ class TestCuda(TestCase):
|
||||
self.assertEqual(x, y)
|
||||
self.assertEqual(torch.cuda.initial_seed(), 2)
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
def test_cat_autogpu(self):
    # Both inputs are placed on GPU 1; the concatenation must report the
    # same device as its inputs.
    first = torch.randn(4, 4).cuda(1)
    second = torch.randn(4, 4).cuda(1)
    joined = torch.cat([first, second], 0)
    self.assertEqual(joined.get_device(), first.get_device())
|
||||
|
||||
def test_serialization(self):
|
||||
x = torch.randn(4, 4).cuda()
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
@ -500,7 +539,7 @@ class TestCuda(TestCase):
|
||||
self.assertIs(type(x_copy), type(x))
|
||||
self.assertEqual(x_copy.get_device(), x.get_device())
|
||||
|
||||
def test_serialization_empty(self):
|
||||
def test_serialization_array_with_empty(self):
|
||||
x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
torch.save(x, f)
|
||||
@ -526,6 +565,7 @@ class TestCuda(TestCase):
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU")
|
||||
def test_multigpu_serialization_remap(self):
|
||||
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
|
||||
|
||||
def gpu_remap(storage, location):
|
||||
if location == 'cuda:1':
|
||||
return storage.cuda(0)
|
||||
@ -623,6 +663,38 @@ class TestCuda(TestCase):
|
||||
self.assertTrue(event.query())
|
||||
self.assertGreater(start_event.elapsed_time(event), 0)
|
||||
|
||||
def test_record_stream(self):
|
||||
cycles_per_ms = get_cycles_per_ms()
|
||||
|
||||
t = torch.FloatTensor([1, 2, 3, 4]).pin_memory()
|
||||
result = torch.cuda.FloatTensor(t.size())
|
||||
stream = torch.cuda.Stream()
|
||||
ptr = [None]
|
||||
|
||||
# Performs the CPU->GPU copy in a background stream
|
||||
def perform_copy():
|
||||
with torch.cuda.stream(stream):
|
||||
tmp = t.cuda(async=True)
|
||||
ptr[0] = tmp.data_ptr()
|
||||
torch.cuda.current_stream().wait_stream(stream)
|
||||
tmp.record_stream(torch.cuda.current_stream())
|
||||
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
|
||||
result.copy_(tmp)
|
||||
|
||||
perform_copy()
|
||||
with torch.cuda.stream(stream):
|
||||
tmp2 = torch.cuda.FloatTensor(t.size())
|
||||
tmp2.zero_()
|
||||
self.assertNotEqual(tmp2.data_ptr(), ptr[0], 'allocation re-used to soon')
|
||||
|
||||
self.assertEqual(result.tolist(), [1, 2, 3, 4])
|
||||
|
||||
# Check that the block will be re-used after the main stream finishes
|
||||
torch.cuda.current_stream().synchronize()
|
||||
with torch.cuda.stream(stream):
|
||||
tmp3 = torch.cuda.FloatTensor(t.size())
|
||||
self.assertEqual(tmp3.data_ptr(), ptr[0], 'allocation not re-used')
|
||||
|
||||
def test_caching_pinned_memory(self):
|
||||
cycles_per_ms = get_cycles_per_ms()
|
||||
|
||||
@ -642,8 +714,40 @@ class TestCuda(TestCase):
|
||||
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
|
||||
self.assertEqual(list(gpu_tensor), [1])
|
||||
|
||||
@unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
|
||||
def test_caching_pinned_memory_multi_gpu(self):
|
||||
# checks that the events preventing pinned memory from being re-used
|
||||
# too early are recorded on the correct GPU
|
||||
cycles_per_ms = get_cycles_per_ms()
|
||||
|
||||
for decl in tests:
|
||||
t = torch.FloatTensor([1]).pin_memory()
|
||||
ptr = t.data_ptr()
|
||||
gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
|
||||
gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)
|
||||
|
||||
with torch.cuda.device(1):
|
||||
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
|
||||
gpu_tensor1.copy_(t, async=True)
|
||||
|
||||
del t
|
||||
t = torch.FloatTensor([2]).pin_memory()
|
||||
self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
|
||||
|
||||
with torch.cuda.device(0):
|
||||
gpu_tensor0.copy_(t, async=True)
|
||||
|
||||
self.assertEqual(gpu_tensor1[0], 1)
|
||||
self.assertEqual(gpu_tensor0[0], 2)
|
||||
|
||||
def test_btrifact(self):
    # Delegates to TestTorch._test_btrifact, handing it a converter that
    # calls .cuda() on its argument.
    def to_cuda(tensor):
        return tensor.cuda()

    TestTorch._test_btrifact(self, to_cuda)
|
||||
|
||||
def test_btrisolve(self):
    # Delegates to TestTorch._test_btrisolve, handing it a converter that
    # calls .cuda() on its argument.
    def to_cuda(tensor):
        return tensor.cuda()

    TestTorch._test_btrisolve(self, to_cuda)
|
||||
|
||||
|
||||
if HAS_CUDA:
|
||||
for decl in tests:
|
||||
for t in types:
|
||||
tensor = t()
|
||||
gpu_tensor = get_gpu_type(t)()
|
||||
@ -666,7 +770,8 @@ for decl in tests:
|
||||
if not hasattr(tensor, name_inner):
|
||||
continue
|
||||
if not hasattr(gpu_tensor, name_inner):
|
||||
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(name_inner, gpu_tensor.__class__.__name__))
|
||||
print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(
|
||||
name_inner, gpu_tensor.__class__.__name__))
|
||||
continue
|
||||
|
||||
test_name = 'test_' + t.__name__ + '_' + name_inner
|
||||
@ -676,5 +781,6 @@ for decl in tests:
|
||||
assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name
|
||||
setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Only the post-change entry point is kept; the superseded
    # ``unittest.main()`` call would exit before ``run_tests()`` could run.
    run_tests()
|
||||
|
||||
@ -4,7 +4,7 @@ import torch
|
||||
import traceback
|
||||
import unittest
|
||||
from torch.utils.data import Dataset, TensorDataset, DataLoader
|
||||
from common import TestCase
|
||||
from common import TestCase, run_tests, TEST_NUMPY
|
||||
from common_nn import TEST_CUDA
|
||||
|
||||
|
||||
@ -27,11 +27,12 @@ class TestTensorDataset(TestCase):
|
||||
l = torch.randn(15)
|
||||
source = TensorDataset(t, l)
|
||||
for i in range(15):
|
||||
self.assertEqual(t[i:i+1], source[i][0])
|
||||
self.assertEqual(l[i:i+1], source[i][1])
|
||||
self.assertEqual(t[i], source[i][0])
|
||||
self.assertEqual(l[i], source[i][1])
|
||||
|
||||
|
||||
class ErrorDataset(Dataset):
|
||||
|
||||
def __init__(self, size):
|
||||
self.size = size
|
||||
|
||||
@ -50,9 +51,9 @@ class TestDataLoader(TestCase):
|
||||
batch_size = loader.batch_size
|
||||
for i, (sample, target) in enumerate(loader):
|
||||
idx = i * batch_size
|
||||
self.assertEqual(sample, self.data[idx:idx+batch_size])
|
||||
self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1))
|
||||
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
|
||||
self.assertEqual(sample, self.data[idx:idx + batch_size])
|
||||
self.assertEqual(target, self.labels[idx:idx + batch_size])
|
||||
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
|
||||
|
||||
def _test_shuffle(self, loader):
|
||||
found_data = {i: 0 for i in range(self.data.size(0))}
|
||||
@ -65,11 +66,11 @@ class TestDataLoader(TestCase):
|
||||
self.assertFalse(found_data[data_point_idx])
|
||||
found_data[data_point_idx] += 1
|
||||
break
|
||||
self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1))
|
||||
self.assertEqual(target, self.labels[data_point_idx])
|
||||
found_labels[data_point_idx] += 1
|
||||
self.assertEqual(sum(found_data.values()), (i+1) * batch_size)
|
||||
self.assertEqual(sum(found_labels.values()), (i+1) * batch_size)
|
||||
self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size))
|
||||
self.assertEqual(sum(found_data.values()), (i + 1) * batch_size)
|
||||
self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size)
|
||||
self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size))
|
||||
|
||||
def _test_error(self, loader):
|
||||
it = iter(loader)
|
||||
@ -81,10 +82,9 @@ class TestDataLoader(TestCase):
|
||||
errors += 1
|
||||
except StopIteration:
|
||||
self.assertEqual(errors,
|
||||
math.ceil(float(len(loader.dataset))/loader.batch_size))
|
||||
math.ceil(float(len(loader.dataset)) / loader.batch_size))
|
||||
return
|
||||
|
||||
|
||||
def test_sequential(self):
|
||||
self._test_sequential(DataLoader(self.dataset))
|
||||
|
||||
@ -123,6 +123,22 @@ class TestDataLoader(TestCase):
|
||||
self.assertTrue(input.is_pinned())
|
||||
self.assertTrue(target.is_pinned())
|
||||
|
||||
@unittest.skipIf(not TEST_NUMPY, "numpy unavailable")
def test_numpy(self):
    # A dataset that yields numpy arrays must be collated by the loader into
    # a single DoubleTensor with the batch dimension first.
    import numpy as np

    class TestDataset(torch.utils.data.Dataset):
        def __len__(self):
            return 1000

        def __getitem__(self, i):
            ones = np.ones((2, 3, 4))
            return ones * i

    loader = DataLoader(TestDataset(), batch_size=12)
    first_batch = next(iter(loader))
    self.assertIsInstance(first_batch, torch.DoubleTensor)
    self.assertEqual(first_batch.size(), torch.Size([12, 2, 3, 4]))
|
||||
|
||||
def test_error(self):
|
||||
self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
|
||||
|
||||
@ -158,5 +174,28 @@ class TestDataLoader(TestCase):
|
||||
check_len(DataLoader(self.dataset, batch_size=3), 34)
|
||||
|
||||
|
||||
class StringDataset(Dataset):
    """Dataset over the characters of the string '12345'.

    Item ``ndx`` is the pair ``(character at ndx, ndx)``.
    """

    def __init__(self):
        self.s = '12345'

    def __len__(self):
        return len(self.s)

    def __getitem__(self, ndx):
        char = self.s[ndx]
        return (char, ndx)
|
||||
|
||||
|
||||
class TestStringDataLoader(TestCase):
    """DataLoader checks for a dataset whose samples mix strings and integers."""

    def setUp(self):
        self.dataset = StringDataset()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_shuffle_pin_memory(self):
        # With pin_memory=True the string half of each batch must still be
        # plain str, while the index half must be pinned.
        loader = DataLoader(
            self.dataset,
            batch_size=2,
            shuffle=True,
            num_workers=4,
            pin_memory=True,
        )
        for strings, indices in loader:
            self.assertIsInstance(strings[0], str)
            self.assertTrue(indices.is_pinned())
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Only the post-change entry point is kept; the superseded
    # ``unittest.main()`` call would exit before ``run_tests()`` could run.
    run_tests()
|
||||
|
||||
508
test/test_distributed.py
Normal file
508
test/test_distributed.py
Normal file
@ -0,0 +1,508 @@
|
||||
import fcntl
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import unittest
|
||||
from functools import wraps, reduce
|
||||
from contextlib import contextmanager
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
from common import TestCase
|
||||
|
||||
# Both settings are read with os.environ[...] and therefore raise KeyError
# at import time when the variable is missing.
BACKEND = os.environ['BACKEND']
TEMP_DIR = os.environ['TEMP_DIR']

# Master endpoint, written as "host:port".
MASTER_PORT = '29500'
MASTER_ADDR = '127.0.0.1:{}'.format(MASTER_PORT)
|
||||
|
||||
|
||||
@contextmanager
def _lock():
    """Context manager holding an exclusive ``flock`` on ``TEMP_DIR/lockfile``.

    The lock file is opened for writing, locked with ``LOCK_EX`` for the
    duration of the ``with`` body, and unlocked on exit even if the body raises.
    """
    lockfile = os.path.join(TEMP_DIR, 'lockfile')
    with open(lockfile, 'w') as lf:
        # Acquire outside the try so a failed acquire does not attempt an unlock.
        fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
        # No explicit close: leaving the ``with`` block closes the file.
|
||||
|
||||
|
||||
def _build_tensor(size, value=None):
|
||||
if value is None:
|
||||
value = size
|
||||
return torch.FloatTensor(size, size, size).fill_(value)
|
||||
|
||||
|
||||
class Barrier(object):
    """File-based barrier between processes.

    Each process writes its current barrier counter to a file named after its
    PID inside ``TEMP_DIR/barrier`` and then polls until as many files as
    ``dist.get_num_processes()`` carry a counter at least as large as its own.
    """

    # Number of barriers this process has entered so far.
    barrier_id = 0

    @classmethod
    def init(cls):
        """Reset the counter and delete every file in the barrier directory."""
        cls.barrier_id = 0
        barrier_dir = os.path.join(TEMP_DIR, 'barrier')
        for entry in os.listdir(barrier_dir):
            os.unlink(os.path.join(barrier_dir, entry))

    @classmethod
    def sync(cls, timeout=5):
        """Block until all processes have reached this barrier.

        Raises:
            RuntimeError: if, after a poll, more than ``timeout`` seconds have
                elapsed without every process arriving.
        """
        cls.barrier_id += 1
        barrier_dir = os.path.join(TEMP_DIR, 'barrier')
        own_file = os.path.join(barrier_dir, str(os.getpid()))

        # Publish our own arrival.
        with _lock():
            with open(own_file, 'w') as out:
                out.write(str(cls.barrier_id))

        started = time.time()
        while True:
            # Count the processes whose published counter has caught up.
            arrived = 0
            with _lock():
                for entry in os.listdir(barrier_dir):
                    with open(os.path.join(barrier_dir, entry), 'r') as src:
                        published = int(src.read())
                    if published >= cls.barrier_id:
                        arrived += 1
            if arrived == dist.get_num_processes():
                break

            if time.time() - started > timeout:
                raise RuntimeError("barrier timeout")
            time.sleep(0.1)
|
||||
|
||||
|
||||
class _DistTestBase(object):
|
||||
|
||||
def _barrier(self, *args, **kwargs):
    """Forward all arguments to ``Barrier.sync``; its result is discarded."""
    sync = Barrier.sync
    sync(*args, **kwargs)
|
||||
|
||||
def _init_group_test(self):
    """Create the process group made of ranks 1 and 2.

    Returns ``(group, group_id, rank)`` for member ranks and
    ``([], None, rank)`` for ranks outside the group. ``dist.new_group`` is
    called in both cases.
    """
    members = [1, 2]
    group_id = dist.new_group(members)
    rank = dist.get_rank()
    if rank in members:
        return (members, group_id, rank)
    return ([], None, rank)
|
||||
|
||||
def _init_global_test(self):
    """Return ``(group, group_id, rank)`` for the world group.

    ``group`` lists every rank from 0 to ``dist.get_num_processes() - 1`` and
    ``group_id`` is ``dist.group.WORLD``.
    """
    all_ranks = list(range(0, dist.get_num_processes()))
    world = dist.group.WORLD
    return (all_ranks, world, dist.get_rank())
|
||||
|
||||
# GET RANK
|
||||
def test_get_rank(self):
    # Every process writes its rank to a file named after its PID; after a
    # barrier, the number of distinct ranks found must equal the process count.
    test_dir = os.path.join(TEMP_DIR, 'test_dir')
    own_path = os.path.join(test_dir, str(os.getpid()))
    num_processes = dist.get_num_processes()
    with open(own_path, 'w') as out:
        out.write(str(dist.get_rank()))

    self._barrier()

    seen_ranks = set()
    for entry in os.listdir(test_dir):
        with open(os.path.join(test_dir, entry), 'r') as src:
            seen_ranks.add(int(src.read()))
    self.assertEqual(len(seen_ranks), num_processes)

    self._barrier()

    # Rank 0 alone removes the rank files, after everyone has read them.
    if dist.get_rank() == 0:
        for entry in os.listdir(test_dir):
            os.unlink(os.path.join(test_dir, entry))

    self._barrier()
|
||||
|
||||
# SEND RECV
|
||||
def test_send_recv(self):
    # Each rank sends a tensor built from (rank + 1) to every other rank,
    # then receives from every other rank and compares with the expected one.
    rank = dist.get_rank()
    outgoing = _build_tensor(rank + 1)
    for dest in range(0, dist.get_num_processes()):
        if dest != rank:
            dist.send(outgoing, dest)

    for src in range(0, dist.get_num_processes()):
        if src != rank:
            # Pre-fill with -1 so a successful receive is distinguishable.
            incoming = _build_tensor(src + 1, value=-1)
            expected = _build_tensor(src + 1)
            dist.recv(incoming, src)
            self.assertEqual(incoming, expected)

    self._barrier()
|
||||
|
||||
# SEND RECV ANY SOURCE
|
||||
def test_send_recv_any_source(self):
    # Each rank sends a tensor filled with its own rank to all others, then
    # receives without naming a source; the first element of each received
    # tensor is collected as the sender's rank.
    rank = dist.get_rank()
    outgoing = _build_tensor(10, rank)
    for dest in range(0, dist.get_num_processes()):
        if dest != rank:
            dist.send(outgoing, dest)

    senders = set()
    for src in range(0, dist.get_num_processes()):
        if src != rank:
            incoming = _build_tensor(10, value=-1)
            dist.recv(incoming)
            senders.add(incoming.resize_(1)[0])

    self.assertEqual(len(senders), dist.get_num_processes() - 1)
    self._barrier()
|
||||
|
||||
# ISEND
|
||||
def test_isend(self):
    # Rank 0 posts one isend per other rank and waits on each request;
    # every other rank does a plain recv from rank 0 and checks the payload.
    my_rank = dist.get_rank()
    world_size = dist.get_num_processes()

    if my_rank != 0:
        incoming = _build_tensor(my_rank, -1)
        dist.recv(incoming, 0)
        self.assertEqual(incoming, _build_tensor(my_rank, 10))
    else:
        # Post all sends before waiting on any of them.
        pending = []
        for dest in range(1, world_size):
            pending.append(dist.isend(_build_tensor(dest, 10), dest))
        for req in pending:
            req.wait()
            self.assertTrue(req.is_completed())

    self._barrier()
|
||||
|
||||
# IRECV
|
||||
def test_irecv(self):
    # Rank 0 posts one irecv per other rank, waits on each request and
    # checks the received tensor; every other rank does a plain send to 0.
    my_rank = dist.get_rank()
    world_size = dist.get_num_processes()

    if my_rank != 0:
        dist.send(_build_tensor(my_rank, 10), 0)
    else:
        sources = range(1, world_size)
        # Allocate all receive buffers, then post all receives, then wait.
        buffers = [_build_tensor(src, -1) for src in sources]
        pending = [dist.irecv(buf, src) for src, buf in zip(sources, buffers)]

        for src, buf, req in zip(sources, buffers, pending):
            req.wait()
            self.assertTrue(req.is_completed())
            self.assertEqual(buf, _build_tensor(src, 10))

    self._barrier()
|
||||
|
||||
# BROADCAST
|
||||
def _test_broadcast_helper(self, group, group_id, rank):
    """Broadcast once from every member of `group` and verify the receivers.

    For each source the source process broadcasts `_build_tensor(src + 1)`;
    every other process broadcasts into a buffer built with value -1 and
    asserts it then equals the source's tensor.
    """
    for root in group:
        reference = _build_tensor(root + 1)
        if rank != root:
            buffer = _build_tensor(root + 1, -1)
            dist.broadcast(buffer, root, group_id)
            self.assertEqual(buffer, reference)
        else:
            dist.broadcast(reference, root, group_id)

    self._barrier()
|
||||
|
||||
def test_broadcast(self):
|
||||
group, group_id, rank = self._init_global_test()
|
||||
self._test_broadcast_helper(group, group_id, rank)
|
||||
|
||||
def test_broadcast_group(self):
|
||||
group, group_id, rank = self._init_group_test()
|
||||
self._test_broadcast_helper(group, group_id, rank)
|
||||
|
||||
# REDUCE
|
||||
def _test_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
    """Run `dist.reduce` once towards every member of `group`.

    The process that is the reduction target fills its tensor with
    `master_value`, all others with `worker_value`. Only the target checks
    the result, against a tensor built with `expected_value`.
    """
    for root in group:
        is_root = (rank == root)
        fill = master_value if is_root else worker_value
        contribution = _build_tensor(root + 1).fill_(fill)
        dist.reduce(contribution, root, op, group_id)
        if is_root:
            self.assertEqual(contribution, _build_tensor(root + 1, expected_value))

    self._barrier()
|
||||
|
||||
def test_reduce_sum(self):
    # SUM reduce on the global setup: target contributes 2, each other member 10.
    members, gid, my_rank = self._init_global_test()
    op = dist.reduce_op.SUM
    total = 2 + (10 * (len(members) - 1))
    self._test_reduce_helper(members, gid, my_rank, op, 2, 10, total)
|
||||
|
||||
def test_reduce_product(self):
    # PRODUCT reduce on the global setup: target contributes 2, each other member 10.
    members, gid, my_rank = self._init_global_test()
    op = dist.reduce_op.PRODUCT
    # Expected result: 2 multiplied by 10 once per non-target member.
    product = 2
    for _ in range(len(members) - 1):
        product = product * 10
    self._test_reduce_helper(members, gid, my_rank, op, 2, 10, product)
|
||||
|
||||
def test_reduce_min(self):
    # MIN reduce on the global setup: target holds 1010, others 1, so 1 is expected.
    members, gid, my_rank = self._init_global_test()
    self._test_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MIN,
        master_value=1010, worker_value=1, expected_value=1
    )
|
||||
|
||||
def test_reduce_max(self):
    # MAX reduce on the global setup: target holds -1, others 10, so 10 is expected.
    members, gid, my_rank = self._init_global_test()
    self._test_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MAX,
        master_value=-1, worker_value=10, expected_value=10
    )
|
||||
|
||||
def test_reduce_group_sum(self):
    # SUM reduce on the group setup: target contributes 2, each other member 10.
    members, gid, my_rank = self._init_group_test()
    op = dist.reduce_op.SUM
    total = 2 + (10 * (len(members) - 1))
    self._test_reduce_helper(members, gid, my_rank, op, 2, 10, total)
|
||||
|
||||
def test_reduce_group_product(self):
    # PRODUCT reduce on the group setup: target contributes 2, each other member 10.
    members, gid, my_rank = self._init_group_test()
    op = dist.reduce_op.PRODUCT
    # Expected result: 2 multiplied by 10 once per non-target member.
    product = 2
    for _ in range(len(members) - 1):
        product = product * 10
    self._test_reduce_helper(members, gid, my_rank, op, 2, 10, product)
|
||||
|
||||
def test_reduce_group_min(self):
    # MIN reduce on the group setup: target holds 1010, others 1, so 1 is expected.
    members, gid, my_rank = self._init_group_test()
    self._test_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MIN,
        master_value=1010, worker_value=1, expected_value=1
    )
|
||||
|
||||
def test_reduce_group_max(self):
    # MAX reduce on the group setup: target holds -1, others 10, so 10 is expected.
    members, gid, my_rank = self._init_group_test()
    self._test_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MAX,
        master_value=-1, worker_value=10, expected_value=10
    )
|
||||
|
||||
# ALL REDUCE
|
||||
def _test_all_reduce_helper(self, group, group_id, rank, op, master_value, worker_value, expected_value):
    """Run `dist.all_reduce` once per member of `group`.

    In each round the process whose rank equals the current member fills
    its tensor with `master_value`, all others with `worker_value`. Every
    process then checks its tensor against one built with `expected_value`.
    """
    for member in group:
        fill = master_value if rank == member else worker_value
        buffer = _build_tensor(member + 1).fill_(fill)
        dist.all_reduce(buffer, op, group_id)
        self.assertEqual(buffer, _build_tensor(member + 1, expected_value))

    self._barrier()
|
||||
|
||||
def test_all_reduce_sum(self):
    # SUM all-reduce on the global setup: one member contributes 2, the rest 10 each.
    members, gid, my_rank = self._init_global_test()
    op = dist.reduce_op.SUM
    total = 2 + (10 * (len(members) - 1))
    self._test_all_reduce_helper(members, gid, my_rank, op, 2, 10, total)
|
||||
|
||||
def test_all_reduce_product(self):
    # PRODUCT all-reduce on the global setup: one member contributes 2, the rest 10 each.
    members, gid, my_rank = self._init_global_test()
    op = dist.reduce_op.PRODUCT
    # Expected result: 2 multiplied by 10 once per remaining member.
    product = 2
    for _ in range(len(members) - 1):
        product = product * 10
    self._test_all_reduce_helper(members, gid, my_rank, op, 2, 10, product)
|
||||
|
||||
def test_all_reduce_min(self):
    # MIN all-reduce on the global setup: one member holds 1010, others 1, so 1 is expected.
    members, gid, my_rank = self._init_global_test()
    self._test_all_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MIN,
        master_value=1010, worker_value=1, expected_value=1
    )
|
||||
|
||||
def test_all_reduce_max(self):
    # MAX all-reduce on the global setup: one member holds -1, others 10, so 10 is expected.
    members, gid, my_rank = self._init_global_test()
    self._test_all_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MAX,
        master_value=-1, worker_value=10, expected_value=10
    )
|
||||
|
||||
def test_all_reduce_group_sum(self):
    # SUM all-reduce on the group setup: one member contributes 2, the rest 10 each.
    members, gid, my_rank = self._init_group_test()
    op = dist.reduce_op.SUM
    total = 2 + (10 * (len(members) - 1))
    self._test_all_reduce_helper(members, gid, my_rank, op, 2, 10, total)
|
||||
|
||||
def test_all_reduce_group_product(self):
    # PRODUCT all-reduce on the group setup: one member contributes 2, the rest 10 each.
    members, gid, my_rank = self._init_group_test()
    op = dist.reduce_op.PRODUCT
    # Expected result: 2 multiplied by 10 once per remaining member.
    product = 2
    for _ in range(len(members) - 1):
        product = product * 10
    self._test_all_reduce_helper(members, gid, my_rank, op, 2, 10, product)
|
||||
|
||||
def test_all_reduce_group_min(self):
    # MIN all-reduce on the group setup: one member holds 1010, others 1, so 1 is expected.
    members, gid, my_rank = self._init_group_test()
    self._test_all_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MIN,
        master_value=1010, worker_value=1, expected_value=1
    )
|
||||
|
||||
def test_all_reduce_group_max(self):
    # MAX all-reduce on the group setup: one member holds -1, others 10, so 10 is expected.
    members, gid, my_rank = self._init_group_test()
    self._test_all_reduce_helper(
        members, gid, my_rank, dist.reduce_op.MAX,
        master_value=-1, worker_value=10, expected_value=10
    )
|
||||
|
||||
# SCATTER
|
||||
def _test_scatter_helper(self, group, group_id, rank):
    """Scatter once from every member of `group` and verify each recipient.

    The scattering process sends one tensor per group member (built with
    that member's rank as value); every process, sender included, asserts
    that the tensor it ends up with was built with its own rank.
    """
    for root in group:
        received = _build_tensor(root + 1, -1)
        wanted = _build_tensor(root + 1, rank)
        if rank != root:
            dist.scatter_recv(received, root, group_id)
        else:
            chunks = [_build_tensor(root + 1, member) for member in group]
            dist.scatter_send(chunks, received, group_id)
        # Both branches perform the same check on the tensor they obtained.
        self.assertEqual(received, wanted)

    self._barrier()
|
||||
|
||||
def test_scatter(self):
|
||||
group, group_id, rank = self._init_global_test()
|
||||
self._test_scatter_helper(group, group_id, rank)
|
||||
|
||||
def test_scatter_group(self):
|
||||
group, group_id, rank = self._init_group_test()
|
||||
self._test_scatter_helper(group, group_id, rank)
|
||||
|
||||
# GATHER
|
||||
def _test_gather_helper(self, group, group_id, rank):
    """Gather once towards every member of `group` and verify the result.

    Every process contributes a tensor built with its own rank as value.
    The gathering process receives into one buffer per group member and
    compares them, in group order, with tensors built from each member.
    """
    for root in group:
        own = _build_tensor(root + 1, rank)
        if rank != root:
            dist.gather_send(own, root, group_id)
            continue

        slots = [_build_tensor(root + 1, -1) for _ in group]
        dist.gather_recv(slots, own, group_id)

        wanted = [_build_tensor(root + 1, member) for member in group]
        for got, want in zip(slots, wanted):
            self.assertEqual(got, want)

    self._barrier()
|
||||
|
||||
def test_gather(self):
|
||||
group, group_id, rank = self._init_global_test()
|
||||
self._test_gather_helper(group, group_id, rank)
|
||||
|
||||
def test_gather_group(self):
|
||||
group, group_id, rank = self._init_group_test()
|
||||
self._test_gather_helper(group, group_id, rank)
|
||||
|
||||
# ALL GATHER
|
||||
def _test_all_gather_helper(self, group, group_id, rank):
    """Run `dist.all_gather` once per member of `group` and verify the result.

    In each round every process contributes a tensor built with its own
    rank and receives into one buffer per group member; the buffers are
    then compared, in group order, with tensors built from each member.
    """
    for member in group:
        own = _build_tensor(member + 1, rank)
        slots = [_build_tensor(member + 1, -1) for _ in group]
        dist.all_gather(slots, own, group_id)

        wanted = [_build_tensor(member + 1, peer) for peer in group]
        for got, want in zip(slots, wanted):
            self.assertEqual(got, want)

    self._barrier()
|
||||
|
||||
def test_all_gather(self):
|
||||
group, group_id, rank = self._init_global_test()
|
||||
self._test_all_gather_helper(group, group_id, rank)
|
||||
|
||||
def test_all_gather_group(self):
|
||||
group, group_id, rank = self._init_group_test()
|
||||
self._test_all_gather_helper(group, group_id, rank)
|
||||
|
||||
# BARRIER
|
||||
def _test_barrier_helper(self, group, group_id, rank):
    """Check that `dist.barrier` holds processes until the slowest arrives.

    In turn, each member of `group` broadcasts a timestamp WAIT_TIME in the
    future, sleeps slightly longer than WAIT_TIME, and only then enters the
    barrier. Every other process enters the barrier right after the
    broadcast and asserts that it is released no earlier than that
    timestamp.
    """
    WAIT_TIME = 0.3  # seconds

    for sleeper in group:
        release_time = torch.DoubleTensor(1).fill_(0.0)
        if sleeper != rank:
            dist.broadcast(release_time, sleeper, group_id)
            dist.barrier(group_id)
            self.assertGreaterEqual(time.time(), release_time[0])
        else:
            release_time.fill_(time.time() + WAIT_TIME)
            dist.broadcast(release_time, sleeper, group_id)
            time.sleep(WAIT_TIME + 0.1)  # sleep a little bit longer
            dist.barrier(group_id)

    self._barrier()
|
||||
|
||||
def test_barrier(self):
|
||||
group, group_id, rank = self._init_global_test()
|
||||
self._test_barrier_helper(group, group_id, rank)
|
||||
|
||||
def test_barrier_group(self):
|
||||
group, group_id, rank = self._init_group_test()
|
||||
self._test_barrier_helper(group, group_id, rank)
|
||||
|
||||
if BACKEND == 'tcp':
    # Kept as a string: it is written back into os.environ in setUpClass and
    # converted with int() where a count is needed.
    WORLD_SIZE = os.environ['WORLD_SIZE']

    class TestTCP(TestCase, _DistTestBase):
        """Runs every distributed test in WORLD_SIZE child processes.

        The process that unittest runs the test in acts as a manager: it
        spawns one child per rank in setUp, and for each test only joins
        the children and checks their exit codes. The children execute the
        actual test body.
        """

        # Rank value that marks the manager (parent) process.
        MANAGER_PROCESS_RANK = -1
        # Seconds to wait for each child process when joining.
        JOIN_TIMEOUT = 5

        @staticmethod
        def manager_join(fn):
            """Wrap test `fn` so the manager joins children instead of running it."""
            @wraps(fn)
            def wrapper(self):
                if self.rank == self.MANAGER_PROCESS_RANK:
                    self._join_and_reduce()
                else:
                    fn(self)
            return wrapper

        @classmethod
        def setUpClass(cls):
            """Export the rendezvous settings and wrap every `test*` attribute."""
            os.environ['MASTER_ADDR'] = MASTER_ADDR
            os.environ['MASTER_PORT'] = MASTER_PORT
            os.environ['WORLD_SIZE'] = WORLD_SIZE
            for attr in dir(cls):
                if attr.startswith('test'):
                    fn = getattr(cls, attr)
                    setattr(cls, attr, cls.manager_join(fn))

        def setUp(self):
            """Mark this process as the manager and spawn one child per rank."""
            self.processes = []
            self.rank = self.MANAGER_PROCESS_RANK
            Barrier.init()
            for rank in range(int(WORLD_SIZE)):
                self.processes.append(self._spawn_process(rank))

        def tearDown(self):
            """Stop all child processes and reap them."""
            for p in self.processes:
                p.terminate()
            # Join after terminating so finished children do not linger as
            # zombies; the timeout keeps a stuck child from hanging the run.
            for p in self.processes:
                p.join(self.JOIN_TIMEOUT)

        def _spawn_process(self, rank):
            """Start and return a child process that runs `_run(rank)`."""
            # RANK is set in the environment before the child is started.
            os.environ['RANK'] = str(rank)
            name = 'process ' + str(rank)
            process = multiprocessing.Process(target=self._run, name=name,
                                              args=(rank,))
            process.start()
            return process

        def _run(self, rank):
            """Child entry point: initialise the backend and run the current test."""
            self.rank = rank
            dist.init_process_group(backend=BACKEND)
            # self.id() == e.g. '__main__.TestDistributed.test_get_rank'
            # We're retrieving the corresponding test and executing it. The
            # method name is the last component; taking it from the end keeps
            # this correct when the module path itself contains dots.
            getattr(self, self.id().split(".")[-1])()
            sys.exit(0)

        def _join_and_reduce(self):
            """Manager side of a test: every child must exit with code 0."""
            for p in self.processes:
                p.join(self.JOIN_TIMEOUT)
                # exitcode is None if the child is still running after the timeout.
                self.assertEqual(p.exitcode, 0)

elif BACKEND == 'mpi':
    dist.init_process_group(backend='mpi')

    class TestMPI(TestCase, _DistTestBase):
        """Runs the shared distributed tests directly in the current process."""
        pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@ -7,9 +7,11 @@ import torch
|
||||
import torch.legacy.nn as nn
|
||||
from common_nn import NNTestCase, ModuleTest, CriterionTest, iter_tensors, \
|
||||
module_tests, criterion_tests, TEST_CUDA, PRECISION
|
||||
from common import to_gpu, freeze_rng_state
|
||||
from common import to_gpu, freeze_rng_state, run_tests
|
||||
|
||||
|
||||
class OldModuleTest(ModuleTest):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(OldModuleTest, self).__init__(*args, **kwargs)
|
||||
self.check_inplace = kwargs.get('check_inplace', False)
|
||||
@ -55,7 +57,7 @@ tests = [
|
||||
OldModuleTest(nn.AddConstant,
|
||||
(3.5,),
|
||||
input_size=(3, 5, 4),
|
||||
reference_fn=lambda i,_: i + 3.5,
|
||||
reference_fn=lambda i, _: i + 3.5,
|
||||
check_inplace=True),
|
||||
OldModuleTest(nn.BatchNormalization,
|
||||
(10,),
|
||||
@ -97,21 +99,21 @@ tests = [
|
||||
desc='3D_single_example'),
|
||||
OldModuleTest(nn.CMul,
|
||||
(1, 5),
|
||||
input=torch.randn(10, 3, 5)[:,1],
|
||||
input=torch.randn(10, 3, 5)[:, 1],
|
||||
desc='3D_noncontiguous'),
|
||||
OldModuleTest(nn.Exp,
|
||||
input_size=(2, 3, 4),
|
||||
reference_fn=lambda i,_: i.exp()),
|
||||
reference_fn=lambda i, _: i.exp()),
|
||||
OldModuleTest(nn.Log,
|
||||
input=torch.rand(2, 3, 2) + 0.1,
|
||||
reference_fn=lambda i,_: i.log()),
|
||||
reference_fn=lambda i, _: i.log()),
|
||||
OldModuleTest(nn.Clamp,
|
||||
(-2., 5.),
|
||||
input=torch.randn(3, 2, 50) * 6,
|
||||
reference_fn=lambda i,_: i.clamp(-2, 5)),
|
||||
reference_fn=lambda i, _: i.clamp(-2, 5)),
|
||||
OldModuleTest(nn.Abs,
|
||||
input_size=(3, 20, 5),
|
||||
reference_fn=lambda i,_: i.abs()),
|
||||
reference_fn=lambda i, _: i.abs()),
|
||||
OldModuleTest(nn.Bilinear,
|
||||
(2, 3, 10),
|
||||
input_size=[(4, 2), (4, 3)]),
|
||||
@ -138,112 +140,112 @@ tests = [
|
||||
input_size=[(5, 7), (5, 7)]),
|
||||
OldModuleTest(nn.Square,
|
||||
input_size=(10, 2, 4),
|
||||
reference_fn=lambda i,_: i.mul(i)),
|
||||
reference_fn=lambda i, _: i.mul(i)),
|
||||
OldModuleTest(nn.Sqrt,
|
||||
input=torch.rand(10, 2, 4)+0.01,
|
||||
reference_fn=lambda i,_: i.sqrt()),
|
||||
input=torch.rand(10, 2, 4) + 0.01,
|
||||
reference_fn=lambda i, _: i.sqrt()),
|
||||
OldModuleTest(nn.Squeeze,
|
||||
input_size=(2, 1, 1, 4, 5),
|
||||
reference_fn=lambda i,_: i.squeeze()),
|
||||
reference_fn=lambda i, _: i.squeeze()),
|
||||
OldModuleTest(nn.Squeeze,
|
||||
(1,),
|
||||
input_size=(2, 1, 1, 4, 5),
|
||||
reference_fn=lambda i,_: i.squeeze(1),
|
||||
reference_fn=lambda i, _: i.squeeze(1),
|
||||
desc='dim'),
|
||||
OldModuleTest(nn.Unsqueeze,
|
||||
(1,),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(2, 1, 4, 5)),
|
||||
reference_fn=lambda i, _: i.view(2, 1, 4, 5)),
|
||||
OldModuleTest(nn.Unsqueeze,
|
||||
(0,),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(1, 2, 4, 5),
|
||||
reference_fn=lambda i, _: i.view(1, 2, 4, 5),
|
||||
desc='fist_dim'),
|
||||
OldModuleTest(nn.Unsqueeze,
|
||||
(3,),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(2, 4, 5, 1),
|
||||
reference_fn=lambda i, _: i.view(2, 4, 5, 1),
|
||||
desc='last_dim'),
|
||||
OldModuleTest(nn.View,
|
||||
(-1, 2, 20),
|
||||
input_size=(2, 2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(-1, 2, 20),
|
||||
reference_fn=lambda i, _: i.view(-1, 2, 20),
|
||||
desc='infer_batch'),
|
||||
OldModuleTest(nn.View,
|
||||
(2, 2, 2, 5),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(2, 2, 2, 5),
|
||||
reference_fn=lambda i, _: i.view(2, 2, 2, 5),
|
||||
desc='split_dim'),
|
||||
OldModuleTest(nn.View,
|
||||
(2, -1, 2, 5),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(2, -1, 2, 5),
|
||||
reference_fn=lambda i, _: i.view(2, -1, 2, 5),
|
||||
desc='infer_middle'),
|
||||
OldModuleTest(nn.Sum,
|
||||
(1,),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.sum(1).squeeze(1)),
|
||||
reference_fn=lambda i, _: i.sum(1).squeeze(1)),
|
||||
OldModuleTest(nn.Sum,
|
||||
(1, True),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: i.sum(1).div(i.size(1)).squeeze(1),
|
||||
reference_fn=lambda i, _: i.sum(1).div(i.size(1)).squeeze(1),
|
||||
desc='sizeAverage'),
|
||||
OldModuleTest(nn.Mean,
|
||||
(1,),
|
||||
input_size=(2, 4, 5),
|
||||
reference_fn=lambda i,_: torch.mean(i, 1).squeeze(1)),
|
||||
reference_fn=lambda i, _: torch.mean(i, 1).squeeze(1)),
|
||||
OldModuleTest(lambda: nn.Sequential().add(nn.GradientReversal()).add(nn.GradientReversal()),
|
||||
input_size=(4, 3, 2, 2),
|
||||
fullname='GradientReversal'),
|
||||
OldModuleTest(nn.Identity,
|
||||
input_size=(4, 3, 2, 4),
|
||||
reference_fn=lambda i,_: i),
|
||||
reference_fn=lambda i, _: i),
|
||||
OldModuleTest(nn.DotProduct,
|
||||
input_size=[(10, 4), (10, 4)],
|
||||
reference_fn=lambda i,_: torch.Tensor(list(
|
||||
reference_fn=lambda i, _: torch.Tensor(list(
|
||||
a.dot(b) for a, b in zip(i[0], i[1])))
|
||||
),
|
||||
OldModuleTest(nn.CosineDistance,
|
||||
input_size=[(10, 4), (10, 4)],
|
||||
reference_fn=lambda i,_: torch.Tensor(list(
|
||||
reference_fn=lambda i, _: torch.Tensor(list(
|
||||
a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1])))
|
||||
),
|
||||
OldModuleTest(nn.JoinTable,
|
||||
(0,),
|
||||
input_size=[(10, 4), (10, 4)],
|
||||
reference_fn=lambda i,_: torch.cat(i, 0),
|
||||
reference_fn=lambda i, _: torch.cat(i, 0),
|
||||
desc='first_dim'),
|
||||
OldModuleTest(nn.JoinTable,
|
||||
(2,),
|
||||
input_size=[(2, 4, 2), (2, 4, 2)],
|
||||
reference_fn=lambda i,_: torch.cat(i, 2),
|
||||
reference_fn=lambda i, _: torch.cat(i, 2),
|
||||
desc='positive_dim_index'),
|
||||
OldModuleTest(nn.JoinTable,
|
||||
(-1,),
|
||||
input_size=[(2, 4, 2, 4), (2, 4, 2, 4)],
|
||||
reference_fn=lambda i,_: torch.cat(i, 3),
|
||||
reference_fn=lambda i, _: torch.cat(i, 3),
|
||||
desc='negative_dim_index'),
|
||||
OldModuleTest(nn.MM,
|
||||
input_size=[(4, 5, 3), (4, 3, 2)],
|
||||
reference_fn=lambda i,_: torch.bmm(*i)),
|
||||
reference_fn=lambda i, _: torch.bmm(*i)),
|
||||
OldModuleTest(nn.MV,
|
||||
input_size=[(4, 5, 3), (4, 3)],
|
||||
reference_fn=lambda i,_: torch.bmm(i[0], i[1].view(i[1].size(0), i[1].size(1), 1)).squeeze()),
|
||||
reference_fn=lambda i, _: torch.bmm(i[0], i[1].view(i[1].size(0), i[1].size(1), 1)).squeeze()),
|
||||
OldModuleTest(nn.Max,
|
||||
input_size=(4, 5, 3),
|
||||
reference_fn=lambda i,_: torch.max(i, 0)[0].squeeze()),
|
||||
reference_fn=lambda i, _: torch.max(i, 0)[0].squeeze()),
|
||||
OldModuleTest(nn.Max,
|
||||
(1,),
|
||||
input_size=(4, 5, 3),
|
||||
reference_fn=lambda i,_: torch.max(i, 1)[0].squeeze(),
|
||||
reference_fn=lambda i, _: torch.max(i, 1)[0].squeeze(),
|
||||
desc='with_dimension'),
|
||||
OldModuleTest(nn.Min,
|
||||
input_size=(4, 5, 3),
|
||||
reference_fn=lambda i,_: torch.min(i, 0)[0].squeeze()),
|
||||
reference_fn=lambda i, _: torch.min(i, 0)[0].squeeze()),
|
||||
OldModuleTest(nn.Min,
|
||||
(1,),
|
||||
input_size=(4, 5, 3),
|
||||
reference_fn=lambda i,_: torch.min(i, 1)[0].squeeze(),
|
||||
reference_fn=lambda i, _: torch.min(i, 1)[0].squeeze(),
|
||||
desc='with_dimension'),
|
||||
OldModuleTest(nn.MixtureTable,
|
||||
tuple(),
|
||||
@ -254,35 +256,35 @@ tests = [
|
||||
jacobian_input=False),
|
||||
OldModuleTest(nn.Mul,
|
||||
input_size=(2, 3, 4, 2),
|
||||
reference_fn=lambda i,p: i * p[0][0]),
|
||||
reference_fn=lambda i, p: i * p[0][0]),
|
||||
OldModuleTest(nn.MulConstant,
|
||||
(4,),
|
||||
input_size=(2, 3, 4, 2),
|
||||
reference_fn=lambda i,_: i * 4,
|
||||
reference_fn=lambda i, _: i * 4,
|
||||
check_inplace=True),
|
||||
OldModuleTest(nn.Narrow,
|
||||
(0, 0),
|
||||
input_size=(2, 3, 4, 2),
|
||||
reference_fn=lambda i,_: i.narrow(0, 0, 1)),
|
||||
reference_fn=lambda i, _: i.narrow(0, 0, 1)),
|
||||
OldModuleTest(nn.Narrow,
|
||||
(1, 1, 2),
|
||||
input_size=(2, 3, 4, 2),
|
||||
reference_fn=lambda i,_: i.narrow(1, 1, 2),
|
||||
reference_fn=lambda i, _: i.narrow(1, 1, 2),
|
||||
desc='length'),
|
||||
OldModuleTest(nn.Transpose,
|
||||
((1, 2), (1, 3)),
|
||||
input_size=(2, 3, 4, 5),
|
||||
reference_fn=lambda i,_: i.transpose(1, 2).transpose(1, 3)),
|
||||
reference_fn=lambda i, _: i.transpose(1, 2).transpose(1, 3)),
|
||||
OldModuleTest(nn.Transpose,
|
||||
((1, 2),),
|
||||
input_size=(2, 3, 4, 5),
|
||||
reference_fn=lambda i,_: i.transpose(1, 2),
|
||||
reference_fn=lambda i, _: i.transpose(1, 2),
|
||||
desc='single_arg'),
|
||||
# TODO: this seems to be very slow
|
||||
OldModuleTest(nn.Replicate,
|
||||
(2, 1),
|
||||
input_size=(10, 3, 4, 5),
|
||||
reference_fn=lambda i,_: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)),
|
||||
reference_fn=lambda i, _: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)),
|
||||
OldModuleTest(nn.Padding,
|
||||
(0, 2, -10),
|
||||
input_size=(2, 3, 4, 5)),
|
||||
@ -309,20 +311,20 @@ tests = [
|
||||
desc='fractional'),
|
||||
OldModuleTest(nn.Reshape,
|
||||
(4, 5),
|
||||
input_size=(3, 4*5),
|
||||
input_size=(3, 4 * 5),
|
||||
desc='add_dim'),
|
||||
OldModuleTest(nn.Reshape,
|
||||
(4*5,),
|
||||
(4 * 5,),
|
||||
input_size=(3, 4, 5),
|
||||
desc='squash_dim'),
|
||||
OldModuleTest(nn.Select,
|
||||
(1, 2),
|
||||
input_size=(3, 4, 5),
|
||||
reference_fn=lambda i,_: i.select(1, 2)),
|
||||
reference_fn=lambda i, _: i.select(1, 2)),
|
||||
OldModuleTest(nn.SelectTable,
|
||||
(1,),
|
||||
input_size=[(1,), (2,), (3,), (4,)],
|
||||
reference_fn=lambda i,_: i[1]),
|
||||
reference_fn=lambda i, _: i[1]),
|
||||
OldModuleTest(nn.SpatialAveragePooling,
|
||||
(2, 2),
|
||||
input_size=(2, 3, 6, 6)),
|
||||
@ -337,7 +339,7 @@ tests = [
|
||||
OldModuleTest(nn.SpatialAdaptiveMaxPooling,
|
||||
(4, 4),
|
||||
input_size=(2, 3, 8, 8),
|
||||
reference_fn=lambda i,_: nn.SpatialMaxPooling(2, 2).forward(i)),
|
||||
reference_fn=lambda i, _: nn.SpatialMaxPooling(2, 2).forward(i)),
|
||||
OldModuleTest(nn.SpatialAdaptiveMaxPooling,
|
||||
(4, 4),
|
||||
input_size=(2, 3, 7, 11),
|
||||
@ -481,14 +483,14 @@ tests = [
|
||||
input_size=(1, 2, 4, 4, 4)),
|
||||
OldModuleTest(nn.VolumetricMaxPooling,
|
||||
(2, 2, 2),
|
||||
input_size=(2, 3, 5, 5, 5)),
|
||||
input=(torch.randn(2, 3, 5, 5, 5) * 1000)),
|
||||
OldModuleTest(nn.VolumetricMaxPooling,
|
||||
(2, 2, 2, 2, 2, 2),
|
||||
input_size=(2, 3, 5, 5, 5),
|
||||
input=(torch.randn(2, 3, 5, 5, 5) * 1000),
|
||||
desc='stride'),
|
||||
OldModuleTest(nn.VolumetricMaxPooling,
|
||||
(2, 2, 2, 2, 2, 2, 1, 1, 1),
|
||||
input_size=(2, 3, 5, 5, 5),
|
||||
input=(torch.randn(2, 3, 5, 5, 5) * 1000),
|
||||
desc='stride_padding'),
|
||||
OldModuleTest(nn.VolumetricReplicationPadding,
|
||||
(1, 2, 3, 4, 5, 6),
|
||||
@ -530,10 +532,10 @@ for p in (1, 2, 1.5):
|
||||
(p,),
|
||||
input_size=(4, 5),
|
||||
# Eh, we need to use p as a default, so it's passed by value
|
||||
reference_fn=lambda i,_,p=p: i.div(i.norm(p, 1).expand_as(i)),
|
||||
reference_fn=lambda i, _, p=p: i.div(i.norm(p, 1).expand_as(i)),
|
||||
desc=str(p)),
|
||||
)
|
||||
for p in range(1, 4+1):
|
||||
for p in range(1, 4 + 1):
|
||||
tests.append(
|
||||
OldModuleTest(nn.PairwiseDistance,
|
||||
(p,),
|
||||
@ -541,6 +543,7 @@ for p in range(1, 4+1):
|
||||
desc=str(p))
|
||||
)
|
||||
|
||||
|
||||
def build_spatial_unpooling_net():
|
||||
pool = nn.SpatialMaxPooling(2, 2, 2, 2)
|
||||
unpool = nn.SpatialMaxUnpooling(pool)
|
||||
@ -550,7 +553,8 @@ tests.append(
|
||||
OldModuleTest(build_spatial_unpooling_net,
|
||||
input_size=(1, 3, 10, 10),
|
||||
desc='SpatialMaxUnpooling')
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def build_volumetric_unpooling_net():
|
||||
pool = nn.VolumetricMaxPooling(2, 2, 2, 2)
|
||||
@ -561,7 +565,8 @@ tests.append(
|
||||
OldModuleTest(build_volumetric_unpooling_net,
|
||||
input_size=(1, 3, 10, 10),
|
||||
desc='VolumetricMaxUnpooling')
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def prepare_tests():
|
||||
def add_test(test):
|
||||
@ -571,8 +576,8 @@ def prepare_tests():
|
||||
raise RuntimeError('Found two tests with the same name: ' + test_name)
|
||||
if hasattr(TestNN, cuda_test_name):
|
||||
raise RuntimeError('Found two tests with the same name: ' + cuda_test_name)
|
||||
setattr(TestNN, test_name, lambda self,test=test: test(self))
|
||||
setattr(TestNN, cuda_test_name, lambda self,test=test: test.test_cuda(self))
|
||||
setattr(TestNN, test_name, lambda self, test=test: test(self))
|
||||
setattr(TestNN, cuda_test_name, lambda self, test=test: test.test_cuda(self))
|
||||
name_remap = {
|
||||
'Conv2d': 'SpatialConvolution',
|
||||
'MaxPool2d': 'SpatialMaxPooling',
|
||||
@ -613,6 +618,7 @@ def prepare_tests():
|
||||
test = CriterionTest(**test_params)
|
||||
add_test(test)
|
||||
|
||||
|
||||
class TestNN(NNTestCase):
|
||||
|
||||
def _forward(self, module, input):
|
||||
@ -636,19 +642,19 @@ class TestNN(NNTestCase):
|
||||
|
||||
def test_Dropout(self):
|
||||
p = 0.2
|
||||
input = torch.Tensor(1000).fill_(1-p)
|
||||
input = torch.Tensor(1000).fill_(1 - p)
|
||||
|
||||
module = nn.Dropout(p)
|
||||
output = module.forward(input)
|
||||
self.assertLess(abs(output.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(output.mean() - (1 - p)), 0.05)
|
||||
gradInput = module.backward(input, input)
|
||||
self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)
|
||||
|
||||
module = nn.Dropout(p, True)
|
||||
output = module.forward(input.clone())
|
||||
self.assertLess(abs(output.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(output.mean() - (1 - p)), 0.05)
|
||||
gradInput = module.backward(input.clone(), input.clone())
|
||||
self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)
|
||||
|
||||
# Check that these don't raise errors
|
||||
module.__repr__()
|
||||
@ -664,9 +670,9 @@ class TestNN(NNTestCase):
|
||||
module = nn.SpatialDropout(p)
|
||||
module.training()
|
||||
output = module.forward(input)
|
||||
self.assertLess(abs(output.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(output.mean() - (1 - p)), 0.05)
|
||||
gradInput = module.backward(input, input)
|
||||
self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)
|
||||
|
||||
# Check that these don't raise errors
|
||||
module.__repr__()
|
||||
@ -674,18 +680,18 @@ class TestNN(NNTestCase):
|
||||
|
||||
def test_VolumetricDropout(self):
|
||||
p = 0.2
|
||||
bsz = random.randint(1,5)
|
||||
t = random.randint(1,5)
|
||||
w = random.randint(1,5)
|
||||
h = random.randint(1,5)
|
||||
bsz = random.randint(1, 5)
|
||||
t = random.randint(1, 5)
|
||||
w = random.randint(1, 5)
|
||||
h = random.randint(1, 5)
|
||||
nfeats = 1000
|
||||
input = torch.Tensor(bsz, nfeats, t, w, h).fill_(1)
|
||||
module = nn.VolumetricDropout(p)
|
||||
module.training()
|
||||
output = module.forward(input)
|
||||
self.assertLess(abs(output.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(output.mean() - (1 - p)), 0.05)
|
||||
gradInput = module.backward(input, input)
|
||||
self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)
|
||||
self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)
|
||||
|
||||
# Check that these don't raise errors
|
||||
module.__repr__()
|
||||
@ -706,7 +712,7 @@ class TestNN(NNTestCase):
|
||||
self.assertTrue(output[input.lt(0)].eq(0).all())
|
||||
|
||||
def test_Copy(self):
|
||||
input = torch.randn(3,4).double()
|
||||
input = torch.randn(3, 4).double()
|
||||
c = nn.Copy(torch.DoubleTensor, torch.FloatTensor)
|
||||
output = c.forward(input)
|
||||
self.assertEqual(torch.typename(output), 'torch.FloatTensor')
|
||||
@ -833,9 +839,9 @@ class TestNN(NNTestCase):
|
||||
def test_ParallelTable(self):
|
||||
input = torch.randn(3, 4, 5)
|
||||
p = nn.ParallelTable()
|
||||
p.add(nn.View(4,5,1))
|
||||
p.add(nn.View(4,5,1))
|
||||
p.add(nn.View(4,5,1))
|
||||
p.add(nn.View(4, 5, 1))
|
||||
p.add(nn.View(4, 5, 1))
|
||||
p.add(nn.View(4, 5, 1))
|
||||
m = nn.Sequential()
|
||||
m.add(nn.SplitTable(0))
|
||||
m.add(p)
|
||||
@ -846,7 +852,7 @@ class TestNN(NNTestCase):
|
||||
str(p)
|
||||
|
||||
output = m.forward(input)
|
||||
output2 = input.transpose(0,2).transpose(0,1)
|
||||
output2 = input.transpose(0, 2).transpose(0, 1)
|
||||
self.assertEqual(output2, output)
|
||||
|
||||
gradInput = m.backward(input, output2)
|
||||
@ -857,7 +863,7 @@ class TestNN(NNTestCase):
|
||||
torch.randn(3, 4).float(), torch.randn(3, 4).float(), [torch.randn(3, 4).float()]
|
||||
]
|
||||
_gradOutput = [
|
||||
torch.randn(3, 3,4).float(), torch.randn(3, 3,4).float(), torch.randn(3, 3,4).float()
|
||||
torch.randn(3, 3, 4).float(), torch.randn(3, 3, 4).float(), torch.randn(3, 3, 4).float()
|
||||
]
|
||||
gradOutput = [
|
||||
[_gradOutput[0][0], _gradOutput[1][0], [_gradOutput[2][0]]],
|
||||
@ -878,7 +884,8 @@ class TestNN(NNTestCase):
|
||||
output2 = [input, input, input]
|
||||
self.assertEqual(output2, output)
|
||||
gradInput = module.backward(input, gradOutput)
|
||||
gradInput2 = [_gradOutput[0].sum(0).squeeze(0), _gradOutput[1].sum(0).squeeze(0), [_gradOutput[2].sum(0).squeeze(0)]]
|
||||
gradInput2 = [_gradOutput[0].sum(0).squeeze(0), _gradOutput[1].sum(
|
||||
0).squeeze(0), [_gradOutput[2].sum(0).squeeze(0)]]
|
||||
self.assertTrue(isinstance(gradInput, list))
|
||||
self.assertFalse(isinstance(gradInput[0], list))
|
||||
self.assertFalse(isinstance(gradInput[1], list))
|
||||
@ -910,17 +917,20 @@ class TestNN(NNTestCase):
|
||||
input = torch.randn(2, 3, 12, 12)
|
||||
gradOutput = torch.randn(2, int(outputSize.sum()), 12, 12)
|
||||
concat = nn.DepthConcat(1)
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[0], 1, 1, 1, 1)) #> 2, 5, 12, 12
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[1], 3, 3, 1, 1)) #> 2, 6, 10, 10
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[2], 4, 4, 1, 1)) #> 2, 7, 9, 9
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[3], 5, 5, 1, 1)) #> 2, 8, 8, 8
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[0], 1, 1, 1, 1)) # > 2, 5, 12, 12
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[1], 3, 3, 1, 1)) # > 2, 6, 10, 10
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[2], 4, 4, 1, 1)) # > 2, 7, 9, 9
|
||||
concat.add(nn.SpatialConvolution(3, outputSize[3], 5, 5, 1, 1)) # > 2, 8, 8, 8
|
||||
concat.zeroGradParameters()
|
||||
# forward/backward
|
||||
outputConcat = concat.forward(input)
|
||||
gradInputConcat = concat.backward(input, gradOutput)
|
||||
# the spatial dims are the largest, the nFilters is the sum
|
||||
output = torch.Tensor(2, int(outputSize.sum()), 12, 12).zero_() # zero for padding
|
||||
narrows = ( (slice(None), slice(0, 5), slice(None), slice(None)), (slice(None), slice(5, 11), slice(1, 11), slice(1, 11)), (slice(None), slice(11, 18), slice(1, 10), slice(1, 10)), (slice(None), slice(18, 26), slice(2, 10), slice(2, 10)) )
|
||||
narrows = ((slice(None), slice(0, 5), slice(None), slice(None)),
|
||||
(slice(None), slice(5, 11), slice(1, 11), slice(1, 11)),
|
||||
(slice(None), slice(11, 18), slice(1, 10), slice(1, 10)),
|
||||
(slice(None), slice(18, 26), slice(2, 10), slice(2, 10)))
|
||||
gradInput = input.clone().zero_()
|
||||
for i in range(4):
|
||||
conv = concat.get(i)
|
||||
@ -979,7 +989,7 @@ class TestNN(NNTestCase):
|
||||
weight = 1
|
||||
m = nn.L1Penalty(weight, False, False)
|
||||
|
||||
input = torch.rand(2,10).add_(-0.5)
|
||||
input = torch.rand(2, 10).add_(-0.5)
|
||||
input[0][0] = 0
|
||||
|
||||
m.forward(input)
|
||||
@ -1023,7 +1033,7 @@ class TestNN(NNTestCase):
|
||||
mc = nn.MultiCriterion().add(nll, 0.5).add(nll2)
|
||||
|
||||
output = mc.forward(input, target)
|
||||
output2 = nll.forward(input, target)/2 + nll2.forward(input, target)
|
||||
output2 = nll.forward(input, target) / 2 + nll2.forward(input, target)
|
||||
|
||||
self.assertEqual(output, output2)
|
||||
gradInput = mc.backward(input, target)
|
||||
@ -1072,7 +1082,7 @@ class TestNN(NNTestCase):
|
||||
mse = nn.MSECriterion()
|
||||
pc = nn.ParallelCriterion().add(nll, 0.5).add(mse)
|
||||
output = pc.forward(input, target)
|
||||
output2 = nll.forward(input[0], target[0])/2 + mse.forward(input[1], target[1])
|
||||
output2 = nll.forward(input[0], target[0]) / 2 + mse.forward(input[1], target[1])
|
||||
self.assertEqual(output, output2)
|
||||
gradInput2 = [nll.backward(input[0], target[0]).clone().div(2), mse.backward(input[1], target[1])]
|
||||
gradInput = pc.backward(input, target)
|
||||
@ -1096,7 +1106,7 @@ class TestNN(NNTestCase):
|
||||
mse = nn.MSECriterion()
|
||||
pc = nn.ParallelCriterion(True).add(mse, 0.5).add(nn.MSECriterion())
|
||||
output = pc.forward(input, target)
|
||||
output2 = mse.forward(input[0], target)/2 + mse.forward(input[1], target)
|
||||
output2 = mse.forward(input[0], target) / 2 + mse.forward(input[1], target)
|
||||
self.assertEqual(output, output2)
|
||||
gradInput = pc.backward(input, target)
|
||||
gradInput2 = [mse.backward(input[0], target).clone().div(2), mse.backward(input[1], target)]
|
||||
@ -1112,7 +1122,9 @@ class TestNN(NNTestCase):
|
||||
pc = nn.ParallelCriterion().add(nll, 0.5).add(mse)
|
||||
pc2 = nn.ParallelCriterion().add(nll2, 0.4).add(pc)
|
||||
output = pc2.forward(input, target)
|
||||
output2 = nll2.forward(input[0], target[0])*0.4 + nll.forward(input[1][0], target[1][0])/2 + mse.forward(input[1][1], target[1][1])
|
||||
output2 = (nll2.forward(input[0], target[0]) * 0.4 +
|
||||
nll.forward(input[1][0], target[1][0]) / 2 +
|
||||
mse.forward(input[1][1], target[1][1]))
|
||||
self.assertEqual(output, output2)
|
||||
gradInput2 = [
|
||||
nll2.backward(input[0], target[0]).clone().mul(0.4),
|
||||
@ -1142,6 +1154,15 @@ class TestNN(NNTestCase):
|
||||
module.__repr__()
|
||||
str(module)
|
||||
|
||||
def test_accUpdateGradParameters(self):
|
||||
module = nn.LookupTable(5, 3)
|
||||
module.weight.fill_(2)
|
||||
input = torch.LongTensor([1, 3])
|
||||
output = module.updateOutput(input)
|
||||
module.backwardUpdate(input, output, 0.1)
|
||||
self.assertEqual(module.weight[0, 0], 2)
|
||||
self.assertEqual(module.weight[3, 0], 1.8)
|
||||
|
||||
def _build_net(self):
|
||||
return (nn.Sequential()
|
||||
.add(nn.Concat(0)
|
||||
@ -1197,6 +1218,7 @@ class TestNN(NNTestCase):
|
||||
def test_apply(self):
|
||||
net = self._build_net()
|
||||
seen_modules = set()
|
||||
|
||||
def callback(module):
|
||||
self.assertNotIn(module, seen_modules)
|
||||
seen_modules.add(module)
|
||||
@ -1206,6 +1228,7 @@ class TestNN(NNTestCase):
|
||||
def test_listModules(self):
|
||||
net = self._build_net()
|
||||
module_list = list()
|
||||
|
||||
def callback(module):
|
||||
module_list.append(module)
|
||||
net.apply(callback)
|
||||
@ -1214,6 +1237,7 @@ class TestNN(NNTestCase):
|
||||
def test_replace(self):
|
||||
ref_net = self._build_net()
|
||||
net = self._build_net()
|
||||
|
||||
def callback(module):
|
||||
if isinstance(module, nn.ReLU):
|
||||
return nn.Tanh()
|
||||
@ -1227,6 +1251,8 @@ class TestNN(NNTestCase):
|
||||
self.assertIsInstance(module, type(reference))
|
||||
|
||||
|
||||
prepare_tests()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
prepare_tests()
|
||||
unittest.main()
|
||||
run_tests()
|
||||
|
||||
@ -11,13 +11,15 @@ import torch.cuda
|
||||
import torch.multiprocessing as mp
|
||||
from torch.autograd import Variable
|
||||
from torch.nn import Parameter
|
||||
from common import TestCase
|
||||
from common import TestCase, run_tests
|
||||
|
||||
|
||||
TEST_REPEATS = 30
|
||||
HAS_SHM_FILES = os.path.isdir('/dev/shm')
|
||||
TEST_CUDA_IPC = torch.cuda.is_available() and \
|
||||
sys.version_info[0] == 3 and \
|
||||
sys.platform != 'darwin'
|
||||
TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1
|
||||
|
||||
|
||||
def simple_fill(queue, event):
|
||||
@ -74,13 +76,12 @@ def autograd_sharing(queue, ready, master_modified):
|
||||
master_modified.wait()
|
||||
|
||||
expected_var = torch.range(1, 25).view(5, 5)
|
||||
expected_var[0,0] = 1000
|
||||
expected_var[0, 0] = 1000
|
||||
is_ok = var.data.equal(expected_var)
|
||||
var.data[:] = torch.ones(5, 5)
|
||||
|
||||
if var.grad is not None:
|
||||
is_ok &= var.grad.data.equal(torch.ones(5, 5) * 4)
|
||||
var.grad.data[:] = torch.ones(5, 5)
|
||||
is_ok &= var.grad is None
|
||||
var._grad = Variable(torch.ones(5, 5), requires_grad=False)
|
||||
|
||||
queue.put(is_ok)
|
||||
|
||||
@ -113,7 +114,7 @@ class leak_checker(object):
|
||||
# one-off initialization that may use up a file descriptor
|
||||
available_fds = self._get_next_fds(10)
|
||||
self.test_case.assertLessEqual(
|
||||
available_fds[-1] - self.next_fds[-1], 4)
|
||||
available_fds[-1] - self.next_fds[-1], 5)
|
||||
self.test_case.assertFalse(self.has_shm_files())
|
||||
return False
|
||||
|
||||
@ -148,9 +149,6 @@ class leak_checker(object):
|
||||
|
||||
class TestMultiprocessing(TestCase):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(TestMultiprocessing, self).__init__(*args, **kwargs)
|
||||
|
||||
def _test_sharing(self, ctx=mp, type=torch.FloatTensor, repeat=1):
|
||||
def test_fill():
|
||||
x = torch.zeros(5, 5).type(type)
|
||||
@ -159,9 +157,11 @@ class TestMultiprocessing(TestCase):
|
||||
data = [x, x[:, 1]]
|
||||
q.put(data)
|
||||
p = ctx.Process(target=simple_fill, args=(q, e))
|
||||
p.daemon = True
|
||||
lc.check_pid(p.pid)
|
||||
p.start()
|
||||
e.wait()
|
||||
e.wait(10)
|
||||
self.assertTrue(e.is_set())
|
||||
self.assertTrue(data[0].eq(4).all())
|
||||
self.assertTrue(data[1].eq(4).all())
|
||||
p.join(1)
|
||||
@ -171,6 +171,7 @@ class TestMultiprocessing(TestCase):
|
||||
q = ctx.Queue()
|
||||
e = ctx.Event()
|
||||
p = ctx.Process(target=send_tensor, args=(q, e, type))
|
||||
p.daemon = True
|
||||
lc.check_pid(p.pid)
|
||||
p.start()
|
||||
t1 = q.get()
|
||||
@ -182,17 +183,17 @@ class TestMultiprocessing(TestCase):
|
||||
self.assertFalse(p.is_alive())
|
||||
|
||||
with leak_checker(self) as lc:
|
||||
for i in range(repeat):
|
||||
for _ in range(repeat):
|
||||
test_fill()
|
||||
test_receive()
|
||||
|
||||
def _test_preserve_sharing(self, ctx=mp, repeat=1):
|
||||
def do_test():
|
||||
x = torch.randn(5, 5)
|
||||
data = [x.storage(), x.storage()[1:4], x, x[2], x[:,1]]
|
||||
data = [x.storage(), x.storage()[1:4], x, x[2], x[:, 1]]
|
||||
q = ctx.Queue()
|
||||
q.put(data)
|
||||
new_data = q.get()
|
||||
new_data = q.get(timeout=1)
|
||||
self.assertEqual(new_data, data, 0)
|
||||
storage_cdata = data[0]._cdata
|
||||
self.assertEqual(new_data[0]._cdata, storage_cdata)
|
||||
@ -229,27 +230,27 @@ class TestMultiprocessing(TestCase):
|
||||
|
||||
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
|
||||
def test_fd_sharing(self):
|
||||
self._test_sharing(repeat=20)
|
||||
self._test_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
|
||||
def test_fd_preserve_sharing(self):
|
||||
self._test_preserve_sharing(repeat=20)
|
||||
self._test_preserve_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(platform == 'darwin', "file descriptor strategy is not supported on OS X")
|
||||
def test_fd_pool(self):
|
||||
self._test_pool(repeat=20)
|
||||
self._test_pool(repeat=TEST_REPEATS)
|
||||
|
||||
def test_fs_sharing(self):
|
||||
with fs_sharing():
|
||||
self._test_sharing(repeat=20)
|
||||
self._test_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
def test_fs_preserve_sharing(self):
|
||||
with fs_sharing():
|
||||
self._test_preserve_sharing(repeat=20)
|
||||
self._test_preserve_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
def test_fs_pool(self):
|
||||
with fs_sharing():
|
||||
self._test_pool(repeat=20)
|
||||
self._test_pool(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(not HAS_SHM_FILES, "don't not how to check if shm files exist")
|
||||
def test_fs(self):
|
||||
@ -263,7 +264,7 @@ class TestMultiprocessing(TestCase):
|
||||
q.get()
|
||||
|
||||
with fs_sharing(), leak_checker(self) as lc:
|
||||
for i in range(20):
|
||||
for _ in range(TEST_REPEATS):
|
||||
queue_put()
|
||||
|
||||
def test_inherit_tensor(self):
|
||||
@ -271,6 +272,7 @@ class TestMultiprocessing(TestCase):
|
||||
def __init__(self, tensor):
|
||||
super(SubProcess, self).__init__()
|
||||
self.tensor = tensor
|
||||
self.daemon = True
|
||||
|
||||
def run(self):
|
||||
self.tensor.add_(3)
|
||||
@ -278,7 +280,7 @@ class TestMultiprocessing(TestCase):
|
||||
t = torch.zeros(5, 5)
|
||||
p = SubProcess(t.share_memory_())
|
||||
p.start()
|
||||
p.join()
|
||||
p.join(1)
|
||||
self.assertEqual(t, torch.ones(5, 5) * 3, 0)
|
||||
|
||||
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
|
||||
@ -286,8 +288,8 @@ class TestMultiprocessing(TestCase):
|
||||
torch.cuda.FloatTensor([1]) # initialize CUDA outside of leak checker
|
||||
self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor)
|
||||
|
||||
|
||||
@unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available')
|
||||
@unittest.skipIf(not TEST_MULTIGPU, 'found only 1 GPU')
|
||||
def test_cuda_small_tensors(self):
|
||||
# Check multiple small tensors which will likely use the same
|
||||
# underlying cached allocation
|
||||
@ -355,12 +357,13 @@ class TestMultiprocessing(TestCase):
|
||||
master_modified = mp.Event()
|
||||
queue = mp.Queue()
|
||||
p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
|
||||
p.daemon = True
|
||||
p.start()
|
||||
var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
|
||||
queue.put(var)
|
||||
|
||||
ready.wait()
|
||||
var.data[0,0] = 1000
|
||||
if var.grad is not None:
|
||||
var.data[0, 0] = 1000
|
||||
var.grad.data[:] = torch.ones(5, 5) * 4
|
||||
master_modified.set()
|
||||
|
||||
@ -368,9 +371,9 @@ class TestMultiprocessing(TestCase):
|
||||
self.assertTrue(worker_ok)
|
||||
|
||||
self.assertEqual(var.data, torch.ones(5, 5))
|
||||
if var.grad is not None:
|
||||
self.assertEqual(var.grad.data, torch.ones(5, 5))
|
||||
p.join()
|
||||
self.assertEqual(var.grad.data, torch.ones(5, 5) * 4)
|
||||
p.join(1)
|
||||
self.assertFalse(p.is_alive())
|
||||
|
||||
def test_variable_sharing(self):
|
||||
configs = [
|
||||
@ -409,4 +412,4 @@ class TestMultiprocessing(TestCase):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
run_tests()
|
||||
|
||||
@ -4,14 +4,12 @@ import torch
|
||||
import torch.cuda.nccl as nccl
|
||||
import torch.cuda
|
||||
|
||||
from common import TestCase
|
||||
|
||||
if not torch.cuda.is_available():
|
||||
print('CUDA not available, skipping tests')
|
||||
import sys
|
||||
sys.exit()
|
||||
from common import TestCase, run_tests
|
||||
|
||||
nGPUs = torch.cuda.device_count()
|
||||
if nGPUs == 0:
|
||||
print('CUDA not available, skipping tests')
|
||||
TestCase = object # noqa: F811
|
||||
|
||||
|
||||
class TestNCCL(TestCase):
|
||||
@ -87,4 +85,4 @@ class TestNCCL(TestCase):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
run_tests()
|
||||
|
||||
1221
test/test_nn.py
1221
test/test_nn.py
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,12 @@
|
||||
import unittest
|
||||
import functools
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import torch.legacy.optim as old_optim
|
||||
from torch.autograd import Variable
|
||||
|
||||
from common import TestCase
|
||||
from common import TestCase, run_tests
|
||||
|
||||
|
||||
def rosenbrock(tensor):
|
||||
@ -14,7 +16,7 @@ def rosenbrock(tensor):
|
||||
|
||||
def drosenbrock(tensor):
|
||||
x, y = tensor
|
||||
return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * (y - x**2)))
|
||||
return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))
|
||||
|
||||
|
||||
def wrap_old_fn(old_fn, **config):
|
||||
@ -36,12 +38,19 @@ class TestOptim(TestCase):
|
||||
initial_dist = params.data.dist(solution)
|
||||
|
||||
def eval():
|
||||
optimizer.zero_grad()
|
||||
loss = rosenbrock(params)
|
||||
loss.backward()
|
||||
# loss.backward() will give **slightly** different
|
||||
# gradients than drosenbrock, because of a different ordering
|
||||
# of floating point operations. In most cases it doesn't matter,
|
||||
# but some optimizers are so sensitive that they can temporarily
|
||||
# diverge up to 1e-4, just to converge again. This makes the
|
||||
# comparison more stable.
|
||||
params.grad.data.copy_(drosenbrock(params.data))
|
||||
return loss
|
||||
|
||||
for i in range(2000):
|
||||
optimizer.zero_grad()
|
||||
optimizer.step(eval)
|
||||
old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
|
||||
params_t, state)
|
||||
@ -52,25 +61,65 @@ class TestOptim(TestCase):
|
||||
def _test_basic_cases_template(self, weight, bias, input, constructor):
|
||||
weight = Variable(weight, requires_grad=True)
|
||||
bias = Variable(bias, requires_grad=True)
|
||||
input = Variable(input, requires_grad=False)
|
||||
input = Variable(input)
|
||||
optimizer = constructor(weight, bias)
|
||||
|
||||
def fn():
|
||||
optimizer.zero_grad()
|
||||
y = weight.mv(input)
|
||||
if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
|
||||
y = y.cuda(bias.get_device())
|
||||
return (y + bias).abs().sum()
|
||||
loss = (y + bias).pow(2).sum()
|
||||
loss.backward()
|
||||
return loss
|
||||
|
||||
initial_value = fn().data[0]
|
||||
for i in range(200):
|
||||
weight.grad.data.zero_()
|
||||
bias.grad.data.zero_()
|
||||
fn().backward()
|
||||
optimizer.step()
|
||||
optimizer.step(fn)
|
||||
self.assertLess(fn().data[0], initial_value)
|
||||
|
||||
self.assertLessEqual(fn().data[0], initial_value)
|
||||
def _test_state_dict(self, weight, bias, input, constructor):
|
||||
weight = Variable(weight, requires_grad=True)
|
||||
bias = Variable(bias, requires_grad=True)
|
||||
input = Variable(input)
|
||||
|
||||
def _test_basic_cases(self, constructor):
|
||||
def fn_base(optimizer, weight, bias):
|
||||
optimizer.zero_grad()
|
||||
loss = (weight.mv(input) + bias).pow(2).sum()
|
||||
loss.backward()
|
||||
return loss
|
||||
|
||||
optimizer = constructor(weight, bias)
|
||||
fn = functools.partial(fn_base, optimizer, weight, bias)
|
||||
|
||||
# Prime the optimizer
|
||||
for i in range(20):
|
||||
optimizer.step(fn)
|
||||
# Clone the weights and construct new optimizer for them
|
||||
weight_c = Variable(weight.data.clone(), requires_grad=True)
|
||||
bias_c = Variable(bias.data.clone(), requires_grad=True)
|
||||
optimizer_c = constructor(weight_c, bias_c)
|
||||
fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)
|
||||
# Load state dict
|
||||
state_dict = deepcopy(optimizer.state_dict())
|
||||
state_dict_c = deepcopy(optimizer.state_dict())
|
||||
optimizer_c.load_state_dict(state_dict_c)
|
||||
# Run both optimizations in parallel
|
||||
for i in range(20):
|
||||
optimizer.step(fn)
|
||||
optimizer_c.step(fn_c)
|
||||
self.assertEqual(weight, weight_c)
|
||||
self.assertEqual(bias, bias_c)
|
||||
# Make sure state dict wasn't modified
|
||||
self.assertEqual(state_dict, state_dict_c)
|
||||
|
||||
def _test_basic_cases(self, constructor, ignore_multidevice=False):
|
||||
self._test_state_dict(
|
||||
torch.randn(10, 5),
|
||||
torch.randn(10),
|
||||
torch.randn(5),
|
||||
constructor
|
||||
)
|
||||
self._test_basic_cases_template(
|
||||
torch.randn(10, 5),
|
||||
torch.randn(10),
|
||||
@ -79,8 +128,8 @@ class TestOptim(TestCase):
|
||||
)
|
||||
# non-contiguous parameters
|
||||
self._test_basic_cases_template(
|
||||
torch.randn(10, 5, 2)[...,0],
|
||||
torch.randn(10, 2)[...,0],
|
||||
torch.randn(10, 5, 2)[..., 0],
|
||||
torch.randn(10, 2)[..., 0],
|
||||
torch.randn(5),
|
||||
constructor
|
||||
)
|
||||
@ -94,12 +143,12 @@ class TestOptim(TestCase):
|
||||
constructor
|
||||
)
|
||||
# Multi-GPU
|
||||
if not torch.cuda.device_count() > 1:
|
||||
if not torch.cuda.device_count() > 1 or ignore_multidevice:
|
||||
return
|
||||
self._test_basic_cases_template(
|
||||
torch.randn(10, 5).cuda(),
|
||||
torch.randn(10).cuda(),
|
||||
torch.randn(5).cuda(),
|
||||
torch.randn(10, 5).cuda(0),
|
||||
torch.randn(10).cuda(1),
|
||||
torch.randn(5).cuda(0),
|
||||
constructor
|
||||
)
|
||||
|
||||
@ -275,10 +324,24 @@ class TestOptim(TestCase):
|
||||
lr=1e-3)
|
||||
)
|
||||
|
||||
def test_lbfgs(self):
|
||||
self._test_rosenbrock(
|
||||
lambda params: optim.LBFGS(params),
|
||||
wrap_old_fn(old_optim.lbfgs)
|
||||
)
|
||||
self._test_rosenbrock(
|
||||
lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
|
||||
wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
|
||||
)
|
||||
self._test_basic_cases(
|
||||
lambda weight, bias: optim.LBFGS([weight, bias]),
|
||||
ignore_multidevice=True
|
||||
)
|
||||
|
||||
def test_invalid_param_type(self):
|
||||
with self.assertRaises(TypeError):
|
||||
optim.SGD(Variable(torch.randn(5, 5)), lr=3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
run_tests()
|
||||
|
||||
@ -4,22 +4,25 @@ from torch import sparse
|
||||
import itertools
|
||||
import random
|
||||
import unittest
|
||||
from common import TestCase
|
||||
from common import TestCase, run_tests
|
||||
from numbers import Number
|
||||
|
||||
SparseTensor = sparse.DoubleTensor
|
||||
|
||||
|
||||
class TestSparse(TestCase):
|
||||
|
||||
@staticmethod
|
||||
def _gen_sparse(d, nnz, with_size):
|
||||
v = torch.randn(nnz)
|
||||
if isinstance(with_size, Number):
|
||||
v = torch.randn(nnz)
|
||||
i = (torch.rand(d, nnz) * with_size).type(torch.LongTensor)
|
||||
x = SparseTensor(i, v)
|
||||
else:
|
||||
v_size = [nnz] + list(with_size[d:])
|
||||
v = torch.randn(*v_size)
|
||||
i = torch.rand(d, nnz) * \
|
||||
torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1)
|
||||
torch.Tensor(with_size[:d]).repeat(nnz, 1).transpose(0, 1)
|
||||
i = i.type(torch.LongTensor)
|
||||
x = SparseTensor(i, v, torch.Size(with_size))
|
||||
|
||||
@ -72,6 +75,33 @@ class TestSparse(TestCase):
|
||||
x.to_dense()
|
||||
self.assertEqual(res, x.to_dense())
|
||||
|
||||
def test_to_dense_hybrid(self):
|
||||
i = torch.LongTensor([
|
||||
[0, 1, 2, 2],
|
||||
[0, 0, 0, 3],
|
||||
])
|
||||
v = torch.Tensor([[2, 3], [1, 2], [3, 4], [4, 5]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 2]))
|
||||
res = torch.Tensor([
|
||||
[[2, 3],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0]],
|
||||
[[1, 2],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0]],
|
||||
[[3, 4],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[4, 5]],
|
||||
])
|
||||
|
||||
x.to_dense() # Tests double to_dense for memory corruption
|
||||
x.to_dense()
|
||||
x.to_dense()
|
||||
self.assertEqual(res, x.to_dense())
|
||||
|
||||
def test_contig(self):
|
||||
i = torch.LongTensor([
|
||||
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
|
||||
@ -125,6 +155,65 @@ class TestSparse(TestCase):
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
def test_contig_hybrid(self):
|
||||
i = torch.LongTensor([
|
||||
[1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
|
||||
[92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
|
||||
])
|
||||
v = torch.Tensor([
|
||||
[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
|
||||
[6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
|
||||
])
|
||||
x = SparseTensor(i, v, torch.Size([100, 100, 2]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
|
||||
[31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
|
||||
])
|
||||
exp_v = torch.Tensor([
|
||||
[2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
|
||||
[3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
|
||||
])
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
i = torch.LongTensor([
|
||||
[2, 0, 2, 1],
|
||||
[0, 0, 3, 0],
|
||||
[1, 0, 4, 0],
|
||||
])
|
||||
v = torch.Tensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 1, 2, 2],
|
||||
[0, 0, 0, 3],
|
||||
[0, 0, 1, 4],
|
||||
])
|
||||
exp_v = torch.Tensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]])
|
||||
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
# Duplicate indices
|
||||
i = torch.LongTensor([
|
||||
[0, 0, 2, 0],
|
||||
[0, 0, 3, 0],
|
||||
[0, 0, 4, 0],
|
||||
])
|
||||
v = torch.Tensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]])
|
||||
x = SparseTensor(i, v, torch.Size([3, 4, 5, 3]))
|
||||
exp_i = torch.LongTensor([
|
||||
[0, 2],
|
||||
[0, 3],
|
||||
[0, 4],
|
||||
])
|
||||
exp_v = torch.Tensor([[6, 4, 5], [4, 3, 4]])
|
||||
|
||||
x.contiguous()
|
||||
self.assertEqual(exp_i, x.indices())
|
||||
self.assertEqual(exp_v, x.values())
|
||||
|
||||
def test_transpose(self):
|
||||
x = self._gen_sparse(4, 20, 5)[0]
|
||||
y = x.to_dense()
|
||||
@ -186,9 +275,9 @@ class TestSparse(TestCase):
|
||||
test_shape(1000, 100, 100)
|
||||
test_shape(3000, 64, 300)
|
||||
|
||||
def test_spadd(self):
|
||||
def test_shape(*shape):
|
||||
x, _, _ = self._gen_sparse(len(shape), 10, shape)
|
||||
def _test_spadd_shape(self, shape_i, shape_v=None):
|
||||
shape = shape_i + (shape_v or [])
|
||||
x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
|
||||
y = torch.randn(*shape)
|
||||
r = random.random()
|
||||
|
||||
@ -209,12 +298,75 @@ class TestSparse(TestCase):
|
||||
|
||||
self.assertEqual(res, expected)
|
||||
|
||||
test_shape(5, 6)
|
||||
test_shape(10, 10, 10)
|
||||
test_shape(50, 30, 20)
|
||||
test_shape(5, 5, 5, 5, 5, 5)
|
||||
def test_spadd(self):
|
||||
self._test_spadd_shape([5, 6])
|
||||
self._test_spadd_shape([10, 10, 10])
|
||||
self._test_spadd_shape([50, 30, 20])
|
||||
self._test_spadd_shape([5, 5, 5, 5, 5, 5])
|
||||
|
||||
def test_spadd_hybrid(self):
|
||||
self._test_spadd_shape([5, 6], [2, 3])
|
||||
self._test_spadd_shape([10, 10, 10], [3])
|
||||
self._test_spadd_shape([50, 30, 20], [2])
|
||||
self._test_spadd_shape([5, 5, 5, 5, 5, 5], [2])
|
||||
|
||||
def _test_basic_ops_shape(self, shape_i, shape_v=None):
|
||||
shape = shape_i + (shape_v or [])
|
||||
x1, _, _ = self._gen_sparse(len(shape_i), 9, shape)
|
||||
x2, _, _ = self._gen_sparse(len(shape_i), 12, shape)
|
||||
|
||||
y1 = x1 + x2
|
||||
y2 = x1.clone()
|
||||
y2.add_(x2)
|
||||
expected = x1.to_dense() + x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y1 = x1 - x2
|
||||
y2 = x1.clone()
|
||||
y2.sub_(x2)
|
||||
expected = x1.to_dense() - x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y1 = x1 * x2
|
||||
y2 = x1.clone()
|
||||
y2.mul_(x2)
|
||||
expected = x1.to_dense() * x2.to_dense()
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y1 = x1 * 37.5
|
||||
y2 = x1.clone()
|
||||
y2.mul_(37.5)
|
||||
expected = x1.to_dense() * 37.5
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y1 = x1 / 37.5
|
||||
y2 = x1.clone()
|
||||
y2.div_(37.5)
|
||||
expected = x1.to_dense() / 37.5
|
||||
self.assertEqual(y1.to_dense(), expected)
|
||||
self.assertEqual(y2.to_dense(), expected)
|
||||
|
||||
y = x1.clone()
|
||||
y.zero_()
|
||||
expected = torch.zeros(x1.size())
|
||||
self.assertEqual(y.to_dense(), expected)
|
||||
|
||||
def test_basic_ops(self):
|
||||
self._test_basic_ops_shape([5, 6])
|
||||
self._test_basic_ops_shape([10, 10, 10])
|
||||
self._test_basic_ops_shape([50, 30, 20])
|
||||
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5])
|
||||
|
||||
def test_basic_ops_hybrid(self):
|
||||
self._test_basic_ops_shape([5, 6], [2, 3])
|
||||
self._test_basic_ops_shape([10, 10, 10], [3])
|
||||
self._test_basic_ops_shape([50, 30, 20], [2])
|
||||
self._test_basic_ops_shape([5, 5, 5, 5, 5, 5], [2])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
run_tests()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,6 @@ import shutil
|
||||
import random
|
||||
import tempfile
|
||||
import unittest
|
||||
import sys
|
||||
import traceback
|
||||
import torch
|
||||
import torch.cuda
|
||||
@ -19,7 +18,7 @@ from torch.utils.serialization import load_lua
|
||||
|
||||
HAS_CUDA = torch.cuda.is_available()
|
||||
|
||||
from common import TestCase
|
||||
from common import TestCase, run_tests, download_file
|
||||
|
||||
try:
|
||||
import cffi
|
||||
@ -28,7 +27,9 @@ try:
|
||||
except ImportError:
|
||||
HAS_CFFI = False
|
||||
|
||||
|
||||
class SimplePlugin(Plugin):
|
||||
|
||||
def __init__(self, interval):
|
||||
super(SimplePlugin, self).__init__(interval)
|
||||
self.trainer = None
|
||||
@ -58,6 +59,7 @@ class SimplePlugin(Plugin):
|
||||
|
||||
|
||||
class ModelMock(object):
|
||||
|
||||
def __init__(self):
|
||||
self.num_calls = 0
|
||||
self.output = Variable(torch.ones(1, 1), requires_grad=True)
|
||||
@ -68,6 +70,7 @@ class ModelMock(object):
|
||||
|
||||
|
||||
class CriterionMock(object):
|
||||
|
||||
def __init__(self):
|
||||
self.num_calls = 0
|
||||
|
||||
@ -95,6 +98,7 @@ class OptimizerMock(object):
|
||||
|
||||
|
||||
class DatasetMock(object):
|
||||
|
||||
def __iter__(self):
|
||||
for i in range(10):
|
||||
yield torch.randn(2, 10), torch.randperm(10)[:2]
|
||||
@ -183,6 +187,7 @@ class TestTrainer(TestCase):
|
||||
|
||||
test_dir = os.path.abspath(os.path.dirname(str(__file__)))
|
||||
|
||||
|
||||
class TestFFI(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@ -290,35 +295,13 @@ class TestLuaReader(TestCase):
|
||||
self.assertEqual(grad_input, test['grad_input'])
|
||||
return do_test
|
||||
|
||||
@classmethod
|
||||
def _download_data(cls, test_file_path):
|
||||
if os.path.exists(test_file_path):
|
||||
return
|
||||
print('Downloading test file for TestLuaReader.')
|
||||
DATA_URL = 'https://s3.amazonaws.com/pytorch/legacy_modules.t7'
|
||||
urllib = cls._get_urllib('request')
|
||||
data = urllib.urlopen(DATA_URL, timeout=15).read()
|
||||
with open(test_file_path, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
@staticmethod
|
||||
def _get_urllib(submodule):
|
||||
if sys.version_info < (3,):
|
||||
import urllib2
|
||||
return urllib2
|
||||
else:
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
return getattr(urllib, submodule)
|
||||
|
||||
@classmethod
|
||||
def init(cls):
|
||||
DATA_URL = 'https://download.pytorch.org/test_data/legacy_modules.t7'
|
||||
data_dir = os.path.join(os.path.dirname(__file__), 'data')
|
||||
test_file_path = os.path.join(data_dir, 'legacy_modules.t7')
|
||||
urllib = cls._get_urllib('error')
|
||||
try:
|
||||
cls._download_data(test_file_path)
|
||||
except urllib.URLError as e:
|
||||
succ = download_file(DATA_URL, test_file_path)
|
||||
if not succ:
|
||||
warnings.warn(("Couldn't download the test file for TestLuaReader! "
|
||||
"Tests will be incomplete!"), RuntimeWarning)
|
||||
return
|
||||
@ -364,4 +347,4 @@ class TestLuaReader(TestCase):
|
||||
|
||||
TestLuaReader.init()
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
run_tests()
|
||||
|
||||
@ -7,6 +7,8 @@ from .plugins import ArgcountChecker, OptionalArguments, ArgumentReferences, \
|
||||
|
||||
|
||||
class cwrap(object):
|
||||
BASE_INDENT_SIZE = 6
|
||||
|
||||
RETURN_WRAPPERS = {
|
||||
'void': Template('Py_RETURN_NONE;'),
|
||||
'long': Template('return PyLong_FromLong($result);'),
|
||||
@ -16,17 +18,22 @@ class cwrap(object):
|
||||
|
||||
OPTION_TEMPLATE = Template("""
|
||||
${els}if ($arg_check) {
|
||||
$pre_arg_assign
|
||||
$arg_assign
|
||||
$code
|
||||
""")
|
||||
|
||||
ARG_ASSIGN_TEMPLATE = Template("""${type} ${name} = ${unpack};""")
|
||||
|
||||
OPTION_CODE_TEMPLATE = [
|
||||
'$call',
|
||||
'$return_result',
|
||||
]
|
||||
|
||||
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($arg_unpack);")
|
||||
FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($call_arg);")
|
||||
|
||||
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
|
||||
DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments,
|
||||
ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease]
|
||||
|
||||
def __init__(self, source, destination=None, plugins=[], default_plugins=True):
|
||||
if destination is None:
|
||||
@ -87,7 +94,7 @@ class cwrap(object):
|
||||
with open(fname, 'r') as f:
|
||||
included = f.read().split('\n')
|
||||
# insert it into lines at position i+1
|
||||
lines[i+1:i+1] = included
|
||||
lines[i + 1:i + 1] = included
|
||||
else:
|
||||
output.append(line)
|
||||
i += 1
|
||||
@ -97,10 +104,10 @@ class cwrap(object):
|
||||
def set_declaration_defaults(self, declaration):
|
||||
declaration.setdefault('arguments', [])
|
||||
declaration.setdefault('return', 'void')
|
||||
if not 'cname' in declaration:
|
||||
if 'cname' not in declaration:
|
||||
declaration['cname'] = declaration['name']
|
||||
# Simulate multiple dispatch, even if it's not necessary
|
||||
if not 'options' in declaration:
|
||||
if 'options' not in declaration:
|
||||
declaration['options'] = [{'arguments': declaration['arguments']}]
|
||||
del declaration['arguments']
|
||||
# Parse arguments (some of them can be strings)
|
||||
@ -136,10 +143,10 @@ class cwrap(object):
|
||||
return fallback(*args)
|
||||
|
||||
def get_type_check(self, arg, option):
|
||||
return self.search_plugins('get_type_check', (arg, option), lambda arg,_: None)
|
||||
return self.search_plugins('get_type_check', (arg, option), lambda arg, _: None)
|
||||
|
||||
def get_type_unpack(self, arg, option):
|
||||
return self.search_plugins('get_type_unpack', (arg, option), lambda arg,_: None)
|
||||
return self.search_plugins('get_type_unpack', (arg, option), lambda arg, _: None)
|
||||
|
||||
def get_return_wrapper(self, option):
|
||||
return self.search_plugins('get_return_wrapper', (option,), lambda _: self.RETURN_WRAPPERS[option['return']])
|
||||
@ -147,6 +154,9 @@ class cwrap(object):
|
||||
def get_wrapper_template(self, declaration):
|
||||
return self.search_plugins('get_wrapper_template', (declaration,), lambda _: None)
|
||||
|
||||
def get_assign_args(self, arguments):
|
||||
return self.search_plugins('get_assign_args', (arguments,), lambda _: arguments)
|
||||
|
||||
def get_arg_accessor(self, arg, option):
|
||||
def wrap_accessor(arg, _):
|
||||
if arg.get('idx') is None:
|
||||
@ -177,12 +187,47 @@ class cwrap(object):
|
||||
res = tmpl.substitute(arg=accessor, idx=arg.get('idx'))
|
||||
for plugin in self.plugins:
|
||||
res = getattr(plugin, plugin_fn_name)(res, arg, accessor)
|
||||
|
||||
result.append(res)
|
||||
return result
|
||||
|
||||
def build_option_args(self, arguments, arg_unpack):
|
||||
assignement = []
|
||||
call_arg = []
|
||||
# If types or names need to be changed
|
||||
arguments = self.get_assign_args(arguments)
|
||||
for arg, unpack in zip(arguments, arg_unpack):
|
||||
if arg['type'] == 'CONSTANT':
|
||||
call_arg.append(str(arg['name']))
|
||||
else:
|
||||
var_name = "arg_" + str(arg.get('assign_name', arg['name']))
|
||||
res = self.ARG_ASSIGN_TEMPLATE.substitute(
|
||||
type=arg['type'],
|
||||
name=var_name,
|
||||
unpack=unpack)
|
||||
|
||||
if var_name not in call_arg:
|
||||
assignement.append(res)
|
||||
call_arg.append(var_name)
|
||||
return assignement, call_arg
|
||||
|
||||
def indent_code(self, code):
|
||||
if code == '':
|
||||
return code
|
||||
code_lines = map(lambda s: s.strip(), code.split('\n'))
|
||||
code = '\n'
|
||||
depth = self.BASE_INDENT_SIZE
|
||||
for line in code_lines:
|
||||
depth -= line.count('}') * 2
|
||||
code += ' ' * depth + line + '\n'
|
||||
depth += line.count('{') * 2
|
||||
depth += line.count('(') * 4
|
||||
depth -= line.count(')') * 4
|
||||
return code[:-1]
|
||||
|
||||
def generate_option(self, option, is_first):
|
||||
checked_args = list(filter(
|
||||
lambda arg: not 'ignore_check' in arg or not arg['ignore_check'],
|
||||
lambda arg: 'ignore_check' not in arg or not arg['ignore_check'],
|
||||
option['arguments']))
|
||||
option['num_checked_args'] = len(checked_args)
|
||||
idx_args = list(filter(
|
||||
@ -198,22 +243,29 @@ class cwrap(object):
|
||||
for plugin in self.plugins:
|
||||
arg_checks = plugin.process_all_checks(arg_checks, option)
|
||||
|
||||
# Generate unpacks
|
||||
# Generate pre_arg assign
|
||||
pre_arg_assign = []
|
||||
for plugin in self.plugins:
|
||||
pre_arg_assign = plugin.process_pre_arg_assign(pre_arg_assign, option)
|
||||
|
||||
# Generate arg assignment and call arguments
|
||||
arg_unpack = self.map_selected_arguments('get_type_unpack',
|
||||
'process_single_unpack', option, option['arguments'])
|
||||
arg_unpack = ', '.join(arg_unpack)
|
||||
arg_assign, call_arg = self.build_option_args(option['arguments'], arg_unpack)
|
||||
|
||||
call_arg = ', '.join(call_arg)
|
||||
for plugin in self.plugins:
|
||||
arg_unpack = plugin.process_all_unpacks(arg_unpack, option)
|
||||
call_arg = plugin.process_all_call_arg(call_arg, option)
|
||||
|
||||
# Generate call
|
||||
try:
|
||||
return_result = self.get_return_wrapper(option).substitute()
|
||||
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result='',
|
||||
cname=option['cname'], arg_unpack=arg_unpack)
|
||||
cname=option['cname'], call_arg=call_arg)
|
||||
except KeyError:
|
||||
return_result = self.get_return_wrapper(option).substitute(result='__result')
|
||||
call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result=(option['return'] + ' __result = '),
|
||||
cname=option['cname'], arg_unpack=arg_unpack)
|
||||
cname=option['cname'], call_arg=call_arg)
|
||||
|
||||
code_template = deepcopy(self.OPTION_CODE_TEMPLATE)
|
||||
for plugin in self.plugins:
|
||||
@ -221,17 +273,15 @@ class cwrap(object):
|
||||
option)
|
||||
code_template = Template('\n'.join(code_template))
|
||||
code = code_template.substitute(call=call, return_result=return_result)
|
||||
code_lines = map(lambda s: s.strip(), code.split('\n'))
|
||||
code = '\n'
|
||||
depth = 6
|
||||
for line in code_lines:
|
||||
depth -= line.count('}') * 2
|
||||
code += ' ' * depth + line + '\n'
|
||||
depth += line.count('{') * 2
|
||||
code = self.indent_code(code)
|
||||
pre_arg_assign = self.indent_code('\n'.join(pre_arg_assign))
|
||||
arg_assign = self.indent_code('\n'.join(arg_assign))
|
||||
|
||||
# Put everything together
|
||||
return self.OPTION_TEMPLATE.substitute(
|
||||
els=('} else ' if not is_first else ''),
|
||||
arg_check=arg_checks,
|
||||
pre_arg_assign=pre_arg_assign,
|
||||
arg_assign=arg_assign,
|
||||
code=code,
|
||||
)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
from . import CWrapPlugin
|
||||
|
||||
|
||||
class ArgcountChecker(CWrapPlugin):
|
||||
|
||||
def process_all_checks(self, checks, option):
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
from . import CWrapPlugin
|
||||
|
||||
|
||||
class ArgcountSortPlugin(CWrapPlugin):
|
||||
|
||||
def __init__(self, descending=True):
|
||||
@ -11,4 +12,3 @@ class ArgcountSortPlugin(CWrapPlugin):
|
||||
for declaration in declarations:
|
||||
declaration['options'].sort(key=num_checked_args, reverse=self.descending)
|
||||
return declarations
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class ArgumentReferences(CWrapPlugin):
|
||||
|
||||
def initialize(self, cwrap):
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
from . import CWrapPlugin
|
||||
|
||||
|
||||
class AutoGPU(CWrapPlugin):
|
||||
|
||||
def __init__(self, has_self=True, condition=None):
|
||||
@ -14,7 +15,9 @@ class AutoGPU(CWrapPlugin):
|
||||
#endif
|
||||
"""
|
||||
|
||||
def process_option_code_template(self, template, option):
|
||||
def process_pre_arg_assign(self, template, option):
|
||||
if not option.get('auto_gpu', True):
|
||||
return template
|
||||
call = 'THCPAutoGPU __autogpu_guard = THCPAutoGPU(args{});'.format(
|
||||
', (PyObject*)self' if self.has_self else '')
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class BeforeAfterCall(CWrapPlugin):
|
||||
|
||||
def initialize(self, cwrap):
|
||||
@ -23,5 +24,5 @@ class BeforeAfterCall(CWrapPlugin):
|
||||
self.insert_snippet(template, option, call_idx, 'before_call')
|
||||
# call position might have changed
|
||||
call_idx = template.index('$call')
|
||||
self.insert_snippet(template, option, call_idx+1, 'after_call')
|
||||
self.insert_snippet(template, option, call_idx + 1, 'after_call')
|
||||
return template
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class BoolOption(CWrapPlugin):
|
||||
|
||||
UNPACK_TEMPLATE = Template('$arg == Py_True ? $if_true : $if_false')
|
||||
@ -8,12 +9,20 @@ class BoolOption(CWrapPlugin):
|
||||
def is_bool_option(self, arg):
|
||||
return arg['type'] == 'bool' and 'if_true' in arg and 'if_false' in arg
|
||||
|
||||
def get_type_check(self, arg, option):
|
||||
def process_declarations(self, declarations):
|
||||
for declaration in declarations:
|
||||
for option in declaration['options']:
|
||||
for arg in option['arguments']:
|
||||
if self.is_bool_option(arg):
|
||||
arg['is_bool_option'] = True
|
||||
arg['type'] = 'const char*'
|
||||
return declarations
|
||||
|
||||
def get_type_check(self, arg, option):
|
||||
if arg.get('is_bool_option', False):
|
||||
return Template('PyBool_Check($arg)')
|
||||
|
||||
def get_type_unpack(self, arg, option):
|
||||
if self.is_bool_option(arg):
|
||||
if arg.get('is_bool_option', False):
|
||||
return Template(self.UNPACK_TEMPLATE.safe_substitute(
|
||||
if_true=arg['if_true'], if_false=arg['if_false']))
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class ConstantArguments(CWrapPlugin):
|
||||
|
||||
def process_declarations(self, declarations):
|
||||
@ -18,5 +19,3 @@ class ConstantArguments(CWrapPlugin):
|
||||
def get_arg_accessor(self, arg, option):
|
||||
if arg['type'] == 'CONSTANT':
|
||||
return arg['name']
|
||||
|
||||
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
from string import Template
|
||||
import copy
|
||||
from copy import deepcopy
|
||||
from . import CWrapPlugin
|
||||
from itertools import product
|
||||
|
||||
|
||||
class CuDNNPlugin(CWrapPlugin):
|
||||
|
||||
TYPE_UNPACK = {
|
||||
@ -16,6 +18,10 @@ class CuDNNPlugin(CWrapPlugin):
|
||||
'double': Template('THPDoubleUtils_unpackReal($arg)'),
|
||||
}
|
||||
|
||||
INPUT_ARGUMENT_MAP = {
|
||||
'THTensor*': 'THVoidTensor*',
|
||||
}
|
||||
|
||||
TYPE_CHECK = {
|
||||
'Convolution*': Template('THPWrapper_check($arg)'),
|
||||
'THTensor*': Template('(PyObject*)Py_TYPE($arg) == tensorClass'),
|
||||
@ -78,6 +84,16 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
def get_type_check(self, arg, option):
|
||||
return self.TYPE_CHECK.get(arg['type'], None)
|
||||
|
||||
def get_assign_args(self, arguments):
|
||||
assign_args = []
|
||||
for arg in arguments:
|
||||
arg = copy.copy(arg)
|
||||
new_type = self.INPUT_ARGUMENT_MAP.get(arg['type'])
|
||||
if new_type is not None:
|
||||
arg['type'] = new_type
|
||||
assign_args.append(arg)
|
||||
return assign_args
|
||||
|
||||
def get_wrapper_template(self, declaration):
|
||||
arg_desc = []
|
||||
for option in declaration['options']:
|
||||
@ -123,7 +139,8 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
|
||||
def filter_unique_options(self, options):
|
||||
def signature(option):
|
||||
return '#'.join(arg['type'] for arg in option['arguments'] if not 'ignore_check' in arg or not arg['ignore_check'])
|
||||
return '#'.join(arg['type'] for arg in option['arguments']
|
||||
if 'ignore_check' not in arg or not arg['ignore_check'])
|
||||
seen_signatures = set()
|
||||
unique = []
|
||||
for option in options:
|
||||
@ -141,7 +158,7 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
return self.preprocessor_guard(code, declaration['defined_if'])
|
||||
return code
|
||||
|
||||
def process_all_unpacks(self, code, option):
|
||||
def process_all_call_arg(self, code, option):
|
||||
return 'state, ' + code
|
||||
|
||||
def declare_methods(self):
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class GILRelease(CWrapPlugin):
|
||||
|
||||
OPTION_START = [
|
||||
@ -24,6 +25,5 @@ class GILRelease(CWrapPlugin):
|
||||
def process_option_code_template(self, template, option):
|
||||
call_idx = template.index('$call')
|
||||
template.insert(call_idx, self.BEFORE_CALL)
|
||||
template.insert(call_idx+2, self.AFTER_CALL)
|
||||
template.insert(call_idx + 2, self.AFTER_CALL)
|
||||
return self.OPTION_START + template + self.OPTION_END
|
||||
|
||||
|
||||
223
tools/cwrap/plugins/GenericNN.py
Normal file
223
tools/cwrap/plugins/GenericNN.py
Normal file
@ -0,0 +1,223 @@
|
||||
import copy
|
||||
from string import Template
|
||||
from . import CWrapPlugin
|
||||
|
||||
|
||||
class GenericNN(CWrapPlugin):
|
||||
INPUT_TYPE_CHECK = Template("checkTypes(is_cuda, $type, $tensor_args);")
|
||||
|
||||
HEADER_TEMPLATE = Template("void $name($args);")
|
||||
|
||||
WRAPPER_TEMPLATE = Template("""\
|
||||
void $name($args)
|
||||
{
|
||||
bool is_cuda = $input->isCuda();
|
||||
auto type = $input->type();
|
||||
$type_check
|
||||
$options
|
||||
} else {
|
||||
throw std::runtime_error("invalid arguments");
|
||||
}
|
||||
}
|
||||
""")
|
||||
|
||||
THNN_TEMPLATE = Template("""\
|
||||
if (type == thpp::Type::FLOAT) {
|
||||
THNN_Float$name(
|
||||
NULL,
|
||||
$float_args);
|
||||
} else if (type == thpp::Type::DOUBLE) {
|
||||
THNN_Double$name(
|
||||
NULL,
|
||||
$double_args);
|
||||
} else {
|
||||
throw std::runtime_error("unsupported tensor type");
|
||||
}""")
|
||||
|
||||
THCUNN_TEMPLATE = Template("""\
|
||||
#ifdef WITH_CUDA
|
||||
if (type == thpp::Type::FLOAT) {
|
||||
THNN_Cuda$name(
|
||||
state,
|
||||
$float_args);
|
||||
} else if (type == thpp::Type::DOUBLE) {
|
||||
THNN_CudaDouble$name(
|
||||
state,
|
||||
$double_args);
|
||||
} else if (type == thpp::Type::HALF) {
|
||||
THNN_CudaHalf$name(
|
||||
state,
|
||||
$half_args);
|
||||
} else {
|
||||
throw std::runtime_error("unsupported tensor type");
|
||||
}
|
||||
#endif
|
||||
""")
|
||||
|
||||
INDEX_TENSOR_TYPES = {'THIndexTensor*', 'THCIndexTensor*'}
|
||||
|
||||
REAL_TENSOR_TYPES = {'THTensor*', 'THCTensor*'}
|
||||
|
||||
INPUT_ARGUMENT_MAP = {
|
||||
'THNNState*': 'void*',
|
||||
'THCState*': 'void*',
|
||||
'THTensor*': 'thpp::Tensor*',
|
||||
'THCTensor*': 'thpp::Tensor*',
|
||||
'THIndexTensor*': 'thpp::Tensor*',
|
||||
'THCIndexTensor*': 'thpp::Tensor*',
|
||||
'THIndex_t': 'long',
|
||||
'accreal': 'double',
|
||||
}
|
||||
|
||||
def __init__(self, header=False):
|
||||
self.header = header
|
||||
self.declarations = []
|
||||
|
||||
def process_full_file(self, base_wrapper):
|
||||
if self.header:
|
||||
wrapper = '#pragma once\n\n'
|
||||
wrapper += '#include <THPP/Tensor.hpp>\n\n'
|
||||
else:
|
||||
wrapper = '#include "THNN_generic.h"\n'
|
||||
wrapper = '#include "THNN_generic.inc.h"\n\n'
|
||||
wrapper += 'namespace torch { namespace nn {\n\n'
|
||||
wrapper += base_wrapper
|
||||
wrapper += '}} // namespace torch::nn\n'
|
||||
return wrapper
|
||||
|
||||
def process_declarations(self, declarations):
|
||||
for declaration in declarations:
|
||||
base_args = declaration['options'][0]['arguments']
|
||||
for option in declaration['options']:
|
||||
for idx, arg in enumerate(option['arguments']):
|
||||
arg['assign_name'] = base_args[idx]['name']
|
||||
arg['assign_type'] = base_args[idx]['type']
|
||||
if idx != 1:
|
||||
arg['ignore_check'] = True
|
||||
return declarations
|
||||
|
||||
def get_arg_accessor(self, arg, option):
|
||||
return self.get_type_unpack(arg, option)
|
||||
|
||||
def process_pre_arg_assign(self, pre_arg_assign, option):
|
||||
if option['backend'] == 'cunn':
|
||||
# Enclose arg_assign with CUDA guard
|
||||
pre_arg_assign.append('#ifdef WITH_CUDA')
|
||||
return pre_arg_assign
|
||||
|
||||
def process_option_code_template(self, template, option):
|
||||
template = []
|
||||
if option['backend'] == 'cunn':
|
||||
template.append('#endif')
|
||||
|
||||
def base_cast(arg, CReal, real):
|
||||
name = 'arg_' + arg['assign_name']
|
||||
type = arg['type']
|
||||
if type in self.REAL_TENSOR_TYPES:
|
||||
return ('(TH{CReal}Tensor*){name}->cdata()'
|
||||
.format(CReal=CReal, name=name))
|
||||
elif type in self.INDEX_TENSOR_TYPES:
|
||||
return '({type}){name}->cdata()'.format(type=type, name=name)
|
||||
elif type == 'THCState*':
|
||||
return '({}){}'.format(type, name)
|
||||
elif type == 'real':
|
||||
if real == 'half':
|
||||
return 'THC_float2half({})'.format(name)
|
||||
return '({real}){name}'.format(real=real, name=name)
|
||||
return name
|
||||
|
||||
def cast(arg, CReal, real):
|
||||
expr = base_cast(arg, CReal, real)
|
||||
if arg.get('optional', False):
|
||||
name = 'arg_' + arg['assign_name']
|
||||
return '{name} ? {expr} : NULL'.format(name=name, expr=expr)
|
||||
return expr
|
||||
|
||||
if option['backend'] == 'nn':
|
||||
float_args = []
|
||||
double_args = []
|
||||
for idx, arg in enumerate(option['arguments']):
|
||||
float_args.append(cast(arg, 'Float', 'float'))
|
||||
double_args.append(cast(arg, 'Double', 'double'))
|
||||
|
||||
code = self.THNN_TEMPLATE.substitute(
|
||||
name=option['cname'],
|
||||
float_args=',\n'.join(float_args),
|
||||
double_args=',\n'.join(double_args))
|
||||
template.append(code)
|
||||
|
||||
elif option['backend'] == 'cunn':
|
||||
float_args = []
|
||||
double_args = []
|
||||
half_args = []
|
||||
for idx, arg in enumerate(option['arguments']):
|
||||
float_args.append(cast(arg, 'Cuda', 'float'))
|
||||
double_args.append(cast(arg, 'CudaDouble', 'double'))
|
||||
half_args.append(cast(arg, 'CudaHalf', 'half'))
|
||||
|
||||
code = self.THCUNN_TEMPLATE.substitute(
|
||||
name=option['cname'],
|
||||
float_args=',\n'.join(float_args),
|
||||
double_args=',\n'.join(double_args),
|
||||
half_args=',\n'.join(half_args))
|
||||
template.append(code)
|
||||
|
||||
template.append('')
|
||||
return template
|
||||
|
||||
def get_type_unpack(self, arg, option):
|
||||
return Template(arg.get('assign_name', arg['name']))
|
||||
|
||||
def get_type_check(self, arg, option):
|
||||
if option['backend'] == 'cunn':
|
||||
return Template('is_cuda')
|
||||
else:
|
||||
return Template('!is_cuda')
|
||||
|
||||
def get_assign_args(self, arguments):
|
||||
assign_args = []
|
||||
for arg in arguments:
|
||||
arg = copy.copy(arg)
|
||||
new_type = self.INPUT_ARGUMENT_MAP.get(arg['type'])
|
||||
if new_type is not None:
|
||||
arg['type'] = new_type
|
||||
assign_args.append(arg)
|
||||
return assign_args
|
||||
|
||||
def get_wrapper_template(self, declaration):
|
||||
# get assign arguments string
|
||||
base_arguments = declaration['options'][0]['arguments']
|
||||
args = self.get_assign_args(base_arguments)
|
||||
arg_str = ', '.join([arg['type'] + ' ' + arg['name'] for arg in args])
|
||||
|
||||
if self.header:
|
||||
return Template(self.HEADER_TEMPLATE.safe_substitute(args=arg_str))
|
||||
|
||||
def get_checked_args(tensor_types):
|
||||
checked_args = []
|
||||
for arg in base_arguments:
|
||||
if arg['type'] in tensor_types:
|
||||
name = arg.get('assign_name', arg['name'])
|
||||
name_str = name
|
||||
if arg.get('optional', False):
|
||||
name_str = '?' + name_str
|
||||
checked_args += ['"' + name_str + '"', name]
|
||||
checked_args += ['NULL']
|
||||
return checked_args
|
||||
|
||||
real_args = get_checked_args(self.REAL_TENSOR_TYPES)
|
||||
long_args = get_checked_args(self.INDEX_TENSOR_TYPES)
|
||||
|
||||
# check input types
|
||||
types_checks = []
|
||||
if len(real_args) > 1:
|
||||
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
|
||||
type='type', tensor_args=', '.join(real_args)))
|
||||
if len(long_args) > 1:
|
||||
types_checks.append(self.INPUT_TYPE_CHECK.substitute(
|
||||
type='thpp::Type::LONG', tensor_args=', '.join(long_args)))
|
||||
|
||||
return Template(self.WRAPPER_TEMPLATE.safe_substitute(
|
||||
input=args[0]['name'],
|
||||
args=arg_str,
|
||||
type_check='\n '.join(types_checks)))
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class KwargsPlugin(CWrapPlugin):
|
||||
|
||||
ACCESSOR_TEMPLATE = Template('(__tuplecount > $idx ? PyTuple_GET_ITEM(args, $idx) : __kw_$name)')
|
||||
@ -23,6 +24,16 @@ class KwargsPlugin(CWrapPlugin):
|
||||
for option in declaration['options']:
|
||||
for arg in option['arguments']:
|
||||
arg['no_kwargs'] = True
|
||||
# we need to use offsets for arg position in *arg if kwarg_only args
|
||||
# are not at the end
|
||||
for declaration in declarations:
|
||||
for option in declaration['options']:
|
||||
offset = 0
|
||||
for arg in option['arguments']:
|
||||
if arg.get('kwarg_only') and not arg.get('ignore_check', False):
|
||||
offset += 1
|
||||
else:
|
||||
arg['kwarg_offset'] = offset
|
||||
return declarations
|
||||
|
||||
def get_arg_accessor(self, arg, option):
|
||||
@ -30,14 +41,14 @@ class KwargsPlugin(CWrapPlugin):
|
||||
return
|
||||
if arg.get('kwarg_only'):
|
||||
return self.KWARG_ONLY_ACCESSOR_TEMPLATE.substitute(name=arg['name'])
|
||||
return self.ACCESSOR_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'])
|
||||
return self.ACCESSOR_TEMPLATE.substitute(idx=arg['idx'] - arg['kwarg_offset'], name=arg['name'])
|
||||
|
||||
def process_single_check(self, code, arg, arg_accessor):
|
||||
if arg.get('no_kwargs'):
|
||||
return code
|
||||
if arg.get('kwarg_only'):
|
||||
return self.KWARG_ONLY_CHECK_TEMPLATE.substitute(name=arg['name'], code=code)
|
||||
return self.CHECK_TEMPLATE.substitute(idx=arg['idx'], name=arg['name'], code=code)
|
||||
return self.CHECK_TEMPLATE.substitute(idx=arg['idx'] - arg['kwarg_offset'], name=arg['name'], code=code)
|
||||
|
||||
def process_wrapper(self, code, declaration):
|
||||
if declaration.get('no_kwargs'):
|
||||
@ -52,8 +63,9 @@ class KwargsPlugin(CWrapPlugin):
|
||||
name not in seen_args):
|
||||
seen_args.add(name)
|
||||
args.append(name)
|
||||
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args])
|
||||
lookups = '\n '.join(['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args])
|
||||
declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(a) for a in args])
|
||||
lookups = '\n '.join(
|
||||
['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=a) for a in args])
|
||||
start_idx = code.find('{') + 1
|
||||
new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups)
|
||||
return code[:start_idx] + new_code + code[start_idx:]
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
from . import CWrapPlugin
|
||||
|
||||
|
||||
class NullableArguments(CWrapPlugin):
|
||||
|
||||
def process_single_check(self, code, arg, arg_accessor):
|
||||
if 'nullable' in arg and arg['nullable']:
|
||||
return '({} || {} == Py_None)'.format(code, arg_accessor)
|
||||
@ -10,5 +12,3 @@ class NullableArguments(CWrapPlugin):
|
||||
if 'nullable' in arg and arg['nullable']:
|
||||
return '({} == Py_None ? NULL : {})'.format(arg_accessor, code)
|
||||
return code
|
||||
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ from copy import deepcopy
|
||||
from . import CWrapPlugin
|
||||
from itertools import product
|
||||
|
||||
|
||||
class OptionalArguments(CWrapPlugin):
|
||||
|
||||
def process_declarations(self, declarations):
|
||||
@ -45,7 +46,7 @@ class OptionalArguments(CWrapPlugin):
|
||||
seen_signatures = set()
|
||||
unique = []
|
||||
for option in options:
|
||||
for num_kwarg_only in range(0, len(option['arguments'])+1):
|
||||
for num_kwarg_only in range(0, len(option['arguments']) + 1):
|
||||
sig = signature(option, num_kwarg_only)
|
||||
if sig not in seen_signatures:
|
||||
if num_kwarg_only > 0:
|
||||
@ -55,4 +56,3 @@ class OptionalArguments(CWrapPlugin):
|
||||
seen_signatures.add(sig)
|
||||
break
|
||||
return unique
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from . import CWrapPlugin
|
||||
from string import Template
|
||||
|
||||
|
||||
class ReturnArguments(CWrapPlugin):
|
||||
ARGUMENT_RETURN_TEMPLATE = Template("Py_INCREF($arg);\nreturn (PyObject*)($arg);")
|
||||
TUPLE_RETURN_TEMPLATE = Template("return PyTuple_Pack($num_args, $args);")
|
||||
@ -16,4 +17,5 @@ class ReturnArguments(CWrapPlugin):
|
||||
if len(args) == 1:
|
||||
return Template(self.ARGUMENT_RETURN_TEMPLATE.safe_substitute(arg=accessors[0]))
|
||||
else:
|
||||
return Template(self.TUPLE_RETURN_TEMPLATE.safe_substitute(num_args=len(args), args=', '.join(accessors)))
|
||||
return Template(self.TUPLE_RETURN_TEMPLATE.safe_substitute(num_args=len(args),
|
||||
args=', '.join(accessors)))
|
||||
|
||||
@ -131,6 +131,7 @@ PyObject * $name(PyObject *_unused, PyObject *args)
|
||||
|
||||
def get_wrapper_template(self, declaration):
|
||||
arg_desc = []
|
||||
|
||||
def describe_arg(arg):
|
||||
desc = self.TYPE_NAMES[arg['type']] + ' ' + arg['name']
|
||||
if arg.get('nullable'):
|
||||
|
||||
@ -4,6 +4,7 @@ from . import CWrapPlugin
|
||||
from itertools import product, chain
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class THPPlugin(CWrapPlugin):
|
||||
|
||||
TYPE_UNPACK = {
|
||||
@ -14,6 +15,11 @@ class THPPlugin(CWrapPlugin):
|
||||
'THTensor*': Template('((THPTensor*)$arg)->cdata'),
|
||||
'THBoolTensor*': Template('((THPBoolTensor*)$arg)->cdata'),
|
||||
'THIndexTensor*': Template('((THPIndexTensor*)$arg)->cdata'),
|
||||
'THIntegerTensor*': Template('((THPIntegerTensor*)$arg)->cdata'),
|
||||
|
||||
'THCudaTensor*': Template('((THCPFloatTensor*)$arg)->cdata'),
|
||||
'THCudaDoubleTensor*': Template('((THCPDoubleTensor*)$arg)->cdata'),
|
||||
'THCudaLongTensor*': Template('((THCPLongTensor*)$arg)->cdata'),
|
||||
|
||||
'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'),
|
||||
'THSDoubleTensor*': Template('((THSPDoubleTensor*)$arg)->cdata'),
|
||||
@ -43,10 +49,14 @@ class THPPlugin(CWrapPlugin):
|
||||
'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'),
|
||||
'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'),
|
||||
'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'),
|
||||
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
|
||||
'THTensor*': Template('(PyObject*)Py_TYPE($arg) == THPTensorClass'),
|
||||
'THBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THPBoolTensorClass'),
|
||||
'THIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIndexTensorClass'),
|
||||
'THIntegerTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntegerTensorClass'),
|
||||
|
||||
'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'),
|
||||
'THCudaDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPDoubleTensorClass'),
|
||||
'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'),
|
||||
|
||||
'THSDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPDoubleTensorClass'),
|
||||
'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'),
|
||||
@ -78,6 +88,7 @@ class THPPlugin(CWrapPlugin):
|
||||
'THSTensor*': Template('return THSPTensor_(New)($result);'),
|
||||
'THLongTensor*': Template('return THPLongTensor_New($result);'),
|
||||
'THLongStorage*': Template('return THPLongStorage_New($result);'),
|
||||
'THCudaLongTensor*': Template('return THCPLongTensor_New($result);'),
|
||||
# TODO: make it smarter - it should return python long if result doesn't fit into an int
|
||||
'long': Template('return PyInt_FromLong($result);'),
|
||||
'accreal': Template('return THPUtils_(newAccreal)($result);'),
|
||||
@ -143,6 +154,7 @@ ${cpu}
|
||||
'THIntTensor*': _allocate('Int', ALLOCATE_TMPL),
|
||||
'THBoolTensor*': _allocate('Byte', ALLOCATE_TMPL, ALLOCATE_CUDA),
|
||||
'THIndexTensor*': _allocate('Long', ALLOCATE_TMPL, ALLOCATE_CUDA),
|
||||
'THIntegerTensor*': _allocate('Int', ALLOCATE_TMPL, ALLOCATE_CUDA),
|
||||
|
||||
'THSTensor*': _allocate('', ALLOCATE_TMPL, sparse=True),
|
||||
}
|
||||
@ -157,8 +169,12 @@ ${cpu}
|
||||
'THIntTensor*': '" THPModuleStr "IntTensor',
|
||||
'THBoolTensor*': '" THPModuleStr "ByteTensor',
|
||||
'THIndexTensor*': '" THPModuleStr "LongTensor',
|
||||
'THIntegerTensor*': '" THPModuleStr "IntTensor',
|
||||
'THFloatTensor*': '" THPModuleStr "FloatTensor',
|
||||
'THDoubleTensor*': '" THPModuleStr "DoubleTensor',
|
||||
'THCudaTensor*': 'torch.cuda.FloatTensor',
|
||||
'THCudaDoubleTensor*': 'torch.cuda.DoubleTensor',
|
||||
'THCudaLongTensor*': 'torch.cuda.LongTensor',
|
||||
'THSize*': 'torch.Size',
|
||||
'THStride*': 'tuple',
|
||||
'long': 'int',
|
||||
@ -166,6 +182,7 @@ ${cpu}
|
||||
'double': 'float',
|
||||
'accreal': '" RealStr "',
|
||||
'bool': 'bool',
|
||||
'const char*': 'bool', # Can come only from bool option.
|
||||
}
|
||||
|
||||
OUT_INIT = """
|
||||
@ -198,8 +215,8 @@ ${cpu}
|
||||
def format_args(args, var_args=False):
|
||||
option_desc = [format_arg(arg, var_args)
|
||||
for arg in args
|
||||
if not arg.get('ignore_check', False)
|
||||
and not arg.get('output')]
|
||||
if not arg.get('ignore_check', False) and
|
||||
not arg.get('output')]
|
||||
output_args = list(filter(lambda a: a.get('output'), args))
|
||||
if output_args:
|
||||
if len(output_args) > 1:
|
||||
@ -295,8 +312,6 @@ ${cpu}
|
||||
|
||||
def process_declarations(self, declarations):
|
||||
new_declarations = []
|
||||
register_only = [d for d in declarations if d.get('only_register', False)]
|
||||
declarations = [d for d in declarations if not d.get('only_register', False)]
|
||||
|
||||
def has_arg_type(declaration, type_name):
|
||||
return any(arg['type'] == type_name
|
||||
@ -314,8 +329,16 @@ ${cpu}
|
||||
for arg in option['arguments'])
|
||||
|
||||
for declaration in declarations:
|
||||
# Disable all methods for THHalfTensor, unless cpu_half is True
|
||||
if not declaration.get('cpu_half', False):
|
||||
defined_if = '!defined(TH_REAL_IS_HALF)'
|
||||
if 'defined_if' in declaration:
|
||||
defined_if += ' && (' + declaration['defined_if'] + ')'
|
||||
declaration['defined_if'] = defined_if
|
||||
|
||||
if declaration.get('only_register', False):
|
||||
continue
|
||||
|
||||
declaration.setdefault('python_name', declaration['name'])
|
||||
declaration.setdefault('variables', [])
|
||||
if has_arg_type(declaration, 'THSize*'):
|
||||
@ -345,8 +368,9 @@ ${cpu}
|
||||
if arg['name'] == 'self':
|
||||
arg['ignore_check'] = True
|
||||
|
||||
|
||||
declarations = [d for d in declarations if not d.get('only_stateless', False)]
|
||||
register_only = [d for d in declarations if d.get('only_register', False)]
|
||||
declarations = [d for d in declarations
|
||||
if (not d.get('only_stateless', False)) and (not d.get('only_register', False))]
|
||||
self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only))
|
||||
self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only))
|
||||
|
||||
@ -362,6 +386,7 @@ ${cpu}
|
||||
for option in declaration['options']:
|
||||
for arg in option['arguments']:
|
||||
if arg['name'] == 'self':
|
||||
arg['assign_name'] = 'self'
|
||||
arg['name'] = 'source'
|
||||
return declaration
|
||||
|
||||
@ -383,21 +408,24 @@ ${cpu}
|
||||
if 'defined_if' in declaration:
|
||||
entry = self.preprocessor_guard(entry, declaration['defined_if'])
|
||||
tensor_methods += entry
|
||||
return self.TENSOR_METHODS_DECLARATION.substitute(
|
||||
generated = self.TENSOR_METHODS_DECLARATION.substitute(
|
||||
methods=tensor_methods,
|
||||
stateless=('' if not stateless else 'stateless_'),
|
||||
sparse=('' if not sparse else 'S'),
|
||||
)
|
||||
if sparse:
|
||||
generated = '#ifndef TH_REAL_IS_HALF\n' + generated + '\n#endif\n\n'
|
||||
return generated
|
||||
|
||||
def process_full_file(self, code):
|
||||
# We have to find a place before all undefs
|
||||
idx = code.find('// PUT DEFINITIONS IN HERE PLEASE')
|
||||
return (code[:idx]
|
||||
+ self.declare_methods(False, False)
|
||||
+ self.declare_methods(True, False)
|
||||
+ self.declare_methods(False, True)
|
||||
+ self.declare_methods(True, True)
|
||||
+ code[idx:]
|
||||
return (code[:idx] +
|
||||
self.declare_methods(False, False) +
|
||||
self.declare_methods(True, False) +
|
||||
self.declare_methods(False, True) +
|
||||
self.declare_methods(True, True) +
|
||||
code[idx:]
|
||||
)
|
||||
|
||||
def preprocessor_guard(self, code, condition):
|
||||
@ -408,7 +436,7 @@ ${cpu}
|
||||
return self.preprocessor_guard(code, declaration['defined_if'])
|
||||
return code
|
||||
|
||||
def process_all_unpacks(self, code, option):
|
||||
def process_all_call_arg(self, code, option):
|
||||
return 'LIBRARY_STATE ' + code
|
||||
|
||||
def process_all_checks(self, code, option):
|
||||
@ -432,7 +460,7 @@ ${cpu}
|
||||
|
||||
return code
|
||||
|
||||
def process_option_code_template(self, template, option):
|
||||
def process_pre_arg_assign(self, template, option):
|
||||
new_args = []
|
||||
for arg in option['arguments']:
|
||||
if not option.get('output_provided', True) and arg.get('output'):
|
||||
|
||||
@ -16,6 +16,9 @@ class CWrapPlugin(object):
|
||||
def get_wrapper_template(self, declaration):
|
||||
pass
|
||||
|
||||
def get_assign_args(self, arguments):
|
||||
pass
|
||||
|
||||
def get_arg_accessor(self, arg, option):
|
||||
pass
|
||||
|
||||
@ -31,7 +34,7 @@ class CWrapPlugin(object):
|
||||
def process_single_unpack(self, code, arg, arg_accessor):
|
||||
return code
|
||||
|
||||
def process_all_unpacks(self, code, option):
|
||||
def process_all_call_arg(self, code, option):
|
||||
return code
|
||||
|
||||
def process_option_code(self, code, option):
|
||||
@ -46,6 +49,9 @@ class CWrapPlugin(object):
|
||||
def process_option_code_template(self, template, option):
|
||||
return template
|
||||
|
||||
def process_pre_arg_assign(self, template, option):
|
||||
return template
|
||||
|
||||
|
||||
from .StandaloneExtension import StandaloneExtension
|
||||
from .NullableArguments import NullableArguments
|
||||
@ -58,3 +64,4 @@ from .ReturnArguments import ReturnArguments
|
||||
from .GILRelease import GILRelease
|
||||
from .AutoGPU import AutoGPU
|
||||
from .CuDNNPlugin import CuDNNPlugin
|
||||
from .GenericNN import GenericNN
|
||||
|
||||
@ -1 +1,2 @@
|
||||
from .generate_wrappers import generate_wrappers, wrap_function, import_module
|
||||
from .generate_wrappers import generate_wrappers, wrap_function, \
|
||||
import_module, wrap_generic_function
|
||||
|
||||
@ -2,12 +2,13 @@ import os
|
||||
import sys
|
||||
from string import Template, ascii_lowercase
|
||||
from ..cwrap import cwrap
|
||||
from ..cwrap.plugins import StandaloneExtension, NullableArguments, AutoGPU
|
||||
from ..cwrap.plugins import StandaloneExtension, GenericNN, NullableArguments, AutoGPU
|
||||
|
||||
BASE_PATH = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
|
||||
WRAPPER_PATH = os.path.join(BASE_PATH, 'torch', 'csrc', 'nn')
|
||||
THNN_UTILS_PATH = os.path.join(BASE_PATH, 'torch', '_thnn', 'utils.py')
|
||||
|
||||
|
||||
def import_module(name, path):
|
||||
if sys.version_info >= (3, 5):
|
||||
import importlib.util
|
||||
@ -51,22 +52,27 @@ TYPE_TRANSFORMS = {
|
||||
'Float': {
|
||||
'THTensor*': 'THFloatTensor*',
|
||||
'real': 'float',
|
||||
'accreal': 'double',
|
||||
},
|
||||
'Double': {
|
||||
'THTensor*': 'THDoubleTensor*',
|
||||
'real': 'double',
|
||||
'accreal': 'double',
|
||||
},
|
||||
'CudaHalf': {
|
||||
'THCTensor*': 'THCudaHalfTensor*',
|
||||
'real': 'half',
|
||||
'accreal': 'float',
|
||||
},
|
||||
'Cuda': {
|
||||
'THCTensor*': 'THCudaTensor*',
|
||||
'real': 'float',
|
||||
'accreal': 'float',
|
||||
},
|
||||
'CudaDouble': {
|
||||
'THCTensor*': 'THCudaDoubleTensor*',
|
||||
'real': 'double',
|
||||
'accreal': 'double',
|
||||
},
|
||||
}
|
||||
for t, transforms in TYPE_TRANSFORMS.items():
|
||||
@ -81,7 +87,8 @@ for t in ['CudaHalf', 'Cuda', 'CudaDouble']:
|
||||
def wrap_function(name, type, arguments):
|
||||
cname = 'THNN_' + type + name
|
||||
declaration = ''
|
||||
declaration += 'extern "C" void ' + cname + '(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n'
|
||||
declaration += 'extern "C" void ' + cname + \
|
||||
'(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n'
|
||||
declaration += FUNCTION_TEMPLATE.substitute(name=type + name, cname=cname)
|
||||
indent = ' ' * 4
|
||||
dict_indent = ' ' * 6
|
||||
@ -97,9 +104,12 @@ def wrap_function(name, type, arguments):
|
||||
declaration += ']]\n\n\n'
|
||||
return declaration
|
||||
|
||||
|
||||
def generate_wrappers():
|
||||
wrap_nn()
|
||||
wrap_cunn()
|
||||
wrap_generic()
|
||||
|
||||
|
||||
def wrap_nn():
|
||||
wrapper = '#include <TH/TH.h>\n\n\n'
|
||||
@ -114,6 +124,7 @@ def wrap_nn():
|
||||
NullableArguments(),
|
||||
])
|
||||
|
||||
|
||||
def wrap_cunn():
|
||||
wrapper = '#include <TH/TH.h>\n'
|
||||
wrapper += '#include <THC/THC.h>\n\n\n'
|
||||
@ -128,3 +139,66 @@ def wrap_cunn():
|
||||
NullableArguments(),
|
||||
AutoGPU(has_self=False),
|
||||
])
|
||||
|
||||
GENERIC_FUNCTION_TEMPLATE = Template("""\
|
||||
[[
|
||||
name: $name
|
||||
return: void
|
||||
options:
|
||||
""")
|
||||
|
||||
|
||||
def wrap_generic_function(name, backends):
|
||||
declaration = ''
|
||||
declaration += GENERIC_FUNCTION_TEMPLATE.substitute(name=name)
|
||||
for backend in backends:
|
||||
declaration += ' - cname: ' + name + '\n'
|
||||
declaration += ' backend: ' + backend['name'] + '\n'
|
||||
declaration += ' arguments:\n'
|
||||
for arg in backend['arguments']:
|
||||
declaration += ' - arg: ' + arg.type + ' ' + arg.name + '\n'
|
||||
if arg.is_optional:
|
||||
declaration += ' optional: True\n'
|
||||
declaration += ']]\n\n\n'
|
||||
return declaration
|
||||
|
||||
|
||||
def wrap_generic():
|
||||
from collections import OrderedDict
|
||||
defs = OrderedDict()
|
||||
|
||||
def should_wrap_function(name):
|
||||
if name.startswith('LookupTable'):
|
||||
return False
|
||||
return (name.endswith('updateOutput') or
|
||||
name.endswith('updateGradInput') or
|
||||
name.endswith('accGradParameters') or
|
||||
name.endswith('backward'))
|
||||
|
||||
def add_functions(name, functions):
|
||||
for fn in functions:
|
||||
if not should_wrap_function(fn.name):
|
||||
continue
|
||||
if fn.name not in defs:
|
||||
defs[fn.name] = []
|
||||
defs[fn.name] += [{
|
||||
'name': name,
|
||||
'arguments': fn.arguments[1:],
|
||||
}]
|
||||
|
||||
add_functions('nn', thnn_utils.parse_header(thnn_utils.THNN_H_PATH))
|
||||
add_functions('cunn', thnn_utils.parse_header(thnn_utils.THCUNN_H_PATH))
|
||||
|
||||
wrapper = ''
|
||||
for name, backends in defs.items():
|
||||
wrapper += wrap_generic_function(name, backends)
|
||||
with open('torch/csrc/nn/THNN_generic.cwrap', 'w') as f:
|
||||
f.write(wrapper)
|
||||
|
||||
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
|
||||
GenericNN(header=True),
|
||||
], default_plugins=False, destination='torch/csrc/nn/THNN_generic.h')
|
||||
|
||||
cwrap('torch/csrc/nn/THNN_generic.cwrap', plugins=[
|
||||
GenericNN(),
|
||||
], default_plugins=False)
|
||||
|
||||
@ -1,8 +1,17 @@
|
||||
import ctypes.util
|
||||
import os
|
||||
|
||||
from .env import check_env_flag
|
||||
|
||||
CUDA_HOME = os.getenv('CUDA_HOME', '/usr/local/cuda')
|
||||
WITH_CUDA = not check_env_flag('NO_CUDA') and os.path.exists(CUDA_HOME)
|
||||
if not WITH_CUDA:
|
||||
if check_env_flag('NO_CUDA'):
|
||||
WITH_CUDA = False
|
||||
CUDA_HOME = None
|
||||
else:
|
||||
CUDA_HOME = os.getenv('CUDA_HOME', '/usr/local/cuda')
|
||||
if not os.path.exists(CUDA_HOME):
|
||||
cudart_path = ctypes.util.find_library('cudart')
|
||||
if cudart_path is not None:
|
||||
CUDA_HOME = os.path.dirname(cudart_path)
|
||||
else:
|
||||
CUDA_HOME = None
|
||||
WITH_CUDA = CUDA_HOME is not None
|
||||
|
||||
@ -1,9 +1,15 @@
|
||||
import os
|
||||
import glob
|
||||
from itertools import chain
|
||||
|
||||
from .env import check_env_flag
|
||||
from .cuda import WITH_CUDA, CUDA_HOME
|
||||
|
||||
|
||||
def gather_paths(env_vars):
|
||||
return list(chain(*(os.getenv(v, '').split(':') for v in env_vars)))
|
||||
|
||||
|
||||
WITH_CUDNN = False
|
||||
CUDNN_LIB_DIR = None
|
||||
CUDNN_INCLUDE_DIR = None
|
||||
@ -13,12 +19,18 @@ if WITH_CUDA and not check_env_flag('NO_CUDNN'):
|
||||
os.path.join(CUDA_HOME, 'lib'),
|
||||
os.path.join(CUDA_HOME, 'lib64'),
|
||||
'/usr/lib/x86_64-linux-gnu/',
|
||||
]))
|
||||
] + gather_paths([
|
||||
'LIBRARY_PATH',
|
||||
])))
|
||||
include_paths = list(filter(bool, [
|
||||
os.getenv('CUDNN_INCLUDE_DIR'),
|
||||
os.path.join(CUDA_HOME, 'include'),
|
||||
'/usr/include/'
|
||||
]))
|
||||
'/usr/include/',
|
||||
] + gather_paths([
|
||||
'CPATH',
|
||||
'C_INCLUDE_PATH',
|
||||
'CPLUS_INCLUDE_PATH',
|
||||
])))
|
||||
for path in lib_paths:
|
||||
if path is None or not os.path.exists(path):
|
||||
continue
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import os
|
||||
|
||||
|
||||
def check_env_flag(name):
|
||||
return os.getenv(name) in ['ON', '1', 'YES', 'TRUE', 'Y']
|
||||
|
||||
@ -10,6 +10,7 @@ on an NVIDIA GPU with compute capability >= 2.0.
|
||||
|
||||
import sys
|
||||
from ._utils import _import_dotted_name
|
||||
from .version import __version__
|
||||
|
||||
__all__ = [
|
||||
'typename', 'is_tensor', 'is_storage', 'set_default_tensor_type',
|
||||
@ -30,6 +31,13 @@ __all__ = [
|
||||
# automatically filled by the dynamic loader.
|
||||
import os as _dl_flags
|
||||
|
||||
# if we have numpy, it *must* be imported before the call to setdlopenflags()
|
||||
# or there is risk that later c modules will segfault when importing numpy
|
||||
try:
|
||||
import numpy as np
|
||||
except:
|
||||
pass
|
||||
|
||||
# first check if the os package has the required flags
|
||||
if not hasattr(_dl_flags, 'RTLD_GLOBAL') or not hasattr(_dl_flags, 'RTLD_NOW'):
|
||||
try:
|
||||
@ -56,6 +64,7 @@ del old_flags
|
||||
# Define basic utilities
|
||||
################################################################################
|
||||
|
||||
|
||||
def typename(o):
|
||||
module = ''
|
||||
class_name = ''
|
||||
@ -74,10 +83,20 @@ def typename(o):
|
||||
|
||||
|
||||
def is_tensor(obj):
|
||||
r"""Returns True if `obj` is a pytorch tensor.
|
||||
|
||||
Args:
|
||||
obj (Object): Object to test
|
||||
"""
|
||||
return obj.__class__ in _tensor_classes
|
||||
|
||||
|
||||
def is_storage(obj):
|
||||
r"""Returns True if `obj` is a pytorch storage object.
|
||||
|
||||
Args:
|
||||
obj (Object): Object to test
|
||||
"""
|
||||
return obj.__class__ in _storage_classes
|
||||
|
||||
|
||||
@ -130,61 +149,115 @@ from ._tensor_str import set_printoptions
|
||||
from .storage import _StorageBase
|
||||
from .tensor import _TensorBase
|
||||
|
||||
|
||||
class DoubleStorage(_C.DoubleStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class FloatStorage(_C.FloatStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class HalfStorage(_C.HalfStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class LongStorage(_C.LongStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class IntStorage(_C.IntStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class ShortStorage(_C.ShortStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class CharStorage(_C.CharStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class ByteStorage(_C.ByteStorageBase, _StorageBase):
|
||||
pass
|
||||
|
||||
|
||||
class DoubleTensor(_C.DoubleTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return DoubleStorage
|
||||
|
||||
|
||||
class FloatTensor(_C.FloatTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return FloatStorage
|
||||
class LongTensor(_C.LongTensorBase, _TensorBase):
|
||||
|
||||
|
||||
class HalfTensor(_C.HalfTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return HalfStorage
|
||||
|
||||
|
||||
class LongTensor(_C.LongTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return LongStorage
|
||||
|
||||
|
||||
class IntTensor(_C.IntTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return IntStorage
|
||||
|
||||
|
||||
class ShortTensor(_C.ShortTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return ShortStorage
|
||||
|
||||
|
||||
class CharTensor(_C.CharTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
# TODO
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return CharStorage
|
||||
|
||||
|
||||
class ByteTensor(_C.ByteTensorBase, _TensorBase):
|
||||
|
||||
def is_signed(self):
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def storage_type(cls):
|
||||
return ByteStorage
|
||||
@ -209,19 +282,21 @@ set_default_tensor_type('torch.FloatTensor')
|
||||
|
||||
from .functional import *
|
||||
|
||||
|
||||
################################################################################
|
||||
# Initialize extension
|
||||
################################################################################
|
||||
|
||||
def manager_path():
|
||||
import os
|
||||
path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'lib', 'torch_shm_manager')
|
||||
if not os.path.exists(path):
|
||||
raise RuntimeError("Unable to find torch_shm_manager at " + path)
|
||||
return path.encode('utf-8')
|
||||
|
||||
|
||||
# Shared memory manager needs to know the exact location of manager executable
|
||||
import os
|
||||
manager_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'lib', 'torch_shm_manager')
|
||||
if sys.version_info[0] >= 3:
|
||||
manager_path = bytes(manager_path, 'ascii')
|
||||
|
||||
_C._initExtension(manager_path)
|
||||
|
||||
del os
|
||||
_C._initExtension(manager_path())
|
||||
del manager_path
|
||||
|
||||
################################################################################
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -22,7 +22,7 @@ def set_printoptions(
|
||||
edgeitems=None,
|
||||
linewidth=None,
|
||||
profile=None,
|
||||
):
|
||||
):
|
||||
"""Set options for printing. Items shamelessly taken from Numpy
|
||||
|
||||
Args:
|
||||
@ -119,7 +119,7 @@ def _number_format(tensor, min_sz=-1):
|
||||
else:
|
||||
if exp_max > prec + 1 or exp_max < 0:
|
||||
sz = max(min_sz, 7)
|
||||
scale = math.pow(10, exp_max-1)
|
||||
scale = math.pow(10, exp_max - 1)
|
||||
else:
|
||||
if exp_max == 0:
|
||||
sz = 7
|
||||
@ -132,19 +132,19 @@ def _number_format(tensor, min_sz=-1):
|
||||
|
||||
def _tensor_str(self):
|
||||
n = PRINT_OPTS.edgeitems
|
||||
has_hdots = self.size()[-1] > 2*n
|
||||
has_vdots = self.size()[-2] > 2*n
|
||||
has_hdots = self.size()[-1] > 2 * n
|
||||
has_vdots = self.size()[-2] > 2 * n
|
||||
print_full_mat = not has_hdots and not has_vdots
|
||||
formatter = _number_format(self, min_sz=3 if not print_full_mat else 0)
|
||||
print_dots = self.numel() >= PRINT_OPTS.threshold
|
||||
|
||||
dim_sz = max(2, max(len(str(x)) for x in self.size()))
|
||||
dim_fmt = "{:^" + str(dim_sz) + "}"
|
||||
dot_fmt = u"{:^" + str(dim_sz+1) + "}"
|
||||
dot_fmt = u"{:^" + str(dim_sz + 1) + "}"
|
||||
|
||||
counter_dim = self.ndimension() - 2
|
||||
counter = torch.LongStorage(counter_dim).fill_(0)
|
||||
counter[counter.size()-1] = -1
|
||||
counter[counter.size() - 1] = -1
|
||||
finished = False
|
||||
strt = ''
|
||||
while True:
|
||||
@ -152,7 +152,7 @@ def _tensor_str(self):
|
||||
nskipped = [False for i in counter]
|
||||
for i in _range(counter_dim - 1, -1, -1):
|
||||
counter[i] += 1
|
||||
if print_dots and counter[i] == n and self.size(i) > 2*n:
|
||||
if print_dots and counter[i] == n and self.size(i) > 2 * n:
|
||||
counter[i] = self.size(i) - n
|
||||
nskipped[i] = True
|
||||
if counter[i] == self.size(i):
|
||||
@ -188,18 +188,18 @@ def __repr_row(row, indent, fmt, scale, sz, truncate=None):
|
||||
if truncate is not None:
|
||||
dotfmt = " {:^5} "
|
||||
return (indent +
|
||||
' '.join(fmt.format(val/scale) for val in row[:truncate]) +
|
||||
' '.join(fmt.format(val / scale) for val in row[:truncate]) +
|
||||
dotfmt.format('...') +
|
||||
' '.join(fmt.format(val/scale) for val in row[-truncate:]) +
|
||||
' '.join(fmt.format(val / scale) for val in row[-truncate:]) +
|
||||
'\n')
|
||||
else:
|
||||
return indent + ' '.join(fmt.format(val/scale) for val in row) + '\n'
|
||||
return indent + ' '.join(fmt.format(val / scale) for val in row) + '\n'
|
||||
|
||||
|
||||
def _matrix_str(self, indent='', formatter=None, force_truncate=False):
|
||||
n = PRINT_OPTS.edgeitems
|
||||
has_hdots = self.size(1) > 2*n
|
||||
has_vdots = self.size(0) > 2*n
|
||||
has_hdots = self.size(1) > 2 * n
|
||||
has_vdots = self.size(0) > 2 * n
|
||||
print_full_mat = not has_hdots and not has_vdots
|
||||
|
||||
if formatter is None:
|
||||
@ -207,14 +207,14 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False):
|
||||
min_sz=5 if not print_full_mat else 0)
|
||||
else:
|
||||
fmt, scale, sz = formatter
|
||||
nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth-len(indent))/(sz+1)))
|
||||
nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth - len(indent)) / (sz + 1)))
|
||||
strt = ''
|
||||
firstColumn = 0
|
||||
|
||||
if not force_truncate and \
|
||||
(self.numel() < PRINT_OPTS.threshold or print_full_mat):
|
||||
while firstColumn < self.size(1):
|
||||
lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1)-1)
|
||||
lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1) - 1)
|
||||
if nColumnPerLine < self.size(1):
|
||||
strt += '\n' if firstColumn != 1 else ''
|
||||
strt += 'Columns {} to {} \n{}'.format(
|
||||
@ -223,15 +223,15 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False):
|
||||
strt += SCALE_FORMAT.format(scale)
|
||||
for l in _range(self.size(0)):
|
||||
strt += indent + (' ' if scale != 1 else '')
|
||||
row_slice = self[l, firstColumn:lastColumn+1]
|
||||
strt += ' '.join(fmt.format(val/scale) for val in row_slice)
|
||||
row_slice = self[l, firstColumn:lastColumn + 1]
|
||||
strt += ' '.join(fmt.format(val / scale) for val in row_slice)
|
||||
strt += '\n'
|
||||
firstColumn = lastColumn + 1
|
||||
else:
|
||||
if scale != 1:
|
||||
strt += SCALE_FORMAT.format(scale)
|
||||
if has_vdots and has_hdots:
|
||||
vdotfmt = "{:^" + str((sz+1)*n-1) + "}"
|
||||
vdotfmt = "{:^" + str((sz + 1) * n - 1) + "}"
|
||||
ddotfmt = u"{:^5}"
|
||||
for row in self[:n]:
|
||||
strt += __repr_row(row, indent, fmt, scale, sz, n)
|
||||
@ -269,13 +269,13 @@ def _vector_str(self):
|
||||
ident = ' '
|
||||
if self.numel() < PRINT_OPTS.threshold:
|
||||
return (strt +
|
||||
'\n'.join(ident + fmt.format(val/scale) for val in self) +
|
||||
'\n'.join(ident + fmt.format(val / scale) for val in self) +
|
||||
'\n')
|
||||
else:
|
||||
return (strt +
|
||||
'\n'.join(ident + fmt.format(val/scale) for val in self[:n]) +
|
||||
'\n'.join(ident + fmt.format(val / scale) for val in self[:n]) +
|
||||
'\n' + (ident + dotfmt.format(u"\u22EE")) +
|
||||
'\n'.join(ident + fmt.format(val/scale) for val in self[-n:]) +
|
||||
'\n'.join(ident + fmt.format(val / scale) for val in self[-n:]) +
|
||||
'\n')
|
||||
|
||||
|
||||
@ -295,4 +295,3 @@ def _str(self):
|
||||
strt += '[{} of size {}{}]\n'.format(torch.typename(self),
|
||||
size_str, device_str)
|
||||
return '\n' + strt
|
||||
|
||||
|
||||
@ -2,7 +2,9 @@ import threading
|
||||
import torch.cuda
|
||||
from .utils import THNN_H_PATH, THCUNN_H_PATH, parse_header, load_backend
|
||||
|
||||
|
||||
class Backends(object):
|
||||
|
||||
def __init__(self):
|
||||
self.backends = {}
|
||||
|
||||
@ -14,6 +16,7 @@ class Backends(object):
|
||||
|
||||
|
||||
class Backend(object):
|
||||
|
||||
def __init__(self, lib_prefix, lib_name, functions, mixins=tuple()):
|
||||
self.lib_prefix = lib_prefix
|
||||
self.lib_name = lib_name
|
||||
@ -37,6 +40,7 @@ class Backend(object):
|
||||
|
||||
|
||||
class THNNCudaBackendStateMixin(object):
|
||||
|
||||
@property
|
||||
def library_state(self):
|
||||
return torch.cuda._state_cdata
|
||||
@ -54,7 +58,10 @@ for t in ['Float', 'Double']:
|
||||
type2backend.backends['torch.{}Tensor'.format(t)] = backend
|
||||
type2backend.backends[getattr(torch, '{}Tensor'.format(t))] = backend
|
||||
|
||||
backend = Backend('Cuda', 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
|
||||
type2backend.backends['THNNCudaBackend'] = backend
|
||||
type2backend.backends['torch.cuda.FloatTensor'] = backend
|
||||
type2backend.backends[torch.cuda.FloatTensor] = backend
|
||||
|
||||
for t in ['Half', '', 'Double']:
|
||||
backend = Backend('Cuda' + t, 'torch._thnn._THCUNN', _thcunn_headers, (THNNCudaBackendStateMixin,))
|
||||
type2backend.backends['THNNCuda{}Backend'.format(t)] = backend
|
||||
py_name = 'Float' if t == '' else t
|
||||
type2backend.backends['torch.cuda.{}Tensor'.format(py_name)] = backend
|
||||
type2backend.backends[getattr(torch.cuda, '{}Tensor'.format(py_name))] = backend
|
||||
|
||||
@ -12,6 +12,7 @@ def _unpickle_backend(backend_name):
|
||||
|
||||
|
||||
class THNNBackendBase(object):
|
||||
|
||||
def __init__(self):
|
||||
self.methods = {}
|
||||
|
||||
@ -33,6 +34,7 @@ class THNNBackendBase(object):
|
||||
|
||||
|
||||
class Function(object):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.arguments = []
|
||||
@ -46,6 +48,7 @@ class Function(object):
|
||||
|
||||
|
||||
class Argument(object):
|
||||
|
||||
def __init__(self, _type, name, is_optional):
|
||||
self.type = _type
|
||||
self.name = name
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,5 @@
|
||||
import torch
|
||||
import importlib
|
||||
|
||||
|
||||
def _type(self, new_type=None, async=False):
|
||||
@ -21,6 +22,15 @@ def _type(self, new_type=None, async=False):
|
||||
new_type = _import_dotted_name(new_type)
|
||||
if new_type == type(self):
|
||||
return self
|
||||
if self.is_sparse:
|
||||
if not new_type.is_sparse:
|
||||
raise RuntimeError("Cannot cast sparse tensor to dense tensor")
|
||||
new_type_name = new_type.__module__ + '.' + new_type.__name__
|
||||
new_values_type_name = new_type_name.replace('.sparse', '')
|
||||
new_values = self.values().type(new_values_type_name, async)
|
||||
return new_type(self.indices(), new_values, self.size())
|
||||
if new_type.is_sparse:
|
||||
raise RuntimeError("Cannot cast dense tensor to sparse tensor")
|
||||
return new_type(self.size()).copy_(self, async)
|
||||
|
||||
|
||||
@ -39,16 +49,27 @@ def _cuda(self, device=None, async=False):
|
||||
if self.is_cuda:
|
||||
if device is None:
|
||||
device = torch.cuda.current_device()
|
||||
if self.get_device() != device:
|
||||
with torch.cuda.device(device):
|
||||
return type(self)(self.size()).copy_(self, async)
|
||||
else:
|
||||
if self.get_device() == device:
|
||||
return self
|
||||
else:
|
||||
if device is None:
|
||||
device = -1
|
||||
with torch.cuda.device(device):
|
||||
return self.type(getattr(torch.cuda, self.__class__.__name__), async)
|
||||
if self.is_sparse:
|
||||
new_type = getattr(torch.cuda.sparse, self.__class__.__name__)
|
||||
indices = self.indices().cuda(device, async)
|
||||
values = self.values().cuda(device, async)
|
||||
return new_type(indices, values, self.size())
|
||||
else:
|
||||
new_type = getattr(torch.cuda, self.__class__.__name__)
|
||||
return new_type(self.size()).copy_(self, async)
|
||||
|
||||
|
||||
def _rebuild_tensor(storage, storage_offset, size, stride):
|
||||
class_name = storage.__class__.__name__.replace('Storage', 'Tensor')
|
||||
module = importlib.import_module(storage.__module__)
|
||||
tensor_class = getattr(module, class_name)
|
||||
return tensor_class().set_(storage, storage_offset, size, stride)
|
||||
|
||||
|
||||
def _range(*args, **kwargs):
|
||||
|
||||
@ -9,9 +9,11 @@ import torch
|
||||
from .variable import Variable
|
||||
from .function import Function, NestedIOFunction
|
||||
from .stochastic_function import StochasticFunction
|
||||
from .gradcheck import gradcheck
|
||||
|
||||
__all__ = ['Variable', 'Function', 'StochasticFunction', 'backward']
|
||||
|
||||
|
||||
def backward(variables, grad_variables, retain_variables=False):
|
||||
"""Computes the sum of gradients of given variables w.r.t. graph leaves.
|
||||
|
||||
@ -28,7 +30,7 @@ def backward(variables, grad_variables, retain_variables=False):
|
||||
Arguments:
|
||||
variables (sequence of Variable): Variables of which the derivative will be
|
||||
computed.
|
||||
grad_variables (sequence of Variable): Gradients w.r.t. each element of
|
||||
grad_variables (sequence of Tensor): Gradients w.r.t. each element of
|
||||
corresponding variables. Required only for non-scalar variables that
|
||||
require gradient.
|
||||
retain_variables (bool): If ``True``, buffers necessary for computing
|
||||
|
||||
@ -5,4 +5,4 @@ from .reduce import *
|
||||
from .linalg import *
|
||||
from .blas import *
|
||||
from .stochastic import *
|
||||
|
||||
from .compare import *
|
||||
|
||||
@ -3,9 +3,16 @@ from ..function import Function, InplaceFunction
|
||||
import math
|
||||
|
||||
|
||||
def maybe_view(tensor, size):
|
||||
if tensor.size() == size:
|
||||
return tensor
|
||||
return tensor.contiguous().view(size)
|
||||
|
||||
|
||||
class Add(InplaceFunction):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
if self.inplace:
|
||||
self.mark_dirty(a)
|
||||
return a.add_(b)
|
||||
@ -13,12 +20,13 @@ class Add(InplaceFunction):
|
||||
return a.add(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
return grad_output, grad_output
|
||||
return grad_output, maybe_view(grad_output, self.b_size)
|
||||
|
||||
|
||||
class Sub(InplaceFunction):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
if self.inplace:
|
||||
self.mark_dirty(a)
|
||||
return a.sub_(b)
|
||||
@ -26,40 +34,43 @@ class Sub(InplaceFunction):
|
||||
return a.sub(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
return grad_output, grad_output.neg()
|
||||
return grad_output, maybe_view(grad_output.neg(), self.b_size)
|
||||
|
||||
|
||||
class Mul(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.mul(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.mul(b), grad_output.mul(a)
|
||||
return grad_output.mul(b), maybe_view(grad_output.mul(a), self.b_size)
|
||||
|
||||
|
||||
class Div(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.div(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.div(b), grad_output.neg().mul(a).div_(b).div_(b)
|
||||
return grad_output.div(b), maybe_view(grad_output.neg().mul(a).div_(b).div_(b), self.b_size)
|
||||
|
||||
|
||||
class Pow(Function):
|
||||
|
||||
def forward(self, a, b):
|
||||
self.b_size = b.size()
|
||||
self.save_for_backward(a, b)
|
||||
return a.pow(b)
|
||||
|
||||
def backward(self, grad_output):
|
||||
a, b = self.saved_tensors
|
||||
return grad_output.mul(b).mul_(a.pow(b-1)), grad_output.mul(a.pow(b)).mul_(a.log())
|
||||
return grad_output.mul(b).mul_(a.pow(b - 1)), maybe_view(grad_output.mul(a.pow(b)).mul_(a.log()), self.b_size)
|
||||
|
||||
|
||||
class AddConstant(InplaceFunction):
|
||||
@ -174,7 +185,7 @@ class PowConstant(Function):
|
||||
return grad_output.mul(self.fw_result).mul_(math.log(self.constant))
|
||||
else:
|
||||
a = self.saved_tensors[0]
|
||||
return grad_output.mul(self.constant).mul_(a.pow(self.constant-1))
|
||||
return grad_output.mul(self.constant).mul_(a.pow(self.constant - 1))
|
||||
|
||||
|
||||
class Negate(InplaceFunction):
|
||||
|
||||
@ -168,7 +168,7 @@ class Addr(_BlasBase):
|
||||
|
||||
if self.needs_input_grad[2]:
|
||||
# TODO: maybe it's better to do transpose + mv + transpose
|
||||
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output)
|
||||
grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
|
||||
if self.beta != 1:
|
||||
grad_vector2 *= self.beta
|
||||
|
||||
@ -192,11 +192,3 @@ class Dot(Function):
|
||||
grad_vector2 = vector1.mul(grad_output[0])
|
||||
|
||||
return grad_vector1, grad_vector2
|
||||
|
||||
|
||||
# TODO: cross
|
||||
# TODO: diag
|
||||
# TODO: trace
|
||||
# TODO: tril
|
||||
# TODO: triu
|
||||
|
||||
|
||||
40
torch/autograd/_functions/compare.py
Normal file
40
torch/autograd/_functions/compare.py
Normal file
@ -0,0 +1,40 @@
|
||||
import torch
|
||||
|
||||
from ..function import Function
|
||||
|
||||
|
||||
class _CompareOp(Function):
|
||||
|
||||
def __init__(self, scalar=None):
|
||||
super(_CompareOp, self).__init__()
|
||||
self.scalar = scalar
|
||||
|
||||
def forward(self, tensor1, tensor2=None):
|
||||
other = tensor2 if tensor2 is not None else self.scalar
|
||||
mask = getattr(tensor1, self.fn_name)(other)
|
||||
self.mark_non_differentiable(mask)
|
||||
return mask
|
||||
|
||||
|
||||
class Eq(_CompareOp):
|
||||
fn_name = 'eq'
|
||||
|
||||
|
||||
class Ne(_CompareOp):
|
||||
fn_name = 'ne'
|
||||
|
||||
|
||||
class Gt(_CompareOp):
|
||||
fn_name = 'gt'
|
||||
|
||||
|
||||
class Ge(_CompareOp):
|
||||
fn_name = 'ge'
|
||||
|
||||
|
||||
class Lt(_CompareOp):
|
||||
fn_name = 'lt'
|
||||
|
||||
|
||||
class Le(_CompareOp):
|
||||
fn_name = 'le'
|
||||
@ -41,5 +41,31 @@ class Triu(Function):
|
||||
def backward(self, grad_output):
|
||||
return grad_output.triu(self.diagonal_idx)
|
||||
|
||||
# TODO: trace
|
||||
|
||||
class Trace(Function):
|
||||
|
||||
def forward(self, input):
|
||||
self.isize = input.size()
|
||||
return input.new((input.trace(),))
|
||||
|
||||
def backward(self, grad_output):
|
||||
isize = self.isize
|
||||
grad_input = grad_output.new(isize).zero_()
|
||||
grad_input.view(-1)[::(isize[1] + 1)] = grad_output[0]
|
||||
return grad_input
|
||||
|
||||
|
||||
class Cross(Function):
|
||||
|
||||
def __init__(self, dim=-1):
|
||||
self.dim = dim
|
||||
|
||||
def forward(self, input, other):
|
||||
self.save_for_backward(input, other)
|
||||
return torch.cross(input, other, self.dim)
|
||||
|
||||
def backward(self, grad_output):
|
||||
input, other = self.saved_tensors
|
||||
grad_input = torch.cross(other, grad_output, self.dim)
|
||||
grad_other = torch.cross(grad_output, input, self.dim)
|
||||
return grad_input, grad_other
|
||||
|
||||
@ -165,6 +165,7 @@ class Tan(Function):
|
||||
|
||||
|
||||
class Asin(Function):
|
||||
|
||||
def forward(self, i):
|
||||
self.save_for_backward(i)
|
||||
return i.asin()
|
||||
@ -175,6 +176,7 @@ class Asin(Function):
|
||||
|
||||
|
||||
class Acos(Function):
|
||||
|
||||
def forward(self, i):
|
||||
self.save_for_backward(i)
|
||||
return i.acos()
|
||||
@ -185,6 +187,7 @@ class Acos(Function):
|
||||
|
||||
|
||||
class Atan(Function):
|
||||
|
||||
def forward(self, i):
|
||||
self.save_for_backward(i)
|
||||
return i.atan()
|
||||
|
||||
@ -4,6 +4,7 @@ from ..function import Function
|
||||
|
||||
|
||||
class _DimReduceFunction(Function):
|
||||
|
||||
def __init__(self, dim=None):
|
||||
super(_DimReduceFunction, self).__init__()
|
||||
self.dim = dim
|
||||
@ -45,13 +46,45 @@ class Prod(_DimReduceFunction):
|
||||
def backward(self, grad_output):
|
||||
if self.dim is None:
|
||||
input, = self.saved_tensors
|
||||
grad_input = grad_output.new(self.input_size).fill_(self.result)
|
||||
return grad_input.div(input)
|
||||
zero_idx = (input == 0).nonzero()
|
||||
if zero_idx.dim() == 0:
|
||||
return grad_output.mul(self.result).expand_as(input).div(input)
|
||||
elif zero_idx.size(0) > 1:
|
||||
return grad_output.new(self.input_size).zero_()
|
||||
else:
|
||||
grad_input = grad_output.new(self.input_size).zero_()
|
||||
zero_idx = tuple(zero_idx[0].cpu())
|
||||
input_copy = input.clone()
|
||||
input_copy[zero_idx] = 1.
|
||||
grad_input[zero_idx] = grad_output[0] * input_copy.prod()
|
||||
return grad_input
|
||||
else:
|
||||
input, output = self.saved_tensors
|
||||
repeats = [1 for _ in self.input_size]
|
||||
repeats[self.dim] = self.input_size[self.dim]
|
||||
return output.mul(grad_output).repeat(*repeats).div_(input)
|
||||
zero_mask = input == 0
|
||||
slice_zero_count = zero_mask.sum(self.dim)
|
||||
total_zeros = slice_zero_count.sum()
|
||||
grad_input = grad_output.mul(output).expand_as(input).div(input)
|
||||
if total_zeros == 0:
|
||||
return grad_input
|
||||
|
||||
some_zeros = slice_zero_count.gt(0).expand_as(grad_input)
|
||||
grad_input[some_zeros] = 0
|
||||
|
||||
single_zero_idx = slice_zero_count.eq(1).nonzero()
|
||||
for idx in single_zero_idx:
|
||||
idx_tuple = tuple(idx.cpu())
|
||||
input_idx_tuple = idx_tuple[:self.dim] + (slice(0, None),) + idx_tuple[self.dim + 1:]
|
||||
|
||||
# slice_mask and input_copy are 1D
|
||||
slice_mask = zero_mask[input_idx_tuple]
|
||||
input_copy = input[input_idx_tuple].clone()
|
||||
zero_idx = slice_mask.nonzero()[0, 0]
|
||||
input_copy[zero_idx] = 1.
|
||||
|
||||
grad_idx_tuple = idx_tuple[:self.dim] + (zero_idx,) + idx_tuple[self.dim + 1:]
|
||||
grad_input[grad_idx_tuple] = grad_output[idx_tuple] * input_copy.prod()
|
||||
|
||||
return grad_input
|
||||
|
||||
|
||||
class Mean(_DimReduceFunction):
|
||||
@ -139,6 +172,7 @@ class Kthvalue(_SelectionFunction):
|
||||
|
||||
|
||||
class Norm(Function):
|
||||
|
||||
def __init__(self, norm_type=2, dim=None):
|
||||
super(Norm, self).__init__()
|
||||
self.norm_type = norm_type
|
||||
|
||||
@ -83,9 +83,9 @@ class Normal(StochasticFunction):
|
||||
stddevs_cb = stddevs_sq * stddevs
|
||||
stddevs_sq += 1e-6
|
||||
stddevs_cb += 1e-6
|
||||
grad_stddevs = (grad_means * grad_means) / stddevs_cb
|
||||
grad_stddevs = (stddevs - grad_stddevs) * reward
|
||||
grad_stddevs = (stddevs_sq - (grad_means * grad_means))
|
||||
grad_stddevs /= stddevs_cb
|
||||
grad_stddevs *= reward
|
||||
grad_means /= stddevs_sq
|
||||
grad_means *= reward
|
||||
return grad_means, grad_stddevs
|
||||
|
||||
|
||||
@ -18,9 +18,8 @@ class Index(Function):
|
||||
return result
|
||||
|
||||
def backward(self, grad_output):
|
||||
# TODO: this won't have to be zeroed
|
||||
grad_input = grad_output.new(self.input_size).zero_()
|
||||
grad_input.index(self.index).copy_(grad_output)
|
||||
grad_input._set_index(self.index, grad_output)
|
||||
return grad_input
|
||||
|
||||
|
||||
@ -33,20 +32,23 @@ class SetItem(InplaceFunction):
|
||||
|
||||
def forward(self, i, value=None):
|
||||
self.mark_dirty(i)
|
||||
if value is None:
|
||||
if value is None: # value is scalar
|
||||
value = self.value
|
||||
i.set_index(self.index, value)
|
||||
else: # value is Tensor
|
||||
self.value_size = value.size()
|
||||
i._set_index(self.index, value)
|
||||
return i
|
||||
|
||||
def backward(self, grad_output):
|
||||
if self.value is None:
|
||||
if self.value is None: # value is Tensor
|
||||
grad_input = grad_output.clone()
|
||||
grad_input.set_index(self.index, 0)
|
||||
grad_input._set_index(self.index, 0)
|
||||
grad_value = grad_output.index(self.index).clone()
|
||||
grad_value = grad_value.view(self.value_size)
|
||||
return grad_input, grad_value
|
||||
else:
|
||||
grad_input = grad_output.clone()
|
||||
grad_input.set_index(self.index, 0)
|
||||
grad_input._set_index(self.index, 0)
|
||||
return grad_input
|
||||
|
||||
|
||||
@ -99,25 +101,29 @@ class View(Function):
|
||||
|
||||
def backward(self, grad_output):
|
||||
# TODO: not sure if this clone is necessary
|
||||
return grad_output.clone().view(self.input_size)
|
||||
return grad_output.contiguous().view(self.input_size)
|
||||
|
||||
|
||||
class Expand(Function):
|
||||
|
||||
def __init__(self, sizes):
|
||||
super(Expand, self).__init__()
|
||||
self.sizes = sizes
|
||||
self.expanded_dims = []
|
||||
|
||||
def forward(self, i):
|
||||
self.expanded_dims = [dim for dim, (expanded, original)
|
||||
in enumerate(zip(self.sizes, i.size()))
|
||||
if expanded != original]
|
||||
result = i.expand(*self.sizes)
|
||||
self.num_unsqueezed = len(self.sizes) - i.dim()
|
||||
self.expanded_dims = [dim for dim, (expanded, original)
|
||||
in enumerate(zip(self.sizes[self.num_unsqueezed:], i.size()))
|
||||
if expanded != original]
|
||||
self.mark_shared_storage((i, result))
|
||||
return result
|
||||
|
||||
def backward(self, grad_output):
|
||||
grad_input = grad_output
|
||||
for i in range(self.num_unsqueezed):
|
||||
grad_input = grad_input.sum(0).squeeze(0)
|
||||
for dim in self.expanded_dims:
|
||||
grad_input = grad_input.sum(dim)
|
||||
return grad_input
|
||||
@ -288,7 +294,7 @@ class IndexSelect(Function):
|
||||
if self.needs_input_grad[0]:
|
||||
index, = self.saved_tensors
|
||||
grad_tensor = grad_output.new(*self.input_size).zero_()
|
||||
grad_tensor.index_copy_(self.dim, index, grad_output)
|
||||
grad_tensor.index_add_(self.dim, index, grad_output)
|
||||
|
||||
return grad_tensor, None
|
||||
|
||||
@ -304,7 +310,7 @@ class Concat(Function):
|
||||
return torch.cat(inputs, self.dim)
|
||||
|
||||
def backward(self, grad_output):
|
||||
return tuple(grad_output.narrow(self.dim, end-size, size) for size, end
|
||||
return tuple(grad_output.narrow(self.dim, end - size, size) for size, end
|
||||
in zip(self.input_sizes, _accumulate(self.input_sizes)))
|
||||
|
||||
|
||||
@ -474,7 +480,7 @@ class _MultiSelectionFunction(Function):
|
||||
|
||||
class Sort(_MultiSelectionFunction):
|
||||
|
||||
def __init__(self, dim=None, descending=False, return_indices=False):
|
||||
def __init__(self, dim=None, descending=False, return_indices=True):
|
||||
super(Sort, self).__init__(dim, return_indices)
|
||||
self.descending = descending
|
||||
|
||||
@ -486,14 +492,14 @@ class Sort(_MultiSelectionFunction):
|
||||
|
||||
class Topk(_MultiSelectionFunction):
|
||||
|
||||
def __init__(self, k, dim=None, largest=True, sort=True, return_indices=False):
|
||||
def __init__(self, k, dim=None, largest=True, sort=True, return_indices=True):
|
||||
super(Topk, self).__init__(dim, return_indices)
|
||||
self.k = k
|
||||
self.largest = largest
|
||||
self.sort = sort
|
||||
|
||||
def forward(self, input):
|
||||
dim = self.dim if self.dim is not None else input.dim()-1
|
||||
dim = self.dim if self.dim is not None else input.dim() - 1
|
||||
self.args = (self.k, dim, self.largest, self.sort)
|
||||
return super(Topk, self).forward(input)
|
||||
|
||||
@ -567,9 +573,41 @@ class Scatter(InplaceFunction):
|
||||
return grad_input, None, grad_source
|
||||
|
||||
|
||||
# TODO: kthvalue
|
||||
# TODO: repeat
|
||||
# TODO: sort
|
||||
# TODO: split
|
||||
# TODO: topk
|
||||
class Repeat(Function):
|
||||
|
||||
def __init__(self, repeats):
|
||||
super(Repeat, self).__init__()
|
||||
self.repeats = repeats
|
||||
|
||||
def forward(self, input):
|
||||
return input.repeat(self.repeats)
|
||||
|
||||
def backward(self, grad_output):
|
||||
grad_input = grad_output
|
||||
for dim, repeat in enumerate(self.repeats):
|
||||
if repeat == 1:
|
||||
continue
|
||||
grad_input = sum(grad_input.chunk(repeat, dim))
|
||||
return grad_input
|
||||
|
||||
|
||||
class Cumsum(Function):
|
||||
|
||||
def __init__(self, dim):
|
||||
super(Cumsum, self).__init__()
|
||||
self.dim = dim
|
||||
|
||||
def forward(self, input):
|
||||
return torch.cumsum(input, dim=self.dim)
|
||||
|
||||
def backward(self, grad_output):
|
||||
grad_input = torch.cumsum(-grad_output, dim=self.dim)
|
||||
|
||||
end_idx = grad_input.size(self.dim) - 1
|
||||
grad_sum = grad_input.narrow(self.dim, end_idx, 1)
|
||||
grad_input -= grad_sum.expand_as(grad_input)
|
||||
grad_input += grad_output
|
||||
return grad_input
|
||||
|
||||
|
||||
# TODO: unfold
|
||||
|
||||
@ -2,7 +2,6 @@ import torch
|
||||
import torch._C as _C
|
||||
import torch.utils.hooks as hooks
|
||||
from collections import OrderedDict
|
||||
from itertools import chain
|
||||
|
||||
|
||||
class Function(_C._FunctionBase):
|
||||
@ -98,21 +97,22 @@ class Function(_C._FunctionBase):
|
||||
**This should be called at most once, only from inside the**
|
||||
:func:`forward` **method, and all arguments should be outputs.**
|
||||
|
||||
This will mark outputs as non requiring gradient, increasing the
|
||||
This will mark outputs as not requiring gradients, increasing the
|
||||
efficiency of backward computation. You still need to accept a gradient
|
||||
for this output in :meth:`~Function.backward`, but it's always going to
|
||||
for each output in :meth:`~Function.backward`, but it's always going to
|
||||
be ``None``.
|
||||
|
||||
This is used e.g. for indices returned from a max :class:`Function`.
|
||||
"""
|
||||
self.non_differentiable = args
|
||||
|
||||
def register_hook(self, hook):
|
||||
if self._backward_hooks is None:
|
||||
self._backward_hooks = OrderedDict()
|
||||
handle = hooks.RemovableHandle(self._backward_hooks)
|
||||
self._backward_hooks[id(handle)] = hook
|
||||
return handle
|
||||
@staticmethod
|
||||
def _register_hook(backward_hooks, hook):
|
||||
if backward_hooks is None:
|
||||
backward_hooks = OrderedDict()
|
||||
handle = hooks.RemovableHandle(backward_hooks)
|
||||
backward_hooks[handle.id] = hook
|
||||
return backward_hooks, handle
|
||||
|
||||
def forward(self, *input):
|
||||
"""Performs the operation.
|
||||
@ -157,6 +157,7 @@ def _nested_map(condition, fn):
|
||||
"an input object of type " + torch.typename(obj))
|
||||
return _map
|
||||
|
||||
|
||||
def _iter_filter(condition):
|
||||
def _iter(obj):
|
||||
if condition(obj):
|
||||
@ -173,13 +174,25 @@ def _iter_filter(condition):
|
||||
return _iter
|
||||
|
||||
|
||||
def _unflatten(input, proto):
|
||||
# unflatten a list or tuple input into a nested list/tuple structure
|
||||
# specified by proto
|
||||
def unflatten_helper(input, proto):
|
||||
res = []
|
||||
if not isinstance(proto, (list, tuple)):
|
||||
return input[0], input[1:]
|
||||
for e in proto:
|
||||
res_e, input = unflatten_helper(input, e)
|
||||
res.append(res_e)
|
||||
return type(proto)(res), input
|
||||
|
||||
return unflatten_helper(input, proto)[0]
|
||||
|
||||
_iter_variables = _iter_filter(lambda o: isinstance(o, torch.autograd.Variable))
|
||||
_iter_tensors = _iter_filter(torch.is_tensor)
|
||||
_iter_None_tensors = _iter_filter(lambda o: o is None or torch.is_tensor(o))
|
||||
_map_variable_tensor = _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: o.data)
|
||||
|
||||
def _map_tensor_fromiter(itr):
|
||||
return _nested_map(lambda o: torch.is_tensor(o), lambda o: next(itr))
|
||||
|
||||
class NestedIOFunction(Function):
|
||||
|
||||
@ -188,14 +201,20 @@ class NestedIOFunction(Function):
|
||||
flat_input = tuple(_iter_variables(input))
|
||||
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
|
||||
nested_output = self._nested_output
|
||||
nested_variables = _map_tensor_fromiter(iter(flat_output))(self._nested_output)
|
||||
nested_variables = _unflatten(flat_output, self._nested_output)
|
||||
return nested_variables
|
||||
|
||||
def backward(self, *gradients):
|
||||
nested_gradients = _map_tensor_fromiter(iter(gradients))(self._nested_output)
|
||||
def _do_backward(self, gradients, retain_variables):
|
||||
self.retain_variables = retain_variables
|
||||
result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
|
||||
if not retain_variables:
|
||||
del self._nested_output
|
||||
result = self.backward_extended(*nested_gradients)
|
||||
del self._to_save_nested
|
||||
return result
|
||||
|
||||
def backward(self, *gradients):
|
||||
nested_gradients = _unflatten(gradients, self._nested_output)
|
||||
result = self.backward_extended(*nested_gradients)
|
||||
return tuple(_iter_None_tensors(result))
|
||||
|
||||
__call__ = _do_forward
|
||||
@ -214,7 +233,7 @@ class NestedIOFunction(Function):
|
||||
@property
|
||||
def saved_tensors(self):
|
||||
flat_tensors = super(NestedIOFunction, self).saved_tensors
|
||||
return _map_tensor_fromiter(iter(flat_tensors))(self._to_save_nested)
|
||||
return _unflatten(flat_tensors, self._to_save_nested)
|
||||
|
||||
def mark_dirty(self, *args, **kwargs):
|
||||
self.dirty_tensors = tuple(_iter_tensors((args, kwargs)))
|
||||
|
||||
160
torch/autograd/gradcheck.py
Normal file
160
torch/autograd/gradcheck.py
Normal file
@ -0,0 +1,160 @@
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
|
||||
|
||||
def iter_gradients(x):
|
||||
if isinstance(x, Variable):
|
||||
if x.requires_grad:
|
||||
yield x.grad.data if x.grad is not None else None
|
||||
else:
|
||||
for elem in x:
|
||||
for result in iter_gradients(elem):
|
||||
yield result
|
||||
|
||||
|
||||
def zero_gradients(i):
|
||||
for t in iter_gradients(i):
|
||||
if t is not None:
|
||||
t.zero_()
|
||||
|
||||
|
||||
def make_jacobian(input, num_out):
|
||||
if isinstance(input, Variable) and not input.requires_grad:
|
||||
return None
|
||||
if torch.is_tensor(input) or isinstance(input, Variable):
|
||||
return torch.zeros(input.nelement(), num_out)
|
||||
else:
|
||||
return type(input)(filter(lambda x: x is not None,
|
||||
(make_jacobian(elem, num_out) for elem in input)))
|
||||
|
||||
|
||||
def iter_tensors(x, only_requiring_grad=False):
|
||||
if torch.is_tensor(x):
|
||||
yield x
|
||||
elif isinstance(x, Variable):
|
||||
if x.requires_grad or not only_requiring_grad:
|
||||
yield x.data
|
||||
else:
|
||||
for elem in x:
|
||||
for result in iter_tensors(elem, only_requiring_grad):
|
||||
yield result
|
||||
|
||||
|
||||
def contiguous(input):
|
||||
if torch.is_tensor(input):
|
||||
return input.contiguous()
|
||||
elif isinstance(input, Variable):
|
||||
return input.contiguous()
|
||||
else:
|
||||
return type(input)(contiguous(e) for e in input)
|
||||
|
||||
|
||||
def get_numerical_jacobian(fn, input, target, eps=1e-3):
|
||||
# To be able to use .view(-1) input must be contiguous
|
||||
input = contiguous(input)
|
||||
output_size = fn(input).numel()
|
||||
jacobian = make_jacobian(target, output_size)
|
||||
|
||||
# It's much easier to iterate over flattened lists of tensors.
|
||||
# These are reference to the same objects in jacobian, so any changes
|
||||
# will be reflected in it as well.
|
||||
x_tensors = [t for t in iter_tensors(target, True)]
|
||||
j_tensors = [t for t in iter_tensors(jacobian)]
|
||||
|
||||
outa = torch.DoubleTensor(output_size)
|
||||
outb = torch.DoubleTensor(output_size)
|
||||
|
||||
# TODO: compare structure
|
||||
for x_tensor, d_tensor in zip(x_tensors, j_tensors):
|
||||
flat_tensor = x_tensor.view(-1)
|
||||
for i in range(flat_tensor.nelement()):
|
||||
orig = flat_tensor[i]
|
||||
flat_tensor[i] = orig - eps
|
||||
outa.copy_(fn(input))
|
||||
flat_tensor[i] = orig + eps
|
||||
outb.copy_(fn(input))
|
||||
flat_tensor[i] = orig
|
||||
|
||||
outb.add_(-1, outa).div_(2 * eps)
|
||||
d_tensor[i] = outb
|
||||
|
||||
return jacobian
|
||||
|
||||
|
||||
def get_analytical_jacobian(input, output):
|
||||
jacobian = make_jacobian(input, output.numel())
|
||||
grad_output = output.data.clone().zero_()
|
||||
flat_grad_output = grad_output.view(-1)
|
||||
|
||||
for i in range(flat_grad_output.numel()):
|
||||
flat_grad_output.zero_()
|
||||
flat_grad_output[i] = 1
|
||||
zero_gradients(input)
|
||||
output.backward(grad_output, retain_variables=True)
|
||||
for jacobian_x, d_x in zip(jacobian, iter_gradients(input)):
|
||||
if d_x is None:
|
||||
jacobian_x[:, i].zero_()
|
||||
else:
|
||||
jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x
|
||||
|
||||
return jacobian
|
||||
|
||||
|
||||
def _as_tuple(x):
|
||||
if isinstance(x, tuple):
|
||||
return x
|
||||
elif isinstance(x, list):
|
||||
return tuple(x)
|
||||
else:
|
||||
return x,
|
||||
|
||||
|
||||
def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3):
|
||||
"""Check gradients computed via small finite differences
|
||||
against analytical gradients
|
||||
|
||||
The check between numerical and analytical has the same behaviour as
|
||||
numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
|
||||
meaning it check that
|
||||
absolute(a - n) <= (atol + rtol * absolute(n))
|
||||
is true for all elements of analytical jacobian a and numerical jacobian n.
|
||||
|
||||
Args:
|
||||
func: Python function that takes Variable inputs and returns
|
||||
a tuple of Variables
|
||||
inputs: tuple of Variables
|
||||
eps: perturbation for finite differences
|
||||
atol: absolute tolerance
|
||||
rtol: relative tolerance
|
||||
|
||||
Returns:
|
||||
True if all differences satisfy allclose condition
|
||||
"""
|
||||
output = func(*inputs)
|
||||
output = _as_tuple(output)
|
||||
|
||||
for i, o in enumerate(output):
|
||||
if not o.requires_grad:
|
||||
continue
|
||||
|
||||
def fn(input):
|
||||
return _as_tuple(func(*input))[i].data
|
||||
|
||||
numerical = get_numerical_jacobian(fn, inputs, inputs, eps)
|
||||
analytical = get_analytical_jacobian(_as_tuple(inputs), o)
|
||||
|
||||
for a, n in zip(analytical, numerical):
|
||||
if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
|
||||
return False
|
||||
|
||||
# check if the backward multiplies by grad_output
|
||||
zero_gradients(inputs)
|
||||
output = _as_tuple(func(*inputs))
|
||||
torch.autograd.backward(output, [o.data.new(o.size()).zero_() for o in output])
|
||||
for i in inputs:
|
||||
if i.grad is None:
|
||||
continue
|
||||
if not i.grad.data.eq(0).all():
|
||||
return False
|
||||
|
||||
return True
|
||||
@ -1,7 +1,10 @@
|
||||
import torch
|
||||
from numbers import Number
|
||||
from .function import Function
|
||||
|
||||
_NOT_PROVIDED = object()
|
||||
|
||||
|
||||
class StochasticFunction(Function):
|
||||
|
||||
def __init__(self):
|
||||
@ -16,6 +19,26 @@ class StochasticFunction(Function):
|
||||
self.reward = None
|
||||
return result
|
||||
|
||||
def _reinforce(self, reward):
|
||||
self.reward = reward
|
||||
def _do_forward(self, *inputs):
|
||||
result = super(StochasticFunction, self)._do_forward(*inputs)
|
||||
# save output type and size, to check the type of reward
|
||||
assert isinstance(result, torch.autograd.Variable), \
|
||||
"stochastic functions support only a single output at the moment"
|
||||
self.reward_info = (type(inputs[0].data), result.size())
|
||||
return result
|
||||
|
||||
__call__ = _do_forward
|
||||
|
||||
def _reinforce(self, reward):
|
||||
is_number = isinstance(reward, Number)
|
||||
if not is_number and type(reward) != self.reward_info[0]:
|
||||
raise TypeError("mismatch between reward and output type: got {}, "
|
||||
"but expected {}".format(torch.typename(reward),
|
||||
torch.typename(self.reward_info[0])))
|
||||
if not is_number and reward.size() != self.reward_info[1]:
|
||||
raise ValueError("got reward of size {}, but expected a tensor of size {}".format(
|
||||
'x'.join(map(str, reward.size())),
|
||||
'x'.join(map(str, self.reward_info[1]))))
|
||||
if self.reward is not _NOT_PROVIDED:
|
||||
raise RuntimeError("you can only reinforce a stochastic Function once")
|
||||
self.reward = reward
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import sys
|
||||
import torch._C as _C
|
||||
from collections import OrderedDict
|
||||
import torch.sparse as sparse
|
||||
import torch.utils.hooks as hooks
|
||||
|
||||
from ._functions import *
|
||||
@ -56,30 +57,6 @@ class Variable(_C._VariableBase):
|
||||
'is_cuda',
|
||||
}
|
||||
|
||||
@property
|
||||
def grad(self):
|
||||
if self.requires_grad and self._grad is None:
|
||||
# TODO: this won't have to be zeroed in the future
|
||||
self._grad = Variable(self.data.new(self.data.size()).zero_())
|
||||
return self._grad
|
||||
|
||||
@property
|
||||
def requires_grad(self):
|
||||
return self._requires_grad
|
||||
|
||||
@requires_grad.setter
|
||||
def requires_grad(self, value):
|
||||
if self.creator is not None:
|
||||
if value is False:
|
||||
hint = (" If you want to use a computed variable in a subgraph "
|
||||
"that doesn't require differentiation use "
|
||||
"var_no_grad = var.detach().")
|
||||
else:
|
||||
hint = ''
|
||||
raise RuntimeError("you can only change requires_grad flags of "
|
||||
"leaf variables." + hint)
|
||||
self._requires_grad = value
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name in self._fallthrough_methods:
|
||||
return getattr(self.data, name)
|
||||
@ -108,19 +85,30 @@ class Variable(_C._VariableBase):
|
||||
if self.creator is not None:
|
||||
raise RuntimeError("Only Variables created explicitly by the user "
|
||||
"(graph leaves) support the deepcopy protocol at the moment")
|
||||
result = type(self)(self.data.clone(), requires_grad=self.requires_grad,
|
||||
volatile=self.volatile)
|
||||
result = type(self)(self.data.clone())
|
||||
result.requires_grad = self.requires_grad
|
||||
result.volatile = self.volatile
|
||||
memo[id(self)] = result
|
||||
return result
|
||||
|
||||
def __reduce_ex__(self, proto):
|
||||
state = (self.requires_grad, self.volatile, self._backward_hooks)
|
||||
if proto > 1:
|
||||
return super(Variable, self).__reduce_ex__(proto)
|
||||
return type(self), (self.data,), state
|
||||
if sys.version_info[0] == 2:
|
||||
from copy_reg import __newobj__
|
||||
else:
|
||||
from copyreg import __newobj__
|
||||
return __newobj__, (type(self),), self.__getstate__()
|
||||
return __newobj__, (type(self), self.data), state
|
||||
|
||||
def __setstate__(self, state):
|
||||
if len(state) == 5:
|
||||
# legacy serialization of Variable
|
||||
self.data = state[0]
|
||||
state = (state[3], state[4], state[2])
|
||||
if self.creator is not None:
|
||||
raise RuntimeError('__setstate__ can be only called on leaf variables')
|
||||
self.requires_grad, self.volatile, self._backward_hooks = state
|
||||
|
||||
def __repr__(self):
|
||||
return 'Variable containing:' + self.data.__repr__()
|
||||
@ -131,7 +119,7 @@ class Variable(_C._VariableBase):
|
||||
The graph is differentiated using the chain rule. If the variable is
|
||||
non-scalar (i.e. its data has more than one element) and requires
|
||||
gradient, the function additionaly requires specifying ``gradient``.
|
||||
It should be a tensor of matching type and location, that containins
|
||||
It should be a tensor of matching type and location, that contains
|
||||
the gradient of the differentiated function w.r.t. ``self``.
|
||||
|
||||
This function accumulates gradients in the leaves - you might need to zero
|
||||
@ -151,7 +139,9 @@ class Variable(_C._VariableBase):
|
||||
raise RuntimeError('calling backward on a volatile variable')
|
||||
if gradient is None and self.requires_grad:
|
||||
if self.data.numel() != 1:
|
||||
raise RuntimeError('backward should be called only on a scalar (i.e. 1-element tensor) or with gradient w.r.t. the variable')
|
||||
raise RuntimeError(
|
||||
'backward should be called only on a scalar (i.e. 1-element tensor) '
|
||||
'or with gradient w.r.t. the variable')
|
||||
gradient = self.data.new().resize_as_(self.data).fill_(1)
|
||||
self._execution_engine.run_backward((self,), (gradient,), retain_variables)
|
||||
|
||||
@ -161,7 +151,7 @@ class Variable(_C._VariableBase):
|
||||
The hook will be called every time a gradient with respect to the
|
||||
variable is computed. The hook should have the following signature::
|
||||
|
||||
hook(grad) -> Tensor or None
|
||||
hook(grad) -> Variable or None
|
||||
|
||||
The hook should not modify its argument, but it can optionally return
|
||||
a new gradient which will be used in place of :attr:`grad`.
|
||||
@ -190,22 +180,9 @@ class Variable(_C._VariableBase):
|
||||
if self.creator is not None:
|
||||
self.creator._register_hook_dict(self)
|
||||
handle = hooks.RemovableHandle(self._backward_hooks)
|
||||
self._backward_hooks[id(handle)] = hook
|
||||
self._backward_hooks[handle.id] = hook
|
||||
return handle
|
||||
|
||||
def _do_backward(self, grad_output, retain_variables):
|
||||
assert len(grad_output) == 1
|
||||
assert self._version == 0 and self.creator is None, \
|
||||
"leaf variable was used in an inplace operation"
|
||||
unpacked_grad = grad_output[0]
|
||||
if self._backward_hooks:
|
||||
for hook in self._backward_hooks.values():
|
||||
result = hook(unpacked_grad)
|
||||
if result is not None:
|
||||
unpacked_grad = result
|
||||
self.grad.data.add_(unpacked_grad)
|
||||
return tuple()
|
||||
|
||||
def reinforce(self, reward):
|
||||
"""Registers a reward obtained as a result of a stochastic process.
|
||||
|
||||
@ -223,8 +200,25 @@ class Variable(_C._VariableBase):
|
||||
self.creator._reinforce(reward)
|
||||
|
||||
def detach(self):
|
||||
"""Detaches the Variable from the graph that created it."""
|
||||
return NoGrad()(self)
|
||||
"""Returns a new Variable, detached from the current graph.
|
||||
|
||||
Result will never require gradient. If the input is volatile, the output
|
||||
will be volatile too.
|
||||
|
||||
.. note::
|
||||
|
||||
Returned Variable uses the same data tensor, as the original one, and
|
||||
in-place modifications on either of them will be seen, and may trigger
|
||||
errors in correctness checks.
|
||||
"""
|
||||
result = NoGrad()(self) # this is needed, because it merges version counters
|
||||
result._creator = None
|
||||
return result
|
||||
|
||||
def detach_(self):
|
||||
"""Detaches the Variable from the graph that created it, making it a leaf."""
|
||||
self._creator = None
|
||||
self.requires_grad = False
|
||||
|
||||
def contiguous(self):
|
||||
self.data = self.data.contiguous()
|
||||
@ -424,12 +418,6 @@ class Variable(_C._VariableBase):
|
||||
def trunc(self):
|
||||
return Trunc()(self)
|
||||
|
||||
def floor(self):
|
||||
return Floor()(self)
|
||||
|
||||
def ceil(self):
|
||||
return Ceil()(self)
|
||||
|
||||
def fmod(self, value):
|
||||
return Fmod(value)(self)
|
||||
|
||||
@ -482,6 +470,40 @@ class Variable(_C._VariableBase):
|
||||
def view_as(self, tensor):
|
||||
return View(*tensor.size())(self)
|
||||
|
||||
def split(self, split_size, dim=0):
|
||||
return torch.split(self, split_size, dim)
|
||||
|
||||
def repeat(self, *repeats):
|
||||
if len(repeats) == 1 and isinstance(repeats[0], torch.Size):
|
||||
repeats = repeats[0]
|
||||
else:
|
||||
repeats = torch.Size(repeats)
|
||||
return Repeat(repeats)(self)
|
||||
|
||||
def cumsum(self, dim):
|
||||
return Cumsum(dim)(self)
|
||||
|
||||
def var(self, dim=None, unbiased=True):
|
||||
mean = self.mean(dim)
|
||||
if dim is None:
|
||||
mean = mean.view(*(1 for s in self.size()))
|
||||
mean_expanded = mean.expand_as(self)
|
||||
zero_centered = self.sub(mean_expanded)
|
||||
var = zero_centered.mul(zero_centered).sum(dim)
|
||||
numel = self.numel() if dim is None else self.size(dim)
|
||||
return var.div(numel - int(unbiased))
|
||||
|
||||
def std(self, dim=None, unbiased=True):
|
||||
return self.var(dim, unbiased).sqrt()
|
||||
|
||||
def renorm(self, norm_type, dim, maxnorm):
|
||||
t = self.transpose(dim, 0)
|
||||
flat = t.contiguous().view(self.size(0), -1)
|
||||
norms = flat.norm(norm_type, 1)
|
||||
norms = norms.clamp(max=maxnorm).div(norms.add(1e-7))
|
||||
flat_out = flat.mul(norms.expand_as(flat))
|
||||
return flat_out.view(t.size()).transpose(dim, 0)
|
||||
|
||||
@staticmethod
|
||||
def _static_blas(cls, args, inplace):
|
||||
num_args = len(args)
|
||||
@ -641,7 +663,7 @@ class Variable(_C._VariableBase):
|
||||
|
||||
def narrow(self, dim, start_index, length):
|
||||
index = tuple(slice(None, None) for _ in range(dim)) + \
|
||||
(slice(start_index, start_index+length),)
|
||||
(slice(start_index, start_index + length),)
|
||||
|
||||
return Index(index)(self)
|
||||
|
||||
@ -666,12 +688,54 @@ class Variable(_C._VariableBase):
|
||||
def triu(self, diagonal_idx=0):
|
||||
return Triu(diagonal_idx)(self)
|
||||
|
||||
def trace(self):
|
||||
return Trace()(self)
|
||||
|
||||
def cross(self, other, dim=-1):
|
||||
return Cross(dim)(self, other)
|
||||
|
||||
def multinomial(self, num_samples=1, with_replacement=False):
|
||||
return Multinomial(num_samples, with_replacement)(self)
|
||||
|
||||
def bernoulli(self):
|
||||
return Bernoulli()(self)
|
||||
|
||||
def eq(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Eq()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Eq(other)(self)
|
||||
|
||||
def ne(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Ne()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Ne(other)(self)
|
||||
|
||||
def gt(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Gt()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Gt(other)(self)
|
||||
|
||||
def ge(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Ge()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Ge(other)(self)
|
||||
|
||||
def lt(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Lt()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Lt(other)(self)
|
||||
|
||||
def le(self, other):
|
||||
if isinstance(other, Variable):
|
||||
return Le()(self, other)
|
||||
assert not torch.is_tensor(other), "can't compare Variable and tensor"
|
||||
return Le(other)(self)
|
||||
|
||||
def __add__(self, other):
|
||||
return self.add(other)
|
||||
__radd__ = __add__
|
||||
@ -741,6 +805,30 @@ class Variable(_C._VariableBase):
|
||||
def __iter__(self):
|
||||
return iter(map(lambda i: self[i], range(self.size(0))))
|
||||
|
||||
def __mod__(self, other):
|
||||
return self.remainder(other)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.eq(other)
|
||||
|
||||
def __ne__(self, other):
|
||||
return self.ne(other)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self.lt(other)
|
||||
|
||||
def __le__(self, other):
|
||||
return self.le(other)
|
||||
|
||||
def __gt__(self, other):
|
||||
return self.gt(other)
|
||||
|
||||
def __ge__(self, other):
|
||||
return self.ge(other)
|
||||
|
||||
def __hash__(self):
|
||||
return id(self)
|
||||
|
||||
class _torch(object):
|
||||
|
||||
@staticmethod
|
||||
@ -748,11 +836,11 @@ class Variable(_C._VariableBase):
|
||||
return Concat(dim)(*iterable)
|
||||
|
||||
@staticmethod
|
||||
def normal(means, stddev=1):
|
||||
if isinstance(stddev, Variable):
|
||||
return Normal()(means, stddev)
|
||||
def normal(means, std=1):
|
||||
if isinstance(std, Variable):
|
||||
return Normal()(means, std)
|
||||
else:
|
||||
return Normal(stddev)(means)
|
||||
return Normal(std)(means)
|
||||
|
||||
@staticmethod
|
||||
def _blas(cls, args, inplace):
|
||||
|
||||
@ -1,43 +1,32 @@
|
||||
import torch._C as _C
|
||||
import ctypes
|
||||
import warnings
|
||||
import torch.cuda
|
||||
import sys
|
||||
import os.path as path
|
||||
import torch
|
||||
import warnings
|
||||
|
||||
enabled = True # set to False to globally disable cuDNN
|
||||
|
||||
lib = None
|
||||
# TODO: fix libname for Windows
|
||||
__cudnn_version = None
|
||||
# TODO: dynamic version checks via cudnnGetVersion
|
||||
# TODO: load 5.1.3 if using CUDA 7.5 and 5.1.5 if using CUDA 8.0
|
||||
thisdir = path.dirname(__file__)
|
||||
libpaths = ['', path.join(thisdir, '../../lib')]
|
||||
if sys.platform.startswith('linux'):
|
||||
libnames = ['libcudnn.so.5.1.5', 'libcudnn.so.5.1.3', 'libcudnn.so.5.0.5', 'libcudnn.so.5.1.10']
|
||||
elif sys.platform == 'darwin':
|
||||
libnames = ['libcudnn.5.dylib']
|
||||
else:
|
||||
libnames = []
|
||||
|
||||
def _loadlib():
|
||||
global lib
|
||||
loaded = False
|
||||
for libpath in libpaths:
|
||||
for libname in libnames:
|
||||
try:
|
||||
lib = ctypes.cdll.LoadLibrary(path.join(libpath, libname))
|
||||
loaded = True
|
||||
break
|
||||
except OSError:
|
||||
continue
|
||||
if loaded:
|
||||
break
|
||||
if loaded:
|
||||
|
||||
def _libcudnn():
|
||||
global lib, __cudnn_version
|
||||
if lib is None:
|
||||
lib = ctypes.cdll.LoadLibrary(None)
|
||||
if hasattr(lib, 'cudnnGetErrorString'):
|
||||
lib.cudnnGetErrorString.restype = ctypes.c_char_p
|
||||
__cudnn_version = lib.cudnnGetVersion()
|
||||
else:
|
||||
lib = None
|
||||
raise OSError("Could not load cuDNN")
|
||||
return lib
|
||||
|
||||
|
||||
def version():
|
||||
if _libcudnn() is None:
|
||||
return None
|
||||
return __cudnn_version
|
||||
|
||||
|
||||
def is_acceptable(tensor):
|
||||
if not enabled:
|
||||
@ -46,57 +35,30 @@ def is_acceptable(tensor):
|
||||
isinstance(tensor, torch.cuda.FloatTensor) or
|
||||
isinstance(tensor, torch.cuda.DoubleTensor)):
|
||||
return False
|
||||
if lib is None:
|
||||
try:
|
||||
_loadlib()
|
||||
except Exception:
|
||||
if not torch._C.has_cudnn:
|
||||
warnings.warn(
|
||||
"PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "
|
||||
"PyTorch making sure the library is visible to the build system.")
|
||||
return False
|
||||
if _libcudnn() is None:
|
||||
warnings.warn('cuDNN library not found. Check your {libpath}'.format(
|
||||
libpath={
|
||||
'darwin': 'DYLD_LIBRARY_PATH',
|
||||
'win32': 'PATH'
|
||||
}.get(sys.platform, 'LD_LIBRARY_PATH')))
|
||||
return False
|
||||
if not _C.has_cudnn:
|
||||
warnings.warn("cuDNN library has been detected, but your pytorch "
|
||||
"installation was compiled without support for it. You "
|
||||
"might want to rebuild pytorch, making sure the library "
|
||||
"is visible to the build system.")
|
||||
return False
|
||||
return True
|
||||
|
||||
__cudnn_version = []
|
||||
def version():
|
||||
if not lib:
|
||||
raise RuntimeError("cuDNN not initialized")
|
||||
if len(__cudnn_version) == 0:
|
||||
__cudnn_version.append(lib.cudnnGetVersion())
|
||||
return __cudnn_version[0]
|
||||
|
||||
_handles = {}
|
||||
|
||||
benchmark = False
|
||||
verbose = False
|
||||
workspace_limit = None
|
||||
|
||||
CUDNN_DATA_FLOAT = 0
|
||||
CUDNN_DATA_DOUBLE = 1
|
||||
CUDNN_DATA_HALF = 2
|
||||
|
||||
CUDNN_CONVOLUTION = 0
|
||||
CUDNN_CROSS_CORRELATION = 1
|
||||
|
||||
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0
|
||||
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1
|
||||
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2
|
||||
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE = 0
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST = 1
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT = 2
|
||||
|
||||
CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE = 0
|
||||
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST = 1
|
||||
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT = 2
|
||||
|
||||
CUDNN_TENSOR_NCHW = 0
|
||||
CUDNN_TENSOR_NHWC = 1
|
||||
|
||||
@ -108,6 +70,11 @@ CUDNN_GRU = 3
|
||||
CUDNN_LINEAR_INPUT = 0
|
||||
CUDNN_SKIP_INPUT = 1
|
||||
|
||||
CUDNN_RNN_ALGO_STANDARD = 0
|
||||
CUDNN_RNN_ALGO_PERSIST_STATIC = 1
|
||||
CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2
|
||||
|
||||
|
||||
class CuDNNHandle:
|
||||
def __init__(self):
|
||||
ptr = ctypes.c_void_p()
|
||||
@ -117,6 +84,7 @@ class CuDNNHandle:
|
||||
def __del__(self):
|
||||
check_error(lib.cudnnDestroy(self))
|
||||
|
||||
|
||||
class CuDNNError(RuntimeError):
|
||||
def __init__(self, status):
|
||||
self.status = status
|
||||
@ -161,40 +129,21 @@ class TensorDescriptorArray(object):
|
||||
def __getitem__(self, key):
|
||||
return ctypes.c_void_p(self.ptrs[key])
|
||||
|
||||
def set(self, tensor):
|
||||
self._type = tensor.type()
|
||||
self._size = tensor.size()
|
||||
self._stride = tensor.stride()
|
||||
def set_all(self, tensor):
|
||||
_type = _typemap[tensor.type()]
|
||||
_ndim = tensor.dim()
|
||||
_size = int_array(tensor.size())
|
||||
_stride = int_array(tensor.stride())
|
||||
for ptr in self.ptrs:
|
||||
check_error(lib.cudnnSetTensorNdDescriptor(
|
||||
ctypes.c_void_p(ptr), _typemap[tensor.type()], tensor.dim(),
|
||||
int_array(tensor.size()), int_array(tensor.stride())))
|
||||
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
|
||||
|
||||
def as_tuple(self):
|
||||
return (self._type, tuple(self._size), tuple(self._stride))
|
||||
def set_raw(self, i, _type, _ndim, _size, _stride):
|
||||
ptr = self.ptrs[i]
|
||||
check_error(lib.cudnnSetTensorNdDescriptor(
|
||||
ctypes.c_void_p(ptr), _type, _ndim, _size, _stride))
|
||||
|
||||
|
||||
class ConvolutionDescriptor(object):
    """Owns a cuDNN convolution descriptor.

    The raw descriptor pointer is stored in ``_as_parameter_`` so an instance
    can be passed directly as a ctypes call argument.
    """

    def __init__(self):
        # cuDNN fills in the pointer through the by-reference argument.
        handle_ptr = ctypes.c_void_p()
        status = lib.cudnnCreateConvolutionDescriptor(ctypes.byref(handle_ptr))
        check_error(status)
        self._as_parameter_ = handle_ptr

    def __del__(self):
        status = lib.cudnnDestroyConvolutionDescriptor(self._as_parameter_)
        check_error(status)
        del self._as_parameter_

    def set(self, typename, pad, stride):
        """Configure a 2-d cross-correlation with the given padding and stride.

        ``typename`` is looked up in ``_typemap`` to obtain the cuDNN data
        type. ``pad`` and ``stride`` are remembered for ``as_tuple``.
        """
        self._pad = pad
        self._stride = stride
        unit_upscale = int_array([1, 1])
        status = lib.cudnnSetConvolutionNdDescriptor(
            self, 2, int_array(pad), int_array(stride), unit_upscale,
            CUDNN_CROSS_CORRELATION, _typemap[typename])
        check_error(status)

    def as_tuple(self):
        """Return ``(pad, stride)`` as last passed to ``set``."""
        return (self._pad, self._stride)
|
||||
|
||||
class FilterDescriptor(object):
|
||||
def __init__(self):
|
||||
ptr = ctypes.c_void_p()
|
||||
@ -209,7 +158,8 @@ class FilterDescriptor(object):
|
||||
self._size = weight.size()
|
||||
datatype = _typemap[weight.type()]
|
||||
check_error(lib.cudnnSetFilterNdDescriptor(
|
||||
self, datatype, CUDNN_TENSOR_NCHW, weight.ndimension(), int_array(weight.size())))
|
||||
self, datatype, CUDNN_TENSOR_NCHW, weight.ndimension(),
|
||||
int_array(weight.size())))
|
||||
|
||||
def as_tuple(self):
|
||||
return tuple(self._size)
|
||||
@ -219,36 +169,66 @@ class DropoutDescriptor(object):
|
||||
def __init__(self, handle, dropout, seed):
|
||||
ptr = ctypes.c_void_p()
|
||||
check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr)))
|
||||
self._as_parameter_ = ptr
|
||||
|
||||
self._as_parameter_ = ptr
|
||||
self.state = None
|
||||
self.dropout = dropout
|
||||
self.handle = handle
|
||||
|
||||
self._set(dropout, seed)
|
||||
|
||||
def set_dropout(self, dropout, seed):
|
||||
if dropout != self.dropout:
|
||||
self._set(dropout, seed)
|
||||
|
||||
def _set(self, dropout, seed):
|
||||
if self.state is None and dropout > 0:
|
||||
dropout_states_size = ctypes.c_long()
|
||||
check_error(lib.cudnnDropoutGetStatesSize(
|
||||
handle,
|
||||
self.handle,
|
||||
ctypes.byref(dropout_states_size)))
|
||||
|
||||
self.state = torch.cuda.ByteTensor(dropout_states_size.value)
|
||||
state_ptr = self.state.data_ptr()
|
||||
state_size = self.state.size(0)
|
||||
else:
|
||||
state_ptr = None
|
||||
state_size = 0
|
||||
|
||||
check_error(lib.cudnnSetDropoutDescriptor(
|
||||
self,
|
||||
handle,
|
||||
self.handle,
|
||||
ctypes.c_float(dropout),
|
||||
ctypes.c_void_p(self.state.data_ptr()),
|
||||
ctypes.c_size_t(self.state.size(0)),
|
||||
ctypes.c_void_p(state_ptr),
|
||||
ctypes.c_size_t(state_size),
|
||||
ctypes.c_ulonglong(seed),
|
||||
))
|
||||
|
||||
self.dropout = dropout
|
||||
|
||||
def __del__(self):
|
||||
check_error(lib.cudnnDestroyDropoutDescriptor(self))
|
||||
|
||||
|
||||
|
||||
class RNNDescriptor(object):
|
||||
def __init__(self, hidden_size, num_layers, dropout_desc, input_mode,
|
||||
def __init__(self, handle, hidden_size, num_layers, dropout_desc, input_mode,
|
||||
bidirectional, mode, datatype):
|
||||
ptr = ctypes.c_void_p()
|
||||
check_error(lib.cudnnCreateRNNDescriptor(ctypes.byref(ptr)))
|
||||
self._as_parameter_ = ptr
|
||||
|
||||
if version() >= 6000:
|
||||
check_error(lib.cudnnSetRNNDescriptor_v6(
|
||||
handle,
|
||||
self,
|
||||
hidden_size,
|
||||
num_layers,
|
||||
dropout_desc,
|
||||
input_mode,
|
||||
bidirectional,
|
||||
mode,
|
||||
CUDNN_RNN_ALGO_STANDARD,
|
||||
datatype
|
||||
))
|
||||
else:
|
||||
check_error(lib.cudnnSetRNNDescriptor(
|
||||
self,
|
||||
hidden_size,
|
||||
@ -264,24 +244,18 @@ class RNNDescriptor(object):
|
||||
check_error(lib.cudnnDestroyRNNDescriptor(self))
|
||||
|
||||
|
||||
class ConvolutionAlgoPerf(ctypes.Structure):
    """ctypes record filled in by the ``cudnnFindConvolution*Algorithm`` calls.

    Only ``algo`` is read by the code in this module.
    """

    _fields_ = [
        ("algo", ctypes.c_int),        # algorithm identifier
        ("status", ctypes.c_int),      # status code for this entry
        ("time", ctypes.c_float),
        ("memory", ctypes.c_size_t),
    ]
|
||||
|
||||
def check_error(status):
    """Raise ``CuDNNError`` unless ``status`` equals 0.

    Args:
        status: integer status code returned by a cuDNN call.

    Raises:
        CuDNNError: if ``status`` is non-zero.
    """
    # Compare by value. ``status is not 0`` tests object identity, which only
    # happens to work for CPython's cached small ints and is a SyntaxWarning
    # on Python 3.8+.
    if status != 0:
        raise CuDNNError(status)
|
||||
|
||||
|
||||
def get_error_string(status):
    """Return whatever ``lib.cudnnGetErrorString`` yields for ``status``."""
    describe = lib.cudnnGetErrorString
    return describe(status)
|
||||
|
||||
|
||||
def get_handle():
|
||||
if lib is None:
|
||||
_loadlib()
|
||||
if _libcudnn() is None:
|
||||
raise RuntimeError('cuDNN not available')
|
||||
current_device = torch.cuda.current_device()
|
||||
handle = _handles.get(current_device, None)
|
||||
if handle is None:
|
||||
@ -289,6 +263,7 @@ def get_handle():
|
||||
_handles[current_device] = handle
|
||||
return handle
|
||||
|
||||
|
||||
_typemap = {
|
||||
'torch.cuda.HalfTensor': CUDNN_DATA_HALF,
|
||||
'torch.cuda.FloatTensor': CUDNN_DATA_FLOAT,
|
||||
@ -296,11 +271,12 @@ _typemap = {
|
||||
}
|
||||
|
||||
# Size in bytes of a single element for each cuDNN data type.
# (Each key used to be listed twice; the duplicates were redundant.)
_sizeofmap = {
    CUDNN_DATA_HALF: 2,
    CUDNN_DATA_FLOAT: 4,
    CUDNN_DATA_DOUBLE: 8,
}
|
||||
|
||||
|
||||
def c_type(tensor):
|
||||
if isinstance(tensor, torch.cuda.HalfTensor):
|
||||
return ctypes.c_float
|
||||
@ -311,127 +287,36 @@ def c_type(tensor):
|
||||
else:
|
||||
raise ValueError("unknown type '{}'".format(type(tensor)))
|
||||
|
||||
|
||||
def int_array(itr):
    """Copy a sized sequence of ints into a new ctypes ``c_int`` array.

    ``itr`` must support ``len()`` and unpacking; the result has exactly
    ``len(itr)`` elements.
    """
    return (ctypes.c_int * len(itr))(*itr)
|
||||
|
||||
|
||||
def descriptor(tensor, N=None):
    """Build a cuDNN tensor descriptor (or an array of them) for ``tensor``.

    The tensor is first viewed with trailing size-1 dimensions appended until
    it has 5 dimensions (tensors that already have 5 or more are unchanged).

    Args:
        tensor: tensor to describe.
        N: if not None, return a ``TensorDescriptorArray`` of ``N`` entries
            all set from ``tensor``; otherwise return a single
            ``TensorDescriptor``.
    """
    padded_size = tensor.size() + ((1,) * (5 - tensor.dim()))
    tensor = tensor.view(padded_size)
    # The former ``dim() == 2`` / ``dim() == 3`` reshaping was unreachable
    # after the padding above and has been dropped.
    if N is not None:
        desc = TensorDescriptorArray(N)
        desc.set_all(tensor)
    else:
        desc = TensorDescriptor()
        desc.set(tensor)
    return desc
|
||||
|
||||
_autotuner_forward = {}
|
||||
_autotuner_backward_data = {}
|
||||
_autotuner_backward_filter = {}
|
||||
|
||||
def convolution_autotuner_key(idesc, weight_desc, conv_desc):
    """Return the autotuner cache key: the ``as_tuple()`` of each descriptor,
    in the order input, weight, convolution."""
    return tuple(d.as_tuple() for d in (idesc, weight_desc, conv_desc))
|
||||
def descriptor_sequence(tensor, batch_sizes):
    """Create one tensor descriptor per entry of ``batch_sizes``.

    Every descriptor uses ``tensor``'s type, and its size/stride padded with
    trailing 1s up to 5 dimensions; only the leading size differs, being set
    to the corresponding batch size.

    Returns:
        A ``TensorDescriptorArray`` with ``len(batch_sizes)`` entries.
    """
    result = TensorDescriptorArray(len(batch_sizes))
    data_type = _typemap[tensor.type()]
    ndim = 5
    ones = (1,) * (5 - tensor.dim())
    sizes = int_array(tensor.size() + ones)
    strides = int_array(tensor.stride() + ones)
    for index, batch in enumerate(batch_sizes):
        # The same ctypes array is reused; only its first slot changes.
        sizes[0] = batch
        result.set_raw(index, data_type, ndim, sizes, strides)
    return result
|
||||
|
||||
def convolution_forward_algorithm(idesc, weight_desc, conv_desc, odesc):
    """Choose the cuDNN forward-convolution algorithm for these descriptors.

    A result cached in ``_autotuner_forward`` wins. Otherwise, when the
    module-level ``benchmark`` flag is set, the algorithm is obtained from
    ``cudnnFindConvolutionForwardAlgorithm`` and cached; if not, it is
    obtained from ``cudnnGetConvolutionForwardAlgorithm``, honouring
    ``workspace_limit`` when that is not None.

    Returns:
        The cached/benchmarked algorithm as an int, or a ``ctypes.c_int``
        when the non-benchmark query is used.
    """
    key = convolution_autotuner_key(idesc, weight_desc, conv_desc)
    if key in _autotuner_forward:
        return _autotuner_forward[key]

    if benchmark:
        perf = ConvolutionAlgoPerf()
        returned_count = ctypes.c_int()
        status = lib.cudnnFindConvolutionForwardAlgorithm(
            get_handle(), idesc, weight_desc, conv_desc, odesc, 1,
            ctypes.byref(returned_count), ctypes.byref(perf))
        check_error(status)
        _autotuner_forward[key] = perf.algo
        return perf.algo

    if workspace_limit is None:
        preference, limit = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0
    else:
        preference = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
        limit = workspace_limit

    chosen = ctypes.c_int()
    status = lib.cudnnGetConvolutionForwardAlgorithm(
        get_handle(), idesc, weight_desc, conv_desc, odesc, preference,
        limit, ctypes.byref(chosen))
    check_error(status)
    return chosen
|
||||
|
||||
def convolution_forward_workspace_size(*args):
    """Forward ``args`` to ``cudnnGetConvolutionForwardWorkspaceSize`` and
    raise via ``check_error`` on a non-zero status. Returns None."""
    status = lib.cudnnGetConvolutionForwardWorkspaceSize(*args)
    check_error(status)
|
||||
|
||||
def convolution_forward(*args):
    """Forward ``args`` to ``cudnnConvolutionForward`` and raise via
    ``check_error`` on a non-zero status. Returns None."""
    status = lib.cudnnConvolutionForward(*args)
    check_error(status)
|
||||
|
||||
def convolution_backward_data(*args):
    """Forward ``args`` to ``cudnnConvolutionBackwardData``; returns the
    result of ``check_error`` on its status."""
    status = lib.cudnnConvolutionBackwardData(*args)
    return check_error(status)
|
||||
|
||||
def convolution_backward_data_algorithm(weight_desc, odesc, conv_desc, idesc):
    """Choose the cuDNN backward-data convolution algorithm.

    A result cached in ``_autotuner_backward_data`` wins. Otherwise, when the
    module-level ``benchmark`` flag is set, the algorithm is obtained from
    ``cudnnFindConvolutionBackwardDataAlgorithm`` and cached; if not, it is
    obtained from ``cudnnGetConvolutionBackwardDataAlgorithm``, honouring
    ``workspace_limit`` when that is not None.

    Returns:
        The cached/benchmarked algorithm as an int, or a ``ctypes.c_int``
        when the non-benchmark query is used.
    """
    key = convolution_autotuner_key(idesc, weight_desc, conv_desc)
    if key in _autotuner_backward_data:
        return _autotuner_backward_data[key]

    if benchmark:
        perf = ConvolutionAlgoPerf()
        returned_count = ctypes.c_int()
        status = lib.cudnnFindConvolutionBackwardDataAlgorithm(
            get_handle(), weight_desc, odesc, conv_desc, idesc, 1,
            ctypes.byref(returned_count), ctypes.byref(perf))
        check_error(status)
        _autotuner_backward_data[key] = perf.algo
        return perf.algo

    if workspace_limit is None:
        preference, limit = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, 0
    else:
        preference = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
        limit = workspace_limit

    chosen = ctypes.c_int()
    status = lib.cudnnGetConvolutionBackwardDataAlgorithm(
        get_handle(), weight_desc, odesc, conv_desc, idesc, preference,
        limit, ctypes.byref(chosen))
    check_error(status)
    return chosen
|
||||
|
||||
def convolution_backward_data_workspace_size(*args):
    """Forward ``args`` to ``cudnnGetConvolutionBackwardDataWorkspaceSize``;
    returns the result of ``check_error`` on its status."""
    status = lib.cudnnGetConvolutionBackwardDataWorkspaceSize(*args)
    return check_error(status)
|
||||
|
||||
def convolution_backward_filter(*args):
    """Forward ``args`` to ``cudnnConvolutionBackwardFilter``; returns the
    result of ``check_error`` on its status."""
    status = lib.cudnnConvolutionBackwardFilter(*args)
    return check_error(status)
|
||||
|
||||
def convolution_backward_filter_algorithm(idesc, odesc, conv_desc, weight_desc):
    """Choose the cuDNN backward-filter convolution algorithm.

    A result cached in ``_autotuner_backward_filter`` wins. Otherwise, when
    the module-level ``benchmark`` flag is set, the algorithm is obtained from
    ``cudnnFindConvolutionBackwardFilterAlgorithm`` and cached; if not, it is
    obtained from ``cudnnGetConvolutionBackwardFilterAlgorithm``, honouring
    ``workspace_limit`` when that is not None.

    Returns:
        The cached/benchmarked algorithm as an int, or a ``ctypes.c_int``
        when the non-benchmark query is used.
    """
    key = convolution_autotuner_key(idesc, weight_desc, conv_desc)
    if key in _autotuner_backward_filter:
        return _autotuner_backward_filter[key]

    if benchmark:
        perf = ConvolutionAlgoPerf()
        returned_count = ctypes.c_int()
        status = lib.cudnnFindConvolutionBackwardFilterAlgorithm(
            get_handle(), idesc, odesc, conv_desc, weight_desc, 1,
            ctypes.byref(returned_count), ctypes.byref(perf))
        check_error(status)
        _autotuner_backward_filter[key] = perf.algo
        return perf.algo

    if workspace_limit is None:
        preference, limit = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0
    else:
        preference = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
        limit = workspace_limit

    chosen = ctypes.c_int()
    status = lib.cudnnGetConvolutionBackwardFilterAlgorithm(
        get_handle(), idesc, odesc, conv_desc, weight_desc, preference,
        limit, ctypes.byref(chosen))
    check_error(status)
    return chosen
|
||||
|
||||
def convolution_backward_filter_workspace_size(*args):
    """Forward ``args`` to ``cudnnGetConvolutionBackwardFilterWorkspaceSize``;
    returns the result of ``check_error`` on its status."""
    status = lib.cudnnGetConvolutionBackwardFilterWorkspaceSize(*args)
    return check_error(status)
|
||||
|
||||
def convolution_backward_bias(*args):
    """Forward ``args`` to ``cudnnConvolutionBackwardBias`` and raise via
    ``check_error`` on a non-zero status. Returns None."""
    status = lib.cudnnConvolutionBackwardBias(*args)
    check_error(status)
|
||||
|
||||
def add_tensor(*args):
    """Forward ``args`` to ``cudnnAddTensor`` and raise via ``check_error``
    on a non-zero status. Returns None."""
    status = lib.cudnnAddTensor(*args)
    check_error(status)
|
||||
|
||||
@ -3,6 +3,7 @@ import torch.backends.cudnn as cudnn
|
||||
from torch.backends.cudnn import check_error
|
||||
import ctypes
|
||||
|
||||
|
||||
def get_cudnn_mode(mode):
|
||||
if mode == 'RNN_RELU':
|
||||
return cudnn.CUDNN_RNN_RELU
|
||||
@ -17,6 +18,7 @@ def get_cudnn_mode(mode):
|
||||
|
||||
|
||||
class Unserializable(object):
|
||||
|
||||
def __init__(self, inner):
|
||||
self.inner = inner
|
||||
|
||||
@ -32,18 +34,20 @@ class Unserializable(object):
|
||||
self.inner = None
|
||||
|
||||
|
||||
def init_dropout_descriptor(fn, handle):
|
||||
return cudnn.DropoutDescriptor(
|
||||
handle,
|
||||
fn.dropout,
|
||||
fn.dropout_seed
|
||||
def init_rnn_descriptor(fn, handle):
|
||||
dropout_desc_name = 'desc_' + str(torch.cuda.current_device())
|
||||
dropout_p = fn.dropout if fn.train else 0
|
||||
if (dropout_desc_name not in fn.dropout_state) or (fn.dropout_state[dropout_desc_name].get() is None):
|
||||
fn.dropout_state[dropout_desc_name] = Unserializable(
|
||||
cudnn.DropoutDescriptor(handle, dropout_p, fn.dropout_seed)
|
||||
)
|
||||
|
||||
def init_rnn_descriptor(fn):
|
||||
dropout_desc = fn.dropout_state[dropout_desc_name].get()
|
||||
dropout_desc.set_dropout(dropout_p, fn.dropout_seed)
|
||||
return cudnn.RNNDescriptor(
|
||||
handle,
|
||||
fn.hidden_size,
|
||||
fn.num_layers,
|
||||
fn.dropout_state['desc'].get(),
|
||||
dropout_desc,
|
||||
fn.input_mode,
|
||||
fn.bidirectional,
|
||||
fn.mode,
|
||||
@ -58,7 +62,10 @@ def init_weight_descriptor(fn, weight):
|
||||
return w_desc
|
||||
|
||||
|
||||
def _input_size(fn):
|
||||
def _input_size(fn, input):
|
||||
if fn.batch_sizes is not None:
|
||||
return (input.size(0), fn.input_size)
|
||||
else:
|
||||
return (fn.seq_length, fn.mini_batch, fn.input_size)
|
||||
|
||||
|
||||
@ -66,7 +73,10 @@ def _hidden_size(fn):
|
||||
return (fn.num_layers * fn.num_directions, fn.mini_batch, fn.hidden_size)
|
||||
|
||||
|
||||
def _output_size(fn):
|
||||
def _output_size(fn, input):
|
||||
if fn.batch_sizes is not None:
|
||||
return (input.size(0), fn.hidden_size * fn.num_directions)
|
||||
else:
|
||||
return (fn.seq_length, fn.mini_batch, fn.hidden_size * fn.num_directions)
|
||||
|
||||
|
||||
@ -80,7 +90,7 @@ def get_num_weights(handle, rnn_desc, x_desc, datatype):
|
||||
datatype
|
||||
))
|
||||
elem_size = cudnn._sizeofmap[datatype]
|
||||
assert(weight_size.value % elem_size == 0)
|
||||
assert weight_size.value % elem_size == 0
|
||||
return weight_size.value // elem_size
|
||||
|
||||
|
||||
@ -139,10 +149,11 @@ def get_parameters(fn, handle, weight_buf):
|
||||
ctypes.byref(nb_dims),
|
||||
ctypes.c_void_p(filter_dim_a.data_ptr())))
|
||||
|
||||
filter_dim_a.resize_(nb_dims.value)
|
||||
assert nb_dims.value <= min_dim
|
||||
filter_dim_a = filter_dim_a[:nb_dims.value]
|
||||
elem_size = cudnn._sizeofmap[fn.datatype]
|
||||
offset_bytes = (matrix_pointer.value - weight_buf.data_ptr())
|
||||
assert(offset_bytes % elem_size == 0)
|
||||
assert offset_bytes % elem_size == 0
|
||||
offset = offset_bytes // elem_size
|
||||
|
||||
# for all the RNN types provided by CUDNN, all the ih weights
|
||||
@ -151,17 +162,16 @@ def get_parameters(fn, handle, weight_buf):
|
||||
# Since we're storing all the weights in a single tensor anyway,
|
||||
# might as well merge the CUDNN ones into a single tensor as well
|
||||
if linear_id == 0 or linear_id == num_linear_layers / 2:
|
||||
assert(filter_dim_a.prod() == filter_dim_a[0])
|
||||
assert filter_dim_a.prod() == filter_dim_a[0]
|
||||
param = fn.weight_buf.new().set_(
|
||||
weight_buf.storage(), offset,
|
||||
filter_dim_a[0] * num_linear_layers // 2, filter_dim_a[2])
|
||||
layer_params.append(param)
|
||||
else:
|
||||
assert(cur_offset == offset)
|
||||
assert cur_offset == offset
|
||||
|
||||
cur_offset = offset + filter_dim_a[0]
|
||||
|
||||
|
||||
params.append(layer_params)
|
||||
|
||||
return params
|
||||
@ -170,7 +180,7 @@ def get_parameters(fn, handle, weight_buf):
|
||||
def _copyParams(params_from, params_to):
|
||||
for layer_params_from, layer_params_to in zip(params_from, params_to):
|
||||
for param_from, param_to in zip(layer_params_from, layer_params_to):
|
||||
assert(param_from.type() == param_to.type())
|
||||
assert param_from.type() == param_to.type()
|
||||
param_to.copy_(param_from)
|
||||
|
||||
|
||||
@ -179,6 +189,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
lib = cudnn.lib
|
||||
handle = cudnn.get_handle()
|
||||
fn.datatype = cudnn._typemap[input.type()]
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
hx, cx = hx
|
||||
@ -186,35 +197,43 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
else:
|
||||
cx, cy = None, None
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
|
||||
if input.dim() != 3:
|
||||
if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
|
||||
raise RuntimeError(
|
||||
'input must have 3 dimensions, got {}'.format(input.dim()))
|
||||
if fn.input_size != input.size(2):
|
||||
raise RuntimeError('input.size(2) must be equal to input_size. Expected {}, got {}'.format(
|
||||
fn.input_size
|
||||
if fn.input_size != input.size(-1):
|
||||
raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
|
||||
fn.input_size, input.size(-1)
|
||||
))
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v5.1 and above')
|
||||
|
||||
if is_input_packed:
|
||||
fn.seq_length = len(fn.batch_sizes)
|
||||
fn.mini_batch = fn.batch_sizes[0]
|
||||
fn.input_size = input.size(-1)
|
||||
else:
|
||||
fn.seq_length, fn.mini_batch, fn.input_size = input.size()
|
||||
hidden_size = _hidden_size(fn)
|
||||
output_size = _output_size(fn)
|
||||
output_size = _output_size(fn, input)
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
output.resize_(*output_size)
|
||||
hy.resize_(*hidden_size).zero_()
|
||||
hy.resize_(*hidden_size)
|
||||
if cy is not None:
|
||||
cy.resize_(*hidden_size).zero_()
|
||||
cy.resize_(*hidden_size)
|
||||
y = output
|
||||
|
||||
# init descriptors
|
||||
if ('desc' not in fn.dropout_state) or (fn.dropout_state['desc'].get() is None):
|
||||
fn.dropout_state['desc'] = Unserializable(
|
||||
init_dropout_descriptor(fn, handle)
|
||||
)
|
||||
fn.rnn_desc = init_rnn_descriptor(fn)
|
||||
fn.rnn_desc = init_rnn_descriptor(fn, handle)
|
||||
if is_input_packed:
|
||||
fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
|
||||
fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
|
||||
else:
|
||||
fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
|
||||
fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
|
||||
fn.hx_desc = cudnn.descriptor(hx)
|
||||
@ -225,7 +244,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
# create the weight buffer and copy the weights into it
|
||||
num_weights = get_num_weights(
|
||||
handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
|
||||
fn.weight_buf = input.new(num_weights)
|
||||
fn.weight_buf = x.new(num_weights)
|
||||
fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
|
||||
w = fn.weight_buf
|
||||
# this zero might not seem necessary, but it is in the case
|
||||
@ -251,7 +270,7 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
ctypes.byref(workspace_size)
|
||||
))
|
||||
fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
|
||||
if fn.train:
|
||||
if fn.requires_grad:
|
||||
reserve_size = ctypes.c_long()
|
||||
check_error(lib.cudnnGetRNNTrainingReserveSize(
|
||||
handle,
|
||||
@ -291,13 +310,13 @@ def forward(fn, input, hx, weight, output, hy):
|
||||
ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0)
|
||||
))
|
||||
|
||||
if fn.batch_first:
|
||||
output = output.transpose_(0, 1)
|
||||
|
||||
if fn.batch_first and not is_input_packed:
|
||||
output.transpose_(0, 1)
|
||||
|
||||
|
||||
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
|
||||
with torch.cuda.device_of(input):
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
handle = cudnn.get_handle()
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
@ -307,33 +326,35 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
else:
|
||||
cx, grad_cx, grad_cy = None, None, None
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
grad_output = grad_output.transpose(0, 1)
|
||||
output = output.transpose(0, 1)
|
||||
|
||||
input_size = _input_size(fn)
|
||||
input_size = _input_size(fn, input)
|
||||
hidden_size = _hidden_size(fn)
|
||||
output_size = _output_size(fn)
|
||||
output_size = _output_size(fn, input)
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
dy = grad_output.contiguous()
|
||||
y = output
|
||||
w = fn.weight_buf
|
||||
dx = grad_input.resize_as_(input)
|
||||
dhy = grad_hy.resize_(*hidden_size)
|
||||
dcy = grad_cy.resize_(*hidden_size) if grad_cy is not None else None
|
||||
dhy = grad_hy.contiguous().view(*hidden_size)
|
||||
dcy = grad_cy.contiguous().view(*hidden_size) if grad_cy is not None else None
|
||||
dhx = grad_hx.resize_(*hidden_size)
|
||||
dcx = grad_cx.resize_(*hidden_size) if grad_cx is not None else None
|
||||
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_grad can only be called when training!')
|
||||
if not fn.requires_grad:
|
||||
raise RuntimeError('backward_grad can only be called when the function requires grad!')
|
||||
if tuple(input.size()) != input_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
input_size, tuple(input.size())))
|
||||
if tuple(output.size()) != _output_size(fn):
|
||||
if tuple(output.size()) != output_size:
|
||||
raise RuntimeError('Expected output size {}, got {}'.format(
|
||||
output_size, output.size()))
|
||||
if hx is not None and tuple(hx.size()) != hidden_size:
|
||||
@ -348,6 +369,8 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
if dcy is not None and tuple(dcy.size()) != hidden_size:
|
||||
raise RuntimeError('Expected d_cell size {}, got {}'.format(
|
||||
hidden_size, dcy.size()))
|
||||
if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
|
||||
raise RuntimeError('Gradients aren\'t CUDA tensors')
|
||||
|
||||
check_error(cudnn.lib.cudnnRNNBackwardData(
|
||||
handle,
|
||||
@ -367,7 +390,7 @@ def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_inpu
|
||||
ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
|
||||
))
|
||||
|
||||
if fn.batch_first:
|
||||
if fn.batch_first and not is_input_packed:
|
||||
grad_input = grad_input.transpose_(0, 1)
|
||||
|
||||
|
||||
@ -386,30 +409,32 @@ def _num_linear_layers(fn):
|
||||
|
||||
def backward_weight(fn, input, hx, output, weight, grad_weight):
|
||||
with torch.cuda.device_of(input):
|
||||
is_input_packed = fn.batch_sizes is not None
|
||||
handle = cudnn.get_handle()
|
||||
|
||||
if fn.mode == cudnn.CUDNN_LSTM:
|
||||
hx, cx = hx
|
||||
else:
|
||||
cx = None
|
||||
if fn.batch_first:
|
||||
|
||||
if fn.batch_first and not is_input_packed:
|
||||
input = input.transpose(0, 1)
|
||||
output = output.transpose(0, 1)
|
||||
input_size = _input_size(fn)
|
||||
input_size = _input_size(fn, input)
|
||||
hidden_size = _hidden_size(fn)
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_weight can only be called when training!')
|
||||
if not fn.requires_grad:
|
||||
raise RuntimeError('backward_weight can only be called when the function requires grad!')
|
||||
if fn.dropout != 0 and cudnn.version() < 5103:
|
||||
raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
|
||||
if tuple(input.size()) != input_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
input_size, tuple(input.size())))
|
||||
if not fn.train:
|
||||
raise RuntimeError('backward_weight can only be called when training!')
|
||||
if tuple(hx.size()) != hidden_size:
|
||||
raise RuntimeError('Expected input size {}, got {}'.format(
|
||||
hidden_size, hx.size()))
|
||||
|
||||
assert hx.is_contiguous()
|
||||
assert cx is None or cx.is_contiguous()
|
||||
x = input.contiguous()
|
||||
y = output
|
||||
dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()
|
||||
|
||||
181
torch/csrc/DynamicTypes.cpp
Normal file
181
torch/csrc/DynamicTypes.cpp
Normal file
@ -0,0 +1,181 @@
|
||||
#include "DynamicTypes.h"
|
||||
|
||||
#include "THP.h"
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <THPP/tensors/THTensor.hpp>
|
||||
#include <THPP/tensors/THSTensor.hpp>
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#include <THC/THC.h>
|
||||
#include <THCS/THCS.h>
|
||||
#include <THPP/tensors/THCTensor.hpp>
|
||||
#include <THPP/tensors/THCSTensor.hpp>
|
||||
extern THCState* state;
|
||||
#endif
|
||||
|
||||
|
||||
using namespace thpp;
|
||||
|
||||
namespace torch {
|
||||
|
||||
struct TensorType {
|
||||
Type data_type;
|
||||
bool is_cuda;
|
||||
bool is_sparse;
|
||||
|
||||
friend bool operator==(const TensorType &t1, const TensorType &t2)
|
||||
{
|
||||
return (t1.data_type == t2.data_type &&
|
||||
t1.is_cuda == t2.is_cuda &&
|
||||
t1.is_sparse == t2.is_sparse);
|
||||
}
|
||||
|
||||
friend bool operator!=(const TensorType &t1, const TensorType &t2)
|
||||
{
|
||||
return !(t1 == t2);
|
||||
}
|
||||
};
|
||||
|
||||
struct TensorTypeHasher
|
||||
{
|
||||
std::size_t operator()(const TensorType& k) const
|
||||
{
|
||||
size_t hash = static_cast<size_t>(k.data_type);
|
||||
hash = (hash << 8) + k.is_cuda;
|
||||
hash = (hash << 1) + k.is_sparse;
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
static std::unordered_map<std::string, Type> type_names = {
|
||||
{"Float", Type::FLOAT},
|
||||
{"Double", Type::DOUBLE},
|
||||
{"Half", Type::HALF},
|
||||
{"Byte", Type::UCHAR},
|
||||
{"Char", Type::CHAR},
|
||||
{"Short", Type::SHORT},
|
||||
{"Int", Type::INT},
|
||||
{"Long", Type::LONG},
|
||||
};
|
||||
static std::unordered_map<PyTypeObject*, TensorType> pytype_to_tensortype;
|
||||
static std::unordered_map<TensorType, PyTypeObject*, TensorTypeHasher> tensortype_to_pytype;
|
||||
|
||||
// Records the association between a Python tensor type object and its
// (element type, cuda, sparse) description, in both lookup directions.
// `name` must be a key of `type_names`; otherwise `at` throws
// std::out_of_range before either map is modified.
void registerPyTypeObject(PyTypeObject *pytype, const std::string& name, bool is_cuda, bool is_sparse)
{
  const TensorType key{type_names.at(name), is_cuda, is_sparse};

  pytype_to_tensortype[pytype] = key;
  tensortype_to_pytype[key] = pytype;
}
|
||||
|
||||
// Looks up the Python type object registered for this tensor's element type,
// device and layout. `at` throws std::out_of_range when no matching type was
// registered.
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor)
{
  TensorType key;
  key.data_type = tensor.type();
  key.is_cuda = tensor.isCuda();
  key.is_sparse = tensor.isSparse();

  PyTypeObject *registered = tensortype_to_pytype.at(key);
  return registered;
}
|
||||
|
||||
static std::unique_ptr<Tensor> createTensor(void *tensor, Type type, bool is_cuda, bool is_sparse)
|
||||
{
|
||||
if (is_cuda) {
|
||||
#ifdef WITH_CUDA
|
||||
if (is_sparse) {
|
||||
if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<unsigned char>(state, (THCSByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<char>(state, (THCSCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<short>(state, (THCSShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<int>(state, (THCSIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<long>(state, (THCSLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<float>(state, (THCSFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<double>(state, (THCSDoubleTensor*)tensor));
|
||||
} else if (type == Type::HALF) {
|
||||
return std::unique_ptr<Tensor>(new THCSTensor<half>(state, (THCSHalfTensor*)tensor));
|
||||
}
|
||||
} else if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<unsigned char>(state, (THCudaByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<char>(state, (THCudaCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<short>(state, (THCudaShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<int>(state, (THCudaIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<long>(state, (THCudaLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<float>(state, (THCudaTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<double>(state, (THCudaDoubleTensor*)tensor));
|
||||
} else if (type == Type::HALF) {
|
||||
return std::unique_ptr<Tensor>(new THCTensor<half>(state, (THCudaHalfTensor*)tensor));
|
||||
}
|
||||
#else
|
||||
throw std::runtime_error("Compiled without CUDA support");
|
||||
#endif
|
||||
} else if (is_sparse) {
|
||||
if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<unsigned char>((THSByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<char>((THSCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<short>((THSShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<int>((THSIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<long>((THSLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<float>((THSFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THSTensor<double>((THSDoubleTensor*)tensor));
|
||||
}
|
||||
} else if (type == Type::UCHAR) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<unsigned char>((THByteTensor*)tensor));
|
||||
} else if (type == Type::CHAR) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<char>((THCharTensor*)tensor));
|
||||
} else if (type == Type::SHORT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<short>((THShortTensor*)tensor));
|
||||
} else if (type == Type::INT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<int>((THIntTensor*)tensor));
|
||||
} else if (type == Type::LONG) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<long>((THLongTensor*)tensor));
|
||||
} else if (type == Type::FLOAT) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<float>((THFloatTensor*)tensor));
|
||||
} else if (type == Type::DOUBLE) {
|
||||
return std::unique_ptr<Tensor>(new THTensor<double>((THDoubleTensor*)tensor));
|
||||
}
|
||||
throw std::invalid_argument("Unsupported tensor type");
|
||||
}
|
||||
|
||||
// Builds a thpp::Tensor wrapper around the tensor held by a Python tensor
// object. The wrapper calls retain() on the underlying tensor before it is
// returned. Throws std::out_of_range (from `at`) if the object's Python type
// was never registered.
std::unique_ptr<Tensor> createTensor(PyObject *data)
{
  const TensorType& info = pytype_to_tensortype.at(Py_TYPE(data));
  auto raw = ((THPVoidTensor *)data)->cdata;
  std::unique_ptr<Tensor> wrapper =
      createTensor(raw, info.data_type, info.is_cuda, info.is_sparse);
  wrapper->retain();
  return wrapper;
}
|
||||
|
||||
// Allocates a Python tensor object of the type matching `tensor` and stores
// the tensor's cdata pointer in it, calling retain() on the tensor first.
// Returns whatever tp_alloc returned, i.e. NULL when allocation failed.
PyObject* createPyObject(const thpp::Tensor& tensor)
{
  auto py_type = getPyTypeObject(tensor);
  PyObject *result = py_type->tp_alloc(py_type, 0);
  if (!result) {
    return result;
  }
  // retain() is called through a const_cast because `tensor` is a const
  // reference here.
  auto&& retained = const_cast<thpp::Tensor&>(tensor).retain();
  ((THPVoidTensor*)result)->cdata = (THVoidTensor *)retained.cdata();
  return result;
}
|
||||
|
||||
} // namespace
|
||||
25
torch/csrc/DynamicTypes.h
Normal file
25
torch/csrc/DynamicTypes.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once

// Provides conversions between Python tensor objects and thpp::Tensors.

// Python.h comes first: the CPython documentation asks for it to be included
// before any standard header, since it may define macros that affect them.
#include <Python.h>
#include <memory>
#include <string>  // std::string in registerPyTypeObject's signature
#include <THPP/THPP.h>

namespace torch {

// Register a PyTypeObject* with the given attributes.
//   pytype    - the Python type object to register
//   name      - name associated with the type
//   is_cuda   - whether the type is a CUDA tensor type
//   is_sparse - whether the type is a sparse tensor type
void registerPyTypeObject(
    PyTypeObject *pytype, const std::string& name,
    bool is_cuda, bool is_sparse);

// Gets the PyTypeObject* corresponding to the Tensor
PyTypeObject* getPyTypeObject(const thpp::Tensor& tensor);

// Creates a Tensor from a Python tensor object
std::unique_ptr<thpp::Tensor> createTensor(PyObject *data);

// Creates Python tensor object from a Tensor
PyObject* createPyObject(const thpp::Tensor& tensor);

} // namespace torch
|
||||
@ -4,13 +4,15 @@
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "THP.h"
|
||||
#include "torch/csrc/utils/object_ptr.h"
|
||||
#include "torch/csrc/utils/auto_gil.h"
|
||||
|
||||
#define HANDLE_TH_ERRORS \
|
||||
try {
|
||||
|
||||
#define END_HANDLE_TH_ERRORS_RET(retval) \
|
||||
} catch (python_error &e) { \
|
||||
return retval; \
|
||||
} catch (std::exception &e) { \
|
||||
PyErr_SetString(PyExc_RuntimeError, e.what()); \
|
||||
return retval; \
|
||||
@ -20,7 +22,47 @@
|
||||
|
||||
extern PyObject *THPException_FatalError;
|
||||
|
||||
// Throwing this exception means that the python error flags have been already
|
||||
// set and control should be immediately returned to the interpreter.
|
||||
struct python_error : public std::exception {
|
||||
python_error() : type(nullptr), value(nullptr), traceback(nullptr) {}
|
||||
|
||||
~python_error() {
|
||||
if (type || value || traceback) {
|
||||
AutoGIL gil;
|
||||
Py_XDECREF(type);
|
||||
Py_XDECREF(value);
|
||||
Py_XDECREF(traceback);
|
||||
}
|
||||
}
|
||||
|
||||
/** Saves the exception so that it can be re-thrown on a different thread */
|
||||
inline void persist() {
|
||||
// PyErr_Fetch overwrites the pointers
|
||||
AutoGIL gil;
|
||||
Py_XDECREF(type);
|
||||
Py_XDECREF(value);
|
||||
Py_XDECREF(traceback);
|
||||
PyErr_Fetch(&type, &value, &traceback);
|
||||
}
|
||||
|
||||
/** Sets the current Python error from this exception */
|
||||
inline void restore() {
|
||||
// PyErr_Restore steals references
|
||||
AutoGIL gil;
|
||||
Py_XINCREF(type);
|
||||
Py_XINCREF(value);
|
||||
Py_XINCREF(traceback);
|
||||
PyErr_Restore(type, value, traceback);
|
||||
}
|
||||
|
||||
PyObject* type;
|
||||
PyObject* value;
|
||||
PyObject* traceback;
|
||||
};
|
||||
|
||||
#ifdef _THP_CORE
|
||||
|
||||
struct THException: public std::exception {
|
||||
THException(const char* msg): msg(msg) {};
|
||||
|
||||
|
||||
@ -33,25 +33,25 @@ static bool THPModule_loadClasses(PyObject *self)
|
||||
THPUtils_setError("class loader couldn't access torch module");
|
||||
return false;
|
||||
}
|
||||
PyObject* module_dict = PyModule_GetDict(torch_module);
|
||||
|
||||
ASSERT_NOT_NULL(tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_tensor_classes"));
|
||||
ASSERT_NOT_NULL(tensor_classes = PyObject_GetAttrString(torch_module, (char*)"_tensor_classes"));
|
||||
if (!THPDoubleTensor_postInit(torch_module)) return false;
|
||||
if (!THPFloatTensor_postInit(torch_module)) return false;
|
||||
if (!THPHalfTensor_postInit(torch_module)) return false;
|
||||
if (!THPLongTensor_postInit(torch_module)) return false;
|
||||
if (!THPIntTensor_postInit(torch_module)) return false;
|
||||
if (!THPShortTensor_postInit(torch_module)) return false;
|
||||
if (!THPCharTensor_postInit(torch_module)) return false;
|
||||
if (!THPByteTensor_postInit(torch_module)) return false;
|
||||
|
||||
ASSERT_NOT_NULL(THPDoubleStorageClass = PyMapping_GetItemString(module_dict,(char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THPFloatStorageClass = PyMapping_GetItemString(module_dict,(char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THPLongStorageClass = PyMapping_GetItemString(module_dict,(char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THPIntStorageClass = PyMapping_GetItemString(module_dict,(char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THPShortStorageClass = PyMapping_GetItemString(module_dict,(char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THPCharStorageClass = PyMapping_GetItemString(module_dict,(char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THPByteStorageClass = PyMapping_GetItemString(module_dict,(char*)"ByteStorage"));
|
||||
|
||||
ASSERT_NOT_NULL(THPDoubleTensorClass = PyMapping_GetItemString(module_dict,(char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THPFloatTensorClass = PyMapping_GetItemString(module_dict,(char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THPLongTensorClass = PyMapping_GetItemString(module_dict,(char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THPIntTensorClass = PyMapping_GetItemString(module_dict,(char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THPShortTensorClass = PyMapping_GetItemString(module_dict,(char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THPCharTensorClass = PyMapping_GetItemString(module_dict,(char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THPByteTensorClass = PyMapping_GetItemString(module_dict,(char*)"ByteTensor"));
|
||||
ASSERT_NOT_NULL(THPDoubleStorageClass = PyObject_GetAttrString(torch_module,(char*)"DoubleStorage"));
|
||||
ASSERT_NOT_NULL(THPFloatStorageClass = PyObject_GetAttrString(torch_module,(char*)"FloatStorage"));
|
||||
ASSERT_NOT_NULL(THPHalfStorageClass = PyObject_GetAttrString(torch_module,(char*)"HalfStorage"));
|
||||
ASSERT_NOT_NULL(THPLongStorageClass = PyObject_GetAttrString(torch_module,(char*)"LongStorage"));
|
||||
ASSERT_NOT_NULL(THPIntStorageClass = PyObject_GetAttrString(torch_module,(char*)"IntStorage"));
|
||||
ASSERT_NOT_NULL(THPShortStorageClass = PyObject_GetAttrString(torch_module,(char*)"ShortStorage"));
|
||||
ASSERT_NOT_NULL(THPCharStorageClass = PyObject_GetAttrString(torch_module,(char*)"CharStorage"));
|
||||
ASSERT_NOT_NULL(THPByteStorageClass = PyObject_GetAttrString(torch_module,(char*)"ByteStorage"));
|
||||
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
@ -72,6 +72,7 @@ static bool THPModule_assignStateless(PyObject *self)
|
||||
PyObject *stateless;
|
||||
INIT_STATELESS(Double);
|
||||
INIT_STATELESS(Float);
|
||||
INIT_STATELESS(Half);
|
||||
INIT_STATELESS(Long);
|
||||
INIT_STATELESS(Int);
|
||||
INIT_STATELESS(Short);
|
||||
@ -92,6 +93,7 @@ static PyObject * THPModule_initExtension(PyObject *self, PyObject *shm_manager_
|
||||
libshm_init(THPUtils_bytesAsString(shm_manager_path));
|
||||
if (!THPModule_loadClasses(self)) return NULL;
|
||||
if (!THPModule_assignStateless(self)) return NULL;
|
||||
if (!THPAutograd_initFunctions(self)) return NULL;
|
||||
return PyBool_FromLong(true);
|
||||
}
|
||||
|
||||
@ -139,6 +141,8 @@ PyObject * THPModule_fromNumpy(PyObject *_unused, PyObject *array)
|
||||
return PyObject_CallFunctionObjArgs(THPLongTensorClass, array, NULL);
|
||||
} else if (type == NPY_INT32) {
|
||||
return PyObject_CallFunctionObjArgs(THPIntTensorClass, array, NULL);
|
||||
} else if (type == NPY_INT16) {
|
||||
return PyObject_CallFunctionObjArgs(THPShortTensorClass, array, NULL);
|
||||
} else if (type == NPY_UINT8) {
|
||||
return PyObject_CallFunctionObjArgs(THPByteTensorClass, array, NULL);
|
||||
}
|
||||
@ -176,13 +180,7 @@ static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *arg
|
||||
} \
|
||||
\
|
||||
dispatch: \
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
|
||||
THPUtils_assert(methods, "Type %s doesn't implement stateless methods", \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, #name); \
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method " #name, \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
return PyObject_Call(method, args, kwargs); \
|
||||
return THPUtils_dispatchStateless(tensor, #name, args, kwargs); \
|
||||
}
|
||||
|
||||
IMPLEMENT_STATELESS(sigmoid)
|
||||
@ -243,6 +241,7 @@ IMPLEMENT_STATELESS(topk)
|
||||
IMPLEMENT_STATELESS(t)
|
||||
IMPLEMENT_STATELESS(transpose)
|
||||
IMPLEMENT_STATELESS(squeeze)
|
||||
IMPLEMENT_STATELESS(unsqueeze)
|
||||
IMPLEMENT_STATELESS(renorm)
|
||||
IMPLEMENT_STATELESS(dist)
|
||||
IMPLEMENT_STATELESS(linspace)
|
||||
@ -293,6 +292,8 @@ IMPLEMENT_STATELESS(qr)
|
||||
IMPLEMENT_STATELESS(geqrf)
|
||||
IMPLEMENT_STATELESS(orgqr)
|
||||
IMPLEMENT_STATELESS(ormqr)
|
||||
IMPLEMENT_STATELESS(btrifact)
|
||||
IMPLEMENT_STATELESS(btrisolve)
|
||||
|
||||
#undef IMPLEMENT_STATELESS
|
||||
|
||||
@ -321,13 +322,7 @@ static PyObject * TH_CONCAT_2(THPModule_, name)(PyObject *_unused, PyObject *arg
|
||||
} \
|
||||
\
|
||||
dispatch: \
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
|
||||
THPUtils_assert(methods, "Type %s doesn't implement stateless methods", \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, #name); \
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method " #name, \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
return PyObject_Call(method, args, kwargs); \
|
||||
return THPUtils_dispatchStateless(tensor, #name, args, kwargs); \
|
||||
}
|
||||
|
||||
IMPLEMENT_STATELESS_REVERSED(gt)
|
||||
@ -349,14 +344,7 @@ static PyObject * THPModule_nonzero(PyObject *_unused, PyObject *args, PyObject
|
||||
tensor = PyTuple_GET_ITEM(args, 0);
|
||||
else if (PyTuple_Size(args) == 2)
|
||||
tensor = PyTuple_GET_ITEM(args, 1);
|
||||
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
|
||||
THPUtils_assert(methods, "Type %s doesn't implement stateless methods",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, "nonzero");
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method nonzero",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
return PyObject_Call(method, args, kwargs);
|
||||
return THPUtils_dispatchStateless(tensor, "nonzero", args, kwargs);
|
||||
}
|
||||
|
||||
static PyObject * THPModule_randperm(PyObject *_unused, PyObject *args, PyObject *kwargs)
|
||||
@ -365,13 +353,7 @@ static PyObject * THPModule_randperm(PyObject *_unused, PyObject *args, PyObject
|
||||
PyObject *out;
|
||||
if (kwargs && (out = PyDict_GetItemString(kwargs, "out")))
|
||||
tensor = out;
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
|
||||
THPUtils_assert(methods, "Type %s doesn't implement stateless methods",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, "randperm");
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method randperm",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
return PyObject_Call(method, args, kwargs);
|
||||
return THPUtils_dispatchStateless(tensor, "randperm", args, kwargs);
|
||||
}
|
||||
|
||||
static PyObject * THPModule_cat(PyObject *_unused, PyObject *args)
|
||||
@ -383,8 +365,8 @@ static PyObject * THPModule_cat(PyObject *_unused, PyObject *args)
|
||||
PyObject *first_arg = PyTuple_GET_ITEM(args, 0);
|
||||
if (THPModule_isTensor(first_arg)) {
|
||||
tensor = first_arg;
|
||||
} else if ((iterator = PyObject_GetIter(first_arg))) {
|
||||
item = PyIter_Next(iterator);
|
||||
} else if (PySequence_Check(first_arg)) {
|
||||
item = PySequence_GetItem(first_arg, 0);
|
||||
if (item && (THPModule_isTensor(item) || THPVariable_Check(item))) {
|
||||
tensor = item;
|
||||
}
|
||||
@ -392,13 +374,7 @@ static PyObject * THPModule_cat(PyObject *_unused, PyObject *args)
|
||||
PyErr_Clear();
|
||||
}
|
||||
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME);
|
||||
THPUtils_assert(methods, "Type %s doesn't implement statless methods",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, "cat");
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method cat",
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor));
|
||||
return PyObject_Call(method, args, NULL);
|
||||
return THPUtils_dispatchStateless(tensor, "cat", args, NULL);
|
||||
}
|
||||
|
||||
PyObject *THPModule_safeCall(PyObject *_unused, PyObject *args, PyObject *kwargs)
|
||||
@ -478,6 +454,7 @@ extern PyObject * THCPModule_setDevice_wrap(PyObject *self, PyObject *arg);
|
||||
extern PyObject * THCPModule_getDevice_wrap(PyObject *self);
|
||||
extern PyObject * THCPModule_getDeviceCount_wrap(PyObject *self);
|
||||
extern PyObject * THCPModule_getCurrentStream_wrap(PyObject *self);
|
||||
extern PyObject * THCPModule_getCurrentBlasHandle_wrap(PyObject *self);
|
||||
extern PyObject * THCPModule_setStream_wrap(PyObject *self, PyObject *stream);
|
||||
extern PyObject * THCPModule_getDriverVersion(PyObject *self);
|
||||
extern PyObject * THCPModule_isDriverSufficient(PyObject *self);
|
||||
@ -492,6 +469,8 @@ extern PyObject * THCPModule_cudaHostAllocator(PyObject *_unused);
|
||||
extern PyObject * THCPModule_cudaSynchronize(PyObject *_unused);
|
||||
extern PyObject * THCPModule_getLibPath(PyObject *_unused);
|
||||
extern PyObject * THCPModule_cudaSleep(PyObject *_unused, PyObject *cycles);
|
||||
extern PyObject * THCPModule_cudaLockMutex(PyObject *module);
|
||||
extern PyObject * THCPModule_cudaUnlockMutex(PyObject *module);
|
||||
|
||||
extern PyObject * THCSPModule_initExtension(PyObject *self);
|
||||
#endif
|
||||
@ -507,6 +486,7 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"_cuda_getDevice", (PyCFunction)THCPModule_getDevice_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getDeviceCount", (PyCFunction)THCPModule_getDeviceCount_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getCurrentStream", (PyCFunction)THCPModule_getCurrentStream_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_getCurrentBlasHandle", (PyCFunction)THCPModule_getCurrentBlasHandle_wrap, METH_NOARGS, NULL},
|
||||
{"_cuda_setStream", (PyCFunction)THCPModule_setStream_wrap, METH_O, NULL},
|
||||
{"_cuda_isDriverSufficient", (PyCFunction)THCPModule_isDriverSufficient, METH_NOARGS, NULL},
|
||||
{"_cuda_getDriverVersion", (PyCFunction)THCPModule_getDriverVersion, METH_NOARGS, NULL},
|
||||
@ -522,6 +502,8 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"_cuda_getLibPath", (PyCFunction)THCPModule_getLibPath, METH_NOARGS, NULL},
|
||||
{"_cuda_sleep", (PyCFunction)THCPModule_cudaSleep, METH_O, NULL},
|
||||
{"_cuda_sparse_init", (PyCFunction)THCSPModule_initExtension, METH_NOARGS, NULL},
|
||||
{"_cuda_lock_mutex", (PyCFunction)THCPModule_cudaLockMutex, METH_NOARGS, NULL},
|
||||
{"_cuda_unlock_mutex", (PyCFunction)THCPModule_cudaUnlockMutex, METH_NOARGS, NULL},
|
||||
#endif
|
||||
{"_safe_call", (PyCFunction)THPModule_safeCall, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"_set_default_tensor_type", (PyCFunction)THPModule_setDefaultTensorType, METH_O, NULL},
|
||||
@ -593,6 +575,7 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"t", (PyCFunction)THPModule_t, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"transpose", (PyCFunction)THPModule_transpose, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"squeeze", (PyCFunction)THPModule_squeeze, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"unsqueeze", (PyCFunction)THPModule_unsqueeze, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"nonzero", (PyCFunction)THPModule_nonzero, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"renorm", (PyCFunction)THPModule_renorm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"dist", (PyCFunction)THPModule_dist, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
@ -645,10 +628,13 @@ static PyMethodDef TorchMethods[] = {
|
||||
{"geqrf", (PyCFunction)THPModule_geqrf, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"orgqr", (PyCFunction)THPModule_orgqr, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"ormqr", (PyCFunction)THPModule_ormqr, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"btrifact", (PyCFunction)THPModule_btrifact, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"btrisolve", (PyCFunction)THPModule_btrisolve, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
|
||||
// Sparse functions
|
||||
{"smm", (PyCFunction)THSPModule_sspmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"saddmm", (PyCFunction)THSPModule_sspaddmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{"dsmm", (PyCFunction)THSPModule_spmm, METH_VARARGS | METH_KEYWORDS, NULL},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
@ -697,8 +683,30 @@ bool THCSPShortTensor_init(PyObject *module);
|
||||
bool THCSPCharTensor_init(PyObject *module);
|
||||
bool THCSPByteTensor_init(PyObject *module);
|
||||
|
||||
bool THDPDoubleStorage_init(PyObject *module);
|
||||
bool THDPFloatStorage_init(PyObject *module);
|
||||
//bool THDPHalfStorage_init(PyObject *module);
|
||||
bool THDPLongStorage_init(PyObject *module);
|
||||
bool THDPIntStorage_init(PyObject *module);
|
||||
bool THDPShortStorage_init(PyObject *module);
|
||||
bool THDPCharStorage_init(PyObject *module);
|
||||
bool THDPByteStorage_init(PyObject *module);
|
||||
|
||||
bool THDPDoubleTensor_init(PyObject *module);
|
||||
bool THDPFloatTensor_init(PyObject *module);
|
||||
//bool THDPHalfTensor_init(PyObject *module);
|
||||
bool THDPLongTensor_init(PyObject *module);
|
||||
bool THDPIntTensor_init(PyObject *module);
|
||||
bool THDPShortTensor_init(PyObject *module);
|
||||
bool THDPCharTensor_init(PyObject *module);
|
||||
bool THDPByteTensor_init(PyObject *module);
|
||||
|
||||
static std::vector<PyMethodDef> methods;
|
||||
|
||||
#ifdef WITH_DISTRIBUTED
|
||||
PyMethodDef* THDPModule_methods();
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
PyMODINIT_FUNC init_C()
|
||||
#else
|
||||
@ -716,6 +724,9 @@ PyMODINIT_FUNC PyInit__C()
|
||||
#ifdef WITH_CUDNN
|
||||
THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
|
||||
#endif
|
||||
#ifdef WITH_DISTRIBUTED
|
||||
THPUtils_addPyMethodDefs(methods, THDPModule_methods());
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION == 2
|
||||
ASSERT_TRUE(module = Py_InitModule("torch._C", methods.data()));
|
||||
@ -729,6 +740,7 @@ PyMODINIT_FUNC PyInit__C()
|
||||
};
|
||||
ASSERT_TRUE(module = PyModule_Create(&torchmodule));
|
||||
#endif
|
||||
ASSERT_TRUE(THPWrapper_init(module));
|
||||
ASSERT_TRUE(THPGenerator_init(module));
|
||||
ASSERT_TRUE(THPException_init(module));
|
||||
ASSERT_TRUE(THPSize_init(module));
|
||||
@ -738,6 +750,7 @@ PyMODINIT_FUNC PyInit__C()
|
||||
|
||||
ASSERT_TRUE(THPDoubleStorage_init(module));
|
||||
ASSERT_TRUE(THPFloatStorage_init(module));
|
||||
ASSERT_TRUE(THPHalfStorage_init(module));
|
||||
ASSERT_TRUE(THPLongStorage_init(module));
|
||||
ASSERT_TRUE(THPIntStorage_init(module));
|
||||
ASSERT_TRUE(THPShortStorage_init(module));
|
||||
@ -746,6 +759,7 @@ PyMODINIT_FUNC PyInit__C()
|
||||
|
||||
ASSERT_TRUE(THPDoubleTensor_init(module));
|
||||
ASSERT_TRUE(THPFloatTensor_init(module));
|
||||
ASSERT_TRUE(THPHalfTensor_init(module));
|
||||
ASSERT_TRUE(THPLongTensor_init(module));
|
||||
ASSERT_TRUE(THPIntTensor_init(module));
|
||||
ASSERT_TRUE(THPShortTensor_init(module));
|
||||
@ -796,7 +810,6 @@ PyMODINIT_FUNC PyInit__C()
|
||||
#endif
|
||||
|
||||
#ifdef WITH_CUDNN
|
||||
ASSERT_TRUE(THCUDNNModule_initModule(module));
|
||||
PyObject *has_cudnn = Py_True;
|
||||
#else
|
||||
PyObject *has_cudnn = Py_False;
|
||||
@ -804,6 +817,28 @@ PyMODINIT_FUNC PyInit__C()
|
||||
Py_INCREF(has_cudnn);
|
||||
ASSERT_TRUE(PyModule_AddObject(module, "has_cudnn", has_cudnn) == 0);
|
||||
|
||||
// TODO THD: enable once master-worker mode is implemented
|
||||
#if 0 && defined(WITH_DISTRIBUTED)
|
||||
// See comment on CUDA objects
|
||||
ASSERT_TRUE(THDPDoubleStorage_init(module));
|
||||
ASSERT_TRUE(THDPFloatStorage_init(module));
|
||||
//ASSERT_TRUE(THDPHalfStorage_init(module));
|
||||
ASSERT_TRUE(THDPLongStorage_init(module));
|
||||
ASSERT_TRUE(THDPIntStorage_init(module));
|
||||
ASSERT_TRUE(THDPShortStorage_init(module));
|
||||
ASSERT_TRUE(THDPCharStorage_init(module));
|
||||
ASSERT_TRUE(THDPByteStorage_init(module));
|
||||
|
||||
ASSERT_TRUE(THDPDoubleTensor_init(module));
|
||||
ASSERT_TRUE(THDPFloatTensor_init(module));
|
||||
//ASSERT_TRUE(THDPHalfTensor_init(module));
|
||||
ASSERT_TRUE(THDPLongTensor_init(module));
|
||||
ASSERT_TRUE(THDPIntTensor_init(module));
|
||||
ASSERT_TRUE(THDPShortTensor_init(module));
|
||||
ASSERT_TRUE(THDPCharTensor_init(module));
|
||||
ASSERT_TRUE(THDPByteTensor_init(module));
|
||||
#endif
|
||||
|
||||
THPDefaultGenerator = (THPGenerator*)THPGenerator_New();
|
||||
ASSERT_TRUE(THPDefaultGenerator != nullptr);
|
||||
ASSERT_TRUE(PyModule_AddObject(module, "default_generator", (PyObject*)THPDefaultGenerator) == 0);
|
||||
|
||||
@ -6,20 +6,16 @@ PyObject* sparse_tensor_classes;
|
||||
// SPARSE MODULE INITIALIZATION
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool THSPModule_loadClasses(PyObject *module_dict)
|
||||
static bool THSPModule_loadClasses(PyObject *sparse_module)
|
||||
{
|
||||
#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; }
|
||||
ASSERT_NOT_NULL(sparse_tensor_classes = PyMapping_GetItemString(module_dict, (char*)"_sparse_tensor_classes"));
|
||||
ASSERT_NOT_NULL(THSPDoubleTensorClass = PyMapping_GetItemString(module_dict, (char*)"DoubleTensor"));
|
||||
ASSERT_NOT_NULL(THSPFloatTensorClass = PyMapping_GetItemString(module_dict, (char*)"FloatTensor"));
|
||||
ASSERT_NOT_NULL(THSPLongTensorClass = PyMapping_GetItemString(module_dict, (char*)"LongTensor"));
|
||||
ASSERT_NOT_NULL(THSPIntTensorClass = PyMapping_GetItemString(module_dict, (char*)"IntTensor"));
|
||||
ASSERT_NOT_NULL(THSPShortTensorClass = PyMapping_GetItemString(module_dict, (char*)"ShortTensor"));
|
||||
ASSERT_NOT_NULL(THSPCharTensorClass = PyMapping_GetItemString(module_dict, (char*)"CharTensor"));
|
||||
ASSERT_NOT_NULL(THSPByteTensorClass = PyMapping_GetItemString(module_dict, (char*)"ByteTensor"));
|
||||
|
||||
if (!THSPDoubleTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPFloatTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPLongTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPIntTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPShortTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPCharTensor_postInit(sparse_module)) return false;
|
||||
if (!THSPByteTensor_postInit(sparse_module)) return false;
|
||||
return true;
|
||||
#undef ASSERT_NOT_NULL
|
||||
}
|
||||
|
||||
static bool THSPModule_assignStateless()
|
||||
@ -50,18 +46,11 @@ static bool THSPModule_assignStateless()
|
||||
// Callback for python part. Used for additional initialization of python classes
|
||||
PyObject *THSPModule_initExtension(PyObject *self)
|
||||
{
|
||||
#define ASSERT_TRUE(cond) if (!(cond)) { Py_RETURN_FALSE; }
|
||||
PyObject *module = PyImport_ImportModule("torch.sparse");
|
||||
if (!module) {
|
||||
THPUtils_setError("class loader couldn't access torch.sparse module");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject* module_dict = PyModule_GetDict(module);
|
||||
ASSERT_TRUE(THSPModule_loadClasses(module_dict));
|
||||
ASSERT_TRUE(THSPModule_assignStateless());
|
||||
Py_RETURN_TRUE;
|
||||
#undef ASSERT_TRUE
|
||||
if (!module) return NULL;
|
||||
if (!THSPModule_loadClasses(module)) return NULL;
|
||||
if (!THSPModule_assignStateless()) return NULL;
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -85,14 +74,14 @@ static PyObject * TH_CONCAT_2(THSPModule_, name)(PyObject *_unused, PyObject *ar
|
||||
Py_ssize_t pos = 0; \
|
||||
for (int i = 0; i < PyTuple_Size(args); i++) { \
|
||||
PyObject *item = PyTuple_GET_ITEM(args, i); \
|
||||
if (THPModule_isTensor(item) || THPVariable_CheckType(item, THPModule_isSparseTensor)) { \
|
||||
if (THPModule_isTensor(item) || THPVariable_Check(item)) { \
|
||||
tensor = item; \
|
||||
goto dispatch; \
|
||||
} \
|
||||
} \
|
||||
if (kwargs) { \
|
||||
while (PyDict_Next(kwargs, &pos, &key, &value)) { \
|
||||
if (THPModule_isTensor(value) || THPVariable_CheckType(value, THPModule_isSparseTensor)) { \
|
||||
if (THPModule_isTensor(value) || THPVariable_Check(value)) { \
|
||||
tensor = value; \
|
||||
goto dispatch; \
|
||||
} \
|
||||
@ -100,15 +89,10 @@ static PyObject * TH_CONCAT_2(THSPModule_, name)(PyObject *_unused, PyObject *ar
|
||||
} \
|
||||
\
|
||||
dispatch: \
|
||||
THPObjectPtr methods = PyObject_GetAttrString(tensor, THP_STATELESS_ATTRIBUTE_NAME); \
|
||||
THPUtils_assert(methods, "Type %s doesn't implement stateless methods", \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
THPObjectPtr method = PyObject_GetAttrString(methods, #name); \
|
||||
THPUtils_assert(method, "Type %s doesn't implement stateless method " #name, \
|
||||
tensor == THPDefaultTensorClass ? THPUtils_classname(tensor) : THPUtils_typename(tensor)); \
|
||||
return PyObject_Call(method, args, kwargs); \
|
||||
return THPUtils_dispatchStateless(tensor, #name, args, kwargs); \
|
||||
}
|
||||
|
||||
IMPLEMENT_SPARSE_STATELESS(spmm);
|
||||
IMPLEMENT_SPARSE_STATELESS(sspmm);
|
||||
IMPLEMENT_SPARSE_STATELESS(sspaddmm);
|
||||
|
||||
|
||||
@ -52,7 +52,7 @@ static void THPWrapper_dealloc(THPWrapper* self)
|
||||
|
||||
PyTypeObject THPWrapperType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch._C._CppWrapper", /* tp_name */
|
||||
"torch._C._PtrWrapper", /* tp_name */
|
||||
sizeof(THPWrapper), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
(destructor)THPWrapper_dealloc, /* tp_dealloc */
|
||||
@ -1,6 +1,7 @@
|
||||
#ifndef THP_CUDNN_CPP_WRAPPER_INC
|
||||
#define THP_CUDNN_CPP_WRAPPER_INC
|
||||
#ifndef THP_PTR_WRAPPER_H
|
||||
#define THP_PTR_WRAPPER_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <functional>
|
||||
|
||||
/**
|
||||
@ -24,18 +24,17 @@ PyObject * THPSize_New(int dim, long *sizes)
|
||||
|
||||
static PyObject * THPSize_pynew(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *self = PyTuple_Type.tp_new(type, args, kwargs);
|
||||
THPObjectPtr self = PyTuple_Type.tp_new(type, args, kwargs);
|
||||
if (self) {
|
||||
for (Py_ssize_t i = 0; i < PyTuple_Size(self); ++i) {
|
||||
PyObject *item = PyTuple_GET_ITEM(self, i);
|
||||
PyObject *item = PyTuple_GET_ITEM(self.get(), i);
|
||||
if (!THPUtils_checkLong(item)) {
|
||||
Py_DECREF(self);
|
||||
return PyErr_Format(PyExc_TypeError, "torch.Size() takes an iterable of 'int' (item %zd is '%s')",
|
||||
i, Py_TYPE(item)->tp_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return self;
|
||||
return self.release();
|
||||
}
|
||||
|
||||
static PyObject * THPSize_repr(THPSize *self)
|
||||
@ -55,6 +54,50 @@ static PyObject * THPSize_repr(THPSize *self)
|
||||
#endif
|
||||
}
|
||||
|
||||
extern PyTypeObject THPSizeType;
|
||||
|
||||
template<typename FnType, FnType fn, typename ...Args>
|
||||
static PyObject* wrap_tuple_fn(Args ... args)
|
||||
{
|
||||
PyObject *result = (*fn)(std::forward<Args>(args)...);
|
||||
if (!result) return NULL;
|
||||
if (PyTuple_Check(result)) {
|
||||
return PyObject_CallFunctionObjArgs((PyObject*)&THPSizeType, result, NULL);
|
||||
}
|
||||
Py_INCREF(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Copies of tuple's slot function pointers, kept in named static variables so
// that their addresses (&sq_concat, ...) can be used as the non-type template
// argument of wrap_tuple_fn.
static auto sq_concat = PyTuple_Type.tp_as_sequence->sq_concat;
|
||||
static auto sq_repeat = PyTuple_Type.tp_as_sequence->sq_repeat;
|
||||
// sq_slice is only read from tuple's sequence table when building for Python 2
#if PY_MAJOR_VERSION == 2
|
||||
static auto sq_slice = PyTuple_Type.tp_as_sequence->sq_slice;
|
||||
#endif
|
||||
static auto mp_subscript = PyTuple_Type.tp_as_mapping->mp_subscript;
|
||||
|
||||
|
||||
// Sequence slots for torch.Size. Length, item access and containment reuse
// tuple's slots directly; concat, repeat and (Python 2 only) slice go through
// wrap_tuple_fn, which turns tuple results into THPSizeType instances.
// The entries are positional, so their order must match PySequenceMethods.
static PySequenceMethods THPSize_as_sequence = {
|
||||
PyTuple_Type.tp_as_sequence->sq_length,
|
||||
wrap_tuple_fn<decltype(&sq_concat), &sq_concat>,
|
||||
wrap_tuple_fn<decltype(&sq_repeat), &sq_repeat>,
|
||||
PyTuple_Type.tp_as_sequence->sq_item,
|
||||
// Python 2 wraps tuple's sq_slice; other versions leave the slot empty
#if PY_MAJOR_VERSION == 2
|
||||
wrap_tuple_fn<decltype(&sq_slice), &sq_slice>,
|
||||
#else
|
||||
0, /* sq_slice */
|
||||
#endif
|
||||
0, /* sq_ass_item */
|
||||
0, /* sq_ass_slice */
|
||||
PyTuple_Type.tp_as_sequence->sq_contains
|
||||
};
|
||||
|
||||
// Mapping slots for torch.Size: length comes straight from tuple, subscript is
// tuple's mp_subscript routed through wrap_tuple_fn, and the last slot is left
// empty (0).
static PyMappingMethods THPSize_as_mapping = {
|
||||
PyTuple_Type.tp_as_mapping->mp_length,
|
||||
wrap_tuple_fn<decltype(&mp_subscript), &mp_subscript>,
|
||||
0
|
||||
};
|
||||
|
||||
|
||||
PyTypeObject THPSizeType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"torch.Size", /* tp_name */
|
||||
@ -67,8 +110,8 @@ PyTypeObject THPSizeType = {
|
||||
0, /* tp_reserved */
|
||||
(reprfunc)THPSize_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
&THPSize_as_sequence, /* tp_as_sequence */
|
||||
&THPSize_as_mapping, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
|
||||
#define THP_HOST_HALF
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <TH/TH.h>
|
||||
#include <libshm.h>
|
||||
@ -9,3 +11,6 @@
|
||||
|
||||
#include "generic/Storage.cpp"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Storage.cpp"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
@ -10,6 +10,8 @@
|
||||
PyObject_IsInstance(obj, THPDoubleStorageClass)
|
||||
#define THPFloatStorage_Check(obj) \
|
||||
PyObject_IsInstance(obj, THPFloatStorageClass)
|
||||
// Evaluates to the result of PyObject_IsInstance(obj, THPHalfStorageClass).
// It must test against the Half class, not the Float one.
#define THPHalfStorage_Check(obj) \
    PyObject_IsInstance(obj, THPHalfStorageClass)
|
||||
#define THPLongStorage_Check(obj) \
|
||||
PyObject_IsInstance(obj, THPLongStorageClass)
|
||||
#define THPIntStorage_Check(obj) \
|
||||
@ -23,6 +25,7 @@
|
||||
|
||||
#define THPDoubleStorage_CData(obj) (obj)->cdata
|
||||
#define THPFloatStorage_CData(obj) (obj)->cdata
|
||||
#define THPHalfStorage_CData(obj) (obj)->cdata
|
||||
#define THPLongStorage_CData(obj) (obj)->cdata
|
||||
#define THPIntStorage_CData(obj) (obj)->cdata
|
||||
#define THPShortStorage_CData(obj) (obj)->cdata
|
||||
@ -37,4 +40,7 @@
|
||||
#include "generic/Storage.h"
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include "generic/Storage.h"
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
#endif
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
|
||||
#define THP_API extern "C"
|
||||
|
||||
#include "PtrWrapper.h"
|
||||
#include "Exceptions.h"
|
||||
#include "Generator.h"
|
||||
#include "Storage.h"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user