mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 21:49:24 +08:00
Compare commits
1708 Commits
Author | SHA1 | Date | |
---|---|---|---|
db5d3131d1 | |||
524574ab73 | |||
ca6311d909 | |||
e114527d19 | |||
50936cb06e | |||
55092b1cc6 | |||
1c2273c8e9 | |||
999690ff3d | |||
be47470c91 | |||
3ae721d350 | |||
90b1196ac4 | |||
e1eb32d9f1 | |||
62f4db6d8a | |||
dbf6d12776 | |||
2d958b7f77 | |||
a80a46a6d0 | |||
0b1b72e975 | |||
0573ef664e | |||
f89de64796 | |||
ecc17fe3dd | |||
c79e305add | |||
8dfebc16cc | |||
38eb1beff5 | |||
78a9e7d83f | |||
d76e411d8c | |||
2d3cf98b49 | |||
33ea7eafef | |||
3237103624 | |||
a66669a110 | |||
d872af9282 | |||
86b4dd8bb2 | |||
f6f24cf0f4 | |||
d76fd43294 | |||
c3bfa0e52b | |||
c21f090ab4 | |||
9e1f4ba124 | |||
53a9d4f312 | |||
5ed9dfad98 | |||
2d56df7892 | |||
c7c5eed686 | |||
374b797569 | |||
3aba2d99e1 | |||
25c9a8b1fc | |||
875be849e9 | |||
e829a52977 | |||
a597c0ca05 | |||
ba70cf22fa | |||
ef91cfd68b | |||
773f4d8081 | |||
16558a1e9d | |||
7e4a5b89fe | |||
ff7deb95d7 | |||
7bc489c827 | |||
86ffc2a5f1 | |||
9e58c4ef91 | |||
264111bfc1 | |||
33b1f9f71a | |||
1921816f85 | |||
6e0c5a8a4e | |||
cbd805169f | |||
c7f93668dc | |||
8812a5d42e | |||
862b8cae51 | |||
a23863fd6f | |||
d429e78a9a | |||
e8e494caf8 | |||
95e5a5ae0c | |||
9fbc2d3153 | |||
d063c9c330 | |||
46772dba0c | |||
1098500e9b | |||
771eebad7b | |||
5a4082612f | |||
c303fcb9cb | |||
119f9ec291 | |||
bb546b2e5b | |||
9a932b8b90 | |||
44894915d6 | |||
7b6c6f76f7 | |||
22ab6183c5 | |||
78d594f46c | |||
4b31572375 | |||
298b775577 | |||
9ac845f734 | |||
e0f68671bd | |||
b8da44dc13 | |||
68ffe46991 | |||
b768db0810 | |||
9783ce3825 | |||
6385d00185 | |||
5a2f5a216f | |||
b5181ba1df | |||
4b90702037 | |||
cac03280f9 | |||
18eaec7121 | |||
b1faab3d8f | |||
a49bf21d50 | |||
eb71df3e63 | |||
5ee8312b63 | |||
7da2448d62 | |||
b15242f70c | |||
737efa78ba | |||
b96e6ee98d | |||
af95f712b0 | |||
5c89190340 | |||
74c3cbc013 | |||
5268dd468c | |||
35c8f93fd2 | |||
da2c3afa47 | |||
4c11dee0e8 | |||
143e171cb9 | |||
170ff7764f | |||
1c21dc6e16 | |||
c71edcc747 | |||
fd17fd4aa9 | |||
7f42d1c98a | |||
404ad939e5 | |||
aeb38cfcea | |||
1d464d7f3e | |||
26f3fb34a1 | |||
36c5f40ec0 | |||
c3a2b1e155 | |||
a84e873bb1 | |||
5c1692840e | |||
4b915260c7 | |||
738fc7054b | |||
f45405bf5b | |||
814b5715ba | |||
c042f69dbb | |||
5ae0ed8552 | |||
c03851e93a | |||
5c65a7812e | |||
e3840419ec | |||
0786dfee7c | |||
c1c841a4e7 | |||
edb3ddf1a5 | |||
67308a9323 | |||
2e0f3b038c | |||
f6354d903a | |||
aa842fe101 | |||
ad1b874a36 | |||
37627a182b | |||
ff91de43de | |||
89c3dbcad8 | |||
1f6d9f44fc | |||
3648c269e9 | |||
11ef5191ff | |||
1975917d0e | |||
220ce8046e | |||
9127ab3866 | |||
e227aa9e2e | |||
67e3905bc6 | |||
0d3cb91d8c | |||
1a9602d5db | |||
8617b780cf | |||
fd31eae9ad | |||
2b7345bcd5 | |||
75eccffdfe | |||
15e8bb379e | |||
2752ad8045 | |||
dc7498c84d | |||
69d3c00ae1 | |||
c7f828809b | |||
9aea856115 | |||
7879c979b5 | |||
6fe1867c23 | |||
5805ef5a83 | |||
666d383a00 | |||
a2d8e84594 | |||
0d663cec30 | |||
bdaa0e38b8 | |||
eeb0d67b92 | |||
2901777a0e | |||
1b0b2e69f8 | |||
31b3d81714 | |||
cf059028f0 | |||
fb6806f6e9 | |||
4ec6bd7356 | |||
7c24a16f82 | |||
29d697aec4 | |||
44cb43bcc1 | |||
9e93a02624 | |||
ba25b37e9b | |||
70e3736e20 | |||
db15f2e13f | |||
6d63e9dbff | |||
105fa58748 | |||
186341c5dc | |||
533668d7e4 | |||
da9e49e586 | |||
0cfbbceac3 | |||
f80d34a1c8 | |||
fb7e40b7eb | |||
ca55c5411f | |||
3aeb288e40 | |||
e3711aa93f | |||
f6dfd9d545 | |||
5f07b33857 | |||
aec4c19460 | |||
bcd7b03c2a | |||
0f62af4ab1 | |||
7c4aef9dfc | |||
7749804099 | |||
c32debb916 | |||
a02b3374d4 | |||
6039e25e8d | |||
8901935ad4 | |||
302caef154 | |||
c638f379b3 | |||
68251fb931 | |||
be7c618fd7 | |||
a2fcd4dee5 | |||
e8754ee017 | |||
a38ed0268e | |||
3d98810fbd | |||
7d07fcd215 | |||
4cdcbbf410 | |||
a0def0b57e | |||
b08a186153 | |||
662f66ebb9 | |||
d75f751bec | |||
23f901a737 | |||
82175f31b4 | |||
6f2307ba6a | |||
23d111c87f | |||
226a01e5a1 | |||
75bac5ab32 | |||
1620161d6b | |||
006505bb8f | |||
0199d59d3a | |||
ae1b37650c | |||
5c84145354 | |||
ba6c49cb9c | |||
e392d428b1 | |||
b7856a32f6 | |||
1b93cb7631 | |||
1b80644b4d | |||
f9c27d60c3 | |||
3257ac1ff3 | |||
ed10ef97da | |||
6c2e816268 | |||
3d4d09fe06 | |||
507ed9032e | |||
3a71d5ee49 | |||
0b10f147b6 | |||
8b1ca2810b | |||
44e21cf5bb | |||
50e9c56830 | |||
3fca4bde50 | |||
ffbc3905a1 | |||
5fefb29a53 | |||
e22cc7c072 | |||
8408dff55a | |||
bd629481fb | |||
66c8bbf021 | |||
2cc35c161a | |||
9598d380b0 | |||
03864b7b11 | |||
52f50220d9 | |||
5865561a9a | |||
936c2bba23 | |||
50bc9dc9c3 | |||
a3cfab2d63 | |||
49fe678fec | |||
c19af59a6e | |||
c5cc1e3ab2 | |||
388258fb5e | |||
3ff70712c2 | |||
a0ef8afd7e | |||
f019a2d9b3 | |||
7953b32dc4 | |||
34239006b0 | |||
507cb16583 | |||
12558019a8 | |||
9e1805d38e | |||
2d6f039766 | |||
f639249d51 | |||
6f3002a50e | |||
1ca0ec7299 | |||
cef23a4b1d | |||
afb2c0ce86 | |||
b18063b39a | |||
5918de8e84 | |||
bb7fb7e45f | |||
735cd06536 | |||
b13f91dbd9 | |||
8cc5d54b66 | |||
0d1f382e39 | |||
2fa3c8327c | |||
80ba65e2f5 | |||
0651b594d8 | |||
a10a993872 | |||
90ed2f5aca | |||
fa73037233 | |||
b30c803662 | |||
a13fd7ec28 | |||
a60368982b | |||
1ef949036c | |||
60e7d04961 | |||
6e1e2032d3 | |||
33d091f432 | |||
8e3240d022 | |||
7557a993ab | |||
c36156eded | |||
1e05f4be73 | |||
d55b25a633 | |||
f79fb58744 | |||
7fc34a4122 | |||
08b77d3844 | |||
0e93a03a3a | |||
4160c13cd2 | |||
e91c8e2f2d | |||
3918e226fd | |||
fb8c3d62fe | |||
8c4910b095 | |||
992e2750fd | |||
341b48529e | |||
b26f82b0ec | |||
b149456645 | |||
d55ba77a5d | |||
85d3fccee7 | |||
d9cdcc9a3b | |||
34db39d87a | |||
60963c2ecb | |||
accbcca338 | |||
2cacb39a21 | |||
fe068d9032 | |||
31ba34b73c | |||
6ce9907d51 | |||
91c0b7159a | |||
788d2e87bd | |||
af82396f7f | |||
166ee86b46 | |||
7a654617eb | |||
17432a1051 | |||
bb301a431d | |||
1acaafbe70 | |||
8f20d40bb7 | |||
d6bfc53b9e | |||
1f871f126f | |||
1224ef9ea1 | |||
9a281451ed | |||
3c2462cf24 | |||
4224ce10a8 | |||
3c0ce51484 | |||
acd7811e33 | |||
c96b72d61f | |||
6dacc20073 | |||
a036f9a65f | |||
6dc28e666c | |||
03a02b6fd5 | |||
b807970aea | |||
7d1db89ef9 | |||
50b914aeeb | |||
e58bbbac18 | |||
a20c7ce848 | |||
1a29950478 | |||
1c2ed4eb23 | |||
8aa5174106 | |||
f34c848f52 | |||
7fd1ea6ab7 | |||
b6edd7bbb4 | |||
1e73ab25f5 | |||
6b4852213d | |||
b6290531aa | |||
e387d945c2 | |||
751b5ea941 | |||
fc61f1a1d1 | |||
ce85150cb4 | |||
48099c23b4 | |||
928687bb24 | |||
2681852438 | |||
92dbd0219f | |||
55b25365e9 | |||
ef3d7963d8 | |||
07a8a730af | |||
a5891e6124 | |||
1bafa6236f | |||
12bb4742ad | |||
a30ade1139 | |||
390bf1e779 | |||
505dedf6ad | |||
4f0434d5ab | |||
fade36668a | |||
a43037fa11 | |||
afc91e4900 | |||
6d9a7d0e60 | |||
351478439f | |||
d56b2258f4 | |||
c905a81c92 | |||
bb404e7a32 | |||
c784f847de | |||
cbc94894fb | |||
86dc3ab252 | |||
a1fa9d8cf9 | |||
eeb3e67eeb | |||
778e23606b | |||
ce6192a21f | |||
55e1b1ec3e | |||
8610ff1072 | |||
fb6535ec70 | |||
96e5d23bad | |||
5b1b8682a3 | |||
8e91da4cb3 | |||
2c21de2007 | |||
c192788188 | |||
0d7a986da1 | |||
30018fcd0b | |||
5a53861d3a | |||
1256cbaa69 | |||
2983998bb3 | |||
cb86ae304e | |||
77c2f4d0d7 | |||
7c053b7e64 | |||
3c7b575a14 | |||
562f61a662 | |||
e4bb56570c | |||
c7a247facf | |||
d7b95dda51 | |||
e7f5fceb99 | |||
7b0f674367 | |||
1b1cdd944c | |||
a3f39f1ebb | |||
2fe4711eb4 | |||
45fd77d3b7 | |||
3808e9fad3 | |||
1e8aeb0bee | |||
3a15de9e44 | |||
037d6b697b | |||
f66cb02016 | |||
f17b2fdf1b | |||
37cb357d8d | |||
8f4dc192b6 | |||
f930c4307c | |||
874a8a321b | |||
31d41a983a | |||
6d378d3740 | |||
0d29846d5e | |||
c5afad5579 | |||
0e93500841 | |||
0573169e23 | |||
84d464f8f9 | |||
90b0c4f43d | |||
136f5c9fe1 | |||
3fbb753512 | |||
d91c686c33 | |||
c7e0db140e | |||
c3578b561c | |||
4b7c6150d8 | |||
3de0fd846f | |||
5639332a28 | |||
b8de8f6261 | |||
464c0c2204 | |||
17b2d2d373 | |||
4574ea3bec | |||
6d094224b9 | |||
ddbd87e310 | |||
5390ab1d52 | |||
eb024cd1d0 | |||
20e395a130 | |||
e3bb6ff334 | |||
4b0fc5200b | |||
72da09bb4d | |||
7ea9c674bc | |||
2356c8d542 | |||
fed8d8975a | |||
02152c515e | |||
6811e32f03 | |||
7daa829bce | |||
ff4f4a0a35 | |||
61a0df5af0 | |||
01d606e048 | |||
9e1655bb22 | |||
af6d1ec52c | |||
a7d43702d4 | |||
f446c67e2f | |||
587f769a99 | |||
0478d32cb8 | |||
4983397c02 | |||
143ba72264 | |||
53c3a92a50 | |||
96663edca6 | |||
35a24a9a94 | |||
dead6632b3 | |||
4341dd2753 | |||
46c0e2c268 | |||
a440629f14 | |||
fbabe5bf62 | |||
db5aeafa60 | |||
1e45e7a404 | |||
109dd5b412 | |||
97036d3c30 | |||
e2a7d43dfd | |||
2871d3951f | |||
346c418fc9 | |||
5151d33287 | |||
f4e502a8c5 | |||
5059beb644 | |||
0bedaf9cf6 | |||
79ec5de3fc | |||
c3680e2b19 | |||
3002cb2ad0 | |||
76d8979afe | |||
fbd50bbfb9 | |||
30676bdcd3 | |||
8311bbee7f | |||
f649d8b3a9 | |||
7c1fe17288 | |||
cd49afce64 | |||
23e19ebfa7 | |||
dfa4767754 | |||
c46dd5163f | |||
5163a28917 | |||
53bc5fb043 | |||
5cfccd76e6 | |||
283062f574 | |||
e030ee8197 | |||
9d36c37bdb | |||
96a01f82d1 | |||
60a85857dd | |||
561bc09026 | |||
0d2762e876 | |||
266bb8bf30 | |||
98b450deb9 | |||
bbc7412615 | |||
8559fcf791 | |||
f6e4fc071a | |||
f112aa746a | |||
a83a1544b1 | |||
e43fb1d26d | |||
7f002008f1 | |||
a7eee0a1e9 | |||
a17c0118a5 | |||
0bfbdcac89 | |||
ce48958606 | |||
cec3455a8b | |||
1600649792 | |||
b052fe6c2f | |||
8480fe0105 | |||
03c0f4fbe7 | |||
fc79f70f9a | |||
8de9564c12 | |||
f1a2bc4eae | |||
53a3c46950 | |||
1caa341c68 | |||
1a0cb08918 | |||
75bf877534 | |||
686e83223f | |||
e3839dfc35 | |||
5bf14c23b7 | |||
309cc76469 | |||
6093f29409 | |||
d8f35c42be | |||
0c375571f5 | |||
bf00008aa1 | |||
aef9e76283 | |||
b7a7ab364b | |||
8752214fb7 | |||
7e8572be2d | |||
003f97cefa | |||
e35418b3be | |||
4b86a215ca | |||
d97ac82bf5 | |||
786f9ba6ea | |||
c3603301d7 | |||
4c3b76c402 | |||
7c02f285dc | |||
5923d76f96 | |||
c85463fc74 | |||
4f622c26b9 | |||
d02781a2ef | |||
079e86a915 | |||
e552c04d53 | |||
7b2fb012a8 | |||
e3e6ca1102 | |||
014ea1e1f8 | |||
ae7c6bcfcf | |||
09369fa9d7 | |||
79ceecec8e | |||
607094c4bf | |||
107e067654 | |||
332a7db35e | |||
a63ef1d605 | |||
a1b2f1710d | |||
10a1534c43 | |||
9ffabcfcaa | |||
dca3c2c60f | |||
4484f67b47 | |||
26751ce300 | |||
44fb23a2f5 | |||
5ae3b44255 | |||
32b3fe8ce6 | |||
2ee4ef5290 | |||
55964abb11 | |||
a8e303dc46 | |||
dd3f52fbe6 | |||
c9be135bb9 | |||
42001e7c17 | |||
89b54229b1 | |||
00e752a46e | |||
51f58f0990 | |||
bff931a10d | |||
65ff84b49e | |||
8a5869a3f7 | |||
85bde3801b | |||
64a910bac7 | |||
4fadf571fd | |||
59d021b63a | |||
0a090fe60a | |||
a92ff57a4d | |||
869ef71343 | |||
556ff8e7b7 | |||
d01cb70497 | |||
8581d3ec67 | |||
fd9aaa6b79 | |||
3e877a70e3 | |||
df022f8078 | |||
4472ad3b2f | |||
de41d1ae0b | |||
21991c05a9 | |||
411d89ca64 | |||
90ea61800f | |||
2448a83d30 | |||
5dd153b1c2 | |||
6bfce16873 | |||
1616587540 | |||
87b47ff850 | |||
eb88098e11 | |||
c8bb665b5d | |||
9900a8dd89 | |||
7978ba45ba | |||
bf9b5dffbf | |||
d4f9dbfa66 | |||
dceec1de30 | |||
216c5d0bdc | |||
94fe8faa00 | |||
1413dd4bfc | |||
044d00516c | |||
afc7dbd586 | |||
0f59dcb317 | |||
6c8ac50753 | |||
674e23bbab | |||
2fe9e3a207 | |||
e7652cfb40 | |||
ab0c72ab6f | |||
b652c2de50 | |||
4326873330 | |||
9403eddce4 | |||
edd2e38023 | |||
10fdcf748a | |||
398d310bac | |||
a228a95b94 | |||
4794da03f8 | |||
57ec8f111f | |||
e60a7c2c88 | |||
e70321ed9e | |||
2ae8e46105 | |||
7341ab0a33 | |||
a132a7d9ce | |||
a1ba29a2c0 | |||
7d64c9df39 | |||
dbc467545f | |||
14004cbef6 | |||
392ca1e59f | |||
ce6edbfbd9 | |||
2cd912bcc2 | |||
eb29485ed8 | |||
bc1de6ae7d | |||
619c2f8b44 | |||
508f676c50 | |||
6cf450744f | |||
feff7be294 | |||
18de330e86 | |||
76c1b5cd79 | |||
e73943e488 | |||
fbe3c3f57f | |||
393ad6582d | |||
be424de869 | |||
056f2cd238 | |||
06bfabf1f5 | |||
137150be88 | |||
1906305c07 | |||
7ffa864953 | |||
464dc31532 | |||
6aee5488b5 | |||
a7ee632dff | |||
9ca9469de6 | |||
3c1d593a27 | |||
95ca66763d | |||
d03c6ba50d | |||
3c32f897ca | |||
bbacd859ab | |||
fc6a9a19ea | |||
10d67716db | |||
bad8235a3a | |||
9ef98624b3 | |||
02d3787a19 | |||
ebaabfbbd5 | |||
a340dce133 | |||
e2272dd312 | |||
af4a228426 | |||
2398a3255e | |||
b1c57caaf9 | |||
b7c9575c93 | |||
8fafa7b6ac | |||
1969898647 | |||
23e3a12d5e | |||
df67d4180a | |||
7b9d755d88 | |||
1b64c0f8fe | |||
74819087de | |||
84cfc28f23 | |||
f6ff5d8934 | |||
f3c197d6fa | |||
7faca2a217 | |||
d2f26a450e | |||
d50dd47ccd | |||
8f0f97749c | |||
75c2b34c86 | |||
9cfe9418e6 | |||
98f5c005da | |||
b2127cfa9a | |||
5f514a483c | |||
e06f92785c | |||
8c182cd89e | |||
482b1366e6 | |||
f0ed927b62 | |||
07f8b61cc6 | |||
e7242cbaf2 | |||
3ea64bd80b | |||
e988dc621b | |||
505f9b4d63 | |||
54e8623d26 | |||
274f3c0951 | |||
246d5282b3 | |||
fdf34c8da8 | |||
f000101b81 | |||
6b578cd388 | |||
c1ed1b4779 | |||
2a6850bf73 | |||
8be0efaa8c | |||
9e432b593d | |||
149afef5c4 | |||
d40b23e750 | |||
2bc6a7a260 | |||
2b280c6b74 | |||
0479517325 | |||
4bca51e3e7 | |||
8fc63e523e | |||
f74fa91b8e | |||
519570def8 | |||
7b48a7c3f6 | |||
da029ca042 | |||
34dd831dc2 | |||
cc3cecdba0 | |||
2827fc7681 | |||
9f2b2cac37 | |||
3d392cc5ec | |||
bcb851a3d6 | |||
1e1dd88c4a | |||
2f82a06826 | |||
61a2d47ec6 | |||
86192301b3 | |||
5fbaf0eaf8 | |||
a0e783768f | |||
57e162da56 | |||
6d2b3cc869 | |||
0fd176fea4 | |||
24839aac59 | |||
e6b6cc06ee | |||
421f3f3e52 | |||
27002e3fd5 | |||
d843f63f2a | |||
d714ecf879 | |||
26a8bb62ee | |||
81438f1220 | |||
a1728602da | |||
469c6b0539 | |||
edc6d721e0 | |||
99ce499bfe | |||
e2e560d9c8 | |||
54d63c5752 | |||
c2dd0b9fad | |||
de0d85ba98 | |||
8f2bc1bc56 | |||
70db53661b | |||
99a5d19591 | |||
a5b627a0bf | |||
004fc2f430 | |||
c0e24443f7 | |||
8444ed951d | |||
86e1009497 | |||
10a6a3e404 | |||
c76fc75292 | |||
96ab7cbe5c | |||
6fe089c6ea | |||
2df6d3e3c7 | |||
a682ce9144 | |||
eaf141dd64 | |||
2e1b7a6f4f | |||
edd902594a | |||
470bfaa586 | |||
48db74ea03 | |||
479b8266bf | |||
a4778862c7 | |||
7b47262936 | |||
a376f3a53f | |||
f9c0a08eed | |||
9577811908 | |||
08b7c791ff | |||
404f8660e7 | |||
b3ef98450b | |||
f30c74558c | |||
93b16b6422 | |||
b1fe541de3 | |||
a43c6385f1 | |||
f5b34e3446 | |||
a4f00c3d1e | |||
cda44ffa81 | |||
04e8a6d9ef | |||
2cebcbae8c | |||
a25d3b4d8c | |||
488d393ea6 | |||
27ccc8787f | |||
cb87319eb0 | |||
4c06f1f2bb | |||
bc74ec80d0 | |||
b200b51602 | |||
0aaff5eaf9 | |||
e5d56659ec | |||
e93c721da1 | |||
a3410f7994 | |||
e6ace54840 | |||
e475d3ede3 | |||
dc854c0ee6 | |||
44d2ca660a | |||
bfe7df2211 | |||
865a10feba | |||
265c97decf | |||
59f8e8ada7 | |||
c7027a511f | |||
3c66520dd8 | |||
13b9fd3e05 | |||
cb5f374f6c | |||
7d9ab140bf | |||
4d141bee98 | |||
f1d02f6d1c | |||
11a16961a5 | |||
d2659f6689 | |||
f58e4fbc45 | |||
77b8aade58 | |||
ed60f94dba | |||
9ca8a76645 | |||
c68b82ebc8 | |||
cc3618ce36 | |||
ce469e6c71 | |||
9af18d847a | |||
f04a705cb2 | |||
c0411719fc | |||
3a81984bde | |||
ce51e3fe55 | |||
3cb2470bb3 | |||
a35162f1bc | |||
0db505bf27 | |||
264deae5da | |||
017b91f861 | |||
518b0d0600 | |||
5ba952afcc | |||
5b15a501da | |||
ec754adb14 | |||
10de2c1187 | |||
ac64724ed9 | |||
f06b70a6e9 | |||
8a888c48da | |||
8c2d0c831f | |||
ee010a2bee | |||
47c0d88739 | |||
bb703b1ff5 | |||
91e87c0395 | |||
c82e8bf988 | |||
4a3baec961 | |||
955a01562d | |||
6071389a90 | |||
8260441b45 | |||
fbd497f169 | |||
d8dab6ffa8 | |||
3365d74df9 | |||
50a8f8531b | |||
9d9e5f8d1e | |||
33b00bdbb8 | |||
7956e9718b | |||
2e19529bd1 | |||
2cfe439cc7 | |||
3c78cc6c2b | |||
5a2b2aa6af | |||
8ad69a80e3 | |||
db0b5c7ab7 | |||
aabdcaa8fa | |||
a69af69ffc | |||
380d2dfb27 | |||
1c8a823b3b | |||
48b98d2f7f | |||
62b27d27b7 | |||
b818d31a3e | |||
dcbca53e58 | |||
b1cf3ad1c2 | |||
dbab9b73b6 | |||
bb96b6635c | |||
5be20f92ca | |||
1032cf9fe4 | |||
52b6460d3a | |||
9e6a695116 | |||
74ac86d2fe | |||
277b637811 | |||
1a4473bbd7 | |||
175f248310 | |||
71113c6b9e | |||
4276fe7867 | |||
4fe8ca74af | |||
34799faccd | |||
1fe8278559 | |||
4d62eef505 | |||
0f261ee359 | |||
df8c5a3572 | |||
f8864f0505 | |||
bc352ace7c | |||
5182fdad0b | |||
7a6e0bd77e | |||
723f40d94e | |||
dae7616078 | |||
7637b7c966 | |||
537d671829 | |||
3ca272cf5a | |||
620ece2668 | |||
63ce3fbde8 | |||
9e6bb605f6 | |||
80f766e5cd | |||
eea2ee6d29 | |||
06392bd6a3 | |||
883da952be | |||
1bec8f773b | |||
e13e86724e | |||
b090a54a38 | |||
e6ce9f303f | |||
f282fa1afe | |||
0687f58441 | |||
c21471c77f | |||
9f9f06c937 | |||
7ca995c815 | |||
8797bb1d30 | |||
ce0d3e9b35 | |||
a70573b589 | |||
2f1542839f | |||
a7ba4cb383 | |||
46ef2b2898 | |||
435228508e | |||
929bffe020 | |||
c95fa4b904 | |||
8e1e3ba7b8 | |||
f72f91610f | |||
dc211c7de4 | |||
5e73b828bd | |||
4870b1b68f | |||
60c0508d96 | |||
5cbb33f939 | |||
efab8e8fdf | |||
b827a40880 | |||
3ac9a9577c | |||
99d24aefc3 | |||
852d6e8b65 | |||
defe96eb6c | |||
526460fc8b | |||
4e1c64caee | |||
569a29b81a | |||
f6ccb6a0f9 | |||
49046239f2 | |||
be99eff75a | |||
c47f680086 | |||
4f94d82c7f | |||
c6defa0847 | |||
979560c9fc | |||
d6fe812187 | |||
14ea4bf0d1 | |||
e07e63f0b3 | |||
175e553974 | |||
c91d982691 | |||
9cb4bce847 | |||
faa354e102 | |||
cb15c7615a | |||
ae44627661 | |||
314d95a5f2 | |||
557db18c85 | |||
ab40eff5dd | |||
796181d762 | |||
eac3e7ab7c | |||
9fefab5ac6 | |||
e5752f2cb4 | |||
1720757220 | |||
2a6431ba2d | |||
956e620c64 | |||
c368f26f88 | |||
e8613d99b5 | |||
6995b84d45 | |||
f1e4304d19 | |||
b883afc928 | |||
c250f6f3d5 | |||
69906afaee | |||
2d9b1fcd09 | |||
b4d0dc77be | |||
fc1c8f8b5b | |||
444cc0ee0a | |||
478886be30 | |||
3761adc889 | |||
3fa9ccf1ba | |||
e0a8665d03 | |||
ef019a2d18 | |||
3b919a6f82 | |||
9573ecefe3 | |||
e290a9d2fd | |||
ccfaf46431 | |||
e1243cef88 | |||
86881cdb39 | |||
6727133f3d | |||
b790fcaf39 | |||
a4475d529d | |||
917b203b01 | |||
2ac7b6b683 | |||
cccd457a1e | |||
ab253c2bf1 | |||
b55dc8d971 | |||
be43a0faa9 | |||
07c0f4a097 | |||
4b5d13abab | |||
10046c2b2b | |||
c64a65c977 | |||
0f5cee2f6b | |||
97b6a25329 | |||
df47bbe9c1 | |||
2dacf28b66 | |||
dd7c2d4284 | |||
1bea5fc3ad | |||
353fdefdd6 | |||
0a190c8869 | |||
fcf801f061 | |||
8355219e68 | |||
85273acca8 | |||
448a32e0ee | |||
6c8d47f2af | |||
52beb338ab | |||
46162ccdb9 | |||
e0f21a4977 | |||
88f70fcef9 | |||
7863c17b26 | |||
046672eed5 | |||
cf235e0894 | |||
d72de9fb1e | |||
34cca9f05b | |||
ca03c10cef | |||
924326e171 | |||
97d4c05566 | |||
17c6d168de | |||
53ac4de79d | |||
4e0b6c8500 | |||
08d99c4486 | |||
9c1195fe61 | |||
f9b7ce9c99 | |||
ff508c91a1 | |||
a6949abb15 | |||
dd00c2997f | |||
821b04e819 | |||
83f788d088 | |||
b8a11cffdb | |||
223a96a9a0 | |||
470e766062 | |||
21285e73da | |||
8e4bea107a | |||
9ea19cb079 | |||
af78d4cd49 | |||
3fb3a07f54 | |||
1b07eb7148 | |||
428300d318 | |||
d401dc4374 | |||
27af265a5e | |||
dd823ccd28 | |||
8d7607e346 | |||
bc1d96ca98 | |||
952df2ba8f | |||
3894ed22a8 | |||
da2da55170 | |||
8c514627a4 | |||
8d3e7e2fcb | |||
8682999767 | |||
f575e138d8 | |||
e64f75a1d8 | |||
95caa37565 | |||
283d41885d | |||
5f37c0afda | |||
56bf4850cb | |||
1b530fdae0 | |||
6cc15c1a22 | |||
3092a69546 | |||
cfb7f0a8f2 | |||
8f51c513a6 | |||
df06fba1f1 | |||
5e8e199f8d | |||
a022fd2d6b | |||
96d826f635 | |||
da73d709a8 | |||
c774cb8913 | |||
25f4b3efe3 | |||
01227f3ba7 | |||
1e8064dec0 | |||
b357470421 | |||
ed02619ba0 | |||
a839a67aad | |||
7dbb38e856 | |||
d120b9af5a | |||
8cb0848bdc | |||
202893fe1a | |||
7921e16ca2 | |||
bf99ffc4d2 | |||
14ff866505 | |||
f3e1fe5ca5 | |||
186219a643 | |||
68f4a4b3ba | |||
6ec2f09188 | |||
7837ec553c | |||
6190408e24 | |||
d736f4f0a7 | |||
31232061aa | |||
373b5080da | |||
d783249674 | |||
ca5dc9f13a | |||
aa6f47e229 | |||
f47d12b0ef | |||
40ff69b796 | |||
8a35aafca6 | |||
0fa69c0276 | |||
a85174b46a | |||
87d3d209a6 | |||
99bc541b5b | |||
89bf98ac4c | |||
a223c5ed2c | |||
f9d1b63d18 | |||
f380f0ba27 | |||
79709f02e9 | |||
92890d4314 | |||
324a510f9c | |||
6058886b03 | |||
68843c683d | |||
ee563c5899 | |||
9473e57eca | |||
ed317b6203 | |||
805f4d5cb8 | |||
57ddc08a57 | |||
dec9bc5f0b | |||
63cd051867 | |||
2c566a17c7 | |||
9c617140f7 | |||
3fe35300ed | |||
545f22c070 | |||
5b971445a6 | |||
2b63b7a0a5 | |||
e240e89984 | |||
963b012bd8 | |||
12be60cc04 | |||
eb6a1245a2 | |||
b1a6fa90e1 | |||
710191e292 | |||
f1e7d384b6 | |||
f4944f0f8a | |||
cffeb03a2d | |||
ed5eb7196b | |||
08aab4dfdd | |||
cd88c5ccf4 | |||
84ce3ab47e | |||
5ccdd7a626 | |||
21ff6de4b3 | |||
ab1a25aa9b | |||
7d5f7ed270 | |||
348867c10b | |||
dd7501e3a8 | |||
6cbf1992bd | |||
25db86cca5 | |||
9a76e84a08 | |||
459cff93fe | |||
e027f7a913 | |||
9d79030d38 | |||
409ee5bcd9 | |||
1a6071d436 | |||
7edfe11ba4 | |||
7b7bf09e3c | |||
90737f7f5d | |||
0521c47c91 | |||
8c873def88 | |||
70c527dacd | |||
034c969f3c | |||
d34578026c | |||
c8ac878b98 | |||
84edd4a48b | |||
1bf642800d | |||
e497aa1e35 | |||
ba25e13782 | |||
5416260b1e | |||
3709734b1c | |||
3bfa7258b3 | |||
b1892226aa | |||
0054df19b1 | |||
81975a497f | |||
dc07102b17 | |||
50c0aedbec | |||
6476e4598c | |||
d0df1e8ec9 | |||
30aaa07594 | |||
ac994f2c78 | |||
c414eb2618 | |||
4d698cae2e | |||
f53d5e0a75 | |||
e15501fb68 | |||
00f0dca4b5 | |||
7035975508 | |||
a9981c8477 | |||
7d24985852 | |||
5b8a640d0b | |||
0916f4a337 | |||
04d4ec285c | |||
eb02a1d8a7 | |||
31d8e5e71a | |||
f2b62e113c | |||
058c1284be | |||
a1dd608260 | |||
d80a3eb549 | |||
01a333fd7f | |||
083e037dea | |||
23c4dbd6d7 | |||
7a52117792 | |||
52cbf4b774 | |||
e22a776890 | |||
0b96e5d792 | |||
ade97afc74 | |||
ab7520eb50 | |||
03429e4eaf | |||
ef18f74e20 | |||
bb35d085ef | |||
86aa6a61e0 | |||
71d142604f | |||
4c21b2f2d3 | |||
c6f0fe5f26 | |||
6f339cac6b | |||
bbe6ef3864 | |||
e8d8ccb34a | |||
a74cc03aa7 | |||
713e706618 | |||
aef8cadb9a | |||
189c1e1afb | |||
a6c7cf8741 | |||
0740a5d521 | |||
d5eae90537 | |||
d17b0bc679 | |||
a1bbe80e21 | |||
151b28521a | |||
7326739188 | |||
07d67aa17a | |||
1014c8a7db | |||
6dd71947ea | |||
eaf33f22c8 | |||
02695c11db | |||
0c6ab0e8f4 | |||
a98958d3bd | |||
e986f307c3 | |||
a91f3338a0 | |||
1f94ce1f97 | |||
635cbff300 | |||
6bc8d303eb | |||
63a220f54d | |||
53f4dbc9ac | |||
17ab3bd502 | |||
7a1b668283 | |||
134b5d62e8 | |||
49256ddb4a | |||
3f52a0aad7 | |||
239b2ac718 | |||
15bdb9fe61 | |||
7da4643232 | |||
c1d0784dcb | |||
97eec33f80 | |||
5317429e82 | |||
6069f6f454 | |||
12686ec656 | |||
dfad8b60ba | |||
038d5ca943 | |||
63e09707a2 | |||
b57fdf1db5 | |||
48bc57fa8d | |||
0cf3c1ce66 | |||
2279299c6c | |||
1be8b7cc56 | |||
0df4d66210 | |||
fa99ed9b30 | |||
01cb90adf1 | |||
eb5fdc5fb5 | |||
97bee5cd80 | |||
957142a4fe | |||
93a4b76114 | |||
8ac8b823c2 | |||
d9eff40546 | |||
5973312abc | |||
a1487bf874 | |||
0ee2e7c398 | |||
0f9807ee61 | |||
7b0f5d6631 | |||
033e00cd3f | |||
666bebc7d2 | |||
eef083e477 | |||
a4120fa132 | |||
8482ea8774 | |||
cee19eb31c | |||
7acb145893 | |||
229397b439 | |||
1c7832c854 | |||
234e6b3797 | |||
1f7cbea984 | |||
170d84228e | |||
2b033332c8 | |||
8734b174ca | |||
b89a3b50fb | |||
c32839fc90 | |||
89010d60f9 | |||
25bd7fe488 | |||
da3dd9af12 | |||
8399778049 | |||
543048d275 | |||
7724807551 | |||
0d50c117db | |||
a442853f4f | |||
b51901f7d3 | |||
45db8274de | |||
c2a57d082d | |||
033e95765c | |||
727609f435 | |||
e25b8869f7 | |||
3829f86c7a | |||
283f21d518 | |||
16b8075acd | |||
f54ab540af | |||
caf8b0777a | |||
f989d4b18e | |||
58b247fc42 | |||
64f707cd26 | |||
dcd9d73d47 | |||
8468b7d3f0 | |||
2b22c60980 | |||
b572e27502 | |||
c96afa3322 | |||
ea79f7c032 | |||
a3fb004b18 | |||
1c69d368e1 | |||
f564163951 | |||
11c31aef04 | |||
1a0d82e4f4 | |||
c959be9d1d | |||
8414094562 | |||
d400502b1d | |||
cdead5ace1 | |||
5a0d2c7138 | |||
1ee6fc4002 | |||
dd4b9b06a4 | |||
c5d7494ca1 | |||
c3987a0fc3 | |||
d0e1dca0f5 | |||
5bac46508a | |||
d4b4c1fbec | |||
a55b9f77a0 | |||
d181e0f1fc | |||
cf2b88fa30 | |||
7103d0d938 | |||
e7653c7561 | |||
f1f521f71b | |||
00aedfc0e2 | |||
83b4dc6822 | |||
28e1571843 | |||
def655ec27 | |||
8689d8af36 | |||
0e44db8b0d | |||
db8d01b248 | |||
6f664d3917 | |||
ac9bb8ecef | |||
0e966fc9f9 | |||
695465915a | |||
14b48a2404 | |||
92b0e7026e | |||
058a31839d | |||
3f04ca9a91 | |||
e1fe617600 | |||
99de4565dd | |||
b937cbb776 | |||
57fcc57f31 | |||
54d9823d00 | |||
f9fb37ca79 | |||
bd09ab6687 | |||
c7e8044fc8 | |||
f8086845aa | |||
e2d2b270db | |||
705d80b51e | |||
9ebac3d7fe | |||
0ebbfc25f3 | |||
dd2c487ab0 | |||
7788ec9dd1 | |||
1e7050072b | |||
b3cdaee6db | |||
5cb2b2358c | |||
f494f004b7 | |||
d4c58216d7 | |||
d9ba2b6894 | |||
bd8980e8c0 | |||
6544cd4590 | |||
8e5ac43b4e | |||
16e21e14e3 | |||
f0b73ff790 | |||
895994a7c3 | |||
a98489747d | |||
39bd73ae51 | |||
c064f8a89d | |||
ae7a7fb398 | |||
6b79e16d6d | |||
83de6f0dac | |||
bcb62cb525 | |||
1962646d0f | |||
38f3d1fc40 | |||
c9f7d7b506 | |||
8c64655460 | |||
15367ba9bc | |||
07bb79bd8b | |||
faab6ea922 | |||
74dc4460eb | |||
bcc2a0599b | |||
c9f9df002d | |||
557015fd93 | |||
6b9afc894b | |||
fe10f3d0c6 | |||
8aa23907e8 | |||
b548f8320d | |||
2217c0b408 | |||
3db9738b30 | |||
01d835c9b2 | |||
d1ac1eba3b | |||
c029c839a1 | |||
a839ec805a | |||
b911ca9b0d | |||
080266e79c | |||
1fb8925efe | |||
c0ed48a57e | |||
06360c3050 | |||
a76216b8ed | |||
035d04299c | |||
04b0774964 | |||
7c678746ef | |||
29e5ba8a7b | |||
1d3f650ce4 | |||
ff608a9ff3 | |||
696498d9e4 | |||
2cbcaf4544 | |||
8af06d8114 | |||
ecace9eb21 | |||
26df16eb21 | |||
23f86ad57f | |||
35becd1879 | |||
8fa7de35f2 | |||
15d28e400f | |||
1b59cf8b51 | |||
06f535d8a0 | |||
eba1cf2145 | |||
3010dc4208 | |||
ecb3835387 | |||
7d7d336c45 | |||
e43ffb0148 | |||
006171fffc | |||
fed91f873f | |||
f3c32a4b54 | |||
9768b4d4ff | |||
c3817e85fa | |||
572132fb17 | |||
93ecf4d72a | |||
5ffc915f26 | |||
40aa212cd6 | |||
a2ebbccc9f | |||
878e7740fd | |||
22ce6060ec | |||
ebc2643498 | |||
0a5dfa5a52 | |||
08e5ca1262 | |||
60061a20d9 | |||
7b2c0a09e4 | |||
0e779c27e1 | |||
ab9a5976a0 | |||
8009b6cdb5 | |||
e7e10e60e0 | |||
65bf181ddf | |||
0aff3cc559 | |||
b0248df72a | |||
5be0baefa2 | |||
d291cf7de6 | |||
04c0971679 | |||
f5a0c337ba | |||
bbae57d06e | |||
3eb5940cf5 | |||
149403f849 | |||
7f35e92af2 | |||
a5818047c4 | |||
a86a61b004 | |||
2021b26bcb | |||
976a9e0454 | |||
b0e48aa197 | |||
8c533c2c90 | |||
d02478e607 | |||
dd73d57643 | |||
00c6fb16e7 | |||
6a2dbc9808 | |||
c5fc2f1105 | |||
e8cb6cb9d2 | |||
f6abd16a9d | |||
1619264ca5 | |||
c35f85a6d4 | |||
80e3081c28 | |||
6e7e63fda3 | |||
13cf39294d | |||
a72603f8f8 | |||
9c49bb9ddf | |||
383d340e88 | |||
5da8a8c785 | |||
325101263a | |||
0f81039eaf | |||
db5f8d42bb | |||
478803a75f | |||
75b1ae1acd | |||
1b45f68397 | |||
6ff568df4d | |||
d9c27f4d8d | |||
c2f8f5076c | |||
78fe149ab9 | |||
18f9c07b18 | |||
b535aecd7c | |||
02d7c88fa4 | |||
c8a0b11b7f | |||
21ed7e51b6 | |||
65cbb8226b | |||
b7ebc00979 | |||
8ff435c8f6 | |||
807de9a1e3 | |||
db2f7de5c3 | |||
28dba2f928 | |||
90bcf41291 | |||
658386a63f | |||
b7b9e3c7e8 | |||
1e28294487 | |||
e53e8df20b | |||
aa1adde80b | |||
ceadde2a7f | |||
b263078bc3 | |||
a106388187 | |||
8f0db9bbbb | |||
94c513cc7f | |||
364ae10bb8 | |||
7122f8b3bb | |||
d7e11e3aae | |||
3deb4791c3 | |||
fcb3ccf23f | |||
0947712e5d | |||
d4ce41c4de | |||
71b99f28be | |||
2cdf98a74d | |||
3417a1e7e4 | |||
17a65bf9b6 | |||
dfa03e94eb | |||
86e025fca2 | |||
5d4624a1d9 | |||
9068a46dba | |||
a830964007 | |||
3ae6ee4ebd | |||
b7c302da1a | |||
70e4b3ef59 | |||
e05d689c49 | |||
1c09bfde1b | |||
51414822f5 | |||
ffbac7d0bb | |||
a6f1ae7f20 | |||
1a1d79e761 | |||
a9e6a673ae | |||
1178851280 | |||
76ab26cc3e | |||
a6630e25af | |||
5d0f1c3c8f | |||
7517e53468 | |||
f15474ade8 | |||
1c282ab99a | |||
825181ea9d | |||
d0db23e95a | |||
de11fe0c83 | |||
89d56ae435 | |||
b5f60af94c | |||
7efbf3a827 | |||
1cf5b0c7c1 | |||
1ad7e0c5ec | |||
4e65fbfee5 | |||
00fe2c5606 | |||
775358e4c2 | |||
23f5b2abbe | |||
c2a2110d71 | |||
c6a14b1edd | |||
817e83fc01 | |||
6834dcab1c | |||
784d345828 | |||
e655f16c35 | |||
4fb7e72fe5 | |||
48c8adfe1b | |||
11bd2f2509 | |||
a7afd133f5 | |||
58d28a5f12 | |||
0d9be2135f | |||
b2b05b7c20 | |||
17cd426c72 | |||
d712a71741 | |||
30521a37ad | |||
a9459bf7b5 | |||
9c44c60794 | |||
9f0d9db6e4 | |||
c7751f4df0 | |||
b91b15d86e | |||
6100c0ea14 | |||
068eac255b | |||
d8f6be686d | |||
24ec813967 | |||
9cd0ae5e2d | |||
87701289a3 | |||
0927386890 | |||
1c77f9e543 | |||
8f4601fbac | |||
23dd5b4a53 | |||
83740eae4a | |||
c64331f48f | |||
aa8cd7319a | |||
c22dcc266f | |||
1091c5e59f | |||
6831d64591 | |||
ae1a972d78 | |||
6302e4001a | |||
f4d25039cb | |||
b06e35b568 | |||
cedd12d86a | |||
24e958a0a7 | |||
cf5a21e4a1 | |||
c30790797f | |||
ce55767091 | |||
3b1a5a1b8a | |||
52472508e9 | |||
fa32317780 | |||
8c3a94eaf2 | |||
b3a2665e0f | |||
32494c226e | |||
8601b33c07 | |||
b46f1b8ca7 | |||
77af40c025 | |||
53b5f14f59 | |||
a26ad5a332 | |||
8aedc27a63 | |||
e80d1d2876 | |||
2c358eaf51 | |||
1f34be47d9 | |||
a79f5d77ad | |||
1c8686001f | |||
3da8d71d7d | |||
53cf628503 | |||
e585f2fb48 | |||
8ad846fda5 | |||
d4e1fa45d0 | |||
7d25fa3c72 | |||
1d399a80a0 | |||
9eb72889b4 | |||
47956ddf7e | |||
540ef9b1fc | |||
2732c8bae1 | |||
98aebed88e | |||
6073f3073e | |||
c8fbeb3aa2 | |||
e00fb69b25 | |||
4ee0a78ee6 | |||
e2bc95e1bd | |||
91b6458e2d | |||
e734c94fa2 | |||
407a9fee0c | |||
63c811b3a6 | |||
bd43d64dd5 | |||
a02685e109 | |||
3cbec5453b | |||
a7cbcb1bb9 | |||
63e384a381 | |||
7f0dd2487d | |||
e8ecbcdf01 | |||
d4dde0bcaf | |||
24a8c13f36 | |||
7d0657f13c | |||
3819d25418 | |||
ca5def1b8f | |||
3ce17bf8f6 | |||
7df6650e9c | |||
7671f4ab1c | |||
29610621ec | |||
336323f53c |
1115
.circleci/config.yml
1115
.circleci/config.yml
File diff suppressed because it is too large
Load Diff
60
.clang-tidy
60
.clang-tidy
@ -1,51 +1,31 @@
|
||||
---
|
||||
# NOTE: there must be no spaces before the '-', so put the comma first.
|
||||
# NOTE there must be no spaces before the '-', so put the comma first.
|
||||
Checks: '
|
||||
*
|
||||
,clang-analyzer-*
|
||||
,modernize-*
|
||||
,-cert-dcl21-cpp
|
||||
,-cert-err58-cpp
|
||||
,-cert-err60-cpp
|
||||
,-clang-diagnostic-*
|
||||
,-cppcoreguidelines-owning-memory
|
||||
-*
|
||||
,bugprone-*
|
||||
,-bugprone-macro-parentheses
|
||||
,-bugprone-forward-declaration-namespace
|
||||
,cppcoreguidelines-*
|
||||
,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
|
||||
,-cppcoreguidelines-pro-bounds-constant-array-index
|
||||
,-cppcoreguidelines-pro-type-member-init
|
||||
,-cppcoreguidelines-pro-type-static-cast-downcast
|
||||
,-cppcoreguidelines-pro-type-union-access
|
||||
,-cppcoreguidelines-pro-bounds-pointer-arithmetic
|
||||
,-cppcoreguidelines-pro-bounds-constant-array-index
|
||||
,-cppcoreguidelines-pro-type-cstyle-cast
|
||||
,-cppcoreguidelines-pro-type-reinterpret-cast
|
||||
,-cppcoreguidelines-pro-type-vararg
|
||||
,-cppcoreguidelines-special-member-functions
|
||||
,-fuchsia-*
|
||||
,-google-build-using-namespace
|
||||
,-google-default-arguments
|
||||
,-google-explicit-constructor
|
||||
,-google-readability-braces-around-statements
|
||||
,-google-readability-namespace-comments
|
||||
,-google-readability-todo
|
||||
,-google-runtime-references
|
||||
,-google-runtime-references
|
||||
,-hicpp-braces-around-statements
|
||||
,-hicpp-explicit-conversions
|
||||
,-hicpp-member-init
|
||||
,-hicpp-no-array-decay
|
||||
,-hicpp-signed-bitwise
|
||||
,-hicpp-special-member-functions
|
||||
,-hicpp-vararg
|
||||
,-llvm-header-guard
|
||||
,-llvm-include-order
|
||||
,-llvm-namespace-comment
|
||||
,-misc-unused-parameters
|
||||
,-modernize-make-unique
|
||||
,-cppcoreguidelines-interfaces-global-init
|
||||
,-cppcoreguidelines-owning-memory
|
||||
,hicpp-signed-bitwise
|
||||
,hicpp-exception-baseclass
|
||||
,hicpp-avoid-goto
|
||||
,modernize-*
|
||||
,-modernize-use-default-member-init
|
||||
,-performance-unnecessary-value-param
|
||||
,-readability-braces-around-statements
|
||||
,-readability-else-after-return
|
||||
,-readability-implicit-bool-conversion
|
||||
,-readability-named-parameter
|
||||
,-modernize-return-braced-init-list
|
||||
,-modernize-use-auto
|
||||
'
|
||||
WarningsAsErrors: ''
|
||||
HeaderFilterRegex: 'torch/csrc/'
|
||||
WarningsAsErrors: '*'
|
||||
HeaderFilterRegex: 'torch/csrc/.*'
|
||||
AnalyzeTemporaryDtors: false
|
||||
CheckOptions:
|
||||
...
|
||||
|
49
.github/ISSUE_TEMPLATE/bug-report.md
vendored
Normal file
49
.github/ISSUE_TEMPLATE/bug-report.md
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
---
|
||||
name: "\U0001F41B Bug Report"
|
||||
about: Submit a bug report to help us improve PyTorch
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug
|
||||
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
|
||||
## To Reproduce
|
||||
|
||||
Steps to reproduce the behavior:
|
||||
|
||||
1.
|
||||
1.
|
||||
1.
|
||||
|
||||
<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
|
||||
|
||||
## Expected behavior
|
||||
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
## Environment
|
||||
|
||||
Please copy and paste the output from our
|
||||
[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
|
||||
(or fill out the checklist below manually).
|
||||
|
||||
You can get the script and run it with:
|
||||
```
|
||||
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
|
||||
# For security purposes, please check the contents of collect_env.py before running it.
|
||||
python collect_env.py
|
||||
```
|
||||
|
||||
- PyTorch Version (e.g., 1.0):
|
||||
- OS (e.g., Linux):
|
||||
- How you installed PyTorch (`conda`, `pip`, source):
|
||||
- Build command you used (if compiling from source):
|
||||
- Python version:
|
||||
- CUDA/cuDNN version:
|
||||
- GPU models and configuration:
|
||||
- Any other relevant information:
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context about the problem here. -->
|
9
.github/ISSUE_TEMPLATE/documentation.md
vendored
Normal file
9
.github/ISSUE_TEMPLATE/documentation.md
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
name: "\U0001F4DA Documentation"
|
||||
about: Report an issue related to https://pytorch.org/docs
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
<!-- A clear and concise description of what content in https://pytorch.org/docs is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new -->
|
24
.github/ISSUE_TEMPLATE/feature-request.md
vendored
Normal file
24
.github/ISSUE_TEMPLATE/feature-request.md
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
name: "\U0001F680Feature Request"
|
||||
about: Submit a proposal/request for a new PyTorch feature
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Feature
|
||||
<!-- A clear and concise description of the feature proposal -->
|
||||
|
||||
## Motivation
|
||||
|
||||
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
|
||||
|
||||
## Pitch
|
||||
|
||||
<!-- A clear and concise description of what you want to happen. -->
|
||||
|
||||
## Alternatives
|
||||
|
||||
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
|
||||
|
||||
## Additional context
|
||||
|
||||
<!-- Add any other context or screenshots about the feature request here. -->
|
13
.github/ISSUE_TEMPLATE/questions-help-support.md
vendored
Normal file
13
.github/ISSUE_TEMPLATE/questions-help-support.md
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
---
|
||||
name: "❓Questions/Help/Support"
|
||||
about: Do you need support? We have resources.
|
||||
|
||||
---
|
||||
|
||||
## ❓ Questions and Help
|
||||
|
||||
### Please note that this issue tracker is not a help form and this issue will be closed.
|
||||
|
||||
We have a set of [listed resources available on the website](https://pytorch.org/resources). Our primary means of support is our discussion forum:
|
||||
|
||||
- [Discussion Forum](https://discuss.pytorch.org/)
|
22
.gitignore
vendored
22
.gitignore
vendored
@ -25,9 +25,8 @@ aten/src/ATen/cuda/CUDAConfig.h
|
||||
build/
|
||||
dist/
|
||||
docs/src/**/*
|
||||
docs/cpp/xml/
|
||||
docs/cpp/html/
|
||||
docs/cpp/api/
|
||||
docs/cpp/build
|
||||
docs/cpp/source/api
|
||||
test/.coverage
|
||||
test/cpp/api/mnist
|
||||
test/custom_operator/model.pt
|
||||
@ -45,7 +44,7 @@ torch/csrc/cudnn/cuDNN.cpp
|
||||
torch/csrc/generated
|
||||
torch/csrc/generic/TensorMethods.cpp
|
||||
torch/csrc/jit/generated/*
|
||||
torch/csrc/jit/fusers/Config.h
|
||||
torch/csrc/jit/fuser/config.h
|
||||
torch/csrc/nn/THCUNN.cpp
|
||||
torch/csrc/nn/THCUNN.cwrap
|
||||
torch/csrc/nn/THNN_generic.cpp
|
||||
@ -200,6 +199,14 @@ caffe2.egg-info
|
||||
# Atom/Watchman required file
|
||||
.watchmanconfig
|
||||
|
||||
# Files generated by CLion
|
||||
cmake-build-debug
|
||||
|
||||
# Files generated by ctags
|
||||
CTAGS
|
||||
tags
|
||||
TAGS
|
||||
|
||||
# BEGIN NOT-CLEAN-FILES (setup.py handles this marker. Do not change.)
|
||||
#
|
||||
# Below files are not deleted by "setup.py clean".
|
||||
@ -207,3 +214,10 @@ caffe2.egg-info
|
||||
# Visual Studio Code files
|
||||
.vscode
|
||||
.vs
|
||||
|
||||
# YouCompleteMe config file
|
||||
.ycm_extra_conf.py
|
||||
|
||||
# Files generated when a patch is rejected
|
||||
*.orig
|
||||
*.rej
|
||||
|
24
.gitmodules
vendored
24
.gitmodules
vendored
@ -1,6 +1,3 @@
|
||||
[submodule "third_party/catch"]
|
||||
path = third_party/catch
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
[submodule "third_party/pybind11"]
|
||||
path = third_party/pybind11
|
||||
url = https://github.com/pybind/pybind11.git
|
||||
@ -13,9 +10,6 @@
|
||||
[submodule "third_party/googletest"]
|
||||
path = third_party/googletest
|
||||
url = https://github.com/google/googletest.git
|
||||
[submodule "third_party/nervanagpu"]
|
||||
path = third_party/nervanagpu
|
||||
url = https://github.com/NervanaSystems/nervanagpu.git
|
||||
[submodule "third_party/benchmark"]
|
||||
path = third_party/benchmark
|
||||
url = https://github.com/google/benchmark.git
|
||||
@ -64,9 +58,6 @@
|
||||
[submodule "third_party/onnx"]
|
||||
path = third_party/onnx
|
||||
url = https://github.com/onnx/onnx.git
|
||||
[submodule "third_party/cereal"]
|
||||
path = third_party/cereal
|
||||
url = https://github.com/USCiLab/cereal
|
||||
[submodule "third_party/onnx-tensorrt"]
|
||||
path = third_party/onnx-tensorrt
|
||||
url = https://github.com/onnx/onnx-tensorrt
|
||||
@ -76,3 +67,18 @@
|
||||
[submodule "third_party/ideep"]
|
||||
path = third_party/ideep
|
||||
url = https://github.com/intel/ideep
|
||||
[submodule "third_party/nccl/nccl"]
|
||||
path = third_party/nccl/nccl
|
||||
url = https://github.com/NVIDIA/nccl
|
||||
[submodule "third_party/gemmlowp/gemmlowp"]
|
||||
path = third_party/gemmlowp/gemmlowp
|
||||
url = https://github.com/google/gemmlowp.git
|
||||
[submodule "third_party/QNNPACK"]
|
||||
path = third_party/QNNPACK
|
||||
url = https://github.com/pytorch/QNNPACK
|
||||
[submodule "third_party/neon2sse"]
|
||||
path = third_party/neon2sse
|
||||
url = https://github.com/intel/ARM_NEON_2_x86_SSE.git
|
||||
[submodule "third_party/fbgemm"]
|
||||
path = third_party/fbgemm
|
||||
url = https://github.com/pytorch/fbgemm
|
||||
|
@ -4,7 +4,6 @@ set -ex
|
||||
|
||||
pip install --user --no-cache-dir hypothesis==3.59.0
|
||||
|
||||
|
||||
# The INSTALL_PREFIX here must match up with test.sh
|
||||
INSTALL_PREFIX="/usr/local/caffe2"
|
||||
LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
@ -26,22 +25,29 @@ if [ "$(which gcc)" != "/root/sccache/gcc" ]; then
|
||||
fi
|
||||
|
||||
# Setup wrapper scripts
|
||||
for compiler in cc c++ gcc g++ x86_64-linux-gnu-gcc; do
|
||||
wrapped="cc c++ gcc g++ x86_64-linux-gnu-gcc"
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then
|
||||
wrapped="$wrapped nvcc"
|
||||
fi
|
||||
for compiler in $wrapped; do
|
||||
(
|
||||
echo "#!/bin/sh"
|
||||
|
||||
# TODO: if/when sccache gains native support for an
|
||||
# SCCACHE_DISABLE flag analogous to ccache's CCACHE_DISABLE,
|
||||
# this can be removed. Alternatively, this can be removed when
|
||||
# https://github.com/pytorch/pytorch/issues/13362 is fixed.
|
||||
#
|
||||
# NOTE: carefully quoted - we want `which compiler` to be
|
||||
# resolved as we execute the script, but SCCACHE_DISABLE and
|
||||
# $@ to be evaluated when we execute the script
|
||||
echo 'test $SCCACHE_DISABLE && exec '"$(which $compiler)"' "$@"'
|
||||
|
||||
echo "exec $SCCACHE $(which $compiler) \"\$@\""
|
||||
) > "./sccache/$compiler"
|
||||
chmod +x "./sccache/$compiler"
|
||||
done
|
||||
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then
|
||||
(
|
||||
echo "#!/bin/sh"
|
||||
echo "exec $SCCACHE $(which nvcc) \"\$@\""
|
||||
) > "./sccache/nvcc"
|
||||
chmod +x "./sccache/nvcc"
|
||||
fi
|
||||
|
||||
export CACHE_WRAPPER_DIR="$PWD/sccache"
|
||||
|
||||
# CMake must find these wrapper scripts
|
||||
@ -93,7 +99,7 @@ fi
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Use special scripts for Android, conda, and setup builds
|
||||
# Use special scripts for Android and setup builds
|
||||
###############################################################################
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
export ANDROID_NDK=/opt/ndk
|
||||
@ -103,19 +109,6 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
CMAKE_ARGS+=("-DUSE_ZSTD=ON")
|
||||
"${ROOT_DIR}/scripts/build_android.sh" ${CMAKE_ARGS[*]} "$@"
|
||||
exit 0
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
|
||||
"${ROOT_DIR}/scripts/build_anaconda.sh" --skip-tests --install-locally "$@"
|
||||
report_compile_cache_stats
|
||||
|
||||
# This build will be tested against onnx tests, which needs onnx installed.
|
||||
# At this point the visible protbuf installation will be in conda, since one
|
||||
# of Caffe2's dependencies uses conda, so the correct protobuf include
|
||||
# headers are those in conda as well
|
||||
# This path comes from install_anaconda.sh which installs Anaconda into the
|
||||
# docker image
|
||||
PROTOBUF_INCDIR=/opt/conda/include pip install -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
|
||||
report_compile_cache_stats
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
@ -149,26 +142,19 @@ if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then
|
||||
export PATH="/usr/local/cuda/bin:$PATH"
|
||||
fi
|
||||
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
|
||||
# TODO: This is patching the official FindHip to properly handly
|
||||
# cmake generator expression. A PR is opened in the upstream repo here:
|
||||
# https://github.com/ROCm-Developer-Tools/HIP/pull/516
|
||||
# remove this hack once it's merged.
|
||||
if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then
|
||||
sudo sed -i 's/\ -I${dir}/\ $<$<BOOL:${dir}>:-I${dir}>/' /opt/rocm/hip/cmake/FindHIP.cmake
|
||||
fi
|
||||
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
export HCC_AMDGPU_TARGET=gfx900
|
||||
|
||||
# The link time of libcaffe2_hip.so takes 40 minutes, according to
|
||||
# https://github.com/RadeonOpenCompute/hcc#thinlto-phase-1---implemented
|
||||
# using using ThinLTO could significantly improve link-time performance.
|
||||
export KMTHINLTO=1
|
||||
# This is needed to enable ImageInput operator in resnet50_trainer
|
||||
CMAKE_ARGS+=("-USE_OPENCV=ON")
|
||||
# This is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip
|
||||
CMAKE_ARGS+=("-USE_LMDB=ON")
|
||||
|
||||
########## HIPIFY Caffe2 operators
|
||||
${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py"
|
||||
${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py"
|
||||
${PYTHON} "${ROOT_DIR}/tools/amd_build/build_amd.py"
|
||||
fi
|
||||
|
||||
# building bundled nccl in this config triggers a bug in nvlink. For
|
||||
# more, see https://github.com/pytorch/pytorch/issues/14486
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-cuda8*-cudnn7* ]]; then
|
||||
CMAKE_ARGS+=("-DUSE_SYSTEM_NCCL=ON")
|
||||
fi
|
||||
|
||||
# Try to include Redis support for Linux builds
|
||||
@ -236,7 +222,6 @@ else
|
||||
report_compile_cache_stats
|
||||
fi
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Install ONNX
|
||||
###############################################################################
|
||||
|
@ -15,14 +15,6 @@ fi
|
||||
# The prefix must mirror the setting from build.sh
|
||||
INSTALL_PREFIX="/usr/local/caffe2"
|
||||
|
||||
# Anaconda builds have a special install prefix and python
|
||||
if [[ "$BUILD_ENVIRONMENT" == conda* ]]; then
|
||||
# This path comes from install_anaconda.sh which installs Anaconda into the
|
||||
# docker image
|
||||
PYTHON="/opt/conda/bin/python"
|
||||
INSTALL_PREFIX="/opt/conda/"
|
||||
fi
|
||||
|
||||
# Add the site-packages in the caffe2 install prefix to the PYTHONPATH
|
||||
SITE_DIR=$($PYTHON -c "from distutils import sysconfig; print(sysconfig.get_python_lib(prefix=''))")
|
||||
INSTALL_SITE_DIR="${INSTALL_PREFIX}/${SITE_DIR}"
|
||||
@ -34,11 +26,9 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
fi
|
||||
|
||||
# Set PYTHONPATH and LD_LIBRARY_PATH so that python can find the installed
|
||||
# Caffe2. This shouldn't be done on Anaconda, as Anaconda should handle this.
|
||||
if [[ "$BUILD_ENVIRONMENT" != conda* ]]; then
|
||||
export PYTHONPATH="${PYTHONPATH}:$INSTALL_SITE_DIR"
|
||||
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${INSTALL_PREFIX}/lib"
|
||||
fi
|
||||
# Caffe2.
|
||||
export PYTHONPATH="${PYTHONPATH}:$INSTALL_SITE_DIR"
|
||||
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${INSTALL_PREFIX}/lib"
|
||||
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
@ -97,18 +87,8 @@ if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]]; then
|
||||
EXTRA_TESTS+=("$CAFFE2_PYPATH/contrib/nccl")
|
||||
fi
|
||||
|
||||
conda_ignore_test=()
|
||||
if [[ $BUILD_ENVIRONMENT == conda* ]]; then
|
||||
# These tests both assume Caffe2 was built with leveldb, which is not the case
|
||||
conda_ignore_test+=("--ignore $CAFFE2_PYPATH/python/dataio_test.py")
|
||||
conda_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/checkpoint_test.py")
|
||||
fi
|
||||
|
||||
rocm_ignore_test=()
|
||||
if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
|
||||
# Currently these tests are failing on ROCM platform:
|
||||
|
||||
# Unknown reasons, need to debug
|
||||
@ -116,31 +96,23 @@ if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
|
||||
rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/piecewise_linear_transform_test.py")
|
||||
rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/softmax_ops_test.py")
|
||||
rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/unique_ops_test.py")
|
||||
|
||||
# Need to go through roi ops to replace max(...) with fmaxf(...)
|
||||
rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/roi_align_rotated_op_test.py")
|
||||
|
||||
# Our cuda top_k op has some asm code, the hipified version doesn't
|
||||
# compile yet, so we don't have top_k operator for now
|
||||
rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/top_k_test.py")
|
||||
|
||||
# Our AMD CI boxes have 4 gpus on each
|
||||
# Remove this once we have added multi-gpu support
|
||||
export HIP_VISIBLE_DEVICES=$(($BUILD_NUMBER % 4))
|
||||
fi
|
||||
|
||||
# Python tests
|
||||
# NB: Warnings are disabled because they make it harder to see what
|
||||
# the actual erroring test is
|
||||
echo "Running Python tests.."
|
||||
pip install --user pytest-sugar
|
||||
"$PYTHON" \
|
||||
-m pytest \
|
||||
-x \
|
||||
-v \
|
||||
--disable-warnings \
|
||||
--junit-xml="$TEST_DIR/python/result.xml" \
|
||||
--ignore "$CAFFE2_PYPATH/python/test/executor_test.py" \
|
||||
--ignore "$CAFFE2_PYPATH/python/operator_test/matmul_op_test.py" \
|
||||
--ignore "$CAFFE2_PYPATH/python/operator_test/pack_ops_test.py" \
|
||||
--ignore "$CAFFE2_PYPATH/python/mkl/mkl_sbn_speed_test.py" \
|
||||
${conda_ignore_test[@]} \
|
||||
${rocm_ignore_test[@]} \
|
||||
"$CAFFE2_PYPATH/python" \
|
||||
"${EXTRA_TESTS[@]}"
|
||||
|
@ -14,8 +14,18 @@ clang --version
|
||||
# symbolize=1: Gives us much better errors when things go wrong
|
||||
export ASAN_OPTIONS=detect_leaks=0:symbolize=1
|
||||
|
||||
# FIXME: Remove the hardcoded "-pthread" option.
|
||||
# With asan build, the cmake thread CMAKE_HAVE_LIBC_CREATE[1] checking will
|
||||
# succeed because "pthread_create" is in libasan.so. However, libasan doesn't
|
||||
# have the full pthread implementation. Other advanced pthread functions doesn't
|
||||
# exist in libasan.so[2]. If we need some pthread advanced functions, we still
|
||||
# need to link the pthread library.
|
||||
# [1] https://github.com/Kitware/CMake/blob/8cabaaf054a16ea9c8332ce8e9291bd026b38c62/Modules/FindThreads.cmake#L135
|
||||
# [2] https://wiki.gentoo.org/wiki/AddressSanitizer/Problems
|
||||
#
|
||||
# TODO: Make the ASAN flags a more unified env var
|
||||
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
|
||||
CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -shared-libasan" \
|
||||
NO_CUDA=1 \
|
||||
CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -shared-libasan -pthread" \
|
||||
CXX_FLAGS="-pthread" \
|
||||
NO_CUDA=1 USE_MKLDNN=0 \
|
||||
python setup.py install
|
||||
|
@ -1,5 +1,12 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Required environment variable: $BUILD_ENVIRONMENT
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
# need to set it yourself.
|
||||
|
||||
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-build"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
# For distributed, four environmental configs:
|
||||
# (1) build with only NCCL
|
||||
# (2) build with NCCL and MPI
|
||||
@ -7,15 +14,19 @@
|
||||
# (4) build with neither
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]] || [[ "$BUILD_ENVIRONMENT" == *-trusty-py2.7.9* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9*gcc7* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]] || [[ "$BUILD_ENVIRONMENT" == *-trusty-py2.7.9* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
sudo apt-get install -y --no-install-recommends openssh-client openssh-server
|
||||
sudo apt-get -qq update
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-trusty-py2.7.9* ]]; then
|
||||
sudo apt-get -qq install openmpi-bin libopenmpi-dev
|
||||
else
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
fi
|
||||
sudo apt-get -qq install --no-install-recommends openssh-client openssh-server
|
||||
sudo mkdir -p /var/run/sshd
|
||||
fi
|
||||
|
||||
@ -23,13 +34,6 @@ if [[ "$BUILD_ENVIRONMENT" == "pytorch-linux-xenial-py3-clang5-asan" ]]; then
|
||||
exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" $*
|
||||
fi
|
||||
|
||||
# Required environment variable: $BUILD_ENVIRONMENT
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
# need to set it yourself.
|
||||
|
||||
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-build"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
echo "Python version:"
|
||||
python --version
|
||||
|
||||
@ -40,34 +44,56 @@ echo "CMake version:"
|
||||
cmake --version
|
||||
|
||||
# TODO: Don't run this...
|
||||
pip install -r requirements.txt || true
|
||||
pip install -q -r requirements.txt || true
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
# This is necessary in order to cross compile (or else we'll have missing GPU device).
|
||||
export HCC_AMDGPU_TARGET=gfx900
|
||||
# When hcc runs out of memory, it silently exits without stopping
|
||||
# the build process, leaving undefined symbols in the shared lib
|
||||
# which will cause undefined symbol errors when later running
|
||||
# tests. Setting MAX_JOBS to smaller number to make CI less flaky.
|
||||
export MAX_JOBS=4
|
||||
|
||||
# These environment variables are not set on CI when we were running as the Jenkins user.
|
||||
# The HIP Utility scripts require these environment variables to be set in order to run without error.
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
# ROCm CI is using Caffe2 docker images, which needs these wrapper
|
||||
# scripts to correctly use sccache.
|
||||
if [ -n "${SCCACHE_BUCKET}" ]; then
|
||||
mkdir -p ./sccache
|
||||
|
||||
# This environment variable enabled HCC Optimizations that speed up the linking stage.
|
||||
# https://github.com/RadeonOpenCompute/hcc#hcc-with-thinlto-linking
|
||||
export KMTHINLTO=1
|
||||
SCCACHE="$(which sccache)"
|
||||
if [ -z "${SCCACHE}" ]; then
|
||||
echo "Unable to find sccache..."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
|
||||
sudo apt-get install libc++1
|
||||
sudo apt-get install libc++abi1
|
||||
# Setup wrapper scripts
|
||||
for compiler in cc c++ gcc g++ x86_64-linux-gnu-gcc; do
|
||||
(
|
||||
echo "#!/bin/sh"
|
||||
echo "exec $SCCACHE $(which $compiler) \"\$@\""
|
||||
) > "./sccache/$compiler"
|
||||
chmod +x "./sccache/$compiler"
|
||||
done
|
||||
|
||||
python tools/amd_build/build_pytorch_amd.py
|
||||
python tools/amd_build/build_caffe2_amd.py
|
||||
USE_ROCM=1 python setup.py install --user
|
||||
export CACHE_WRAPPER_DIR="$PWD/sccache"
|
||||
|
||||
# CMake must find these wrapper scripts
|
||||
export PATH="$CACHE_WRAPPER_DIR:$PATH"
|
||||
fi
|
||||
|
||||
python tools/amd_build/build_amd.py
|
||||
# OPENCV is needed to enable ImageInput operator in caffe2 resnet5_trainer
|
||||
# LMDB is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip
|
||||
USE_ROCM=1 USE_LMDB=1 USE_OPENCV=1 python setup.py install --user
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# TODO: Don't install this here
|
||||
if ! which conda; then
|
||||
pip install mkl mkl-devel
|
||||
pip install -q mkl mkl-devel
|
||||
if [[ "$BUILD_ENVIRONMENT" == *trusty-py3.6-gcc7.2* ]] || [[ "$BUILD_ENVIRONMENT" == *trusty-py3.6-gcc4.8* ]]; then
|
||||
export USE_MKLDNN=1
|
||||
else
|
||||
export USE_MKLDNN=0
|
||||
fi
|
||||
fi
|
||||
|
||||
# sccache will fail for CUDA builds if all cores are used for compiling
|
||||
@ -102,26 +128,24 @@ fi
|
||||
# Add the test binaries so that they won't be git clean'ed away
|
||||
git add -f build/bin
|
||||
|
||||
# Test C FFI plugins
|
||||
# cffi install doesn't work for Python 3.7
|
||||
if [[ "$BUILD_ENVIRONMENT" != *pynightly* ]]; then
|
||||
# TODO: Don't run this here
|
||||
pip install cffi
|
||||
git clone https://github.com/pytorch/extension-ffi.git
|
||||
pushd extension-ffi/script
|
||||
python build.py
|
||||
popd
|
||||
fi
|
||||
|
||||
# Test documentation build
|
||||
if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda8-cudnn6-py3* ]]; then
|
||||
pushd docs
|
||||
# TODO: Don't run this here
|
||||
pip install -r requirements.txt || true
|
||||
pip install -q -r requirements.txt || true
|
||||
LC_ALL=C make html
|
||||
popd
|
||||
fi
|
||||
|
||||
# Test standalone c10 build
|
||||
if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda8-cudnn6-py3* ]]; then
|
||||
mkdir -p c10/build
|
||||
pushd c10/build
|
||||
cmake ..
|
||||
make -j
|
||||
popd
|
||||
fi
|
||||
|
||||
# Test no-Python build
|
||||
if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
|
||||
echo "Building libtorch"
|
||||
|
@ -14,6 +14,8 @@ pytorch-linux-xenial-cuda9-cudnn7-py3-build
|
||||
pytorch-linux-xenial-cuda9-cudnn7-py3-test
|
||||
pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-build
|
||||
pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-test
|
||||
pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7-build
|
||||
pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7-test
|
||||
pytorch-linux-xenial-py3-clang5-asan-build
|
||||
pytorch-linux-xenial-py3-clang5-asan-test
|
||||
pytorch-linux-trusty-py2.7.9-build
|
||||
@ -40,8 +42,9 @@ pytorch-macos-10.13-cuda9.2-cudnn7-py3-build
|
||||
pytorch-docker-build-test
|
||||
short-perf-test-cpu
|
||||
short-perf-test-gpu
|
||||
py2-clang3.8-rocm1.7.1-ubuntu16.04-build
|
||||
py2-clang3.8-rocm1.7.1-ubuntu16.04-test
|
||||
py2-clang7-rocmdeb-ubuntu16.04-build
|
||||
py2-clang7-rocmdeb-ubuntu16.04-test
|
||||
py2-devtoolset7-rocmrpm-centos7.5-build
|
||||
pytorch-ppc64le-cuda9.2-cudnn7-py3-build
|
||||
pytorch-ppc64le-cuda9.2-cudnn7-py3-test
|
||||
pytorch-ppc64le-cuda9.1-cudnn7-py3-build
|
||||
|
@ -15,7 +15,8 @@ if [ ! -d "${PYTORCH_ENV_DIR}/miniconda3" ]; then
|
||||
fi
|
||||
export PATH="${PYTORCH_ENV_DIR}/miniconda3/bin:$PATH"
|
||||
source ${PYTORCH_ENV_DIR}/miniconda3/bin/activate
|
||||
conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja
|
||||
conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja six
|
||||
pip install hypothesis
|
||||
if [ -z "${IN_CIRCLECI}" ]; then
|
||||
rm -rf ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch*
|
||||
fi
|
||||
|
@ -1,5 +1,6 @@
|
||||
import sys
|
||||
import json
|
||||
import math
|
||||
import numpy
|
||||
import argparse
|
||||
|
||||
@ -35,14 +36,25 @@ else:
|
||||
print("population mean: ", mean)
|
||||
print("population sigma: ", sigma)
|
||||
|
||||
# Let the test pass if baseline number is NaN (which happened in
|
||||
# the past when we didn't have logic for catching NaN numbers)
|
||||
if math.isnan(mean) or math.isnan(sigma):
|
||||
mean = sys.maxsize
|
||||
sigma = 0.001
|
||||
|
||||
sample_stats_data = json.loads(args.sample_stats)
|
||||
|
||||
sample_mean = sample_stats_data['mean']
|
||||
sample_sigma = sample_stats_data['sigma']
|
||||
sample_mean = float(sample_stats_data['mean'])
|
||||
sample_sigma = float(sample_stats_data['sigma'])
|
||||
|
||||
print("sample mean: ", sample_mean)
|
||||
print("sample sigma: ", sample_sigma)
|
||||
|
||||
if math.isnan(sample_mean):
|
||||
raise Exception('''Error: sample mean is NaN''')
|
||||
elif math.isnan(sample_sigma):
|
||||
raise Exception('''Error: sample sigma is NaN''')
|
||||
|
||||
z_value = (sample_mean - mean) / sigma
|
||||
|
||||
print("z-value: ", z_value)
|
||||
|
@ -20,6 +20,9 @@ test_gpu_speed_mnist () {
|
||||
SAMPLE_ARRAY=()
|
||||
NUM_RUNS=$1
|
||||
|
||||
# Needs warm up to get accurate number
|
||||
python main.py --epochs 1 --no-log
|
||||
|
||||
for (( i=1; i<=$NUM_RUNS; i++ )) do
|
||||
runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log)
|
||||
echo $runtime
|
||||
|
@ -1,36 +1,40 @@
|
||||
#!/bin/bash
|
||||
|
||||
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-test"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
# Required environment variable: $BUILD_ENVIRONMENT
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
# need to set it yourself.
|
||||
|
||||
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-test"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
echo "Testing pytorch"
|
||||
|
||||
if [ -n "${IN_CIRCLECI}" ]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
sudo apt-get install -y --no-install-recommends openssh-client openssh-server
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
sudo apt-get -qq install --no-install-recommends openssh-client openssh-server
|
||||
sudo mkdir -p /var/run/sshd
|
||||
fi
|
||||
fi
|
||||
|
||||
# JIT C++ extensions require ninja.
|
||||
git clone https://github.com/ninja-build/ninja --quiet
|
||||
pushd ninja
|
||||
python ./configure.py --bootstrap
|
||||
export PATH="$PWD:$PATH"
|
||||
popd
|
||||
# --user breaks ppc64le builds and these packages are already in ppc64le docker
|
||||
if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
|
||||
# JIT C++ extensions require ninja.
|
||||
pip install -q ninja --user
|
||||
# ninja is installed in /var/lib/jenkins/.local/bin
|
||||
export PATH="/var/lib/jenkins/.local/bin:$PATH"
|
||||
|
||||
# TODO: move this to Docker
|
||||
pip install -q hypothesis --user
|
||||
fi
|
||||
|
||||
# DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems
|
||||
# if you're not careful. Check this if you made some changes and the
|
||||
@ -72,6 +76,8 @@ fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
export PYTORCH_TEST_WITH_ROCM=1
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
fi
|
||||
|
||||
if [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]]; then
|
||||
@ -102,7 +108,9 @@ test_aten() {
|
||||
SUDO=sudo
|
||||
fi
|
||||
|
||||
${SUDO} ln -s "$TORCH_LIB_PATH"/libc10* build/bin
|
||||
${SUDO} ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
|
||||
${SUDO} ln -s "$TORCH_LIB_PATH"/libmkldnn* build/bin
|
||||
${SUDO} ln -s "$TORCH_LIB_PATH"/libnccl* build/bin
|
||||
|
||||
ls build/bin
|
||||
@ -124,7 +132,7 @@ test_torchvision() {
|
||||
# this should be a transient requirement...)
|
||||
# See https://github.com/pytorch/pytorch/issues/7525
|
||||
#time python setup.py install
|
||||
pip install --user .
|
||||
pip install -q --user .
|
||||
popd
|
||||
}
|
||||
|
||||
@ -137,7 +145,7 @@ test_libtorch() {
|
||||
else
|
||||
"$CPP_BUILD"/caffe2/bin/test_jit "[cpu]"
|
||||
fi
|
||||
python tools/download_mnist.py --quiet -d test/cpp/api/mnist
|
||||
python tools/download_mnist.py --quiet -d mnist
|
||||
OMP_NUM_THREADS=2 "$CPP_BUILD"/caffe2/bin/test_api
|
||||
fi
|
||||
}
|
||||
@ -158,19 +166,20 @@ test_custom_script_ops() {
|
||||
}
|
||||
|
||||
if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then
|
||||
test_torchvision
|
||||
test_python_nn
|
||||
test_python_all_except_nn
|
||||
test_aten
|
||||
test_torchvision
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
else
|
||||
if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then
|
||||
test_torchvision
|
||||
test_python_nn
|
||||
elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then
|
||||
test_torchvision
|
||||
test_python_all_except_nn
|
||||
test_aten
|
||||
test_torchvision
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
fi
|
||||
|
@ -55,11 +55,11 @@ set LIB=%cd%\\mkl\\lib;%LIB
|
||||
:: Install MAGMA
|
||||
if "%REBUILD%"=="" (
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
curl -k https://s3.amazonaws.com/ossci-windows/magma_cuda90_release_mkl_2018.2.185.7z --output magma_cuda90_release_mkl_2018.2.185.7z
|
||||
curl -k https://s3.amazonaws.com/ossci-windows/magma_2.4.0_cuda90_release.7z --output magma_2.4.0_cuda90_release.7z
|
||||
) else (
|
||||
aws s3 cp s3://ossci-windows/magma_cuda90_release_mkl_2018.2.185.7z magma_cuda90_release_mkl_2018.2.185.7z --quiet
|
||||
aws s3 cp s3://ossci-windows/magma_2.4.0_cuda90_release.7z magma_2.4.0_cuda90_release.7z --quiet
|
||||
)
|
||||
7z x -aoa magma_cuda90_release_mkl_2018.2.185.7z -omagma
|
||||
7z x -aoa magma_2.4.0_cuda90_release.7z -omagma
|
||||
)
|
||||
set MAGMA_HOME=%cd%\\magma
|
||||
|
||||
@ -80,18 +80,29 @@ if "%REBUILD%"=="" (
|
||||
)
|
||||
|
||||
:: Install Miniconda3
|
||||
if "%REBUILD%"=="" (
|
||||
IF EXIST C:\\Jenkins\\Miniconda3 ( rd /s /q C:\\Jenkins\\Miniconda3 )
|
||||
curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O
|
||||
.\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=C:\\Jenkins\\Miniconda3
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
set CONDA_PARENT_DIR=%CD%
|
||||
) else (
|
||||
set CONDA_PARENT_DIR=C:\\Jenkins
|
||||
)
|
||||
if "%REBUILD%"=="" (
|
||||
IF EXIST %CONDA_PARENT_DIR%\\Miniconda3 ( rd /s /q %CONDA_PARENT_DIR%\\Miniconda3 )
|
||||
curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O
|
||||
.\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\\Miniconda3
|
||||
)
|
||||
call %CONDA_PARENT_DIR%\\Miniconda3\\Scripts\\activate.bat %CONDA_PARENT_DIR%\\Miniconda3
|
||||
if "%REBUILD%"=="" (
|
||||
:: We have to pin Python version to 3.6.7, until mkl supports Python 3.7
|
||||
call conda install -y -q python=3.6.7 numpy cffi pyyaml boto3
|
||||
)
|
||||
call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3
|
||||
if "%REBUILD%"=="" ( call conda install -y -q numpy cffi pyyaml boto3 )
|
||||
|
||||
:: Install ninja
|
||||
if "%REBUILD%"=="" ( pip install ninja )
|
||||
|
||||
set WORKING_DIR=%CD%
|
||||
call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x64
|
||||
call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x86_amd64
|
||||
cd %WORKING_DIR%
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
@ -129,7 +140,7 @@ if not "%USE_CUDA%"=="0" (
|
||||
if "%REBUILD%"=="" (
|
||||
sccache --show-stats
|
||||
sccache --zero-stats
|
||||
rd /s /q C:\\Jenkins\\Miniconda3\\Lib\\site-packages\\torch
|
||||
rd /s /q %CONDA_PARENT_DIR%\\Miniconda3\\Lib\\site-packages\\torch
|
||||
copy %CD%\\tmp_bin\\sccache.exe tmp_bin\\nvcc.exe
|
||||
)
|
||||
|
||||
@ -139,9 +150,10 @@ if not "%USE_CUDA%"=="0" (
|
||||
|
||||
python setup.py install && sccache --show-stats && (
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
echo "NOTE: To run \`import torch\`, please make sure to activate the conda environment by running \`call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3\` in Command Prompt before running Git Bash."
|
||||
echo NOTE: To run \`import torch\`, please make sure to activate the conda environment by running \`call %CONDA_PARENT_DIR%\\Miniconda3\\Scripts\\activate.bat %CONDA_PARENT_DIR%\\Miniconda3\` in Command Prompt before running Git Bash.
|
||||
) else (
|
||||
7z a %IMAGE_COMMIT_TAG%.7z C:\\Jenkins\\Miniconda3\\Lib\\site-packages\\torch && python ci_scripts\\upload_image.py %IMAGE_COMMIT_TAG%.7z
|
||||
mv %CD%\\build\\bin\\test_api.exe %CONDA_PARENT_DIR%\\Miniconda3\\Lib\\site-packages\\torch\\lib
|
||||
7z a %IMAGE_COMMIT_TAG%.7z %CONDA_PARENT_DIR%\\Miniconda3\\Lib\\site-packages\\torch && python ci_scripts\\upload_image.py %IMAGE_COMMIT_TAG%.7z
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -39,15 +39,26 @@ cat >ci_scripts/setup_pytorch_env.bat <<EOL
|
||||
set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\ProgramData\\chocolatey\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%
|
||||
|
||||
:: Install Miniconda3
|
||||
IF EXIST C:\\Jenkins\\Miniconda3 ( rd /s /q C:\\Jenkins\\Miniconda3 )
|
||||
curl https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O
|
||||
.\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=C:\\Jenkins\\Miniconda3
|
||||
call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3
|
||||
call conda install -y -q numpy mkl cffi pyyaml boto3
|
||||
|
||||
pip install ninja
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
set CONDA_PARENT_DIR=%CD%
|
||||
) else (
|
||||
set CONDA_PARENT_DIR=C:\\Jenkins
|
||||
)
|
||||
if NOT "%BUILD_ENVIRONMENT%"=="" (
|
||||
IF EXIST %CONDA_PARENT_DIR%\\Miniconda3 ( rd /s /q %CONDA_PARENT_DIR%\\Miniconda3 )
|
||||
curl https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O
|
||||
.\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\\Miniconda3
|
||||
)
|
||||
call %CONDA_PARENT_DIR%\\Miniconda3\\Scripts\\activate.bat %CONDA_PARENT_DIR%\\Miniconda3
|
||||
if NOT "%BUILD_ENVIRONMENT%"=="" (
|
||||
:: We have to pin Python version to 3.6.7, until mkl supports Python 3.7
|
||||
call conda install -y -q python=3.6.7 numpy mkl cffi pyyaml boto3
|
||||
)
|
||||
pip install ninja future hypothesis
|
||||
|
||||
set WORKING_DIR=%CD%
|
||||
call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x86_amd64
|
||||
cd %WORKING_DIR%
|
||||
|
||||
set PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\bin;C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\libnvvp;%PATH%
|
||||
set CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
|
||||
@ -58,13 +69,14 @@ set CUDA_TOOLKIT_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\
|
||||
set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
|
||||
set PYTHONPATH=%CD%\\test;%PYTHONPATH%
|
||||
|
||||
cd test/
|
||||
|
||||
python ..\\ci_scripts\\download_image.py %IMAGE_COMMIT_TAG%.7z
|
||||
|
||||
7z x %IMAGE_COMMIT_TAG%.7z
|
||||
|
||||
cd ..
|
||||
if NOT "%BUILD_ENVIRONMENT%"=="" (
|
||||
cd test/
|
||||
python ..\\ci_scripts\\download_image.py %IMAGE_COMMIT_TAG%.7z
|
||||
7z x %IMAGE_COMMIT_TAG%.7z
|
||||
cd ..
|
||||
) else (
|
||||
xcopy /s %CONDA_PARENT_DIR%\\Miniconda3\\Lib\\site-packages\\torch .\\test\\torch\\
|
||||
)
|
||||
|
||||
EOL
|
||||
|
||||
@ -78,14 +90,47 @@ call ci_scripts/setup_pytorch_env.bat
|
||||
cd test/ && python run_test.py --exclude nn --verbose && cd ..
|
||||
EOL
|
||||
|
||||
cat >ci_scripts/test_custom_script_ops.bat <<EOL
|
||||
call ci_scripts/setup_pytorch_env.bat
|
||||
|
||||
cd test/custom_operator
|
||||
|
||||
:: Build the custom operator library.
|
||||
mkdir build
|
||||
cd build
|
||||
:: Note: Caffe2 does not support MSVC + CUDA + Debug mode (has to be Release mode)
|
||||
cmake -DCMAKE_PREFIX_PATH=%CD%\\..\\..\\torch -DCMAKE_BUILD_TYPE=Release -GNinja ..
|
||||
ninja -v
|
||||
cd ..
|
||||
|
||||
:: Run tests Python-side and export a script module.
|
||||
python test_custom_ops.py -v
|
||||
python model.py --export-script-module="build/model.pt"
|
||||
:: Run tests C++-side and load the exported script module.
|
||||
cd build
|
||||
set PATH=C:\\Program Files\\NVIDIA Corporation\\NvToolsExt/bin/x64;%CD%\\..\\..\\torch\\lib;%PATH%
|
||||
test_custom_ops.exe model.pt
|
||||
EOL
|
||||
|
||||
cat >ci_scripts/test_libtorch.bat <<EOL
|
||||
call ci_scripts/setup_pytorch_env.bat
|
||||
dir
|
||||
dir %CD%\\test
|
||||
dir %CD%\\test\\torch
|
||||
dir %CD%\\test\\torch\\lib
|
||||
cd %CD%\\test\\torch\\lib
|
||||
set PATH=C:\\Program Files\\NVIDIA Corporation\\NvToolsExt/bin/x64;%CD%\\..\\..\\torch\\lib;%PATH%
|
||||
test_api.exe --gtest_filter="-IntegrationTest.MNIST*"
|
||||
EOL
|
||||
|
||||
run_tests() {
|
||||
if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then
|
||||
ci_scripts/test_python_nn.bat && ci_scripts/test_python_all_except_nn.bat
|
||||
ci_scripts/test_python_nn.bat && ci_scripts/test_python_all_except_nn.bat && ci_scripts/test_custom_script_ops.bat && ci_scripts/test_libtorch.bat
|
||||
else
|
||||
if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then
|
||||
ci_scripts/test_python_nn.bat
|
||||
elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then
|
||||
ci_scripts/test_python_all_except_nn.bat
|
||||
ci_scripts/test_python_all_except_nn.bat && ci_scripts/test_custom_script_ops.bat && ci_scripts/test_libtorch.bat
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
16
.travis.yml
16
.travis.yml
@ -27,5 +27,17 @@ matrix:
|
||||
install: pip install mypy mypy-extensions
|
||||
script: mypy @mypy-files.txt
|
||||
- env: CPP_DOC_CHECK
|
||||
install: sudo apt-get install -y doxygen
|
||||
script: cd docs/cpp && ./check-doxygen.sh
|
||||
python: "3.6"
|
||||
install:
|
||||
- sudo apt-get install -y doxygen
|
||||
- pip install -r requirements.txt
|
||||
script: cd docs/cpp/source && ./check-doxygen.sh
|
||||
- env: CLANG_TIDY
|
||||
python: "3.6"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-trusty
|
||||
packages: clang-tidy
|
||||
script: tools/run-clang-tidy-in-ci.sh
|
||||
|
@ -5,11 +5,14 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
|
||||
# ---[ Project and semantic versioning.
|
||||
project(Caffe2 CXX C)
|
||||
|
||||
set(CAFFE2_VERSION_MAJOR 0)
|
||||
set(CAFFE2_VERSION_MINOR 8)
|
||||
set(CAFFE2_VERSION_PATCH 2)
|
||||
set(CAFFE2_VERSION
|
||||
"${CAFFE2_VERSION_MAJOR}.${CAFFE2_VERSION_MINOR}.${CAFFE2_VERSION_PATCH}")
|
||||
set(CMAKE_INSTALL_MESSAGE NEVER)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
if (NOT MSVC)
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
endif()
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# One variable that determines whether the current cmake process is being run
|
||||
# with the main Caffe2 library. This is useful for building modules - if
|
||||
@ -56,11 +59,13 @@ include(CMakeDependentOption)
|
||||
option(BUILD_TORCH "Build Torch" OFF)
|
||||
option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
|
||||
option(BUILD_ATEN_MOBILE "Build ATen for Android and iOS" OFF)
|
||||
option(BUILD_ATEN_ONLY "Build only a subset focused on ATen only" OFF)
|
||||
option(BUILD_BINARY "Build C++ binaries" OFF)
|
||||
option(BUILD_DOCS "Build Caffe2 documentation" OFF)
|
||||
option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON)
|
||||
option(BUILD_PYTHON "Build Python binaries" ON)
|
||||
option(BUILD_CAFFE2_OPS "Build Caffe2 operators" ON)
|
||||
option(BUILD_C10_EXPERIMENTAL_OPS "Build c10 experimental operators" ON)
|
||||
option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
|
||||
cmake_dependent_option(
|
||||
CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
|
||||
@ -75,11 +80,12 @@ cmake_dependent_option(
|
||||
option(USE_ACL "Use ARM Compute Library" OFF)
|
||||
option(USE_ASAN "Use Address Sanitizer" OFF)
|
||||
option(USE_CUDA "Use CUDA" ON)
|
||||
option(USE_ROCM "Use ROCm" OFF)
|
||||
option(USE_ROCM "Use ROCm" ON)
|
||||
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_CUDNN "Use cuDNN" ON
|
||||
"USE_CUDA" OFF)
|
||||
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" OFF)
|
||||
option(USE_FFMPEG "Use ffmpeg" OFF)
|
||||
option(USE_GFLAGS "Use GFLAGS" ON)
|
||||
option(USE_GLOG "Use GLOG" ON)
|
||||
@ -91,18 +97,19 @@ option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON)
|
||||
option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
||||
option(USE_NCCL "Use NCCL" ON)
|
||||
option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF)
|
||||
option(USE_NERVANA_GPU "Use Nervana GPU backend" OFF)
|
||||
option(USE_NNAPI "Use NNAPI" OFF)
|
||||
option(USE_NNPACK "Use NNPACK" ON)
|
||||
option(USE_NUMA "Use NUMA (only available on Linux)" ON)
|
||||
cmake_dependent_option(
|
||||
USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." OFF
|
||||
"USE_CUDA" OFF)
|
||||
option(USE_NUMPY "Use NumPy" ON)
|
||||
option(USE_OBSERVERS "Use observers module." OFF)
|
||||
option(USE_OPENCL "Use OpenCL" OFF)
|
||||
option(USE_OPENCV "Use OpenCV" ON)
|
||||
option(USE_OPENMP "Use OpenMP for parallel code" OFF)
|
||||
option(USE_PROF "Use profiling" OFF)
|
||||
option(USE_QNNPACK "Use QNNPACK (quantized 8-bit operators)" ON)
|
||||
option(USE_REDIS "Use Redis" OFF)
|
||||
option(USE_ROCKSDB "Use RocksDB" OFF)
|
||||
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
|
||||
@ -112,8 +119,6 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
|
||||
option(USE_ZMQ "Use ZMQ" OFF)
|
||||
option(USE_ZSTD "Use ZSTD" OFF)
|
||||
option(USE_MKLDNN "Use MKLDNN" OFF)
|
||||
option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
|
||||
option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
|
||||
option(USE_DISTRIBUTED "Use distributed" ON)
|
||||
cmake_dependent_option(
|
||||
USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
|
||||
@ -124,7 +129,6 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO" OFF)
|
||||
option(TORCH_USE_CEREAL "Build the C++ API with Cereal for serialization support" OFF)
|
||||
|
||||
# Used when building Caffe2 through setup.py
|
||||
option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)
|
||||
@ -135,6 +139,38 @@ if (ANDROID OR IOS)
|
||||
set(BUILD_ATEN_MOBILE ON)
|
||||
endif()
|
||||
|
||||
if (BUILD_ATEN_ONLY)
|
||||
set(BUILD_CAFFE2_OPS OFF)
|
||||
set(BUILD_PYTHON OFF)
|
||||
set(USE_NUMA OFF)
|
||||
set(USE_LEVELDB OFF)
|
||||
set(USE_GFLAGS OFF)
|
||||
set(USE_GLOG OFF)
|
||||
set(USE_NCCL OFF)
|
||||
set(USE_NNPACK OFF)
|
||||
set(USE_NUMPY OFF)
|
||||
set(USE_OPENCV OFF)
|
||||
set(USE_MKLDNN OFF)
|
||||
set(USE_DISTRIBUTED OFF)
|
||||
set(USE_LMDB OFF)
|
||||
endif()
|
||||
|
||||
# ---[ Utils
|
||||
# TODO: merge the following 3 files into cmake/public/utils.cmake.
|
||||
include(cmake/Utils.cmake)
|
||||
include(cmake/public/utils.cmake)
|
||||
|
||||
# ---[ Version numbers for generated libraries
|
||||
set(TORCH_DEFAULT_VERSION "1.0.0")
|
||||
set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" CACHE STRING "Torch build version")
|
||||
if (NOT TORCH_BUILD_VERSION)
|
||||
# An empty string was specified so force version to the default
|
||||
set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}"
|
||||
CACHE STRING "Torch build version" FORCE)
|
||||
endif()
|
||||
caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
|
||||
caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION})
|
||||
|
||||
# ---[ CMake scripts + modules
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
|
||||
|
||||
@ -161,11 +197,6 @@ include(cmake/MiscCheck.cmake)
|
||||
# External projects
|
||||
include(ExternalProject)
|
||||
|
||||
# ---[ Utils
|
||||
# TODO: merge the following 3 files into cmake/public/utils.cmake.
|
||||
include(cmake/Utils.cmake)
|
||||
include(cmake/public/utils.cmake)
|
||||
|
||||
# ---[ Dependencies
|
||||
include(cmake/Dependencies.cmake)
|
||||
|
||||
@ -265,6 +296,10 @@ if (APPLE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constexpr-not-const")
|
||||
endif()
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-implicit-function-declaration -DEMSCRIPTEN -s DISABLE_EXCEPTION_CATCHING=0")
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0.0)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
|
||||
endif()
|
||||
@ -295,6 +330,7 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
|
||||
include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
|
||||
|
||||
# ---[ Main build
|
||||
add_subdirectory(c10)
|
||||
add_subdirectory(caffe2)
|
||||
|
||||
# --[ Documentation
|
||||
@ -387,6 +423,7 @@ if (BUILD_SHARED_LIBS)
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/threads.cmake
|
||||
${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake
|
||||
|
20
CODEOWNERS
20
CODEOWNERS
@ -1,23 +1,9 @@
|
||||
# This is a comment.
|
||||
# Each line is a file pattern followed by one or more owners.
|
||||
|
||||
/aten/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/aten/src/ATen/core/
|
||||
/torch/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/docs/source @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ssnl @zou3519
|
||||
/docs/cpp @goldsborough @ebetica @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/test @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/tools @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/README.md @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/setup.py @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/requirements.txt @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
|
||||
/torch/csrc/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough
|
||||
/test/cpp/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough
|
||||
/torch/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
|
||||
/torch/csrc/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
|
||||
/torch/csrc/jit/passes/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
|
||||
/test/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
|
||||
/scripts/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
|
||||
/docs/cpp @goldsborough @ebetica
|
||||
/torch/csrc/api/ @ebetica @goldsborough
|
||||
/test/cpp/api/ @ebetica @goldsborough
|
||||
/torch/lib/c10d/ @apaszke @pietern @teng-li
|
||||
/torch/csrc/distributed/ @apaszke @pietern @teng-li
|
||||
/torch/distributed/ @apaszke @pietern @teng-li
|
||||
|
119
CONTRIBUTING.md
119
CONTRIBUTING.md
@ -75,6 +75,84 @@ You do not need to repeatedly install after modifying python files.
|
||||
In case you want to reinstall, make sure that you uninstall pytorch first by running `pip uninstall torch`
|
||||
and `python setup.py clean`. Then you can install in `build develop` mode again.
|
||||
|
||||
## Codebase structure
|
||||
|
||||
* [c10](c10) - Core library files that work everywhere, both server
|
||||
and mobile. We are slowly moving pieces from ATen/core here.
|
||||
This library is intended only to contain essential functionality,
|
||||
and appropriate to use in settings where binary size matters. (But
|
||||
you'll have a lot of missing functionality if you try to use it
|
||||
directly.)
|
||||
* [aten](aten) - C++ tensor library for PyTorch (no autograd support)
|
||||
* src
|
||||
* [TH](aten/src/TH)
|
||||
[THC](aten/src/THC)
|
||||
[THNN](aten/src/THNN)
|
||||
[THCUNN](aten/src/THCUNN) - Legacy library code from the original
|
||||
Torch. Try not to add things here; we're slowly porting these to
|
||||
native.
|
||||
* generic - Contains actual implementations of operators,
|
||||
parametrized over `scalar_t`. Files here get compiled N times
|
||||
per supported scalar type in PyTorch.
|
||||
* ATen
|
||||
* [core](aten/src/ATen/core) - Core functionality of ATen. This
|
||||
is migrating to top-level c10 folder.
|
||||
* [native](aten/src/ATen/native) - Modern implementations of
|
||||
operators. If you want to write a new operator, here is where
|
||||
it should go. Most CPU operators go in the top level directory,
|
||||
except for operators which need to be compiled specially; see
|
||||
cpu below.
|
||||
* [cpu](aten/src/ATen/native/cpu) - Not actually CPU
|
||||
implementations of operators, but specifically implementations
|
||||
which are compiled with processor-specific instructions, like
|
||||
AVX. See the README for more details.
|
||||
* [cuda](aten/src/ATen/native/cuda) - CUDA implementations of
|
||||
operators.
|
||||
* [sparse](aten/src/ATen/native/sparse) - CPU and CUDA
|
||||
implementations of COO sparse tensor operations
|
||||
* [mkl](aten/src/ATen/native/mkl) [mkldnn](aten/src/ATen/native/mkldnn)
|
||||
[miopen](aten/src/ATen/native/miopen) [cudnn](aten/src/ATen/native/cudnn)
|
||||
- implementations of operators which simply bind to some
|
||||
backend library.
|
||||
* [torch](torch) - The actual PyTorch library. Everything that is not
|
||||
in csrc is Python modules, following the PyTorch Python frontend
|
||||
module structure.
|
||||
* [csrc](torch/csrc) - C++ files composing the PyTorch library. Files
|
||||
in this directory tree are a mix of Python binding code, and C++
|
||||
heavy lifting. Consult `setup.py` for the canonical list of Python
|
||||
binding files; conventionally, they are often prefixed with
|
||||
`python_`.
|
||||
* [jit](torch/csrc/jit) - Compiler and frontend for TorchScript JIT
|
||||
frontend.
|
||||
* [autograd](torch/csrc/autograd) - Implementation of reverse-mode automatic
|
||||
differentation
|
||||
* [api](torch/csrc/api) - The PyTorch C++ frontend.
|
||||
* [distributed](torch/csrc/distributed) - Distributed training
|
||||
support for PyTorch.
|
||||
* [tools](tools) - Code generation scripts for the PyTorch library.
|
||||
See README of this directory for more details.
|
||||
* [test](tests) - Python unit tests for PyTorch Python frontend
|
||||
* [test_torch.py](test/test_torch.py) - Basic tests for PyTorch
|
||||
functionality
|
||||
* [test_autograd.py](test/test_autograd.py) - Tests for non-NN
|
||||
automatic differentiation support
|
||||
* [test_nn.py](test/test_nn.py) - Tests for NN operators and
|
||||
their automatic differentiation
|
||||
* [test_jit.py](test/test_jit.py) - Tests for the JIT compiler
|
||||
and TorchScript
|
||||
* ...
|
||||
* [cpp](test/cpp) - C++ unit tests for PyTorch C++ frontend
|
||||
* [expect](test/expect) - Automatically generated "expect" files
|
||||
which are used to compare against expected output.
|
||||
* [onnx](test/onnx) - Tests for ONNX export functionality,
|
||||
using both PyTorch and Caffe2.
|
||||
* [caffe2](caffe2) - The Caffe2 library.
|
||||
* [core](caffe2/core) - Core files of Caffe2, e.g., tensor, workspace,
|
||||
blobs, etc.
|
||||
* [operators](caffe2/operators) - Operators of Caffe2
|
||||
* [python](caffe2/python) - Python bindings to Caffe2
|
||||
* ...
|
||||
|
||||
## Unit testing
|
||||
|
||||
PyTorch's testing is located under `test/`. Run the entire test suite with
|
||||
@ -262,9 +340,9 @@ than Linux, which are worth keeping in mind when fixing these problems.
|
||||
1. Symbols are NOT exported by default on Windows; instead, you have to explicitly
|
||||
mark a symbol as exported/imported in a header file with `__declspec(dllexport)` /
|
||||
`__declspec(dllimport)`. We have codified this pattern into a set of macros
|
||||
which follow the convention `*_API`, e.g., `AT_API` inside ATen. (Every separate
|
||||
shared library needs a unique macro name, because symbol visibility is on a per
|
||||
shared library basis.)
|
||||
which follow the convention `*_API`, e.g., `CAFFE2_API` inside Caffe2 and ATen.
|
||||
(Every separate shared library needs a unique macro name, because symbol visibility
|
||||
is on a per shared library basis. See c10/macros/Macros.h for more details.)
|
||||
|
||||
The upshot is if you see an "unresolved external" error in your Windows build, this
|
||||
is probably because you forgot to mark a function with `*_API`. However, there is
|
||||
@ -325,7 +403,7 @@ Here are a few well known pitfalls and workarounds:
|
||||
catch all of these problems: stay vigilant to the possibility that
|
||||
your crash is due to a real memory problem.
|
||||
|
||||
* (NVCC) `at::optional` does not work when used from device code. Don't use
|
||||
* (NVCC) `c10::optional` does not work when used from device code. Don't use
|
||||
it from kernels. Upstream issue: https://github.com/akrzemi1/Optional/issues/58
|
||||
and our local issue #10329.
|
||||
|
||||
@ -334,7 +412,7 @@ Here are a few well known pitfalls and workarounds:
|
||||
* The idiom `static_assert(f() == f())` to test if `f` is constexpr
|
||||
does not work; you'll get "error C2131: expression did not evaluate
|
||||
to a constant". Don't use these asserts on Windows.
|
||||
(Example: `aten/src/ATen/core/intrusive_ptr.h`)
|
||||
(Example: `c10/util/intrusive_ptr.h`)
|
||||
|
||||
* (NVCC) Code you access inside a `static_assert` will eagerly be
|
||||
evaluated as if it were device code, and so you might get an error
|
||||
@ -354,6 +432,37 @@ static_assert(std::is_same(A*, decltype(A::singelton()))::value, "hmm");
|
||||
are too large. Splitting such files into separate files helps.
|
||||
(Example: `THTensorMath`, `THTensorMoreMath`, `THTensorEvenMoreMath`.)
|
||||
|
||||
### Running Clang-Tidy
|
||||
|
||||
[Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/index.html) is a C++
|
||||
linter and static analysis tool based on the clang compiler. We run clang-tidy
|
||||
in our CI to make sure that new C++ code is safe, sane and efficient. See our
|
||||
[.travis.yml](https://github.com/pytorch/pytorch/blob/master/.travis.yml) file
|
||||
for the simple commands we use for this.
|
||||
|
||||
To run clang-tidy locally, follow these steps:
|
||||
|
||||
1. Install clang-tidy. First, check if you already have clang-tidy by simply
|
||||
writing `clang-tidy` in your terminal. If you don't yet have clang-tidy, you
|
||||
should be able to install it easily with your package manager, e.g. by writing
|
||||
`apt-get install clang-tidy` on Ubuntu. See https://apt.llvm.org for details on
|
||||
how to install the latest version. Note that newer versions of clang-tidy will
|
||||
have more checks than older versions. In our CI, we run clang-tidy-6.0.
|
||||
|
||||
2. Use our driver script to run clang-tidy over any changes relative to some
|
||||
git revision (you may want to replace `HEAD~1` with `HEAD` to pick up
|
||||
uncommitted changes). Changes are picked up based on a `git diff` with the
|
||||
given revision:
|
||||
```sh
|
||||
$ python tools/clang_tidy.py -d build -p torch/csrc --diff 'HEAD~1'
|
||||
```
|
||||
|
||||
Above, it is assumed you are in the PyTorch root folder. `path/to/build` should
|
||||
be the path to where you built PyTorch from source, e.g. `build` in the PyTorch
|
||||
root folder if you used `setup.py build`. You can use `-c <clang-tidy-binary>`
|
||||
to change the clang-tidy this script uses. Make sure you have PyYaml installed,
|
||||
which is in PyTorch's `requirements.txt`.
|
||||
|
||||
## Caffe2 notes
|
||||
|
||||
In 2018, we merged Caffe2 into the PyTorch source repository. While the
|
||||
|
44
README.md
44
README.md
@ -22,11 +22,13 @@ We are in an early-release beta. Expect some adventures and rough edges.
|
||||
- [Releases and Contributing](#releases-and-contributing)
|
||||
- [The Team](#the-team)
|
||||
|
||||
| System | 2.7 | 3.5 |
|
||||
| --- | --- | --- |
|
||||
| Linux CPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) |
|
||||
| Linux GPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) |
|
||||
| Windows GPU | <center>—</center> | [](https://ci.pytorch.org/jenkins/job/pytorch-builds/job/pytorch-win-ws2016-cuda9-cudnn7-py3-trigger/)
|
||||
| System | 2.7 | 3.5 | 3.6 |
|
||||
| :---: | :---: | :---: | :--: |
|
||||
| Linux CPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | <center>—</center> |
|
||||
| Linux GPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | <center>—</center> |
|
||||
| Windows GPU | <center>—</center> | [](https://ci.pytorch.org/jenkins/job/pytorch-builds/job/pytorch-win-ws2016-cuda9-cudnn7-py3-trigger/) | <center>—</center> |
|
||||
| Linux (ppc64le) CPU | [](https://powerci.osuosl.org/job/pytorch-master-nightly-py2-linux-ppc64le/) | — | [](https://powerci.osuosl.org/job/pytorch-master-nightly-py3-linux-ppc64le/) |
|
||||
| Linux (ppc64le) GPU | [](https://powerci.osuosl.org/job/pytorch-linux-cuda9-cudnn7-py2-mpi-build-test-gpu/) | — | [](https://powerci.osuosl.org/job/pytorch-linux-cuda92-cudnn7-py3-mpi-build-test-gpu/) |
|
||||
|
||||
See also the [ci.pytorch.org HUD](https://ezyang.github.io/pytorch-ci-hud/build/pytorch-master).
|
||||
|
||||
@ -77,7 +79,7 @@ change the way your network behaves arbitrarily with zero lag or overhead. Our i
|
||||
from several research papers on this topic, as well as current and past work such as
|
||||
[torch-autograd](https://github.com/twitter/torch-autograd),
|
||||
[autograd](https://github.com/HIPS/autograd),
|
||||
[Chainer](http://chainer.org), etc.
|
||||
[Chainer](https://chainer.org), etc.
|
||||
|
||||
While this technique is not unique to PyTorch, it's one of the fastest implementations of it to date.
|
||||
You get the best of speed and flexibility for your crazy research.
|
||||
@ -88,7 +90,7 @@ You get the best of speed and flexibility for your crazy research.
|
||||
|
||||
PyTorch is not a Python binding into a monolithic C++ framework.
|
||||
It is built to be deeply integrated into Python.
|
||||
You can use it naturally like you would use NumPy / SciPy / scikit-learn etc.
|
||||
You can use it naturally like you would use [NumPy](http://www.numpy.org/) / [SciPy](https://www.scipy.org/) / [scikit-learn](http://scikit-learn.org) etc.
|
||||
You can write your new neural network layers in Python itself, using your favorite libraries
|
||||
and use packages such as Cython and Numba.
|
||||
Our goal is to not reinvent the wheel where appropriate.
|
||||
@ -104,7 +106,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
|
||||
### Fast and Lean
|
||||
|
||||
PyTorch has minimal framework overhead. We integrate acceleration libraries
|
||||
such as Intel MKL and NVIDIA (cuDNN, NCCL) to maximize speed.
|
||||
such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA (cuDNN, NCCL) to maximize speed.
|
||||
At the core, its CPU and GPU Tensor and neural network backends
|
||||
(TH, THC, THNN, THCUNN) are mature and have been tested for years.
|
||||
|
||||
@ -121,10 +123,10 @@ Writing new neural network modules, or interfacing with PyTorch's Tensor API was
|
||||
and with minimal abstractions.
|
||||
|
||||
You can write new neural network layers in Python using the torch API
|
||||
[or your favorite NumPy-based libraries such as SciPy](http://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html).
|
||||
[or your favorite NumPy-based libraries such as SciPy](https://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html).
|
||||
|
||||
If you want to write your layers in C/C++, we provide a convenient extension API that is efficient and with minimal boilerplate.
|
||||
There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/cpp_extension.html) and [an example here](https://github.com/pytorch/extension-cpp).
|
||||
There is no wrapper code that needs to be written. You can see [a tutorial here](https://pytorch.org/tutorials/advanced/cpp_extension.html) and [an example here](https://github.com/pytorch/extension-cpp).
|
||||
|
||||
|
||||
## Installation
|
||||
@ -132,7 +134,7 @@ There is no wrapper code that needs to be written. You can see [a tutorial here]
|
||||
### Binaries
|
||||
Commands to install from binaries via Conda or pip wheels are on our website:
|
||||
|
||||
[http://pytorch.org](http://pytorch.org)
|
||||
[https://pytorch.org](https://pytorch.org)
|
||||
|
||||
### From Source
|
||||
|
||||
@ -163,7 +165,7 @@ conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing
|
||||
conda install -c mingfeima mkldnn
|
||||
|
||||
# Add LAPACK support for the GPU
|
||||
conda install -c pytorch magma-cuda80 # or magma-cuda90 if CUDA 9
|
||||
conda install -c pytorch magma-cuda92 # or [magma-cuda80 | magma-cuda91] depending on your cuda version
|
||||
```
|
||||
|
||||
On macOS
|
||||
@ -202,7 +204,7 @@ REM The following two lines are needed for Python 2.7, but the support for it is
|
||||
set MSSdk=1
|
||||
set FORCE_PY27_BUILD=1
|
||||
REM As for CUDA 8, VS2015 Update 3 is also required to build PyTorch. Use the following line.
|
||||
set "CUDA_HOST_COMPILER=%VS140COMNTOOLS%\..\..\VC\bin\amd64\cl.exe"
|
||||
set "CUDAHOSTCXX=%VS140COMNTOOLS%\..\..\VC\bin\amd64\cl.exe"
|
||||
|
||||
call "%VS150COMNTOOLS%\vcvarsall.bat" x64 -vcvars_ver=14.11
|
||||
python setup.py install
|
||||
@ -210,7 +212,7 @@ python setup.py install
|
||||
|
||||
### Docker image
|
||||
|
||||
Dockerfile is supplied to build images with cuda support and cudnn v7. You can pass -e PYTHON_VERSION=x.y flag to specificy which python to be used by Miniconda, or leave it unset to use the default. Build as usual
|
||||
Dockerfile is supplied to build images with cuda support and cudnn v7. You can pass `-e PYTHON_VERSION=x.y` flag to specify which python version is to be used by Miniconda, or leave it unset to use the default. Build as usual
|
||||
```
|
||||
docker build -t pytorch -f docker/pytorch/Dockerfile .
|
||||
```
|
||||
@ -226,7 +228,7 @@ should increase shared memory size either with `--ipc=host` or `--shm-size` comm
|
||||
|
||||
### Building the Documentation
|
||||
|
||||
To build documentation in various formats, you will need Sphinx and the
|
||||
To build documentation in various formats, you will need [Sphinx](http://www.sphinx-doc.org) and the
|
||||
readthedocs theme.
|
||||
|
||||
```
|
||||
@ -239,7 +241,7 @@ You can then build the documentation by running ``make <format>`` from the
|
||||
### Previous Versions
|
||||
|
||||
Installation instructions and binaries for previous PyTorch versions may be found
|
||||
on [our website](http://pytorch.org/previous-versions/).
|
||||
on [our website](https://pytorch.org/previous-versions).
|
||||
|
||||
|
||||
## Getting Started
|
||||
@ -247,13 +249,13 @@ on [our website](http://pytorch.org/previous-versions/).
|
||||
Three pointers to get you started:
|
||||
- [Tutorials: get you started with understanding and using PyTorch](https://pytorch.org/tutorials/)
|
||||
- [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples)
|
||||
- [The API Reference](http://pytorch.org/docs/)
|
||||
- [The API Reference](https://pytorch.org/docs/)
|
||||
|
||||
## Communication
|
||||
* forums: discuss implementations, research, etc. http://discuss.pytorch.org
|
||||
* forums: discuss implementations, research, etc. https://discuss.pytorch.org
|
||||
* GitHub issues: bug reports, feature requests, install issues, RFCs, thoughts, etc.
|
||||
* Slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . Our slack channel is invite-only to promote a healthy balance between power-users and beginners. If you need a slack invite, ping us at slack@pytorch.org
|
||||
* newsletter: no-noise, one-way email newsletter with important announcements about pytorch. You can sign-up here: http://eepurl.com/cbG0rv
|
||||
* newsletter: no-noise, one-way email newsletter with important announcements about pytorch. You can sign-up here: https://eepurl.com/cbG0rv
|
||||
|
||||
## Releases and Contributing
|
||||
|
||||
@ -273,3 +275,7 @@ PyTorch is currently maintained by [Adam Paszke](https://apaszke.github.io/), [S
|
||||
A non-exhaustive but growing list needs to mention: Trevor Killeen, Sasank Chilamkurthy, Sergey Zagoruyko, Adam Lerer, Francisco Massa, Alykhan Tejani, Luca Antiga, Alban Desmaison, Andreas Kopf, James Bradbury, Zeming Lin, Yuandong Tian, Guillaume Lample, Marat Dukhan, Natalia Gimelshein, Christian Sarofeen, Martin Raison, Edward Yang, Zachary Devito.
|
||||
|
||||
Note: this project is unrelated to [hughperkins/pytorch](https://github.com/hughperkins/pytorch) with the same name. Hugh is a valuable contributor in the Torch community and has helped with many things Torch and PyTorch.
|
||||
|
||||
## License
|
||||
|
||||
PyTorch is BSD-style licensed, as found in the LICENSE file.
|
||||
|
@ -1,3 +0,0 @@
|
||||
[flake8]
|
||||
max-line-length = 120
|
||||
|
3
aten/.gitignore
vendored
3
aten/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
__pycache__/
|
||||
build/
|
||||
*.pyc
|
258
aten/README.md
258
aten/README.md
@ -1,258 +0,0 @@
|
||||
# ATen: A TENsor library
|
||||
|
||||
ATen is a simple tensor library thats exposes the Tensor operations in Torch
|
||||
and PyTorch directly in C++11. The wrapper respects the semantics of operators
|
||||
in PyTorch, except minor details due to differences between C++ and Python in
|
||||
the way default arguments are handled. See the [documentation for tensors](http://pytorch.org/docs/tensors.html) in PyTorch for what these operations do.
|
||||
ATen's API is auto-generated from the same declarations PyTorch uses so the
|
||||
two APIs will track each other over time.
|
||||
|
||||
Tensor types are resolved dynamically, such that the API is generic and
|
||||
does not include templates. That is, there is one `Tensor` type. It can hold a
|
||||
CPU or CUDA Tensor, and the tensor may have Doubles, Float, Ints, etc. This design
|
||||
makes it easy to write generic code without templating everything.
|
||||
|
||||
See https://pytorch.org/cppdocs for the provided API. Excerpt:
|
||||
```c++
|
||||
Tensor atan2(const Tensor & other) const;
|
||||
Tensor & atan2_(const Tensor & other);
|
||||
Tensor pow(Scalar exponent) const;
|
||||
Tensor pow(const Tensor & exponent) const;
|
||||
Tensor & pow_(Scalar exponent);
|
||||
Tensor & pow_(const Tensor & exponent);
|
||||
Tensor lerp(const Tensor & end, Scalar weight) const;
|
||||
Tensor & lerp_(const Tensor & end, Scalar weight);
|
||||
Tensor histc() const;
|
||||
Tensor histc(int64_t bins) const;
|
||||
Tensor histc(int64_t bins, Scalar min) const;
|
||||
Tensor histc(int64_t bins, Scalar min, Scalar max) const;
|
||||
```
|
||||
|
||||
Inplace operations are also provided, and always suffixed by `_` to indicate they will modify the Tensor.
|
||||
|
||||
### Installation
|
||||
|
||||
TH/THC/THNN/THCUNN are provided (as git subtrees), so the repo is standalone. You will need a C++11 compiler, cmake, and the pyyaml python package.
|
||||
```
|
||||
|
||||
# Install pyyaml used by python code generation to read API declarations
|
||||
|
||||
# macOS: if you don't have pip
|
||||
sudo easy_install pip
|
||||
# Ubuntu: if you don't have pip
|
||||
apt-get -y install python-pip
|
||||
|
||||
# if you don't have pyyaml
|
||||
sudo pip install pyyaml
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=/where/you/want # specify your dest directory
|
||||
# cmake .. -DUSE_NVRTC=ON -DUSE_TENSORRT=OFF -DCMAKE_INSTALL_PREFIX=../install -DCAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO=OFF -DUSE_CUDA=ON # for CUDA
|
||||
# cmake .. -DUSE_CUDA=OFF # for CPU only machines
|
||||
make install
|
||||
```
|
||||
|
||||
### Example usage
|
||||
|
||||
Here is a simple example; again, the syntax follows Torch semantics.
|
||||
|
||||
```c++
|
||||
using namespace at; // assumed in the following
|
||||
|
||||
Tensor d = CPU(kFloat).ones({3, 4});
|
||||
Tensor r = CPU(kFloat).zeros({3,4});
|
||||
for(auto i = 0; i < 100000; i++) {
|
||||
r = r.add(d);
|
||||
// equivalently
|
||||
r = r + d;
|
||||
// or
|
||||
r += d;
|
||||
}
|
||||
```
|
||||
|
||||
Want this running on the GPU?
|
||||
```c++
|
||||
using namespace at; // assumed in the following
|
||||
|
||||
Tensor d = CUDA(kFloat).ones({3, 4});
|
||||
Tensor r = CUDA(kFloat).zeros({3,4});
|
||||
for(auto i = 0; i < 100000; i++) {
|
||||
r = r.add(d);
|
||||
// equivalently
|
||||
r = r + d;
|
||||
// or
|
||||
r += d;
|
||||
}
|
||||
```
|
||||
|
||||
Expressions like `CUDA(kFloat)` are first-class `at::Type` objects that represent
|
||||
the type of a Tensor and are used to create Tensors when their type cannot be
|
||||
inferred.
|
||||
|
||||
See more in [sample files](src/ATen/test).
|
||||
|
||||
### Creating your kernel
|
||||
|
||||
It is easy to create new kernels, thanks to the `dispatch<>()` templated function. Example:
|
||||
```c++
|
||||
|
||||
// a simple sum kernel (for CPU only)
|
||||
template<typename T>
|
||||
struct sum_op {
|
||||
// dispatch handles variable arguments for you
|
||||
Tensor CPU(const Type & t, Tensor & x_)
|
||||
{
|
||||
Tensor x = x_.contiguous();
|
||||
auto x_p = x.data<T>();
|
||||
int64_t size = x.numel();
|
||||
T sum = 0;
|
||||
for(int64_t i = 0; i < size; i++) {
|
||||
sum += x_p[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
Tensor CUDA(Tensor& x) {
|
||||
throw std::invalid_argument("device not supported");
|
||||
};
|
||||
};
|
||||
|
||||
Tensor a = CPU(kFloat).rand({3, 7});
|
||||
std::cout << a << std::endl;
|
||||
std::cout << dispatch<sum_op>(a.type(),a) << " == " << a.sum() << std::endl;
|
||||
```
|
||||
|
||||
### Efficient access to tensor elements
|
||||
|
||||
When using Tensor-wide operations, the relative cost of dynamic dispatch is very small.
|
||||
However, there are cases, especially in your own kernels, where efficient element-wise access is needed,
|
||||
and the cost of dynamic dispatch inside the element-wise loop is very high.
|
||||
ATen provides _accessors_ that are created with a single dynamic check that a Tensor is the type and number of
|
||||
dimensions. Accessors then expose an API for accessing the Tensor elements efficiently:
|
||||
|
||||
```c++
|
||||
|
||||
Tensor foo = CPU(kFloat).rand({12,12});
|
||||
|
||||
// assert foo is 2-dimensional and holds floats.
|
||||
auto foo_a = foo.accessor<float,2>();
|
||||
float trace = 0;
|
||||
|
||||
for(int i = 0; i < foo_a.size(0); i++) {
|
||||
// use the accessor foo_a to get tensor data.
|
||||
trace += foo_a[i][i];
|
||||
}
|
||||
```
|
||||
|
||||
Accessors are temporary views of a Tensor. They are only valid for the lifetime of the tensor that they
|
||||
view and hence should only be used locally in a function, like iterators.
|
||||
|
||||
### Using externally created data
|
||||
|
||||
If you already have your tensor data allocated in memory (CPU or CUDA),
|
||||
you can view that memory as a Tensor in ATen:
|
||||
|
||||
```c++
|
||||
float data[] = { 1, 2, 3,
|
||||
4, 5, 6};
|
||||
auto f = CPU(kFloat).tensorFromBlob(data, {2,3});
|
||||
cout << f << endl;
|
||||
```
|
||||
|
||||
These tensors cannot be resized because ATen does not own the memory, but otherwise
|
||||
behave as normal tensors.
|
||||
|
||||
### Scalars and zero-dimensional tensors
|
||||
|
||||
In addition to the `Tensor` objects, ATen also includes `Scalar`s that represent a single number.
|
||||
Like a Tensor, Scalars are dynamically typed and can hold any one of ATen's number types.
|
||||
Scalars can be implicitly constructed from C++ number types. Scalars are needed because some functions like `addmm` take numbers along with Tensors and expect these
|
||||
numbers to be the same dynamic type as the tensor. They are also used in the API to indicate places where
|
||||
a function will _always_ return a Scalar value, like `sum`.
|
||||
|
||||
```c++
|
||||
Tensor addmm(Scalar beta, const Tensor & self,
|
||||
Scalar alpha, const Tensor & mat1,
|
||||
const Tensor & mat2);
|
||||
Scalar sum(const Tensor & self);
|
||||
|
||||
//usage
|
||||
Tensor a = ...
|
||||
Tensor b = ...
|
||||
Tensor c = ...
|
||||
Tensor r = addmm(1.0, a, .5, b, c);
|
||||
```
|
||||
|
||||
In addition to Scalars, ATen also allows Tensor objects to be zero-dimensional. These Tensors hold
|
||||
a single value and they can be references to a single element in a larger Tensor. They can be used anywhere a Tensor is expected. They are normally created by operators like `select` which reduce the dimensions of
|
||||
a Tensor.
|
||||
|
||||
```c++
|
||||
Tensor two = CPU(kFloat).rand({10,20});
|
||||
two[1][2] = 4;
|
||||
//~~~~~~~ zero-dimensional Tensor
|
||||
```
|
||||
|
||||
It is possible to convert between Scalar and zero-dim Tensors:
|
||||
|
||||
```c++
|
||||
Tensor zero_dim = CPU(kFloat).scalarTensor(4);
|
||||
Scalar from_tensor = Scalar(zero_dim); //only valid when zero_dim.dim() == 0;
|
||||
```
|
||||
|
||||
### Avoiding unnecessary CUDA synchronization in your kernels when using Scalars
|
||||
|
||||
Moving a single number from the GPU to the CPU introduces a synchronization point
|
||||
that can add latency to your program. In certain cases the result of a GPU operator like `sum` which
|
||||
returns a Scalar may be plugged into another GPU operator as an argument. If Scalars were always copied
|
||||
to the CPU, this would result in 2 copies. To avoid these synchronizations, Scalar objects can be
|
||||
optionally backed by a zero-dim Tensor, and are only copied to the CPU when requested.
|
||||
|
||||
```c++
|
||||
auto a = CUDA(kFloat).rand({3,4});
|
||||
Scalar on_gpu = Scalar(a[1][1]); //backed by zero-dim Tensor
|
||||
assert(on_gpu.isBackedByTensor());
|
||||
|
||||
double value = on_gpu.toDouble(); // copied to CPU, if it was backed by GPU Tensor.
|
||||
Scalar svalue = on_gpu.local(); // force the Scalar to become local to CPU.
|
||||
|
||||
// get the scalar as a zero-dim tensor. If it was already backed
|
||||
// by a zero-dim Tensor then this op has no synchronization.
|
||||
// if the Scalar was local on CPU, it performs the copy
|
||||
Tensor same_tensor = CUDA(kFloat).scalarTensor(on_gpu);
|
||||
```
|
||||
|
||||
Operators aware of the location of Scalars can arrange to do the minimal number of copies required.
|
||||
|
||||
### Developer notes
|
||||
|
||||
ATen relies heavily on code generation to automatically generate headers
|
||||
and implementations for all of the tensor methods it supports. The main
|
||||
entry point for the script which does all this work is
|
||||
[`src/ATen/gen.py`](src/ATen/gen.py), which ingests
|
||||
[`src/ATen/Declarations.cwrap`](src/ATen/Declarations.cwrap),
|
||||
[`src/ATen/nn.yaml`](src/ATen/nn.yaml),
|
||||
[`src/ATen/native/native_functions.yaml`](src/ATen/native/native_functions.yaml) and the THNN/THCUNN headers and
|
||||
produces all of the headers and wrapping code necessary to generate
|
||||
the ATen interface.
|
||||
|
||||
If you need to understand how ATen understands a declaration after all
|
||||
of this processing occurs, it's helpful to look at the generated file
|
||||
`Declarations.yaml` (NB: not cwrap) which contains information for all
|
||||
ATen methods in a uniform manner. This file is utilized by PyTorch
|
||||
which further extends the ATen interface with support for automatic
|
||||
differentation.
|
||||
|
||||
#### Note [ATen preprocessor philosophy]
|
||||
|
||||
ATen is designed to be simple to use, and one of the things this implies is
|
||||
that it should not be necessary to use preprocessor macros when using ATen;
|
||||
we would rather provide all symbols, even for functionality that is not
|
||||
available on the system ATen is running on.
|
||||
|
||||
This means that internally inside ATen, whereas other libraries might
|
||||
simply omit source files for, e.g., CuDNN, when CuDNN libraries are not
|
||||
installed, ATen will always build these source files, compiling stub
|
||||
functions for anything that is not available. ATen never uses
|
||||
`AT_ENABLED_CUDA()` in header files, and all types in ATen's public API
|
||||
are always available no matter your build configuration.
|
@ -1,9 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
#include "ATen/Allocator.h"
|
||||
#include "ATen/CPUGeneral.h"
|
||||
#include "ATen/CUDAGuard.h"
|
||||
#include "ATen/Context.h"
|
||||
#include "ATen/Device.h"
|
||||
#include "ATen/DeviceGuard.h"
|
||||
@ -11,16 +9,16 @@
|
||||
#include "ATen/Dispatch.h"
|
||||
#include "ATen/Formatting.h"
|
||||
#include "ATen/Functions.h"
|
||||
#include "ATen/core/Generator.h"
|
||||
#include "ATen/core/Layout.h"
|
||||
#include "ATen/OptionsGuard.h"
|
||||
#include "ATen/core/Scalar.h"
|
||||
#include "ATen/ScalarOps.h"
|
||||
#include "ATen/core/Storage.h"
|
||||
#include "ATen/Tensor.h"
|
||||
#include "ATen/TensorGeometry.h"
|
||||
#include "ATen/core/TensorMethods.h"
|
||||
#include "ATen/TensorOperators.h"
|
||||
#include "ATen/core/TensorOptions.h"
|
||||
#include "ATen/Type.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
#include "ATen/core/Generator.h"
|
||||
#include <c10/core/Layout.h>
|
||||
#include "ATen/core/Scalar.h"
|
||||
#include <c10/core/Storage.h>
|
||||
#include "ATen/core/TensorMethods.h"
|
||||
#include "ATen/core/TensorOptions.h"
|
||||
#include <c10/util/Exception.h>
|
||||
|
@ -6,9 +6,12 @@
|
||||
// Example:
|
||||
// using accscalar_t = acc_type<scalar_t, true>;
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#if defined(__CUDACC__)
|
||||
#include <cuda.h>
|
||||
#include <cuda_fp16.h>
|
||||
#elif defined(__HIPCC__)
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_fp16.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
@ -16,7 +19,7 @@ namespace at {
|
||||
template <typename T, bool is_cuda>
|
||||
struct AccumulateType { };
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
template <> struct AccumulateType<half, true> { using type = float; };
|
||||
#endif
|
||||
template <> struct AccumulateType<Half, true> { using type = float; };
|
||||
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Allocator.h>
|
||||
#include <c10/core/Allocator.h>
|
||||
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/ArrayRef.h>
|
||||
#include <c10/util/ArrayRef.h>
|
||||
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Backend.h>
|
||||
#include <c10/core/Backend.h>
|
||||
|
@ -20,8 +20,8 @@ CONFIGURE_FILE(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")
|
||||
CONFIGURE_FILE(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h")
|
||||
|
||||
# NB: If you edit these globs, you'll have to update setup.py package_data as well
|
||||
FILE(GLOB base_h "*.h" "detail/*.h")
|
||||
FILE(GLOB base_cpp "*.cpp" "detail/*.cpp")
|
||||
FILE(GLOB base_h "*.h" "detail/*.h" "cpu/*.h")
|
||||
FILE(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp")
|
||||
add_subdirectory(core)
|
||||
FILE(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh")
|
||||
FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
|
||||
@ -158,6 +158,16 @@ if(NOT MSVC AND NOT EMSCRIPTEN)
|
||||
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_CXX_FLAGS)
|
||||
|
||||
# Bump up optimization level for sleef to -O1, since at -O0 the compiler
|
||||
# excessively spills intermediate vector registers to the stack
|
||||
# and makes things run impossibly slowly
|
||||
set(OLD_CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
|
||||
IF(${CMAKE_C_FLAGS_DEBUG} MATCHES "-O0")
|
||||
string(REGEX REPLACE "-O0" "-O1" CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
|
||||
ELSE()
|
||||
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O1")
|
||||
ENDIF()
|
||||
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build sleef static" FORCE)
|
||||
set(BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE)
|
||||
set(BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE)
|
||||
@ -168,6 +178,7 @@ if(NOT MSVC AND NOT EMSCRIPTEN)
|
||||
link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
|
||||
list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)
|
||||
|
||||
set(CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
|
||||
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
|
||||
|
||||
# Set these back. TODO: Use SLEEF_ to pass these instead
|
||||
@ -195,6 +206,12 @@ IF(USE_CUDA AND NOT USE_ROCM)
|
||||
--generate-code arch=compute_50,code=sm_50
|
||||
--generate-code arch=compute_60,code=sm_60
|
||||
--generate-code arch=compute_70,code=sm_70)
|
||||
elseif(${CUDA_VERSION_MAJOR} EQUAL "10")
|
||||
SET(CUFFT_FAKELINK_OPTIONS
|
||||
--generate-code arch=compute_35,code=sm_35
|
||||
--generate-code arch=compute_50,code=sm_50
|
||||
--generate-code arch=compute_60,code=sm_60
|
||||
--generate-code arch=compute_70,code=sm_70)
|
||||
else()
|
||||
MESSAGE(FATAL_ERROR "Unhandled major cuda version ${CUDA_VERSION_MAJOR}")
|
||||
endif()
|
||||
@ -274,7 +291,7 @@ else()
|
||||
target_link_libraries(ATen_cpu PRIVATE ATEN_CPU_FILES_GEN_LIB)
|
||||
caffe2_interface_library(ATen_cpu ATen_cpu_library)
|
||||
# Set standard properties on the target
|
||||
aten_set_target_props(ATen_cpu)
|
||||
torch_set_target_props(ATen_cpu)
|
||||
|
||||
# Make sure these don't get built by parent
|
||||
set(ATen_CPU_SRCS)
|
||||
@ -315,7 +332,7 @@ if(USE_CUDA OR USE_ROCM)
|
||||
ATen_cuda PUBLIC ATen_cpu ${ATen_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
|
||||
# Set standard properties on the target
|
||||
aten_set_target_props(ATen_cuda)
|
||||
torch_set_target_props(ATen_cuda)
|
||||
|
||||
caffe2_interface_library(ATen_cuda ATen_cuda_library)
|
||||
|
||||
@ -333,9 +350,9 @@ if(NOT AT_LINK_STYLE STREQUAL "INTERFACE")
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
aten_compile_options(ATen_cpu)
|
||||
torch_compile_options(ATen_cpu)
|
||||
if(USE_CUDA OR USE_ROCM)
|
||||
aten_compile_options(ATen_cuda)
|
||||
torch_compile_options(ATen_cuda)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -156,12 +156,14 @@ struct strided_tensor_iter_fixed {
|
||||
strided_tensor_iter_fixed(Tensor& tensor, bool sort_strides = false)
|
||||
: data_(tensor.data<T>()) {
|
||||
std::memset(counter_, 0, sizeof(int64_t) * N);
|
||||
std::memcpy(
|
||||
sizes_, tensor.sizes().data(), tensor.ndimension() * sizeof(int64_t));
|
||||
std::memcpy(
|
||||
strides_,
|
||||
tensor.strides().data(),
|
||||
tensor.ndimension() * sizeof(int64_t));
|
||||
if (tensor.dim() > 0) {
|
||||
std::memcpy(
|
||||
sizes_, tensor.sizes().data(), tensor.dim() * sizeof(int64_t));
|
||||
std::memcpy(
|
||||
strides_,
|
||||
tensor.strides().data(),
|
||||
tensor.dim() * sizeof(int64_t));
|
||||
}
|
||||
dim_ = std::get<1>(collapse_dims(sizes_, strides_, tensor.ndimension()));
|
||||
}
|
||||
};
|
||||
@ -207,7 +209,7 @@ inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) {
|
||||
for (size_t i = 0; i < tensors.size() - 1; i++) {
|
||||
oss << tensors[i].sizes() << ", ";
|
||||
}
|
||||
oss << "and " << tensors[tensors.size() - 1]
|
||||
oss << "and " << tensors[tensors.size() - 1].sizes()
|
||||
<< " to have the same number of elements, but got ";
|
||||
for (size_t i = 0; i < tensors.size() - 1; i++) {
|
||||
oss << tensors[i].numel() << ", ";
|
||||
@ -220,7 +222,7 @@ inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) {
|
||||
inline bool _apply_preamble(ArrayRef<Tensor> tensors) {
|
||||
checkBackend("CPU_tensor_apply", tensors, Backend::CPU);
|
||||
if (!_all_equal_numel(tensors))
|
||||
throw std::runtime_error(_all_equal_numel_error(tensors));
|
||||
AT_ERROR(_all_equal_numel_error(tensors));
|
||||
// An empty tensor has no elements
|
||||
for (auto& t : tensors)
|
||||
if (t.numel() == 0)
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/Error.h"
|
||||
#include "TH/TH.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
// This file creates a fake allocator that just throws exceptions if
|
||||
// it is actually used.
|
||||
|
@ -1,12 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
// Using AT_API is crucial as otherwise you'll see
|
||||
// Using CAFFE2_API is crucial as otherwise you'll see
|
||||
// linking errors using MSVC
|
||||
// See https://msdn.microsoft.com/en-us/library/a90k134d.aspx
|
||||
// This header adds this if using AT_API
|
||||
// This header adds this if using CAFFE2_API
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
|
||||
namespace at {
|
||||
AT_API void set_num_threads(int);
|
||||
AT_API int get_num_threads();
|
||||
CAFFE2_API void set_num_threads(int);
|
||||
CAFFE2_API int get_num_threads();
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
struct AT_API CPUTypeDefault : public TypeDefault {
|
||||
struct CAFFE2_API CPUTypeDefault : public TypeDefault {
|
||||
CPUTypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined)
|
||||
: TypeDefault(type_id, is_variable, is_undefined) {}
|
||||
Allocator* allocator() const override;
|
||||
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/Generator.h"
|
||||
#include "ATen/Utils.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "ATen/core/Generator.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
|
@ -13,13 +13,10 @@
|
||||
#include "ATen/CPUGenerator.h"
|
||||
#include "ATen/RegisterCPU.h"
|
||||
#include "ATen/Tensor.h"
|
||||
#include <ATen/cpu/FlushDenormal.h>
|
||||
|
||||
#include "TH/TH.h" // for USE_LAPACK
|
||||
|
||||
#ifdef USE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
|
||||
static inline void errorHandler(const char * msg, void * data) {
|
||||
@ -33,7 +30,9 @@ static inline void argErrorHandler(int arg, const char * msg, void * data) {
|
||||
|
||||
Context::Context()
|
||||
: next_id(static_cast<size_t>(TypeID::NumOptions))
|
||||
, thc_state(nullptr, [](THCState* p){ /* no-op */ } ) {
|
||||
, thc_state(nullptr, [](THCState* p){ /* no-op */ } )
|
||||
, thh_state(nullptr, [](THHState* p){ /* no-op */ } )
|
||||
{
|
||||
|
||||
THSetDefaultErrorHandler(errorHandler,nullptr);
|
||||
THSetDefaultArgErrorHandler(argErrorHandler,nullptr);
|
||||
@ -94,51 +93,54 @@ bool Context::hasLAPACK() const {
|
||||
}
|
||||
|
||||
bool Context::setFlushDenormal(bool on) {
|
||||
#ifdef USE_SSE3
|
||||
// Setting flush-to-zero (FTZ) flag
|
||||
_MM_SET_FLUSH_ZERO_MODE(on ? _MM_FLUSH_ZERO_ON
|
||||
: _MM_FLUSH_ZERO_OFF);
|
||||
|
||||
// Setting denormals-are-zero (DAZ) flag
|
||||
_MM_SET_DENORMALS_ZERO_MODE(on ? _MM_DENORMALS_ZERO_ON
|
||||
: _MM_DENORMALS_ZERO_OFF);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
return at::cpu::set_flush_denormal(on);
|
||||
}
|
||||
|
||||
TypeExtendedInterface& getType(TensorOptions options) {
|
||||
return globalContext().getType(
|
||||
options.backend(), options.dtype(), options.is_variable());
|
||||
options.backend(), typeMetaToScalarType(options.dtype()), options.is_variable());
|
||||
}
|
||||
|
||||
TypeExtendedInterface& getType(const TensorImpl* impl) {
|
||||
Backend backend = tensorTypeIdToBackend(impl->type_id());
|
||||
return globalContext().getType(
|
||||
backend, dataTypeToScalarType(impl->dtype().id()), impl->is_variable());
|
||||
backend, typeMetaToScalarType(impl->dtype()), impl->is_variable());
|
||||
}
|
||||
|
||||
TypeExtendedInterface& getType(const Tensor& t) {
|
||||
return getType(t.unsafeGetTensorImpl());
|
||||
}
|
||||
|
||||
LegacyTHDispatcher& getLegacyTHDispatcher(TensorOptions options) {
|
||||
return globalContext().getLegacyTHDispatcher(
|
||||
options.backend(), typeMetaToScalarType(options.dtype()));
|
||||
}
|
||||
|
||||
LegacyTHDispatcher& getLegacyTHDispatcher(const TensorImpl* impl) {
|
||||
Backend backend = tensorTypeIdToBackend(impl->type_id());
|
||||
return globalContext().getLegacyTHDispatcher(
|
||||
backend, typeMetaToScalarType(impl->dtype()));
|
||||
}
|
||||
|
||||
Allocator* getCPUAllocator() {
|
||||
return getTHDefaultAllocator();
|
||||
}
|
||||
|
||||
struct LegacyTypeInit : public LegacyTypeInitInterface {
|
||||
LegacyTypeInit(LegacyTypeInitArgs) {}
|
||||
struct LegacyDeviceTypeInit : public LegacyDeviceTypeInitInterface {
|
||||
LegacyDeviceTypeInit(LegacyDeviceTypeInitArgs) {}
|
||||
void initCPU() const override {
|
||||
globalContext();
|
||||
}
|
||||
void initCUDA() const override {
|
||||
globalContext().lazyInitCUDA();
|
||||
}
|
||||
void initHIP() const override {
|
||||
globalContext().lazyInitHIP();
|
||||
}
|
||||
void initComplex() const override {
|
||||
globalContext().lazyInitComplex();
|
||||
}
|
||||
};
|
||||
REGISTER_LEGACY_TYPE_INIT(LegacyTypeInit);
|
||||
REGISTER_LEGACY_TYPE_INIT(LegacyDeviceTypeInit);
|
||||
|
||||
}
|
||||
|
@ -1,20 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/CPUGeneral.h>
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
#include "ATen/CUDAStream.h"
|
||||
#include "ATen/core/Generator.h"
|
||||
#include "ATen/Type.h"
|
||||
#include "ATen/TypeExtendedInterface.h"
|
||||
#include "ATen/Utils.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "ATen/detail/CUDAHooksInterface.h"
|
||||
#include "ATen/core/VariableHooksInterface.h"
|
||||
#include "ATen/detail/ComplexHooksInterface.h"
|
||||
#include "ATen/LegacyTHDispatch.h"
|
||||
#include "ATen/LegacyTHDispatcher.h"
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
#include "ATen/core/Generator.h"
|
||||
#include "ATen/core/LegacyTypeDispatch.h"
|
||||
|
||||
// This is temporary
|
||||
#include "ATen/core/ATenCoreTest.h"
|
||||
#include "ATen/core/VariableHooksInterface.h"
|
||||
#include "ATen/detail/CUDAHooksInterface.h"
|
||||
#include "ATen/detail/HIPHooksInterface.h"
|
||||
#include "ATen/detail/ComplexHooksInterface.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@ -22,10 +21,10 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
struct Tensor;
|
||||
class Tensor;
|
||||
|
||||
class AT_API Context {
|
||||
public:
|
||||
class CAFFE2_API Context {
|
||||
public:
|
||||
Context();
|
||||
TypeExtendedInterface* getNonVariableTypeRaw(Backend p, ScalarType s) {
|
||||
return static_cast<TypeExtendedInterface*>(globalLegacyTypeDispatch().getNonVariableTypeRaw(p, s));
|
||||
@ -42,6 +41,9 @@ public:
|
||||
TypeExtendedInterface & getType(Backend p, ScalarType s, bool is_variable) {
|
||||
return static_cast<TypeExtendedInterface&>(globalLegacyTypeDispatch().getType(p, s, is_variable));
|
||||
}
|
||||
LegacyTHDispatcher& getLegacyTHDispatcher(Backend p, ScalarType s) {
|
||||
return globalLegacyTHDispatch().getLegacyTHDispatcher(p, s);
|
||||
}
|
||||
// The passed in Type must be delete'able
|
||||
// TODO: Just make it take a unique_ptr
|
||||
void registerType(Backend b, ScalarType s, Type* t) {
|
||||
@ -49,8 +51,14 @@ public:
|
||||
LegacyTypeDispatch::TypeUniquePtr{t, LegacyTypeDeleter([](Type* p) { delete p; }) });
|
||||
}
|
||||
|
||||
void registerLegacyTHDispatcher(Backend b, ScalarType s, LegacyTHDispatcher* t) {
|
||||
globalLegacyTHDispatch().registerDispatcher(b, s,
|
||||
LegacyTHDispatch::LegacyTHDispatcherUniquePtr{t, LegacyTHDispatcherDeleter([](LegacyTHDispatcher* p) { delete p; }) });
|
||||
}
|
||||
|
||||
Generator & defaultGenerator(DeviceType device_type) {
|
||||
initCUDAIfNeeded(device_type);
|
||||
initHIPIfNeeded(device_type);
|
||||
auto & generator = generator_registry[static_cast<int>(device_type)];
|
||||
if(!generator)
|
||||
AT_ERROR(DeviceTypeName(device_type), " backend type not enabled.");
|
||||
@ -64,11 +72,8 @@ public:
|
||||
bool hasCUDA() const {
|
||||
return detail::getCUDAHooks().hasCUDA();
|
||||
}
|
||||
bool hasCuDNN() const {
|
||||
return detail::getCUDAHooks().hasCuDNN();
|
||||
}
|
||||
int64_t current_device() const {
|
||||
return detail::getCUDAHooks().current_device();
|
||||
bool hasHIP() const {
|
||||
return detail::getHIPHooks().hasHIP();
|
||||
}
|
||||
// defined in header so that getNonVariableType has ability to inline
|
||||
// call_once check. getNonVariableType is called fairly frequently
|
||||
@ -81,6 +86,15 @@ public:
|
||||
});
|
||||
return thc_state.get();
|
||||
}
|
||||
THHState* lazyInitHIP() {
|
||||
std::call_once(thh_init,[&] {
|
||||
thh_state = detail::getHIPHooks().initHIP();
|
||||
generator_registry[static_cast<int>(DeviceType::HIP)] =
|
||||
detail::getHIPHooks().initHIPGenerator(this);
|
||||
detail::getHIPHooks().registerHIPTypes(this);
|
||||
});
|
||||
return thh_state.get();
|
||||
}
|
||||
void lazyInitComplex() {
|
||||
std::call_once(complex_init_, [&] {
|
||||
detail::getComplexHooks().registerComplexTypes(this);
|
||||
@ -91,10 +105,10 @@ public:
|
||||
// AT_ASSERT(thc_state);
|
||||
return thc_state.get();
|
||||
}
|
||||
|
||||
int getNumGPUs() const {
|
||||
return detail::getCUDAHooks().getNumGPUs();
|
||||
THHState* getTHHState() {
|
||||
return thh_state.get();
|
||||
}
|
||||
|
||||
size_t freshTypeID() {
|
||||
return next_id++;
|
||||
}
|
||||
@ -118,22 +132,29 @@ private:
|
||||
lazyInitCUDA();
|
||||
}
|
||||
}
|
||||
void initHIPIfNeeded(DeviceType p) {
|
||||
if (p == DeviceType::HIP) {
|
||||
lazyInitHIP();
|
||||
}
|
||||
}
|
||||
void initComplexIfNeeded(ScalarType s) {
|
||||
if (isComplexType(s)) {
|
||||
lazyInitComplex();
|
||||
}
|
||||
}
|
||||
std::once_flag thc_init;
|
||||
std::once_flag thh_init;
|
||||
std::once_flag complex_init_;
|
||||
bool enabled_cudnn = true;
|
||||
bool deterministic_cudnn = false;
|
||||
bool benchmark_cudnn = false;
|
||||
std::atomic<size_t> next_id;
|
||||
std::unique_ptr<THCState, void(*)(THCState*)> thc_state;
|
||||
std::unique_ptr<THHState, void(*)(THHState*)> thh_state;
|
||||
friend struct Type;
|
||||
};
|
||||
|
||||
AT_API Context & globalContext();
|
||||
CAFFE2_API Context& globalContext();
|
||||
|
||||
static inline void init() {
|
||||
globalContext();
|
||||
@ -153,11 +174,11 @@ static inline TypeExtendedInterface& getNonVariableType(DeviceType p, ScalarType
|
||||
return globalContext().getNonVariableType(deviceTypeToBackend(p), s);
|
||||
}
|
||||
|
||||
AT_API TypeExtendedInterface& getType(TensorOptions options);
|
||||
AT_API TypeExtendedInterface& getType(const TensorImpl*);
|
||||
AT_API TypeExtendedInterface& getType(const Tensor&);
|
||||
CAFFE2_API TypeExtendedInterface& getType(TensorOptions options);
|
||||
CAFFE2_API TypeExtendedInterface& getType(const TensorImpl*);
|
||||
CAFFE2_API TypeExtendedInterface& getType(const Tensor&);
|
||||
|
||||
AT_API Allocator* getCPUAllocator();
|
||||
CAFFE2_API Allocator* getCPUAllocator();
|
||||
|
||||
static inline TypeExtendedInterface& CPU(ScalarType s) {
|
||||
return getNonVariableType(Backend::CPU, s);
|
||||
@ -167,12 +188,19 @@ static inline TypeExtendedInterface& CUDA(ScalarType s) {
|
||||
return getNonVariableType(Backend::CUDA, s);
|
||||
}
|
||||
|
||||
static inline TypeExtendedInterface& HIP(ScalarType s) {
|
||||
return getNonVariableType(Backend::HIP, s);
|
||||
}
|
||||
|
||||
CAFFE2_API LegacyTHDispatcher& getLegacyTHDispatcher(TensorOptions options);
|
||||
CAFFE2_API LegacyTHDispatcher& getLegacyTHDispatcher(const Tensor&);
|
||||
|
||||
static inline bool hasCUDA() {
|
||||
return globalContext().hasCUDA();
|
||||
}
|
||||
|
||||
static inline bool hasCuDNN() {
|
||||
return globalContext().hasCuDNN();
|
||||
static inline bool hasHIP() {
|
||||
return globalContext().hasHIP();
|
||||
}
|
||||
|
||||
static inline bool hasMKL() {
|
||||
@ -187,8 +215,13 @@ static inline bool hasMAGMA() {
|
||||
return globalContext().hasMAGMA();
|
||||
}
|
||||
|
||||
static inline int64_t current_device() {
|
||||
return globalContext().current_device();
|
||||
static inline void manual_seed(uint64_t seed) {
|
||||
globalContext().defaultGenerator(DeviceType::CPU).manualSeed(seed);
|
||||
// NB: Sometimes we build with CUDA, but we don't have any GPUs
|
||||
// available. In that case, we must not seed CUDA; it will fail!
|
||||
if (hasCUDA() && detail::getCUDAHooks().getNumGPUs() > 0) {
|
||||
globalContext().defaultGenerator(DeviceType::CUDA).manualSeedAll(seed);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace at
|
||||
|
@ -152,7 +152,7 @@ DLManagedTensor* toDLPack(const Tensor& src) {
|
||||
atDLMTensor->tensor.deleter = &deleter;
|
||||
atDLMTensor->tensor.dl_tensor.data = src.data_ptr();
|
||||
int64_t device_id = 0;
|
||||
if (src.type().is_cuda()) {
|
||||
if (src.is_cuda()) {
|
||||
device_id = src.get_device();
|
||||
}
|
||||
atDLMTensor->tensor.dl_tensor.ctx = getDLContext(src.type(), device_id);
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
AT_API ScalarType toScalarType(const DLDataType& dtype);
|
||||
AT_API DLManagedTensor * toDLPack(const Tensor& src);
|
||||
AT_API Tensor fromDLPack(const DLManagedTensor* src);
|
||||
CAFFE2_API ScalarType toScalarType(const DLDataType& dtype);
|
||||
CAFFE2_API DLManagedTensor* toDLPack(const Tensor& src);
|
||||
CAFFE2_API Tensor fromDLPack(const DLManagedTensor* src);
|
||||
|
||||
} //namespace at
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Device.h>
|
||||
#include <c10/Device.h>
|
||||
|
@ -1,132 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Device.h>
|
||||
#include <ATen/core/ScalarType.h>
|
||||
#include <ATen/Tensor.h>
|
||||
#include <ATen/core/Error.h>
|
||||
#include <ATen/core/optional.h>
|
||||
#include <ATen/detail/CUDAHooksInterface.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <c10/DeviceGuard.h>
|
||||
#include <ATen/core/Tensor.h>
|
||||
#include <c10/core/ScalarType.h> // TensorList whyyyyy
|
||||
|
||||
namespace at {
|
||||
/// RAII guard that sets a certain default GPU index in its constructor, and
|
||||
/// changes it back to the device that was originally active upon destruction.
|
||||
///
|
||||
/// The index is always reset to the one that was active at the time of
|
||||
/// construction of the guard. Even if you `set_index` after construction, the
|
||||
/// destructor will still reset the index to the one that was active at
|
||||
/// construction time.
|
||||
struct DeviceGuard {
|
||||
/// Default constructor, does nothing.
|
||||
DeviceGuard() = default;
|
||||
|
||||
/// Uses the given device's `index()` if it is a CUDA device, else does
|
||||
/// nothing.
|
||||
explicit DeviceGuard(Device device) {
|
||||
if (device.is_cuda()) {
|
||||
set_index(device.index());
|
||||
}
|
||||
// Are you here because you're wondering why DeviceGuard(tensor) no
|
||||
// longer works? For code organization reasons, we have temporarily(?)
|
||||
// removed this constructor from DeviceGuard. The new way to
|
||||
// spell it is:
|
||||
//
|
||||
// OptionalDeviceGuard guard(device_of(tensor));
|
||||
|
||||
/// Return the Device of a Tensor, if the Tensor is defined.
|
||||
inline optional<Device> device_of(Tensor t) {
|
||||
if (t.defined()) {
|
||||
return make_optional(t.device());
|
||||
} else {
|
||||
return nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
explicit DeviceGuard(optional<Device> device_opt) {
|
||||
if (device_opt.has_value() && device_opt.value().is_cuda()) {
|
||||
set_index(device_opt.value().index());
|
||||
}
|
||||
/// Return the Device of a TensorList, if the list is non-empty and
|
||||
/// the first Tensor is defined. (This function implicitly assumes
|
||||
/// that all tensors in the list have the same device.)
|
||||
inline optional<Device> device_of(TensorList t) {
|
||||
if (!t.empty()) {
|
||||
return device_of(t.front());
|
||||
} else {
|
||||
return nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls `set_index` with the given index.
|
||||
explicit DeviceGuard(int32_t index) {
|
||||
set_index(index);
|
||||
}
|
||||
|
||||
/// Sets the device to the index on which the given tensor is located.
|
||||
explicit DeviceGuard(const Tensor& tensor) {
|
||||
set_index_from(tensor);
|
||||
}
|
||||
|
||||
/// Sets the device to the index on which the first tensor in the list is
|
||||
/// located. If the list is empty, does nothing.
|
||||
explicit DeviceGuard(const TensorList& tensors) {
|
||||
if (!tensors.empty()) {
|
||||
set_index_from(tensors.front());
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy is disallowed.
|
||||
DeviceGuard(const DeviceGuard&) = delete;
|
||||
DeviceGuard& operator=(const DeviceGuard&) = delete;
|
||||
|
||||
/// Move-constructs this `DeviceGuard` from another `DeviceGuard`. The
|
||||
/// moved-from `DeviceGuard` is modified such that its destruction has no
|
||||
/// effect (does not reset the device).
|
||||
DeviceGuard(DeviceGuard&& other) noexcept {
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
/// Move-assigns this `DeviceGuard` from another `DeviceGuard`. The
|
||||
/// moved-from `DeviceGuard` is modified such that its destruction has no
|
||||
/// effect (does not reset the device).
|
||||
DeviceGuard& operator=(DeviceGuard&& other) noexcept {
|
||||
this->original_index_ = other.original_index_;
|
||||
this->last_index_ = other.last_index_;
|
||||
// Set other's original index to the unspecified/default state, so that it
|
||||
// doesn't also reset the device in its constructor.
|
||||
other.original_index_ = -1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Resets the device to the index that was active at construction of the
|
||||
/// guard.
|
||||
~DeviceGuard() {
|
||||
// It should only not have a value if an index was never actually set.
|
||||
if (original_index_ != -1) {
|
||||
// Unchecked because we don't want to throw in the destructor.
|
||||
detail::DynamicCUDAInterface::unchecked_set_device(original_index_);
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the device to the given one.
|
||||
void set_index(int32_t index) {
|
||||
if (index == -1) {
|
||||
return;
|
||||
}
|
||||
AT_ASSERT(index >= 0);
|
||||
if (original_index_ == -1) {
|
||||
int32_t previous_index = -123;
|
||||
detail::DynamicCUDAInterface::get_device(&previous_index);
|
||||
original_index_ = previous_index;
|
||||
if (index != original_index_) {
|
||||
detail::DynamicCUDAInterface::set_device(index);
|
||||
}
|
||||
} else {
|
||||
detail::DynamicCUDAInterface::set_device(index);
|
||||
}
|
||||
last_index_ = index;
|
||||
}
|
||||
|
||||
/// Calls `set_index` with the `Tensor`'s current device, if it is a CUDA
|
||||
/// tensor. Does nothing if the `tensor` is not defined.
|
||||
void set_index_from(const Tensor& tensor) {
|
||||
if (tensor.defined() && tensor.is_cuda()) {
|
||||
set_index(tensor.get_device());
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the device that was set upon construction of the guard.
|
||||
int32_t original_index() const noexcept {
|
||||
return original_index_;
|
||||
}
|
||||
|
||||
/// Returns the last device that was set via `set_index`, if any.
|
||||
int32_t last_index() const noexcept {
|
||||
return last_index_;
|
||||
}
|
||||
|
||||
private:
|
||||
/// The original device that was active at construction of this object.
|
||||
int32_t original_index_ = -1;
|
||||
/// The last index that was set via `set_index`.
|
||||
int32_t last_index_ = -1;
|
||||
};
|
||||
} // namespace at
|
||||
|
@ -1,11 +1,2 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/SmallVector.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
/// A container for sizes or strides
|
||||
using DimVector = SmallVector<int64_t, 5>;
|
||||
|
||||
}
|
||||
#include <ATen/core/DimVector.h>
|
||||
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/Type.h>
|
||||
#include <ATen/core/Error.h>
|
||||
#include <ATen/core/Half.h>
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
#define AT_PRIVATE_CASE_TYPE(enum_type, type, ...) \
|
||||
case enum_type: { \
|
||||
@ -10,121 +10,144 @@
|
||||
return __VA_ARGS__(); \
|
||||
}
|
||||
|
||||
#define AT_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexHalf, std::complex<at::Half>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_HALF(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_HALF(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
||||
#define AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX(TYPE, NAME, ...) \
|
||||
[&] { \
|
||||
const at::Type& the_type = TYPE; \
|
||||
switch (the_type.scalarType()) { \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__) \
|
||||
AT_PRIVATE_CASE_TYPE( \
|
||||
at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__) \
|
||||
default: \
|
||||
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
|
||||
} \
|
||||
}()
|
||||
|
@ -1,2 +0,0 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Error.h>
|
@ -68,7 +68,11 @@ std::tuple<std::vector<int64_t>, std::vector<int64_t>> inferExpandGeometry(
|
||||
") must match the existing size (",
|
||||
size,
|
||||
") at non-singleton dimension ",
|
||||
i);
|
||||
i,
|
||||
". Target sizes: ",
|
||||
sizes,
|
||||
". Tensor sizes: ",
|
||||
tensor_sizes);
|
||||
size = targetSize;
|
||||
stride = 0;
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/Tensor.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
#include <functional>
|
||||
#include <sstream>
|
||||
@ -9,9 +9,12 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
AT_API std::vector<int64_t> infer_size(IntList a, IntList b);
|
||||
AT_API std::tuple<std::vector<int64_t>, std::vector<int64_t> > inferExpandGeometry(
|
||||
IntList tensor_sizes, IntList tensor_strides, IntList sizes);
|
||||
CAFFE2_API std::vector<int64_t> infer_size(IntList a, IntList b);
|
||||
CAFFE2_API std::tuple<std::vector<int64_t>, std::vector<int64_t>>
|
||||
inferExpandGeometry(
|
||||
IntList tensor_sizes,
|
||||
IntList tensor_strides,
|
||||
IntList sizes);
|
||||
|
||||
// avoid copy-construction of Tensor by using a reference_wrapper.
|
||||
inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) {
|
||||
@ -133,20 +136,25 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
|
||||
|
||||
// Sums `tensor` repeatedly to produce a tensor of shape `shape`.
|
||||
// Precondition: is_expandable_to(shape, tensor.sizes()) must be true
|
||||
static inline Tensor sum_to(Tensor tensor, IntList shape) {
|
||||
static inline Tensor sum_to(Tensor tensor, const IntList shape) {
|
||||
if (shape.size() == 0) {
|
||||
return tensor.sum();
|
||||
}
|
||||
Tensor result = tensor;
|
||||
while (result.dim() > (int64_t)shape.size()) {
|
||||
result = result.sum(0, false);
|
||||
c10::SmallVector<int64_t, 8> reduce_dims;
|
||||
const at::IntList sizes = tensor.sizes();
|
||||
const int64_t leading_dims = sizes.size() - shape.size();
|
||||
for (int64_t i = 0; i < leading_dims; ++i) {
|
||||
reduce_dims.push_back(i);
|
||||
}
|
||||
for (int64_t i = 0; i < result.dim(); ++i) {
|
||||
if (shape[i] == 1 && result.sizes()[i] > 1) {
|
||||
result = result.sum(i, true);
|
||||
for (int64_t i = leading_dims; i < static_cast<int64_t>(sizes.size()); ++i) {
|
||||
if (shape[i - leading_dims] == 1 && sizes[i] > 1) {
|
||||
reduce_dims.push_back(i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
if (!reduce_dims.empty()) {
|
||||
tensor = tensor.sum(reduce_dims, /*keepdim=*/true);
|
||||
}
|
||||
return leading_dims > 0 ? tensor.view(shape) : tensor;
|
||||
}
|
||||
|
||||
// True if `shape` can be broadcasted to `desired`
|
||||
|
@ -1,24 +1 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include "ATen/Type.h"
|
||||
#include "ATen/core/Scalar.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
AT_API std::ostream& operator<<(std::ostream & out, IntList list);
|
||||
AT_API std::ostream& operator<<(std::ostream & out, Backend b);
|
||||
AT_API std::ostream& operator<<(std::ostream & out, const Type & t);
|
||||
AT_API std::ostream& print(std::ostream& stream, const Tensor & tensor, int64_t linesize);
|
||||
static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
|
||||
return print(out,t,80);
|
||||
}
|
||||
static inline void print(const Tensor & t, int64_t linesize=80) {
|
||||
print(std::cout,t,linesize);
|
||||
}
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream & out, Scalar s) {
|
||||
return out << (s.isFloatingPoint() ? s.toDouble() : s.toLong());
|
||||
}
|
||||
|
||||
}
|
||||
#include <ATen/core/Formatting.h>
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/optional.h>
|
||||
#include <ATen/ScalarType.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
#include <c10/util/Optional.h>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
@ -12,7 +12,7 @@ namespace at {
|
||||
static std::vector<int64_t> infer_size(IntList shape, int64_t numel) {
|
||||
auto res = shape.vec();
|
||||
int64_t newsize = 1;
|
||||
auto infer_dim = at::optional<int64_t>();
|
||||
auto infer_dim = c10::optional<int64_t>();
|
||||
for (int64_t dim = 0, ndim = shape.size(); dim != ndim; dim++) {
|
||||
if (shape[dim] == -1) {
|
||||
if (infer_dim) {
|
||||
|
15
aten/src/ATen/InitialTensorOptions.h
Normal file
15
aten/src/ATen/InitialTensorOptions.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/TensorOptions.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
// Represents the initial TensorOptions, before the "defaults" are ever changed.
|
||||
// This is designed to be used in library code, where the explicit devices, dtypes, etc. are known.
|
||||
// NOTE: this is not a stable API.
|
||||
inline TensorOptions initialTensorOptions() {
|
||||
return TensorOptions(kCPU).dtype(kFloat).layout(kStrided)
|
||||
.requires_grad(false).is_variable(false);
|
||||
}
|
||||
|
||||
}
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Layout.h>
|
||||
#include <c10/core/Layout.h>
|
||||
|
12
aten/src/ATen/LegacyTHDispatch.cpp
Normal file
12
aten/src/ATen/LegacyTHDispatch.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include <ATen/LegacyTHDispatch.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
// TODO: This could be bad juju if someone calls globalContext() in the
|
||||
// destructor of an object with static lifetime.
|
||||
LegacyTHDispatch & globalLegacyTHDispatch() {
|
||||
static LegacyTHDispatch singleton;
|
||||
return singleton;
|
||||
}
|
||||
|
||||
}
|
91
aten/src/ATen/LegacyTHDispatch.h
Normal file
91
aten/src/ATen/LegacyTHDispatch.h
Normal file
@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
|
||||
// LegacyTHDispatcher is the legacy mechanism for dispatching directly
|
||||
// to TH/THNN/THC/THCUNN functions in ATen, which is essentially a giant virtual
|
||||
// dispatch table for every TH function we support dynamically dispatching over.
|
||||
//
|
||||
// NB: We do not actually dispatch to *operators* here, the usual pattern is for
|
||||
// ATen operators to call this mechanism for their implementation, but the
|
||||
// operator itself is declared separately (e.g. as a native function "wrapper").
|
||||
//
|
||||
// Q: Why don't we just use LegacyTypeDispatch here?
|
||||
// A: Mainly separation of concerns:
|
||||
// 1) Type is for implementation of operators, which requires codegen of
|
||||
// Variables, JIT, etc. That is handled by the native function "wrappers";
|
||||
// just calling into TH does not require that.
|
||||
// 2) Type does not require scalar-specific dispatch, whereas calling into TH
|
||||
// does. Thus, this separation allows us to evolve operator dispatch
|
||||
// separately (i.e. to use the C10 dispatcher) from details of how to
|
||||
// call TH functionality.
|
||||
//
|
||||
// The implmentation here is very similar to the LegacyTypeDispatch design, with
|
||||
// the following simplications:
|
||||
// 1) This is not required for a mobile build, so does not have to live in /core.
|
||||
// 2) Because these only contain function implementations, we do not have to
|
||||
// handle the Variable/Tensor split; that is handled at the native function
|
||||
// "wrapper" level.
|
||||
// 3) Because an operator must have been previously dispatched via the Type
|
||||
// mechanism, we do need to handle device initialization. This means it is
|
||||
// WRONG to call directly into these functions without first going through
|
||||
// Type dispatch (i.e. the usual operator -> Type -> LegacyTHDispatch pattern).
|
||||
// 4) Because an operator must have been previously dispatched via the Type
|
||||
// mechanism, we do not need to handle undefined Tensors.
|
||||
//
|
||||
// NB: We don't use Registry for this, because we don't want to
|
||||
// pay for a hash table lookup every time we do an operation.
|
||||
//
|
||||
// NB: we can delete this when we don't call into any TH implementations.
|
||||
|
||||
#include <c10/core/Backend.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
#include <ATen/LegacyTHDispatcher.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
struct Type;
|
||||
|
||||
struct CAFFE2_API LegacyTHDispatcherDeleter {
|
||||
using LegacyTHDispatcherDeleterFun = void(LegacyTHDispatcher*);
|
||||
LegacyTHDispatcherDeleterFun *fn_ = nullptr;
|
||||
LegacyTHDispatcherDeleter() {}
|
||||
/* implicit */ LegacyTHDispatcherDeleter(LegacyTHDispatcherDeleterFun *fn) : fn_(fn) {}
|
||||
void operator()(LegacyTHDispatcher * ptr) {
|
||||
if (fn_) {
|
||||
(*fn_)(ptr);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class CAFFE2_API LegacyTHDispatch {
|
||||
public:
|
||||
using LegacyTHDispatcherUniquePtr = std::unique_ptr<LegacyTHDispatcher, LegacyTHDispatcherDeleter>;
|
||||
// WARNING: This function has the precondition that you have
|
||||
// initialized the type you want to call. This initialization
|
||||
// step is generally done by Context, or assumed because you
|
||||
// have a Tensor and thus the Type of that Tensor must already
|
||||
// be initialized.
|
||||
|
||||
void registerDispatcher(Backend b, ScalarType s, LegacyTHDispatcherUniquePtr&& t) {
|
||||
dispatcher_registry[static_cast<int>(b)][static_cast<int>(s)] = std::move(t);
|
||||
}
|
||||
|
||||
LegacyTHDispatcher* getLegacyTHDispatcherRaw(Backend p, ScalarType s) {
|
||||
return dispatcher_registry[static_cast<int>(p)][static_cast<int>(s)].get();
|
||||
}
|
||||
|
||||
LegacyTHDispatcher & getLegacyTHDispatcher(Backend p, ScalarType s) {
|
||||
auto* type = getLegacyTHDispatcherRaw(p, s);
|
||||
if (!type) AT_ERROR(toString(p), toString(s), "THDispatcher is not enabled.");
|
||||
return *type;
|
||||
}
|
||||
private:
|
||||
// NB: dispatcher_registry has nullptr for all CUDA backends until
|
||||
// CUDA initialization has occurred
|
||||
LegacyTHDispatcherUniquePtr dispatcher_registry
|
||||
[static_cast<int>(Backend::NumOptions)]
|
||||
[static_cast<int>(ScalarType::NumOptions)];
|
||||
};
|
||||
|
||||
CAFFE2_API LegacyTHDispatch& globalLegacyTHDispatch();
|
||||
|
||||
} // namespace at
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
#include <ATen/Utils.h>
|
||||
#include <ATen/core/ArrayRef.h>
|
||||
#include <c10/util/ArrayRef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
@ -1,2 +0,0 @@
|
||||
#pragma once
|
||||
#include <ATen/core/OptionsGuard.h>
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
#include <ATen/ATen.h>
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <exception>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
@ -20,6 +22,30 @@ inline int64_t divup(int64_t x, int64_t y) {
|
||||
return (x + y - 1) / y;
|
||||
}
|
||||
|
||||
inline int get_max_threads() {
|
||||
#ifdef _OPENMP
|
||||
return omp_get_max_threads();
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int get_thread_num() {
|
||||
#ifdef _OPENMP
|
||||
return omp_get_thread_num();
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool in_parallel_region() {
|
||||
#ifdef _OPENMP
|
||||
return omp_in_parallel();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class F>
|
||||
inline void parallel_for(
|
||||
const int64_t begin,
|
||||
@ -27,14 +53,26 @@ inline void parallel_for(
|
||||
const int64_t grain_size,
|
||||
const F& f) {
|
||||
#ifdef _OPENMP
|
||||
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
|
||||
std::exception_ptr eptr;
|
||||
#pragma omp parallel if (!omp_in_parallel() && ((end - begin) >= grain_size))
|
||||
{
|
||||
int64_t num_threads = omp_get_num_threads();
|
||||
int64_t tid = omp_get_thread_num();
|
||||
int64_t chunk_size = divup((end - begin), num_threads);
|
||||
int64_t begin_tid = begin + tid * chunk_size;
|
||||
if (begin_tid < end)
|
||||
f(begin_tid, std::min(end, chunk_size + begin_tid));
|
||||
if (begin_tid < end) {
|
||||
try {
|
||||
f(begin_tid, std::min(end, chunk_size + begin_tid));
|
||||
} catch (...) {
|
||||
if (!err_flag.test_and_set()) {
|
||||
eptr = std::current_exception();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (eptr) {
|
||||
std::rethrow_exception(eptr);
|
||||
}
|
||||
#else
|
||||
if (begin < end) {
|
||||
|
@ -1,2 +0,0 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Registry.h>
|
@ -1,60 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
// base class for refcounted things, allows for collects of generic
|
||||
// refcounted objects that include tensors
|
||||
struct AT_API Retainable {
|
||||
Retainable(): refcount(1), weak_refcount(1) {}
|
||||
void retain() {
|
||||
++refcount;
|
||||
}
|
||||
void release() {
|
||||
if(--refcount == 0) {
|
||||
// If we know that this is the last reference then we can skip
|
||||
// all the decrements and release_resources().
|
||||
if (weak_refcount == 1) {
|
||||
delete this;
|
||||
} else {
|
||||
release_resources();
|
||||
weak_release();
|
||||
}
|
||||
}
|
||||
}
|
||||
void weak_retain() {
|
||||
++weak_refcount;
|
||||
}
|
||||
void weak_release() {
|
||||
if (--weak_refcount == 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
bool weak_lock() {
|
||||
for (;;) {
|
||||
auto current_refcount = refcount.load();
|
||||
if (current_refcount == 0) return false;
|
||||
if (refcount.compare_exchange_strong(current_refcount, current_refcount + 1)) break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
uint32_t use_count() const {
|
||||
return refcount.load();
|
||||
}
|
||||
uint32_t weak_use_count() const {
|
||||
return weak_refcount.load();
|
||||
}
|
||||
|
||||
virtual void release_resources() {};
|
||||
virtual ~Retainable() {}
|
||||
private:
|
||||
// INVARIANT: once refcount reaches 0 it can never go up
|
||||
// INVARIANT: weak_refcount = number of weak references + (refcount > 0 ? 1 : 0)
|
||||
std::atomic<uint32_t> refcount;
|
||||
std::atomic<uint32_t> weak_refcount;
|
||||
};
|
||||
|
||||
}
|
@ -1,18 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/Scalar.h"
|
||||
#include <c10/core/Scalar.h>
|
||||
#include "ATen/Tensor.h"
|
||||
|
||||
namespace at {
|
||||
// This is in the c10 namespace because we use ADL to find the functions in it.
|
||||
namespace c10 {
|
||||
|
||||
// FIXME: this should be (and was) Scalar::toTensor, but there is currently no way
|
||||
// to implement this without going through Derived Types (which are not part of core).
|
||||
inline Tensor scalar_to_tensor(Scalar s) {
|
||||
inline at::Tensor scalar_to_tensor(Scalar s) {
|
||||
if (s.isFloatingPoint()) {
|
||||
return CPU(kDouble).scalarTensor(s);
|
||||
return at::CPU(kDouble).scalarTensor(s);
|
||||
} else {
|
||||
AT_ASSERT(s.isIntegral());
|
||||
return CPU(kLong).scalarTensor(s);
|
||||
return at::CPU(kLong).scalarTensor(s);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#pragma once
|
||||
#include <ATen/core/ATenGeneral.h> // for BC reasons
|
||||
#include <ATen/core/Backend.h>
|
||||
#include <ATen/core/ScalarType.h>
|
||||
#include <c10/core/Backend.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/SmallVector.h>
|
||||
#include <c10/util/SmallVector.h>
|
||||
|
@ -1,14 +1,16 @@
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/SparseTensorImpl.h>
|
||||
#include <ATen/InitialTensorOptions.h>
|
||||
#include <ATen/core/LegacyTypeDispatch.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
namespace {
|
||||
Backend sparseTensorIdToDenseBackend(TensorTypeId type_id) {
|
||||
DeviceType sparseTensorIdToDeviceType(TensorTypeId type_id) {
|
||||
if (type_id == SparseCPUTensorId()) {
|
||||
return Backend::CPU;
|
||||
return kCPU;
|
||||
} else if (type_id == SparseCUDATensorId()) {
|
||||
return Backend::CUDA;
|
||||
return kCUDA;
|
||||
} else {
|
||||
AT_ERROR("Cannot construct SparseTensor with non-sparse tensor type ID ", type_id);
|
||||
}
|
||||
@ -21,20 +23,20 @@ namespace {
|
||||
// a scalar and have one element)
|
||||
//
|
||||
// Thus, an empty sparse tensor should be a 1-dimensional tensor of size [0].
|
||||
// Furthermore, we have dim == sparseDims + denseDims; since this is a sparse
|
||||
// tensor, let us say that an empty sparse tensor has sparseDims == 1 and
|
||||
// denseDims == 0. (There is a degree of freedom here, but given that this
|
||||
// is a sparse dimension, it seems reasonable to demand that sparseDims > 0).
|
||||
// Furthermore, we have dim == sparse_dim + dense_dim; since this is a sparse
|
||||
// tensor, let us say that an empty sparse tensor has sparse_dim == 1 and
|
||||
// dense_dim == 0. (There is a degree of freedom here, but given that this
|
||||
// is a sparse dimension, it seems reasonable to demand that sparse_dim > 0).
|
||||
//
|
||||
// This means that we allocate a [1,0] size indices tensor and a [0] size
|
||||
// values tensor for such an empty tensor.
|
||||
SparseTensorImpl::SparseTensorImpl(at::TensorTypeId type_id, const caffe2::TypeMeta& data_type)
|
||||
: TensorImpl(type_id, data_type, nullptr, false)
|
||||
, size_{0}
|
||||
, sparseDims_(1)
|
||||
, denseDims_(0)
|
||||
, indices_(globalContext().getNonVariableTypeOpt(sparseTensorIdToDenseBackend(type_id), ScalarType::Long)->tensor({1, 0}))
|
||||
, values_(globalContext().getNonVariableTypeOpt(sparseTensorIdToDenseBackend(type_id), dataTypeToScalarType(data_type.id()))->tensor()) {}
|
||||
, sparse_dim_(1)
|
||||
, dense_dim_(0)
|
||||
, indices_(at::empty({1, 0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(ScalarType::Long)))
|
||||
, values_(at::empty({0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(data_type))) {}
|
||||
|
||||
IntList SparseTensorImpl::sizes() const {
|
||||
return size_;
|
||||
@ -66,7 +68,7 @@ void SparseTensorImpl::set_storage_offset(int64_t storage_offset) {
|
||||
}
|
||||
|
||||
int64_t SparseTensorImpl::dim() const {
|
||||
return sparseDims_ + denseDims_;
|
||||
return sparse_dim_ + dense_dim_;
|
||||
}
|
||||
TensorImpl* SparseTensorImpl::maybe_zero_dim(bool condition_when_zero_dim) {
|
||||
AT_CHECK(condition_when_zero_dim == (dim() == 0),
|
||||
@ -82,17 +84,22 @@ int64_t SparseTensorImpl::storage_offset() const {
|
||||
AT_ERROR("sparse tensors do not have storage");
|
||||
}
|
||||
void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) {
|
||||
AT_CHECK(values.type().toSparse() == type(), "values type must match sparse tensor type");
|
||||
AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors!
|
||||
|
||||
AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout());
|
||||
AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout());
|
||||
|
||||
AT_CHECK(values.type().toSparse() == legacyTensorType(*this), "values type must match sparse tensor type");
|
||||
AT_CHECK(indices.type().scalarType() == kLong, "indices must be an int64 tensor");
|
||||
AT_CHECK(indices.type().backend() == values.type().backend(), "backend of indices (", indices.type().backend(), ") must match backend of values (", values.type().backend(), ")");
|
||||
AT_CHECK(!indices.is_cuda() || indices.get_device() == values.get_device(), "device of indices (", indices.get_device(), ") must match device of values (", values.get_device(), ")");
|
||||
|
||||
AT_CHECK(indices.dim() == 2, "indices must be nDim x nnz, but got: ", indices.sizes());
|
||||
AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes());
|
||||
AT_CHECK(indices.size(1) == values.size(0), "indices and values must have same nnz, but got nnz from indices: ", indices.size(1), ", nnz from values: ", values.size(0));
|
||||
AT_CHECK(indices.size(0) == sparseDims_, "indices has incorrect first dimension, expected ", sparseDims_, ", got ", indices.size(0));
|
||||
AT_CHECK(values.dim() == denseDims_ + 1, "values has incorrect number of dimensions, expected ", denseDims_ + 1, ", got ", values.dim());
|
||||
AT_CHECK(indices.size(0) == sparse_dim_, "indices has incorrect first dimension, expected ", sparse_dim_, ", got ", indices.size(0));
|
||||
AT_CHECK(values.dim() == dense_dim_ + 1, "values has incorrect number of dimensions, expected ", dense_dim_ + 1, ", got ", values.dim());
|
||||
|
||||
auto dense_size_original = sizes().slice(sparseDims_);
|
||||
auto dense_size_original = sizes().slice(sparse_dim_);
|
||||
std::vector<int64_t> expected_values_size_vec = {values.size(0)};
|
||||
expected_values_size_vec.insert(expected_values_size_vec.end(), dense_size_original.begin(), dense_size_original.end());
|
||||
IntList expected_values_size(expected_values_size_vec);
|
||||
|
@ -2,25 +2,25 @@
|
||||
|
||||
#include "ATen/Tensor.h"
|
||||
#include "ATen/core/TensorImpl.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
namespace at {
|
||||
struct AT_API SparseTensorImpl : public TensorImpl {
|
||||
struct CAFFE2_API SparseTensorImpl : public TensorImpl {
|
||||
// Stored in COO format, indices + values.
|
||||
|
||||
// INVARIANTS:
|
||||
// _sparseDims: range [0, len(shape)]; _sparseDims + _denseDims = len(shape)
|
||||
// _denseDims : range [0, len(shape)]; _sparseDims + _denseDims = len(shape)
|
||||
// _indices.shape: dimensionality: 2, shape: (_sparseDims, nnz)
|
||||
// _values.shape: dimensionality: 1 + _denseDims. shape: (nnz, shape[_sparseDims:])
|
||||
// sparse_dim: range [0, len(shape)]; sparse_dim + dense_dim = len(shape)
|
||||
// dense_dim : range [0, len(shape)]; sparse_dim + dense_dim = len(shape)
|
||||
// _indices.shape: dimensionality: 2, shape: (sparse_dim, nnz)
|
||||
// _values.shape: dimensionality: 1 + dense_dim. shape: (nnz, shape[sparse_dim:])
|
||||
|
||||
// The true size of the sparse tensor (e.g., if you called to_dense()
|
||||
// on it). When THTensor merges into TensorImpl, this field
|
||||
// should move to the parent class.
|
||||
std::vector<int64_t> size_;
|
||||
|
||||
int64_t sparseDims_ = 0; // number of sparse dimensions
|
||||
int64_t denseDims_ = 0; // number of dense dimensions
|
||||
int64_t sparse_dim_ = 0; // number of sparse dimensions
|
||||
int64_t dense_dim_ = 0; // number of dense dimensions
|
||||
|
||||
Tensor indices_; // always a LongTensor
|
||||
Tensor values_;
|
||||
@ -39,8 +39,8 @@ public:
|
||||
explicit SparseTensorImpl(at::TensorTypeId, const caffe2::TypeMeta&);
|
||||
|
||||
int64_t nnz() const { return values_.size(0); }
|
||||
int64_t sparseDims() const { return sparseDims_; }
|
||||
int64_t denseDims() const { return denseDims_; }
|
||||
int64_t sparse_dim() const { return sparse_dim_; }
|
||||
int64_t dense_dim() const { return dense_dim_; }
|
||||
bool coalesced() const { return coalesced_; }
|
||||
Tensor indices() const { return indices_; }
|
||||
Tensor values() const { return values_; }
|
||||
@ -60,16 +60,16 @@ public:
|
||||
const Storage& storage() const override;
|
||||
int64_t storage_offset() const override;
|
||||
|
||||
// WARNING: This function does NOT preserve invariants of sparseDims/denseDims with
|
||||
// WARNING: This function does NOT preserve invariants of sparse_dim/dense_dim with
|
||||
// respect to indices and values
|
||||
void raw_resize_(int64_t sparseDims, int64_t denseDims, IntList size) {
|
||||
void raw_resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) {
|
||||
size_ = size.vec();
|
||||
sparseDims_ = sparseDims;
|
||||
denseDims_ = denseDims;
|
||||
sparse_dim_ = sparse_dim;
|
||||
dense_dim_ = dense_dim;
|
||||
refresh_numel();
|
||||
}
|
||||
|
||||
// NOTE: This function preserves invariants of sparseDims/denseDims with respect to
|
||||
// NOTE: This function preserves invariants of sparse_dim/dense_dim with respect to
|
||||
// indices and values.
|
||||
//
|
||||
// NOTE: This function supports the following cases:
|
||||
@ -91,36 +91,36 @@ public:
|
||||
// and for API consistency we don't support it).
|
||||
// 4. When we attempt to shrink the size of any of the sparse dimensions on a non-empty sparse tensor
|
||||
// (this could make some of the stored indices out-of-bound and thus unsafe).
|
||||
void resize_(int64_t sparseDims, int64_t denseDims, IntList size) {
|
||||
AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size());
|
||||
void resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) {
|
||||
AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size());
|
||||
if (nnz() > 0) {
|
||||
auto alt_options_msg = "You could try the following options:\n\
|
||||
1. If you need an empty sparse tensor of this size, call `x=torch.sparse_coo_tensor(size)`.\n\
|
||||
1. If you need an empty sparse tensor of this size, call `x = torch.sparse_coo_tensor(size)`.\n\
|
||||
2. If you need to resize this tensor, you have the following options:\n\
|
||||
1. For both sparse and dense dimensions, keep the number of them constant and the size of them non-shrinking, and then try the same call again.\n\
|
||||
2. Or, create a new sparse tensor with the correct indices and values from this sparse tensor.";
|
||||
|
||||
AT_CHECK(sparseDims == sparseDims_,
|
||||
"changing the number of sparse dimensions (from ", sparseDims_, " to ", sparseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
AT_CHECK(sparse_dim == sparse_dim_,
|
||||
"changing the number of sparse dimensions (from ", sparse_dim_, " to ", sparse_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
|
||||
AT_CHECK(denseDims == denseDims_,
|
||||
"changing the number of dense dimensions (from ", denseDims_, " to ", denseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
AT_CHECK(dense_dim == dense_dim_,
|
||||
"changing the number of dense dimensions (from ", dense_dim_, " to ", dense_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
|
||||
bool shrinking_sparse_dims = false;
|
||||
bool shrinking_dense_dims = false;
|
||||
auto sparse_size_original = sizes().slice(0, sparseDims);
|
||||
auto sparse_size_new = size.slice(0, sparseDims);
|
||||
for (int i = 0; i < sparseDims; i++) {
|
||||
bool shrinking_dense_dim = false;
|
||||
auto sparse_size_original = sizes().slice(0, sparse_dim);
|
||||
auto sparse_size_new = size.slice(0, sparse_dim);
|
||||
for (int i = 0; i < sparse_dim; i++) {
|
||||
if (sparse_size_new[i] < sparse_size_original[i]) {
|
||||
shrinking_sparse_dims = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto dense_size_original = sizes().slice(sparseDims);
|
||||
auto dense_size_new = size.slice(sparseDims);
|
||||
for (int i = 0; i < denseDims; i++) {
|
||||
auto dense_size_original = sizes().slice(sparse_dim);
|
||||
auto dense_size_new = size.slice(sparse_dim);
|
||||
for (int i = 0; i < dense_dim; i++) {
|
||||
if (dense_size_new[i] < dense_size_original[i]) {
|
||||
shrinking_dense_dims = true;
|
||||
shrinking_dense_dim = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -128,40 +128,38 @@ public:
|
||||
AT_CHECK(!shrinking_sparse_dims,
|
||||
"shrinking the size of sparse dimensions (from ", sparse_size_original, " to ", sparse_size_new, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
|
||||
AT_CHECK(!shrinking_dense_dims,
|
||||
AT_CHECK(!shrinking_dense_dim,
|
||||
"shrinking the size of dense dimensions (from ", dense_size_original, " to ", dense_size_new, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg);
|
||||
}
|
||||
|
||||
if ((!size.equals(size_)) || (sparseDims != sparseDims_) || (denseDims != denseDims_)) {
|
||||
std::vector<int64_t> values_size = {values().size(0)};
|
||||
auto dense_size = size.slice(sparseDims);
|
||||
if ((!size.equals(size_)) || (sparse_dim != sparse_dim_) || (dense_dim != dense_dim_)) {
|
||||
auto nnz = values().size(0);
|
||||
std::vector<int64_t> values_size = {nnz};
|
||||
auto dense_size = size.slice(sparse_dim);
|
||||
values_size.insert(values_size.end(), dense_size.begin(), dense_size.end());
|
||||
values_.resize_(values_size);
|
||||
|
||||
std::vector<int64_t> indices_size = indices().sizes().vec();
|
||||
indices_size[0] = sparseDims;
|
||||
indices_.resize_(indices_size);
|
||||
indices_.resize_({sparse_dim, nnz});
|
||||
}
|
||||
|
||||
size_ = size.vec();
|
||||
sparseDims_ = sparseDims;
|
||||
denseDims_ = denseDims;
|
||||
sparse_dim_ = sparse_dim;
|
||||
dense_dim_ = dense_dim;
|
||||
refresh_numel();
|
||||
}
|
||||
|
||||
// NOTE: this function will resize the sparse tensor and also set `indices` and `values` to empty.
|
||||
void resize_and_clear_(int64_t sparseDims, int64_t denseDims, IntList size) {
|
||||
AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size());
|
||||
void resize_and_clear_(int64_t sparse_dim, int64_t dense_dim, IntList size) {
|
||||
AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size());
|
||||
|
||||
size_ = size.vec();
|
||||
sparseDims_ = sparseDims;
|
||||
denseDims_ = denseDims;
|
||||
sparse_dim_ = sparse_dim;
|
||||
dense_dim_ = dense_dim;
|
||||
|
||||
auto empty_indices = indices().type().tensor({sparseDims, 0});
|
||||
auto empty_indices = at::empty({sparse_dim, 0}, indices().options());
|
||||
std::vector<int64_t> values_size = {0};
|
||||
auto dense_size = sizes().slice(sparseDims);
|
||||
auto dense_size = sizes().slice(sparse_dim);
|
||||
values_size.insert(values_size.end(), dense_size.begin(), dense_size.end());
|
||||
auto empty_values = values().type().tensor(values_size);
|
||||
auto empty_values = at::empty(values_size, values().options());
|
||||
set_indices_and_values_unsafe(empty_indices, empty_values);
|
||||
refresh_numel();
|
||||
}
|
||||
@ -169,9 +167,10 @@ public:
|
||||
void set_coalesced(bool coalesced) { coalesced_ = coalesced; }
|
||||
|
||||
// NOTE: this function is only used internally and not exposed to Python frontend
|
||||
void set_nnz_and_narrow(int64_t nnz) {
|
||||
indices_ = indices_.narrow(1, 0, nnz);
|
||||
values_ = values_.narrow(0, 0, nnz);
|
||||
void set_nnz_and_narrow(int64_t new_nnz) {
|
||||
AT_ASSERT(new_nnz <= nnz());
|
||||
indices_ = indices_.narrow(1, 0, new_nnz);
|
||||
values_ = values_.narrow(0, 0, new_nnz);
|
||||
}
|
||||
|
||||
// Takes indices and values and directly puts them into the sparse tensor, no copy.
|
||||
@ -182,6 +181,12 @@ public:
|
||||
// NB: This used to be able to avoid a refcount bump, but I was too lazy to
|
||||
// make it happen
|
||||
void set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values);
|
||||
|
||||
private:
|
||||
int64_t get_device_slow() const override {
|
||||
return values_.get_device();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace at
|
||||
|
125
aten/src/ATen/SparseTensorUtils.h
Normal file
125
aten/src/ATen/SparseTensorUtils.h
Normal file
@ -0,0 +1,125 @@
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/SparseTensorImpl.h>
|
||||
|
||||
namespace at { namespace sparse {
|
||||
|
||||
// Just for documentary purposes
|
||||
using SparseTensor = Tensor;
|
||||
using LongTensor = Tensor;
|
||||
using IntTensor = Tensor;
|
||||
using SparseType = Type;
|
||||
|
||||
// This is an internal utility function for getting at the SparseTensorImpl,
|
||||
// so that we can write sparse tensor specific accessors for special fields
|
||||
// in SparseTensor. You should only use this for writing low level
|
||||
// setters/getters for SparseTensorImpl fields; otherwise, you should use
|
||||
// the low level setters/getters that were implemented using this.
|
||||
//
|
||||
// This may be called repeatedly, so make sure it's pretty cheap.
|
||||
inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) {
|
||||
AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable");
|
||||
AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor");
|
||||
return static_cast<SparseTensorImpl*>(self.unsafeGetTensorImpl());
|
||||
}
|
||||
|
||||
// Takes indices and values and directly puts them into the sparse tensor, no
|
||||
// copy. This used to be called THSTensor_(_move)
|
||||
inline void alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) {
|
||||
get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values);
|
||||
}
|
||||
|
||||
// Take indices and values and makes a (data) copy of them to put into the sparse
|
||||
// indices/values. This used to be called THSTensor_(_set)
|
||||
inline void copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) {
|
||||
alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking));
|
||||
}
|
||||
|
||||
// TODO: put this into the public API
|
||||
inline bool is_same_tensor(const Tensor& lhs, const Tensor& rhs) {
|
||||
return lhs.unsafeGetTensorImpl() == rhs.unsafeGetTensorImpl();
|
||||
}
|
||||
|
||||
inline bool is_same_density(const SparseTensor& self, const SparseTensor& src) {
|
||||
return self.sparse_dim() == src.sparse_dim() && self.dense_dim() == src.dense_dim();
|
||||
}
|
||||
|
||||
// Give us a new values tensor, with the same dimensionality
|
||||
// as 'values' but with a new number of non-zero elements.
|
||||
// TODO: Expose this for real in ATen, some day?
|
||||
// NB: Doesn't preserve data.
|
||||
inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) {
|
||||
std::vector<int64_t> size = values.sizes().vec();
|
||||
size[0] = nnz;
|
||||
return at::empty(size, values.options());
|
||||
}
|
||||
|
||||
// NOTE [ Flatten Sparse Indices ]
|
||||
// This helper function flattens a sparse indices tensor (a LongTensor) into a 1D
|
||||
// indices tensor. E.g.,
|
||||
// input = [[2, 4, 0],
|
||||
// [3, 1, 10]]
|
||||
// full_size = [2, 12]
|
||||
// output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
|
||||
//
|
||||
// In other words, assuming that each `indices[i, :]` is a valid index to a
|
||||
// tensor `t` of shape `full_size`. This returns the corresponding indices to
|
||||
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
|
||||
// if forceClone is true, the result will forced to be a clone of self.
|
||||
// if force_clone is true, the result will forced to be a clone of self.
|
||||
inline LongTensor flatten_indices(const Tensor& indices, IntList full_size, bool force_clone = false) {
|
||||
int64_t sparse_dim = indices.size(0);
|
||||
if (sparse_dim == 1) {
|
||||
if (force_clone) {
|
||||
return indices.squeeze(0).clone();
|
||||
} else {
|
||||
return indices.squeeze(0);
|
||||
}
|
||||
} else {
|
||||
std::vector<int64_t> indices_mult_cpu_vec;
|
||||
indices_mult_cpu_vec.reserve(sparse_dim);
|
||||
int64_t mult = 1;
|
||||
for (int64_t i = sparse_dim - 1; i >= 0; i--) {
|
||||
indices_mult_cpu_vec[i] = mult;
|
||||
mult *= full_size[i];
|
||||
}
|
||||
auto indices_mult_cpu = indices.type().cpu()
|
||||
.tensorFromBlob(indices_mult_cpu_vec.data(), /*size=*/{sparse_dim, 1});
|
||||
// NB: must be blocking because this blob may be freed after this closure,
|
||||
// and non_blocking copy will see garbage.
|
||||
auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
|
||||
// Ideally we want matmul but matmul is slow on CPU Long and not implemented
|
||||
// on CUDA Long. So mul is faster.
|
||||
return indices.mul(indices_mult).sum(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Flatten sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
|
||||
// except this one allows partial flatten: only flatten on specified dims. Note that
|
||||
// the flatten indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
|
||||
// Also if input indices is already coalesced, the flattened indices will also be sorted.
|
||||
//
|
||||
// args:
|
||||
// indices: sparse tensor indices
|
||||
// sizes: sparse tensor sizes
|
||||
// dims_to_flatten: a list of dim index to flatten
|
||||
//
|
||||
// Ex1:
|
||||
// indices = [[2, 4, 0],
|
||||
// [3, 1, 3]]
|
||||
// sizes = [2, 12]
|
||||
// dims_to_flatten = [0, 1]
|
||||
// new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
|
||||
//
|
||||
// Ex2:
|
||||
// dims_to_flatten = [1]
|
||||
// new_indices = [ 3, 1, 3 ] # uncoalesced
|
||||
inline LongTensor flatten_indices_by_dims(const LongTensor& indices, const IntList& sizes, const IntList& dims_to_flatten){
|
||||
LongTensor new_indices = at::zeros({indices.size(1)}, indices.options());
|
||||
for (auto d : dims_to_flatten) {
|
||||
new_indices.mul_(sizes[d]);
|
||||
new_indices.add_(indices.select(0, d));
|
||||
}
|
||||
return new_indices;
|
||||
}
|
||||
|
||||
}} // namespace at::sparse
|
@ -1,2 +1,2 @@
|
||||
#pragma once
|
||||
#include <ATen/core/Storage.h>
|
||||
#include <c10/core/Storage.h>
|
||||
|
@ -1,2 +0,0 @@
|
||||
#pragma once
|
||||
#include <ATen/core/StorageImpl.h>
|
@ -12,8 +12,4 @@ bool TensorGeometry::is_contiguous() const {
|
||||
return at::geometry_is_contiguous(sizes_, strides_);
|
||||
}
|
||||
|
||||
Tensor TensorGeometry::zeros_with_stride(const Type& type) const {
|
||||
return type.tensor(sizes_, strides_).zero_();
|
||||
}
|
||||
|
||||
} // namespace at
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
struct AT_API TensorGeometry {
|
||||
struct CAFFE2_API TensorGeometry {
|
||||
TensorGeometry() : storage_offset_(0) {}
|
||||
|
||||
explicit TensorGeometry(IntList sizes)
|
||||
@ -30,9 +30,6 @@ struct AT_API TensorGeometry {
|
||||
// true if the tensor is contiguous
|
||||
bool is_contiguous() const;
|
||||
|
||||
// creates a new tensor with the sizes and strides of the source
|
||||
Tensor zeros_with_stride(const Type& type) const;
|
||||
|
||||
int64_t dim() const { return sizes_.size(); }
|
||||
int64_t size(int64_t dim) const {
|
||||
dim = maybe_wrap_dim(dim, this->dim());
|
||||
|
@ -1,2 +0,0 @@
|
||||
#pragma once
|
||||
#include <ATen/core/TensorImpl.h>
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/Scalar.h"
|
||||
#include <c10/core/Scalar.h>
|
||||
#include "ATen/Tensor.h"
|
||||
#include "ATen/Type.h"
|
||||
|
||||
@ -59,7 +59,7 @@ inline Tensor Tensor::operator[](Tensor index) const {
|
||||
index.dim() == 0,
|
||||
"Can only index with tensors that are scalars (zero-dim)");
|
||||
// The Scalar(Tensor) constructor is explicit, so we need to call it.
|
||||
return this->operator[](index._local_scalar());
|
||||
return this->operator[](index.item());
|
||||
}
|
||||
inline Tensor Tensor::operator[](int64_t index) const {
|
||||
return select(0, index);
|
||||
@ -68,9 +68,9 @@ inline Tensor Tensor::operator[](int64_t index) const {
|
||||
#define AT_FORALL_BINARY_OPS(_) \
|
||||
_(+,x.add(y), y.add(x)) \
|
||||
_(*,x.mul(y), y.mul(x)) \
|
||||
_(-,x.sub(y), y.type().tensor().resize_(y.sizes()).fill_(x).sub_(y)) \
|
||||
_(/,x.div(y), y.type().tensor().resize_(y.sizes()).fill_(x).div_(y)) \
|
||||
_(%,x.remainder(y), y.type().tensor().resize_(y.sizes()).fill_(x).remainder_(y)) \
|
||||
_(-,x.sub(y), ::at::empty(y.sizes(), y.options()).fill_(x).sub_(y)) \
|
||||
_(/,x.div(y), ::at::empty(y.sizes(), y.options()).fill_(x).div_(y)) \
|
||||
_(%,x.remainder(y), ::at::empty(y.sizes(), y.options()).fill_(x).remainder_(y)) \
|
||||
_(<,x.lt(y), y.gt(x)) \
|
||||
_(<=,x.le(y), y.ge(x)) \
|
||||
_(>,x.gt(y),y.lt(x)) \
|
||||
|
@ -12,7 +12,7 @@ namespace at {
|
||||
// make sense. These are particularly useful for native functions,
|
||||
// which do NO argument checking by default.
|
||||
|
||||
struct AT_API TensorArg {
|
||||
struct CAFFE2_API TensorArg {
|
||||
Tensor tensor;
|
||||
const char* name;
|
||||
int pos; // 1-indexed
|
||||
@ -22,7 +22,7 @@ struct AT_API TensorArg {
|
||||
const Tensor& operator*() const { return tensor; }
|
||||
};
|
||||
|
||||
struct AT_API TensorGeometryArg {
|
||||
struct CAFFE2_API TensorGeometryArg {
|
||||
TensorGeometry tensor;
|
||||
const char* name;
|
||||
int pos; // 1-indexed
|
||||
@ -49,40 +49,80 @@ using CheckedFrom = const char*;
|
||||
// not TensorGeometryArg, because the Tensor to TensorGeometry
|
||||
// conversion will blow up if you have undefined tensors.
|
||||
|
||||
AT_API std::ostream& operator<<(std::ostream & out, TensorGeometryArg t);
|
||||
AT_API void checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim);
|
||||
CAFFE2_API std::ostream& operator<<(std::ostream& out, TensorGeometryArg t);
|
||||
CAFFE2_API void checkDim(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t,
|
||||
int64_t dim);
|
||||
// NB: this is an inclusive-exclusive range
|
||||
AT_API void checkDimRange(CheckedFrom c, const TensorGeometryArg& t, int64_t dim_start, int64_t dim_end);
|
||||
AT_API void checkSameDim(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2);
|
||||
AT_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t);
|
||||
AT_API void checkAllContiguous(CheckedFrom c, at::ArrayRef<TensorArg> ts);
|
||||
AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, IntList sizes);
|
||||
AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, int64_t dim, int64_t size);
|
||||
AT_API void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel);
|
||||
AT_API void checkSameNumel(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2);
|
||||
AT_API void checkAllSameNumel(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
AT_API void checkScalarType(CheckedFrom c, const TensorArg& t, ScalarType s);
|
||||
AT_API void checkScalarTypes(CheckedFrom c, const TensorArg& t, at::ArrayRef<ScalarType> l);
|
||||
AT_API void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
|
||||
AT_API void checkAllSameGPU(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
AT_API void checkSameType(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
|
||||
AT_API void checkAllSameType(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
AT_API void checkSameSize(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
|
||||
AT_API void checkDefined(CheckedFrom c, const TensorArg& t);
|
||||
AT_API void checkAllDefined(CheckedFrom c, at::ArrayRef<TensorArg> t);
|
||||
CAFFE2_API void checkDimRange(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t,
|
||||
int64_t dim_start,
|
||||
int64_t dim_end);
|
||||
CAFFE2_API void checkSameDim(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t1,
|
||||
const TensorGeometryArg& t2);
|
||||
CAFFE2_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t);
|
||||
CAFFE2_API void checkAllContiguous(CheckedFrom c, at::ArrayRef<TensorArg> ts);
|
||||
CAFFE2_API void checkSize(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t,
|
||||
IntList sizes);
|
||||
CAFFE2_API void checkSize(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t,
|
||||
int64_t dim,
|
||||
int64_t size);
|
||||
CAFFE2_API void checkNumel(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t,
|
||||
int64_t numel);
|
||||
CAFFE2_API void checkSameNumel(
|
||||
CheckedFrom c,
|
||||
const TensorGeometryArg& t1,
|
||||
const TensorGeometryArg& t2);
|
||||
CAFFE2_API void checkAllSameNumel(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
CAFFE2_API void checkScalarType(
|
||||
CheckedFrom c,
|
||||
const TensorArg& t,
|
||||
ScalarType s);
|
||||
CAFFE2_API void checkScalarTypes(
|
||||
CheckedFrom c,
|
||||
const TensorArg& t,
|
||||
at::ArrayRef<ScalarType> l);
|
||||
CAFFE2_API void checkSameGPU(
|
||||
CheckedFrom c,
|
||||
const TensorArg& t1,
|
||||
const TensorArg& t2);
|
||||
CAFFE2_API void checkAllSameGPU(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
CAFFE2_API void checkSameType(
|
||||
CheckedFrom c,
|
||||
const TensorArg& t1,
|
||||
const TensorArg& t2);
|
||||
CAFFE2_API void checkAllSameType(CheckedFrom c, ArrayRef<TensorArg> tensors);
|
||||
CAFFE2_API void checkSameSize(
|
||||
CheckedFrom c,
|
||||
const TensorArg& t1,
|
||||
const TensorArg& t2);
|
||||
CAFFE2_API void checkDefined(CheckedFrom c, const TensorArg& t);
|
||||
CAFFE2_API void checkAllDefined(CheckedFrom c, at::ArrayRef<TensorArg> t);
|
||||
|
||||
// FixMe: does TensorArg slow things down?
|
||||
AT_API void checkBackend(CheckedFrom c, at::ArrayRef<Tensor> t, at::Backend backend);
|
||||
CAFFE2_API void checkBackend(
|
||||
CheckedFrom c,
|
||||
at::ArrayRef<Tensor> t,
|
||||
at::Backend backend);
|
||||
|
||||
// Methods for getting data_ptr if tensor is defined
|
||||
AT_API void * maybe_data_ptr(const Tensor& tensor);
|
||||
AT_API void * maybe_data_ptr(const TensorArg& tensor);
|
||||
CAFFE2_API void* maybe_data_ptr(const Tensor& tensor);
|
||||
CAFFE2_API void* maybe_data_ptr(const TensorArg& tensor);
|
||||
|
||||
// Return if the tensor geometry represented by `sizes` and `strides` is contiguous
|
||||
// Although we cache is_contiguous in tensor now, this is till useful because it
|
||||
// allows checking if a particular geometry is contiguous without explicitly
|
||||
// constructing a tensor, e.g., when you want to choose a kernel strategy based
|
||||
// on whether a subgeometry is contiguous.
|
||||
AT_API bool geometry_is_contiguous(IntList sizes, IntList strides);
|
||||
|
||||
CAFFE2_API bool geometry_is_contiguous(IntList sizes, IntList strides);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include "ATen/UndefinedType.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include "c10/util/Exception.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
@ -70,12 +70,4 @@ Type & UndefinedType::toScalarType(ScalarType s) const {
|
||||
AT_ERROR("toScalarType not implemented for UndefinedType to non-UndefinedType");
|
||||
}
|
||||
|
||||
Tensor & UndefinedType::s_copy_(Tensor & self, const Tensor & src, bool non_blocking) const {
|
||||
AT_ERROR("s_copy not defined for UndefinedType");
|
||||
}
|
||||
|
||||
Tensor & UndefinedType::_s_copy_from(const Tensor & self, Tensor & dst, bool non_blocking) const {
|
||||
AT_ERROR("_s_copy_from not defined for UndefinedType");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -30,9 +30,6 @@ struct UndefinedType final : public TypeDefault {
|
||||
virtual TypeID ID() const override;
|
||||
virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override;
|
||||
virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override;
|
||||
|
||||
virtual Tensor & s_copy_(Tensor & self, const Tensor & src, bool non_blocking) const override;
|
||||
virtual Tensor & _s_copy_from(const Tensor & self, Tensor & dst, bool non_blocking) const override;
|
||||
};
|
||||
|
||||
} // namespace at
|
||||
|
@ -1,13 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/ATenGeneral.h"
|
||||
#include "ATen/StorageImpl.h"
|
||||
#include <c10/core/StorageImpl.h>
|
||||
#include "ATen/core/UndefinedTensorImpl.h"
|
||||
|
||||
#include <ATen/core/ScalarType.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
#include "ATen/Formatting.h"
|
||||
#include "ATen/core/ArrayRef.h"
|
||||
#include "ATen/core/Error.h"
|
||||
#include <c10/util/ArrayRef.h>
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
AT_API int _crash_if_asan(int);
|
||||
CAFFE2_API int _crash_if_asan(int);
|
||||
|
||||
static inline const Storage& checked_storage(
|
||||
const Storage& expr,
|
||||
@ -113,11 +113,11 @@ std::array<int64_t, N> check_intlist(ArrayRef<int64_t> list, const char * name,
|
||||
}
|
||||
|
||||
inline int64_t sum_intlist(ArrayRef<int64_t> list) {
|
||||
return std::accumulate(list.begin(), list.end(), 0);
|
||||
return std::accumulate(list.begin(), list.end(), 0ll);
|
||||
}
|
||||
|
||||
inline int64_t prod_intlist(ArrayRef<int64_t> list) {
|
||||
return std::accumulate(list.begin(), list.end(), 1, std::multiplies<int64_t>());
|
||||
return std::accumulate(list.begin(), list.end(), 1ll, std::multiplies<int64_t>());
|
||||
}
|
||||
|
||||
} // at
|
||||
|
@ -12,7 +12,7 @@ namespace at {
|
||||
|
||||
constexpr size_t dim_bitset_size = 64;
|
||||
|
||||
static inline std::bitset<dim_bitset_size> dim_list_to_bitset(IntList dims, int64_t ndims, bool wrap_scalar=true) {
|
||||
static inline std::bitset<dim_bitset_size> dim_list_to_bitset(IntList dims, int64_t ndims) {
|
||||
AT_CHECK(ndims <= (int64_t) dim_bitset_size, "only tensors with up to ", dim_bitset_size, " dims are supported");
|
||||
std::bitset<dim_bitset_size> seen;
|
||||
for (size_t i = 0; i < dims.size(); i++) {
|
||||
|
@ -30,8 +30,7 @@ def set_declaration_defaults(declaration):
|
||||
if 'backends' not in declaration:
|
||||
declaration['backends'] = ['CPU', 'CUDA']
|
||||
if 'api_name' not in declaration:
|
||||
declaration['api_name'] = (declaration['python_name']
|
||||
if 'python_name' in declaration else declaration['name'])
|
||||
declaration['api_name'] = declaration['name']
|
||||
# Simulate multiple dispatch, even if it's not necessary
|
||||
if 'options' not in declaration:
|
||||
declaration['options'] = [{'arguments': declaration['arguments']}]
|
||||
|
@ -1,251 +0,0 @@
|
||||
from code_template import CodeTemplate
|
||||
from function_wrapper import nested_dict
|
||||
|
||||
FILE = CodeTemplate("""\
|
||||
// ${generated_comment}
|
||||
|
||||
#include "ATen/Config.h"
|
||||
|
||||
#include "TH/TH.h"
|
||||
${cuda_includes}
|
||||
#include "ATen/Utils.h"
|
||||
${copy_includes}
|
||||
|
||||
namespace at {
|
||||
|
||||
${copy_functions}
|
||||
|
||||
}
|
||||
""")
|
||||
|
||||
CUDA_INCLUDES = """\
|
||||
#undef THNN_
|
||||
#include "THC/THC.h"
|
||||
"""
|
||||
|
||||
# NB: The copy templates static_cast both dst and src, even though
|
||||
# technically we also perform a checked_cast_tensor in the prologue
|
||||
# of the copy (meaning that hypothetically, an already casted tensor
|
||||
# is available. However, in s_copy, the casted tensor is dst, while
|
||||
# in _s_copy_from, the casted tensor is src. So we can reuse the logic
|
||||
# in both cases, we unconditionally cast both tensors (and rely
|
||||
# on the surrounding code to establish the necessary invariants.)
|
||||
|
||||
COPY = CodeTemplate("""\
|
||||
${THTensor}_copy${cuda}${src_scalar_name}(${state,}\
|
||||
dst.unsafeGetTensorImpl(), \
|
||||
src.unsafeGetTensorImpl());
|
||||
""")
|
||||
|
||||
COPY_ASYNC_CPU = CodeTemplate("""\
|
||||
if (non_blocking) {
|
||||
${THTensor}_copyAsyncCPU(${state,}\
|
||||
dst.unsafeGetTensorImpl(), \
|
||||
src.unsafeGetTensorImpl());
|
||||
break;
|
||||
}
|
||||
""")
|
||||
|
||||
COPY_ASYNC_CUDA = CodeTemplate("""\
|
||||
if (non_blocking) {
|
||||
${THTensor}_copyAsyncCuda(${state,}\
|
||||
dst.unsafeGetTensorImpl(), \
|
||||
src.unsafeGetTensorImpl());
|
||||
break;
|
||||
}
|
||||
""")
|
||||
|
||||
CASE = CodeTemplate("""\
|
||||
case ${case_id}:
|
||||
${copies}
|
||||
break;
|
||||
""")
|
||||
|
||||
FUNCTION = CodeTemplate("""\
|
||||
Tensor & ${Type}::s_copy_(Tensor & dst, const Tensor & src, bool non_blocking) const {
|
||||
// code generated by copy_wrapper
|
||||
${checked_cast_dst}
|
||||
switch (src.type().ID()) {
|
||||
${copy_body}
|
||||
default:
|
||||
${function_fallthrough}
|
||||
}
|
||||
dst.unsafeGetTensorImpl()->maybe_zero_dim(src.dim() == 0);
|
||||
return dst;
|
||||
}
|
||||
""")
|
||||
|
||||
FUNCTION_FALLTHROUGH_REDISPATCH = "return src.type()._s_copy_from(src, dst, non_blocking);"
|
||||
|
||||
FUNCTION_FALLTHROUGH_ERROR = """\
|
||||
AT_ERROR("copy does not support ", src.type().toString(), " to ", toString(), " copy.");
|
||||
"""
|
||||
|
||||
FUNCTION_FROM = CodeTemplate("""\
|
||||
Tensor & ${Type}::_s_copy_from(const Tensor & src, Tensor & dst, bool non_blocking) const {
|
||||
// code generated by copy_wrapper
|
||||
${checked_cast_src}
|
||||
switch (dst.type().ID()) {
|
||||
${copy_body}
|
||||
default:
|
||||
AT_ERROR("copy does not support ", toString(), " to ", dst.type().toString(), " copy.");
|
||||
break;
|
||||
}
|
||||
dst.unsafeGetTensorImpl()->maybe_zero_dim(src.dim() == 0);
|
||||
return dst; // NB! dst
|
||||
}
|
||||
""")
|
||||
|
||||
# NB: Hypothetically, someone could call s_copy_from directly and get an error
|
||||
# message which claims something is not supported, when it actually is. But
|
||||
# the correct fix in this case was to NOT call copy_from
|
||||
FUNCTION_FROM_SWAP = CodeTemplate("""\
|
||||
Tensor & ${Type}::_s_copy_from(const Tensor & src, Tensor & dst, bool non_blocking) const {
|
||||
AT_ERROR("copy does not support ", src.type().toString(), " to ", dst.type().toString(), " copy (s_copy_from case).");
|
||||
}
|
||||
""")
|
||||
|
||||
|
||||
def create_one_copy(dst_type, all_types):
|
||||
copy_body = []
|
||||
|
||||
for src_type in all_types:
|
||||
if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
|
||||
# skip sparse copies, which are not yet implemented
|
||||
continue
|
||||
cuda = ''
|
||||
state = []
|
||||
if src_type['Backend'] == 'CUDA' or dst_type['Backend'] == 'CUDA':
|
||||
state.append('globalContext().getTHCState()')
|
||||
if src_type['Backend'] == 'CUDA':
|
||||
if dst_type['Backend'] == 'CUDA':
|
||||
cuda = 'Cuda'
|
||||
else:
|
||||
# don't attempt to process CPU-CUDA; this is handled in the
|
||||
# redispatch
|
||||
continue
|
||||
|
||||
body_env = nested_dict({
|
||||
'src_scalar_name': src_type['ScalarName'],
|
||||
'case_id': src_type['TypeID'],
|
||||
'src_tensor': src_type['Tensor'],
|
||||
'dst_tensor': dst_type['Tensor'],
|
||||
'cuda': cuda,
|
||||
'state': state,
|
||||
}, dst_type)
|
||||
|
||||
copies = []
|
||||
if dst_type['ScalarType'] == src_type['ScalarType']:
|
||||
if dst_type['Backend'] == 'CUDA' and src_type['Backend'] == 'CPU':
|
||||
copies.append(COPY_ASYNC_CPU.substitute(body_env))
|
||||
copies.append(COPY.substitute(body_env))
|
||||
|
||||
copy_body.append(CASE.substitute(body_env, copies=copies))
|
||||
|
||||
if dst_type['Backend'] == 'CPU':
|
||||
# CPU fallthrough needs to redispatch to _s_copy_from
|
||||
# (Backend == CPU implies Dense)
|
||||
assert dst_type['Density'] == 'Dense'
|
||||
function_fallthrough = FUNCTION_FALLTHROUGH_REDISPATCH
|
||||
else:
|
||||
function_fallthrough = FUNCTION_FALLTHROUGH_ERROR
|
||||
|
||||
# Note [checked_cast_tensor is for dense only]
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# checked_cast_tensor is only needed for backends which implement
|
||||
# copy and thus do a cast. Sparse does not support copies, so there
|
||||
# is no need to do a checked cast. (Furthermore, the code as written
|
||||
# will not work, as it will try to there is no derived Tensor type
|
||||
# for sparse.)
|
||||
checked_cast_dst = ''
|
||||
if dst_type['Density'] == 'Dense':
|
||||
checked_cast_dst = \
|
||||
'checked_tensor_unwrap(dst, "dst", 0, false, Backend::{}, ScalarType::{});' \
|
||||
.format(dst_type['Backend'],
|
||||
dst_type['ScalarName'])
|
||||
|
||||
env = nested_dict({
|
||||
'function_fallthrough': function_fallthrough,
|
||||
'checked_cast_dst': checked_cast_dst,
|
||||
}, dst_type)
|
||||
return FUNCTION.substitute(env, copy_body=copy_body)
|
||||
|
||||
|
||||
def create_one_copy_from(src_type, all_types):
|
||||
if src_type['DenseBackend'] == 'CPU':
|
||||
return FUNCTION_FROM_SWAP.substitute(src_type)
|
||||
|
||||
copy_body = []
|
||||
|
||||
for dst_type in all_types:
|
||||
if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
|
||||
# skip sparse copies, which are not yet implemented
|
||||
continue
|
||||
cuda = ''
|
||||
state = []
|
||||
if src_type['Backend'] == 'CUDA':
|
||||
cuda = 'Cuda'
|
||||
if dst_type['Backend'] == 'CUDA' or src_type['Backend'] == 'CUDA':
|
||||
state.append('globalContext().getTHCState()')
|
||||
|
||||
body_env = nested_dict({
|
||||
'src_scalar_name': src_type['ScalarName'],
|
||||
'case_id': dst_type['TypeID'],
|
||||
'src_tensor': src_type['Tensor'],
|
||||
'dst_tensor': dst_type['Tensor'],
|
||||
'cuda': cuda,
|
||||
'state': state,
|
||||
}, dst_type)
|
||||
|
||||
copies = []
|
||||
if dst_type['ScalarType'] == src_type['ScalarType']:
|
||||
# NB: Technically, we have already short-circuited the
|
||||
# src_type['Backend'] == 'CUDA' case at the beginning of this
|
||||
# function
|
||||
if dst_type['Backend'] == 'CPU' and src_type['Backend'] == 'CUDA':
|
||||
copies.append(COPY_ASYNC_CUDA.substitute(body_env))
|
||||
copies.append(COPY.substitute(body_env))
|
||||
|
||||
copy_body.append(CASE.substitute(body_env, copies=copies))
|
||||
|
||||
# See Note [checked_cast_tensor is for dense only]
|
||||
checked_cast_src = ''
|
||||
if src_type['Density'] != 'Sparse':
|
||||
checked_cast_src = \
|
||||
'checked_tensor_unwrap(src, "src", 0, false, Backend::{}, ScalarType::{});' \
|
||||
.format(src_type['Backend'], src_type['ScalarName'])
|
||||
|
||||
return FUNCTION_FROM.substitute(src_type, copy_body=copy_body, checked_cast_src=checked_cast_src)
|
||||
|
||||
|
||||
def create(all_types, backend):
|
||||
top_env = {
|
||||
'copy_includes': [],
|
||||
'copy_functions': [],
|
||||
'cuda_includes': [],
|
||||
'generated_comment': '@' + 'generated by aten/src/ATen/copy_wrapper.py'
|
||||
}
|
||||
|
||||
if backend == 'CUDA':
|
||||
top_env['cuda_includes'].append(CUDA_INCLUDES)
|
||||
|
||||
# Headers to include
|
||||
for the_type in all_types:
|
||||
# CUDA backend requires all headers (as it also manages CPU-CUDA
|
||||
# conversions), but CPU backend should only have CPU headers
|
||||
if backend == 'CPU' and the_type['DenseBackend'] != 'CPU':
|
||||
continue
|
||||
top_env['copy_includes'].append(
|
||||
'#include "ATen/{}.h"'.format(the_type['Type']))
|
||||
top_env['copy_includes'].append(
|
||||
'#include "ATen/core/TensorImpl.h"')
|
||||
|
||||
# Code generation
|
||||
for the_type in all_types:
|
||||
# Only generate code for the requested backend
|
||||
if the_type['DenseBackend'] != backend:
|
||||
continue
|
||||
top_env['copy_functions'].append(create_one_copy(the_type, all_types))
|
||||
top_env['copy_functions'].append(create_one_copy_from(the_type, all_types))
|
||||
|
||||
return FILE.substitute(top_env)
|
@ -1,12 +0,0 @@
|
||||
#include <ATen/core/ATenCoreTest.h>
|
||||
#include <ATen/core/Tensor.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
static int CoreTestGlobal = 0;
|
||||
int CoreTest() {
|
||||
Tensor x;
|
||||
return CoreTestGlobal++;
|
||||
}
|
||||
|
||||
} // namespace at
|
@ -1,8 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Macros.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
AT_CORE_API int CoreTest();
|
||||
}
|
@ -1,8 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
#include "ATen/core/Macros.h"
|
||||
|
||||
// TODO: Merge the *_API macros.
|
||||
#define AT_API AT_CORE_API
|
||||
#define AT_EXPORT AT_CORE_EXPORT
|
||||
#define AT_IMPORT AT_CORE_IMPORT
|
||||
#include "c10/macros/Macros.h"
|
||||
|
@ -1 +0,0 @@
|
||||
#include <ATen/core/AlignOf.h>
|
@ -1,19 +0,0 @@
|
||||
#include <ATen/core/Allocator.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
static void deleteInefficientStdFunctionContext(void* ptr) {
|
||||
delete static_cast<InefficientStdFunctionContext*>(ptr);
|
||||
}
|
||||
|
||||
at::DataPtr InefficientStdFunctionContext::makeDataPtr(
|
||||
void* ptr,
|
||||
const std::function<void(void*)>& deleter,
|
||||
Device device) {
|
||||
return {ptr,
|
||||
new InefficientStdFunctionContext({ptr, deleter}),
|
||||
&deleteInefficientStdFunctionContext,
|
||||
device};
|
||||
}
|
||||
|
||||
} // namespace at
|
@ -1 +0,0 @@
|
||||
#include <ATen/core/ArrayRef.h>
|
@ -1,28 +1,2 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
|
||||
#include <ATen/core/Macros.h>
|
||||
|
||||
namespace at {
|
||||
/// Utility to demangle a C++ symbol name.
|
||||
AT_CORE_API std::string demangle(const char* name);
|
||||
|
||||
/// Returns the printable name of the type.
|
||||
template <typename T>
|
||||
inline const char* demangle_type() {
|
||||
#ifdef __GXX_RTTI
|
||||
static const std::string name = demangle(typeid(T).name());
|
||||
return name.c_str();
|
||||
#else // __GXX_RTTI
|
||||
return "(RTTI disabled, cannot show name)";
|
||||
#endif // __GXX_RTTI
|
||||
}
|
||||
|
||||
AT_CORE_API std::string get_backtrace(
|
||||
size_t frames_to_skip = 0,
|
||||
size_t maximum_number_of_frames = 64,
|
||||
bool skip_python_frames = true);
|
||||
} // namespace at
|
||||
#include "c10/util/Backtrace.h"
|
||||
#include "c10/util/Type.h"
|
||||
|
@ -1 +0,0 @@
|
||||
#include <ATen/core/C++17.h>
|
@ -6,6 +6,12 @@ FILE(GLOB ATen_CORE_SRCS "*.cpp")
|
||||
FILE(GLOB ATen_CORE_TEST_SRCS "*_test.cpp")
|
||||
EXCLUDE(ATen_CORE_SRCS "${ATen_CORE_SRCS}" ${ATen_CORE_TEST_SRCS})
|
||||
|
||||
# see the source file for explanation
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/register_symbols.cpp
|
||||
PROPERTIES COMPILE_FLAGS -O0
|
||||
)
|
||||
|
||||
# Pass to parent
|
||||
set(ATen_CORE_HEADERS ${ATen_CORE_HEADERS} PARENT_SCOPE)
|
||||
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
|
||||
|
14
aten/src/ATen/core/DefaultDtype.cpp
Normal file
14
aten/src/ATen/core/DefaultDtype.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <ATen/core/typeid.h>
|
||||
#include <ATen/core/DefaultDtype.h>
|
||||
|
||||
namespace at {
|
||||
static auto default_dtype = caffe2::TypeMeta::Make<float>();
|
||||
|
||||
void set_default_dtype(caffe2::TypeMeta dtype) {
|
||||
default_dtype = std::move(dtype);
|
||||
}
|
||||
|
||||
const caffe2::TypeMeta& get_default_dtype() {
|
||||
return default_dtype;
|
||||
}
|
||||
} // namespace at
|
12
aten/src/ATen/core/DefaultDtype.h
Normal file
12
aten/src/ATen/core/DefaultDtype.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
namespace caffe2 {
|
||||
class TypeMeta;
|
||||
} // namespace caffe2
|
||||
|
||||
namespace at {
|
||||
CAFFE2_API void set_default_dtype(caffe2::TypeMeta dtype);
|
||||
CAFFE2_API const caffe2::TypeMeta& get_default_dtype();
|
||||
} // namespace at
|
@ -1,42 +0,0 @@
|
||||
#include <ATen/core/DeviceType.h>
|
||||
#include <ATen/core/Error.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
std::string DeviceTypeName(at::DeviceType d, bool lower_case) {
|
||||
switch (d) {
|
||||
// I considered instead using ctype::tolower to lower-case the strings
|
||||
// on the fly, but this seemed a bit much.
|
||||
case DeviceType::CPU:
|
||||
return lower_case ? "cpu" : "CPU";
|
||||
case DeviceType::CUDA:
|
||||
return lower_case ? "cuda" : "CUDA";
|
||||
case DeviceType::OPENGL:
|
||||
return lower_case ? "opengl" : "OPENGL";
|
||||
case DeviceType::OPENCL:
|
||||
return lower_case ? "opencl" : "OPENCL";
|
||||
case DeviceType::MKLDNN:
|
||||
return lower_case ? "mkldnn" : "MKLDNN";
|
||||
case DeviceType::IDEEP:
|
||||
return lower_case ? "ideep" : "IDEEP";
|
||||
case DeviceType::HIP:
|
||||
return lower_case ? "hip" : "HIP";
|
||||
default:
|
||||
AT_ERROR(
|
||||
"Unknown device: ",
|
||||
static_cast<int32_t>(d),
|
||||
". If you have recently updated the caffe2.proto file to add a new "
|
||||
"device type, did you forget to update the DeviceTypeName() "
|
||||
"function to reflect such recent changes?");
|
||||
// The below code won't run but is needed to suppress some compiler
|
||||
// warnings.
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, at::DeviceType type) {
|
||||
stream << at::DeviceTypeName(type, /* lower case */ true);
|
||||
return stream;
|
||||
}
|
||||
|
||||
} // namespace at
|
@ -1,34 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// This is directly synchronized with caffe2/proto/caffe2.proto, but
|
||||
// doesn't require me to figure out how to get Protobuf headers into
|
||||
// ATen/core (which would require a lot more build system hacking.)
|
||||
// If you modify me, keep me synchronized with that file.
|
||||
|
||||
#include <ATen/core/Macros.h>
|
||||
|
||||
#include <ostream>
|
||||
|
||||
namespace at {
|
||||
|
||||
// Underlying type declared to be int32_t for consistency with protobufs.
|
||||
enum class DeviceType : int32_t {
|
||||
CPU = 0,
|
||||
CUDA = 1, // CUDA.
|
||||
MKLDNN = 2, // Reserved for explicit MKLDNN
|
||||
OPENGL = 3, // OpenGL
|
||||
OPENCL = 4, // OpenCL
|
||||
IDEEP = 5, // IDEEP.
|
||||
HIP = 6, // AMD HIP
|
||||
// Change the following number if you add more devices in the code.
|
||||
COMPILE_TIME_MAX_DEVICE_TYPES = 7,
|
||||
ONLY_FOR_TEST = 20901701, // This device type is only for test.
|
||||
};
|
||||
|
||||
AT_CORE_API std::string DeviceTypeName(
|
||||
at::DeviceType d,
|
||||
bool lower_case = false);
|
||||
|
||||
AT_CORE_API std::ostream& operator<<(std::ostream& stream, at::DeviceType type);
|
||||
|
||||
} // namespace at
|
11
aten/src/ATen/core/DimVector.h
Normal file
11
aten/src/ATen/core/DimVector.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/util/SmallVector.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
/// A container for sizes or strides
|
||||
using DimVector = SmallVector<int64_t, 5>;
|
||||
|
||||
} // namespace at
|
@ -1,6 +1,4 @@
|
||||
#include "ATen/Formatting.h"
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include "ATen/core/Formatting.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
@ -9,6 +7,11 @@
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
|
||||
namespace c10 {
|
||||
std::ostream& operator<<(std::ostream & out, Backend b) {
|
||||
return out << toString(b);
|
||||
}
|
||||
}
|
||||
namespace at {
|
||||
|
||||
//not all C++ compilers have default float so we define our own here
|
||||
@ -30,22 +33,6 @@ private:
|
||||
std::ios saved;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, IntList list) {
|
||||
int i = 0;
|
||||
out << "[";
|
||||
for(auto e : list) {
|
||||
if (i++ > 0)
|
||||
out << ", ";
|
||||
out << e;
|
||||
}
|
||||
out << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, Backend b) {
|
||||
return out << toString(b);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, const Type& t) {
|
||||
return out << t.toString();
|
||||
}
|
31
aten/src/ATen/core/Formatting.h
Normal file
31
aten/src/ATen/core/Formatting.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/Scalar.h>
|
||||
#include <ATen/core/Tensor.h>
|
||||
#include <ATen/core/TensorMethods.h>
|
||||
#include <ATen/core/Type.h>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
namespace c10 {
|
||||
CAFFE2_API std::ostream& operator<<(std::ostream& out, Backend b);
|
||||
}
|
||||
namespace at {
|
||||
|
||||
CAFFE2_API std::ostream& operator<<(std::ostream& out, const Type& t);
|
||||
CAFFE2_API std::ostream& print(
|
||||
std::ostream& stream,
|
||||
const Tensor& tensor,
|
||||
int64_t linesize);
|
||||
static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
|
||||
return print(out,t,80);
|
||||
}
|
||||
static inline void print(const Tensor & t, int64_t linesize=80) {
|
||||
print(std::cout,t,linesize);
|
||||
}
|
||||
|
||||
static inline std::ostream& operator<<(std::ostream & out, Scalar s) {
|
||||
return out << (s.isFloatingPoint() ? s.toDouble() : s.toLong());
|
||||
}
|
||||
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
struct AT_API Generator {
|
||||
struct CAFFE2_API Generator {
|
||||
Generator() {};
|
||||
Generator(const Generator& other) = delete;
|
||||
Generator(Generator&& other) = delete;
|
||||
|
@ -1,257 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <ATen/core/Macros.h>
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#include <cuda_fp16.h>
|
||||
#endif
|
||||
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
#include <hip/hip_fp16.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
|
||||
/// Constructors
|
||||
|
||||
inline AT_HOSTDEVICE Half::Half(float value) {
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
|
||||
x = __half_as_short(__float2half(value));
|
||||
#else
|
||||
x = detail::float2halfbits(value);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Implicit conversions
|
||||
|
||||
inline AT_HOSTDEVICE Half::operator float() const {
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
|
||||
return __half2float(*reinterpret_cast<const __half*>(&x));
|
||||
#else
|
||||
return detail::halfbits2float(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __CUDACC__
|
||||
inline AT_HOSTDEVICE Half::Half(const __half& value) {
|
||||
x = *reinterpret_cast<const unsigned short*>(&value);
|
||||
}
|
||||
inline AT_HOSTDEVICE Half::operator __half() const {
|
||||
return *reinterpret_cast<const __half*>(&x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// CUDA intrinsics
|
||||
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350)
|
||||
inline __device__ Half __ldg(const Half* ptr) {
|
||||
return __ldg(reinterpret_cast<const __half*>(ptr));
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Arithmetic
|
||||
|
||||
inline AT_HOSTDEVICE Half operator+(const Half& a, const Half& b) {
|
||||
return static_cast<float>(a) + static_cast<float>(b);
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half operator-(const Half& a, const Half& b) {
|
||||
return static_cast<float>(a) - static_cast<float>(b);
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half operator*(const Half& a, const Half& b) {
|
||||
return static_cast<float>(a) * static_cast<float>(b);
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half operator/(const Half& a, const Half& b) {
|
||||
return static_cast<float>(a) / static_cast<float>(b);
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half operator-(const Half& a) {
|
||||
return -static_cast<float>(a);
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half& operator+=(Half& a, const Half& b) {
|
||||
a = a + b;
|
||||
return a;
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half& operator-=(Half& a, const Half& b) {
|
||||
a = a - b;
|
||||
return a;
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half& operator*=(Half& a, const Half& b) {
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
|
||||
inline AT_HOSTDEVICE Half& operator/=(Half& a, const Half& b) {
|
||||
a = a / b;
|
||||
return a;
|
||||
}
|
||||
|
||||
/// Arithmetic with floats
|
||||
|
||||
/// Mixed Half/float arithmetic stays in float: the Half operand is widened
/// to float and the result is returned as float (no rounding back to Half).

inline AT_HOSTDEVICE float operator+(Half a, float b) {
  const float av = static_cast<float>(a);
  return av + b;
}
inline AT_HOSTDEVICE float operator-(Half a, float b) {
  const float av = static_cast<float>(a);
  return av - b;
}
inline AT_HOSTDEVICE float operator*(Half a, float b) {
  const float av = static_cast<float>(a);
  return av * b;
}
inline AT_HOSTDEVICE float operator/(Half a, float b) {
  const float av = static_cast<float>(a);
  return av / b;
}

inline AT_HOSTDEVICE float operator+(float a, Half b) {
  const float bv = static_cast<float>(b);
  return a + bv;
}
inline AT_HOSTDEVICE float operator-(float a, Half b) {
  const float bv = static_cast<float>(b);
  return a - bv;
}
inline AT_HOSTDEVICE float operator*(float a, Half b) {
  const float bv = static_cast<float>(b);
  return a * bv;
}
inline AT_HOSTDEVICE float operator/(float a, Half b) {
  const float bv = static_cast<float>(b);
  return a / bv;
}

/// Compound assignment into a float accumulator; the Half is widened first.
inline AT_HOSTDEVICE float& operator+=(float& a, const Half& b) {
  a += static_cast<float>(b);
  return a;
}
inline AT_HOSTDEVICE float& operator-=(float& a, const Half& b) {
  a -= static_cast<float>(b);
  return a;
}
inline AT_HOSTDEVICE float& operator*=(float& a, const Half& b) {
  a *= static_cast<float>(b);
  return a;
}
inline AT_HOSTDEVICE float& operator/=(float& a, const Half& b) {
  a /= static_cast<float>(b);
  return a;
}
|
||||
|
||||
/// Arithmetic with doubles
|
||||
|
||||
/// Mixed Half/double arithmetic: the Half operand is widened to double and
/// the result is returned as double (no rounding back to Half).

inline AT_HOSTDEVICE double operator+(Half a, double b) {
  const double av = static_cast<double>(a);
  return av + b;
}
inline AT_HOSTDEVICE double operator-(Half a, double b) {
  const double av = static_cast<double>(a);
  return av - b;
}
inline AT_HOSTDEVICE double operator*(Half a, double b) {
  const double av = static_cast<double>(a);
  return av * b;
}
inline AT_HOSTDEVICE double operator/(Half a, double b) {
  const double av = static_cast<double>(a);
  return av / b;
}

inline AT_HOSTDEVICE double operator+(double a, Half b) {
  const double bv = static_cast<double>(b);
  return a + bv;
}
inline AT_HOSTDEVICE double operator-(double a, Half b) {
  const double bv = static_cast<double>(b);
  return a - bv;
}
inline AT_HOSTDEVICE double operator*(double a, Half b) {
  const double bv = static_cast<double>(b);
  return a * bv;
}
inline AT_HOSTDEVICE double operator/(double a, Half b) {
  const double bv = static_cast<double>(b);
  return a / bv;
}
|
||||
|
||||
/// Arithmetic with ints
|
||||
|
||||
/// Mixed Half/int arithmetic: the int is first converted to Half, so these
/// delegate to the Half/Half operators and return a Half result.

inline AT_HOSTDEVICE Half operator+(Half a, int b) {
  const Half hb = static_cast<Half>(b);
  return a + hb;
}
inline AT_HOSTDEVICE Half operator-(Half a, int b) {
  const Half hb = static_cast<Half>(b);
  return a - hb;
}
inline AT_HOSTDEVICE Half operator*(Half a, int b) {
  const Half hb = static_cast<Half>(b);
  return a * hb;
}
inline AT_HOSTDEVICE Half operator/(Half a, int b) {
  const Half hb = static_cast<Half>(b);
  return a / hb;
}

inline AT_HOSTDEVICE Half operator+(int a, Half b) {
  const Half ha = static_cast<Half>(a);
  return ha + b;
}
inline AT_HOSTDEVICE Half operator-(int a, Half b) {
  const Half ha = static_cast<Half>(a);
  return ha - b;
}
inline AT_HOSTDEVICE Half operator*(int a, Half b) {
  const Half ha = static_cast<Half>(a);
  return ha * b;
}
inline AT_HOSTDEVICE Half operator/(int a, Half b) {
  const Half ha = static_cast<Half>(a);
  return ha / b;
}
|
||||
|
||||
/// NOTE: we do not define comparisons directly and instead rely on the implicit
|
||||
/// conversion from at::Half to float.
|
||||
|
||||
} // namespace at
|
||||
|
||||
namespace std {
|
||||
|
||||
// std::numeric_limits specialization for at::Half (IEEE 754 binary16).
// The hex constants below are raw binary16 bit patterns, installed verbatim
// via the (bits, from_bits) constructor instead of being converted from float.
template <>
class numeric_limits<at::Half> {
 public:
  static constexpr bool is_specialized = true;
  static constexpr bool is_signed = true;
  static constexpr bool is_integer = false;
  static constexpr bool is_exact = false;
  static constexpr bool has_infinity = true;
  static constexpr bool has_quiet_NaN = true;
  static constexpr bool has_signaling_NaN = true;
  // Denorm/rounding/trap behavior is inherited from float, since Half
  // arithmetic in this header is implemented by converting through float.
  static constexpr auto has_denorm = numeric_limits<float>::has_denorm;
  static constexpr auto has_denorm_loss =
      numeric_limits<float>::has_denorm_loss;
  static constexpr auto round_style = numeric_limits<float>::round_style;
  static constexpr bool is_iec559 = true;
  static constexpr bool is_bounded = true;
  static constexpr bool is_modulo = false;
  // binary16 layout: 1 sign bit, 5 exponent bits, 10 mantissa bits
  // (11 significand digits counting the implicit leading bit).
  static constexpr int digits = 11;
  static constexpr int digits10 = 3;
  static constexpr int max_digits10 = 5;
  static constexpr int radix = 2;
  static constexpr int min_exponent = -13; // smallest normal value is 2^-14
  static constexpr int min_exponent10 = -4;
  static constexpr int max_exponent = 16; // largest normal value is < 2^16
  static constexpr int max_exponent10 = 4;
  static constexpr auto traps = numeric_limits<float>::traps;
  static constexpr auto tinyness_before =
      numeric_limits<float>::tinyness_before;
  static constexpr at::Half min() {
    // 0x0400 = 2^-14, the smallest positive normal binary16 value.
    return at::Half(0x0400, at::Half::from_bits);
  }
  static constexpr at::Half lowest() {
    // 0xFBFF = -65504, the most negative finite binary16 value.
    return at::Half(0xFBFF, at::Half::from_bits);
  }
  static constexpr at::Half max() {
    // 0x7BFF = 65504, the largest finite binary16 value.
    return at::Half(0x7BFF, at::Half::from_bits);
  }
  static constexpr at::Half epsilon() {
    // 0x1400 = 2^-10, the gap between 1.0 and the next representable value.
    return at::Half(0x1400, at::Half::from_bits);
  }
  static constexpr at::Half round_error() {
    // 0x3800 = 0.5, the maximum rounding error in round-to-nearest.
    return at::Half(0x3800, at::Half::from_bits);
  }
  static constexpr at::Half infinity() {
    // 0x7C00 = +infinity (all-ones exponent, zero mantissa).
    return at::Half(0x7C00, at::Half::from_bits);
  }
  static constexpr at::Half quiet_NaN() {
    // 0x7E00 = quiet NaN (top mantissa bit set).
    return at::Half(0x7E00, at::Half::from_bits);
  }
  static constexpr at::Half signaling_NaN() {
    // 0x7D00 = signaling NaN (top mantissa bit clear, mantissa nonzero).
    return at::Half(0x7D00, at::Half::from_bits);
  }
  static constexpr at::Half denorm_min() {
    // 0x0001 = 2^-24, the smallest positive subnormal binary16 value.
    return at::Half(0x0001, at::Half::from_bits);
  }
};
|
||||
|
||||
} // namespace std
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user