Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-23 23:04:52 +08:00)

Comparing v2.7.1...dev/joona/ (1439 commits)
@@ -55,22 +55,9 @@ def build_ArmComputeLibrary() -> None:
        shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")


def replace_tag(filename) -> None:
    with open(filename) as f:
        lines = f.readlines()
    for i, line in enumerate(lines):
        if line.startswith("Tag:"):
            lines[i] = line.replace("-linux_", "-manylinux_2_28_")
            print(f"Updated tag from {line} to {lines[i]}")
            break

    with open(filename, "w") as f:
        f.writelines(lines)


def package_cuda_wheel(wheel_path, desired_cuda) -> None:
def update_wheel(wheel_path, desired_cuda) -> None:
    """
    Package the cuda wheel libraries
    Update the cuda wheel libraries
    """
    folder = os.path.dirname(wheel_path)
    wheelname = os.path.basename(wheel_path)

@@ -101,19 +88,30 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
        "/usr/lib64/libgfortran.so.5",
        "/acl/build/libarm_compute.so",
        "/acl/build/libarm_compute_graph.so",
        "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
        "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
        "/usr/local/lib/libnvpl_lapack_core.so.0",
        "/usr/local/lib/libnvpl_blas_core.so.0",
    ]

    if "128" in desired_cuda:
    if enable_cuda:
        libs_to_copy += [
            "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
            "/usr/local/cuda/lib64/libcufile.so.0",
            "/usr/local/cuda/lib64/libcufile_rdma.so.1",
            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_lapack_core.so.0",
            "/usr/local/lib/libnvpl_blas_core.so.0",
        ]
        if "126" in desired_cuda:
            libs_to_copy += [
                "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
                "/usr/local/cuda/lib64/libcufile.so.0",
                "/usr/local/cuda/lib64/libcufile_rdma.so.1",
            ]
        elif "128" in desired_cuda:
            libs_to_copy += [
                "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
                "/usr/local/cuda/lib64/libcufile.so.0",
                "/usr/local/cuda/lib64/libcufile_rdma.so.1",
            ]
    else:
        libs_to_copy += [
            "/opt/OpenBLAS/lib/libopenblas.so.0",
        ]

    # Copy libraries to unzipped_folder/a/lib
    for lib_path in libs_to_copy:
        lib_name = os.path.basename(lib_path)

@@ -122,13 +120,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
            f"cd {folder}/tmp/torch/lib/; "
            f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}"
        )

    # Make sure the wheel is tagged with manylinux_2_28
    for f in os.scandir(f"{folder}/tmp/"):
        if f.is_dir() and f.name.endswith(".dist-info"):
            replace_tag(f"{f.path}/WHEEL")
            break

    os.mkdir(f"{folder}/cuda_wheel")
    os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
    shutil.move(

@@ -251,6 +242,6 @@ if __name__ == "__main__":
        print("Updating Cuda Dependency")
        filename = os.listdir("/pytorch/dist/")
        wheel_path = f"/pytorch/dist/{filename[0]}"
        package_cuda_wheel(wheel_path, desired_cuda)
        update_wheel(wheel_path, desired_cuda)
        pytorch_wheel_name = complete_wheel("/pytorch/")
        print(f"Build Complete. Created {pytorch_wheel_name}..")
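Note: the replace_tag helper in the first hunk only rewrites the "Tag:" line of the .dist-info/WHEEL metadata before the tree is re-zipped. The same retagging step can be reproduced by hand — a minimal sketch, with a hypothetical wheel name and GNU sed assumed:

    # unpack, retag, and re-zip a wheel (illustrative only)
    wheel=torch-2.7.0-cp311-cp311-linux_aarch64.whl
    unzip -q "$wheel" -d tmp/
    sed -i 's/^\(Tag: .*\)-linux_/\1-manylinux_2_28_/' tmp/*.dist-info/WHEEL
    (cd tmp/ && zip -qr "../${wheel/-linux_/-manylinux_2_28_}" .)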
@@ -19,13 +19,11 @@ import boto3

# AMI images for us-east-1, change the following based on your ~/.aws/config
os_amis = {
    "ubuntu18_04": "ami-078eece1d8119409f",  # login_name: ubuntu
    "ubuntu20_04": "ami-052eac90edaa9d08f",  # login_name: ubuntu
    "ubuntu22_04": "ami-0c6c29c5125214c77",  # login_name: ubuntu
    "redhat8": "ami-0698b90665a2ddcf1",  # login_name: ec2-user
}

ubuntu18_04_ami = os_amis["ubuntu18_04"]
ubuntu20_04_ami = os_amis["ubuntu20_04"]


@@ -659,18 +657,6 @@ def configure_system(
        "sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
    )
    host.run_cmd("pip3 install dataclasses typing-extensions")
    # Install and switch to gcc-8 on Ubuntu-18.04
    if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8":
        host.run_cmd("sudo apt-get install -y g++-8 gfortran-8")
        host.run_cmd(
            "sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100"
        )
        host.run_cmd(
            "sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100"
        )
        host.run_cmd(
            "sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100"
        )
    if not use_conda:
        print("Installing Cython + numpy from PyPy")
        host.run_cmd("sudo pip3 install Cython")

@@ -1026,7 +1012,7 @@ if __name__ == "__main__":
        install_condaforge_python(host, args.python_version)
        sys.exit(0)

    python_version = args.python_version if args.python_version is not None else "3.8"
    python_version = args.python_version if args.python_version is not None else "3.9"

    if args.use_torch_from_pypi:
        configure_system(host, compiler=args.compiler, python_version=python_version)
@@ -44,6 +44,8 @@ FROM base as cuda
ARG CUDA_VERSION=12.4
RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
# Preserve CUDA_VERSION for the builds
ENV CUDA_VERSION=${CUDA_VERSION}
@@ -1,82 +1,60 @@
#!/usr/bin/env bash
# Script used only in CD pipeline

set -eou pipefail
set -exou pipefail

image="$1"
shift

if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE"
  echo "Usage: $0 IMAGENAME:ARCHTAG"
  exit 1
fi

DOCKER_IMAGE_NAME="pytorch/${image}"
# Go from imagename:tag to tag
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

CUDA_VERSION=""
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
  # extract cuda version from image name and tag. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
  CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
fi

export DOCKER_BUILDKIT=1
TOPDIR=$(git rev-parse --show-toplevel)

CUDA_VERSION=${CUDA_VERSION:-12.1}

case ${CUDA_VERSION} in
case ${DOCKER_TAG_PREFIX} in
  cpu)
    BASE_TARGET=base
    DOCKER_TAG=cpu
    ;;
  all)
    BASE_TARGET=all_cuda
    DOCKER_TAG=latest
  cuda*)
    BASE_TARGET=cuda${CUDA_VERSION}
    ;;
  *)
    BASE_TARGET=cuda${CUDA_VERSION}
    DOCKER_TAG=cuda${CUDA_VERSION}
    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
    exit 1
    ;;
esac

# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker

(
  set -x
  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
  sudo systemctl daemon-reload
  sudo systemctl restart docker
  export DOCKER_BUILDKIT=1
  TOPDIR=$(git rev-parse --show-toplevel)
  tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

  docker build \
    --target final \
    --progress plain \
    --build-arg "BASE_TARGET=${BASE_TARGET}" \
    --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
    --build-arg "DEVTOOLSET_VERSION=11" \
    -t ${DOCKER_IMAGE_NAME} \
    $@ \
    -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
    ${TOPDIR}/.ci/docker/
)
docker build \
  --target final \
  --progress plain \
  --build-arg "BASE_TARGET=${BASE_TARGET}" \
  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
  --build-arg "DEVTOOLSET_VERSION=11" \
  -t ${tmp_tag} \
  $@ \
  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
  ${TOPDIR}/.ci/docker/

if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
if [ -n "${CUDA_VERSION}" ]; then
  # Test that we're using the right CUDA compiler
  (
    set -x
    docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
  )
fi

GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
if [[ "${WITH_PUSH:-}" == true ]]; then
  (
    set -x
    docker push "${DOCKER_IMAGE_NAME}"
    if [[ -n ${GITHUB_REF} ]]; then
      docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
      docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
      docker push "${DOCKER_IMAGE_BRANCH_TAG}"
      docker push "${DOCKER_IMAGE_SHA_TAG}"
    fi
  )
  docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
fi
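A quick illustration of how the new tag parsing behaves — assumed input values, not taken from CI logs:

    image="manylinux2_28-builder:cuda12.8"                                     # IMAGENAME:ARCHTAG form
    DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')              # -> cuda12.8
    CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')    # -> 12.8

With a cpu tag, CUDA_VERSION stays empty, so the final nvcc smoke test is skipped by the new `if [ -n "${CUDA_VERSION}" ]` guard.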
@@ -105,7 +105,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -119,7 +118,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -134,7 +132,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -149,7 +146,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.13
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -164,7 +160,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -178,7 +173,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -193,7 +187,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -208,7 +201,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.13
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -223,7 +215,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}

@@ -235,7 +226,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    ONNX=yes

@@ -244,10 +234,7 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    VULKAN_SDK_VERSION=1.2.162.1
    SWIFTSHADER=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;

@@ -255,10 +242,7 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.11
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    VULKAN_SDK_VERSION=1.2.162.1
    SWIFTSHADER=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;

@@ -266,7 +250,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    TRITON=yes

@@ -275,7 +258,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=6.2.4
    NINJA_VERSION=1.9.0

@@ -290,7 +272,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=6.3
    NINJA_VERSION=1.9.0

@@ -305,7 +286,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    XPU_VERSION=0.5
    NINJA_VERSION=1.9.0

@@ -316,7 +296,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    XPU_VERSION=2025.0
    NINJA_VERSION=1.9.0

@@ -327,7 +306,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    CONDA_CMAKE=yes

@@ -341,7 +319,6 @@ case "$image" in
    CUDNN_VERSION=9
    CLANG_VERSION=12
    PROTOBUF=yes
    DB=yes
    VISION=yes
    TRITON=yes
    ;;

@@ -349,7 +326,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=12
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    TRITON=yes

@@ -370,7 +346,6 @@ case "$image" in
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    CONDA_CMAKE=yes

@@ -403,20 +378,19 @@ case "$image" in
    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
    # We will need to update mypy version eventually, but that's for another day. The task
    # would be to upgrade mypy to 1.0.0 with Python 3.11
    ANACONDA_PYTHON_VERSION=3.9
    CONDA_CMAKE=yes
    PYTHON_VERSION=3.9
    PIP_CMAKE=yes
    ;;
  pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
    ANACONDA_PYTHON_VERSION=3.9
    PYTHON_VERSION=3.9
    CUDA_VERSION=11.8
    CONDA_CMAKE=yes
    PIP_CMAKE=yes
    ;;
  pytorch-linux-jammy-aarch64-py3.10-gcc11)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    ACL=yes
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    # snadampal: skipping llvm src build install because the current version

@@ -428,7 +402,6 @@ case "$image" in
    GCC_VERSION=11
    ACL=yes
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    # snadampal: skipping llvm src build install because the current version

@@ -439,7 +412,6 @@ case "$image" in
  *)
    # Catch-all for builds that are not hardcoded.
    PROTOBUF=yes
    DB=yes
    VISION=yes
    echo "image '$image' did not match an existing build configuration"
    if [[ "$image" == *py* ]]; then

@@ -488,14 +460,21 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
  fi
fi

no_cache_flag=""
progress_flag=""
# Do not use cache and progress=plain when in CI
if [[ -n "${CI:-}" ]]; then
  no_cache_flag="--no-cache"
  progress_flag="--progress=plain"
fi

# Build image
docker build \
  --no-cache \
  --progress=plain \
  ${no_cache_flag} \
  ${progress_flag} \
  --build-arg "BUILD_ENVIRONMENT=${image}" \
  --build-arg "PROTOBUF=${PROTOBUF:-}" \
  --build-arg "LLVMDEV=${LLVMDEV:-}" \
  --build-arg "DB=${DB:-}" \
  --build-arg "VISION=${VISION:-}" \
  --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
  --build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \

@@ -503,13 +482,12 @@ docker build \
  --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
  --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
  --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
  --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
  --build-arg "GCC_VERSION=${GCC_VERSION}" \
  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
  --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
  --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
  --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
  --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
  --build-arg "SWIFTSHADER=${SWIFTSHADER}" \
  --build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
  --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
  --build-arg "KATEX=${KATEX:-}" \

@@ -519,6 +497,7 @@ docker build \
  --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
  --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
  --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
  --build-arg "PIP_CMAKE=${PIP_CMAKE}" \
  --build-arg "TRITON=${TRITON}" \
  --build-arg "TRITON_CPU=${TRITON_CPU}" \
  --build-arg "ONNX=${ONNX}" \

@@ -544,7 +523,7 @@ docker build \
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')

function drun() {
  docker run --rm "$tmp_tag" $*
  docker run --rm "$tmp_tag" "$@"
}

if [[ "$OS" == "ubuntu" ]]; then

@@ -592,3 +571,14 @@ if [ -n "$KATEX" ]; then
    exit 1
  fi
fi

HAS_TRITON=$(drun python -c "import triton" > /dev/null 2>&1 && echo "yes" || echo "no")
if [[ -n "$TRITON" || -n "$TRITON_CPU" ]]; then
  if [ "$HAS_TRITON" = "no" ]; then
    echo "expecting triton to be installed, but it is not"
    exit 1
  fi
elif [ "$HAS_TRITON" = "yes" ]; then
  echo "expecting triton to not be installed, but it is"
  exit 1
fi
@@ -55,13 +55,6 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./

@@ -75,7 +68,7 @@ COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
RUN rm install_rocm_magma.sh
COPY ./common/install_amdsmi.sh install_amdsmi.sh
RUN bash ./install_amdsmi.sh
@@ -1 +1 @@
ebe8522378c3f9944aaaef44868f5ececdd845fc
381ae5d57d35c165d98df728380b20fbde350392
@@ -4,16 +4,10 @@

if [ -n "$CLANG_VERSION" ]; then

  if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
    sudo apt-get update
    # gpg-agent is not available by default on 18.04
    sudo apt-get install -y --no-install-recommends gpg-agent
    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
    apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
  elif [[ $UBUNTU_VERSION == 22.04 ]]; then
  if [[ $UBUNTU_VERSION == 22.04 ]]; then
    # work around ubuntu apt-get conflicts
    sudo apt-get -y -f install
    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
    if [[ $CLANG_VERSION == 18 ]]; then
      apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
    fi

@@ -41,7 +35,7 @@ if [ -n "$CLANG_VERSION" ]; then
  # clang's packaging is a little messed up (the runtime libs aren't
  # added into the linker path), so give it a little help
  clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
  echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
  echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
  ldconfig

  # Cleanup package manager
@@ -62,7 +62,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

  # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
  # which is provided in libstdcxx 12 and up.
  conda_install libstdcxx-ng=12.3.0 -c conda-forge
  conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge

  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
  if [[ $(uname -m) == "aarch64" ]]; then
@@ -7,7 +7,7 @@ PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/hea
GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py

# Python versions to be installed in /opt/$VERSION_NO
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}

function check_var {
  if [ -z "$1" ]; then
@@ -2,7 +2,6 @@

set -ex

NCCL_VERSION=v2.26.2-1
CUDNN_VERSION=9.5.1.17

function install_cusparselt_040 {

@@ -40,8 +39,7 @@ function install_cusparselt_063 {

function install_118 {
  CUDNN_VERSION=9.1.0.70
  NCCL_VERSION=v2.21.5-1
  echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
  echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.4.0"
  rm -rf /usr/local/cuda-11.8 /usr/local/cuda
  # install CUDA 11.8.0 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run

@@ -59,14 +57,7 @@ function install_118 {
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl
  CUDA_VERSION=11.8 bash install_nccl.sh

  install_cusparselt_040

@@ -75,7 +66,7 @@ function install_118 {

function install_124 {
  CUDNN_VERSION=9.1.0.70
  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.2"
  rm -rf /usr/local/cuda-12.4 /usr/local/cuda
  # install CUDA 12.4.1 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run

@@ -93,14 +84,7 @@ function install_124 {
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl
  CUDA_VERSION=12.4 bash install_nccl.sh

  install_cusparselt_062

@@ -108,7 +92,7 @@ function install_124 {
}

function install_126 {
  echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
  echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
  rm -rf /usr/local/cuda-12.6 /usr/local/cuda
  # install CUDA 12.6.3 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run

@@ -126,14 +110,7 @@ function install_126 {
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl
  CUDA_VERSION=12.6 bash install_nccl.sh

  install_cusparselt_063

@@ -240,8 +217,8 @@ function prune_126 {
}

function install_128 {
  CUDNN_VERSION=9.7.1.26
  echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
  CUDNN_VERSION=9.8.0.87
  echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
  rm -rf /usr/local/cuda-12.8 /usr/local/cuda
  # install CUDA 12.8.0 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run

@@ -259,14 +236,7 @@ function install_128 {
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl
  CUDA_VERSION=12.8 bash install_nccl.sh

  install_cusparselt_063
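All of the install_1xx functions previously carried an identical clone-and-build block for NCCL; this change collapses them onto the shared helper, keyed by CUDA_VERSION. Roughly, per function:

    # before (repeated in install_118, install_124, install_126, install_128):
    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/

    # after: one line, with the version resolved from the nccl-cu*.txt pins
    CUDA_VERSION=11.8 bash install_nccl.sh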
@@ -3,7 +3,6 @@

set -ex

NCCL_VERSION=v2.26.2-1
CUDNN_VERSION=9.8.0.87

function install_cusparselt_063 {

@@ -18,7 +17,7 @@ function install_cusparselt_063 {
}

function install_128 {
  echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
  echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
  rm -rf /usr/local/cuda-12.8 /usr/local/cuda
  # install CUDA 12.8.0 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux_sbsa.run

@@ -36,14 +35,7 @@ function install_128 {
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl
  CUDA_VERSION=12.8 bash install_nccl.sh

  install_cusparselt_063
@@ -5,7 +5,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
  mkdir tmp_cudnn
  pushd tmp_cudnn
  if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
    CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
    CUDNN_NAME="cudnn-linux-x86_64-9.8.0.87_cuda12-archive"
  elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
    CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
  elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
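The branch selection here uses bash substring expansion: ${CUDA_VERSION:0:4} is the first four characters of the version string, ${CUDA_VERSION:0:2} the first two. Illustrative values, not taken from the diff:

    CUDA_VERSION=12.8.0
    echo "${CUDA_VERSION:0:4}"   # 12.8 -> picks the 12.8-specific cuDNN archive
    echo "${CUDA_VERSION:0:2}"   # 12   -> generic 12.x fallback branch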
@@ -1,38 +0,0 @@
#!/bin/bash

set -ex

install_ubuntu() {
  apt-get update

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

install_centos() {
  # Need EPEL for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release

  # Cleanup
  yum clean all
  rm -rf /var/cache/yum
  rm -rf /var/lib/yum/yumdb
  rm -rf /var/lib/yum/history
}

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    install_ubuntu
    ;;
  centos)
    install_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac
@@ -14,6 +14,13 @@ function install_timm() {
  local commit
  commit=$(get_pinned_commit timm)

  # TODO (huydhn): There is no torchvision release on 3.13 when I write this, so
  # I'm using nightly here instead. We just need the package to be able to install
  # TIMM. Removing this once vision has a release on 3.13
  if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then
    pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
  fi

  pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
  # Clean up
  conda_run pip uninstall -y cmake torch torchvision triton
@@ -2,8 +2,6 @@

set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

if [ -n "${UBUNTU_VERSION}" ]; then
  apt update
  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5

@@ -15,8 +13,8 @@ chown -R jenkins pytorch

pushd pytorch
# Install all linter dependencies
pip_install -r requirements.txt
conda_run lintrunner init
pip install -r requirements.txt
lintrunner init

# Cache .lintbin directory as part of the Docker image
cp -r .lintbin /tmp
.ci/docker/common/install_nccl.sh (new file, 26 lines)

@@ -0,0 +1,26 @@
#!/bin/bash

set -ex

NCCL_VERSION=""
if [[ ${CUDA_VERSION:0:2} == "11" ]]; then
  NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
  NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
else
  echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
  exit 1
fi

if [[ -n "${NCCL_VERSION}" ]]; then
  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  pushd nccl
  make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf nccl
  ldconfig
fi
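The new script expects CUDA_VERSION in the environment and the nccl-cu11.txt/nccl-cu12.txt commit pins on disk (the Dockerfile hunk earlier in this compare COPYs them into the image). A usage sketch, with pin values taken from elsewhere in this diff:

    echo "v2.21.5-1" > ci_commit_pins/nccl-cu11.txt   # pin shown for the CUDA 11.8 path
    echo "v2.26.2-1" > ci_commit_pins/nccl-cu12.txt   # pin shown for the CUDA 12.x path
    CUDA_VERSION=12.6 bash install_nccl.sh            # clones and builds that NCCL tag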
18
.ci/docker/common/install_python.sh
Normal file
18
.ci/docker/common/install_python.sh
Normal file
@ -0,0 +1,18 @@
#!/bin/bash
set -ex

apt-get update
# Use deadsnakes in case we need an older python version
sudo add-apt-repository ppa:deadsnakes/ppa
apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv

# Use a venv because uv and some other package managers don't support --user installs
ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
python -m venv /var/lib/jenkins/ci_env
source /var/lib/jenkins/ci_env/bin/activate

python -mpip install --upgrade pip
python -mpip install -r /opt/requirements-ci.txt
if [ -n "${PIP_CMAKE}" ]; then
  python -mpip install cmake==3.31.6
fi
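As a rough sketch of how the Dockerfiles below drive this script (the values here are illustrative assumptions, not pinned by this change):

```
# Hypothetical invocation; PYTHON_VERSION/PIP_CMAKE values are examples only.
export PYTHON_VERSION=3.10
export PIP_CMAKE=1   # any non-empty value opts into the pinned cmake wheel
bash .ci/docker/common/install_python.sh
```

Because the venv is created at /var/lib/jenkins/ci_env, the Dockerfiles put its bin directory on PATH so later layers pick it up without an explicit activate.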
@@ -8,10 +8,6 @@ ver() {

install_ubuntu() {
  apt-get update
  if [[ $UBUNTU_VERSION == 18.04 ]]; then
    # gpg-agent is not available by default on 18.04
    apt-get install -y --no-install-recommends gpg-agent
  fi
  if [[ $UBUNTU_VERSION == 20.04 ]]; then
    # gpg-agent is not available by default on 20.04
    apt-get install -y --no-install-recommends gpg-agent
@@ -23,6 +19,13 @@ install_ubuntu() {
  apt-get install -y libc++1
  apt-get install -y libc++abi1

  # Make sure rocm packages from repo.radeon.com have highest priority
  cat << EOF > /etc/apt/preferences.d/rocm-pin-600
Package: *
Pin: release o=repo.radeon.com
Pin-Priority: 600
EOF

  # Add amdgpu repository
  UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
  echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
@@ -1,50 +1,32 @@
#!/bin/bash
# Script used in CI and CD pipeline
#!/usr/bin/env bash
# Script used only in CD pipeline

set -ex
set -eou pipefail

# Magma build scripts need `python`
ln -sf /usr/bin/python3 /usr/bin/python
function do_install() {
  rocm_version=$1
  rocm_version_nodot=${1//./}

  ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
  case "$ID" in
    almalinux)
      yum install -y gcc-gfortran
      ;;
    *)
      echo "No preinstalls to build magma..."
      ;;
  esac
  # Version 2.7.2 + ROCm related updates
  MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
  magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"

  MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
  rocm_dir="/opt/rocm"
  (
    set -x
    tmp_dir=$(mktemp -d)
    pushd ${tmp_dir}
    curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
    if tar -xvf "${magma_archive}"
    then
      mkdir -p "${rocm_dir}/magma"
      mv include "${rocm_dir}/magma/include"
      mv lib "${rocm_dir}/magma/lib"
    else
      echo "${magma_archive} not found, skipping magma install"
    fi
    popd
  )
}

# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma

# Version 2.7.2 + ROCm related updates
git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6

cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
  echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
fi
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
  amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
else
  amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
  echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j $(nproc)
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
popd
mv magma /opt/rocm
do_install $1
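To make the archive naming concrete, here is what the new `do_install` resolves for one version, derived directly from the variables above (a walk-through, not output captured from CI):

```
# do_install 6.3 strips the dot (rocm_version_nodot=63) and fetches
#   magma-rocm63-a1625ff4d9bc362906bd01f805dbbe12612953f6-1.tar.bz2
# from https://ossci-linux.s3.us-east-1.amazonaws.com/, then unpacks its
# include/ and lib/ directories into /opt/rocm/magma.
do_install 6.3
```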
@@ -1,24 +0,0 @@
#!/bin/bash

set -ex

[ -n "${SWIFTSHADER}" ]

retry () {
  $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}

_https_amazon_aws=https://ossci-android.s3.amazonaws.com

# SwiftShader
_swiftshader_dir=/var/lib/jenkins/swiftshader
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
mkdir -p $_swiftshader_dir
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"

curl --silent --show-error --location --fail --retry 3 \
  --output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"

tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"

export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
@@ -2,6 +2,12 @@

set -ex

mkdir -p /opt/triton
if [ -z "${TRITON}" ] && [ -z "${TRITON_CPU}" ]; then
  echo "TRITON and TRITON_CPU are not set. Exiting..."
  exit 0
fi

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

get_conda_version() {
@@ -52,6 +58,7 @@ cd triton
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
as_jenkins git submodule update --init --recursive
cd python
pip_install pybind11==2.13.6

# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
@@ -60,17 +67,22 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}"
  # Triton needs at least gcc-9 to build
  apt-get install -y g++-9

  CXX=g++-9 pip_install .
  CXX=g++-9 conda_run python setup.py bdist_wheel
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
  # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
  add-apt-repository -y ppa:ubuntu-toolchain-r/test
  apt-get install -y g++-9

  CXX=g++-9 pip_install .
  CXX=g++-9 conda_run python setup.py bdist_wheel
else
  pip_install .
  conda_run python setup.py bdist_wheel
fi

# Copy the wheel to /opt for multi stage docker builds
cp dist/*.whl /opt/triton
# Install the wheel for docker builds that don't use multi stage
pip_install dist/*.whl

if [ -n "${CONDA_CMAKE}" ]; then
  # TODO: This is to make sure that the same cmake and numpy version from install conda
  # script is used. Without this step, the newer cmake version (3.25.2) downloaded by
@@ -1,24 +0,0 @@
#!/bin/bash

set -ex

[ -n "${VULKAN_SDK_VERSION}" ]

retry () {
  $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}

_vulkansdk_dir=/var/lib/jenkins/vulkansdk
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz

curl \
  --silent \
  --show-error \
  --location \
  --fail \
  --retry 3 \
  --output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"

mkdir -p "${_vulkansdk_dir}"
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
rm -rf "${_tmp_vulkansdk_targz}"
@@ -47,9 +47,6 @@ function install_ubuntu() {
  # Development Packages
  apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
  # Install Intel Support Packages
  if [[ "$XPU_VERSION" == "2025.0" ]]; then
    XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl=2025.0.1-6"
  fi
  apt-get install -y ${XPU_PACKAGES}

  # Cleanup
@@ -85,9 +82,6 @@ gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.
EOF

# Install Intel Support Packages
if [[ "$XPU_VERSION" == "2025.0" ]]; then
  XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl-2025.0.1-6"
fi
yum install -y ${XPU_PACKAGES}
# The xpu-smi packages
dnf install -y xpu-smi
@@ -49,6 +49,8 @@ RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM cpu as cuda
ADD ./common/install_cuda.sh install_cuda.sh
ADD ./common/install_magma.sh install_magma.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
ENV CUDA_HOME /usr/local/cuda

FROM cuda as cuda11.8
@@ -72,6 +74,7 @@ RUN bash ./install_magma.sh 12.8
RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda

FROM cpu as rocm
ARG ROCM_VERSION
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ENV MKLROOT /opt/intel
@@ -86,11 +89,11 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
# gfortran and python needed for building magma from source for ROCm
RUN apt-get update -y && \
    apt-get install gfortran -y && \
    apt-get install python -y && \
    apt-get install python3 python-is-python3 -y && \
    apt-get clean

RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh

FROM ${BASE_TARGET} as final
COPY --from=openssl /opt/openssl /opt/openssl
@@ -1,83 +1,63 @@
#!/usr/bin/env bash
# Script used only in CD pipeline

set -eou pipefail
set -eoux pipefail

image="$1"
shift

if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE"
  echo "Usage: $0 IMAGENAME:ARCHTAG"
  exit 1
fi

DOCKER_IMAGE="pytorch/${image}"

TOPDIR=$(git rev-parse --show-toplevel)

GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}

WITH_PUSH=${WITH_PUSH:-}

DOCKER=${DOCKER:-docker}

case ${GPU_ARCH_TYPE} in
# Go from imagename:tag to tag
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

GPU_ARCH_VERSION=""
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
  # extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
  GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
  # extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
  GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
fi
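A quick illustration of the tag parsing above, runnable on its own (sample tag only, chosen to match the comments in the script):

```
# Illustrative only: what the awk extraction yields for a sample image tag.
image="manylinux2_28-builder:rocm6.2.4"
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')                # rocm6.2.4
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')  # 6.2.4
echo "${DOCKER_TAG_PREFIX} -> ${GPU_ARCH_VERSION}"
```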

case ${DOCKER_TAG_PREFIX} in
  cpu)
    BASE_TARGET=cpu
    DOCKER_TAG=cpu
    GPU_IMAGE=ubuntu:20.04
    DOCKER_GPU_BUILD_ARG=""
    ;;
  cuda)
  cuda*)
    BASE_TARGET=cuda${GPU_ARCH_VERSION}
    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
    GPU_IMAGE=ubuntu:20.04
    DOCKER_GPU_BUILD_ARG=""
    ;;
  rocm)
  rocm*)
    BASE_TARGET=rocm
    DOCKER_TAG=rocm${GPU_ARCH_VERSION}
    GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
    GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
    DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
    DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
    ;;
  *)
    echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
    echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
    exit 1
    ;;
esac

tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

(
  set -x
  DOCKER_BUILDKIT=1 ${DOCKER} build \
    --target final \
    ${DOCKER_GPU_BUILD_ARG} \
    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
    --build-arg "BASE_TARGET=${BASE_TARGET}" \
    -t "${DOCKER_IMAGE}" \
    $@ \
    -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
    "${TOPDIR}/.ci/docker/"

)

GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}

if [[ "${WITH_PUSH}" == true ]]; then
  (
    set -x
    ${DOCKER} push "${DOCKER_IMAGE}"
    if [[ -n ${GITHUB_REF} ]]; then
      ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
      ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
      ${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}"
      ${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}"
    fi
  )
fi
DOCKER_BUILDKIT=1 ${DOCKER} build \
  --target final \
  ${DOCKER_GPU_BUILD_ARG} \
  --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
  --build-arg "BASE_TARGET=${BASE_TARGET}" \
  -t "${tmp_tag}" \
  $@ \
  -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
  "${TOPDIR}/.ci/docker/"
@@ -18,28 +18,30 @@ COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_magma_conda.sh install_magma_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
ARG PYTHON_VERSION
ARG PIP_CMAKE
# Put venv into the env vars so users don't need to activate it
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
COPY requirements-ci.txt /opt/requirements-ci.txt
COPY ./common/install_python.sh install_python.sh
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt

# Install cuda and cudnn
ARG CUDA_VERSION
COPY ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh common_utils.sh
RUN rm install_linter.sh

RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env

USER jenkins
CMD ["bash"]
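A hedged way to sanity-check that the baked-in venv wins over any system interpreter once such an image is built (the tag here is hypothetical):

```
# Illustrative: image tag is made up; command -v resolves via the ENV PATH above.
docker run --rm pytorch/pytorch-linter:example sh -c 'command -v python'
# expected: /var/lib/jenkins/ci_env/bin/python
```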
@@ -15,20 +15,18 @@ COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
ARG PYTHON_VERSION
ARG PIP_CMAKE
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
COPY requirements-ci.txt /opt/requirements-ci.txt
COPY ./common/install_python.sh install_python.sh
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt

# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh common_utils.sh
RUN rm install_linter.sh

USER jenkins
CMD ["bash"]
@@ -64,7 +64,9 @@ FROM base as cuda
ARG BASE_CUDA_VERSION=10.2
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*

FROM base as intel
# MKL
@@ -195,6 +197,6 @@ RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
# cmake3 is needed for the MIOpen build
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
@@ -36,7 +36,9 @@ FROM base as cuda
ARG BASE_CUDA_VERSION=11.8
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu*

FROM base as intel
# MKL
@@ -158,7 +160,7 @@ ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
ENV MKLROOT /opt/intel
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh

@@ -67,7 +67,9 @@ FROM base as cuda
ARG BASE_CUDA_VERSION
# Install CUDA
ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh install_nccl.sh ci_commit_pins/nccl-cu*

FROM base as magma
ARG BASE_CUDA_VERSION
@@ -42,6 +42,7 @@ RUN yum install -y \
    llvm-devel \
    libzstd-devel \
    python3.12-devel \
    python3.12-test \
    python3.12-setuptools \
    python3.12-pip \
    python3-virtualenv \
@@ -101,24 +102,33 @@ CMD ["/bin/bash"]

# install test dependencies:
# - grpcio requires system openssl, bundled crypto fails to build
# - ml_dtypes 0.4.0 requires some fixes provided in later commits to build
RUN dnf install -y \
    protobuf-devel \
    protobuf-c-devel \
    protobuf-lite-devel \
    wget \
    patch
    hdf5-devel \
    python3-h5py \
    git

RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4
RUN cd ~ && \
    git clone https://github.com/jax-ml/ml_dtypes && \
    cd ml_dtypes && \
    git checkout v0.4.0 && \
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio

# cmake-3.28.0 from pip for onnxruntime
RUN python3 -mpip install cmake==3.28.0

# build onnxruntime 1.21.0 from sources.
# it is not possible to build it from sources using pip,
# so just build it from upstream repository.
# h5py is dependency of onnxruntime_training.
# h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
# install newest flatbuffers version first:
# for some reason old version is getting pulled in otherwise.
# packaging package is required for onnxruntime wheel build.
RUN pip3 install flatbuffers && \
    pip3 install h5py==3.11.0 && \
    pip3 install packaging && \
    git clone https://github.com/microsoft/onnxruntime && \
    cd onnxruntime && git checkout v1.21.0 && \
    git submodule update --init --recursive && \
    wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
    wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
    patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
    patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
    python3 setup.py bdist_wheel && \
    pip3 install dist/*.whl && \
    rm -rf ml_dtypes
    ./build.sh --config Release --parallel 0 --enable_pybind --build_wheel --enable_training --enable_training_apis --enable_training_ops --skip_tests --allow_running_as_root && \
    pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
    cd .. && /bin/rm -rf ./onnxruntime
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
# Script used only in CD pipeline

set -eou pipefail
set -exou pipefail

TOPDIR=$(git rev-parse --show-toplevel)

@@ -9,152 +9,110 @@ image="$1"
shift

if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE"
  echo "Usage: $0 IMAGE:ARCHTAG"
  exit 1
fi

DOCKER_IMAGE="pytorch/${image}"
# Go from imagename:tag to tag
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

DOCKER_REGISTRY="${DOCKER_REGISTRY:-docker.io}"
GPU_ARCH_VERSION=""
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
  # extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
  GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
  # extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
  GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
fi

GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
WITH_PUSH=${WITH_PUSH:-}

case ${GPU_ARCH_TYPE} in
  cpu)
case ${image} in
  manylinux2_28-builder:cpu)
    TARGET=cpu_final
    DOCKER_TAG=cpu
    GPU_IMAGE=centos:7
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
    ;;
  cpu-manylinux_2_28)
    TARGET=cpu_final
    DOCKER_TAG=cpu
    GPU_IMAGE=amd64/almalinux:8
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
    MANY_LINUX_VERSION="2_28"
    ;;
  cpu-aarch64)
  manylinuxaarch64-builder:cpu-aarch64)
    TARGET=final
    DOCKER_TAG=cpu-aarch64
    GPU_IMAGE=arm64v8/centos:7
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
    MANY_LINUX_VERSION="aarch64"
    ;;
  cpu-aarch64-2_28)
  manylinux2_28_aarch64-builder:cpu-aarch64)
    TARGET=final
    DOCKER_TAG=cpu-aarch64
    GPU_IMAGE=arm64v8/almalinux:8
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1"
    MANY_LINUX_VERSION="2_28_aarch64"
    ;;
  cpu-cxx11-abi)
  manylinuxcxx11-abi-builder:cpu-cxx11-abi)
    TARGET=final
    DOCKER_TAG=cpu-cxx11-abi
    GPU_IMAGE=""
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
    MANY_LINUX_VERSION="cxx11-abi"
    ;;
  cpu-s390x)
  manylinuxs390x-builder:cpu-s390x)
    TARGET=final
    DOCKER_TAG=cpu-s390x
    GPU_IMAGE=s390x/almalinux:8
    DOCKER_GPU_BUILD_ARG=""
    MANY_LINUX_VERSION="s390x"
    ;;
  cuda)
  manylinux2_28-builder:cuda*)
    TARGET=cuda_final
    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
    # Keep this up to date with the minimum version of CUDA we currently support
    GPU_IMAGE=centos:7
    DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9"
    ;;
  cuda-manylinux_2_28)
    TARGET=cuda_final
    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
    GPU_IMAGE=amd64/almalinux:8
    DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
    MANY_LINUX_VERSION="2_28"
    ;;
  cuda-aarch64)
  manylinuxaarch64-builder:cuda*)
    TARGET=cuda_final
    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
    GPU_IMAGE=arm64v8/centos:7
    DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
    MANY_LINUX_VERSION="aarch64"
    DOCKERFILE_SUFFIX="_cuda_aarch64"
    ;;
  rocm|rocm-manylinux_2_28)
  manylinux2_28-builder:rocm*)
    TARGET=rocm_final
    DOCKER_TAG=rocm${GPU_ARCH_VERSION}
    GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
    DEVTOOLSET_VERSION="9"
    if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then
      MANY_LINUX_VERSION="2_28"
      DEVTOOLSET_VERSION="11"
      GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
    fi
    MANY_LINUX_VERSION="2_28"
    DEVTOOLSET_VERSION="11"
    GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
    DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
    ;;
  xpu)
  manylinux2_28-builder:xpu)
    TARGET=xpu_final
    DOCKER_TAG=xpu
    GPU_IMAGE=amd64/almalinux:8
    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
    MANY_LINUX_VERSION="2_28"
    ;;
  *)
    echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
    echo "ERROR: Unrecognized image name: ${image}"
    exit 1
    ;;
esac

IMAGES=''

if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
  DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
fi
(
  set -x

  # Only activate this if in CI
  if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
    sudo systemctl daemon-reload
    sudo systemctl restart docker
  fi

  DOCKER_BUILDKIT=1 docker build \
    ${DOCKER_GPU_BUILD_ARG} \
    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
    --target "${TARGET}" \
    -t "${DOCKER_IMAGE}" \
    $@ \
    -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
    "${TOPDIR}/.ci/docker/"
)

GITHUB_REF=${GITHUB_REF:-"dev"}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}

if [[ "${WITH_PUSH}" == true ]]; then
  (
    set -x
    docker push "${DOCKER_IMAGE}"
    if [[ -n ${GITHUB_REF} ]]; then
      docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
      docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
      docker push "${DOCKER_IMAGE_BRANCH_TAG}"
      docker push "${DOCKER_IMAGE_SHA_TAG}"
    fi
  )
fi
# Only activate this if in CI
if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
  sudo systemctl daemon-reload
  sudo systemctl restart docker
fi

tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

DOCKER_BUILDKIT=1 docker build \
  ${DOCKER_GPU_BUILD_ARG} \
  --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
  --target "${TARGET}" \
  -t "${tmp_tag}" \
  $@ \
  -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
  "${TOPDIR}/.ci/docker/"
@@ -41,11 +41,14 @@ fbscribelogger==0.1.7
#Pinned versions: 0.1.6
#test that import:

flatbuffers==2.0
flatbuffers==2.0 ; platform_machine != "s390x"
#Description: cross platform serialization library
#Pinned versions: 2.0
#test that import:

flatbuffers ; platform_machine == "s390x"
#Description: cross platform serialization library; Newer version is required on s390x for new python version

hypothesis==5.35.1
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
#Description: advanced library for generating parametrized tests
@@ -102,10 +105,10 @@ networkx==2.8.8
#Pinned versions: 2.8.8
#test that import: functorch

#ninja
#Description: build system. Note that installing it from
#here breaks things so it is commented out
#Pinned versions: 1.10.0.post1
ninja==1.11.1.3
#Description: build system. Used in some tests. Used in build to generate build
#time tracing information
#Pinned versions: 1.11.1.3
#test that import: run_test.py, test_cpp_extensions_aot.py, test_determination.py

numba==0.49.0 ; python_version < "3.9"
@@ -353,7 +356,7 @@ parameterized==0.8.1
#Pinned versions: 1.24.0
#test that import: test_sac_estimator.py

pwlf==2.2.1 ; python_version >= "3.8"
pwlf==2.2.1
#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 2.2.1
#test that import: test_sac_estimator.py
@@ -365,10 +368,9 @@ PyYAML
pyzstd
setuptools

ninja==1.11.1 ; platform_machine == "aarch64"
scons==4.5.2 ; platform_machine == "aarch64"

pulp==2.9.0 ; python_version >= "3.8"
pulp==2.9.0
#Description: required for testing ilp formulation under torch/distributed/_tools
#Pinned versions: 2.9.0
#test that import: test_sac_ilp.py
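The `platform_machine` markers above are standard PEP 508 environment markers; pip evaluates them against the installing machine at install time. A small hedged sketch of that evaluation (assumes the `packaging` library is available):

```
# Illustrative check of the marker used for flatbuffers above.
python3 -c "from packaging.markers import Marker; print(Marker('platform_machine != \"s390x\"').evaluate())"
# prints True on x86_64/aarch64 hosts and False on s390x
```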
@@ -1,15 +1,20 @@
sphinx==5.3.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 5.3.0
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@a98ffecb792d50df495be401becbf5c414421423#egg=pytorch_sphinx_theme2

# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
# but it doesn't seem to work and hangs around idly. The initial thought is probably
# something related to Docker setup. We can investigate this later

sphinxcontrib.katex==0.8.6
#Description: This is used to generate PyTorch docs
#Pinned versions: 0.8.6

sphinxext-opengraph==0.9.1
#Description: This is used to generate PyTorch docs
#Pinned versions: 0.9.1

matplotlib==3.5.3
#Description: This is used to generate PyTorch docs
#Pinned versions: 3.5.3
@@ -46,5 +51,6 @@ myst-nb==0.17.2
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
python-etcd==0.4.5
sphinx-copybutton==0.5.0
sphinx-panels==0.4.1
sphinx-design==0.4.0
sphinxcontrib-mermaid==1.0.0
myst-parser==0.18.1
@@ -1 +1 @@
3.3.1
3.3.0
@@ -2,7 +2,7 @@ ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG IMAGE_NAME

FROM ${IMAGE_NAME}
FROM ${IMAGE_NAME} as base

ARG UBUNTU_VERSION
ARG CUDA_VERSION
@@ -50,13 +50,6 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
@@ -97,14 +90,20 @@ RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

ARG TRITON

FROM base as triton-builder
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
RUN bash ./install_triton.sh

FROM base as final
COPY --from=triton-builder /opt/triton /opt/triton
RUN if [ -n "${TRITON}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi
RUN rm -rf /opt/triton

ARG HALIDE
# Build and install halide
@@ -159,6 +158,16 @@ COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash install_cusparselt.sh
RUN rm install_cusparselt.sh

# Install NCCL
ARG CUDA_VERSION
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash install_nccl.sh
RUN rm install_nccl.sh /ci_commit_pins/nccl-cu*
ENV USE_SYSTEM_NCCL=1
ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"

# Install CUDSS
ARG CUDA_VERSION
COPY ./common/install_cudss.sh install_cudss.sh

@@ -50,13 +50,6 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
@@ -70,7 +63,7 @@ COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
RUN rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
@@ -77,13 +77,6 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}
FROM ubuntu:${UBUNTU_VERSION} as base

ARG UBUNTU_VERSION

@@ -52,9 +52,16 @@ RUN bash ./install_lcov.sh && rm install_lcov.sh
# Install cuda and cudnn
ARG CUDA_VERSION
COPY ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
# No effect if cuda not installed
ENV USE_SYSTEM_NCCL=1
ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"

# (optional) Install UCC
ARG UCX_COMMIT
@@ -74,13 +81,6 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
@@ -88,18 +88,6 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
RUN rm install_vulkan_sdk.sh

# (optional) Install swiftshader
ARG SWIFTSHADER
COPY ./common/install_swiftshader.sh install_swiftshader.sh
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
RUN rm install_swiftshader.sh

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
@@ -127,20 +115,21 @@ RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_d
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
ARG TRITON_CPU

# Create a separate stage for building Triton and Triton-CPU. install_triton
# will check for the presence of env vars
FROM base as triton-builder
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt

ARG TRITON_CPU
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-cpu.txt
RUN bash ./install_triton.sh

FROM base as final
COPY --from=triton-builder /opt/triton /opt/triton
RUN if [ -n "${TRITON}" ] || [ -n "${TRITON_CPU}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi
RUN rm -rf /opt/triton

ARG EXECUTORCH
# Build and install executorch
2  .ci/magma-rocm/.gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
output/
magma-rocm*/
41  .ci/magma-rocm/Makefile  Normal file
@@ -0,0 +1,41 @@
SHELL=/usr/bin/env bash

DOCKER_CMD ?= docker
DESIRED_ROCM ?= 6.4
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
PACKAGE_NAME = magma-rocm
# inherit this from underlying docker image, do not pass this env var to docker
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201

DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
	-w /builder \
	-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_ROCM_SHORT} \
	-e DESIRED_ROCM=${DESIRED_ROCM} \
	"pytorch/manylinux2_28-builder:rocm${DESIRED_ROCM}-main" \
	magma-rocm/build_magma.sh

.PHONY: all
all: magma-rocm64
all: magma-rocm63
all: magma-rocm624

.PHONY:
clean:
	$(RM) -r magma-*
	$(RM) -r output

.PHONY: magma-rocm64
magma-rocm64: DESIRED_ROCM := 6.4
magma-rocm64:
	$(DOCKER_RUN)

.PHONY: magma-rocm63
magma-rocm63: DESIRED_ROCM := 6.3
magma-rocm63:
	$(DOCKER_RUN)

.PHONY: magma-rocm624
magma-rocm624: DESIRED_ROCM := 6.2.4
magma-rocm624:
	$(DOCKER_RUN)
48  .ci/magma-rocm/README.md  Normal file
@@ -0,0 +1,48 @@
# Magma ROCm

This folder contains the scripts and configurations to build libmagma.so, linked for various versions of ROCm.

## Building

Look in the `Makefile` for available targets to build. To build any target, for example `magma-rocm63`, run

```
# Using `docker`
make magma-rocm63

# Using `podman`
DOCKER_CMD=podman make magma-rocm63
```

This spawns a `pytorch/manylinux-rocm<version>` docker image, which has the required `devtoolset` and ROCm versions installed.
Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which packages the necessary files
into a tarball, with the following structure:

```
.
├── include       # header files
├── lib           # libmagma.so
├── info
│   ├── licenses  # license file
│   └── recipe    # build script
```

More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the ROCm version.
Output binaries should be in the `output` folder.


## Pushing

Packages can be uploaded to an S3 bucket using:

```
aws s3 cp output/*/magma-cuda*.bz2 <bucket-with-path>
```

If you do not have upload permissions, please ping @seemethere or @soumith to gain access.

## New versions

New ROCm versions can be added by creating a new make target with the next desired version. For ROCm version N.n, the target should be named `magma-rocmNn`.

Make sure to edit the appropriate environment variables (e.g., DESIRED_ROCM) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct. A sketch of what a new target could look like follows below.
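For illustration, here is one hedged way a future target could be wired up, written as a shell append so it can be tried directly (ROCm 7.0 is an invented version number, not a supported release):

```
# Hypothetical: add a magma-rocm70 target; 7.0 is a placeholder version.
cat >> .ci/magma-rocm/Makefile <<'EOF'

.PHONY: magma-rocm70
magma-rocm70: DESIRED_ROCM := 7.0
magma-rocm70:
	$(DOCKER_RUN)
EOF
```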
42  .ci/magma-rocm/build_magma.sh  Executable file
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

set -eou pipefail

# Environment variables
# The script expects DESIRED_ROCM and PACKAGE_NAME to be set
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Version 2.7.2 + ROCm related updates
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6

# Folders for the build
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
PACKAGE_DIR=${ROOT_DIR}/magma-rocm/${PACKAGE_NAME} # build workspace
PACKAGE_OUTPUT=${ROOT_DIR}/magma-rocm/output # where tarballs are stored
PACKAGE_BUILD=${PACKAGE_DIR} # where the content of the tarball is prepared
PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe
PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses
mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE}

# Fetch magma sources and verify checksum
pushd ${PACKAGE_DIR}
git clone https://bitbucket.org/icl/magma.git
pushd magma
git checkout ${MAGMA_VERSION}
popd
popd

# build
pushd ${PACKAGE_DIR}/magma
# The build.sh script expects to be executed from the sources root folder
INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh
popd

# Package recipe, license and tarball
# Folder and package name are backward compatible for the build workflow
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
cp ${PACKAGE_DIR}/magma/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT
pushd ${PACKAGE_BUILD}
tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info
echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2
popd
38  .ci/magma-rocm/package_files/build.sh  Executable file
@@ -0,0 +1,38 @@
# Magma build scripts need `python`
ln -sf /usr/bin/python3 /usr/bin/python

ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  almalinux)
    yum install -y gcc-gfortran
    ;;
  *)
    echo "No preinstalls to build magma..."
    ;;
esac

MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}

cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
  echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
fi
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
  amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
else
  amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
  echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j $(nproc)
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
cp -R lib ${INSTALL_DIR}
cp -R include ${INSTALL_DIR}
@@ -111,12 +111,6 @@ case ${DESIRED_PYTHON} in
    ;;
esac

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
  export _GLIBCXX_USE_CXX11_ABI=1
else
  export _GLIBCXX_USE_CXX11_ABI=0
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
  echo "Calling build_amd.py at $(date)"
  python tools/amd_build/build_amd.py
@@ -209,12 +203,6 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then

  mkdir -p /tmp/$LIBTORCH_HOUSE_DIR

  if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    LIBTORCH_ABI="cxx11-abi-"
  else
    LIBTORCH_ABI=
  fi

  zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
  cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
    /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
@@ -333,8 +321,8 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
  # ROCm workaround for roctracer dlopens
  if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
    patchedpath=$(fname_without_so_number $destpath)
  # Keep the so number for XPU dependencies and libgomp.so.1 to avoid loading it twice
  elif [[ "$DESIRED_CUDA" == *"xpu"* || "$filename" == "libgomp.so.1" ]]; then
  # Keep the so number for XPU dependencies
  elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then
    patchedpath=$destpath
  else
    patchedpath=$(fname_with_sha256 $destpath)
@@ -95,12 +95,6 @@ python setup.py clean
retry pip install -qr requirements.txt
retry pip install -q numpy==2.0.1

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
  export _GLIBCXX_USE_CXX11_ABI=1
else
  export _GLIBCXX_USE_CXX11_ABI=0
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
  echo "Calling build_amd.py at $(date)"
  python tools/amd_build/build_amd.py
@@ -169,12 +163,6 @@ fi

)

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
  LIBTORCH_ABI="cxx11-abi-"
else
  LIBTORCH_ABI=
fi

(
  set -x

@@ -35,7 +35,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
fi

if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *clang* ]]; then
  # TODO: there is a linking issue when building with UCC using clang,
  # disable it for now, to be fixed later.
  # TODO: disable UCC temporarily to enable CUDA 12.1 in CI
@@ -277,10 +277,8 @@ else
  # or building non-XLA tests.
  if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
        "$BUILD_ENVIRONMENT" != *xla* ]]; then
    if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
      # Install numpy-2.0.2 for builds which are backward compatible with 1.X
      python -mpip install numpy==2.0.2
    fi
    # Install numpy-2.0.2 for builds which are backward compatible with 1.X
    python -mpip install numpy==2.0.2

    WERROR=1 python setup.py clean

@@ -303,6 +301,18 @@ else
  fi
  pip_install_whl "$(echo dist/*.whl)"

  if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
    echo "Checking that xpu is compiled"
    pushd dist/
    if python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'; then
      echo "XPU support is compiled in."
    else
      echo "XPU support is NOT compiled in."
      exit 1
    fi
    popd
  fi

  # TODO: I'm not sure why, but somehow we lose verbose commands
  set -x

@@ -63,64 +63,12 @@ fi
# Check GCC ABI
###############################################################################

# NOTE [ Building libtorch with old vs. new gcc ABI ]
#
# Packages built with one version of ABI could not be linked against by client
# C++ libraries that were compiled using the other version of ABI. Since both
# gcc ABIs are still common in the wild, we need to support both ABIs. Currently:
#
# - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI.
# - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI.
# NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce
# wheels with cxx11-abi

echo "Checking that the gcc ABI is what we expect"
if [[ "$(uname)" != 'Darwin' ]]; then
  function is_expected() {
    if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* || "$DESIRED_CUDA" == *"rocm"* ]]; then
      if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
        echo 1
      fi
    else
      if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then
        echo 1
      fi
    fi
  }

  # First we check that the env var in TorchConfig.cmake is correct

  # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake
  torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake"
  if [[ ! -f "$torch_config" ]]; then
    echo "No TorchConfig.cmake found!"
    ls -lah "$install_root/share/cmake/Torch"
    exit 1
  fi
  echo "Checking the TorchConfig.cmake"
  cat "$torch_config"

  # The sed call below is
  #   don't print lines by default (only print the line we want)
  # -n
  #   execute the following expression
  # e
  #   replace lines that match with the first capture group and print
  # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p
  #   any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a
  #   quote, any characters
  #   Note the exactly one single character after the '='. In the case that the
  #   variable is not set the '=' will be followed by a '"' immediately and the
  #   line will fail the match and nothing will be printed; this is what we
  #   want. Otherwise it will capture the 0 or 1 after the '='.
  # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/
  #   replace the matched line with the capture group and print
  # /\1/p
  actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")"
  if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then
    echo "gcc ABI $actual_gcc_abi not as expected."
    exit 1
  fi
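For a concrete feel of that sed extraction, here is a hedged demo on a fabricated TorchConfig.cmake line (not taken from a real build):

```
# Illustrative input; only the D_GLIBCXX_USE_CXX11_ABI=<digit>" part matters.
echo 'set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=1")' \
  | sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p'
# prints: 1
```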
|
||||
|
||||
# We also check that there are [not] cxx11 symbols in libtorch
|
||||
# We also check that there are cxx11 symbols in libtorch
|
||||
#
|
||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||
python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py"
|
||||
@@ -198,35 +146,11 @@ setup_link_flags () {

TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
build_and_run_example_cpp () {
  if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    GLIBCXX_USE_CXX11_ABI=1
  else
    GLIBCXX_USE_CXX11_ABI=0
  fi
  setup_link_flags
  g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
  g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
  ./$1
}

build_example_cpp_with_incorrect_abi () {
  if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    GLIBCXX_USE_CXX11_ABI=0
  else
    GLIBCXX_USE_CXX11_ABI=1
  fi
  set +e
  setup_link_flags
  g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
  ERRCODE=$?
  set -e
  if [ "$ERRCODE" -eq "0" ]; then
    echo "Building example with incorrect ABI didn't throw error. Aborting."
    exit 1
  else
    echo "Building example with incorrect ABI throws expected error. Proceeding."
  fi
}

###############################################################################
# Check simple Python/C++ calls
###############################################################################
@@ -236,11 +160,6 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
    export LD_LIBRARY_PATH=/usr/local/cuda/lib64
  fi
  build_and_run_example_cpp simple-torch-test
  # `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test
  # the expected failure case for Ubuntu 16.04 + gcc 5.4 only.
  if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    build_example_cpp_with_incorrect_abi simple-torch-test
  fi
else
  pushd /tmp
  python -c 'import torch'
@@ -297,6 +216,14 @@ else
  fi
fi

###############################################################################
# Check XPU configured correctly
###############################################################################
if [[ "$DESIRED_CUDA" == 'xpu' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
  echo "Checking that xpu is compiled"
  python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'
fi

###############################################################################
# Check CUDA configured correctly
###############################################################################
@@ -375,10 +302,19 @@ except RuntimeError as e:
fi

###############################################################################
# Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries
# Check for C++ ABI compatibility with GCC-11
###############################################################################
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" == 'manywheel' ]]; then
  pushd /tmp
  python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))"
  # Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html gcc-11 is ABI16
  # Though manylinux_2.28 should have been built with gcc-14, per
  # https://github.com/pypa/manylinux?tab=readme-ov-file#manylinux_2_28-almalinux-8-based
  # On s390x gcc 14 is used because it contains a fix for the interaction
  # between precompiled headers and vectorization builtins.
  # This fix is not available in earlier gcc versions.
  # gcc-14 uses ABI19.
  if [[ "$(uname -m)" != "s390x" ]]; then
    python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1016' else 1)"
  fi
  popd
fi

@@ -202,7 +202,7 @@ function install_torchrec_and_fbgemm() {

function clone_pytorch_xla() {
  if [[ ! -d ./xla ]]; then
    git clone --recursive -b r2.7 https://github.com/pytorch/xla.git
    git clone --recursive --quiet https://github.com/pytorch/xla.git
    pushd xla
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
@@ -1,50 +1,31 @@
#!/bin/bash

# Script for installing sccache on the xla build job, which uses xla's docker
# image, which has sccache installed but doesn't write the stubs. This is
# mostly copied from .ci/docker/install_cache.sh. Changes are: removing checks
# that will always return the same thing, ex checks for rocm, CUDA, changing
# the path where sccache is installed, not changing /etc/environment, and not
# installing/downloading sccache as it is already in the docker image.
# image and doesn't have sccache installed on it. This is mostly copied from
# .ci/docker/install_cache.sh. Changes are: removing checks that will always
# return the same thing, ex checks for rocm, CUDA, and changing the path
# where sccache is installed, and not changing /etc/environment.

set -ex -o pipefail

install_binary() {
  echo "Downloading sccache binary from S3 repo"
  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /tmp/cache/bin/sccache
}

mkdir -p /tmp/cache/bin
mkdir -p /tmp/cache/lib
export PATH="/tmp/cache/bin:$PATH"

install_binary
chmod a+x /tmp/cache/bin/sccache

function write_sccache_stub() {
  # Unset LD_PRELOAD for ps because of asan + ps issues
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
  if [ "$1" == "gcc" ]; then
    # Do not call sccache recursively when dumping preprocessor argument
    # For some reason it's very important for the first cached nvcc invocation
    cat >"/tmp/cache/bin/$1" <<EOF
#!/bin/sh

# sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
for arg in "\$@"; do
  if [ "\$arg" = "-E" ]; then
    exec $(which "$1") "\$@"
  fi
done

if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which "$1") "\$@"
else
  exec $(which "$1") "\$@"
fi
EOF
  else
    cat >"/tmp/cache/bin/$1" <<EOF
#!/bin/sh

if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which "$1") "\$@"
else
  exec $(which "$1") "\$@"
fi
EOF
  fi
  # shellcheck disable=SC2086
  # shellcheck disable=SC2059
  printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/tmp/cache/bin/$1"
  chmod a+x "/tmp/cache/bin/$1"
}
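
# Illustrative usage (hypothetical; the actual call sites are outside this hunk):
# `write_sccache_stub gcc` creates /tmp/cache/bin/gcc, a wrapper that forwards
# `gcc -E` invocations straight to the real compiler and routes every other
# invocation through sccache unless the parent process is already sccache.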


@@ -33,56 +33,15 @@ if which sccache > /dev/null; then
  export PATH="${tmp_dir}:$PATH"
fi

cross_compile_arm64() {
  # Cross compilation for arm64
  print_cmake_info
  if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
    # Needed for inductor benchmarks, as lots of HF networks make `torch.distributed` calls
    USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
  else
    # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
    # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
    USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
  fi
}

compile_arm64() {
  # Compilation for arm64
  # TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation was done with OpenMP disabled)
  USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_x86_64() {
  USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel --plat-name=macosx_10_9_x86_64
}

build_lite_interpreter() {
  echo "Testing libtorch (lite interpreter)."

  CPP_BUILD="$(pwd)/../cpp_build"
  # Ensure the removal of the tmp directory
  trap 'rm -rfv ${CPP_BUILD}' EXIT
  rm -rf "${CPP_BUILD}"
  mkdir -p "${CPP_BUILD}/caffe2"

  # It looks like libtorch needs to be built in the "${CPP_BUILD}/caffe2" folder.
  BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
  pushd "${CPP_BUILD}/caffe2" || exit
  VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
  popd || exit

  "${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
}

print_cmake_info

if [[ ${BUILD_ENVIRONMENT} = *arm64* ]]; then
  if [[ $(uname -m) == "arm64" ]]; then
    compile_arm64
  else
    cross_compile_arm64
  fi
elif [[ ${BUILD_ENVIRONMENT} = *lite-interpreter* ]]; then
  export BUILD_LITE_INTERPRETER=1
  build_lite_interpreter
else
  compile_x86_64
fi

if which sccache > /dev/null; then
  print_sccache_stats
fi
@@ -221,25 +221,39 @@ test_torchbench_smoketest() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local backend=eager
  local dtype=notset
  local device=mps
  local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam pytorch_unet stable_diffusion_text_encoder moco speech_transformer)

  touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
  touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
  for backend in eager inductor; do

  echo "Setup complete, launching torchbench training performance run"
  for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
    PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
      --performance --only "$model" --backend "$backend" --training --devices "$device" \
      --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
  done
    for dtype in notset float16 bfloat16; do
      echo "Launching torchbench inference performance run for backend ${backend} and dtype ${dtype}"
      local dtype_arg="--${dtype}"
      if [ "$dtype" == notset ]; then
        dtype_arg="--float32"
      fi
      touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
      for model in "${models[@]}"; do
        PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
          --performance --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \
          --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" || true
      done
    done

    for dtype in notset amp; do
      echo "Launching torchbench training performance run for backend ${backend} and dtype ${dtype}"
      touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
      local dtype_arg="--${dtype}"
      if [ "$dtype" == notset ]; then
        dtype_arg="--float32"
      fi
      for model in "${models[@]}"; do
        PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
          --performance --only "$model" --backend "$backend" --training --devices "$device" "$dtype_arg" \
          --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" || true
      done
    done

  echo "Launching torchbench inference performance run"
  for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
    PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
      --performance --only "$model" --backend "$backend" --inference --devices "$device" \
      --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
  done

  echo "Pytorch benchmark on mps device completed"

@@ -119,12 +119,6 @@ popd
git rm -rf "$install_path" || true
mv "$pt_checkout/docs/build/html" "$install_path"

# Prevent Google from indexing $install_path/_modules. This folder contains
# generated source files.
# NB: the following only works with GNU sed. The sed shipped with macOS is different.
# One can `brew install gnu-sed` on a Mac and then use "gsed" instead of "sed".
find "$install_path/_modules" -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">'

git add "$install_path" || true
git status
git config user.email "soumith+bot@pytorch.org"

@@ -80,7 +80,7 @@ def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
    return functools.reduce(list.__add__, (x.result() for x in tasks), [])


def check_lib_symbols_for_abi_correctness(lib: str, pre_cxx11_abi: bool = True) -> None:
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
    print(f"lib: {lib}")
    cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
    pre_cxx11_symbols = grep_symbols(lib, LIBTORCH_PRE_CXX11_PATTERNS)
@@ -88,28 +88,12 @@ def check_lib_symbols_for_abi_correctness(lib: str, pre_cxx11_abi: bool = True)
    num_pre_cxx11_symbols = len(pre_cxx11_symbols)
    print(f"num_cxx11_symbols: {num_cxx11_symbols}")
    print(f"num_pre_cxx11_symbols: {num_pre_cxx11_symbols}")
    if pre_cxx11_abi:
        if num_cxx11_symbols > 0:
            raise RuntimeError(
                f"Found cxx11 symbols, but there shouldn't be any, see: {cxx11_symbols[:100]}"
            )
        if num_pre_cxx11_symbols < 1000:
            raise RuntimeError("Didn't find enough pre-cxx11 symbols.")
        # Check for no recursive iterators, regression test for https://github.com/pytorch/pytorch/issues/133437
        rec_iter_symbols = grep_symbols(
            lib, [re.compile("std::filesystem::recursive_directory_iterator.*")]
    if num_pre_cxx11_symbols > 0:
        raise RuntimeError(
            f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_symbols[:100]}"
        )
        if len(rec_iter_symbols) > 0:
            raise RuntimeError(
                f"recursive_directory_iterator in used pre-CXX11 binaries, see: {rec_iter_symbols}"
            )
    else:
        if num_pre_cxx11_symbols > 0:
            raise RuntimeError(
                f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_symbols[:100]}"
            )
        if num_cxx11_symbols < 100:
            raise RuntimeError("Didn't find enough cxx11 symbols")
    if num_cxx11_symbols < 100:
        raise RuntimeError("Didn't find enough cxx11 symbols")


def main() -> None:
@@ -121,9 +105,8 @@ def main() -> None:
    else:
        install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"

    libtorch_cpu_path = install_root / "lib" / "libtorch_cpu.so"
    pre_cxx11_abi = "cxx11-abi" not in os.getenv("DESIRED_DEVTOOLSET", "")
    check_lib_symbols_for_abi_correctness(libtorch_cpu_path, pre_cxx11_abi)
    libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
    check_lib_symbols_for_abi_correctness(libtorch_cpu_path)


if __name__ == "__main__":
@@ -1,74 +0,0 @@
import ctypes
import os
import sys
from pathlib import Path


def get_gomp_thread():
    """
    Retrieves the maximum number of OpenMP threads after loading the `libgomp.so.1`
    library and the `libtorch_cpu.so` library. It then queries the
    maximum number of threads available for OpenMP parallel regions using the
    `omp_get_max_threads` function.

    Returns:
        int: The maximum number of OpenMP threads available.

    Notes:
        - The function assumes the default path for `libgomp.so.1` on AlmaLinux OS.
        - The path to `libtorch_cpu.so` is constructed based on the Python executable's
          installation directory.
        - This function is specific to environments where PyTorch and OpenMP are used
          together and may require adjustments for other setups.
    """
    python_path = Path(sys.executable).resolve()
    python_prefix = (
        python_path.parent.parent
    )  # Typically goes to the Python installation root

    # Get the additional ABI flags (if any); it may be an empty string.
    abiflags = getattr(sys, "abiflags", "")

    # Construct the Python directory name correctly (e.g., "python3.13t").
    python_version = (
        f"python{sys.version_info.major}.{sys.version_info.minor}{abiflags}"
    )

    libtorch_cpu_path = (
        python_prefix
        / "lib"
        / python_version
        / "site-packages"
        / "torch"
        / "lib"
        / "libtorch_cpu.so"
    )

    # use the default gomp path of AlmaLinux OS
    libgomp_path = "/usr/lib64/libgomp.so.1"

    os.environ["GOMP_CPU_AFFINITY"] = "0-3"

    libgomp = ctypes.CDLL(libgomp_path)
    libgomp = ctypes.CDLL(libtorch_cpu_path)

    libgomp.omp_get_max_threads.restype = ctypes.c_int
    libgomp.omp_get_max_threads.argtypes = []

    omp_max_threads = libgomp.omp_get_max_threads()
    return omp_max_threads


def main():
    omp_max_threads = get_gomp_thread()
    print(
        f"omp_max_threads after loading libgomp.so and libtorch_cpu.so: {omp_max_threads}"
    )
    if omp_max_threads == 1:
        raise RuntimeError(
            "omp_max_threads is 1. Check whether libgomp.so is loaded twice."
        )


if __name__ == "__main__":
    main()
@@ -7,6 +7,7 @@ import subprocess
import sys
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Optional

import torch
import torch._dynamo
@@ -195,8 +196,41 @@ def test_cuda_gds_errors_captured() -> None:
    )


def find_pypi_package_version(package: str) -> Optional[str]:
    from importlib import metadata

    dists = metadata.distributions()
    for dist in dists:
        if dist.metadata["Name"].startswith(package):
            return dist.version
    return None
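
# Illustrative usage (package versions assumed for the example): with
# nvidia-cudnn-cu12 9.8.0.87 installed, find_pypi_package_version("nvidia-cudnn")
# returns "9.8.0.87"; if no installed distribution name matches, it returns None.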


def cudnn_to_version_str(cudnn_version: int) -> str:
    patch = int(cudnn_version % 10)
    minor = int((cudnn_version / 100) % 100)
    major = int((cudnn_version / 10000) % 10000)
    return f"{major}.{minor}.{patch}"


def compare_pypi_to_torch_versions(
    package: str, pypi_version: Optional[str], torch_version: str
) -> None:
    if pypi_version is None:
        raise RuntimeError(f"Can't find {package} in PyPI for Torch: {torch_version}")
    if pypi_version.startswith(torch_version):
        print(f"Found matching {package}. Torch: {torch_version} PyPI {pypi_version}")
    else:
        raise RuntimeError(
            f"Wrong {package} version. Torch: {torch_version} PyPI: {pypi_version}"
        )
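
# The prefix comparison above deliberately tolerates the extra PyPI build
# component: e.g. (values assumed) torch reporting "9.8.0" matches PyPI
# "9.8.0.87", while "9.7.1" checked against "9.8.0.87" raises.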


def smoke_test_cuda(
    package: str, runtime_error_check: str, torch_compile_check: str
    package: str,
    runtime_error_check: str,
    torch_compile_check: str,
    pypi_pkg_check: str,
) -> None:
    if not torch.cuda.is_available() and is_cuda_system:
        raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")
@@ -226,20 +260,30 @@ def smoke_test_cuda(
        raise RuntimeError(
            f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
        )
    print(f"torch cuda: {torch.version.cuda}")
    # todo add cudnn version validation
    print(f"torch cudnn: {torch.backends.cudnn.version()}")
    print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

    print(f"torch cuda: {torch.version.cuda}")
    torch.cuda.init()
    print("CUDA initialized successfully")
    print(f"Number of CUDA devices: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")

    # nccl is available only on Linux
    print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")
    torch_cudnn_version = cudnn_to_version_str(torch.backends.cudnn.version())
    print(f"Torch cuDNN version: {torch_cudnn_version}")

    if sys.platform in ["linux", "linux2"]:
        print(f"torch nccl version: {torch.cuda.nccl.version()}")
        torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version())
        print(f"Torch nccl version: {torch_nccl_version}")

    # PyPI dependencies are installed on Linux only, and nccl is available only on Linux.
    if pypi_pkg_check == "enabled" and sys.platform in ["linux", "linux2"]:
        compare_pypi_to_torch_versions(
            "cudnn", find_pypi_package_version("nvidia-cudnn"), torch_cudnn_version
        )
        compare_pypi_to_torch_versions(
            "nccl", find_pypi_package_version("nvidia-nccl"), torch_nccl_version
        )

    if runtime_error_check == "enabled":
        test_cuda_runtime_errors_captured()
@@ -398,6 +442,13 @@ def parse_args():
        choices=["enabled", "disabled"],
        default="enabled",
    )
    parser.add_argument(
        "--pypi-pkg-check",
        help="Check pypi package versions cudnn and nccl",
        type=str,
        choices=["enabled", "disabled"],
        default="enabled",
    )
    return parser.parse_args()


@@ -422,7 +473,10 @@ def main() -> None:
    smoke_test_modules()

    smoke_test_cuda(
        options.package, options.runtime_error_check, options.torch_compile_check
        options.package,
        options.runtime_error_check,
        options.torch_compile_check,
        options.pypi_pkg_check,
    )

@@ -1175,7 +1175,6 @@ build_xla() {
  # These functions are defined in .circleci/common.sh in pytorch/xla repo
  retry install_pre_deps_pytorch_xla $XLA_DIR $USE_CACHE
  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
  retry install_post_deps_pytorch_xla
  assert_git_not_dirty
}

@@ -1527,6 +1526,27 @@ test_linux_aarch64() {
    --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
}

test_operator_benchmark() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"
  TEST_DIR=$(pwd)

  test_inductor_set_cpu_affinity

  cd benchmarks/operator_benchmark/pt_extension
  python setup.py install

  cd "${TEST_DIR}"/benchmarks/operator_benchmark
  $TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \
    --output-dir "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv"

  pip_install pandas
  python check_perf_csv.py \
    --actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
    --expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
}


if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  (cd test && python -c "import torch; print(torch.__config__.show())")
  (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
@@ -1557,6 +1577,19 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    test_rpc
  fi
elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
  TEST_MODE="short"

  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    if [[ "${TEST_CONFIG}" == *long* ]]; then
      TEST_MODE="long"
    elif [[ "${TEST_CONFIG}" == *all* ]]; then
      TEST_MODE="all"
    fi

    test_operator_benchmark cpu ${TEST_MODE}

  fi
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
@@ -1619,6 +1652,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
  install_torchvision
  checkout_install_torchbench hf_T5 llama moco
  PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
  test_inductor_aoti
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
  install_torchvision
  test_inductor_shard "${SHARD_NUMBER}"
@@ -42,7 +42,6 @@ if "%DESIRED_PYTHON%" == "3.12" set "PYTHON_INSTALLER_URL=https://www.python.org
if "%DESIRED_PYTHON%" == "3.11" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.11.0/python-3.11.0-amd64.exe"
if "%DESIRED_PYTHON%" == "3.10" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe"
if "%DESIRED_PYTHON%" == "3.9" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe"
if "%DESIRED_PYTHON%" == "3.8" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.8.2/python-3.8.2-amd64.exe"
if "%PYTHON_INSTALLER_URL%" == "" (
  echo Python %DESIRED_PYTHON% not supported yet
)
@@ -128,7 +127,6 @@ goto end
:libtorch
echo "install and test libtorch"

if "%VC_YEAR%" == "2019" powershell internal\vs2019_install.ps1
if "%VC_YEAR%" == "2022" powershell internal\vs2022_install.ps1

if ERRORLEVEL 1 exit /b 1
@@ -140,10 +138,6 @@ pushd tmp\libtorch

set VC_VERSION_LOWER=17
set VC_VERSION_UPPER=18
IF "%VC_YEAR%" == "2019" (
  set VC_VERSION_LOWER=16
  set VC_VERSION_UPPER=17
)

for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
  if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (

@@ -70,7 +70,6 @@ echo "install and test libtorch"
pip install cmake
echo "installing cmake"

if "%VC_YEAR%" == "2019" powershell internal\vs2019_install.ps1
if "%VC_YEAR%" == "2022" powershell internal\vs2022_install.ps1

if ERRORLEVEL 1 exit /b 1
@@ -83,10 +82,6 @@ pushd tmp\libtorch

set VC_VERSION_LOWER=17
set VC_VERSION_UPPER=18
IF "%VC_YEAR%" == "2019" (
  set VC_VERSION_LOWER=16
  set VC_VERSION_UPPER=17
)

for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
  if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (

@@ -1,12 +1,8 @@
if "%VC_YEAR%" == "2019" powershell windows/internal/vs2019_install.ps1
if "%VC_YEAR%" == "2022" powershell windows/internal/vs2022_install.ps1

set VC_VERSION_LOWER=17
set VC_VERSION_UPPER=18
if "%VC_YEAR%" == "2019" (
  set VC_VERSION_LOWER=16
  set VC_VERSION_UPPER=17
)


for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -products Microsoft.VisualStudio.Product.BuildTools -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
  if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (

@@ -1,48 +0,0 @@
# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479
# https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers

# 16.8.6 BuildTools
$VS_DOWNLOAD_LINK = "https://ossci-windows.s3.us-east-1.amazonaws.com/vs16.8.6_BuildTools.exe"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
                     "--add Microsoft.Component.MSBuild",
                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
                     "--add Microsoft.VisualStudio.Component.TextTemplating",
                     "--add Microsoft.VisualStudio.Component.VC.CoreIde",
                     "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")

curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
    echo "Download of the VS 2019 Version 16.8.6 installer failed"
    exit 1
}

if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") {
    $existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[16, 17)" -property installationPath
    if ($existingPath -ne $null) {
        if (!${env:CIRCLECI}) {
            echo "Found correctly versioned existing BuildTools installation in $existingPath"
            exit 0
        }
        echo "Found existing BuildTools installation in $existingPath, keeping it"
    }
}

$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
    echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]."
    curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
    if ($LASTEXITCODE -ne 0) {
        echo "Download of the VS Collect tool failed."
        exit 1
    }
    Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
    New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
    Copy-Item -Path "C:\Users\${env:USERNAME}\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
    exit 1
}

@@ -90,12 +90,16 @@ fi
/pytorch/.ci/pytorch/check_binary.sh

if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_TYPE" != *rocm* && "$PACKAGE_TYPE" != libtorch ]]; then
  # Exclude s390, xpu, rocm and libtorch builds from smoke testing
  python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled

  if [[ "\$GPU_ARCH_TYPE" != *cpu-aarch64* ]]; then
    # test for issue https://github.com/pytorch/pytorch/issues/149422
    python /pytorch/.ci/pytorch/smoke_test/check_gomp.py
  torch_pkg_size="$(ls -1 /final_pkgs/torch-* | sort | tail -1 | xargs wc -c | cut -d ' ' -f1)"
  # todo: implement check for large binaries
  # If the package is larger than 1.5GB, we disable the pypi check.
  # Such a package bundles all libraries in the torch libs folder;
  # an example is https://download.pytorch.org/whl/cu126_full/torch
  if [[ "\$torch_pkg_size" -gt 1500000000 ]]; then
    python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled --pypi-pkg-check disabled
  else
    python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled $extra_parameters
  fi
fi

@@ -55,12 +55,16 @@ s3_upload() {
    s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
  fi
  (
    cache_control_flag=""
    if [[ "${UPLOAD_CHANNEL}" = "test" ]]; then
      cache_control_flag="--cache-control='no-cache,no-store,must-revalidate'"
    fi
    for pkg in ${PKG_DIR}/*.${extension}; do
      (
        set -x
        shm_id=$(sha256sum "${pkg}" | awk '{print $1}')
        ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \
          --metadata "checksum-sha256=${shm_id}"
          --metadata "checksum-sha256=${shm_id}" ${cache_control_flag}
      )
    done
  )

@@ -1,22 +0,0 @@
#!/bin/bash
set -eux -o pipefail

source "${BINARY_ENV_FILE:-/c/w/env}"
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"

export USE_SCCACHE=1
export SCCACHE_IGNORE_SERVER_IO_ERROR=1

echo "Free space on filesystem before build:"
df -h

export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"

if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  pytorch/.ci/pytorch/windows/arm64/build_libtorch.bat
elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
  pytorch/.ci/pytorch/windows/arm64/build_pytorch.bat
fi

echo "Free space on filesystem after build:"
df -h

@@ -1,6 +0,0 @@
#!/bin/bash
set -eux -o pipefail

source "${BINARY_ENV_FILE:-/c/w/env}"

pytorch/.ci/pytorch/windows/arm64/smoke_test.bat

@@ -4,14 +4,15 @@ set -eux -o pipefail
source "${BINARY_ENV_FILE:-/c/w/env}"
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"

export CUDA_VERSION="${DESIRED_CUDA/cu/}"
export USE_SCCACHE=1
export SCCACHE_BUCKET=ossci-compiler-cache
export SCCACHE_IGNORE_SERVER_IO_ERROR=1
export VC_YEAR=2019
if [[ "$OS" != "windows-arm64" ]]; then
  export CUDA_VERSION="${DESIRED_CUDA/cu/}"
  export USE_SCCACHE=1
  export SCCACHE_BUCKET=ossci-compiler-cache
  export SCCACHE_IGNORE_SERVER_IO_ERROR=1
  export VC_YEAR=2022
fi

if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
  export VC_YEAR=2022
  export USE_SCCACHE=0
  export XPU_VERSION=2025.0
  export XPU_ENABLE_KINETO=1
@@ -22,7 +23,16 @@ df -h

pushd "$PYTORCH_ROOT/.ci/pytorch/"
export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
./windows/internal/build_wheels.bat

if [[ "$OS" == "windows-arm64" ]]; then
  if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
    ./windows/arm64/build_libtorch.bat
  elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
    ./windows/arm64/build_pytorch.bat
  fi
else
  ./windows/internal/build_wheels.bat
fi

echo "Free space on filesystem after build:"
df -h

@@ -4,14 +4,18 @@ set -eux -o pipefail
source "${BINARY_ENV_FILE:-/c/w/env}"

export CUDA_VERSION="${DESIRED_CUDA/cu/}"
export VC_YEAR=2019
export VC_YEAR=2022

if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
  export VC_YEAR=2022
  export XPU_VERSION=2025.0
fi

pushd "$PYTORCH_ROOT/.ci/pytorch/"
./windows/internal/smoke_test.bat

if [[ "$OS" == "windows-arm64" ]]; then
  ./windows/arm64/smoke_test.bat
else
  ./windows/internal/smoke_test.bat
fi

popd

@@ -48,12 +48,10 @@ misc-*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-unused-using-decls,
-misc-use-internal-linkage,
modernize-*,
-modernize-macro-to-enum,
-modernize-return-braced-init-list,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-using,
-modernize-use-trailing-return-type,
-modernize-use-nodiscard,

14 .editorconfig Normal file
@@ -0,0 +1,14 @@
root = true

[*]
end_of_line = lf
insert_final_newline = true

# Python
[*.py]
indent_style = space
indent_size = 4

# Make
[Makefile]
indent_style = tab

2 .github/ISSUE_TEMPLATE/disable-ci-jobs.md vendored
@@ -5,7 +5,7 @@ title: "DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]"
labels: "module: ci"
---

> For example, DISABLED pull / win-vs2019-cpu-py3 / test (default). Once
> For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once
> created, the job will be disabled within 15 minutes. You can check the
> list of disabled jobs at https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json

2 .github/ISSUE_TEMPLATE/pt2-bug-report.yml vendored
@@ -20,7 +20,7 @@ body:

      - Don't compare indices of max/min etc, because that avoids the above requirement

      - If comparing eager and torch.compile at fp16/bf16, you should use fp32 as baseline
      - When comparing eager and torch.compile, use a higher precision result as a baseline. `torch._dynamo.utils.same` with fp64_ref will handle this comparison.

      - Ensure rng state used to compare results is equivalent. Use `torch._inductor.config.fallback_random=True` and reset the torch rng seed between comparisons

7 .github/actionlint.yaml vendored
@@ -45,10 +45,15 @@ self-hosted-runner:
  - windows.g5.4xlarge.nvidia.gpu
  # Windows ARM64 runners
  - windows-11-arm64
  # Organization-wide AMD hosted runners
  # Organization-wide AMD-hosted runners
  # MI2xx runners
  - linux.rocm.gpu
  - linux.rocm.gpu.2
  - linux.rocm.gpu.4
  # MI300 runners
  - linux.rocm.gpu.mi300.2
  - linux.rocm.gpu.mi300.4
  - rocm-docker
  # Repo-specific Apple hosted runners
  - macos-m1-ultra
  - macos-m2-14

70 .github/actions/binary-docker-build/action.yml vendored Normal file
@@ -0,0 +1,70 @@
name: Binary docker build

description: Build docker image for binary builds

inputs:
  docker-image-name:
    description: Docker image name for PR builds
    required: true
  docker-build-dir:
    description: Location of the build.sh relative to .ci/docker
    required: true
  custom-tag-prefix:
    description: Custom tag prefix for the docker image
    required: false
  DOCKER_TOKEN:
    description: Docker token for authentication
    required: true
  DOCKER_ID:
    description: Docker ID for authentication
    required: true

runs:
  using: composite
  steps:
    - name: Checkout PyTorch
      uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

    - name: Calculate docker image
      id: calculate-docker-image
      uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
      with:
        docker-image-name: ${{ inputs.docker-image-name }}
        docker-build-dir: .ci/docker
        custom-tag-prefix: ${{ inputs.custom-tag-prefix }}
        docker-build-script: ${{ inputs.docker-build-dir }}/build.sh
        always-rebuild: true
        push: true

    - name: Tag and (if WITH_PUSH) push docker image to docker.io
      env:
        DOCKER_TOKEN: ${{ inputs.DOCKER_TOKEN }}
        DOCKER_ID: ${{ inputs.DOCKER_ID }}
        DOCKER_IMAGE_NAME: ${{ inputs.docker-image-name }}
        DOCKER_IMAGE_PREFIX: ${{ inputs.custom-tag-prefix }}
        CREATED_FULL_DOCKER_IMAGE_NAME: ${{ steps.calculate-docker-image.outputs.docker-image }}
      shell: bash
      run: |
        set -euox pipefail
        GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
        GIT_BRANCH_NAME=${GITHUB_REF##*/}
        GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
        CI_FOLDER_SHA=$(git rev-parse HEAD:.ci/docker)

        DOCKER_IMAGE_NAME_PREFIX=docker.io/pytorch/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_PREFIX}

        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}

        # Pretty sure GitHub will mask tokens and I'm not sure if it will even be
        # printed due to pipe, but just in case
        set +x
        if [[ ${WITH_PUSH:-false} == "true" ]]; then
          echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
          docker push ${DOCKER_IMAGE_NAME_PREFIX}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}
        fi

46 .github/actions/checkout-pytorch/action.yml vendored
@@ -23,9 +23,44 @@ runs:
      id: check_container_runner
      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

    - name: Clean workspace
    - name: Set up parallel fetch and clean workspace
      id: first-clean
      continue-on-error: true
      shell: bash
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      env:
        NO_SUDO: ${{ inputs.no-sudo }}
      run: |
        # Use all available CPUs for fetching
        cd "${GITHUB_WORKSPACE}"
        git config --global fetch.parallel 0
        git config --global submodule.fetchJobs 0

        # Clean workspace. The default checkout action should also do this, but
        # do it here as well just in case
        if [[ -d .git ]]; then
          if [ -z "${NO_SUDO}" ]; then
            sudo git clean -ffdx
          else
            git clean -ffdx
          fi
        fi

    - name: Checkout PyTorch
      id: first-checkout-attempt
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # --depth=1 for speed, manually fetch history and other refs as necessary
        fetch-depth: ${{ inputs.fetch-depth }}
        submodules: ${{ inputs.submodules }}
        show-progress: false

    - name: Clean workspace (try again)
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
              (steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
      shell: bash
      env:
        NO_SUDO: ${{ inputs.no-sudo }}
      run: |
@@ -40,16 +75,11 @@ runs:
        fi
        mkdir "${GITHUB_WORKSPACE}"

        # Use all available CPUs for fetching
        cd "${GITHUB_WORKSPACE}"
        git config --global fetch.parallel 0
        git config --global submodule.fetchJobs 0

    - name: Checkout PyTorch
    - name: Checkout PyTorch (try again)
      uses: actions/checkout@v4
      if: ${{ steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success' }}
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # --depth=1 for speed, manually fetch history and other refs as necessary
        fetch-depth: ${{ inputs.fetch-depth }}
        submodules: ${{ inputs.submodules }}
        show-progress: false

2 .github/actions/linux-test/action.yml vendored
@@ -66,7 +66,7 @@ runs:

    - name: configure aws credentials
      if: ${{ inputs.aws-role-to-assume != '' }}
      uses: aws-actions/configure-aws-credentials@v3
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ inputs.aws-role-to-assume }}
        role-session-name: gha-linux-test

@@ -15,7 +15,6 @@ runs:
        -e BINARY_ENV_FILE \
        -e BUILD_ENVIRONMENT \
        -e DESIRED_CUDA \
        -e DESIRED_DEVTOOLSET \
        -e DESIRED_PYTHON \
        -e GITHUB_ACTIONS \
        -e GPU_ARCH_TYPE \

10 .github/actions/upload-test-artifacts/action.yml vendored
@@ -48,14 +48,8 @@ runs:
      run: |
        # Remove any previous usage logs if they exist
        rm -f logs-*.zip
        # this workflow is also run in bazel build test, but we don't generate usage reports for it
        # so check to see if the file exists first
        if [ -f 'usage_log.txt' ]; then
          zip "logs-${FILE_SUFFIX}.zip" 'usage_log.txt'
        fi
        if find "test/test-reports" -name "*.log" 2>/dev/null | grep -q .; then
          zip -r "logs-${FILE_SUFFIX}.zip" test/test-reports -i '*.log'
        fi
        zip "logs-${FILE_SUFFIX}.zip" 'usage_log.txt' || true
        zip -r "logs-${FILE_SUFFIX}.zip" test/test-reports -i '*.log' || true

    - name: Zip debugging artifacts for upload
      if: runner.os != 'Windows' && !inputs.use-gha

2 .github/ci_commit_pins/audio.txt vendored
@@ -1 +1 @@
c670ad81fda266b6598aeeef434583eb98197ae8
bccaa454a54c3c648697cc2f46a4fb0500b1f01b

2 .github/ci_commit_pins/xla.txt vendored
@@ -1 +1 @@
r2.7
ac9a39f4b768cef09b9d2be8e074be496d7783b6

19 .github/labeler.yml vendored
@@ -112,3 +112,22 @@
- torch/csrc/inductor/aoti_include/xpu.h
- torch/csrc/inductor/cpp_wrapper/device_internal/xpu.h
- torch/csrc/inductor/cpp_wrapper/xpu.h

"release notes: inductor (aoti)":
- torch/_C/_aoti.pyi
- torch/_dynamo/repro/aoti.py
- torch/_export/serde/aoti_schema.py
- torch/_higher_order_ops/aoti_call_delegate.py
- torch/_inductor/codegen/aoti_runtime/**
- torch/_inductor/codegen/aoti_hipify_utils.py
- torch/_inductor/codegen/cpp_wrapper_cpu.py
- torch/_inductor/codegen/cpp_wrapper_gpu.py
- torch/_inductor/aoti_eager.py
- torch/csrc/inductor/aoti_runtime/**
- torch/csrc/inductor/aoti_torch/**
- torch/csrc/inductor/aoti_runner/**
- torch/csrc/inductor/aoti_eager/**
- torch/csrc/inductor/aoti_package/**
- torch/csrc/inductor/aoti_include/**
- torchgen/aoti/**
- torchgen/gen_aoti_c_shim.py

3 .github/merge_rules.yaml vendored
@@ -501,7 +501,9 @@
- name: XPU
  patterns:
  - '**xpu**'
  - '**XPU**'
  - '**sycl**'
  - '**SYCL**'
  approved_by:
  - EikanWang
  - jgong5
@@ -538,6 +540,7 @@
  - bdhirsh
  - zou3519
  - isuruf
  - Chillee
  mandatory_checks_name:
  - EasyCLA
  - Lint

2 .github/pytorch-probot.yml vendored
@@ -16,6 +16,7 @@ ciflow_push_tags:
- ciflow/mps
- ciflow/nightly
- ciflow/periodic
- ciflow/periodic-rocm-mi300
- ciflow/rocm
- ciflow/rocm-mi300
- ciflow/s390
@@ -25,6 +26,7 @@ ciflow_push_tags:
- ciflow/xpu
- ciflow/torchbench
- ciflow/autoformat
- ciflow/op-benchmark
retryable_workflows:
- pull
- trunk

6 .github/scripts/amd/package_triton_wheel.sh vendored
@@ -61,10 +61,14 @@ fi
ROCM_SO=(
  "${libamdhip}"
  "libhsa-runtime64.so.1"
  "libamd_comgr.so.2"
  "libdrm.so.2"
  "libdrm_amdgpu.so.1"
)
if [[ $ROCM_INT -ge 60400 ]]; then
  ROCM_SO+=("libamd_comgr.so.3")
else
  ROCM_SO+=("libamd_comgr.so.2")
fi

if [[ $ROCM_INT -ge 60100 ]]; then
  ROCM_SO+=("librocprofiler-register.so.0")

4 .github/scripts/filter_test_configs.py vendored
@@ -39,9 +39,9 @@ SUPPORTED_PERIODICAL_MODES: dict[str, Callable[[Optional[str]], bool]] = {
}

# The link to the published list of disabled jobs
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=n.FT07XR3dLMwOLBwmRNquyYSeGk8Het"
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
# and unstable jobs
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=.Ox7WAXa21I1PVqadHyPfhMRPhl0aCnD"
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"

# Some constants used to handle disabled and unstable jobs
JOB_NAME_SEP = "/"

56 .github/scripts/generate_binary_build_matrix.py vendored
@@ -30,12 +30,10 @@ CUDA_ARCHES_CUDNN_VERSION = {
}

# NOTE: Also update the ROCm sources in tools/nightly.py when changing this list
ROCM_ARCHES = ["6.2.4", "6.3"]
ROCM_ARCHES = ["6.3", "6.4"]

XPU_ARCHES = ["xpu"]

CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]

CPU_AARCH64_ARCH = ["cpu-aarch64"]

CPU_S390X_ARCH = ["cpu-s390x"]
@@ -77,7 +75,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
        "nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu12==9.8.0.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
@@ -146,8 +144,6 @@ def arch_type(arch_version: str) -> str:
        return "rocm"
    elif arch_version in XPU_ARCHES:
        return "xpu"
    elif arch_version in CPU_CXX11_ABI_ARCH:
        return "cpu-cxx11-abi"
    elif arch_version in CPU_AARCH64_ARCH:
        return "cpu-aarch64"
    elif arch_version in CPU_S390X_ARCH:
@@ -176,31 +172,23 @@ WHEEL_CONTAINER_IMAGES = {
    },
    "xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
    "cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
    "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
    "cpu-s390x": "pytorch/manylinuxs390x-builder:cpu-s390x",
}

CXX11_ABI = "cxx11-abi"
RELEASE = "release"
DEBUG = "debug"

LIBTORCH_CONTAINER_IMAGES: dict[tuple[str, str], str] = {
LIBTORCH_CONTAINER_IMAGES: dict[str, str] = {
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        gpu_arch: f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        gpu_arch: f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    ("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
    "cpu": f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
}

FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
@@ -210,7 +198,6 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
    return {
        "cpu": "cpu",
        "cpu-aarch64": "cpu",
        "cpu-cxx11-abi": "cpu-cxx11-abi",
        "cpu-s390x": "cpu",
        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
        "cuda-aarch64": f"cu{gpu_arch_version.replace('-aarch64', '').replace('.', '')}",
@@ -225,7 +212,7 @@ def list_without(in_list: list[str], without: list[str]) -> list[str]:

def generate_libtorch_matrix(
    os: str,
    abi_version: str,
    release_type: str,
    arches: Optional[list[str]] = None,
    libtorch_variants: Optional[list[str]] = None,
) -> list[dict[str, str]]:
@@ -247,9 +234,6 @@ def generate_libtorch_matrix(
    ret: list[dict[str, str]] = []
    for arch_version in arches:
        for libtorch_variant in libtorch_variants:
            # one of the values in the following list must be exactly
            # CXX11_ABI, but the precise value of the other one doesn't
            # matter
            gpu_arch_type = arch_type(arch_version)
            gpu_arch_version = "" if arch_version == "cpu" else arch_version
            # ROCm builds without-deps failed even in ROCm runners; skip for now
@@ -262,20 +246,15 @@ def generate_libtorch_matrix(
                    "desired_cuda": translate_desired_cuda(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "libtorch_config": release_type,
                    "libtorch_variant": libtorch_variant,
                    "libtorch_config": abi_version
                    if os in ("windows", "windows-arm64")
                    else "",
                    "devtoolset": abi_version
                    if os not in ("windows", "windows-arm64")
                    else "",
                    "container_image": (
                        LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
                        LIBTORCH_CONTAINER_IMAGES[arch_version]
                        if os not in ("windows", "windows-arm64")
                        else ""
                    ),
                    "package_type": "libtorch",
                    "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
                    "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{release_type}".replace(
                        ".", "_"
                    ),
                }
@@ -301,7 +280,7 @@ def generate_wheels_matrix(
    # Define default compute architectures
    arches = ["cpu"]
    if os == "linux":
        arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
        arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
    elif os == "windows":
        arches += CUDA_ARCHES + XPU_ARCHES
    elif os == "linux-aarch64":
@@ -320,7 +299,6 @@ def generate_wheels_matrix(
        gpu_arch_version = (
            ""
            if arch_version == "cpu"
            or arch_version == "cpu-cxx11-abi"
            or arch_version == "cpu-aarch64"
            or arch_version == "cpu-s390x"
            or arch_version == "xpu"
@@ -355,7 +333,6 @@ def generate_wheels_matrix(
                    "gpu_arch_version": gpu_arch_version,
                    "desired_cuda": desired_cuda,
                    "use_split_build": "True" if use_split_build else "False",
                    "devtoolset": "cxx11-abi",
                    "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                    "package_type": package_type,
                    "pytorch_extra_install_requirements": (
@@ -384,7 +361,6 @@ def generate_wheels_matrix(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "use_split_build": "True" if use_split_build else "False",
                    "devtoolset": "",
                    "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                    "package_type": package_type,
                    "pytorch_extra_install_requirements": "",
@@ -403,12 +379,6 @@ def generate_wheels_matrix(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "use_split_build": "True" if use_split_build else "False",
                    "devtoolset": (
                        "cxx11-abi"
                        if (arch_version in ["cpu-cxx11-abi", "cpu-aarch64"])
                        or os == "linux"
                        else ""
                    ),
                    "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                    "package_type": package_type,
                    "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
@@ -417,6 +387,8 @@ def generate_wheels_matrix(
                    "pytorch_extra_install_requirements": (
                        PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
                        if gpu_arch_type == "xpu"
                        else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
                        if os != "linux"
                        else ""
                    ),
                }
File: .github/scripts/generate_ci_workflows.py (105 changes, vendored)

@@ -54,7 +54,6 @@ class BinaryBuildWorkflow:

# Optional fields
build_environment: str = ""
abi_version: str = ""
ciflow_config: CIFlowConfig = field(default_factory=CIFlowConfig)
is_scheduled: str = ""
branches: str = "nightly"
@@ -62,14 +61,16 @@
cross_compile_arm64: bool = False
macos_runner: str = "macos-14-xlarge"
use_split_build: bool = False
# Mainly used for libtorch builds
build_variant: str = ""

def __post_init__(self) -> None:
if self.abi_version:
self.build_environment = (
f"{self.os}-binary-{self.package_type}-{self.abi_version}"
if self.build_environment == "":
self.build_environment = "-".join(
item
for item in [self.os, "binary", self.package_type, self.build_variant]
if item != ""
)
else:
self.build_environment = f"{self.os}-binary-{self.package_type}"
if self.use_split_build:
# added to distinguish concurrency groups
self.build_environment += "-split"
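The join in the new __post_init__ skips empty components, so build_variant only appears in the name when it is set. A small self-contained illustration of that behavior (the helper function is ours, mirroring the "-".join construction above):

    # Illustrative only: how the new build_environment name is assembled.
    def build_env(os_name: str, package_type: str, build_variant: str = "") -> str:
        return "-".join(
            item
            for item in [os_name, "binary", package_type, build_variant]
            if item != ""
        )

    print(build_env("windows", "libtorch", "release"))  # windows-binary-libtorch-release
    print(build_env("linux", "wheel"))                  # linux-binary-wheel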
@@ -133,10 +134,9 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.LINUX,
generate_binary_build_matrix.CXX11_ABI,
generate_binary_build_matrix.RELEASE,
libtorch_variants=["shared-with-deps"],
),
ciflow_config=CIFlowConfig(
@@ -176,10 +176,10 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.LINUX,
generate_binary_build_matrix.CXX11_ABI,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
@@ -202,7 +202,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.RELEASE,
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
@@ -216,7 +216,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.DEBUG,
build_variant=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
@@ -227,42 +227,6 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
isolated_workflow=True,
),
),
]

WINDOWS_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
]

WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="wheel",
@@ -279,7 +243,7 @@ WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="libtorch",
abi_version=generate_binary_build_matrix.RELEASE,
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS_ARM64,
generate_binary_build_matrix.RELEASE,
@@ -294,7 +258,7 @@ WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="libtorch",
abi_version=generate_binary_build_matrix.DEBUG,
build_variant=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS_ARM64,
generate_binary_build_matrix.DEBUG,
@@ -308,14 +272,47 @@ WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
),
]

WINDOWS_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
build_variant=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
]

MACOS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.MACOS_ARM64,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.MACOS,
generate_binary_build_matrix.CXX11_ABI,
generate_binary_build_matrix.RELEASE,
libtorch_variants=["shared-with-deps"],
),
cross_compile_arm64=False,
@@ -402,10 +399,6 @@ def main() -> None:
jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
WINDOWS_BINARY_SMOKE_WORKFLOWS,
),
(
jinja_env.get_template("windows_arm64_binary_build_workflow.yml.j2"),
WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS,
),
(
jinja_env.get_template("macos_binary_build_workflow.yml.j2"),
MACOS_BINARY_BUILD_WORKFLOWS,
File: .github/scripts/lintrunner.sh (5 changes, vendored)

@@ -1,11 +1,6 @@
#!/usr/bin/env bash
set -ex

# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"
conda activate "${CONDA_ENV}"

# Use uv to speed up lintrunner init
python3 -m pip install uv==0.1.45
@@ -5,6 +5,50 @@ FROM --platform=linux/amd64 docker.io/ubuntu:24.04 as ld-prefix
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install ca-certificates libicu74 libssl3

# Patched podman
FROM --platform=linux/s390x docker.io/ubuntu:24.04 as podman
ENV DEBIAN_FRONTEND=noninteractive
RUN sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources
RUN apt-get update && \
    apt-get install -y \
        cmake \
        curl \
        devscripts \
        dpkg-dev \
        gdb \
        less \
        make \
        python3 \
        python3-pip \
        quilt \
        rsync \
        software-properties-common \
        stress-ng \
        vim \
        nano \
        wget && \
    apt-get build-dep -y podman && \
    apt-get source podman

COPY podman-patches/podman-25245.patch /tmp/podman-25245.patch
COPY podman-patches/podman-25102-backport.patch /tmp/podman-25102-backport.patch

# import and apply patches
# patches:
# https://github.com/containers/podman/pull/25102
# https://github.com/containers/podman/pull/25245
RUN cd /libpod-* && \
    quilt import /tmp/podman-25245.patch && quilt push && \
    quilt import /tmp/podman-25102-backport.patch && quilt push && \
    dch -i "Fix podman deadlock and add option to clean up build leftovers" && \
    /bin/rm /tmp/podman-25245.patch /tmp/podman-25102-backport.patch

# build patched podman
RUN cd /libpod-* && \
    debuild -i -us -uc -b && \
    /bin/rm /podman-remote_*.deb && \
    mkdir /tmp/podman && cp -v /podman*.deb /tmp/podman

# Main image.
FROM --platform=linux/s390x docker.io/ubuntu:24.04

@@ -45,7 +89,11 @@ COPY fs/ /
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint

# install podman
RUN apt -y install podman podman-docker
# RUN apt-get update && apt -y install podman podman-docker

# install patched podman
COPY --from=podman /tmp/podman /tmp/podman
RUN apt-get update && apt -y install /tmp/podman/*.deb && /bin/rm -rfv /tmp/podman

# amd64 Github Actions Runner.
RUN useradd -m actions-runner
@@ -65,7 +113,7 @@ RUN virtualenv --system-site-packages venv
#
COPY --chown=actions-runner:actions-runner manywheel-s390x.tar /home/actions-runner/manywheel-s390x.tar

RUN curl -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz | tar -xz
RUN curl -L https://github.com/actions/runner/releases/download/v2.322.0/actions-runner-linux-x64-2.322.0.tar.gz | tar -xz

ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]
File: .github/scripts/s390x-ci/self-hosted-builder/podman-patches/podman-25102-backport.patch (358 changes, vendored, new file)

@@ -0,0 +1,358 @@
diff --git a/cmd/podman/system/prune.go b/cmd/podman/system/prune.go
index f7cf7b551..739f87cde 100644
--- a/cmd/podman/system/prune.go
+++ b/cmd/podman/system/prune.go
@@ -48,6 +48,7 @@ func init() {
flags.BoolVarP(&force, "force", "f", false, "Do not prompt for confirmation. The default is false")
flags.BoolVarP(&pruneOptions.All, "all", "a", false, "Remove all unused data")
flags.BoolVar(&pruneOptions.External, "external", false, "Remove container data in storage not controlled by podman")
+ flags.BoolVar(&pruneOptions.Build, "build", false, "Remove build containers")
flags.BoolVar(&pruneOptions.Volume, "volumes", false, "Prune volumes")
filterFlagName := "filter"
flags.StringArrayVar(&filters, filterFlagName, []string{}, "Provide filter values (e.g. 'label=<key>=<value>')")
@@ -64,8 +65,12 @@ func prune(cmd *cobra.Command, args []string) error {
volumeString = `
- all volumes not used by at least one container`
}
-
- fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, "Are you sure you want to continue? [y/N] ")
+ buildString := ""
+ if pruneOptions.Build {
+ buildString = `
+ - all build containers`
+ }
+ fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, buildString, "Are you sure you want to continue? [y/N] ")

answer, err := reader.ReadString('\n')
if err != nil {
@@ -124,7 +129,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
if pruneOpts.All {
return `WARNING! This command removes:
- all stopped containers
- - all networks not used by at least one container%s
+ - all networks not used by at least one container%s%s
- all images without at least one container associated with them
- all build cache

@@ -132,7 +137,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
}
return `WARNING! This command removes:
- all stopped containers
- - all networks not used by at least one container%s
+ - all networks not used by at least one container%s%s
- all dangling images
- all dangling build cache

diff --git a/docs/source/markdown/podman-system-prune.1.md b/docs/source/markdown/podman-system-prune.1.md
index 52f9ec1c7..95099d018 100644
--- a/docs/source/markdown/podman-system-prune.1.md
+++ b/docs/source/markdown/podman-system-prune.1.md
@@ -7,20 +7,28 @@ podman\-system\-prune - Remove all unused pods, containers, images, networks, an
**podman system prune** [*options*]

## DESCRIPTION
-**podman system prune** removes all unused containers (both dangling and unreferenced), pods, networks, and optionally, volumes from local storage.
+**podman system prune** removes all unused containers (both dangling and unreferenced), build containers, pods, networks, and optionally, volumes from local storage.

Use the **--all** option to delete all unused images. Unused images are dangling images as well as any image that does not have any containers based on it.

By default, volumes are not removed to prevent important data from being deleted if there is currently no container using the volume. Use the **--volumes** flag when running the command to prune volumes as well.

+By default, build containers are not removed to prevent interference with builds in progress. Use the **--build** flag when running the command to remove build containers as well.
+
## OPTIONS
#### **--all**, **-a**

Recursively remove all unused pods, containers, images, networks, and volume data. (Maximum 50 iterations.)

+#### **--build**
+
+Removes any build containers that were created during the build, but were not removed because the build was unexpectedly terminated.
+
+Note: **This is not safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.**
+
#### **--external**

-Removes all leftover container storage files from local storage not managed by Podman. In normal circumstances, no such data exists, but in case of an unclean shutdown, the Podman database may be corrupted and cause this.
+Tries to clean up remainders of previous containers or layers that are not references in the storage json files. These can happen in the case of unclean shutdowns or regular restarts in transient storage mode.

However, when using transient storage mode, the Podman database does not persist. This means containers leave the writable layers on disk after a reboot. When using a transient store, it is recommended that the **podman system prune --external** command is run during boot.

diff --git a/libpod/runtime.go b/libpod/runtime.go
index 986e40f60..609fbba57 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -33,6 +33,7 @@ import (
"github.com/containers/podman/v4/libpod/lock"
"github.com/containers/podman/v4/libpod/plugin"
"github.com/containers/podman/v4/libpod/shutdown"
+ "github.com/containers/podman/v4/pkg/domain/entities/reports"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/systemd"
"github.com/containers/podman/v4/pkg/util"
@@ -1250,3 +1251,52 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) {

return toReturn, locksHeld, nil
}
+
+// Exists checks whether a file or directory exists at the given path.
+// If the path is a symlink, the symlink is followed.
+func Exists(path string) error {
+ // It uses unix.Faccessat which is a faster operation compared to os.Stat for
+ // simply checking the existence of a file.
+ err := unix.Faccessat(unix.AT_FDCWD, path, unix.F_OK, 0)
+ if err != nil {
+ return &os.PathError{Op: "faccessat", Path: path, Err: err}
+ }
+ return nil
+}
+
+// PruneBuildContainers removes any build containers that were created during the build,
+// but were not removed because the build was unexpectedly terminated.
+//
+// Note: This is not safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.
+func (r *Runtime) PruneBuildContainers() ([]*reports.PruneReport, error) {
+ stageContainersPruneReports := []*reports.PruneReport{}
+
+ containers, err := r.store.Containers()
+ if err != nil {
+ return stageContainersPruneReports, err
+ }
+ for _, container := range containers {
+ path, err := r.store.ContainerDirectory(container.ID)
+ if err != nil {
+ return stageContainersPruneReports, err
+ }
+ if err := Exists(filepath.Join(path, "buildah.json")); err != nil {
+ continue
+ }
+
+ report := &reports.PruneReport{
+ Id: container.ID,
+ }
+ size, err := r.store.ContainerSize(container.ID)
+ if err != nil {
+ report.Err = err
+ }
+ report.Size = uint64(size)
+
+ if err := r.store.DeleteContainer(container.ID); err != nil {
+ report.Err = errors.Join(report.Err, err)
+ }
+ stageContainersPruneReports = append(stageContainersPruneReports, report)
+ }
+ return stageContainersPruneReports, nil
+}
diff --git a/pkg/api/handlers/libpod/system.go b/pkg/api/handlers/libpod/system.go
index 70d4493f8..7c129b1ba 100644
--- a/pkg/api/handlers/libpod/system.go
+++ b/pkg/api/handlers/libpod/system.go
@@ -22,6 +22,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
All bool `schema:"all"`
Volumes bool `schema:"volumes"`
External bool `schema:"external"`
+ Build bool `schema:"build"`
}{}

if err := decoder.Decode(&query, r.URL.Query()); err != nil {
@@ -43,6 +44,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
Volume: query.Volumes,
Filters: *filterMap,
External: query.External,
+ Build: query.Build,
}
report, err := containerEngine.SystemPrune(r.Context(), pruneOptions)
if err != nil {
diff --git a/pkg/bindings/system/types.go b/pkg/bindings/system/types.go
index 89e093f68..b4a4ff064 100644
--- a/pkg/bindings/system/types.go
+++ b/pkg/bindings/system/types.go
@@ -18,6 +18,7 @@ type PruneOptions struct {
Filters map[string][]string
Volumes *bool
External *bool
+ Build *bool
}

// VersionOptions are optional options for getting version info
diff --git a/pkg/bindings/system/types_prune_options.go b/pkg/bindings/system/types_prune_options.go
index d00498520..5f3bd652c 100644
--- a/pkg/bindings/system/types_prune_options.go
+++ b/pkg/bindings/system/types_prune_options.go
@@ -76,3 +76,18 @@ func (o *PruneOptions) GetExternal() bool {
}
return *o.External
}
+
+// WithBuild set field Build to given value
+func (o *PruneOptions) WithBuild(value bool) *PruneOptions {
+ o.Build = &value
+ return o
+}
+
+// GetBuild returns value of field Build
+func (o *PruneOptions) GetBuild() bool {
+ if o.Build == nil {
+ var z bool
+ return z
+ }
+ return *o.Build
+}
diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go
index 473db3530..f6938652a 100644
--- a/pkg/domain/entities/system.go
+++ b/pkg/domain/entities/system.go
@@ -22,6 +22,7 @@ type SystemPruneOptions struct {
Volume bool
Filters map[string][]string `json:"filters" schema:"filters"`
External bool
+ Build bool
}

// SystemPruneReport provides report after system prune is executed.
diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go
index 24ee64d29..ea3e5f203 100644
--- a/pkg/domain/infra/abi/system.go
+++ b/pkg/domain/infra/abi/system.go
@@ -150,16 +150,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
return nil
}

-// SystemPrune removes unused data from the system. Pruning pods, containers, networks, volumes and images.
+// SystemPrune removes unused data from the system. Pruning pods, containers, build container, networks, volumes and images.
func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
var systemPruneReport = new(entities.SystemPruneReport)

if options.External {
- if options.All || options.Volume || len(options.Filters) > 0 {
+ if options.All || options.Volume || len(options.Filters) > 0 || options.Build {
return nil, fmt.Errorf("system prune --external cannot be combined with other options")
}
- err := ic.Libpod.GarbageCollect()
- if err != nil {
+
+ if err := ic.Libpod.GarbageCollect(); err != nil {
return nil, err
}
return systemPruneReport, nil
@@ -170,6 +170,17 @@ func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.Sys
filters = append(filters, fmt.Sprintf("%s=%s", k, v[0]))
}
reclaimedSpace := (uint64)(0)
+
+ // Prune Build Containers
+ if options.Build {
+ stageContainersPruneReports, err := ic.Libpod.PruneBuildContainers()
+ if err != nil {
+ return nil, err
+ }
+ reclaimedSpace += reports.PruneReportsSize(stageContainersPruneReports)
+ systemPruneReport.ContainerPruneReports = append(systemPruneReport.ContainerPruneReports, stageContainersPruneReports...)
+ }
+
found := true
for found {
found = false
diff --git a/pkg/domain/infra/tunnel/system.go b/pkg/domain/infra/tunnel/system.go
index fc82e7b2b..142a9fa5c 100644
--- a/pkg/domain/infra/tunnel/system.go
+++ b/pkg/domain/infra/tunnel/system.go
@@ -19,7 +19,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)

// SystemPrune prunes unused data from the system.
func (ic *ContainerEngine) SystemPrune(ctx context.Context, opts entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
- options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External)
+ options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External).WithBuild(opts.Build)
return system.Prune(ic.ClientCtx, options)
}

diff --git a/test/e2e/prune_test.go b/test/e2e/prune_test.go
index 01e848478..57bd5582d 100644
--- a/test/e2e/prune_test.go
+++ b/test/e2e/prune_test.go
@@ -4,6 +4,8 @@ import (
"fmt"
"os"
"path/filepath"
+ "syscall"
+ "time"

. "github.com/containers/podman/v4/test/utils"
. "github.com/onsi/ginkgo/v2"
@@ -22,6 +24,11 @@ FROM scratch
ENV test1=test1
ENV test2=test2`

+var longBuildImage = fmt.Sprintf(`
+FROM %s
+RUN echo "Hello, World!"
+RUN RUN echo "Please use signal 9 this will never ends" && sleep 10000s`, ALPINE)
+
var _ = Describe("Podman prune", func() {

It("podman container prune containers", func() {
@@ -593,4 +600,63 @@ var _ = Describe("Podman prune", func() {
Expect(err).ToNot(HaveOccurred())
Expect(dirents).To(HaveLen(3))
})
+
+ It("podman system prune --build clean up after terminated build", func() {
+ useCustomNetworkDir(podmanTest, tempdir)
+
+ podmanTest.BuildImage(pruneImage, "alpine_notleaker:latest", "false")
+
+ create := podmanTest.Podman([]string{"create", "--name", "test", BB, "sleep", "10000"})
+ create.WaitWithDefaultTimeout()
+ Expect(create).Should(ExitCleanly())
+
+ containerFilePath := filepath.Join(podmanTest.TempDir, "ContainerFile-podman-leaker")
+ err := os.WriteFile(containerFilePath, []byte(longBuildImage), 0755)
+ Expect(err).ToNot(HaveOccurred())
+
+ build := podmanTest.Podman([]string{"build", "-f", containerFilePath, "-t", "podmanleaker"})
+ // Build will never finish so let's wait for build to ask for SIGKILL to simulate a failed build that leaves stage containers.
+ matchedOutput := false
+ for range 900 {
+ if build.LineInOutputContains("Please use signal 9") {
+ matchedOutput = true
+ build.Signal(syscall.SIGKILL)
+ break
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ if !matchedOutput {
+ Fail("Did not match special string in podman build")
+ }
+
+ // Check Intermediate image of stage container
+ none := podmanTest.Podman([]string{"images", "-a"})
+ none.WaitWithDefaultTimeout()
+ Expect(none).Should(ExitCleanly())
+ Expect(none.OutputToString()).Should(ContainSubstring("none"))
+
+ // Check if Container and Stage Container exist
+ count := podmanTest.Podman([]string{"ps", "-aq", "--external"})
+ count.WaitWithDefaultTimeout()
+ Expect(count).Should(ExitCleanly())
+ Expect(count.OutputToStringArray()).To(HaveLen(3))
+
+ prune := podmanTest.Podman([]string{"system", "prune", "--build", "-f"})
+ prune.WaitWithDefaultTimeout()
+ Expect(prune).Should(ExitCleanly())
+
+ // Container should still exist, but no stage containers
+ count = podmanTest.Podman([]string{"ps", "-aq", "--external"})
+ count.WaitWithDefaultTimeout()
+ Expect(count).Should(ExitCleanly())
+ Expect(count.OutputToString()).To(BeEmpty())
+
+ Expect(podmanTest.NumberOfContainers()).To(Equal(0))
+
+ after := podmanTest.Podman([]string{"images", "-a"})
+ after.WaitWithDefaultTimeout()
+ Expect(after).Should(ExitCleanly())
+ Expect(after.OutputToString()).ShouldNot(ContainSubstring("none"))
+ Expect(after.OutputToString()).Should(ContainSubstring("notleaker"))
+ })
})
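The new flag wired up by this backport is exercised in the e2e test above as "podman system prune --build -f". A minimal sketch of how a CI cleanup step might invoke it from Python (the wrapper function and error handling are illustrative, not part of the patch; only the command itself comes from the test):

    import subprocess

    def prune_build_containers() -> None:
        # Remove leftover buildah stage containers from interrupted builds.
        # Per the patched man page, only safe when no builds are in progress.
        subprocess.run(
            ["podman", "system", "prune", "--build", "-f"],
            check=True,
        )

    if __name__ == "__main__":
        prune_build_containers()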
File: .github/scripts/s390x-ci/self-hosted-builder/podman-patches/podman-25245.patch (21 changes, vendored, new file)

@@ -0,0 +1,21 @@
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 4f71d49e5c..3d74af6a6c 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -658,7 +658,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
if (pipe (p) < 0)
return -1;

- pid = fork ();
+ pid = syscall_clone (SIGCHLD, NULL);
if (pid < 0)
{
close (p[0]);
@@ -689,7 +689,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
close (p[0]);

setsid ();
- pid = fork ();
+ pid = syscall_clone (SIGCHLD, NULL);
if (pid < 0)
_exit (EXIT_FAILURE);
File: .github/scripts/trymerge.py (12 changes, vendored)

@@ -434,7 +434,7 @@ query ($owner: String!, $name: String!) {
RE_GHSTACK_HEAD_REF = re.compile(r"^(gh/[^/]+/[0-9]+/)head$")
RE_GHSTACK_DESC = re.compile(r"Stack.*:\r?\n(\* [^\r\n]+\r?\n)+", re.MULTILINE)
RE_PULL_REQUEST_RESOLVED = re.compile(
    r"Pull Request resolved: "
    r"(Pull Request resolved|Pull-Request-resolved): "
    r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>[0-9]+)",
    re.MULTILINE,
)
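The updated pattern accepts both the space-separated and the hyphenated commit-message trailer. A quick sketch of the difference (the regex is copied from the new line above; the sample commit message is made up):

    import re

    RE_PULL_REQUEST_RESOLVED = re.compile(
        r"(Pull Request resolved|Pull-Request-resolved): "
        r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>[0-9]+)",
        re.MULTILINE,
    )

    msg = "Fix bug\n\nPull-Request-resolved: https://github.com/pytorch/pytorch/pull/12345"
    m = RE_PULL_REQUEST_RESOLVED.search(msg)
    assert m is not None and m.group("number") == "12345"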
@@ -819,10 +819,9 @@ class GitHubPR:
cursor=info["reviews"]["pageInfo"]["startCursor"],
)
info = rc["data"]["repository"]["pullRequest"]
reviews = {}
for author, state in self._reviews:
if state != "COMMENTED":
reviews[author] = state
reviews = {
author: state for author, state in self._reviews if state != "COMMENTED"
}
return list(reviews.items())

def get_approved_by(self) -> list[str]:
@@ -2282,7 +2281,8 @@ def merge(
except MandatoryChecksMissingError as ex:
last_exception = str(ex)
print(
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min"
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min",
flush=True,
)
time.sleep(5 * 60)
# Finally report timeout back
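One behavioral detail of the reviews change worth noting: both the old loop and the new dict comprehension key on author, so when _reviews is in chronological order a later review overwrites an earlier one and only the latest non-COMMENTED state per author survives. A small illustration (the review data is made up):

    # Illustrative only: latest non-COMMENTED state per author wins.
    _reviews = [
        ("alice", "CHANGES_REQUESTED"),
        ("bob", "COMMENTED"),
        ("alice", "APPROVED"),
    ]
    reviews = {
        author: state for author, state in _reviews if state != "COMMENTED"
    }
    print(list(reviews.items()))  # [('alice', 'APPROVED')]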
File: .github/scripts/windows/build_magma.bat (3 changes, vendored)

@@ -54,7 +54,8 @@ cmake .. -DGPU_TARGET="%GPU_TARGET%" ^
-DCMAKE_BUILD_TYPE=%CONFIG% ^
-DCMAKE_GENERATOR=Ninja ^
-DCMAKE_INSTALL_PREFIX=..\install\ ^
-DCUDA_ARCH_LIST="%CUDA_ARCH_LIST%"
-DCUDA_ARCH_LIST="%CUDA_ARCH_LIST%" ^
-DCMAKE_POLICY_VERSION_MINIMUM=3.5
if errorlevel 1 exit /b 1

cmake --build . --target install --config %CONFIG% -- -j%NUMBER_OF_PROCESSORS%
File: .github/templates/common.yml.j2 (2 changes, vendored)

@@ -32,7 +32,7 @@ concurrency:
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.7
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}

@@ -53,7 +53,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.7
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -135,7 +135,7 @@ jobs:
uses: ./.github/actions/setup-xpu
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v1.7.0
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
@@ -147,9 +147,9 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.7
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: !{{ config["container_image"] }}
- name: Test Pytorch binary
@@ -168,12 +168,12 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.7
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: !{{ config["container_image"] }}
- name: Test Pytorch binary

@@ -76,7 +76,7 @@ jobs:
elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
fi
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
- name: Populate binary env
run: |
# shellcheck disable=SC1091
File: .github/templates/upload.yml.j2 (6 changes, vendored)

@@ -25,9 +25,6 @@
DOCKER_IMAGE: !{{ config["container_image"] }}
{%- endif %}
{%- if config["package_type"] == "manywheel" %}
{%- if config["devtoolset"] %}
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
{%- endif %}
{%- if config.use_split_build is defined %}
use_split_build: !{{ config["use_split_build"] }}
{%- endif %}
@@ -37,9 +34,6 @@
LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
{%- endif %}
LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
{%- if config["devtoolset"] %}
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
{%- endif %}
{%- if is_windows %}
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
File: .github/templates/windows_arm64_binary_build_workflow.yml.j2 (197 changes, vendored, deleted)

@@ -1,197 +0,0 @@
{% import 'common.yml.j2' as common %}
{% import 'upload.yml.j2' as upload %}

{%- block name -%}
# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: !{{ build_environment }}
{%- endblock %}

{%- macro set_runner_specific_vars() -%}
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: cmd
run: |
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
{%- endmacro %}

on:
push:
branches:
- !{{ branches }}
{%- if branches == "nightly" %}
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
{%- endif %}
{%- for label in ciflow_config.labels | sort %}
{%- if loop.first and branches != "nightly" %}
tags:
{%- endif %}
- '!{{ label }}/*'
{%- endfor %}
workflow_dispatch:

env:
BUILD_ENVIRONMENT: !{{ build_environment }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
PYTORCH_ROOT: /pytorch
DOWNLOADS_DIR: c:\temp\downloads
DEPENDENCIES_DIR: c:\temp\dependencies
ENABLE_APL: 1
ENABLE_OPENBLAS: 0
MSVC_VERSION : 14.42
AWS_DEFAULT_REGION: us-east-1

jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.7
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}

{%- for config in build_configs %}
!{{ config["build_name"] }}-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64"
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
{%- endif %}
steps:
!{{ set_runner_specific_vars() }}
- name: Bootstrap folders
shell: cmd
run: |
mkdir "%NIGHTLIES_PYTORCH_ROOT%"
mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Git
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch - recursive
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Bootstrap sccache
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
- name: Bootstrap Libuv
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
- name: Populate binary env
shell: bash
run: |
"pytorch/.circleci/scripts/binary_populate_env.sh"
- name: Build PyTorch binary
shell: bash
run: |
"pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
- uses: !{{ common.upload_artifact_action }}
if: always()
with:
name: !{{ config["build_name"] }}
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ config["build_name"] }}-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- !{{ config["build_name"] }}-build
- get-label-type
runs-on: "windows-11-arm64"
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
steps:
!{{ set_runner_specific_vars() }}
- uses: !{{ common.download_artifact_action }}
name: Download Build Artifacts
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Bootstrap Git
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Populate binary env
shell: bash
run: |
"pytorch/.circleci/scripts/binary_populate_env.sh"
- name: Test PyTorch binary
shell: bash
run: |
"pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
{%- if branches == "nightly" %}
!{{ upload.upload_binaries(config, True) }}
{%- endif %}
{%- endfor %}
@@ -49,13 +49,22 @@ env:
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
OS: !{{ os }}
{%- if os == "windows-arm64" %}
PYTORCH_ROOT: /pytorch
DOWNLOADS_DIR: c:\temp\downloads
DEPENDENCIES_DIR: c:\temp\dependencies
ENABLE_APL: 1
ENABLE_OPENBLAS: 0
MSVC_VERSION : 14.42
{%- endif %}
!{{ common.concurrency(build_environment) }}

jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.7
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -66,20 +75,79 @@ jobs:
!{{ config["build_name"] }}-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
{%- if os == "windows-arm64" %}
runs-on: "windows-11-arm64"
{%- else %}
{%- if branches == "nightly" %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
{%- else %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
{%- endif %}
{%- endif %}
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
!{{ upload.binary_env(config, True) }}
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
{%- endif %}
steps:
!{{ common.setup_ec2_windows() }}
{%- if os == "windows-arm64" %}
- name: Populate binary env
shell: cmd
run: |
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
- name: Bootstrap folders
shell: cmd
run: |
mkdir "%NIGHTLIES_PYTORCH_ROOT%"
mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Git
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch - recursive
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Bootstrap sccache
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
- name: Bootstrap Libuv
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
{%- else %}
!{{ set_runner_specific_vars() }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.setup_ec2_windows() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
{%- endif %}
- name: Populate binary env
shell: bash
run: |
@@ -95,12 +163,17 @@ jobs:
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
{%- if os != "windows-arm64" %}
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
{% endif %}
!{{ config["build_name"] }}-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- !{{ config["build_name"] }}-build
- get-label-type
{%- if os == "windows-arm64" %}
runs-on: "windows-11-arm64"
{%- else %}
{%- if config["gpu_arch_type"] == "cuda" %}
{%- if branches == "nightly" %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
@@ -113,18 +186,61 @@ jobs:
{%- else %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
{%- endif %}
{%- endif %}
{%- endif %}
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
!{{ upload.binary_env(config, True) }}
steps:
{%- if os == "windows-arm64" %}
- name: Populate binary env
shell: cmd
run: |
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Populate binary env
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
{%- else %}
!{{ common.setup_ec2_windows() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ set_runner_specific_vars() }}
{%- endif %}
- uses: !{{ common.download_artifact_action }}
name: Download Build Artifacts
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |
@@ -133,8 +249,10 @@ jobs:
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
{%- if os != "windows-arm64" %}
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
{%- endif %}
{%- if branches == "nightly" %}
!{{ upload.upload_binaries(config, True) }}
{%- endif %}
{%- endfor %}
File: .github/workflows/_bazel-build-test.yml (32 changes, vendored)

@@ -33,6 +33,10 @@ on:
default: "linux.large"
description: Runner type

permissions:
id-token: write
contents: read

env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}

@@ -47,7 +51,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.7
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
fetch-depth: 1
submodules: false
@@ -69,25 +73,32 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.7
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}

# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.7
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

- name: Setup Linux
uses: ./.github/actions/setup-linux

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
role-session-name: gha-bazel-build
aws-region: us-east-1

- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.7
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: ${{ inputs.docker-image-name }}

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.7
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@@ -97,7 +108,7 @@ jobs:
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.7
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

- name: Output disk space left
@@ -202,6 +213,13 @@ jobs:
uses: ./.github/actions/chown-workspace
if: always()

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_pytorch_artifacts
role-session-name: gha-bazel-build-upload-artifacts
aws-region: us-east-1

- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
@@ -209,5 +227,5 @@ jobs:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.7
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
Some files were not shown because too many files have changed in this diff.