mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Add CUDA 12.1 CI workflows (#98832)
Adds CUDA 12.1 CI workflows, removes CUDA 11.7. CC @malfet Pull Request resolved: https://github.com/pytorch/pytorch/pull/98832 Approved by: https://github.com/atalman
This commit is contained in:
committed by
PyTorch MergeBot
parent
3edff6b6ec
commit
73645a8412
@ -81,15 +81,15 @@ fi
|
||||
# CMake 3.18 is needed to support the CUDA C++17 language standard
|
||||
CMAKE_VERSION=3.18.5
|
||||
|
||||
_UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
|
||||
_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
|
||||
_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
|
||||
_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea
|
||||
|
||||
# It's annoying to rename jobs every time you want to rewrite a
|
||||
# configuration, so we hardcode everything here rather than do it
|
||||
# from scratch
|
||||
case "$image" in
|
||||
pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=11.7.0
|
||||
pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=12.1.0
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
|
@ -4,9 +4,9 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
|
||||
if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
|
||||
curl --retry 3 -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
|
||||
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-8.8.1.3_cuda12-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/c/${CUDNN_NAME}.tar.xz
|
||||
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
|
||||
|
@ -137,6 +137,7 @@ RUN rm install_cudnn.sh
|
||||
# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
|
||||
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
|
||||
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
|
||||
RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
@ -40,8 +40,9 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
|
||||
# TODO: there is a linking issue when building with UCC using clang,
|
||||
# disable it for now; to be fixed later.
|
||||
export USE_UCC=1
|
||||
export USE_SYSTEM_UCC=1
|
||||
# TODO: disable UCC temporarily to enable CUDA 12.1 in CI
|
||||
export USE_UCC=0
|
||||
export USE_SYSTEM_UCC=0
|
||||
fi
|
||||
fi
|
||||
|
||||
|
2
.github/scripts/generate_ci_workflows.py
vendored
2
.github/scripts/generate_ci_workflows.py
vendored
@ -147,7 +147,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
package_type="manywheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
arches=["11.7", "12.1"],
|
||||
arches=["11.8", "12.1"],
|
||||
python_versions=["3.8"],
|
||||
gen_special_an_non_special_wheel=False,
|
||||
),
|
||||
|
2
.github/workflows/docker-builds.yml
vendored
2
.github/workflows/docker-builds.yml
vendored
@ -33,7 +33,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
|
||||
- docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
|
||||
- docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
|
||||
- docker-image-name: pytorch-linux-bionic-py3.8-clang9
|
||||
- docker-image-name: pytorch-linux-bionic-py3.11-clang9
|
||||
|
23
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
23
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -31,7 +31,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
manywheel-py3_8-cuda11_7-with-pypi-cudnn-build:
|
||||
manywheel-py3_8-cuda11_8-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
@ -40,20 +40,19 @@ jobs:
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu117
|
||||
GPU_ARCH_VERSION: 11.7
|
||||
DESIRED_CUDA: cu118
|
||||
GPU_ARCH_VERSION: 11.8
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.7
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8
|
||||
DESIRED_PYTHON: "3.8"
|
||||
build_name: manywheel-py3_8-cuda11_7-with-pypi-cudnn
|
||||
build_name: manywheel-py3_8-cuda11_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.7.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.7.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.7.101; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.5.0.96; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.10.3.66; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.2.10.91; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.0.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.4.91; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.14.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.7.91; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
manywheel-py3_8-cuda11_7-with-pypi-cudnn-test: # Testing
|
||||
manywheel-py3_8-cuda11_8-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_8-cuda11_7-with-pypi-cudnn-build
|
||||
needs: manywheel-py3_8-cuda11_8-build
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -61,12 +60,12 @@ jobs:
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu117
|
||||
GPU_ARCH_VERSION: 11.7
|
||||
DESIRED_CUDA: cu118
|
||||
GPU_ARCH_VERSION: 11.8
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.7
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8
|
||||
DESIRED_PYTHON: "3.8"
|
||||
build_name: manywheel-py3_8-cuda11_7-with-pypi-cudnn
|
||||
build_name: manywheel-py3_8-cuda11_8
|
||||
build_environment: linux-binary-manywheel
|
||||
runs_on: linux.4xlarge.nvidia.gpu
|
||||
secrets:
|
||||
|
20
.github/workflows/periodic.yml
vendored
20
.github/workflows/periodic.yml
vendored
@ -37,12 +37,12 @@ jobs:
|
||||
docker-image: ${{ needs.parallelnative-linux-focal-py3_8-gcc7-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.parallelnative-linux-focal-py3_8-gcc7-build.outputs.test-matrix }}
|
||||
|
||||
linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build:
|
||||
name: cuda11.7-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
|
||||
linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build:
|
||||
name: cuda12.1-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
|
||||
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
|
||||
cuda-arch-list: '8.6'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -60,14 +60,14 @@ jobs:
|
||||
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
|
||||
linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-test:
|
||||
name: cuda11.7-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
|
||||
linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-test:
|
||||
name: cuda12.1-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build
|
||||
needs: linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
docker-image: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
|
||||
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
|
||||
|
||||
linux-bionic-cuda11_8-py3_9-gcc7-build:
|
||||
name: linux-bionic-cuda11.8-py3.9-gcc7
|
||||
|
40
.github/workflows/slow.yml
vendored
40
.github/workflows/slow.yml
vendored
@ -17,12 +17,12 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build:
|
||||
name: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
|
||||
linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build:
|
||||
name: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
|
||||
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
|
||||
build-environment: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
|
||||
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 4, runner: "linux.4xlarge.nvidia.gpu" },
|
||||
@ -31,22 +31,22 @@ jobs:
|
||||
{ config: "default", shard: 4, num_shards: 4, runner: "linux.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
|
||||
linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-test:
|
||||
name: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
|
||||
linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-test:
|
||||
name: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build
|
||||
needs: linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
|
||||
docker-image: ${{ needs.linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build.outputs.test-matrix }}
|
||||
build-environment: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
|
||||
docker-image: ${{ needs.linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build.outputs.test-matrix }}
|
||||
timeout-minutes: 300
|
||||
|
||||
linux-bionic-cuda11_7-py3_10-gcc7-sm86-build:
|
||||
name: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
linux-bionic-cuda12_1-py3_10-gcc7-sm86-build:
|
||||
name: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
|
||||
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
|
||||
cuda-arch-list: 8.6
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -54,14 +54,14 @@ jobs:
|
||||
{ config: "slow", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
|
||||
linux-bionic-cuda11_7-py3_10-gcc7-sm86-test:
|
||||
name: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
linux-bionic-cuda12_1-py3_10-gcc7-sm86-test:
|
||||
name: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-bionic-cuda11_7-py3_10-gcc7-sm86-build
|
||||
needs: linux-bionic-cuda12_1-py3_10-gcc7-sm86-build
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
|
||||
docker-image: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-sm86-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-sm86-build.outputs.test-matrix }}
|
||||
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
|
||||
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-sm86-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-sm86-build.outputs.test-matrix }}
|
||||
|
||||
linux-bionic-py3_8-clang9-build:
|
||||
name: linux-bionic-py3.8-clang9
|
||||
|
@ -385,6 +385,7 @@ if dist.is_available():
|
||||
"UCX_TLS": "tcp",
|
||||
"UCC_TLS": "nccl,ucp",
|
||||
"UCC_TL_UCP_TUNE": "cuda:0", # don't use UCP TL on CUDA as it is not well supported
|
||||
"UCC_EC_CUDA_USE_COOPERATIVE_LAUNCH": "n", # CI nodes (M60) fail if it is on
|
||||
}
|
||||
|
||||
# https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
|
||||
|
Reference in New Issue
Block a user