Add CUDA 12.1 CI workflows (#98832)

Adds CUDA 12.1 CI workflows, removes CUDA 11.7.
CC @malfet

Pull Request resolved: https://github.com/pytorch/pytorch/pull/98832
Approved by: https://github.com/atalman
This commit is contained in:
pbialecki
2023-05-01 16:25:53 +00:00
committed by PyTorch MergeBot
parent 3edff6b6ec
commit 73645a8412
10 changed files with 55 additions and 53 deletions

View File

@ -81,15 +81,15 @@ fi
# CMake 3.18 is needed to support CUDA17 language variant
CMAKE_VERSION=3.18.5
_UCX_COMMIT=31e74cac7bee0ef66bef2af72e7d86d9c282e5ab
_UCC_COMMIT=1c7a7127186e7836f73aafbd7697bbc274a77eee
_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea
# It's annoying to rename jobs every time you want to rewrite a
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
CUDA_VERSION=11.7.0
pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7)
CUDA_VERSION=12.1.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=7

View File

@ -4,9 +4,9 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
if [[ ${CUDA_VERSION:0:4} == "11.7" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
curl --retry 3 -OLs https://ossci-linux.s3.amazonaws.com/${CUDNN_NAME}.tar.xz
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.8.1.3_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/c/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz

View File

@ -137,6 +137,7 @@ RUN rm install_cudnn.sh
# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi
USER jenkins
CMD ["bash"]

View File

@ -40,8 +40,9 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
# TODO: there is a linking issue when building with UCC using clang,
# disable it for now, to be fixed later.
export USE_UCC=1
export USE_SYSTEM_UCC=1
# TODO: disable UCC temporarily to enable CUDA 12.1 in CI
export USE_UCC=0
export USE_SYSTEM_UCC=0
fi
fi

View File

@ -147,7 +147,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
package_type="manywheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["11.7", "12.1"],
arches=["11.8", "12.1"],
python_versions=["3.8"],
gen_special_an_non_special_wheel=False,
),

View File

@ -33,7 +33,7 @@ jobs:
fail-fast: false
matrix:
include:
- docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-py3.8-clang9
- docker-image-name: pytorch-linux-bionic-py3.11-clang9

View File

@ -31,7 +31,7 @@ concurrency:
cancel-in-progress: true
jobs:
manywheel-py3_8-cuda11_7-with-pypi-cudnn-build:
manywheel-py3_8-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
@ -40,20 +40,19 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu117
GPU_ARCH_VERSION: 11.7
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.7
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_7-with-pypi-cudnn
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.7.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.7.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.7.101; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.5.0.96; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.10.3.66; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.2.10.91; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.0.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.4.91; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.14.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.7.91; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda11_7-with-pypi-cudnn-test: # Testing
manywheel-py3_8-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_8-cuda11_7-with-pypi-cudnn-build
needs: manywheel-py3_8-cuda11_8-build
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
@ -61,12 +60,12 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu117
GPU_ARCH_VERSION: 11.7
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.7
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_7-with-pypi-cudnn
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
runs_on: linux.4xlarge.nvidia.gpu
secrets:

View File

@ -37,12 +37,12 @@ jobs:
docker-image: ${{ needs.parallelnative-linux-focal-py3_8-gcc7-build.outputs.docker-image }}
test-matrix: ${{ needs.parallelnative-linux-focal-py3_8-gcc7-build.outputs.test-matrix }}
linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build:
name: cuda11.7-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build:
name: cuda12.1-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
@ -60,14 +60,14 @@ jobs:
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-test:
name: cuda11.7-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-test:
name: cuda12.1-py3.10-gcc7-sm86-periodic-dynamo-benchmarks
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build
needs: linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build
with:
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
linux-bionic-cuda11_8-py3_9-gcc7-build:
name: linux-bionic-cuda11.8-py3.9-gcc7

View File

@ -17,12 +17,12 @@ concurrency:
cancel-in-progress: true
jobs:
linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build:
name: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build:
name: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "linux.4xlarge.nvidia.gpu" },
@ -31,22 +31,22 @@ jobs:
{ config: "default", shard: 4, num_shards: 4, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-test:
name: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-test:
name: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build
needs: linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build
with:
build-environment: linux-bionic-cuda11.7-py3-gcc7-slow-gradcheck
docker-image: ${{ needs.linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3-gcc7-slow-gradcheck-build.outputs.test-matrix }}
build-environment: linux-bionic-cuda12.1-py3-gcc7-slow-gradcheck
docker-image: ${{ needs.linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3-gcc7-slow-gradcheck-build.outputs.test-matrix }}
timeout-minutes: 300
linux-bionic-cuda11_7-py3_10-gcc7-sm86-build:
name: linux-bionic-cuda11.7-py3.10-gcc7-sm86
linux-bionic-cuda12_1-py3_10-gcc7-sm86-build:
name: linux-bionic-cuda12.1-py3.10-gcc7-sm86
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
cuda-arch-list: 8.6
test-matrix: |
{ include: [
@ -54,14 +54,14 @@ jobs:
{ config: "slow", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_7-py3_10-gcc7-sm86-test:
name: linux-bionic-cuda11.7-py3.10-gcc7-sm86
linux-bionic-cuda12_1-py3_10-gcc7-sm86-test:
name: linux-bionic-cuda12.1-py3.10-gcc7-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_7-py3_10-gcc7-sm86-build
needs: linux-bionic-cuda12_1-py3_10-gcc7-sm86-build
with:
build-environment: linux-bionic-cuda11.7-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_7-py3_10-gcc7-sm86-build.outputs.test-matrix }}
build-environment: linux-bionic-cuda12.1-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-sm86-build.outputs.test-matrix }}
linux-bionic-py3_8-clang9-build:
name: linux-bionic-py3.8-clang9

View File

@ -385,6 +385,7 @@ if dist.is_available():
"UCX_TLS": "tcp",
"UCC_TLS": "nccl,ucp",
"UCC_TL_UCP_TUNE": "cuda:0", # don't use UCP TL on CUDA as it is not well supported
"UCC_EC_CUDA_USE_COOPERATIVE_LAUNCH": "n", # CI nodes (M60) fail if it is on
}
# https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python