Compare commits


2 Commits

Author SHA1 Message Date
a545ebf334 [releng] Docker release: Change docker tag. Refactor Push nightly tags step. Move cuda and cudnn version to docker tag rather than name (#116098)
* [releng] Docker Official release: make sure the cuda version is part of the image name (#116070)

Follow up on https://github.com/pytorch/pytorch/pull/115949

Change the docker build image name:
``pytorch:2.1.2-devel`` -> ``2.1.2-cuda12.1-cudnn8-devel`` and ``2.1.2-cuda11.8-cudnn8-devel``

Ref: https://github.com/orgs/pytorch/packages/container/package/pytorch-nightly

Naming will be the same as in https://hub.docker.com/r/pytorch/pytorch/tags (see the sketch below).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116070
Approved by: https://github.com/huydhn, https://github.com/seemethere
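
A minimal sketch (plain Python, illustrative only; the ``pytorch:`` repository prefix on the new names is an assumption based on the rename described above) of what the change means for the devel image references:

# Illustrative sketch: devel image references before and after this change,
# for the 2.1.2 release and the two supported CUDA versions.
pytorch_version = "2.1.2"

old_image = f"pytorch:{pytorch_version}-devel"
# -> "pytorch:2.1.2-devel"

new_images = [
    f"pytorch:{pytorch_version}-cuda{cuda}-cudnn8-devel" for cuda in ("12.1", "11.8")
]
# -> ["pytorch:2.1.2-cuda12.1-cudnn8-devel", "pytorch:2.1.2-cuda11.8-cudnn8-devel"]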

* [releng] Docker release: Refactor Push nightly tags step. Move cuda and cudnn version to docker tag rather than name (#116097)

Follow-up after: https://github.com/pytorch/pytorch/pull/116070

This PR does two things:

1. Refactors the Push nightly tags step: CUDA_VERSION no longer needs to be extracted. The new tag has the format ``${PYTORCH_VERSION}-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime``.
2. Moves cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION) from the docker image name to the tag, as sketched below.
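
A minimal sketch (plain Python, illustrative only; values taken from this release: 2.1.2, CUDA 12.1, cuDNN 8) of the old versus new runtime tag composition:

# Illustrative only: compose the old and new runtime tags from the variables
# referenced in this PR.
pytorch_version = "2.1.2"      # PYTORCH_VERSION
cuda_version_short = "12.1"    # CUDA_VERSION_SHORT
cudnn_version = "8"            # CUDNN_VERSION

old_tag = f"{pytorch_version}-runtime"
# -> "2.1.2-runtime"

new_tag = f"{pytorch_version}-cuda{cuda_version_short}-cudnn{cudnn_version}-runtime"
# -> "2.1.2-cuda12.1-cudnn8-runtime"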

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116097
Approved by: https://github.com/jeanschmidt

* [release only change] hardcode pytorch 2.1.2 version
2023-12-19 09:35:23 -05:00
590a9c2762 Use matrix generate script for docker release workflows (#115949) (#116063)
* Use matrix generate script for docker release workflows (#115949)

Enable builds for both supported CUDA versions in the docker release, rather than building only one.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115949
Approved by: https://github.com/huydhn

* release 2.1.2 only changes, fix rebase

* lint
2023-12-18 17:59:34 -05:00
5 changed files with 87 additions and 17 deletions

View File

@@ -16,6 +16,12 @@ from typing import Dict, List, Optional, Tuple
CUDA_ARCHES = ["11.8", "12.1"]
CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1"}
CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8"}
ROCM_ARCHES = ["5.5", "5.6"]

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Generates a matrix for docker releases through github actions

Will output a condensed version of the matrix. Will include the following:
* CUDA version short
* CUDA full version
* CUDNN version short
* Image type: either runtime or devel
* Platform: linux/arm64,linux/amd64
"""
import json
from typing import Dict, List

import generate_binary_build_matrix

DOCKER_IMAGE_TYPES = ["runtime", "devel"]


def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
    ret: List[Dict[str, str]] = []
    for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
        for image in DOCKER_IMAGE_TYPES:
            platform = (
                "linux/arm64,linux/amd64" if image == "runtime" else "linux/amd64"
            )
            ret.append(
                {
                    "cuda": cuda,
                    "cuda_full_version": version,
                    "cudnn_version": generate_binary_build_matrix.CUDA_ARCHES_CUDNN_VERSION[
                        cuda
                    ],
                    "image_type": image,
                    "platform": platform,
                }
            )
    return {"include": ret}


if __name__ == "__main__":
    build_matrix = generate_docker_matrix()
    print(json.dumps(build_matrix))
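
For reference, a rough sketch of the matrix this script would emit for the CUDA_ARCHES_FULL_VERSION and CUDA_ARCHES_CUDNN_VERSION values shown in the previous hunk (pretty-printed here for readability; the script itself prints a single JSON line):

# Illustrative only: the condensed matrix generate_docker_matrix() would return
# for CUDA 11.8 / 12.1 with cuDNN 8, per the loops above.
example_matrix = {
    "include": [
        {"cuda": "11.8", "cuda_full_version": "11.8.0", "cudnn_version": "8",
         "image_type": "runtime", "platform": "linux/arm64,linux/amd64"},
        {"cuda": "11.8", "cuda_full_version": "11.8.0", "cudnn_version": "8",
         "image_type": "devel", "platform": "linux/amd64"},
        {"cuda": "12.1", "cuda_full_version": "12.1.1", "cudnn_version": "8",
         "image_type": "runtime", "platform": "linux/arm64,linux/amd64"},
        {"cuda": "12.1", "cuda_full_version": "12.1.1", "cudnn_version": "8",
         "image_type": "devel", "platform": "linux/amd64"},
    ]
}

The devel entries are restricted to linux/amd64 because the NVIDIA base images are not published for arm64, matching the comment removed from the workflow hunk below.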

View File

@@ -39,23 +39,40 @@ env:
  WITH_PUSH: ${{ inputs.channel == 'release' }}

jobs:
  generate-matrix:
    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      matrix: ${{ steps.generate-matrix.outputs.matrix }}
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          fetch-depth: 1
          submodules: true
      - name: Get docker release matrix
        id: generate-matrix
        run: |
          MATRIX_BLOB="$(python3 .github/scripts/generate_docker_release_matrix.py)"
          echo "${MATRIX_BLOB}"
          echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}"
  build:
    if: ${{ github.repository == 'pytorch/pytorch' }}
    runs-on: [self-hosted, linux.2xlarge]
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    timeout-minutes: 240
    needs: generate-matrix
    strategy:
      matrix:
        include:
          # nvidia specific images don't exist for arm64 so only build the runtime image
          - image_type: runtime
            platform: linux/arm64,linux/amd64
          - image_type: devel
            platform: linux/amd64
      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
      fail-fast: false
    env:
      BUILD_IMAGE_TYPE: ${{ matrix.image_type }}
      BUILD_PLATFORMS: ${{ matrix.platform }}
      CHANNEL: ${{ inputs.channel }}
      CUDA_VERSION: ${{ matrix.cuda_full_version }}
      CUDA_VERSION_SHORT: ${{ matrix.cuda }}
      CUDNN_VERSION: ${{ matrix.cudnn_version }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
@@ -92,7 +109,7 @@ jobs:
          # To get QEMU binaries in our PATH
          echo "${RUNNER_TEMP}/bin" >> "${GITHUB_PATH}"
          # Generate PyTorch version to use without suffix
          echo "PYTORCH_VERSION=$(python3 .github/scripts/generate_pytorch_version.py --no-build-suffix)" >> "${GITHUB_ENV}"
          echo "PYTORCH_VERSION=2.1.2" >> "${GITHUB_ENV}"
      - name: Setup release specific variables
        run: |
@@ -110,10 +127,11 @@
      - name: Push nightly tags
        if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
        run: |
          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
          CUDA_VERSION=$(python3 -c "import re;print(re.search('CUDA_VERSION\s+=\s+([0-9\.]+)',open('docker.Makefile').read())[1],end='')")
          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime"
          PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
            python -c 'import torch; print(torch.version.git_version[:7],end="")')
          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
            ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
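
A minimal sketch (illustrative Python; the commit hash is hypothetical, and CUDA_VERSION is assumed to resolve to the full version supplied by the matrix, e.g. 12.1.1) of the image references this step would work with after the change:

# Illustrative only: names handled by the "Push nightly tags" step above.
# "abc1234" is a hypothetical short hash read from torch.version.git_version.
pytorch_version = "2.1.2"    # PYTORCH_VERSION, hardcoded earlier in this hunk set
cuda_version_short = "12.1"  # CUDA_VERSION_SHORT
cudnn_version = "8"          # CUDNN_VERSION
cuda_version = "12.1.1"      # CUDA_VERSION (full version from the matrix, assumed)
nightly_commit = "abc1234"

source = f"ghcr.io/pytorch/pytorch-nightly:{pytorch_version}-cuda{cuda_version_short}-cudnn{cudnn_version}-runtime"
target = f"ghcr.io/pytorch/pytorch-nightly:{nightly_commit}-cu{cuda_version}"
# The step re-tags `source` as `target` and pushes `target`.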

View File

@@ -63,7 +63,7 @@ RUN --mount=type=cache,target=/opt/ccache \
FROM conda as conda-installs
ARG PYTHON_VERSION=3.8
ARG CUDA_VERSION=11.7
ARG CUDA_VERSION=12.1
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch-nightly
# Automatically set by buildx

View File

@@ -8,8 +8,9 @@ $(warning WARNING: No docker user found using results from whoami)
DOCKER_ORG = $(shell whoami)
endif
CUDA_VERSION = 12.1.1
CUDNN_VERSION = 8
CUDA_VERSION_SHORT ?= 12.1
CUDA_VERSION ?= 12.1.1
CUDNN_VERSION ?= 8
BASE_RUNTIME = ubuntu:20.04
BASE_DEVEL = nvidia/cuda:$(CUDA_VERSION)-cudnn$(CUDNN_VERSION)-devel-ubuntu20.04
CMAKE_VARS ?=
@@ -71,25 +72,25 @@ all: devel-image

.PHONY: devel-image
devel-image: BASE_IMAGE := $(BASE_DEVEL)
devel-image: DOCKER_TAG := $(PYTORCH_VERSION)-devel
devel-image: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-devel
devel-image:
	$(DOCKER_BUILD)

.PHONY: devel-push
devel-push: BASE_IMAGE := $(BASE_DEVEL)
devel-push: DOCKER_TAG := $(PYTORCH_VERSION)-devel
devel-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-devel
devel-push:
	$(DOCKER_PUSH)

.PHONY: runtime-image
runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime
runtime-image:
	$(DOCKER_BUILD)

.PHONY: runtime-push
runtime-push: BASE_IMAGE := $(BASE_RUNTIME)
runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime
runtime-push:
	$(DOCKER_PUSH)