Separate arm64 and amd64 docker builds (#125617)

Fixes https://github.com/pytorch/pytorch/issues/125094 Please note: the Docker CUDA 12.4 failure is an existing issue, related to the docker image not being available on gitlab: ``` docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: docker.io/nvidia/cuda:12.4.0-cudnn8-devel-ubuntu22.04: not found ``` https://github.com/pytorch/pytorch/actions/runs/8974959068/job/24648540236?pr=125617 Here is the reference issue: https://gitlab.com/nvidia/container-images/cuda/-/issues/225 Tracked on our side: https://github.com/pytorch/builder/issues/1811 Pull Request resolved: https://github.com/pytorch/pytorch/pull/125617 Approved by: https://github.com/huydhn, https://github.com/malfet
2025-10-20 21:14:14 +08:00 · 2024-05-07 11:50:54 +00:00
parent 5dee46266a
commit b29d77b54f
3 changed files with 47 additions and 6 deletions
--- a/.github/scripts/generate_docker_release_matrix.py
+++ b/.github/scripts/generate_docker_release_matrix.py
@ -21,6 +21,8 @@ DOCKER_IMAGE_TYPES = ["runtime", "devel"]

 def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
    ret: List[Dict[str, str]] = []
+    # CUDA amd64 Docker images are available as both runtime and devel while
+    # CPU arm64 image is only available as runtime.
    for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
        for image in DOCKER_IMAGE_TYPES:
            ret.append(
@ -31,9 +33,19 @@ def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
                        cuda
                    ],
                    "image_type": image,
-                    "platform": "linux/arm64,linux/amd64",
+                    "platform": "linux/amd64",
                }
            )
+    ret.append(
+        {
+            "cuda": "cpu",
+            "cuda_full_version": "",
+            "cudnn_version": "",
+            "image_type": "runtime",
+            "platform": "linux/arm64",
+        }
+    )
+
    return {"include": ret}


--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@ -7,6 +7,7 @@ on:
      - Dockerfile
      - docker.Makefile
      - .github/workflows/docker-release.yml
+      - .github/scripts/generate_docker_release_matrix.py
  push:
    branches:
      - nightly
@ -129,17 +130,27 @@ jobs:
        if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
        run: |
          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
+          CUDA_SUFFIX="-cu${CUDA_VERSION}"
+          if [[ ${CUDA_VERSION_SHORT} == "cpu" ]]; then
+            PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
+            CUDA_SUFFIX=""
+          fi

          PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
                                          python -c 'import torch; print(torch.version.git_version[:7],end="")')

          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
-                 ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
-          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
+                 ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
+
+          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
+
+          # Please note, here we need to pin a specific version of CUDA to use with the latest label
+          if [[ ${CUDA_VERSION_SHORT} == "12.1" ]]; then
+            docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}" \
+                    ghcr.io/pytorch/pytorch-nightly:latest
+            docker push ghcr.io/pytorch/pytorch-nightly:latest
+          fi

-          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}" \
-                 ghcr.io/pytorch/pytorch-nightly:latest
-          docker push ghcr.io/pytorch/pytorch-nightly:latest
      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()
--- a/docker.Makefile
+++ b/docker.Makefile
@ -83,6 +83,22 @@ devel-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CU
 devel-push:
 	$(DOCKER_PUSH)

+ifeq ("$(CUDA_VERSION_SHORT)","cpu")
+
+.PHONY: runtime-image
+runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
+runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
+runtime-image:
+	$(DOCKER_BUILD)
+
+.PHONY: runtime-push
+runtime-push: BASE_IMAGE := $(BASE_RUNTIME)
+runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-runtime
+runtime-push:
+	$(DOCKER_PUSH)
+
+else
+
 .PHONY: runtime-image
 runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
 runtime-image: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime
@ -95,6 +111,8 @@ runtime-push: DOCKER_TAG := $(PYTORCH_VERSION)-cuda$(CUDA_VERSION_SHORT)-cudnn$(
 runtime-push:
 	$(DOCKER_PUSH)

+endif
+
 .PHONY: clean
 clean:
 	-docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME))