Use reusable binary docker build action for almalinux, clean up script (#151483)

This is part of splitting up https://github.com/pytorch/pytorch/pull/150558 into smaller chunks; please see that PR for more context.

Use the binary docker build action from https://github.com/pytorch/pytorch/pull/151471.

Change the workflow trigger to cover all of .ci/docker, so that a new image and tag are built whenever anything there changes.

Build script:
* Make it independent of the CUDA_VERSION env var, since all the information should be in imagename:tag.
* Remove the docker push parts, since pushing now happens in the workflow.
* Clean up a bit.
* Make the build script more like the CI build script (use a temporary image name).

I don't think this image is actually used anywhere.

Also push the docker image to imagename:tag. I removed that in the PR that introduced the reusable workflow because I thought it was not in the original scripts, but it actually is there.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151483
Approved by: https://github.com/ZainRizvi
2025-10-20 21:14:14 +08:00 · 2025-04-17 21:32:56 +00:00
parent 652fa451a4
commit 0129c3a4e1
3 changed files with 45 additions and 90 deletions
--- a/.ci/docker/almalinux/build.sh
+++ b/.ci/docker/almalinux/build.sh
@@ -1,82 +1,60 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline

-set -eou pipefail
+set -exou pipefail

 image="$1"
 shift

 if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGE"
+  echo "Usage: $0 IMAGENAME:ARCHTAG"
  exit 1
 fi

-DOCKER_IMAGE_NAME="pytorch/${image}"
+# Go from imagename:tag to tag
+DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

+CUDA_VERSION=""
+if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
+    # extract cuda version from image name and tag.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
+    CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
+fi

-export DOCKER_BUILDKIT=1
-TOPDIR=$(git rev-parse --show-toplevel)
-
-CUDA_VERSION=${CUDA_VERSION:-12.1}
-
-case ${CUDA_VERSION} in
+case ${DOCKER_TAG_PREFIX} in
  cpu)
    BASE_TARGET=base
-    DOCKER_TAG=cpu
    ;;
-  all)
-    BASE_TARGET=all_cuda
-    DOCKER_TAG=latest
+  cuda*)
+    BASE_TARGET=cuda${CUDA_VERSION}
    ;;
  *)
-    BASE_TARGET=cuda${CUDA_VERSION}
-    DOCKER_TAG=cuda${CUDA_VERSION}
+    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
+    exit 1
    ;;
 esac

+# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
+# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
+sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
+sudo systemctl daemon-reload
+sudo systemctl restart docker

-(
-  set -x
-  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-  sudo systemctl daemon-reload
-  sudo systemctl restart docker
+export DOCKER_BUILDKIT=1
+TOPDIR=$(git rev-parse --show-toplevel)
+tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

-  docker build \
-    --target final \
-    --progress plain \
-    --build-arg "BASE_TARGET=${BASE_TARGET}" \
-    --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
-    --build-arg "DEVTOOLSET_VERSION=11" \
-    -t ${DOCKER_IMAGE_NAME} \
-    $@ \
-    -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
-    ${TOPDIR}/.ci/docker/
-)
+docker build \
+  --target final \
+  --progress plain \
+  --build-arg "BASE_TARGET=${BASE_TARGET}" \
+  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
+  --build-arg "DEVTOOLSET_VERSION=11" \
+  -t ${tmp_tag} \
+  $@ \
+  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
+  ${TOPDIR}/.ci/docker/

-if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
+if [ -n "${CUDA_VERSION}" ]; then
  # Test that we're using the right CUDA compiler
-  (
-    set -x
-    docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
-  )
-fi
-
-GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
-GIT_BRANCH_NAME=${GITHUB_REF##*/}
-GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
-DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
-DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
-if [[ "${WITH_PUSH:-}" == true ]]; then
-  (
-    set -x
-    docker push "${DOCKER_IMAGE_NAME}"
-    if [[ -n ${GITHUB_REF} ]]; then
-        docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
-        docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
-        docker push "${DOCKER_IMAGE_BRANCH_TAG}"
-        docker push "${DOCKER_IMAGE_SHA_TAG}"
-    fi
-  )
+  docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
 fi
--- a/.github/actions/binary-docker-build/action.yml
+++ b/.github/actions/binary-docker-build/action.yml
@@ -53,6 +53,7 @@ runs:

        DOCKER_IMAGE_NAME_PREFIX=docker.io/pytorch/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_PREFIX}

+        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}
@@ -62,6 +63,7 @@ runs:
        set +x
        if [[ ${WITH_PUSH:-false} == "true" ]]; then
          echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          docker push ${DOCKER_IMAGE_NAME_PREFIX}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}
--- a/.github/workflows/build-almalinux-images.yml
+++ b/.github/workflows/build-almalinux-images.yml
@@ -11,14 +11,14 @@ on:
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
    paths:
-      - '.ci/docker/almalinux/*'
-      - '.ci/docker/common/*'
+      - .ci/docker/**
      - .github/workflows/build-almalinux-images.yml
+      - .github/actions/binary-docker-build/**
  pull_request:
    paths:
-      - '.ci/docker/almalinux/*'
-      - '.ci/docker/common/*'
+      - .ci/docker/**
      - .github/workflows/build-almalinux-images.yml
+      - .github/actions/binary-docker-build/**

 env:
  DOCKER_REGISTRY: "docker.io"
@@ -37,37 +37,12 @@ jobs:
    strategy:
      matrix:
        cuda_version: ["11.8", "12.4", "12.6", "cpu"]
-    env:
-      CUDA_VERSION: ${{ matrix.cuda_version }}
    steps:
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+      - name: Build docker image
+        uses: pytorch/pytorch/.github/actions/binary-docker-build@main
        with:
-          submodules: false
-      - name: Calculate docker image
-        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        with:
-            docker-image-name: almalinux-builder${{ matrix.cuda_version == 'cpu' && '-' || '-cuda' }}${{matrix.cuda_version}}
-            docker-build-dir:  .ci/docker/almalinux
-            always-rebuild: true
-            push: true
-      - name: Authenticate if WITH_PUSH
-        if: env.WITH_PUSH == 'true'
-        env:
+          docker-image-name: almalinux-builder
+          custom-tag-prefix: ${{ matrix.cuda_version != 'cpu' && 'cuda' || '' }}${{matrix.cuda_version}}
+          docker-build-dir: almalinux
          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
          DOCKER_ID: ${{ secrets.DOCKER_ID }}
-        run: |
-          if [[ "${WITH_PUSH}" == true ]]; then
-            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
-          fi
-      - name: Build Docker Image
-        if: env.WITH_PUSH == 'true'
-        uses: nick-fields/retry@v3.0.0
-        with:
-          shell: bash
-          timeout_minutes: 90
-          max_attempts: 3
-          retry_wait_seconds: 90
-          command: |
-            .ci/docker/almalinux/build.sh almalinux-builder${{ matrix.cuda_version == 'cpu' && ':' || ':cuda' }}${{matrix.cuda_version}}