CI: add aarch64 linux workflow (#121284)

The aarch64 linux workflow is triggered by ciflow/linux-aarch64 tags. Pull Request resolved: https://github.com/pytorch/pytorch/pull/121284 Approved by: https://github.com/atalman, https://github.com/malfet
2025-10-20 21:14:14 +08:00 · 2024-04-29 18:25:37 +00:00
parent ae13c7e593
commit 32cf04cb7f
8 changed files with 75 additions and 4 deletions
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -306,6 +306,12 @@ case "$image" in
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
+    # snadampal: skipping sccache due to the following issue
+    # https://github.com/pytorch/pytorch/issues/121559
+    SKIP_SCCACHE_INSTALL=yes
+    # snadampal: skipping llvm src build install because the current version
+    # from pytorch/llvm:9.0.1 is x86 specific
+    SKIP_LLVM_SRC_BUILD_INSTALL=yes
    ;;
  *)
    # Catch-all for builds that are not hardcoded.
@ -399,6 +405,8 @@ DOCKER_BUILDKIT=1 docker build \
       --build-arg "EXECUTORCH=${EXECUTORCH}" \
       --build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
       --build-arg "ACL=${ACL:-}" \
+       --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
+       --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
       -t "$tmp_tag" \
       "$@" \
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -263,10 +263,11 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
 #Pinned versions:
 #test that import:

-#wheel not found on aarch64, and source build requires rust
 lintrunner==0.10.7 ; platform_machine == "x86_64"
+#lintrunner supports aarch64 linux only from version 0.12.4 onwards
+lintrunner==0.12.5 ; platform_machine == "aarch64"
 #Description: all about linters!
-#Pinned versions: 0.10.7
+#Pinned versions: 0.10.7 on x86 and 0.12.5 on aarch64
 #test that import:

 rockset==1.0.3
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -169,9 +169,11 @@ RUN rm install_acl.sh
 ENV INSTALLED_ACL ${ACL}

 # Install ccache/sccache (do this last, so we get priority in PATH)
+ARG SKIP_SCCACHE_INSTALL
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
-RUN bash ./install_cache.sh && rm install_cache.sh
+RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
+RUN rm install_cache.sh

 # Add jni.h for java host build
 COPY ./common/install_jni.sh install_jni.sh
@ -188,7 +190,9 @@ ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
+ARG SKIP_LLVM_SRC_BUILD_INSTALL
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
+RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi

 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -376,4 +376,8 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
  python tools/stats/export_test_times.py
 fi

-print_sccache_stats
+# snadampal: skipping it until sccache support is added for aarch64
+# https://github.com/pytorch/pytorch/issues/121559
+if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
+  print_sccache_stats
+fi
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -181,6 +181,11 @@ if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  export PATH="$HOME/.local/bin:$PATH"
 fi

+if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
+  # TODO: revisit this once the CI is stabilized on aarch64 linux
+  export VALGRIND=OFF
+fi
+
 install_tlparse

 # DANGER WILL ROBINSON.  The LD_PRELOAD here could cause you problems
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -8,6 +8,7 @@ ciflow_push_tags:
 - ciflow/binaries_wheel
 - ciflow/inductor
 - ciflow/inductor-perf-compare
+- ciflow/linux-aarch64
 - ciflow/mps
 - ciflow/nightly
 - ciflow/periodic
--- a/.github/workflows/linux-aarch64.yml
+++ b/.github/workflows/linux-aarch64.yml
@ -0,0 +1,38 @@
+name: linux-aarch64
+
+on:
+  # For testing purposes only; remove this pull_request trigger later
+  pull_request:
+  push:
+    tags:
+      - ciflow/linux-aarch64/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  linux-jammy-aarch64-py3_10-build:
+    name: linux-jammy-aarch64-py3.10
+    uses: ./.github/workflows/_linux-build.yml
+    with:
+      build-environment: linux-jammy-aarch64-py3.10
+      docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11
+      runner: linux.arm64.2xlarge
+      test-matrix: |
+        { include: [
+          { config: "default", shard: 1, num_shards: 1, runner: "linux.arm64.2xlarge" },
+        ]}
+
+  linux-jammy-aarch64-py3_10-test:
+    name: linux-jammy-aarch64-py3.10
+    uses: ./.github/workflows/_linux-test.yml
+    needs: linux-jammy-aarch64-py3_10-build
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      build-environment: linux-jammy-aarch64-py3.10
+      docker-image: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.test-matrix }}
--- a/test/run_test.py
+++ b/test/run_test.py
@ -26,7 +26,9 @@ from torch.multiprocessing import current_process, get_context
 from torch.testing._internal.common_utils import (
    FILE_SCHEMA,
    get_report_path,
+    IS_ARM64,
    IS_CI,
+    IS_LINUX,
    IS_MACOS,
    parser as common_parser,
    retry_shell,
@ -265,6 +267,10 @@ CORE_TEST_LIST = [
    "test_torch",
 ]

+# A subset of the TEST list for aarch64 linux platform
+ARM64_LINUX_TEST_LIST = [
+    "test_modules",
+]

 # if a test file takes longer than 5 min, we add it to TARGET_DET_LIST
 SLOW_TEST_THRESHOLD = 300
@ -1298,6 +1304,10 @@ def can_run_in_pytest(test):


 def get_selected_tests(options) -> List[str]:
+    if IS_ARM64 and IS_LINUX:
+        selected_tests = ARM64_LINUX_TEST_LIST
+        return selected_tests
+
    selected_tests = options.include

    # filter if there's JIT only and distributed only test options