CI: add aarch64 linux workflow (#121284)

The aarch64 Linux workflow is triggered by ciflow/linux-aarch64 tags.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121284
Approved by: https://github.com/atalman, https://github.com/malfet
This commit is contained in:
Sunita Nadampalli
2024-04-29 18:25:37 +00:00
committed by PyTorch MergeBot
parent ae13c7e593
commit 32cf04cb7f
8 changed files with 75 additions and 4 deletions

View File

@ -306,6 +306,12 @@ case "$image" in
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
;;
*)
# Catch-all for builds that are not hardcoded.
@ -399,6 +405,8 @@ DOCKER_BUILDKIT=1 docker build \
--build-arg "EXECUTORCH=${EXECUTORCH}" \
--build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
--build-arg "ACL=${ACL:-}" \
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \

View File

@ -263,10 +263,11 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
#Pinned versions:
#test that import:
#wheel not found on aarch64, and source build requires rust
lintrunner==0.10.7 ; platform_machine == "x86_64"
#lintrunner supports aarch64-linux only from version 0.12.4 onward
lintrunner==0.12.5 ; platform_machine == "aarch64"
#Description: all about linters!
#Pinned versions: 0.10.7
#Pinned versions: 0.10.7 on x86 and 0.12.5 on aarch64
#test that import:
rockset==1.0.3

View File

@ -169,9 +169,11 @@ RUN rm install_acl.sh
ENV INSTALLED_ACL ${ACL}
# Install ccache/sccache (do this last, so we get priority in PATH)
ARG SKIP_SCCACHE_INSTALL
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
RUN rm install_cache.sh
# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
@ -188,7 +190,9 @@ ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository)
ARG SKIP_LLVM_SRC_BUILD_INSTALL
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi
# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell

View File

@ -376,4 +376,8 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
python tools/stats/export_test_times.py
fi
print_sccache_stats
# snadampal: skip this until sccache support is added for aarch64
# https://github.com/pytorch/pytorch/issues/121559
if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
print_sccache_stats
fi

View File

@ -181,6 +181,11 @@ if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
export PATH="$HOME/.local/bin:$PATH"
fi
if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
# TODO: revisit this once the CI is stabilized on aarch64 linux
export VALGRIND=OFF
fi
install_tlparse
# DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems

View File

@ -8,6 +8,7 @@ ciflow_push_tags:
- ciflow/binaries_wheel
- ciflow/inductor
- ciflow/inductor-perf-compare
- ciflow/linux-aarch64
- ciflow/mps
- ciflow/nightly
- ciflow/periodic

38
.github/workflows/linux-aarch64.yml vendored Normal file
View File

@ -0,0 +1,38 @@
# CI workflow that builds and tests PyTorch on aarch64 Linux runners.
name: linux-aarch64
on:
  # For testing purposes only; remove the pull_request trigger later.
  pull_request:
  push:
    tags:
      - ciflow/linux-aarch64/*
  workflow_dispatch:
concurrency:
  # One group per workflow and PR number (or commit SHA when there is no PR);
  # the trailing term puts manually dispatched runs in their own group so
  # they are not cancelled by, and do not cancel, event-triggered runs.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
jobs:
  # Build job: runs the reusable Linux build workflow on an arm64 runner.
  linux-jammy-aarch64-py3_10-build:
    name: linux-jammy-aarch64-py3.10
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-jammy-aarch64-py3.10
      docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11
      runner: linux.arm64.2xlarge
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 1, runner: "linux.arm64.2xlarge" },
        ]}
  # Test job: consumes the docker image and test matrix produced by the build job.
  linux-jammy-aarch64-py3_10-test:
    name: linux-jammy-aarch64-py3.10
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-jammy-aarch64-py3_10-build
    permissions:
      id-token: write
      contents: read
    with:
      build-environment: linux-jammy-aarch64-py3.10
      docker-image: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.test-matrix }}

View File

@ -26,7 +26,9 @@ from torch.multiprocessing import current_process, get_context
from torch.testing._internal.common_utils import (
FILE_SCHEMA,
get_report_path,
IS_ARM64,
IS_CI,
IS_LINUX,
IS_MACOS,
parser as common_parser,
retry_shell,
@ -265,6 +267,10 @@ CORE_TEST_LIST = [
"test_torch",
]
# Test modules run on the aarch64 Linux platform (a subset of the full TEST list).
ARM64_LINUX_TEST_LIST = ["test_modules"]
# if a test file takes longer than 5 min, we add it to TARGET_DET_LIST
SLOW_TEST_THRESHOLD = 300
@ -1298,6 +1304,10 @@ def can_run_in_pytest(test):
def get_selected_tests(options) -> List[str]:
if IS_ARM64 and IS_LINUX:
selected_tests = ARM64_LINUX_TEST_LIST
return selected_tests
selected_tests = options.include
# filter if there's JIT only and distributed only test options