mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-27 17:54:55 +08:00
Compare commits
130 Commits
v2.2.1-rc1
...
v2.1.2
| Author | SHA1 | Date | |
|---|---|---|---|
| a8e7c98cb9 | |||
| 448700d18e | |||
| 59656491f3 | |||
| 41210eaedc | |||
| 3183bcd417 | |||
| b5a89bbc5f | |||
| 3f662b6255 | |||
| 614af50378 | |||
| b3b22d7390 | |||
| 7405d70c30 | |||
| d62c757533 | |||
| 7833889a44 | |||
| 4c55dc5035 | |||
| f58669bc5f | |||
| 33106b706e | |||
| 4b4c012a60 | |||
| 47ac50248a | |||
| dc96ecb8ac | |||
| 18a2ed1db1 | |||
| b2e1277247 | |||
| b249946c40 | |||
| ee79fc8a35 | |||
| 084343ee12 | |||
| 8a178f153e | |||
| 2353915d69 | |||
| c1bc460377 | |||
| 2dc37f4f70 | |||
| f82d6e41a4 | |||
| c79d2936d0 | |||
| ab5ea22c1d | |||
| 5274580eb0 | |||
| cd5859373c | |||
| 7cc6081f87 | |||
| af1590cdf4 | |||
| 3f59221062 | |||
| ab5b9192ce | |||
| 736ebd3313 | |||
| 6ba919da27 | |||
| cc54a5072e | |||
| 3788d86e3e | |||
| 1f0450eed2 | |||
| 9570baa150 | |||
| b3b274ddcb | |||
| 5bcfb1b9b4 | |||
| c496f9a40b | |||
| 39a66a66fe | |||
| ed87177528 | |||
| c07240e5e4 | |||
| bb96803a35 | |||
| 3002bf71e6 | |||
| e7892b2e02 | |||
| 909fcf9b21 | |||
| 0bc598a604 | |||
| dd7fb44d20 | |||
| 6026c29db0 | |||
| 0f9ac00ac6 | |||
| 209f2fa8ff | |||
| fa1db4310d | |||
| e6702486f6 | |||
| e68aa76642 | |||
| 88cde0c37c | |||
| e4c42a93bc | |||
| 7bcf7da3a2 | |||
| 1841d54370 | |||
| fca42334be | |||
| 539a971161 | |||
| 9287a0cf59 | |||
| c464075d5d | |||
| 1b4161c686 | |||
| 28220534de | |||
| da9639c752 | |||
| e534243ec2 | |||
| 01fa8c140a | |||
| 5aae979614 | |||
| ced78cc2a7 | |||
| d8db5808ce | |||
| 889811ab5b | |||
| 1191449343 | |||
| 6d9fad8474 | |||
| ed62318bea | |||
| ee67c4dd6a | |||
| 5529b81631 | |||
| 7e23b4907d | |||
| 71c9d5c3a6 | |||
| 91e414957b | |||
| ce3ed7f293 | |||
| bd372d460b | |||
| 12b8c26f35 | |||
| 7397cf324c | |||
| fa8259db8d | |||
| d83c8287ea | |||
| ba19c52e31 | |||
| c5c9536aa7 | |||
| 6b7a777661 | |||
| ebd3224303 | |||
| 6e4ae13657 | |||
| 265e46e193 | |||
| da7290dfbd | |||
| 828992cf13 | |||
| 48246f3dfb | |||
| 7d6971dcee | |||
| 5417e23ba8 | |||
| 7a9101951d | |||
| 03e7f0b99d | |||
| c0e7239f43 | |||
| 04c1e07fd7 | |||
| cb4362ba5f | |||
| bddd30ca7a | |||
| 9cc99906e9 | |||
| a49fca4dd4 | |||
| 83964c761e | |||
| 085bd1da62 | |||
| 90452f41e3 | |||
| 35c3d5a080 | |||
| d07ac50e26 | |||
| 8a3b017769 | |||
| a82894b0d3 | |||
| 050fc31538 | |||
| b3cb05b396 | |||
| fec68a2799 | |||
| f139dda1cc | |||
| 5252dfb762 | |||
| da1ccca830 | |||
| c9cbdaf24f | |||
| f187e42a54 | |||
| 9175987fcc | |||
| d8e6594fb8 | |||
| f82c027774 | |||
| 6d20b39d3f | |||
| 17f400404f |
@ -71,9 +71,6 @@ if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
|
||||
DOCKERFILE="${OS}-cuda/Dockerfile"
|
||||
elif [[ "$image" == *rocm* ]]; then
|
||||
DOCKERFILE="${OS}-rocm/Dockerfile"
|
||||
elif [[ "$image" == *cuda*linter* ]]; then
|
||||
# Use a separate Dockerfile for linter to keep a small image size
|
||||
DOCKERFILE="linter-cuda/Dockerfile"
|
||||
elif [[ "$image" == *linter* ]]; then
|
||||
# Use a separate Dockerfile for linter to keep a small image size
|
||||
DOCKERFILE="linter/Dockerfile"
|
||||
@ -132,6 +129,35 @@ case "$image" in
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc7)
|
||||
CUDA_VERSION=11.8.0
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc7-inductor-benchmarks)
|
||||
CUDA_VERSION=11.8.0
|
||||
CUDNN_VERSION=8
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
|
||||
CUDA_VERSION=12.1.1
|
||||
CUDNN_VERSION=8
|
||||
@ -155,13 +181,13 @@ case "$image" in
|
||||
CONDA_CMAKE=yes
|
||||
ONNX=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3-clang9-android-ndk-r21e)
|
||||
pytorch-linux-focal-py3-clang7-android-ndk-r19c)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CLANG_VERSION=9
|
||||
CLANG_VERSION=7
|
||||
LLVMDEV=yes
|
||||
PROTOBUF=yes
|
||||
ANDROID=yes
|
||||
ANDROID_NDK_VERSION=r21e
|
||||
ANDROID_NDK_VERSION=r19c
|
||||
GRADLE_VERSION=6.8.3
|
||||
NINJA_VERSION=1.9.0
|
||||
;;
|
||||
@ -202,7 +228,7 @@ case "$image" in
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=5.6
|
||||
ROCM_VERSION=5.4.2
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
@ -213,11 +239,22 @@ case "$image" in
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=5.7
|
||||
ROCM_VERSION=5.6
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-py3.8-gcc7)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=7
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
DOCS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=11
|
||||
@ -249,12 +286,6 @@ case "$image" in
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3-clang15-asan)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
CLANG_VERSION=15
|
||||
CONDA_CMAKE=yes
|
||||
VISION=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.8-gcc11)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
GCC_VERSION=11
|
||||
@ -266,12 +297,6 @@ case "$image" in
|
||||
TRITON=yes
|
||||
DOCS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3-clang12-executorch)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
CLANG_VERSION=12
|
||||
CONDA_CMAKE=yes
|
||||
EXECUTORCH=yes
|
||||
;;
|
||||
pytorch-linux-focal-linter)
|
||||
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||
# We will need to update mypy version eventually, but that's for another day. The task
|
||||
@ -279,11 +304,6 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter)
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CUDA_VERSION=11.8
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
*)
|
||||
# Catch-all for builds that are not hardcoded.
|
||||
PROTOBUF=yes
|
||||
@ -301,9 +321,6 @@ case "$image" in
|
||||
extract_version_from_image_name rocm ROCM_VERSION
|
||||
NINJA_VERSION=1.9.0
|
||||
TRITON=yes
|
||||
# To ensure that any ROCm config will build using conda cmake
|
||||
# and thus have LAPACK/MKL enabled
|
||||
CONDA_CMAKE=yes
|
||||
fi
|
||||
if [[ "$image" == *centos7* ]]; then
|
||||
NINJA_VERSION=1.10.2
|
||||
@ -337,11 +354,14 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
|
||||
fi
|
||||
|
||||
# Build image
|
||||
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
|
||||
# it's no longer needed.
|
||||
docker build \
|
||||
--no-cache \
|
||||
--progress=plain \
|
||||
--build-arg "BUILD_ENVIRONMENT=${image}" \
|
||||
--build-arg "PROTOBUF=${PROTOBUF:-}" \
|
||||
--build-arg "THRIFT=${THRIFT:-}" \
|
||||
--build-arg "LLVMDEV=${LLVMDEV:-}" \
|
||||
--build-arg "DB=${DB:-}" \
|
||||
--build-arg "VISION=${VISION:-}" \
|
||||
@ -373,7 +393,6 @@ docker build \
|
||||
--build-arg "ONNX=${ONNX}" \
|
||||
--build-arg "DOCS=${DOCS}" \
|
||||
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
|
||||
--build-arg "EXECUTORCH=${EXECUTORCH}" \
|
||||
-f $(dirname ${DOCKERFILE})/Dockerfile \
|
||||
-t "$tmp_tag" \
|
||||
"$@" \
|
||||
|
||||
@ -98,18 +98,6 @@ COPY ./common/install_ninja.sh install_ninja.sh
|
||||
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
|
||||
RUN rm install_ninja.sh
|
||||
|
||||
ARG TRITON
|
||||
# Install triton, this needs to be done before sccache because the latter will
|
||||
# try to reach out to S3, which docker build runners don't have access
|
||||
ENV CMAKE_C_COMPILER cc
|
||||
ENV CMAKE_CXX_COMPILER c++
|
||||
COPY ./common/install_triton.sh install_triton.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
|
||||
COPY triton_version.txt triton_version.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt
|
||||
|
||||
# Install ccache/sccache (do this last, so we get priority in PATH)
|
||||
COPY ./common/install_cache.sh install_cache.sh
|
||||
ENV PATH /opt/cache/bin:$PATH
|
||||
|
||||
@ -1 +0,0 @@
|
||||
b2f5dfe80704404298467347b8ee3ac229efed47
|
||||
@ -1 +1 @@
|
||||
6c26faa159b79a42d7fa46cb66e2d21523351987
|
||||
4.27.4
|
||||
|
||||
@ -1 +1 @@
|
||||
730b907b4d45a4713cbc425cbf224c46089fd514
|
||||
b9d43c7dcac1fe05e851dd7be7187b108af593d2
|
||||
|
||||
@ -1 +1 @@
|
||||
dafe1459823b9549417ed95e9720f1b594fab329
|
||||
34f8189eae57a23cc15b4b4f032fe25757e0db8e
|
||||
|
||||
@ -1 +1 @@
|
||||
bcad9dabe15021c53b6a88296e9d7a210044f108
|
||||
e6216047b8b0aef1fe8da6ca8667a3ad0a016411
|
||||
|
||||
@ -9,7 +9,10 @@ install_ubuntu() {
|
||||
# "$UBUNTU_VERSION" == "18.04"*
|
||||
# instead of
|
||||
# "$UBUNTU_VERSION" == "18.04"
|
||||
if [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
|
||||
if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
|
||||
cmake3="cmake=3.10*"
|
||||
maybe_libiomp_dev="libiomp-dev"
|
||||
elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
|
||||
cmake3="cmake=3.16*"
|
||||
maybe_libiomp_dev=""
|
||||
elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
|
||||
@ -20,9 +23,7 @@ install_ubuntu() {
|
||||
maybe_libiomp_dev="libiomp-dev"
|
||||
fi
|
||||
|
||||
if [[ "$CLANG_VERSION" == 15 ]]; then
|
||||
maybe_libomp_dev="libomp-15-dev"
|
||||
elif [[ "$CLANG_VERSION" == 12 ]]; then
|
||||
if [[ "$CLANG_VERSION" == 12 ]]; then
|
||||
maybe_libomp_dev="libomp-12-dev"
|
||||
elif [[ "$CLANG_VERSION" == 10 ]]; then
|
||||
maybe_libomp_dev="libomp-10-dev"
|
||||
@ -61,7 +62,6 @@ install_ubuntu() {
|
||||
${maybe_libiomp_dev} \
|
||||
libyaml-dev \
|
||||
libz-dev \
|
||||
libjemalloc2 \
|
||||
libjpeg-dev \
|
||||
libasound2-dev \
|
||||
libsndfile-dev \
|
||||
|
||||
@ -54,13 +54,23 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
|
||||
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ]; then
|
||||
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS}
|
||||
else
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.10" ]; then
|
||||
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
|
||||
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
|
||||
elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
|
||||
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
|
||||
else
|
||||
# Install `typing-extensions` for 3.7
|
||||
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS} typing-extensions
|
||||
fi
|
||||
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
# and libpython-static for torch deploy
|
||||
conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
|
||||
# This is only supported in 3.8 upward
|
||||
if [ "$MINOR_PYTHON_VERSION" -gt "7" ]; then
|
||||
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
|
||||
# and libpython-static for torch deploy
|
||||
conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
|
||||
fi
|
||||
|
||||
# Use conda cmake in some cases. Conda cmake will be newer than our supported
|
||||
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
|
||||
@ -79,7 +89,13 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
# Install some other packages, including those needed for Python test reporting
|
||||
pip_install -r /opt/conda/requirements-ci.txt
|
||||
|
||||
pip_install -U scikit-learn
|
||||
# Update scikit-learn to a python-3.8 compatible version
|
||||
if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
|
||||
pip_install -U scikit-learn
|
||||
else
|
||||
# Pinned scikit-learn due to https://github.com/scikit-learn/scikit-learn/issues/14485 (affects gcc 5.5 only)
|
||||
pip_install scikit-learn==0.20.3
|
||||
fi
|
||||
|
||||
if [ -n "$DOCS" ]; then
|
||||
apt-get update
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
clone_executorch() {
|
||||
EXECUTORCH_PINNED_COMMIT=$(get_pinned_commit executorch)
|
||||
|
||||
# Clone the Executorch
|
||||
git clone https://github.com/pytorch/executorch.git
|
||||
|
||||
# and fetch the target commit
|
||||
pushd executorch
|
||||
git checkout "${EXECUTORCH_PINNED_COMMIT}"
|
||||
git submodule update --init
|
||||
popd
|
||||
|
||||
chown -R jenkins executorch
|
||||
}
|
||||
|
||||
install_buck2() {
|
||||
pushd executorch/.ci/docker
|
||||
|
||||
BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
|
||||
source common/install_buck.sh
|
||||
|
||||
popd
|
||||
}
|
||||
|
||||
install_conda_dependencies() {
|
||||
pushd executorch/.ci/docker
|
||||
# Install conda dependencies like flatbuffer
|
||||
conda_install --file conda-env-ci.txt
|
||||
popd
|
||||
}
|
||||
|
||||
install_pip_dependencies() {
|
||||
pushd executorch/.ci/docker
|
||||
# Install all Python dependencies
|
||||
pip_install -r requirements-ci.txt
|
||||
popd
|
||||
}
|
||||
|
||||
setup_executorch() {
|
||||
pushd executorch
|
||||
source .ci/scripts/utils.sh
|
||||
|
||||
install_flatc_from_source
|
||||
pip_install .
|
||||
build_executorch_runner "cmake"
|
||||
|
||||
# Make sure that all the newly generate files are owned by Jenkins
|
||||
chown -R jenkins .
|
||||
popd
|
||||
}
|
||||
|
||||
clone_executorch
|
||||
install_buck2
|
||||
install_conda_dependencies
|
||||
install_pip_dependencies
|
||||
setup_executorch
|
||||
@ -6,21 +6,19 @@ source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
function install_huggingface() {
|
||||
local version
|
||||
commit=$(get_pinned_commit huggingface)
|
||||
version=$(get_pinned_commit huggingface)
|
||||
pip_install pandas==2.0.3
|
||||
pip_install "git+https://github.com/huggingface/transformers@${commit}"
|
||||
pip_install "transformers==${version}"
|
||||
}
|
||||
|
||||
function install_timm() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit timm)
|
||||
pip_install pandas==2.0.3
|
||||
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
|
||||
# Clean up
|
||||
conda_run pip uninstall -y cmake torch torchvision triton
|
||||
pip_install "git+https://github.com/rwightman/pytorch-image-models@${commit}"
|
||||
}
|
||||
|
||||
# Pango is needed for weasyprint which is needed for doctr
|
||||
conda_install pango
|
||||
install_huggingface
|
||||
install_timm
|
||||
# install_timm
|
||||
|
||||
@ -10,13 +10,13 @@ retry () {
|
||||
|
||||
# A bunch of custom pip dependencies for ONNX
|
||||
pip_install \
|
||||
beartype==0.15.0 \
|
||||
beartype==0.10.4 \
|
||||
filelock==3.9.0 \
|
||||
flatbuffers==2.0 \
|
||||
mock==5.0.1 \
|
||||
ninja==1.10.2 \
|
||||
networkx==2.0 \
|
||||
numpy==1.24.2
|
||||
numpy==1.22.4
|
||||
|
||||
# ONNXRuntime should be installed before installing
|
||||
# onnx-weekly. Otherwise, onnx-weekly could be
|
||||
@ -26,13 +26,13 @@ pip_install \
|
||||
pytest-cov==4.0.0 \
|
||||
pytest-subtests==0.10.0 \
|
||||
tabulate==0.9.0 \
|
||||
transformers==4.32.1
|
||||
transformers==4.31.0
|
||||
|
||||
pip_install coloredlogs packaging
|
||||
retry pip_install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --no-cache-dir --no-input ort-nightly==1.17.0.dev20231005006
|
||||
retry pip_install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --no-cache-dir --no-input ort-nightly==1.16.0.dev20230908001
|
||||
|
||||
pip_install -i https://test.pypi.org/simple/ onnx==1.15.0rc2
|
||||
pip_install onnxscript==0.1.0.dev20231128 --no-deps
|
||||
pip_install onnx==1.14.1
|
||||
pip_install onnxscript-preview==0.1.0.dev20230828 --no-deps
|
||||
|
||||
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
||||
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
||||
|
||||
@ -5,10 +5,8 @@ set -ex
|
||||
# "install" hipMAGMA into /opt/rocm/magma by copying after build
|
||||
git clone https://bitbucket.org/icl/magma.git
|
||||
pushd magma
|
||||
|
||||
# Version 2.7.2 + ROCm related updates
|
||||
git checkout 823531632140d0edcb7e77c3edc0e837421471c5
|
||||
|
||||
# Fixes memory leaks of magma found while executing linalg UTs
|
||||
git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f
|
||||
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
|
||||
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
|
||||
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
|
||||
|
||||
14
.ci/docker/common/install_thrift.sh
Executable file
14
.ci/docker/common/install_thrift.sh
Executable file
@ -0,0 +1,14 @@
|
||||
apt-get update
|
||||
apt-get install -y sudo wget libboost-dev libboost-test-dev libboost-program-options-dev libboost-filesystem-dev libboost-thread-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev
|
||||
wget https://www-us.apache.org/dist/thrift/0.12.0/thrift-0.12.0.tar.gz
|
||||
tar -xvf thrift-0.12.0.tar.gz
|
||||
cd thrift-0.12.0
|
||||
for file in ./compiler/cpp/Makefile*; do
|
||||
sed -i 's/\-Werror//' $file
|
||||
done
|
||||
./bootstrap.sh
|
||||
./configure --without-php --without-java --without-python --without-nodejs --without-go --without-ruby
|
||||
sudo make
|
||||
sudo make install
|
||||
cd ..
|
||||
rm thrift-0.12.0.tar.gz
|
||||
@ -23,10 +23,8 @@ fi
|
||||
# The logic here is copied from .ci/pytorch/common_utils.sh
|
||||
TRITON_PINNED_COMMIT=$(get_pinned_commit ${TRITON_TEXT_FILE})
|
||||
|
||||
if [ -n "${UBUNTU_VERSION}" ];then
|
||||
apt update
|
||||
apt-get install -y gpg-agent
|
||||
fi
|
||||
apt update
|
||||
apt-get install -y gpg-agent
|
||||
|
||||
if [ -n "${CONDA_CMAKE}" ]; then
|
||||
# Keep the current cmake and numpy version here, so we can reinstall them later
|
||||
@ -38,12 +36,12 @@ if [ -z "${MAX_JOBS}" ]; then
|
||||
export MAX_JOBS=$(nproc)
|
||||
fi
|
||||
|
||||
if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
|
||||
if [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
|
||||
# Triton needs at least gcc-9 to build
|
||||
apt-get install -y g++-9
|
||||
|
||||
CXX=g++-9 pip_install "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
|
||||
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
|
||||
elif [ -n "${CLANG_VERSION}" ]; then
|
||||
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get install -y g++-9
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
ARG UBUNTU_VERSION
|
||||
|
||||
FROM ubuntu:${UBUNTU_VERSION}
|
||||
|
||||
ARG UBUNTU_VERSION
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# Install common dependencies (so that this step can be cached separately)
|
||||
COPY ./common/install_base.sh install_base.sh
|
||||
RUN bash ./install_base.sh && rm install_base.sh
|
||||
|
||||
# Install missing libomp-dev
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libomp-dev && apt-get autoclean && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# Install user
|
||||
COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Install cuda and cudnn
|
||||
ARG CUDA_VERSION
|
||||
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
|
||||
ENV DESIRED_CUDA ${CUDA_VERSION}
|
||||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
||||
|
||||
# Note that Docker build forbids copying file outside the build context
|
||||
COPY ./common/install_linter.sh install_linter.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_linter.sh
|
||||
RUN rm install_linter.sh common_utils.sh
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
@ -75,10 +75,10 @@ librosa>=0.6.2 ; python_version < "3.11"
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
mypy==1.7.0
|
||||
mypy==1.4.1
|
||||
# Pin MyPy version because new errors are likely to appear with each release
|
||||
#Description: linter
|
||||
#Pinned versions: 1.7.0
|
||||
#Pinned versions: 1.4.1
|
||||
#test that import: test_typing.py, test_type_hints.py
|
||||
|
||||
networkx==2.8.8
|
||||
@ -124,22 +124,10 @@ opt-einsum==3.3
|
||||
#Pinned versions: 3.3
|
||||
#test that import: test_linalg.py
|
||||
|
||||
optree==0.9.1
|
||||
#Description: A library for tree manipulation
|
||||
#Pinned versions: 0.9.1
|
||||
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
|
||||
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
|
||||
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
|
||||
#test_expanded_weights.py, test_decomp.py, test_overrides.py, test_masked.py,
|
||||
#test_ops.py, test_prims.py, test_subclass.py, test_functionalization.py,
|
||||
#test_schema_check.py, test_profiler_tree.py, test_meta.py, test_torchxla_num_output.py,
|
||||
#test_utils.py, test_proxy_tensor.py, test_memory_profiler.py, test_view_ops.py,
|
||||
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
|
||||
#test_fake_tensor.py, test_mps.py
|
||||
|
||||
pillow==10.0.1
|
||||
pillow==9.3.0 ; python_version <= "3.8"
|
||||
pillow==9.5.0 ; python_version > "3.8"
|
||||
#Description: Python Imaging Library fork
|
||||
#Pinned versions: 10.0.1
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
protobuf==3.20.2
|
||||
@ -292,14 +280,3 @@ tensorboard==2.13.0
|
||||
#Description: Also included in .ci/docker/requirements-docs.txt
|
||||
#Pinned versions:
|
||||
#test that import: test_tensorboard
|
||||
|
||||
pywavelets==1.4.1
|
||||
#Description: This is a requirement of scikit-image, we need to pin
|
||||
# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
|
||||
#Pinned versions: 1.4.1
|
||||
#test that import:
|
||||
|
||||
lxml==4.9.4
|
||||
#Description: This is a requirement of unittest-xml-reporting
|
||||
# have to pin to 4.9.4 because 5.0.0 release on Dec 29th missing
|
||||
# Python-3.9 binaries
|
||||
|
||||
@ -1 +1 @@
|
||||
2.2.0
|
||||
2.1.0
|
||||
|
||||
@ -79,6 +79,12 @@ ENV OPENSSL_ROOT_DIR /opt/openssl
|
||||
RUN bash ./install_openssl.sh
|
||||
ENV OPENSSL_DIR /opt/openssl
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
|
||||
RUN rm install_cmake.sh
|
||||
|
||||
ARG INDUCTOR_BENCHMARKS
|
||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
@ -87,12 +93,6 @@ COPY ci_commit_pins/timm.txt timm.txt
|
||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
|
||||
RUN rm install_cmake.sh
|
||||
|
||||
ARG TRITON
|
||||
# Install triton, this needs to be done before sccache because the latter will
|
||||
# try to reach out to S3, which docker build runners don't have access
|
||||
|
||||
@ -17,6 +17,13 @@ ARG LLVMDEV
|
||||
COPY ./common/install_clang.sh install_clang.sh
|
||||
RUN bash ./install_clang.sh && rm install_clang.sh
|
||||
|
||||
# (optional) Install thrift.
|
||||
ARG THRIFT
|
||||
COPY ./common/install_thrift.sh install_thrift.sh
|
||||
RUN if [ -n "${THRIFT}" ]; then bash ./install_thrift.sh; fi
|
||||
RUN rm install_thrift.sh
|
||||
ENV INSTALLED_THRIFT ${THRIFT}
|
||||
|
||||
# Install user
|
||||
COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
@ -146,14 +153,6 @@ COPY ci_commit_pins/triton.txt triton.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton.txt
|
||||
|
||||
ARG EXECUTORCH
|
||||
# Build and install executorch
|
||||
COPY ./common/install_executorch.sh install_executorch.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/executorch.txt executorch.txt
|
||||
RUN if [ -n "${EXECUTORCH}" ]; then bash ./install_executorch.sh; fi
|
||||
RUN rm install_executorch.sh common_utils.sh executorch.txt
|
||||
|
||||
ARG ONNX
|
||||
# Install ONNX dependencies
|
||||
COPY ./common/install_onnx.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -3,6 +3,11 @@
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
# Use to retry ONNX test, only retry it twice
|
||||
retry () {
|
||||
"$@" || (sleep 60 && "$@")
|
||||
}
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
|
||||
# TODO: This can be removed later once vision is also part of the Docker image
|
||||
pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
|
||||
@ -11,5 +16,5 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
|
||||
# NB: ONNX test is fast (~15m) so it's ok to retry it few more times to avoid any flaky issue, we
|
||||
# need to bring this to the standard PyTorch run_test eventually. The issue will be tracked in
|
||||
# https://github.com/pytorch/pytorch/issues/98626
|
||||
"$ROOT_DIR/scripts/onnx/test.sh"
|
||||
retry "$ROOT_DIR/scripts/onnx/test.sh"
|
||||
fi
|
||||
|
||||
@ -28,8 +28,6 @@ echo "Environment variables:"
|
||||
env
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
# Use jemalloc during compilation to mitigate https://github.com/pytorch/pytorch/issues/116289
|
||||
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
|
||||
echo "NVCC version:"
|
||||
nvcc --version
|
||||
fi
|
||||
@ -65,12 +63,6 @@ else
|
||||
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
|
||||
# To build test_edge_op_registration
|
||||
export BUILD_EXECUTORCH=ON
|
||||
export USE_CUDA=0
|
||||
fi
|
||||
|
||||
if ! which conda; then
|
||||
# In ROCm CIs, we are doing cross compilation on build machines with
|
||||
# intel cpu and later run tests on machines with amd cpu.
|
||||
@ -167,14 +159,6 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* && -z "$TORCH_CUDA_ARCH_LIST" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
||||
# memory to build and will OOM
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ "$TORCH_CUDA_ARCH_LIST" == *"8.6"* || "$TORCH_CUDA_ARCH_LIST" == *"8.0"* ]]; then
|
||||
echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
|
||||
echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
|
||||
export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
|
||||
fi
|
||||
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
|
||||
export CC=clang
|
||||
export CXX=clang++
|
||||
@ -184,6 +168,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
|
||||
export LDSHARED="clang --shared"
|
||||
export USE_CUDA=0
|
||||
export USE_ASAN=1
|
||||
export USE_MKLDNN=0
|
||||
export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow"
|
||||
unset USE_LLVM
|
||||
fi
|
||||
|
||||
@ -43,7 +43,7 @@ function assert_git_not_dirty() {
|
||||
# TODO: we should add an option to `build_amd.py` that reverts the repo to
|
||||
# an unmodified state.
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *xla* ]] ; then
|
||||
git_status=$(git status --porcelain | grep -v '?? third_party' || true)
|
||||
git_status=$(git status --porcelain)
|
||||
if [[ $git_status ]]; then
|
||||
echo "Build left local git repository checkout dirty"
|
||||
echo "git status --porcelain:"
|
||||
@ -171,9 +171,16 @@ function install_torchrec_and_fbgemm() {
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
|
||||
}
|
||||
|
||||
function install_numpy_pytorch_interop() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit numpy_pytorch_interop)
|
||||
# TODO: --no-use-pep517 will result in failure.
|
||||
pip_install --user "git+https://github.com/Quansight-Labs/numpy_pytorch_interop.git@${commit}"
|
||||
}
|
||||
|
||||
function clone_pytorch_xla() {
|
||||
if [[ ! -d ./xla ]]; then
|
||||
git clone --recursive -b r2.2 https://github.com/pytorch/xla.git
|
||||
git clone --recursive -b r2.1 https://github.com/pytorch/xla.git
|
||||
pushd xla
|
||||
# pin the xla hash so that we don't get broken by changes to xla
|
||||
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
|
||||
@ -205,6 +212,15 @@ function test_torch_deploy(){
|
||||
popd
|
||||
}
|
||||
|
||||
function install_timm() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit timm)
|
||||
pip_install pandas
|
||||
pip_install scipy
|
||||
pip_install z3-solver
|
||||
pip_install "git+https://github.com/rwightman/pytorch-image-models@${commit}"
|
||||
}
|
||||
|
||||
function checkout_install_torchbench() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit torchbench)
|
||||
|
||||
@ -43,7 +43,7 @@ cross_compile_arm64() {
|
||||
compile_arm64() {
|
||||
# Compilation for arm64
|
||||
# TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation were done with OpenMP disabled)
|
||||
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
||||
USE_DISTRIBUTED=0 USE_OPENMP=0 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
||||
}
|
||||
|
||||
compile_x86_64() {
|
||||
|
||||
@ -36,12 +36,10 @@ time python test/run_test.py --verbose -i distributed/test_functional_api
|
||||
|
||||
|
||||
# DTensor tests
|
||||
time python test/run_test.py --verbose -i distributed/_tensor/test_device_mesh
|
||||
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
|
||||
time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
|
||||
|
||||
# DeviceMesh test
|
||||
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
||||
|
||||
# DTensor/TP tests
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_ddp_2d_parallel
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_fsdp_2d_parallel
|
||||
|
||||
@ -80,11 +80,6 @@ if [[ "$BUILD_ENVIRONMENT" != *bazel* ]]; then
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}")
|
||||
fi
|
||||
|
||||
# Reduce set of tests to include when running run_test.py
|
||||
if [[ -n $TESTS_TO_INCLUDE ]]; then
|
||||
echo "Setting INCLUDE_CLAUSE"
|
||||
INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE"
|
||||
fi
|
||||
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
@ -153,7 +148,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
|
||||
export PYTORCH_TEST_WITH_ASAN=1
|
||||
export PYTORCH_TEST_WITH_UBSAN=1
|
||||
# TODO: Figure out how to avoid hard-coding these paths
|
||||
export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-15/bin/llvm-symbolizer
|
||||
export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-12/bin/llvm-symbolizer
|
||||
export TORCH_USE_RTLD_GLOBAL=1
|
||||
# NB: We load libtorch.so with RTLD_GLOBAL for UBSAN, unlike our
|
||||
# default behavior.
|
||||
@ -187,7 +182,7 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
|
||||
# have, and it applies to child processes.
|
||||
|
||||
# TODO: get rid of the hardcoded path
|
||||
export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
|
||||
export LD_PRELOAD=/usr/lib/llvm-12/lib/clang/12.0.1/lib/linux/libclang_rt.asan-x86_64.so
|
||||
# Disable valgrind for asan
|
||||
export VALGRIND=OFF
|
||||
# Increase stack size, because ASAN red zones use more stack
|
||||
@ -233,16 +228,13 @@ test_python_shard() {
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Bare --include flag is not supported and quoting for lint ends up with flag not being interpreted correctly
|
||||
# shellcheck disable=SC2086
|
||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose
|
||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS" --verbose
|
||||
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
test_python() {
|
||||
# shellcheck disable=SC2086
|
||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose
|
||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --verbose
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
@ -289,10 +281,6 @@ test_inductor_distributed() {
|
||||
# Smuggle a few multi-gpu tests here so that we don't have to request another large node
|
||||
echo "Testing multi_gpu tests in test_torchinductor"
|
||||
pytest test/inductor/test_torchinductor.py -k test_multi_gpu
|
||||
pytest test/inductor/test_aot_inductor.py -k test_non_default_cuda_device
|
||||
pytest test/inductor/test_aot_inductor.py -k test_replicate_on_devices
|
||||
pytest test/distributed/_tensor/test_dtensor_compile.py
|
||||
pytest test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
|
||||
|
||||
# this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
|
||||
# with if required # gpus aren't available
|
||||
@ -315,17 +303,14 @@ test_inductor() {
|
||||
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
|
||||
# For example 'dynamic_aot_eager_torchbench' TEST_CONFIG means we run
|
||||
# the benchmark script with '--dynamic-shapes --backend aot_eager --device cuda'
|
||||
# The matrix of test options is specified in .github/workflows/inductor.yml,
|
||||
# .github/workflows/inductor-periodic.yml, and
|
||||
# .github/workflows/inductor-perf-test-nightly.yml
|
||||
# The matrix of test options is specified in .github/workflows/periodic.yml
|
||||
# and .github/workflows/inductor.yml
|
||||
DYNAMO_BENCHMARK_FLAGS=()
|
||||
|
||||
if [[ "${TEST_CONFIG}" == *dynamo_eager* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--backend eager)
|
||||
elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor)
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--inductor)
|
||||
fi
|
||||
@ -334,7 +319,7 @@ if [[ "${TEST_CONFIG}" == *dynamic* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes --dynamic-batch-only)
|
||||
fi
|
||||
|
||||
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
|
||||
else
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
|
||||
@ -398,11 +383,6 @@ test_perf_for_dashboard() {
|
||||
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
|
||||
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_cuda_${target}.csv"
|
||||
fi
|
||||
if [[ "$DASHBOARD_TAG" == *freeze_autotune_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
|
||||
TORCHINDUCTOR_MAX_AUTOTUNE=1 python "benchmarks/dynamo/$suite.py" \
|
||||
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
|
||||
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_cuda_${target}.csv"
|
||||
fi
|
||||
if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then
|
||||
python "benchmarks/dynamo/$suite.py" \
|
||||
"${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
|
||||
@ -453,12 +433,19 @@ test_single_dynamo_benchmark() {
|
||||
"${DYNAMO_BENCHMARK_FLAGS[@]}" \
|
||||
"$@" "${partition_flags[@]}" \
|
||||
--output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
python benchmarks/dynamo/check_graph_breaks.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
|
||||
if [[ "${TEST_CONFIG}" == *inductor* ]] && [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
|
||||
# other jobs (e.g. periodic, cpu-accuracy) may have different set of expected models.
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
python benchmarks/dynamo/check_graph_breaks.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
else
|
||||
python benchmarks/dynamo/check_csv.py \
|
||||
-f "$TEST_REPORTS_DIR/${name}_${suite}.csv"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
@ -476,10 +463,8 @@ test_dynamo_benchmark() {
|
||||
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
|
||||
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
|
||||
else
|
||||
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --float32 "$@"
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
else
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
|
||||
@ -494,13 +479,9 @@ test_inductor_torchbench_smoketest_perf() {
|
||||
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
|
||||
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
|
||||
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
|
||||
# The threshold value needs to be actively maintained to make this check useful
|
||||
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
|
||||
|
||||
python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
|
||||
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
|
||||
# The threshold value needs to be actively maintained to make this check useful
|
||||
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 5.2
|
||||
# the reference speedup value is hardcoded in check_hf_bert_perf_csv.py
|
||||
# this value needs to be actively maintained to make this check useful
|
||||
python benchmarks/dynamo/check_hf_bert_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
|
||||
|
||||
# Check memory compression ratio for a few models
|
||||
for test in hf_Albert timm_vision_transformer; do
|
||||
@ -624,7 +605,7 @@ test_libtorch_jit() {
|
||||
|
||||
# Run jit and lazy tensor cpp tests together to finish them faster
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then
|
||||
LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy
|
||||
LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/nvfuser_tests cpp/test_lazy
|
||||
else
|
||||
# CUDA tests have already been skipped when CUDA is not available
|
||||
python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy -k "not CUDA"
|
||||
@ -685,8 +666,7 @@ test_vulkan() {
|
||||
|
||||
test_distributed() {
|
||||
echo "Testing distributed python tests"
|
||||
# shellcheck disable=SC2086
|
||||
time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" $INCLUDE_CLAUSE --verbose
|
||||
time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||
assert_git_not_dirty
|
||||
|
||||
if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then
|
||||
@ -995,28 +975,9 @@ test_docs_test() {
|
||||
}
|
||||
|
||||
test_executorch() {
|
||||
pushd /executorch
|
||||
|
||||
echo "Install torchvision and torchaudio"
|
||||
# TODO(huydhn): Switch this to the pinned commits on ExecuTorch once they are
|
||||
# there. These libraries need to be built here, and not part of the Docker
|
||||
# image because they require the target version of torch to be installed first
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git"
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git"
|
||||
|
||||
echo "Run ExecuTorch regression tests for some models"
|
||||
# NB: This is a sample model, more can be added here
|
||||
export PYTHON_EXECUTABLE=python
|
||||
# TODO(huydhn): Add more coverage here using ExecuTorch's gather models script
|
||||
# shellcheck disable=SC1091
|
||||
source .ci/scripts/test.sh mv3 cmake xnnpack-quantization-delegation ''
|
||||
|
||||
popd
|
||||
|
||||
# Test torchgen generated code for Executorch.
|
||||
echo "Testing ExecuTorch op registration"
|
||||
echo "Testing Executorch op registration"
|
||||
"$BUILD_BIN_DIR"/test_edge_op_registration
|
||||
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
@ -1031,8 +992,6 @@ elif [[ "${TEST_CONFIG}" == *xla* ]]; then
|
||||
install_torchvision
|
||||
build_xla
|
||||
test_xla
|
||||
elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
|
||||
test_executorch
|
||||
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
|
||||
test_python_legacy_jit
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
|
||||
@ -1055,10 +1014,11 @@ elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
|
||||
test_dynamo_benchmark huggingface "$id"
|
||||
elif [[ "${TEST_CONFIG}" == *timm* ]]; then
|
||||
install_torchvision
|
||||
install_timm
|
||||
id=$((SHARD_NUMBER-1))
|
||||
test_dynamo_benchmark timm_models "$id"
|
||||
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
|
||||
install_torchaudio cpu
|
||||
else
|
||||
install_torchaudio cuda
|
||||
@ -1075,7 +1035,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
checkout_install_torchbench
|
||||
# Do this after checkout_install_torchbench to ensure we clobber any
|
||||
# nightlies that torchbench may pull in
|
||||
if [[ "${TEST_CONFIG}" != *cpu_inductor* ]]; then
|
||||
if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
|
||||
install_torchrec_and_fbgemm
|
||||
fi
|
||||
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
|
||||
@ -1087,10 +1047,12 @@ elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
|
||||
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_without_numpy
|
||||
install_torchvision
|
||||
install_numpy_pytorch_interop
|
||||
test_dynamo_shard 1
|
||||
test_aten
|
||||
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
install_torchvision
|
||||
install_numpy_pytorch_interop
|
||||
test_dynamo_shard 2
|
||||
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_without_numpy
|
||||
@ -1118,10 +1080,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
|
||||
test_libtorch
|
||||
elif [[ "${TEST_CONFIG}" = docs_test ]]; then
|
||||
test_docs_test
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
|
||||
install_torchvision
|
||||
test_python
|
||||
test_aten
|
||||
else
|
||||
install_torchvision
|
||||
install_monkeytype
|
||||
@ -1134,4 +1092,5 @@ else
|
||||
test_custom_backend
|
||||
test_torch_function_benchmark
|
||||
test_benchmarks
|
||||
test_executorch
|
||||
fi
|
||||
|
||||
@ -127,7 +127,8 @@ python -c "import os, glob; os.system('python -mpip install --no-index --no-deps
|
||||
|
||||
:: export test times so that potential sharded tests that'll branch off this build will use consistent data
|
||||
python tools/stats/export_test_times.py
|
||||
robocopy /E ".additional_ci_files" "%PYTORCH_FINAL_PACKAGE_DIR%\.additional_ci_files"
|
||||
copy /Y ".pytorch-test-times.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||
copy /Y ".pytorch-test-file-ratings.json" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||
|
||||
:: Also save build/.ninja_log as an artifact
|
||||
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
COMMON_TESTS = [
|
||||
(
|
||||
@ -54,4 +53,4 @@ if __name__ == "__main__":
|
||||
print("Reruning with traceback enabled")
|
||||
print("Command:", command_string)
|
||||
subprocess.run(command_args, check=False)
|
||||
sys.exit(e.returncode)
|
||||
exit(e.returncode)
|
||||
|
||||
@ -26,6 +26,11 @@ popd
|
||||
python test_custom_ops.py -v
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
:: TODO: fix and re-enable this test
|
||||
:: See https://github.com/pytorch/pytorch/issues/25155
|
||||
:: python test_custom_classes.py -v
|
||||
:: if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
python model.py --export-script-module="build/model.pt"
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
|
||||
@ -1,3 +1,7 @@
|
||||
:: Skip LibTorch tests when building a GPU binary and testing on a CPU machine
|
||||
:: because LibTorch tests are not well designed for this use case.
|
||||
if "%USE_CUDA%" == "0" IF NOT "%CUDA_VERSION%" == "cpu" exit /b 0
|
||||
|
||||
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
@ -17,7 +21,7 @@ if not errorlevel 0 exit /b 1
|
||||
cd %TMP_DIR_WIN%\build\torch\test
|
||||
for /r "." %%a in (*.exe) do (
|
||||
call :libtorch_check "%%~na" "%%~fa"
|
||||
if errorlevel 1 goto fail
|
||||
if errorlevel 1 exit /b 1
|
||||
)
|
||||
|
||||
goto :eof
|
||||
@ -30,6 +34,18 @@ set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\test
|
||||
:: Skip verify_api_visibility as it a compile level test
|
||||
if "%~1" == "verify_api_visibility" goto :eof
|
||||
|
||||
:: See https://github.com/pytorch/pytorch/issues/25161
|
||||
if "%~1" == "c10_metaprogramming_test" goto :eof
|
||||
if "%~1" == "module_test" goto :eof
|
||||
:: See https://github.com/pytorch/pytorch/issues/25312
|
||||
if "%~1" == "converter_nomigraph_test" goto :eof
|
||||
:: See https://github.com/pytorch/pytorch/issues/35636
|
||||
if "%~1" == "generate_proposals_op_gpu_test" goto :eof
|
||||
:: See https://github.com/pytorch/pytorch/issues/35648
|
||||
if "%~1" == "reshape_op_gpu_test" goto :eof
|
||||
:: See https://github.com/pytorch/pytorch/issues/35651
|
||||
if "%~1" == "utility_ops_gpu_test" goto :eof
|
||||
|
||||
echo Running "%~2"
|
||||
if "%~1" == "c10_intrusive_ptr_benchmark" (
|
||||
:: NB: This is not a gtest executable file, thus couldn't be handled by pytest-cpp
|
||||
@ -40,15 +56,11 @@ if "%~1" == "c10_intrusive_ptr_benchmark" (
|
||||
python test\run_test.py --cpp --verbose -i "cpp/%~1"
|
||||
if errorlevel 1 (
|
||||
echo %1 failed with exit code %errorlevel%
|
||||
goto fail
|
||||
exit /b 1
|
||||
)
|
||||
if not errorlevel 0 (
|
||||
echo %1 failed with exit code %errorlevel%
|
||||
goto fail
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
:eof
|
||||
exit /b 0
|
||||
|
||||
:fail
|
||||
exit /b 1
|
||||
goto :eof
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
|
||||
|
||||
echo Copying over test times file
|
||||
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
|
||||
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
|
||||
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
|
||||
|
||||
pushd test
|
||||
|
||||
|
||||
@ -22,7 +22,8 @@ if "%SHARD_NUMBER%" == "1" (
|
||||
)
|
||||
|
||||
echo Copying over test times file
|
||||
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
|
||||
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%PROJECT_DIR_WIN%"
|
||||
copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-file-ratings.json" "%PROJECT_DIR_WIN%"
|
||||
|
||||
echo Run nn tests
|
||||
python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
|
||||
|
||||
@ -38,7 +38,7 @@ fi
|
||||
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0
|
||||
|
||||
# Install Z3 optional dependency for Windows builds.
|
||||
python -m pip install z3-solver==4.12.2.0
|
||||
python -m pip install z3-solver
|
||||
|
||||
run_tests() {
|
||||
# Run nvidia-smi if available
|
||||
|
||||
28
.circleci/cimodel/data/simple/anaconda_prune_defintions.py
Normal file
28
.circleci/cimodel/data/simple/anaconda_prune_defintions.py
Normal file
@ -0,0 +1,28 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
from cimodel.data.simple.util.branch_filters import gen_filter_dict
|
||||
from cimodel.lib.miniutils import quote
|
||||
|
||||
|
||||
CHANNELS_TO_PRUNE = ["pytorch-nightly", "pytorch-test"]
|
||||
PACKAGES_TO_PRUNE = "pytorch torchvision torchaudio torchtext ignite torchcsprng"
|
||||
|
||||
|
||||
def gen_workflow_job(channel: str):
|
||||
return OrderedDict(
|
||||
{
|
||||
"anaconda_prune": OrderedDict(
|
||||
{
|
||||
"name": f"anaconda-prune-{channel}",
|
||||
"context": quote("org-member"),
|
||||
"packages": quote(PACKAGES_TO_PRUNE),
|
||||
"channel": channel,
|
||||
"filters": gen_filter_dict(branches_list=["postnightly"]),
|
||||
}
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def get_workflow_jobs():
|
||||
return [gen_workflow_job(channel) for channel in CHANNELS_TO_PRUNE]
|
||||
@ -32,4 +32,4 @@ def gen_mobile_docker(specifier):
|
||||
|
||||
DOCKER_IMAGE_ASAN, DOCKER_REQUIREMENT_ASAN = gen_mobile_docker("asan")
|
||||
|
||||
DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r21e")
|
||||
DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r19c")
|
||||
|
||||
49
.circleci/config.yml
generated
49
.circleci/config.yml
generated
@ -444,6 +444,35 @@ jobs:
|
||||
script="/Users/distiller/project/.circleci/scripts/binary_ios_upload.sh"
|
||||
cat "$script"
|
||||
source "$script"
|
||||
|
||||
anaconda_prune:
|
||||
parameters:
|
||||
packages:
|
||||
type: string
|
||||
description: "What packages are we pruning? (quoted, space-separated string. eg. 'pytorch', 'torchvision torchaudio', etc.)"
|
||||
default: "pytorch"
|
||||
channel:
|
||||
type: string
|
||||
description: "What channel are we pruning? (eq. pytorch-nightly)"
|
||||
default: "pytorch-nightly"
|
||||
docker:
|
||||
- image: continuumio/miniconda3
|
||||
environment:
|
||||
- PACKAGES: "<< parameters.packages >>"
|
||||
- CHANNEL: "<< parameters.channel >>"
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Install dependencies
|
||||
no_output_timeout: "1h"
|
||||
command: |
|
||||
conda install -yq anaconda-client
|
||||
- run:
|
||||
name: Prune packages
|
||||
no_output_timeout: "1h"
|
||||
command: |
|
||||
ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN}" \
|
||||
scripts/release/anaconda-prune/run.sh
|
||||
pytorch_doc_push:
|
||||
resource_class: medium
|
||||
machine:
|
||||
@ -623,7 +652,7 @@ jobs:
|
||||
- run:
|
||||
name: Archive artifacts into zip
|
||||
command: |
|
||||
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
|
||||
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json
|
||||
cp artifacts.zip /Users/distiller/workspace
|
||||
|
||||
- persist_to_workspace:
|
||||
@ -1385,4 +1414,22 @@ workflows:
|
||||
requires:
|
||||
- pytorch_ios_full_jit_12_5_1_nightly_x86_64_build
|
||||
- pytorch_ios_full_jit_12_5_1_nightly_arm64_build
|
||||
- anaconda_prune:
|
||||
name: anaconda-prune-pytorch-nightly
|
||||
context: "org-member"
|
||||
packages: "pytorch torchvision torchaudio torchtext ignite torchcsprng"
|
||||
channel: pytorch-nightly
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- postnightly
|
||||
- anaconda_prune:
|
||||
name: anaconda-prune-pytorch-test
|
||||
context: "org-member"
|
||||
packages: "pytorch torchvision torchaudio torchtext ignite torchcsprng"
|
||||
channel: pytorch-test
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- postnightly
|
||||
when: << pipeline.parameters.run_build >>
|
||||
|
||||
@ -10,6 +10,8 @@ import shutil
|
||||
import sys
|
||||
from collections import namedtuple
|
||||
|
||||
import cimodel.data.simple.anaconda_prune_defintions
|
||||
|
||||
import cimodel.data.simple.docker_definitions
|
||||
import cimodel.data.simple.mobile_definitions
|
||||
import cimodel.data.simple.nightly_ios
|
||||
@ -142,6 +144,7 @@ def gen_build_workflows_tree():
|
||||
build_workflows_functions = [
|
||||
cimodel.data.simple.mobile_definitions.get_workflow_jobs,
|
||||
cimodel.data.simple.nightly_ios.get_workflow_jobs,
|
||||
cimodel.data.simple.anaconda_prune_defintions.get_workflow_jobs,
|
||||
]
|
||||
build_jobs = [f() for f in build_workflows_functions]
|
||||
build_jobs.extend(
|
||||
|
||||
@ -62,7 +62,7 @@ git --no-pager log --max-count 1
|
||||
popd
|
||||
|
||||
# Clone the Builder main repo
|
||||
retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
|
||||
retry git clone -q https://github.com/pytorch/builder.git -b release/2.1 "$BUILDER_ROOT"
|
||||
pushd "$BUILDER_ROOT"
|
||||
echo "Using builder from "
|
||||
git --no-pager log --max-count 1
|
||||
|
||||
@ -33,7 +33,7 @@ fi
|
||||
cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/
|
||||
# zip the library
|
||||
export DATE="$(date -u +%Y%m%d)"
|
||||
export IOS_NIGHTLY_BUILD_VERSION="2.2.0.${DATE}"
|
||||
export IOS_NIGHTLY_BUILD_VERSION="2.1.0.${DATE}"
|
||||
if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
|
||||
# libtorch_lite_ios_nightly_1.11.0.20210810.zip
|
||||
ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
|
||||
|
||||
@@ -54,7 +54,7 @@ fi

# Move debug wheels out of the package dir so they don't get installed
# Move debug wheels out of the the package dir so they don't get installed
mkdir -p /tmp/debug_final_pkgs
mv /final_pkgs/debug-*.zip /tmp/debug_final_pkgs || echo "no debug packages to move"
@@ -66,12 +66,6 @@ mv /final_pkgs/debug-*.zip /tmp/debug_final_pkgs || echo "no debug packages to m
# conda build scripts themselves. These should really be consolidated
# Pick only one package of multiple available (which happens as result of workflow re-runs)
pkg="/final_pkgs/\$(ls -1 /final_pkgs|sort|tail -1)"
if [[ "\$PYTORCH_BUILD_VERSION" == *dev* ]]; then
CHANNEL="nightly"
else
CHANNEL="test"
fi
if [[ "$PACKAGE_TYPE" == conda ]]; then
(
# For some reason conda likes to re-activate the conda environment when attempting this install
@@ -89,14 +83,25 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
retry conda install -c pytorch -y cpuonly
else
cu_ver="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4}"
CUDA_PACKAGE="pytorch-cuda"
retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c "pytorch-\${CHANNEL}" "pytorch-cuda=\${cu_ver}"
PYTORCH_CHANNEL="pytorch"
if [[ "\${TORCH_CONDA_BUILD_FOLDER}" == "pytorch-nightly" ]]; then
PYTORCH_CHANNEL="pytorch-nightly"
fi
retry conda install \${EXTRA_CONDA_FLAGS} -yq -c nvidia -c pytorch-test "pytorch-cuda=\${cu_ver}"
fi
conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
)
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
if [[ "$(uname -m)" == aarch64 ]]; then
# Using "extra-index-url" until all needed aarch64 dependencies are
# added to "https://download.pytorch.org/whl/nightly/"
pip install "\$pkg" --extra-index-url "https://download.pytorch.org/whl/test/${DESIRED_CUDA}"
else
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/test/${DESIRED_CUDA}"
fi
retry pip install -q numpy protobuf typing-extensions
fi
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
@@ -59,7 +59,7 @@ PIP_UPLOAD_FOLDER='nightly/'
# We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
export DATE="$(date -u +%Y%m%d)"
#TODO: We should be pulling semver version from the base version.txt
BASE_BUILD_VERSION="2.2.0.dev$DATE"
BASE_BUILD_VERSION="2.1.0.dev$DATE"
# Change BASE_BUILD_VERSION to git tag when on a git tag
# Use 'git -C' to make doubly sure we're in the correct directory for checking
# the git tag
@@ -77,8 +77,15 @@ else
export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}+$DESIRED_CUDA"
fi

# The build with with-pypi-cudnn suffix is only applicabe to
# pypi small wheel Linux x86 build
if [[ -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]] && [[ "$(uname)" == 'Linux' && "$(uname -m)" == "x86_64" ]]; then
export PYTORCH_BUILD_VERSION="${PYTORCH_BUILD_VERSION}-with-pypi-cudnn"
fi

export PYTORCH_BUILD_NUMBER=1

JAVA_HOME=
BUILD_JNI=OFF
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
@@ -150,8 +157,8 @@ EOL

# nproc doesn't exist on darwin
if [[ "$(uname)" != Darwin ]]; then
# This was lowered from 18 to 12 to avoid OOMs when compiling FlashAttentionV2
MEMORY_LIMIT_MAX_JOBS=12
# Because most Circle executors only have 20 CPUs, using more causes OOMs w/ Ninja and nvcc parallelization
MEMORY_LIMIT_MAX_JOBS=18
NUM_CPUS=$(( $(nproc) - 2 ))

# Defaults here for **binary** linux builds so they can be changed in one place
@@ -16,6 +16,11 @@ UPLOAD_BUCKET="s3://pytorch"
BACKUP_BUCKET="s3://pytorch-backup"
BUILD_NAME=${BUILD_NAME:-}

# this is temporary change to upload pypi-cudnn builds to separate folder
if [[ ${BUILD_NAME} == *with-pypi-cudnn* ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_cudnn"
fi

DRY_RUN=${DRY_RUN:-enabled}
# Don't actually do work unless explicit
ANACONDA="true anaconda"
@@ -42,3 +42,32 @@ jobs:
script="/Users/distiller/project/.circleci/scripts/binary_ios_upload.sh"
cat "$script"
source "$script"

anaconda_prune:
parameters:
packages:
type: string
description: "What packages are we pruning? (quoted, space-separated string. eg. 'pytorch', 'torchvision torchaudio', etc.)"
default: "pytorch"
channel:
type: string
description: "What channel are we pruning? (eq. pytorch-nightly)"
default: "pytorch-nightly"
docker:
- image: continuumio/miniconda3
environment:
- PACKAGES: "<< parameters.packages >>"
- CHANNEL: "<< parameters.channel >>"
steps:
- checkout
- run:
name: Install dependencies
no_output_timeout: "1h"
command: |
conda install -yq anaconda-client
- run:
name: Prune packages
no_output_timeout: "1h"
command: |
ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN}" \
scripts/release/anaconda-prune/run.sh
@@ -177,7 +177,7 @@
- run:
name: Archive artifacts into zip
command: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .pytorch-test-times.json .pytorch-test-file-ratings.json
cp artifacts.zip /Users/distiller/workspace

- persist_to_workspace:
15 .clang-tidy
@@ -1,8 +1,5 @@
---
# NOTE there must be no spaces before the '-', so put the comma last.
# The check bugprone-unchecked-optional-access is also turned off atm
# because it causes clang-tidy to hang randomly. The tracking issue
# can be found at https://github.com/llvm/llvm-project/issues/69369.
InheritParentConfig: true
Checks: '
bugprone-*,
@@ -12,7 +9,6 @@ bugprone-*,
-bugprone-lambda-function-name,
-bugprone-reserved-identifier,
-bugprone-swapped-arguments,
-bugprone-unchecked-optional-access,
clang-diagnostic-missing-prototypes,
cppcoreguidelines-*,
-cppcoreguidelines-avoid-do-while,
@@ -34,13 +30,8 @@ cppcoreguidelines-*,
-facebook-hte-RelativeInclude,
hicpp-exception-baseclass,
hicpp-avoid-goto,
misc-*,
-misc-const-correctness,
-misc-use-anonymous-namespace,
-misc-unused-parameters,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers,
misc-unused-alias-decls,
misc-unused-using-decls,
modernize-*,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
@@ -53,7 +44,7 @@ modernize-*,
performance-*,
readability-container-size-empty,
'
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
HeaderFilterRegex: '^(c10/(?!test)|torch/csrc/(?!deploy/interpreter/cpython)).*$'
AnalyzeTemporaryDtors: false
WarningsAsErrors: '*'
...
@@ -1,72 +0,0 @@
# Step by step guide on using PyTorch's DevContainer

Using PyTorch's DevContainer environment involves a series of steps that will help you set up a development environment that is isolated and replicable. Below, we'll guide you through each step to make this process as smooth as possible:

## Step 1: Install VSCode

1. Navigate to the [Visual Studio Code website](https://code.visualstudio.com/).
2. Download the appropriate installer for your operating system (Windows, Linux, or macOS).
3. Run the installer and follow the on-screen instructions to install VSCode on your system.
4. After installation, launch VSCode.

## Step 2: Install DevContainer Extension

1. In VSCode, go to the Extensions view by clicking on the Extensions icon in the Activity Bar on the side of the window.
2. Search for "Dev Containers" in the Extensions view search bar.
3. Find the "Dev Containers" extension in the search results and click on the install button to install it.

You can also go to the extension's [homepage](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) and [documentation page](https://code.visualstudio.com/docs/devcontainers/containers) to find more details.

## Step 3: Install Docker and Add Current Login User to Docker Group

1. Follow the [official guide](https://docs.docker.com/get-docker/) to install Docker. Don't forget the [post installation steps](https://docs.docker.com/engine/install/linux-postinstall/).

If you are using [Visual Studio Code Remote - SSH](https://code.visualstudio.com/docs/remote/ssh), then you only need to install Docker in the remote host, not your local computer. And the following steps should be run in the remote host.

## Step 4 (Optional): Install NVIDIA Container Toolkit for GPU Usage

1. If you intend to use GPU resources, first ensure you have NVIDIA drivers installed on your system. Check if `nvidia-smi` works to verify your GPU setup.
2. Follow the [official guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#docker) to install the NVIDIA Container Toolkit.
3. After installation, verify that the toolkit is installed correctly by running:
```
docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
```

## Step 5: Clone PyTorch

1. Open a terminal or command prompt.
2. Use the following command to clone the PyTorch repository:
```
git clone https://github.com/pytorch/pytorch
```
3. Navigate to the cloned directory:
```
cd pytorch
```

## Step 6: Open in DevContainer

1. In VSCode, use the Command Palette (`Ctrl+Shift+P` or `Cmd+Shift+P` on macOS) to run the "Remote-Containers: Open Folder in Container..." command.
2. You will be prompted with two options: CPU dev container or CUDA dev container. Choose the one you want to run.

## Step 7: Wait for Building the Environment

1. After opening the folder in a DevContainer, VSCode will start building the container. This process can take some time as it involves downloading necessary images and setting up the environment.
2. You can monitor the progress in the VSCode terminal.
3. Once the build process completes, you'll have a fully configured PyTorch development environment in a container.
4. The next time you open the same dev container, it will be much faster, as it does not require building the image again.

You are now all set to start developing with PyTorch in a DevContainer environment. This setup ensures you have a consistent and isolated development environment for your PyTorch projects.

## Step 8: Build PyTorch

To build pytorch from source, simply run:
```
python setup.py develop
```

The process involves compiling thousands of files, and would take a long time. Fortunately, the compiled objects can be useful for your next build. When you modify some files, you only need to compile the changed files the next time.

Note that only contents in the `pytorch` directory are saved to disk. This directory is mounted to the docker image, while other contents in the docker image are all temporary, and will be lost if docker restarts the image or the server reboots.

For an in-depth understanding of Dev Container and its caveats, please refer to [the full documentation](https://code.visualstudio.com/docs/devcontainers/containers).
@@ -9,5 +9,3 @@ make setup_lint

# Add CMAKE_PREFIX_PATH to bashrc
echo 'export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}' >> ~/.bashrc
# Add linker path so that cuda-related libraries can be found
echo 'export LDFLAGS="-L${CONDA_PREFIX}/lib/ $LDFLAGS"' >> ~/.bashrc

12 .flake8
@@ -2,7 +2,7 @@
# NOTE: **Mirror any changes** to this file the [tool.ruff] config in pyproject.toml
# before we can fully move to use ruff
enable-extensions = G
select = B,C,E,F,G,P,SIM1,T4,W,B9,TOR0,TOR1,TOR2
select = B,C,E,F,G,P,SIM1,T4,W,B9
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
@@ -14,21 +14,15 @@ ignore =
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907
B007,B008,B017,B019,B020,B023,B024,B026,B028,B903,B904,B905,B906,B907
# these ignores are from flake8-comprehensions; please fix!
C407,
# these ignores are from flake8-logging-format; please fix!
G100,G101,G200
G100,G101,G200,G201,G202
# these ignores are from flake8-simplify. please fix or ignore with commented reason
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
# flake8-simplify code styles
SIM102,SIM103,SIM106,SIM112,
# TorchFix codes that don't make sense for PyTorch itself:
# removed and deprecated PyTorch functions.
TOR001,TOR101,
# TODO(kit1980): fix all TOR102 issues
# `torch.load` without `weights_only` parameter is unsafe
TOR102,
per-file-ignores =
__init__.py: F401
torch/utils/cpp_extension.py: B950
4 .github/actionlint.yaml (vendored)
@@ -7,7 +7,7 @@ self-hosted-runner:
- linux.4xlarge
- linux.12xlarge
- linux.24xlarge
- linux.arm64.2xlarge
- linux.t4g.2xlarge
- linux.4xlarge.nvidia.gpu
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
@@ -23,5 +23,3 @@ self-hosted-runner:
- macos-12-xl
- macos-12
- macos12.3-m1
- macos-latest-xlarge
- macos-13-xlarge
10 .github/actions/filter-test-configs/action.yml (vendored)
@ -13,10 +13,6 @@ inputs:
|
||||
required: true
|
||||
type: string
|
||||
description: JSON description of what test configs to run.
|
||||
job-name:
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
outputs:
|
||||
test-matrix:
|
||||
@ -46,8 +42,7 @@ runs:
|
||||
retry_wait_seconds: 30
|
||||
command: |
|
||||
set -eux
|
||||
# PyYAML 6.0 doesn't work with MacOS x86 anymore
|
||||
python3 -m pip install requests==2.26.0 pyyaml==6.0.1
|
||||
python3 -m pip install requests==2.26.0 pyyaml==6.0
|
||||
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
@ -61,7 +56,6 @@ runs:
|
||||
|
||||
- name: Get the job name
|
||||
id: get-job-name
|
||||
if: inputs.job-name == ''
|
||||
continue-on-error: true
|
||||
shell: bash
|
||||
run: |
|
||||
@ -97,7 +91,7 @@ runs:
|
||||
shell: bash
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ inputs.github-token }}
|
||||
JOB_NAME: ${{ inputs.job-name == '' && steps.get-job-name.outputs.job-name || inputs.job-name }}
|
||||
JOB_NAME: ${{ steps.get-job-name.outputs.job-name }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
|
||||
@ -11,20 +11,18 @@ outputs:
|
||||
job-id:
|
||||
description: The retrieved workflow job id
|
||||
value: ${{ steps.get-job-id.outputs.job-id }}
|
||||
job-name:
|
||||
description: The retrieved workflow job name
|
||||
value: ${{ steps.get-job-id.outputs.job-name }}
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Get job id and name or fail
|
||||
- name: Get jobid or fail
|
||||
# timeout-minutes is unsupported for composite workflows, see https://github.com/actions/runner/issues/1979
|
||||
# timeout-minutes: 10
|
||||
shell: bash
|
||||
id: get-job-id
|
||||
run: |
|
||||
set -eux
|
||||
python3 .github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}"
|
||||
GHA_WORKFLOW_JOB_ID=$(python3 .github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}")
|
||||
echo "job-id=${GHA_WORKFLOW_JOB_ID}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ inputs.github-token }}
|
||||
|
||||
11 .github/actions/pytest-cache-upload/action.yml (vendored)
@ -10,13 +10,6 @@ inputs:
|
||||
description: Shard number for the current job
|
||||
required: false
|
||||
default: "0"
|
||||
sha:
|
||||
description: SHA for the commit
|
||||
required: true
|
||||
test_config:
|
||||
description: Name of the test config
|
||||
required: false
|
||||
default: "default"
|
||||
job_identifier:
|
||||
description: Text that uniquely identifies a given job type within a workflow. All shards of a job should share the same job identifier.
|
||||
required: true
|
||||
@ -40,8 +33,6 @@ runs:
|
||||
env:
|
||||
CACHE_DIR: ${{ inputs.cache_dir }}
|
||||
JOB_IDENTIFIER: ${{ inputs.job_identifier }}
|
||||
SHA: ${{ inputs.sha }}
|
||||
TEST_CONFIG: ${{ inputs.test_config }}
|
||||
SHARD: ${{ inputs.shard }}
|
||||
REPO: ${{ github.repository }}
|
||||
run: |
|
||||
@ -50,8 +41,6 @@ runs:
|
||||
--cache_dir $GITHUB_WORKSPACE/$CACHE_DIR \
|
||||
--pr_identifier $GITHUB_REF \
|
||||
--job_identifier $JOB_IDENTIFIER \
|
||||
--sha $SHA \
|
||||
--test_config $TEST_CONFIG \
|
||||
--shard $SHARD \
|
||||
--repo $REPO \
|
||||
--temp_dir $RUNNER_TEMP \
|
||||
|
||||
12 .github/actions/upload-test-artifacts/action.yml (vendored)
@ -43,14 +43,14 @@ runs:
|
||||
FILE_SUFFIX: ${{ inputs.file-suffix }}
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f logs-*.zip
|
||||
rm -f usage-log-*.zip
|
||||
# this workflow is also run in bazel build test, but we dont generate usage reports for it
|
||||
# so check to see if the file exists first
|
||||
if [ -f 'usage_log.txt' ]; then
|
||||
zip "logs-${FILE_SUFFIX}.zip" 'usage_log.txt'
|
||||
zip "usage-log-${FILE_SUFFIX}.zip" 'usage_log.txt'
|
||||
fi
|
||||
if ls test/**/*.log 1> /dev/null 2>&1; then
|
||||
zip -r "logs-${FILE_SUFFIX}.zip" test -i '*.log'
|
||||
zip -r "usage-log-${FILE_SUFFIX}.zip" test -i '*.log'
|
||||
fi
|
||||
|
||||
# Windows zip
|
||||
@ -80,7 +80,7 @@ runs:
|
||||
FILE_SUFFIX: ${{ inputs.file-suffix }}
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "logs-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\*.log'
|
||||
7z a "usage-log-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\*.log'
|
||||
|
||||
# S3 upload
|
||||
- name: Store Test Downloaded JSONs on S3
|
||||
@ -112,7 +112,7 @@ runs:
|
||||
${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
|
||||
retention-days: 14
|
||||
if-no-files-found: ignore
|
||||
path: logs-*.zip
|
||||
path: usage-log-*.zip
|
||||
|
||||
# GHA upload
|
||||
- name: Store Test Downloaded JSONs on Github
|
||||
@ -146,7 +146,7 @@ runs:
|
||||
continue-on-error: true
|
||||
with:
|
||||
# Add the run attempt, see [Artifact run attempt]
|
||||
name: logs-runattempt${{ github.run_attempt }}-${{ inputs.file-suffix }}.zip
|
||||
name: usage-log-runattempt${{ github.run_attempt }}-${{ inputs.file-suffix }}.zip
|
||||
retention-days: 14
|
||||
if-no-files-found: ignore
|
||||
path: |
|
||||
|
||||
2 .github/ci_commit_pins/audio.txt (vendored)
@ -1 +1 @@
|
||||
6518fa9b2c74e84d7eb1fc6e3eb51e43213f0c05
|
||||
a8f4e97bd5356a7a77510cdf6a3a62e25a5dc602
|
||||
2 .github/ci_commit_pins/fbgemm.txt (vendored)
@ -1 +1 @@
|
||||
de731af65b4f04696e85c729e3282450b51b95fd
|
||||
1b2746f642cc2c99fe9d1a0c34359c0de45341c2
|
||||
|
||||
1 .github/ci_commit_pins/numpy_pytorch_interop.txt (vendored, new file)
@ -0,0 +1 @@
|
||||
0c4e82511d349358d2c8c492dd833334e742f27f
|
||||
1 .github/ci_commit_pins/timm.txt (vendored, new file)
@ -0,0 +1 @@
|
||||
b9d43c7dcac1fe05e851dd7be7187b108af593d2
|
||||
2 .github/ci_commit_pins/torchbench.txt (vendored)
@ -1 +1 @@
|
||||
99944a2fb8624947f9c0e2edc898ff42a16124da
|
||||
9371b9e13c826f3930e54346b4d619cb59182f68
|
||||
|
||||
2 .github/ci_commit_pins/vision.txt (vendored)
@ -1 +1 @@
|
||||
c1e2095c3a16fbe7db25b9e2f206025488c2c203
|
||||
47cd5ea8e21d7596a24907710411d6b4a43f628d
|
||||
|
||||
2 .github/ci_commit_pins/xla.txt (vendored)
@ -1 +1 @@
|
||||
r2.2
|
||||
r2.1
|
||||
|
||||
12 .github/labeler.yml (vendored)
@ -15,7 +15,6 @@
|
||||
"ciflow/inductor":
|
||||
- torch/_decomp/**
|
||||
- torch/_dynamo/**
|
||||
- torch/_export/**
|
||||
- torch/_inductor/**
|
||||
- benchmarks/dynamo/**
|
||||
- torch/_subclasses/fake_tensor.py
|
||||
@ -29,10 +28,6 @@
|
||||
- .github/ci_commit_pins/**
|
||||
- c10/core/Sym*
|
||||
- torch/fx/experimental/symbolic_shapes.py
|
||||
- test/distributed/_tensor/test_dtensor_compile.py
|
||||
- test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
|
||||
- torch/distributed/_tensor/**
|
||||
- torch/distributed/fsdp/**
|
||||
|
||||
"module: cpu":
|
||||
- aten/src/ATen/cpu/**
|
||||
@ -71,10 +66,3 @@
|
||||
|
||||
"ciflow/trunk":
|
||||
- .ci/docker/ci_commit_pins/triton.txt
|
||||
|
||||
"module: distributed":
|
||||
- torch/csrc/distributed/**
|
||||
- torch/distributed/**
|
||||
- torch/nn/parallel/**
|
||||
- test/distributed/**
|
||||
- torch/testing/_internal/distributed/**
|
||||
|
||||
42 .github/merge_rules.yaml (vendored)
@ -4,19 +4,16 @@
|
||||
- .ci/onnx/*
|
||||
- .ci/docker/common/install_onnx.sh
|
||||
- aten/src/ATen/core/interned_strings.h
|
||||
- benchmarks/dynamo/**
|
||||
- docs/source/onnx.rst
|
||||
- docs/source/onnx*
|
||||
- docs/source/scripts/onnx/**
|
||||
- docs/source/_static/img/onnx/**
|
||||
- scripts/onnx/**
|
||||
- test/onnx/**
|
||||
- test/onnx_caffe2/**
|
||||
- tools/onnx/**
|
||||
- torch/_dynamo/backends/onnxrt.py
|
||||
- torch/_C/__init__.pyi.in
|
||||
- torch/_C/_onnx.pyi
|
||||
- torch/_logging/**
|
||||
- torch/csrc/jit/passes/onnx.*
|
||||
- torch/csrc/jit/passes/onnx/**
|
||||
- torch/csrc/jit/serialization/export.*
|
||||
@ -26,6 +23,8 @@
|
||||
- torch/testing/_internal/common_methods_invocations.py
|
||||
- third_party/onnx
|
||||
- caffe2/python/onnx/**
|
||||
- benchmarks/dynamo/_onnx/**
|
||||
- torch/_logging/**
|
||||
approved_by:
|
||||
- BowenBao
|
||||
- abock
|
||||
@ -74,7 +73,6 @@
|
||||
|
||||
- name: OSS CI / pytorchbot
|
||||
patterns:
|
||||
- .github/ci_commit_pins/audio.txt
|
||||
- .github/ci_commit_pins/vision.txt
|
||||
- .github/ci_commit_pins/torchdynamo.txt
|
||||
- .ci/docker/ci_commit_pins/triton.txt
|
||||
@ -85,19 +83,6 @@
|
||||
- EasyCLA
|
||||
- Lint
|
||||
- pull
|
||||
- inductor
|
||||
|
||||
- name: OSS CI /pytorchbot / Executorch
|
||||
patterns:
|
||||
- .ci/docker/ci_commit_pins/executorch.txt
|
||||
approved_by:
|
||||
- pytorchbot
|
||||
ignore_flaky_failures: false
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
- pull / linux-jammy-py3-clang12-executorch / build
|
||||
- pull / linux-jammy-py3-clang12-executorch / test (executorch, 1, 1, linux.2xlarge)
|
||||
|
||||
- name: OSS CI / pytorchbot / XLA
|
||||
patterns:
|
||||
@ -108,8 +93,8 @@
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
- pull / linux-focal-py3_8-clang9-xla / build
|
||||
- pull / linux-focal-py3_8-clang9-xla / test (xla, 1, 1, linux.12xlarge)
|
||||
- pull / linux-bionic-py3_8-clang8-xla / build
|
||||
- pull / linux-bionic-py3_8-clang8-xla / test (xla, 1, 1, linux.12xlarge)
|
||||
|
||||
- name: Documentation
|
||||
patterns:
|
||||
@ -139,6 +124,9 @@
|
||||
|
||||
- name: PrimTorch
|
||||
patterns:
|
||||
- aten/src/ATen/native_functions.yaml
|
||||
- aten/src/ATen/native/**
|
||||
- test/**
|
||||
- torch/_meta_registrations.py
|
||||
- torch/_decomp/**
|
||||
- torch/_refs/**
|
||||
@ -332,7 +320,6 @@
|
||||
- XiaobingSuper
|
||||
- jgong5
|
||||
- vfdev-5
|
||||
- leslie-fang-intel
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
@ -351,21 +338,6 @@
|
||||
- Lint
|
||||
- pull
|
||||
|
||||
- name: x86 CPU quantization
|
||||
patterns:
|
||||
- torch/ao/quantization/quantizer/x86_inductor_quantizer.py
|
||||
- torch/_inductor/fx_passes/quantization.py
|
||||
- test/quantization/core/test_quantized_op.py
|
||||
- test/inductor/test_mkldnn_pattern_matcher.py
|
||||
- test/quantization/pt2e/test_x86inductor_quantizer.py
|
||||
approved_by:
|
||||
- leslie-fang-intel
|
||||
- jgong5
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
- pull
|
||||
|
||||
- name: Autocast
|
||||
patterns:
|
||||
- torch/amp/**
|
||||
|
||||
1 .github/pytorch-probot.yml (vendored)
@ -10,7 +10,6 @@ ciflow_push_tags:
|
||||
- ciflow/mps
|
||||
- ciflow/nightly
|
||||
- ciflow/periodic
|
||||
- ciflow/rocm
|
||||
- ciflow/slow
|
||||
- ciflow/trunk
|
||||
- ciflow/unstable
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
blas=1.0
|
||||
cmake=3.22.1
|
||||
mkl=2022.1.0
|
||||
mkl-include=2022.1.0
|
||||
ninja=1.10.2
|
||||
numpy=1.23.3
|
||||
pyyaml=6.0
|
||||
2 .github/requirements/conda-env-macOS-ARM64 (vendored)
@ -5,7 +5,7 @@ cmake=3.22.*
|
||||
typing-extensions=4.3.0
|
||||
dataclasses=0.8
|
||||
pip=22.2.2
|
||||
pillow=10.0.1
|
||||
pillow=9.2.0
|
||||
pkg-config=0.29.2
|
||||
wheel=0.37.1
|
||||
# NB: This is intentionally held back because anaconda main doesn't
|
||||
|
||||
2 .github/requirements/conda-env-macOS-X64 (vendored)
@ -7,7 +7,7 @@ cmake=3.22.*
|
||||
typing-extensions=4.3.0
|
||||
dataclasses=0.8
|
||||
pip=22.2.2
|
||||
pillow=10.0.1
|
||||
pillow=9.2.0
|
||||
libuv=1.40.0
|
||||
pkg-config=0.29.2
|
||||
wheel=0.37.1
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
# iOS simulator requirements
|
||||
coremltools==5.0b5
|
||||
protobuf==3.20.2
|
||||
optree==0.9.1
|
||||
|
||||
@ -10,7 +10,6 @@ numba<=0.49.1; platform_machine != "arm64"
|
||||
opt-einsum>=3.3
|
||||
psutil==5.9.1
|
||||
nvidia-ml-py==11.525.84
|
||||
packaging==23.1
|
||||
pygments==2.15.0
|
||||
pytest==7.3.2
|
||||
pytest-xdist==3.3.1
|
||||
@ -27,4 +26,3 @@ pytest-cpp==2.3.0
|
||||
rockset==1.0.3
|
||||
z3-solver==4.12.2.0
|
||||
tensorboard==2.13.0
|
||||
optree==0.9.1
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
typing-extensions>=4.8.0
|
||||
typing-extensions
|
||||
jinja2
|
||||
|
||||
27 .github/scripts/build_triton_wheel.py (vendored)
@ -67,12 +67,10 @@ def build_triton(
|
||||
max_jobs = os.cpu_count() or 1
|
||||
env["MAX_JOBS"] = str(max_jobs)
|
||||
|
||||
version_suffix = ""
|
||||
if not release:
|
||||
# Nightly binaries include the triton commit hash, i.e. 2.1.0+e6216047b8
|
||||
# while release build should only include the version, i.e. 2.1.0
|
||||
version_suffix = f"+{commit_hash[:10]}"
|
||||
version += version_suffix
|
||||
version = f"{version}+{commit_hash[:10]}"
|
||||
|
||||
with TemporaryDirectory() as tmpdir:
|
||||
triton_basedir = Path(tmpdir) / "triton"
|
||||
@ -84,14 +82,7 @@ def build_triton(
|
||||
triton_repo = "https://github.com/openai/triton"
|
||||
triton_pkg_name = "pytorch-triton"
|
||||
check_call(["git", "clone", triton_repo], cwd=tmpdir)
|
||||
if release:
|
||||
ver, rev, patch = version.split(".")
|
||||
check_call(
|
||||
["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir
|
||||
)
|
||||
else:
|
||||
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
|
||||
|
||||
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
|
||||
if build_conda:
|
||||
with open(triton_basedir / "meta.yaml", "w") as meta:
|
||||
print(
|
||||
@ -141,21 +132,17 @@ def build_triton(
|
||||
shutil.copy(conda_path, Path.cwd())
|
||||
return Path.cwd() / conda_path.name
|
||||
|
||||
# change built wheel name and version
|
||||
env["TRITON_WHEEL_NAME"] = triton_pkg_name
|
||||
env["TRITON_WHEEL_VERSION_SUFFIX"] = version_suffix
|
||||
patch_setup_py(
|
||||
triton_pythondir / "setup.py",
|
||||
name=triton_pkg_name,
|
||||
version=f"{version}",
|
||||
)
|
||||
patch_init_py(
|
||||
triton_pythondir / "triton" / "__init__.py",
|
||||
version=f"{version}",
|
||||
)
|
||||
|
||||
if build_rocm:
|
||||
# TODO: Remove me when ROCM triton is updated
|
||||
patch_setup_py(
|
||||
triton_pythondir / "setup.py",
|
||||
name=triton_pkg_name,
|
||||
version=f"{version}",
|
||||
)
|
||||
check_call("scripts/amd/setup_rocm_libs.sh", cwd=triton_basedir, shell=True)
|
||||
print("ROCm libraries setup for triton installation...")
|
||||
|
||||
|
||||
3 .github/scripts/check_labels.py (vendored)
@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check whether a PR has required labels."""
|
||||
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from github_utils import gh_delete_comment, gh_post_pr_comment
|
||||
@ -47,7 +46,7 @@ def main() -> None:
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
sys.exit(0)
|
||||
exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
BIN .github/scripts/drci_mocks.json.gz (vendored, binary file not shown)
18 .github/scripts/filter_test_configs.py (vendored)
@ -62,9 +62,10 @@ SUPPORTED_PERIODICAL_MODES: Dict[str, Callable[[Optional[str]], bool]] = {
|
||||
}
|
||||
|
||||
# The link to the published list of disabled jobs
|
||||
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=jbbJUxI_SSZFssBBGCU6ybH9sxHitHLY"
|
||||
# Pinning Disabled and Unstable job to Oct 4, 2023.
|
||||
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=EniFrNbB6taGjwKyN94j4oqUeeN8ALfI"
|
||||
# and unstable jobs
|
||||
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=hUtTalgnWb1m3AtJyVLUdu7DBrnddRkp"
|
||||
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=2voGK5DSv0Hzvxhc23ChGcOLEBIO2vHf"
|
||||
|
||||
# Some constants used to handle disabled and unstable jobs
|
||||
JOB_NAME_SEP = "/"
|
||||
@ -410,17 +411,16 @@ def process_jobs(
|
||||
if target_job in (TEST_JOB_NAME, BUILD_AND_TEST_JOB_NAME):
|
||||
target_cfg = m.group("cfg")
|
||||
|
||||
# NB: There can be multiple unstable configurations, i.e. inductor, inductor_huggingface
|
||||
test_matrix = _filter_jobs(
|
||||
return _filter_jobs(
|
||||
test_matrix=test_matrix,
|
||||
issue_type=issue_type,
|
||||
target_cfg=target_cfg,
|
||||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Found a matching {issue_type.value} issue {target_url} for {workflow} / {job_name}, "
|
||||
+ f"but the name {target_job_cfg} is invalid"
|
||||
)
|
||||
|
||||
warnings.warn(
|
||||
f"Found a matching {issue_type.value} issue {target_url} for {workflow} / {job_name}, "
|
||||
+ f"but the name {target_job_cfg} is invalid"
|
||||
)
|
||||
|
||||
# Found no matching target, return the same input test matrix
|
||||
return test_matrix
|
||||
|
||||
258 .github/scripts/generate_binary_build_matrix.py (vendored)
@ -10,19 +10,13 @@ architectures:
|
||||
* Latest ROCM
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
CUDA_ARCHES = ["11.8", "12.1"]
|
||||
|
||||
|
||||
CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1"}
|
||||
|
||||
|
||||
CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8"}
|
||||
|
||||
|
||||
ROCM_ARCHES = ["5.6", "5.7"]
|
||||
ROCM_ARCHES = ["5.5", "5.6"]
|
||||
|
||||
|
||||
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
|
||||
@ -30,79 +24,20 @@ CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
|
||||
|
||||
CPU_AARCH64_ARCH = ["cpu-aarch64"]
|
||||
|
||||
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"11.8": (
|
||||
"nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
|
||||
"nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu11==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
),
|
||||
"12.1": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
|
||||
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def get_nccl_submodule_version() -> str:
|
||||
from pathlib import Path
|
||||
|
||||
nccl_version_mk = (
|
||||
Path(__file__).absolute().parent.parent.parent
|
||||
/ "third_party"
|
||||
/ "nccl"
|
||||
/ "nccl"
|
||||
/ "makefiles"
|
||||
/ "version.mk"
|
||||
)
|
||||
if not nccl_version_mk.exists():
|
||||
raise RuntimeError(
|
||||
"Please make sure that nccl submodule is checked out when importing this script"
|
||||
)
|
||||
with nccl_version_mk.open("r") as f:
|
||||
content = f.read()
|
||||
d = {}
|
||||
for l in content.split("\n"):
|
||||
if not l.startswith("NCCL_"):
|
||||
continue
|
||||
(k, v) = l.split(":=")
|
||||
d[k.strip()] = v.strip()
|
||||
return f"{d['NCCL_MAJOR']}.{d['NCCL_MINOR']}.{d['NCCL_PATCH']}"
|
||||
|
||||
|
||||
def get_nccl_wheel_version(arch_version: str) -> str:
|
||||
import re
|
||||
|
||||
requirements = map(
|
||||
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
|
||||
)
|
||||
return [x for x in requirements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
|
||||
|
||||
|
||||
def validate_nccl_dep_consistency(arch_version: str) -> None:
|
||||
wheel_ver = get_nccl_wheel_version(arch_version)
|
||||
submodule_ver = get_nccl_submodule_version()
|
||||
if wheel_ver != submodule_ver:
|
||||
raise RuntimeError(
|
||||
f"NCCL submodule version {submodule_ver} differs from wheel version {wheel_ver}"
|
||||
)
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = (
|
||||
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
|
||||
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.18.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"triton==2.1.0; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
)
|
||||
|
||||
|
||||
def arch_type(arch_version: str) -> str:
|
||||
@ -118,29 +53,20 @@ def arch_type(arch_version: str) -> str:
|
||||
return "cpu"
|
||||
|
||||
|
||||
# This can be updated to the release version when cutting release branch, i.e. 2.1
|
||||
DEFAULT_TAG = os.getenv("RELEASE_VERSION_TAG", "main")
|
||||
|
||||
WHEEL_CONTAINER_IMAGES = {
|
||||
**{
|
||||
gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in CUDA_ARCHES
|
||||
},
|
||||
**{
|
||||
gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in ROCM_ARCHES
|
||||
},
|
||||
"cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
|
||||
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
|
||||
"cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
|
||||
"11.8": "pytorch/manylinux-builder:cuda11.8-2.1",
|
||||
"12.1": "pytorch/manylinux-builder:cuda12.1-2.1",
|
||||
"5.5": "pytorch/manylinux-builder:rocm5.5-2.1",
|
||||
"5.6": "pytorch/manylinux-builder:rocm5.6-2.1",
|
||||
"cpu": "pytorch/manylinux-builder:cpu-2.1",
|
||||
"cpu-cxx11-abi": "pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-2.1",
|
||||
"cpu-aarch64": "pytorch/manylinuxaarch64-builder:cpu-aarch64-2.1",
|
||||
}
|
||||
|
||||
CONDA_CONTAINER_IMAGES = {
|
||||
**{
|
||||
gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in CUDA_ARCHES
|
||||
},
|
||||
"cpu": f"pytorch/conda-builder:cpu-{DEFAULT_TAG}",
|
||||
"11.8": "pytorch/conda-builder:cuda11.8-2.1",
|
||||
"12.1": "pytorch/conda-builder:cuda12.1-2.1",
|
||||
"cpu": "pytorch/conda-builder:cpu-2.1",
|
||||
}
|
||||
|
||||
PRE_CXX11_ABI = "pre-cxx11"
|
||||
@ -149,39 +75,49 @@ RELEASE = "release"
|
||||
DEBUG = "debug"
|
||||
|
||||
LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
|
||||
**{
|
||||
(
|
||||
gpu_arch,
|
||||
PRE_CXX11_ABI,
|
||||
): f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in CUDA_ARCHES
|
||||
},
|
||||
**{
|
||||
(
|
||||
gpu_arch,
|
||||
CXX11_ABI,
|
||||
): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in CUDA_ARCHES
|
||||
},
|
||||
**{
|
||||
(
|
||||
gpu_arch,
|
||||
PRE_CXX11_ABI,
|
||||
): f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in ROCM_ARCHES
|
||||
},
|
||||
**{
|
||||
(
|
||||
gpu_arch,
|
||||
CXX11_ABI,
|
||||
): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in ROCM_ARCHES
|
||||
},
|
||||
("cpu", PRE_CXX11_ABI): f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
|
||||
("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
|
||||
(
|
||||
"11.8",
|
||||
PRE_CXX11_ABI,
|
||||
): "pytorch/manylinux-builder:cuda11.8-2.1",
|
||||
(
|
||||
"12.1",
|
||||
PRE_CXX11_ABI,
|
||||
): "pytorch/manylinux-builder:cuda12.1-2.1",
|
||||
(
|
||||
"11.8",
|
||||
CXX11_ABI,
|
||||
): "pytorch/libtorch-cxx11-builder:cuda11.8-2.1",
|
||||
(
|
||||
"12.1",
|
||||
CXX11_ABI,
|
||||
): "pytorch/libtorch-cxx11-builder:cuda12.1-2.1",
|
||||
(
|
||||
"5.5",
|
||||
PRE_CXX11_ABI,
|
||||
): "pytorch/manylinux-builder:rocm5.5-2.1",
|
||||
(
|
||||
"5.6",
|
||||
PRE_CXX11_ABI,
|
||||
): "pytorch/manylinux-builder:rocm5.6-2.1",
|
||||
(
|
||||
"5.5",
|
||||
CXX11_ABI,
|
||||
): "pytorch/libtorch-cxx11-builder:rocm5.5-2.1",
|
||||
(
|
||||
"5.6",
|
||||
CXX11_ABI,
|
||||
): "pytorch/libtorch-cxx11-builder:rocm5.6-2.1",
|
||||
(
|
||||
"cpu",
|
||||
PRE_CXX11_ABI,
|
||||
): "pytorch/manylinux-builder:cpu-2.1",
|
||||
(
|
||||
"cpu",
|
||||
CXX11_ABI,
|
||||
): "pytorch/libtorch-cxx11-builder:cpu-2.1",
|
||||
}
|
||||
|
||||
FULL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
FULL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"]
|
||||
|
||||
|
||||
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
|
||||
@ -288,6 +224,7 @@ def generate_wheels_matrix(
|
||||
os: str,
|
||||
arches: Optional[List[str]] = None,
|
||||
python_versions: Optional[List[str]] = None,
|
||||
gen_special_an_non_special_wheel: bool = True,
|
||||
) -> List[Dict[str, str]]:
|
||||
package_type = "wheel"
|
||||
if os == "linux" or os == "linux-aarch64":
|
||||
@ -321,8 +258,9 @@ def generate_wheels_matrix(
|
||||
else arch_version
|
||||
)
|
||||
|
||||
# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
||||
if arch_version in ["12.1", "11.8"] and os == "linux":
|
||||
# special 12.1 wheels package without dependencies
|
||||
# dependency downloaded via pip install
|
||||
if arch_version == "12.1" and os == "linux":
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
@ -334,36 +272,34 @@ def generate_wheels_matrix(
|
||||
"devtoolset": "",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version], # fmt: skip
|
||||
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( # noqa: B950
|
||||
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS,
|
||||
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-with-pypi-cudnn".replace( # noqa: B950
|
||||
".", "_"
|
||||
),
|
||||
}
|
||||
)
|
||||
else:
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
"gpu_arch_type": gpu_arch_type,
|
||||
"gpu_arch_version": gpu_arch_version,
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"devtoolset": "cxx11-abi"
|
||||
if arch_version == "cpu-cxx11-abi"
|
||||
else "",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
|
||||
".", "_"
|
||||
),
|
||||
"pytorch_extra_install_requirements":
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip
|
||||
if os != "linux" else "",
|
||||
}
|
||||
)
|
||||
if not gen_special_an_non_special_wheel:
|
||||
continue
|
||||
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
"gpu_arch_type": gpu_arch_type,
|
||||
"gpu_arch_version": gpu_arch_version,
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"devtoolset": "cxx11-abi"
|
||||
if arch_version == "cpu-cxx11-abi"
|
||||
else "",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
|
||||
".", "_"
|
||||
),
|
||||
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS
|
||||
if os != "linux"
|
||||
else "",
|
||||
}
|
||||
)
|
||||
return ret
|
||||
|
||||
|
||||
validate_nccl_dep_consistency("12.1")
|
||||
validate_nccl_dep_consistency("11.8")
|
||||
|
||||
42 .github/scripts/generate_ci_workflows.py (vendored)
@ -60,7 +60,7 @@ class BinaryBuildWorkflow:
|
||||
branches: str = "nightly"
|
||||
# Mainly for macos
|
||||
cross_compile_arm64: bool = False
|
||||
macos_runner: str = "macos-12-xl"
|
||||
xcode_version: str = ""
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.abi_version:
|
||||
@ -125,9 +125,7 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
OperatingSystem.LINUX, generate_binary_build_matrix.CXX11_ABI
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
@ -139,9 +137,7 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.PRE_CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
generate_binary_build_matrix.PRE_CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
OperatingSystem.LINUX, generate_binary_build_matrix.PRE_CXX11_ABI
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
@ -158,6 +154,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
OperatingSystem.LINUX,
|
||||
arches=["11.8", "12.1"],
|
||||
python_versions=["3.8"],
|
||||
gen_special_an_non_special_wheel=False,
|
||||
),
|
||||
branches="main",
|
||||
),
|
||||
@ -215,9 +212,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.RELEASE,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
OperatingSystem.WINDOWS, generate_binary_build_matrix.RELEASE
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
@ -229,9 +224,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.DEBUG,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.DEBUG,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
OperatingSystem.WINDOWS, generate_binary_build_matrix.DEBUG
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
@ -301,39 +294,20 @@ MACOS_BINARY_BUILD_WORKFLOWS = [
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.MACOS,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
OperatingSystem.MACOS, generate_binary_build_matrix.CXX11_ABI
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.MACOS_ARM64,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.MACOS,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
cross_compile_arm64=False,
|
||||
macos_runner="macos-13-xlarge",
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.MACOS_ARM64,
|
||||
package_type="wheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.MACOS_ARM64
|
||||
),
|
||||
cross_compile_arm64=False,
|
||||
macos_runner="macos-13-xlarge",
|
||||
cross_compile_arm64=True,
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
|
||||
isolated_workflow=True,
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Generates a matrix for docker releases through github actions
|
||||
|
||||
Will output a condensed version of the matrix. Will include fllowing:
|
||||
* CUDA version short
|
||||
* CUDA full verison
|
||||
* CUDNN version short
|
||||
* Image type either runtime or devel
|
||||
* Platform linux/arm64,linux/amd64
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, List
|
||||
|
||||
import generate_binary_build_matrix
|
||||
|
||||
DOCKER_IMAGE_TYPES = ["runtime", "devel"]
|
||||
|
||||
|
||||
def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
|
||||
ret: List[Dict[str, str]] = []
|
||||
for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
|
||||
for image in DOCKER_IMAGE_TYPES:
|
||||
ret.append(
|
||||
{
|
||||
"cuda": cuda,
|
||||
"cuda_full_version": version,
|
||||
"cudnn_version": generate_binary_build_matrix.CUDA_ARCHES_CUDNN_VERSION[
|
||||
cuda
|
||||
],
|
||||
"image_type": image,
|
||||
"platform": "linux/arm64,linux/amd64",
|
||||
}
|
||||
)
|
||||
return {"include": ret}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
build_matrix = generate_docker_matrix()
|
||||
print(json.dumps(build_matrix))
|
||||
19 .github/scripts/get_workflow_job_id.py (vendored)
@ -111,7 +111,7 @@ def fetch_jobs(url: str, headers: Dict[str, str]) -> List[Dict[str, str]]:
|
||||
# running.
|
||||
|
||||
|
||||
def find_job_id_name(args: Any) -> Tuple[str, str]:
|
||||
def find_job_id(args: Any) -> str:
|
||||
# From https://docs.github.com/en/actions/learn-github-actions/environment-variables
|
||||
PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
|
||||
PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
|
||||
@ -130,28 +130,15 @@ def find_job_id_name(args: Any) -> Tuple[str, str]:
|
||||
|
||||
for job in jobs:
|
||||
if job["runner_name"] == args.runner_name:
|
||||
return (job["id"], job["name"])
|
||||
return job["id"]
|
||||
|
||||
raise RuntimeError(f"Can't find job id for runner {args.runner_name}")
|
||||
|
||||
|
||||
def set_output(name: str, val: Any) -> None:
|
||||
if os.getenv("GITHUB_OUTPUT"):
|
||||
with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
|
||||
print(f"{name}={val}", file=env)
|
||||
print(f"setting {name}={val}")
|
||||
else:
|
||||
print(f"::set-output name={name}::{val}")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
try:
|
||||
# Get both the job ID and job name because we have already spent a request
|
||||
# here to get the job info
|
||||
job_id, job_name = find_job_id_name(args)
|
||||
set_output("job-id", job_id)
|
||||
set_output("job-name", job_name)
|
||||
print(find_job_id(args))
|
||||
except Exception as e:
|
||||
print(repr(e), file=sys.stderr)
|
||||
print(f"workflow-{args.workflow_run_id}")
|
||||
|
||||
40 .github/scripts/github_utils.py (vendored)
@ -5,15 +5,12 @@ import os
|
||||
import warnings
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Callable, cast, Dict, List, Optional, Tuple
|
||||
from urllib.error import HTTPError
|
||||
from urllib.parse import quote
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
|
||||
GITHUB_API_URL = "https://api.github.com"
|
||||
|
||||
|
||||
@dataclass
|
||||
class GitHubComment:
|
||||
body_text: str
|
||||
@ -29,20 +26,16 @@ def gh_fetch_url_and_headers(
|
||||
url: str,
|
||||
*,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
data: Union[Optional[Dict[str, Any]], str] = None,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
method: Optional[str] = None,
|
||||
reader: Callable[[Any], Any] = lambda x: x.read(),
|
||||
) -> Tuple[Any, Any]:
|
||||
if headers is None:
|
||||
headers = {}
|
||||
token = os.environ.get("GITHUB_TOKEN")
|
||||
if token is not None and url.startswith(f"{GITHUB_API_URL}/"):
|
||||
if token is not None and url.startswith("https://api.github.com/"):
|
||||
headers["Authorization"] = f"token {token}"
|
||||
|
||||
data_ = None
|
||||
if data is not None:
|
||||
data_ = data.encode() if isinstance(data, str) else json.dumps(data).encode()
|
||||
|
||||
data_ = json.dumps(data).encode() if data is not None else None
|
||||
try:
|
||||
with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn:
|
||||
return conn.headers, reader(conn)
|
||||
@ -64,7 +57,7 @@ def gh_fetch_url(
|
||||
url: str,
|
||||
*,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
data: Union[Optional[Dict[str, Any]], str] = None,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
method: Optional[str] = None,
|
||||
reader: Callable[[Any], Any] = lambda x: x.read(),
|
||||
) -> Any:
|
||||
@ -132,7 +125,7 @@ def gh_post_pr_comment(
|
||||
org: str, repo: str, pr_num: int, comment: str, dry_run: bool = False
|
||||
) -> List[Dict[str, Any]]:
|
||||
return _gh_post_comment(
|
||||
f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/{pr_num}/comments",
|
||||
f"https://api.github.com/repos/{org}/{repo}/issues/{pr_num}/comments",
|
||||
comment,
|
||||
dry_run,
|
||||
)
|
||||
@ -142,14 +135,14 @@ def gh_post_commit_comment(
|
||||
org: str, repo: str, sha: str, comment: str, dry_run: bool = False
|
||||
) -> List[Dict[str, Any]]:
|
||||
return _gh_post_comment(
|
||||
f"{GITHUB_API_URL}/repos/{org}/{repo}/commits/{sha}/comments",
|
||||
f"https://api.github.com/repos/{org}/{repo}/commits/{sha}/comments",
|
||||
comment,
|
||||
dry_run,
|
||||
)
|
||||
|
||||
|
||||
def gh_delete_comment(org: str, repo: str, comment_id: int) -> None:
|
||||
url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}"
|
||||
url = f"https://api.github.com/repos/{org}/{repo}/issues/comments/{comment_id}"
|
||||
gh_fetch_url(url, method="DELETE")
|
||||
|
||||
|
||||
@ -160,7 +153,7 @@ def gh_fetch_merge_base(org: str, repo: str, base: str, head: str) -> str:
|
||||
# https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#compare-two-commits
|
||||
try:
|
||||
json_data = gh_fetch_url(
|
||||
f"{GITHUB_API_URL}/repos/{org}/{repo}/compare/{base}...{head}",
|
||||
f"https://api.github.com/repos/{org}/{repo}/compare/{base}...{head}",
|
||||
headers={"Accept": "application/vnd.github.v3+json"},
|
||||
reader=json.load,
|
||||
)
|
||||
@ -174,18 +167,3 @@ def gh_fetch_merge_base(org: str, repo: str, base: str, head: str) -> str:
|
||||
warnings.warn(f"Failed to get merge base for {base}...{head}: {error}")
|
||||
|
||||
return merge_base
|
||||
|
||||
|
||||
def gh_update_pr_state(org: str, repo: str, pr_num: int, state: str = "open") -> None:
|
||||
url = f"{GITHUB_API_URL}/repos/{org}/{repo}/pulls/{pr_num}"
|
||||
try:
|
||||
gh_fetch_url(url, method="PATCH", data={"state": state})
|
||||
except HTTPError as err:
|
||||
# When trying to open the pull request, error 422 means that the branch
|
||||
# has been deleted and the API couldn't re-open it
|
||||
if err.code == 422 and state == "open":
|
||||
warnings.warn(
|
||||
f"Failed to open {pr_num} because its head branch has been deleted: {err}"
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
105513 .github/scripts/gql_mocks.json (generated, vendored, new file; diff suppressed because one or more lines are too long)
BIN .github/scripts/gql_mocks.json.gz (vendored, binary file not shown)
8 .github/scripts/pytest_cache.py (vendored)
@ -38,12 +38,6 @@ def parse_args() -> argparse.Namespace:
         required=True,
         help="A unique job identifier that should be the same for all runs of job",
     )
-    parser.add_argument(
-        "--sha", required="--upload" in sys.argv, help="SHA of the commit"
-    )  # Only required for upload
-    parser.add_argument(
-        "--test_config", required="--upload" in sys.argv, help="The test config"
-    )  # Only required for upload
     parser.add_argument(
         "--shard", required="--upload" in sys.argv, help="The shard id"
     )  # Only required for upload
@ -90,8 +84,6 @@ def main() -> None:
             pr_identifier=pr_identifier,
             repo=repo,
             job_identifier=args.job_identifier,
-            sha=args.sha,
-            test_config=args.test_config,
             shard=args.shard,
             cache_dir=cache_dir,
             bucket=args.bucket,
56 .github/scripts/pytest_caching_utils.py (vendored)
@ -56,8 +56,6 @@ def upload_pytest_cache(
|
||||
pr_identifier: PRIdentifier,
|
||||
repo: GithubRepo,
|
||||
job_identifier: str,
|
||||
sha: str,
|
||||
test_config: str,
|
||||
shard: str,
|
||||
cache_dir: Path,
|
||||
temp_dir: Path,
|
||||
@ -81,11 +79,25 @@ def upload_pytest_cache(
|
||||
if not bucket:
|
||||
bucket = BUCKET
|
||||
|
||||
# Upload the cache
|
||||
obj_key_prefix = _get_s3_key_prefix(
|
||||
pr_identifier, repo, job_identifier, sha, test_config, shard
|
||||
# Merge the current cache with any caches from previous runs before uploading
|
||||
# We only need to merge it with the cache for the same shard (which will have already been downloaded if it exists)
|
||||
# since the other shards will handle themselves
|
||||
shard_cache_path = _get_temp_cache_dir_path(
|
||||
temp_dir, pr_identifier, repo, job_identifier, shard
|
||||
)
|
||||
zip_file_path = zip_folder(cache_dir, temp_dir / ZIP_UPLOAD / obj_key_prefix)
|
||||
|
||||
if shard_cache_path.is_dir():
|
||||
_merge_pytest_caches(shard_cache_path, cache_dir)
|
||||
|
||||
#
|
||||
# Upload the cache
|
||||
#
|
||||
|
||||
obj_key_prefix = _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
|
||||
# This doesn't include the zip file extension. That'll get added later
|
||||
zip_file_path = temp_dir / ZIP_UPLOAD / obj_key_prefix
|
||||
|
||||
zip_file_path = zip_folder(cache_dir, zip_file_path)
|
||||
obj_key = f"{obj_key_prefix}{os.path.splitext(zip_file_path)[1]}" # Keep the new file extension
|
||||
upload_file_to_s3(zip_file_path, bucket, obj_key)
|
||||
|
||||
@ -124,22 +136,38 @@ def download_pytest_cache(
|
||||
)
|
||||
|
||||
for downloaded_zip in downloads:
|
||||
# Unzip into random folder, then merge with the current cache
|
||||
cache_dir_for_shard = (
|
||||
temp_dir / UNZIPPED_CACHES / os.urandom(16).hex() / PYTEST_CACHE_DIR_NAME
|
||||
# the file name of the zip is the shard id
|
||||
shard = os.path.splitext(os.path.basename(downloaded_zip))[0]
|
||||
cache_dir_for_shard = _get_temp_cache_dir_path(
|
||||
temp_dir, pr_identifier, repo, job_identifier, shard
|
||||
)
|
||||
|
||||
unzip_folder(downloaded_zip, cache_dir_for_shard)
|
||||
print(f"Merging cache from {downloaded_zip}")
|
||||
print(
|
||||
f"Merging cache for job_identifier `{job_identifier}`, shard `{shard}` into `{dest_cache_dir}`"
|
||||
)
|
||||
_merge_pytest_caches(cache_dir_for_shard, dest_cache_dir)
|
||||
|
||||
|
||||
def _get_temp_cache_dir_path(
|
||||
temp_dir: Path,
|
||||
pr_identifier: PRIdentifier,
|
||||
repo: GithubRepo,
|
||||
job_identifier: str,
|
||||
shard: str,
|
||||
) -> Path:
|
||||
return (
|
||||
temp_dir
|
||||
/ UNZIPPED_CACHES
|
||||
/ _get_s3_key_prefix(pr_identifier, repo, job_identifier, shard)
|
||||
/ PYTEST_CACHE_DIR_NAME
|
||||
)
|
||||
|
||||
|
||||
def _get_s3_key_prefix(
|
||||
pr_identifier: PRIdentifier,
|
||||
repo: GithubRepo,
|
||||
job_identifier: str,
|
||||
sha: str = "",
|
||||
test_config: str = "",
|
||||
shard: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
@ -148,10 +176,6 @@ def _get_s3_key_prefix(
|
||||
"""
|
||||
prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{repo.owner}/{repo.name}/{pr_identifier}/{sanitize_for_s3(job_identifier)}"
|
||||
|
||||
if sha:
|
||||
prefix += f"/{sha}"
|
||||
if test_config:
|
||||
prefix += f"/{sanitize_for_s3(test_config)}"
|
||||
if shard:
|
||||
prefix += f"/{shard}"
|
||||
|
||||
|
||||
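For orientation, the prefix assembled by _get_s3_key_prefix above is a plain slash-joined path. The snippet below is a simplified stand-in written only to show the resulting key shape; the PYTEST_CACHE_KEY_PREFIX value and the sanitization rule are assumptions for the example, not values taken from the diff.

# Hypothetical, simplified stand-in for _get_s3_key_prefix
PYTEST_CACHE_KEY_PREFIX = "pytest_cache"  # assumed constant value


def sample_key_prefix(owner: str, name: str, pr_identifier: str, job_identifier: str, shard: str = "") -> str:
    # job identifiers can contain characters that are awkward in S3 keys,
    # so the real helper passes them through sanitize_for_s3 first
    sanitized_job = job_identifier.replace(" ", "_")  # stand-in sanitizer
    prefix = f"{PYTEST_CACHE_KEY_PREFIX}/{owner}/{name}/{pr_identifier}/{sanitized_job}"
    if shard:
        prefix += f"/{shard}"
    return prefix


# Prints: pytest_cache/pytorch/pytorch/pr-12345/linux-focal-py3.8_test/1
print(sample_key_prefix("pytorch", "pytorch", "pr-12345", "linux-focal-py3.8 test", "1"))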
47298 .github/scripts/rockset_mocks.json (vendored, Normal file)
File diff suppressed because it is too large
BIN .github/scripts/rockset_mocks.json.gz (vendored)
Binary file not shown.
64 .github/scripts/tag_docker_images_for_release.py (vendored)
@ -1,64 +0,0 @@
|
||||
import argparse
|
||||
import subprocess
|
||||
from typing import Dict
|
||||
|
||||
import generate_binary_build_matrix
|
||||
|
||||
|
||||
def tag_image(
|
||||
image: str,
|
||||
default_tag: str,
|
||||
release_version: str,
|
||||
dry_run: str,
|
||||
tagged_images: Dict[str, bool],
|
||||
) -> None:
|
||||
if image in tagged_images:
|
||||
return
|
||||
release_image = image.replace(f"-{default_tag}", f"-{release_version}")
|
||||
print(f"Tagging {image} to {release_image} , dry_run: {dry_run}")
|
||||
|
||||
if dry_run == "disabled":
|
||||
subprocess.check_call(["docker", "pull", image])
|
||||
subprocess.check_call(["docker", "tag", image, release_image])
|
||||
subprocess.check_call(["docker", "push", release_image])
|
||||
tagged_images[image] = True
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
help="Version to tag",
|
||||
type=str,
|
||||
default="2.2",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
help="No Runtime Error check",
|
||||
type=str,
|
||||
choices=["enabled", "disabled"],
|
||||
default="enabled",
|
||||
)
|
||||
|
||||
options = parser.parse_args()
|
||||
tagged_images: Dict[str, bool] = dict()
|
||||
platform_images = [
|
||||
generate_binary_build_matrix.WHEEL_CONTAINER_IMAGES,
|
||||
generate_binary_build_matrix.LIBTORCH_CONTAINER_IMAGES,
|
||||
generate_binary_build_matrix.CONDA_CONTAINER_IMAGES,
|
||||
]
|
||||
default_tag = generate_binary_build_matrix.DEFAULT_TAG
|
||||
|
||||
for platform_image in platform_images: # type: ignore[attr-defined]
|
||||
for arch in platform_image.keys(): # type: ignore[attr-defined]
|
||||
tag_image(
|
||||
platform_image[arch], # type: ignore[index]
|
||||
default_tag,
|
||||
options.version,
|
||||
options.dry_run,
|
||||
tagged_images,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
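The core of the removed tag_image helper above is a single string substitution on the image tag before pulling, re-tagging and pushing. A minimal sketch of that step, using a made-up image name:

# Sketch of the tag rewrite done by tag_image; the image name is hypothetical
image = "pytorch/manylinux-builder:cuda12.1-main"
default_tag = "main"
release_version = "2.2"

# "-main" becomes "-2.2", turning the default tag into the release tag
release_image = image.replace(f"-{default_tag}", f"-{release_version}")
assert release_image == "pytorch/manylinux-builder:cuda12.1-2.2"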
57 .github/scripts/test_filter_test_configs.py (vendored)
@ -102,30 +102,6 @@ MOCKED_DISABLED_UNSTABLE_JOBS = {
|
||||
"manywheel-py3_8-cuda11_8-build",
|
||||
"",
|
||||
],
|
||||
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor)": [
|
||||
"pytorchbot",
|
||||
"107079",
|
||||
"https://github.com/pytorch/pytorch/issues/107079",
|
||||
"inductor",
|
||||
"cuda12.1-py3.10-gcc9-sm86",
|
||||
"test (inductor)",
|
||||
],
|
||||
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface)": [
|
||||
"pytorchbot",
|
||||
"109153",
|
||||
"https://github.com/pytorch/pytorch/issues/109153",
|
||||
"inductor",
|
||||
"cuda12.1-py3.10-gcc9-sm86",
|
||||
"test (inductor_huggingface)",
|
||||
],
|
||||
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface_dynamic)": [
|
||||
"pytorchbot",
|
||||
"109154",
|
||||
"https://github.com/pytorch/pytorch/issues/109154",
|
||||
"inductor",
|
||||
"cuda12.1-py3.10-gcc9-sm86",
|
||||
"test (inductor_huggingface_dynamic)",
|
||||
],
|
||||
}
|
||||
|
||||
MOCKED_PR_INFO = {
|
||||
@ -593,37 +569,6 @@ class TestConfigFilter(TestCase):
|
||||
"expected": '{"include": [{"config": "default", "unstable": "unstable"}]}',
|
||||
"description": "Both binary build and test jobs are unstable",
|
||||
},
|
||||
{
|
||||
"workflow": "inductor",
|
||||
"job_name": "cuda12.1-py3.10-gcc9-sm86 / build",
|
||||
"test_matrix": """
|
||||
{ include: [
|
||||
{ config: "inductor" },
|
||||
{ config: "inductor_huggingface", shard: 1 },
|
||||
{ config: "inductor_huggingface", shard: 2 },
|
||||
{ config: "inductor_timm", shard: 1 },
|
||||
{ config: "inductor_timm", shard: 2 },
|
||||
{ config: "inductor_torchbench" },
|
||||
{ config: "inductor_huggingface_dynamic" },
|
||||
{ config: "inductor_torchbench_dynamic" },
|
||||
{ config: "inductor_distributed" },
|
||||
]}
|
||||
""",
|
||||
"expected": """
|
||||
{ "include": [
|
||||
{ "config": "inductor", "unstable": "unstable" },
|
||||
{ "config": "inductor_huggingface", "shard": 1, "unstable": "unstable" },
|
||||
{ "config": "inductor_huggingface", "shard": 2, "unstable": "unstable" },
|
||||
{ "config": "inductor_timm", "shard": 1 },
|
||||
{ "config": "inductor_timm", "shard": 2 },
|
||||
{ "config": "inductor_torchbench" },
|
||||
{ "config": "inductor_huggingface_dynamic", "unstable": "unstable" },
|
||||
{ "config": "inductor_torchbench_dynamic" },
|
||||
{ "config": "inductor_distributed" }
|
||||
]}
|
||||
""",
|
||||
"description": "Marking multiple unstable configurations",
|
||||
},
|
||||
]
|
||||
|
||||
for case in testcases:
|
||||
@ -632,7 +577,7 @@ class TestConfigFilter(TestCase):
|
||||
test_matrix = yaml.safe_load(case["test_matrix"])
|
||||
|
||||
filtered_test_matrix = mark_unstable_jobs(workflow, job_name, test_matrix)
|
||||
self.assertEqual(json.loads(case["expected"]), filtered_test_matrix)
|
||||
self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
|
||||
|
||||
@mock.patch("subprocess.check_output")
|
||||
def test_perform_misc_tasks(self, mocked_subprocess: Any) -> None:
|
||||
|
||||
543 .github/scripts/test_trymerge.py (vendored)
@ -7,12 +7,11 @@
|
||||
# GraphQL queries in trymerge.py, please make sure to delete `gql_mocks.json`
|
||||
# And re-run the test locally with ones PAT
|
||||
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from hashlib import sha256
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, cast, Dict, List, Optional
|
||||
from unittest import main, mock, skip, TestCase
|
||||
from urllib.error import HTTPError
|
||||
|
||||
@ -20,20 +19,18 @@ from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
|
||||
|
||||
from trymerge import (
|
||||
categorize_checks,
|
||||
DRCI_CHECKRUN_NAME,
|
||||
find_matching_merge_rule,
|
||||
FlakyRule,
|
||||
get_classifications,
|
||||
get_drci_classifications,
|
||||
get_rockset_results,
|
||||
gh_get_team_members,
|
||||
gh_graphql,
|
||||
GitHubPR,
|
||||
JobCheckState,
|
||||
is_broken_trunk,
|
||||
main as trymerge_main,
|
||||
MandatoryChecksMissingError,
|
||||
MergeRule,
|
||||
PostCommentError,
|
||||
RE_GHSTACK_DESC,
|
||||
read_merge_rules,
|
||||
remove_job_name_suffix,
|
||||
validate_revert,
|
||||
@ -42,10 +39,6 @@ from trymerge import (
|
||||
if "GIT_REMOTE_URL" not in os.environ:
|
||||
os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"
|
||||
|
||||
GQL_MOCKS = "gql_mocks.json.gz"
|
||||
ROCKSET_MOCKS = "rockset_mocks.json.gz"
|
||||
DRCI_MOCKS = "drci_mocks.json.gz"
|
||||
|
||||
|
||||
def mock_query(
|
||||
fallback_function: Any,
|
||||
@ -58,11 +51,11 @@ def mock_query(
|
||||
def get_mocked_queries() -> Any:
|
||||
if not os.path.exists(gql_db_fname):
|
||||
return {}
|
||||
with gzip.open(gql_db_fname, encoding="utf-8", mode="rt") as f:
|
||||
with open(gql_db_fname, encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
def save_mocked_queries(obj: Any) -> None:
|
||||
with gzip.open(gql_db_fname, encoding="utf-8", mode="wt") as f:
|
||||
with open(gql_db_fname, encoding="utf-8", mode="w") as f:
|
||||
json.dump(obj, f, indent=2)
|
||||
f.write("\n")
|
||||
|
||||
@ -75,20 +68,19 @@ def mock_query(
|
||||
try:
|
||||
rc = fallback_function(*args)
|
||||
except HTTPError as err:
|
||||
if err.code == 401 or err.code == 403:
|
||||
if err.code == 401:
|
||||
err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
|
||||
err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
|
||||
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
|
||||
err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
|
||||
err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
|
||||
err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with "
|
||||
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN environment variable"
|
||||
err_msg += (
|
||||
" the rockset api key passed via ROCKSET_API_KEY environment variable"
|
||||
)
|
||||
if (
|
||||
os.getenv("GITHUB_TOKEN") is None
|
||||
or os.getenv("ROCKSET_API_KEY") is None
|
||||
or os.getenv("DRCI_BOT_KEY") is None
|
||||
):
|
||||
err_msg = (
|
||||
"Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
|
||||
+ "is not defined. "
|
||||
"Failed to update cached GraphQL queries as GITHUB_TOKEN or ROCKSET_API_KEY is not defined."
|
||||
+ err_msg
|
||||
)
|
||||
raise RuntimeError(err_msg) from err
|
||||
@ -108,29 +100,19 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
|
||||
def gh_graphql_wrapper(query: str, kwargs: Any) -> Any:
|
||||
return gh_graphql(query, **kwargs)
|
||||
|
||||
return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)
|
||||
return mock_query(gh_graphql_wrapper, "gql_mocks.json", key_function, query, kwargs)
|
||||
|
||||
|
||||
def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
|
||||
return mock_query(
|
||||
get_rockset_results,
|
||||
ROCKSET_MOCKS,
|
||||
"rockset_mocks.json",
|
||||
lambda x, y: f"{x} {y}",
|
||||
head_sha,
|
||||
merge_base,
|
||||
)
|
||||
|
||||
|
||||
def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
|
||||
return mock_query(
|
||||
get_drci_classifications,
|
||||
DRCI_MOCKS,
|
||||
lambda x, y: f"{x} {y}",
|
||||
pr_num,
|
||||
project,
|
||||
)
|
||||
|
||||
|
||||
def mock_parse_args(revert: bool = False, force: bool = False) -> Any:
|
||||
class Object:
|
||||
def __init__(self) -> None:
|
||||
@ -207,18 +189,6 @@ def mocked_read_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule
|
||||
],
|
||||
ignore_flaky_failures=True,
|
||||
),
|
||||
MergeRule(
|
||||
name="xla",
|
||||
patterns=[".github/ci_commit_pins/xla.txt"],
|
||||
approved_by=["pytorchbot"],
|
||||
mandatory_checks_name=[
|
||||
"Lint",
|
||||
"EasyCLA",
|
||||
"pull / linux-focal-py3_8-clang9-xla / build",
|
||||
"pull / linux-focal-py3_8-clang9-xla / test (xla, 1, 1, linux.12xlarge)",
|
||||
],
|
||||
ignore_flaky_failures=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@ -226,6 +196,16 @@ def mocked_read_merge_rules_raise(repo: Any, org: str, project: str) -> List[Mer
|
||||
raise RuntimeError("testing")
|
||||
|
||||
|
||||
def empty_flaky_rules() -> List[FlakyRule]:
|
||||
return []
|
||||
|
||||
|
||||
def xla_is_flaky_rules() -> List[FlakyRule]:
|
||||
return [
|
||||
FlakyRule("xla", ["FAILED: Build did NOT complete successfully"]),
|
||||
]
|
||||
|
||||
|
||||
def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
|
||||
return [
|
||||
MergeRule(
|
||||
@ -237,7 +217,6 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
|
||||
"EasyCLA",
|
||||
"pull / linux-bionic-py3_8-clang8-xla / build",
|
||||
"pull / linux-bionic-py3_8-clang8-xla / test (xla, 1, 1, linux.4xlarge)",
|
||||
"inductor / cuda11.8-py3.10-gcc7-sm86 / test (inductor_torchbench_dynamic, 1, 1, linux.g5.4xlarge.nvidia.gpu)",
|
||||
],
|
||||
ignore_flaky_failures=False,
|
||||
),
|
||||
@ -259,11 +238,9 @@ class DummyGitRepo(GitRepo):
|
||||
return "super awsome commit message"
|
||||
|
||||
|
||||
@mock.patch("trymerge.read_flaky_rules", side_effect=empty_flaky_rules)
|
||||
@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch(
|
||||
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
|
||||
)
|
||||
class TestTryMerge(TestCase):
|
||||
def test_merge_rules_valid(self, *args: Any) -> None:
|
||||
"Test that merge_rules.yaml can be parsed"
|
||||
@ -274,7 +251,7 @@ class TestTryMerge(TestCase):
|
||||
@mock.patch("trymerge.read_merge_rules", side_effect=mocked_read_merge_rules)
|
||||
def test_match_rules(self, *args: Any) -> None:
|
||||
"Tests that PR passes merge rules"
|
||||
pr = GitHubPR("pytorch", "pytorch", 109999)
|
||||
pr = GitHubPR("pytorch", "pytorch", 77700)
|
||||
repo = DummyGitRepo()
|
||||
self.assertTrue(find_matching_merge_rule(pr, repo) is not None)
|
||||
|
||||
@ -327,9 +304,14 @@ class TestTryMerge(TestCase):
|
||||
|
||||
def test_internal_changes(self, *args: Any) -> None:
|
||||
"Tests that PR with internal changes is detected"
|
||||
pr = GitHubPR("pytorch", "pytorch", 110140)
|
||||
pr = GitHubPR("pytorch", "pytorch", 73969)
|
||||
self.assertTrue(pr.has_internal_changes())
|
||||
|
||||
def test_checksuites_pagination(self, *args: Any) -> None:
|
||||
"Tests that PR with lots of checksuits can be fetched"
|
||||
pr = GitHubPR("pytorch", "pytorch", 73811)
|
||||
self.assertEqual(len(pr.get_checkrun_conclusions()), 76)
|
||||
|
||||
def test_comments_pagination(self, *args: Any) -> None:
|
||||
"Tests that PR with 50+ comments can be fetched"
|
||||
pr = GitHubPR("pytorch", "pytorch", 31093)
|
||||
@ -341,9 +323,7 @@ class TestTryMerge(TestCase):
|
||||
# see https://gist.github.com/malfet/9b93bc7eeddeaf1d84546efc4f0c577f
|
||||
pr = GitHubPR("pytorch", "pytorch", 68111)
|
||||
self.assertGreater(len(pr.get_comments()), 20)
|
||||
# NS(09/27/2023): GitHub seems to recycle older checkruns
|
||||
# https://github.com/pytorch/pytorch/pull/68111/checks shows 0 runs
|
||||
# self.assertGreater(len(pr.get_checkrun_conclusions()), 3)
|
||||
self.assertGreater(len(pr.get_checkrun_conclusions()), 3)
|
||||
self.assertGreater(pr.get_commit_count(), 60)
|
||||
|
||||
def test_gql_retrieve_checksuites(self, *args: Any) -> None:
|
||||
@ -388,16 +368,14 @@ class TestTryMerge(TestCase):
|
||||
|
||||
def test_get_checkruns_many_runs(self, *args: Any) -> None:
|
||||
"""Tests that all checkruns can be fetched"""
|
||||
pr = GitHubPR("pytorch", "pytorch", 105260)
|
||||
pr = GitHubPR("pytorch", "pytorch", 77700)
|
||||
conclusions = pr.get_checkrun_conclusions()
|
||||
self.assertEqual(len(conclusions), 221)
|
||||
self.assertTrue(
|
||||
"pull / linux-docs / build-docs-cpp-false" in conclusions.keys()
|
||||
)
|
||||
self.assertEqual(len(conclusions), 79)
|
||||
self.assertTrue("pull / linux-docs / build-docs (cpp)" in conclusions.keys())
|
||||
|
||||
def test_cancelled_gets_ignored(self, *args: Any) -> None:
|
||||
"""Tests that cancelled workflow does not override existing successfull status"""
|
||||
pr = GitHubPR("pytorch", "pytorch", 110367)
|
||||
pr = GitHubPR("pytorch", "pytorch", 82169)
|
||||
conclusions = pr.get_checkrun_conclusions()
|
||||
lint_checks = [name for name in conclusions.keys() if "Lint" in name]
|
||||
self.assertTrue(len(lint_checks) > 0)
|
||||
@ -545,7 +523,108 @@ class TestTryMerge(TestCase):
|
||||
for case in test_cases:
|
||||
self.assertEqual(case["expected"], remove_job_name_suffix(case["name"]))
|
||||
|
||||
def test_get_merge_base(self, *args: Any) -> None:
|
||||
def test_is_broken_trunk(self, *args: Any) -> None:
|
||||
test_cases: List[Dict[str, Any]] = [
|
||||
{
|
||||
"head_job": None,
|
||||
"base_jobs": {
|
||||
"job_a": {
|
||||
"conclusion": "success",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"job_b": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
},
|
||||
"expected": False,
|
||||
"description": "Invalid input - head job",
|
||||
},
|
||||
{
|
||||
"head_job": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"base_jobs": None,
|
||||
"expected": False,
|
||||
"description": "Invalid input - base jobs",
|
||||
},
|
||||
{
|
||||
"head_job": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"base_jobs": {},
|
||||
"expected": False,
|
||||
"description": "Invalid input - empty base jobs",
|
||||
},
|
||||
{
|
||||
"head_job": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["x", "y"],
|
||||
},
|
||||
"base_jobs": {
|
||||
"job_a": {
|
||||
"conclusion": "success",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"job_b": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["x", "y"],
|
||||
},
|
||||
},
|
||||
"expected": True,
|
||||
"description": "Found a match",
|
||||
},
|
||||
{
|
||||
"head_job": {
|
||||
"conclusion": "success",
|
||||
"failure_captures": ["x", "y"],
|
||||
},
|
||||
"base_jobs": {
|
||||
"job_a": {
|
||||
"conclusion": "success",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"job_b": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["x", "y"],
|
||||
},
|
||||
},
|
||||
"expected": False,
|
||||
"description": "Not found - different conclusion",
|
||||
},
|
||||
{
|
||||
"head_job": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"base_jobs": {
|
||||
"job_a": {
|
||||
"conclusion": "success",
|
||||
"failure_captures": ["a", "b"],
|
||||
},
|
||||
"job_b": {
|
||||
"conclusion": "failure",
|
||||
"failure_captures": ["x", "y"],
|
||||
},
|
||||
},
|
||||
"expected": False,
|
||||
"description": "Not found - different captured failures",
|
||||
},
|
||||
]
|
||||
|
||||
for case in test_cases:
|
||||
self.assertEqual(
|
||||
case["expected"], is_broken_trunk(case["head_job"], case["base_jobs"])
|
||||
)
|
||||
|
||||
def test_get_merge_base(
|
||||
self,
|
||||
mock_gh_graphql: Any,
|
||||
mock_get_rockset_results: Any,
|
||||
mock_read_flaky_rules: Any,
|
||||
) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 104121)
|
||||
|
||||
mock_merge_base = "mocked-sha"
|
||||
@ -563,130 +642,57 @@ class TestTryMerge(TestCase):
|
||||
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||
@mock.patch(
|
||||
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
|
||||
)
|
||||
class TestBypassFailures(TestCase):
|
||||
def test_get_classifications(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 109584)
|
||||
flaky_rules = [
|
||||
# Try a regex rule
|
||||
FlakyRule("distributed", ["##\\[error\\]The operation [wW]as .+"])
|
||||
]
|
||||
pr = GitHubPR("pytorch", "pytorch", 92863)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
checks, pr.last_commit()["oid"], pr.get_merge_base(), flaky_rules, []
|
||||
)
|
||||
self.assertTrue(
|
||||
checks[
|
||||
"pull / linux-focal-py3.11-clang10 / test (dynamo, 1, 2, linux.2xlarge)"
|
||||
"pull / linux-bionic-py3_7-clang8-xla / test (xla, 1, 1, linux.4xlarge)"
|
||||
].classification
|
||||
== "BROKEN_TRUNK"
|
||||
)
|
||||
self.assertTrue(
|
||||
checks[
|
||||
"trunk / win-vs2019-cpu-py3 / test (default, 2, 3, windows.4xlarge.nonephemeral)"
|
||||
"pull / linux-focal-py3.7-gcc7 / test (distributed, 1, 2, linux.2xlarge)"
|
||||
].classification
|
||||
== "FLAKY"
|
||||
)
|
||||
self.assertTrue(
|
||||
checks[
|
||||
"pull / linux-jammy-py3.8-gcc11 / test (distributed, 1, 2, linux.2xlarge)"
|
||||
].classification
|
||||
== "FLAKY"
|
||||
)
|
||||
self.assertTrue(
|
||||
checks[
|
||||
"pull / linux-focal-cuda11.8-py3.10-gcc9 / test (distributed, 1, 3, linux.8xlarge.nvidia.gpu)"
|
||||
].classification
|
||||
== "FLAKY"
|
||||
)
|
||||
|
||||
# Set the threshold larger or equal to the number of ok failures
|
||||
pending, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=6
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=2
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
# Not set any threshold, defaults to -1 to ignore all flaky and broken trunk failures
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
# Set the threshold lower than the number of ok failures
|
||||
pending, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=1
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 6)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
|
||||
# Set the threshold to 0 like when ignore_flaky_failures is on
|
||||
pending, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=1
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 6)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
|
||||
def test_get_classifications_flaky_fullname(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 110362)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
|
||||
def test_get_classifications_invalid_cancel(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 110367)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 0)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 0)
|
||||
self.assertTrue(len(ignorable["UNSTABLE"]) == 3)
|
||||
|
||||
def test_get_classifications_similar_failures(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 109750)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(failed) == 2)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
def test_get_classifications_unstable(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 104312)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
checks, pr.last_commit()["oid"], pr.get_merge_base(), [], []
|
||||
)
|
||||
workflow_name = "linux-bionic-cuda12.1-py3.10-gcc9-bazel-test"
|
||||
job_name = "build-and-test (default, 1, 1, linux.4xlarge.nvidia.gpu, unstable)"
|
||||
@ -700,6 +706,19 @@ class TestBypassFailures(TestCase):
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["UNSTABLE"]) == 1)
|
||||
|
||||
def test_get_classifications_pending_unstable(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 105998)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
checks, pr.last_commit()["oid"], pr.get_merge_base(), [], []
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=1
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 3)
|
||||
self.assertTrue(len(ignorable["UNSTABLE"]) == 3)
|
||||
|
||||
def test_get_classifications_broken_trunk(self, *args: Any) -> None:
|
||||
# The mock merge base is the actual value returned by gh_fetch_merge_base
|
||||
test_cases = [
|
||||
@ -707,13 +726,13 @@ class TestBypassFailures(TestCase):
|
||||
# This PR had one broken trunk failure but it was run on a different shard
|
||||
# than the one on the base commit. This should still count as broken trunk
|
||||
"pr_num": 104214,
|
||||
"related_failure_count": 0,
|
||||
"mock_merge_base": "436d035dc74db9c703297a62163b0cad0c546665",
|
||||
"unrelated_failure_count": 1,
|
||||
},
|
||||
{
|
||||
# This PR had one broken trunk failure and it used ghstack
|
||||
"pr_num": 105145,
|
||||
"related_failure_count": 0,
|
||||
"mock_merge_base": "194fe1d12f9860734cc28ed21bdabda2fbb06336",
|
||||
"unrelated_failure_count": 1,
|
||||
},
|
||||
{
|
||||
@ -722,81 +741,112 @@ class TestBypassFailures(TestCase):
|
||||
# keep the failure record from the merge base so that it can
|
||||
# be used to detect broken trunk
|
||||
"pr_num": 107160,
|
||||
"related_failure_count": 0,
|
||||
"mock_merge_base": "a5d841ef01e615e2a654fb12cf0cd08697d12ccf",
|
||||
"unrelated_failure_count": 4,
|
||||
},
|
||||
{
|
||||
# This PR used Dr.CI broken trunk classification
|
||||
"pr_num": 111253,
|
||||
"related_failure_count": 1,
|
||||
"unrelated_failure_count": 2,
|
||||
},
|
||||
]
|
||||
|
||||
for case in test_cases:
|
||||
pr_num = case["pr_num"]
|
||||
related_failure_count = case["related_failure_count"]
|
||||
mock_merge_base = case["mock_merge_base"]
|
||||
unrelated_failure_count = case["unrelated_failure_count"]
|
||||
|
||||
pr = GitHubPR("pytorch", "pytorch", pr_num)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pr = GitHubPR("pytorch", "pytorch", cast(int, pr_num))
|
||||
with mock.patch(
|
||||
"trymerge.gh_fetch_merge_base", return_value=mock_merge_base
|
||||
) as mocked_gh_fetch_merge_base:
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
checks, pr.last_commit()["oid"], pr.get_merge_base(), [], []
|
||||
)
|
||||
|
||||
pending, failed, _ = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == related_failure_count)
|
||||
pending, failed, _ = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
|
||||
# When the ok_failed_checks_threshold is set to 0, the broken trunk failure
|
||||
# won't be ignored
|
||||
pending, failed, _ = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=0
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(
|
||||
len(failed) == unrelated_failure_count + related_failure_count
|
||||
)
|
||||
# When the ok_failed_checks_threshold is set to 0, the broken trunk failure
|
||||
# won't be ignored
|
||||
pending, failed, _ = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=0
|
||||
)
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == unrelated_failure_count)
|
||||
|
||||
def test_ignore_current(self, *args: Any) -> None:
|
||||
# Test various interactions of the failure classifier to ensure that ignore
|
||||
# current checks takes place after other classifications: flaky, unstable,
|
||||
# or broken trunk. Only actual new failures should be kept in the list of
|
||||
# ignore current checks to use to record force merge with actual failures
|
||||
flaky = "pull / linux-focal-cuda11.8-py3.10-gcc9 / test (distributed, 1, 3, linux.8xlarge.nvidia.gpu)"
|
||||
flaky_rules = [
|
||||
FlakyRule("distributed", ["##\\[error\\]The operation was canceled."])
|
||||
]
|
||||
flaky = (
|
||||
"pull / linux-focal-py3.7-gcc7 / test (distributed, 1, 2, linux.2xlarge)"
|
||||
)
|
||||
broken_trunk = (
|
||||
"pull / linux-focal-py3.11-clang10 / test (dynamo, 1, 2, linux.2xlarge)"
|
||||
"pull / linux-bionic-py3_7-clang8-xla / test (xla, 1, 1, linux.4xlarge)"
|
||||
)
|
||||
|
||||
pr = GitHubPR("pytorch", "pytorch", 109584)
|
||||
pr = GitHubPR("pytorch", "pytorch", 92863)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
|
||||
# No broken trunk or flaky rules, then all failures are ignored when ic is used
|
||||
checks = get_classifications(
|
||||
checks, pr.last_commit()["oid"], None, [], [broken_trunk, flaky]
|
||||
)
|
||||
self.assertTrue(checks[flaky].classification == "IGNORE_CURRENT_CHECK")
|
||||
self.assertTrue(checks[broken_trunk].classification == "IGNORE_CURRENT_CHECK")
|
||||
_, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=2
|
||||
)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 2)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 0)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 0)
|
||||
|
||||
# Known flaky failure takes precedence over ignore current (need to set the
|
||||
# merge base here to get the results from Rockset, and that categorize the
|
||||
# broken trunk failure too
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
pr.last_commit()["oid"],
|
||||
pr.get_merge_base(),
|
||||
flaky_rules,
|
||||
[broken_trunk, flaky],
|
||||
)
|
||||
self.assertTrue(checks[flaky].classification == "FLAKY")
|
||||
self.assertTrue(checks[broken_trunk].classification == "BROKEN_TRUNK")
|
||||
_, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
_, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=2
|
||||
)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 4)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
# Broken trunk takes precedence over ignore current (no flaky rule is set here)
|
||||
checks = get_classifications(
|
||||
checks,
|
||||
pr.last_commit()["oid"],
|
||||
pr.get_merge_base(),
|
||||
[],
|
||||
[broken_trunk, flaky],
|
||||
)
|
||||
self.assertTrue(checks[flaky].classification == "IGNORE_CURRENT_CHECK")
|
||||
self.assertTrue(checks[broken_trunk].classification == "BROKEN_TRUNK")
|
||||
_, failed, ignorable = categorize_checks(
|
||||
checks, list(checks.keys()), ok_failed_checks_threshold=2
|
||||
)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["IGNORE_CURRENT_CHECK"]) == 1)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 0)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
@mock.patch("trymerge.read_flaky_rules", side_effect=xla_is_flaky_rules)
|
||||
@mock.patch("trymerge.read_merge_rules", side_effect=xla_merge_rules)
|
||||
def test_dont_ignore_flaky_failures(self, *args: Any) -> None:
|
||||
"""
|
||||
Regression test for https://github.com/pytorch/test-infra/issues/4126
|
||||
"""
|
||||
pr = GitHubPR("pytorch", "pytorch", 105312)
|
||||
"""Regression test for https://github.com/pytorch/test-infra/issues/4126"""
|
||||
pr = GitHubPR("pytorch", "pytorch", 100369)
|
||||
repo = DummyGitRepo()
|
||||
# Check that failure is classified as flaky but still raises exception
|
||||
with warnings.catch_warnings(record=True) as w, self.assertRaises(RuntimeError):
|
||||
@ -811,97 +861,14 @@ class TestBypassFailures(TestCase):
|
||||
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||
@mock.patch("trymerge.get_drci_classifications", return_value={})
|
||||
class TestBypassFailuresOnSandCastle(TestCase):
|
||||
def test_get_classifications(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 111467)
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 0)
|
||||
self.assertTrue(len(ignorable["FLAKY"]) == 1)
|
||||
self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 1)
|
||||
|
||||
def test_get_classifications_drci_checkrun_not_found(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 111467)
|
||||
|
||||
# No summary
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks[DRCI_CHECKRUN_NAME] = JobCheckState(
|
||||
DRCI_CHECKRUN_NAME,
|
||||
"",
|
||||
"NEUTRAL",
|
||||
None,
|
||||
1,
|
||||
"",
|
||||
None,
|
||||
)
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 2)
|
||||
|
||||
# Empty summary
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
checks[DRCI_CHECKRUN_NAME] = JobCheckState(
|
||||
DRCI_CHECKRUN_NAME,
|
||||
"",
|
||||
"NEUTRAL",
|
||||
None,
|
||||
1,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 2)
|
||||
|
||||
# No Dr.CI checkrun
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
del checks[DRCI_CHECKRUN_NAME]
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
[],
|
||||
)
|
||||
pending, failed, ignorable = categorize_checks(checks, list(checks.keys()))
|
||||
self.assertTrue(len(pending) == 0)
|
||||
self.assertTrue(len(failed) == 2)
|
||||
|
||||
|
||||
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||
@mock.patch(
|
||||
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
|
||||
)
|
||||
class TestGitHubPRGhstackDependencies(TestCase):
|
||||
class TestGitHubPRGhstackDependencies2(TestCase):
|
||||
def test_pr_dependencies(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 106068)
|
||||
msg = pr.gen_commit_message(filter_ghstack=True)
|
||||
self.assertEqual(
|
||||
msg,
|
||||
f"{pr.get_title()} (#106068)\n\n{RE_GHSTACK_DESC.sub('', pr.get_body())}\n"
|
||||
"Pull Request resolved: https://github.com/pytorch/pytorch/pull/106068\n"
|
||||
"Approved by: https://github.com/ezyang, https://github.com/fegin\n",
|
||||
assert msg == (
|
||||
"[FSDP] Break up `_post_backward_hook` into smaller funcs (#106068)\n\n\nDifferential Revision: ["
|
||||
"D47852461](https://our.internmc.facebook.com/intern/diff/D47852461)\nPull Request resolved: "
|
||||
"https://github.com/pytorch/pytorch/pull/106068\nApproved by: \n"
|
||||
)
|
||||
|
||||
def test_pr_dependencies_ghstack(self, *args: Any) -> None:
|
||||
@ -909,13 +876,13 @@ class TestGitHubPRGhstackDependencies(TestCase):
|
||||
pr1 = GitHubPR("pytorch", "pytorch", 106033)
|
||||
pr2 = GitHubPR("pytorch", "pytorch", 106034)
|
||||
pr = GitHubPR("pytorch", "pytorch", 106068)
|
||||
|
||||
msg = pr.gen_commit_message(filter_ghstack=True, ghstack_deps=[pr0, pr1, pr2])
|
||||
self.assertEqual(
|
||||
msg,
|
||||
f"{pr.get_title()} (#106068)\n\n{RE_GHSTACK_DESC.sub('', pr.get_body())}\n"
|
||||
"Pull Request resolved: https://github.com/pytorch/pytorch/pull/106068\n"
|
||||
"Approved by: https://github.com/ezyang, https://github.com/fegin\n"
|
||||
"ghstack dependencies: #106032, #106033, #106034\n",
|
||||
assert msg == (
|
||||
"[FSDP] Break up `_post_backward_hook` into smaller funcs (#106068)\n\n\nDifferential Revision: ["
|
||||
"D47852461](https://our.internmc.facebook.com/intern/diff/D47852461)\nPull Request resolved: "
|
||||
"https://github.com/pytorch/pytorch/pull/106068\nApproved by: \n"
|
||||
"ghstack dependencies: #106032, #106033, #106034\n"
|
||||
)
|
||||
|
||||
@skip(
|
||||
@ -964,7 +931,7 @@ class TestGitHubPRGhstackDependencies(TestCase):
|
||||
mock_repo.cherry_pick.assert_any_call("rev2")
|
||||
mock_repo.cherry_pick.assert_any_call("rev123")
|
||||
|
||||
self.assertTrue(mock.call("rev1") not in mock_repo.cherry_pick.call_args_list)
|
||||
assert mock.call("rev1") not in mock_repo.cherry_pick.call_args_list
|
||||
|
||||
# Verify the first call
|
||||
message = mock_repo.amend_commit_message.call_args_list[0].args[0]
|
||||
@ -977,8 +944,8 @@ class TestGitHubPRGhstackDependencies(TestCase):
|
||||
"dependencies: #106032, #106033\n"
|
||||
)
|
||||
|
||||
self.assertTrue(message.startswith(prefix))
|
||||
self.assertTrue(message.endswith(suffix))
|
||||
assert message.startswith(prefix)
|
||||
assert message.endswith(suffix)
|
||||
|
||||
# Verify the second call
|
||||
mock_repo.amend_commit_message.assert_any_call(
|
||||
|
||||
249 .github/scripts/trymerge.py (vendored)
@ -30,7 +30,6 @@ from github_utils import (
|
||||
gh_fetch_url,
|
||||
gh_post_commit_comment,
|
||||
gh_post_pr_comment,
|
||||
gh_update_pr_state,
|
||||
GitHubComment,
|
||||
)
|
||||
|
||||
@ -62,7 +61,6 @@ class JobCheckState(NamedTuple):
|
||||
classification: Optional[str]
|
||||
job_id: Optional[int]
|
||||
title: Optional[str]
|
||||
summary: Optional[str]
|
||||
|
||||
|
||||
JobNameToStateDict = Dict[str, JobCheckState]
|
||||
@ -76,6 +74,29 @@ class WorkflowCheckState:
|
||||
self.jobs: JobNameToStateDict = {}
|
||||
|
||||
|
||||
class FlakyRule:
|
||||
def __init__(self, name: str, captures: List[str]):
|
||||
self.name = re.compile(name)
|
||||
self.captures = [re.compile(r) for r in captures]
|
||||
|
||||
def matches(self, job: Optional[Dict[str, Any]]) -> bool:
|
||||
return (
|
||||
job is not None
|
||||
and self.name.search(job.get("name", "")) is not None
|
||||
and job.get("failure_captures") is not None
|
||||
and all(
|
||||
any(
|
||||
r.search(capture) is not None
|
||||
for capture in job.get("failure_captures", [])
|
||||
)
|
||||
for r in self.captures
|
||||
)
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"FlakyRule[name='{self.name}', captures={self.captures}]"
|
||||
|
||||
|
||||
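A FlakyRule added above matches a job when its name regex is found in the job's name and every capture regex matches at least one entry in the job's failure_captures. The following self-contained check reuses the class body from this hunk together with a rule and job name that appear in the tests earlier in this diff; the job dict itself is fabricated for illustration.

import re
from typing import Any, Dict, List, Optional


class FlakyRule:
    def __init__(self, name: str, captures: List[str]):
        self.name = re.compile(name)
        self.captures = [re.compile(r) for r in captures]

    def matches(self, job: Optional[Dict[str, Any]]) -> bool:
        # the name regex must hit the job name, and every capture regex must
        # hit at least one of the job's captured failure lines
        return (
            job is not None
            and self.name.search(job.get("name", "")) is not None
            and job.get("failure_captures") is not None
            and all(
                any(r.search(c) is not None for c in job.get("failure_captures", []))
                for r in self.captures
            )
        )


rule = FlakyRule("distributed", ["##\\[error\\]The operation [wW]as .+"])
job = {
    "name": "pull / linux-focal-py3.7-gcc7 / test (distributed, 1, 2, linux.2xlarge)",
    "failure_captures": ["##[error]The operation was canceled."],
}
assert rule.matches(job)  # both the name and the capture pattern match
assert not rule.matches({"name": "docs push", "failure_captures": ["timeout"]})  # name does not match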
GH_PR_REVIEWS_FRAGMENT = """
|
||||
fragment PRReviews on PullRequestReviewConnection {
|
||||
nodes {
|
||||
@ -120,7 +141,6 @@ fragment PRCheckSuites on CheckSuiteConnection {
|
||||
detailsUrl
|
||||
databaseId
|
||||
title
|
||||
summary
|
||||
}
|
||||
pageInfo {
|
||||
endCursor
|
||||
@ -312,7 +332,6 @@ query ($owner: String!, $name: String!, $number: Int!, $cs_cursor: String, $cr_c
|
||||
detailsUrl
|
||||
databaseId
|
||||
title
|
||||
summary
|
||||
}
|
||||
pageInfo {
|
||||
endCursor
|
||||
@ -437,7 +456,6 @@ MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
|
||||
ROCKSET_MERGES_COLLECTION = "merges"
|
||||
ROCKSET_MERGES_WORKSPACE = "commons"
|
||||
REMOTE_MAIN_BRANCH = "origin/main"
|
||||
DRCI_CHECKRUN_NAME = "Dr.CI"
|
||||
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
|
||||
HAS_NO_CONNECTED_DIFF_TITLE = (
|
||||
"There is no internal Diff connected, this can be merged now"
|
||||
@ -551,7 +569,6 @@ def add_workflow_conclusions(
|
||||
classification=None,
|
||||
job_id=checkrun_node["databaseId"],
|
||||
title=checkrun_node["title"],
|
||||
summary=checkrun_node["summary"],
|
||||
)
|
||||
|
||||
if bool(checkruns["pageInfo"]["hasNextPage"]):
|
||||
@ -582,7 +599,6 @@ def add_workflow_conclusions(
|
||||
classification=None,
|
||||
job_id=None,
|
||||
title=None,
|
||||
summary=None,
|
||||
)
|
||||
for job_name, job in no_workflow_obj.jobs.items():
|
||||
res[job_name] = job
|
||||
@ -908,7 +924,6 @@ class GitHubPR:
|
||||
classification=None,
|
||||
job_id=None,
|
||||
title=None,
|
||||
summary=None,
|
||||
)
|
||||
|
||||
return self.conclusions
|
||||
@ -1246,6 +1261,13 @@ def read_merge_rules(
|
||||
return [MergeRule(**x) for x in rc]
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def read_flaky_rules() -> List[FlakyRule]:
|
||||
# NOTE: This is currently hardcoded, can be extended to do per repo rules
|
||||
FLAKY_RULES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/flaky-rules.json"
|
||||
return _get_flaky_rules(FLAKY_RULES_URL)
|
||||
|
||||
|
||||
def find_matching_merge_rule(
|
||||
pr: GitHubPR,
|
||||
repo: Optional[GitRepo] = None,
|
||||
@ -1276,15 +1298,25 @@ def find_matching_merge_rule(
|
||||
reject_reason = f"No rule found to match PR. Please [report]{issue_link} this issue to DevX team."
|
||||
|
||||
rules = read_merge_rules(repo, pr.org, pr.project)
|
||||
flaky_rules = read_flaky_rules()
|
||||
if not rules:
|
||||
reject_reason = f"Rejecting the merge as no rules are defined for the repository in {MERGE_RULE_PATH}"
|
||||
raise RuntimeError(reject_reason)
|
||||
|
||||
checks = pr.get_checkrun_conclusions()
|
||||
base_rev = None
|
||||
try:
|
||||
# is allowed to fail if git is not available
|
||||
base_rev = pr.get_merge_base()
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Failed fetching base git revision for {pr.pr_num}. Skipping additional classifications.\n"
|
||||
f"{type(e)}\n{e}"
|
||||
)
|
||||
checks = get_classifications(
|
||||
pr.pr_num,
|
||||
pr.project,
|
||||
checks,
|
||||
pr.last_commit()["oid"],
|
||||
base_rev,
|
||||
flaky_rules,
|
||||
ignore_current_checks=ignore_current_checks,
|
||||
)
|
||||
|
||||
@ -1435,6 +1467,11 @@ def checks_to_markdown_bullets(
|
||||
]
|
||||
|
||||
|
||||
@retries_decorator(rc=[])
|
||||
def _get_flaky_rules(url: str) -> List[FlakyRule]:
|
||||
return [FlakyRule(**rule) for rule in gh_fetch_json_list(url)]
|
||||
|
||||
|
||||
@retries_decorator()
|
||||
def save_merge_record(
|
||||
collection: str,
|
||||
@ -1538,27 +1575,6 @@ where
|
||||
return []
|
||||
|
||||
|
||||
@retries_decorator()
|
||||
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
|
||||
"""
|
||||
Query HUD API to find similar failures to decide if they are flaky
|
||||
"""
|
||||
# NB: This doesn't work internally atm because this requires making an
|
||||
# external API call to HUD
|
||||
failures = gh_fetch_url(
|
||||
f"https://hud.pytorch.org/api/drci/drci?prNumber={pr_num}",
|
||||
data=f"repo={project}",
|
||||
headers={
|
||||
"Authorization": os.getenv("DRCI_BOT_KEY", ""),
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
method="POST",
|
||||
reader=json.load,
|
||||
)
|
||||
|
||||
return failures.get(str(pr_num), {}) if failures else {}
|
||||
|
||||
|
||||
REMOVE_JOB_NAME_SUFFIX_REGEX = re.compile(r", [0-9]+, [0-9]+, .+\)$")
|
||||
|
||||
|
||||
@ -1567,86 +1583,78 @@ def remove_job_name_suffix(name: str, replacement: str = ")") -> str:
|
||||
|
||||
|
||||
def is_broken_trunk(
|
||||
name: str,
|
||||
drci_classifications: Any,
|
||||
head_job: Optional[Dict[str, Any]], base_jobs: Optional[Dict[str, Dict[str, Any]]]
|
||||
) -> bool:
|
||||
if not name or not drci_classifications:
|
||||
if not head_job or not base_jobs:
|
||||
return False
|
||||
|
||||
# Consult the list of broken trunk failures from Dr.CI
|
||||
return any(
|
||||
name == broken_trunk["name"]
|
||||
for broken_trunk in drci_classifications.get("BROKEN_TRUNK", [])
|
||||
)
|
||||
|
||||
|
||||
def is_flaky(
|
||||
name: str,
|
||||
drci_classifications: Any,
|
||||
) -> bool:
|
||||
if not name or not drci_classifications:
|
||||
return False
|
||||
|
||||
# Consult the list of flaky failures from Dr.CI
|
||||
return any(name == flaky["name"] for flaky in drci_classifications.get("FLAKY", []))
|
||||
|
||||
|
||||
def is_invalid_cancel(
|
||||
name: str,
|
||||
conclusion: Optional[str],
|
||||
drci_classifications: Any,
|
||||
) -> bool:
|
||||
"""
|
||||
After https://github.com/pytorch/test-infra/pull/4579, invalid cancelled
|
||||
signals have been removed from HUD and Dr.CI. The same needs to be done
|
||||
here for consistency
|
||||
"""
|
||||
if (
|
||||
not name
|
||||
or not drci_classifications
|
||||
or not conclusion
|
||||
or conclusion.upper() != "CANCELLED"
|
||||
):
|
||||
return False
|
||||
|
||||
# If a job is cancelled and not listed as a failure by Dr.CI, it's an
|
||||
# invalid signal and can be ignored
|
||||
return all(
|
||||
name != failure["name"] for failure in drci_classifications.get("FAILED", [])
|
||||
head_job["conclusion"] == base_job["conclusion"]
|
||||
and head_job["failure_captures"] == base_job["failure_captures"]
|
||||
for base_job in base_jobs.values()
|
||||
)
|
||||
|
||||
|
||||
def get_classifications(
|
||||
pr_num: int,
|
||||
project: str,
|
||||
checks: Dict[str, JobCheckState],
|
||||
head_sha: str,
|
||||
merge_base: Optional[str],
|
||||
flaky_rules: List[FlakyRule],
|
||||
ignore_current_checks: Optional[List[str]],
|
||||
) -> Dict[str, JobCheckState]:
|
||||
# Get the failure classification from Dr.CI, which is the source of truth
|
||||
# going forward. It's preferable to try calling Dr.CI API directly first
|
||||
# to get the latest results as well as update Dr.CI PR comment
|
||||
drci_classifications = get_drci_classifications(pr_num=pr_num, project=project)
|
||||
print(f"From Dr.CI API: {json.dumps(drci_classifications)}")
|
||||
# Group by job name without shard id and suffix to correctly identify broken
|
||||
# trunk failures, i.e. linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default)
|
||||
head_sha_jobs: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
|
||||
merge_base_jobs: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
|
||||
|
||||
# NB: if the latest results from Dr.CI is not available, i.e. when calling from
|
||||
# SandCastle, we fallback to any results we can find on Dr.CI check run summary
|
||||
if (
|
||||
not drci_classifications
|
||||
and DRCI_CHECKRUN_NAME in checks
|
||||
and checks[DRCI_CHECKRUN_NAME]
|
||||
and checks[DRCI_CHECKRUN_NAME].summary
|
||||
):
|
||||
drci_summary = checks[DRCI_CHECKRUN_NAME].summary
|
||||
try:
|
||||
print(f"From Dr.CI checkrun summary: {drci_summary}")
|
||||
drci_classifications = json.loads(str(drci_summary))
|
||||
except json.JSONDecodeError as error:
|
||||
warn("Invalid Dr.CI checkrun summary")
|
||||
drci_classifications = {}
|
||||
if merge_base is not None:
|
||||
|
||||
def insert(
|
||||
d: Dict[str, Dict[str, Dict[str, Any]]],
|
||||
key: str,
|
||||
val: Dict[str, Any],
|
||||
overwrite_failed_run_attempt: bool,
|
||||
) -> None:
|
||||
key_no_suffix = remove_job_name_suffix(key)
|
||||
if key not in d[key_no_suffix]:
|
||||
d[key_no_suffix][key] = val
|
||||
return
|
||||
|
||||
# When overwrite_failed_run_attempt is set to True, always overwrite
|
||||
# the job with the result from the latest attempt. This option is for
|
||||
# jobs from the pull request head_sha where the latest retry is used
|
||||
# when merging
|
||||
#
|
||||
# When overwrite_failed_run_attempt is False, only overwrite the job
|
||||
# with the result from the latest attempt if the latest retry failed.
|
||||
# This option is for jobs from the merger_base where we want to record
|
||||
# failures for broken trunk
|
||||
if d[key_no_suffix][key]["id"] < val["id"] and (
|
||||
overwrite_failed_run_attempt or not is_passing_status(val["conclusion"])
|
||||
):
|
||||
d[key_no_suffix][key] = val
|
||||
|
||||
rockset_results = get_rockset_results(head_sha, merge_base)
|
||||
for rockset_result in rockset_results:
|
||||
name = f"{rockset_result['workflow_name']} / {rockset_result['name']}"
|
||||
if rockset_result["head_sha"] == head_sha:
|
||||
insert(
|
||||
head_sha_jobs,
|
||||
name,
|
||||
rockset_result,
|
||||
overwrite_failed_run_attempt=True,
|
||||
)
|
||||
else:
|
||||
insert(
|
||||
merge_base_jobs,
|
||||
name,
|
||||
rockset_result,
|
||||
overwrite_failed_run_attempt=False,
)

checks_with_classifications = checks.copy()

for name, check in checks.items():
if check.status == "SUCCESS" or check.status == "NEUTRAL":
if check.status == "SUCCESS":
continue

if "unstable" in name:
@@ -1657,13 +1665,13 @@ def get_classifications(
"UNSTABLE",
check.job_id,
check.title,
check.summary,
)
continue

# NB: It's important to note that when it comes to ghstack and broken trunk classification,
# Dr.CI uses the base of the whole stack
if is_broken_trunk(name, drci_classifications):
name_no_suffix = remove_job_name_suffix(name)
head_sha_job = head_sha_jobs.get(name_no_suffix, {}).get(name)

if is_broken_trunk(head_sha_job, merge_base_jobs.get(name_no_suffix)):
checks_with_classifications[name] = JobCheckState(
check.name,
check.url,
@@ -1671,34 +1679,12 @@ def get_classifications(
"BROKEN_TRUNK",
check.job_id,
check.title,
check.summary,
)
continue

elif is_flaky(name, drci_classifications):
elif any(rule.matches(head_sha_job) for rule in flaky_rules):
checks_with_classifications[name] = JobCheckState(
check.name,
check.url,
check.status,
"FLAKY",
check.job_id,
check.title,
check.summary,
)
continue

elif is_invalid_cancel(name, check.status, drci_classifications):
# NB: Create a new category here for invalid cancelled signals because
# there are usually many of them when they happen. So, they shouldn't
# be counted toward ignorable failures threshold
checks_with_classifications[name] = JobCheckState(
check.name,
check.url,
check.status,
"INVALID_CANCEL",
check.job_id,
check.title,
check.summary,
check.name, check.url, check.status, "FLAKY", check.job_id, check.title
)
continue

@@ -1710,7 +1696,6 @@ def get_classifications(
"IGNORE_CURRENT_CHECK",
check.job_id,
check.title,
check.summary,
)

return checks_with_classifications
@@ -1804,7 +1789,6 @@ def try_revert(
if not dry_run:
pr.add_numbered_label("reverted")
gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
gh_update_pr_state(pr.org, pr.project, pr.pr_num)


def prefix_with_github_url(suffix_str: str) -> str:
@@ -1880,8 +1864,6 @@ def categorize_checks(
# ignored anyway. This is useful to not need to wait for scarce resources
# like ROCm, which is also frequently in unstable mode
pending_checks.append((checkname, url, job_id))
elif classification == "INVALID_CANCEL":
continue
elif not is_passing_status(check_runs[checkname].status):
target = (
ignorable_failed_checks[classification]
@@ -1927,8 +1909,7 @@ def merge(
ignore_current: bool = False,
) -> None:
initial_commit_sha = pr.last_commit()["oid"]
pr_link = f"https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num}"
print(f"Attempting merge of {initial_commit_sha} ({pr_link})")
print(f"Attempting merge of {initial_commit_sha}")

if MERGE_IN_PROGRESS_LABEL not in pr.get_labels():
gh_add_labels(pr.org, pr.project, pr.pr_num, [MERGE_IN_PROGRESS_LABEL])
@@ -1993,6 +1974,7 @@ def merge(
start_time = time.time()
last_exception = ""
elapsed_time = 0.0
flaky_rules = read_flaky_rules()
ignore_current_checks = [
x[0] for x in ignore_current_checks_info
] # convert to List[str] for convenience
@@ -2025,9 +2007,10 @@ def merge(

checks = pr.get_checkrun_conclusions()
checks = get_classifications(
pr.pr_num,
pr.project,
checks,
pr.last_commit()["oid"],
pr.get_merge_base(),
flaky_rules,
ignore_current_checks=ignore_current_checks,
)
pending, failing, _ = categorize_checks(
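
The trymerge.py hunks above show the two sides of this compare for check classification: one side keys FLAKY and INVALID_CANCEL decisions off Dr.CI classifications (is_flaky, is_invalid_cancel), the other matches checks against locally read flaky_rules passed into get_classifications. As a rough illustration of how such per-check classifications are consumed downstream, here is a minimal sketch; the reduced JobCheckState fields, the classification names, and split_checks are simplified assumptions for this example, not the actual trymerge.py code.

from typing import Dict, List, NamedTuple, Optional, Tuple


class JobCheckState(NamedTuple):
    # Reduced stand-in for trymerge's richer JobCheckState record.
    name: str
    url: str
    status: str
    classification: Optional[str]


def split_checks(
    checks: Dict[str, JobCheckState],
    ignorable: Tuple[str, ...] = ("FLAKY", "BROKEN_TRUNK", "UNSTABLE"),
) -> Tuple[List[str], List[str]]:
    # Partition failing checks into "ignorable" (counted against a separate
    # threshold) and "blocking" (fail the merge outright).
    soft, hard = [], []
    for name, check in checks.items():
        if check.status in ("SUCCESS", "NEUTRAL"):
            continue
        (soft if check.classification in ignorable else hard).append(name)
    return soft, hard


# Hypothetical example: a flaky failure is ignorable, an unclassified one blocks.
example = {
    "linux-test": JobCheckState("linux-test", "https://example.invalid/1", "FAILURE", "FLAKY"),
    "win-build": JobCheckState("win-build", "https://example.invalid/2", "FAILURE", None),
}
print(split_checks(example))  # (['linux-test'], ['win-build'])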
22 .github/scripts/tryrebase.py vendored
@@ -51,7 +51,7 @@ def post_already_uptodate(

def rebase_onto(
pr: GitHubPR, repo: GitRepo, onto_branch: str, dry_run: bool = False
) -> bool:
) -> None:
branch = f"pull/{pr.pr_num}/head"
remote_url = f"https://github.com/{pr.info['headRepository']['nameWithOwner']}.git"
refspec = f"{branch}:{pr.head_ref()}"
@@ -68,7 +68,6 @@ def rebase_onto(
push_result = repo._run_git("push", "-f", remote_url, refspec)
if "Everything up-to-date" in push_result:
post_already_uptodate(pr, repo, onto_branch, dry_run)
return False
else:
gh_post_comment(
pr.org,
@@ -79,21 +78,18 @@ def rebase_onto(
+ "git pull --rebase`)",
dry_run=dry_run,
)
return True


def rebase_ghstack_onto(
pr: GitHubPR, repo: GitRepo, onto_branch: str, dry_run: bool = False
) -> bool:
) -> None:
if (
subprocess.run(
[sys.executable, "-m", "ghstack", "--help"],
capture_output=True,
check=False,
[sys.executable, "-m", "ghstack", "--help"], capture_output=True
).returncode
!= 0
):
subprocess.run([sys.executable, "-m", "pip", "install", "ghstack"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "ghstack"])
orig_ref = f"{re.sub(r'/head$', '/orig', pr.head_ref())}"

repo.fetch(orig_ref, orig_ref)
@@ -119,9 +115,8 @@ def rebase_ghstack_onto(

if dry_run:
print("Don't know how to dry-run ghstack")
return False
else:
ghstack_result = subprocess.run(["ghstack"], capture_output=True, check=True)
ghstack_result = subprocess.run(["ghstack"], capture_output=True)
push_result = ghstack_result.stdout.decode("utf-8")
print(push_result)
if ghstack_result.returncode != 0:
@@ -171,8 +166,6 @@ def rebase_ghstack_onto(
in push_result
):
post_already_uptodate(pr, repo, onto_branch, dry_run)
return False
return True


def additional_rebase_failure_info(e: Exception) -> str:
@@ -229,10 +222,9 @@ def main() -> None:
try:
if pr.is_ghstack_pr():
with git_config_guard(repo):
rc = rebase_ghstack_onto(pr, repo, onto_branch, dry_run=args.dry_run)
rebase_ghstack_onto(pr, repo, onto_branch, dry_run=args.dry_run)
else:
rc = rebase_onto(pr, repo, onto_branch, dry_run=args.dry_run)
sys.exit(0 if rc else 1)
rebase_onto(pr, repo, onto_branch, dry_run=args.dry_run)

except Exception as e:
msg = f"Rebase failed due to {e}"
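
One behavioral difference visible in the tryrebase.py hunks: the bool-returning variants feed main(), which maps the result onto the process exit status via sys.exit(0 if rc else 1), whereas the None-returning variants always exit 0 when no exception is raised. A minimal, runnable sketch of that exit-code propagation, with a placeholder predicate standing in for the real git/ghstack work (names reused for illustration only, not the actual script):

import sys


def rebase_onto(already_up_to_date: bool) -> bool:
    # Placeholder for the real rebase: report False when nothing was pushed
    # (the "Everything up-to-date" branch in the diff), True otherwise.
    return not already_up_to_date


def main() -> None:
    rc = rebase_onto(already_up_to_date=True)
    # A non-zero exit lets the calling workflow distinguish "nothing to rebase"
    # from a rebase that actually updated the PR branch.
    sys.exit(0 if rc else 1)


if __name__ == "__main__":
    main()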
3 .github/scripts/update_commit_hashes.py vendored
@@ -114,8 +114,7 @@ def main() -> None:

# query to see if a pr already exists
params = {
"q": f"is:pr is:open in:title author:pytorchupdatebot repo:{OWNER}/{REPO} {args.repo_name} hash update",
"sort": "created",
"q": f"is:pr is:open in:title author:pytorchmergebot repo:{OWNER}/{REPO} {args.repo_name} hash update"
}
response = git_api("/search/issues", params)
if response["total_count"] != 0:
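
The update_commit_hashes.py hunk is a single GitHub search query for an already-open "hash update" PR. A small sketch of issuing an equivalent query directly with urllib; the git_api helper is not shown here, and OWNER, REPO, repo_name, and the bot author below are assumed placeholder values, not necessarily what the script uses:

import json
import urllib.parse
import urllib.request

OWNER, REPO = "pytorch", "pytorch"               # assumed for illustration
repo_name, author = "vision", "pytorchmergebot"  # hypothetical inputs

params = {
    "q": f"is:pr is:open in:title author:{author} repo:{OWNER}/{REPO} {repo_name} hash update",
    "sort": "created",
}
url = "https://api.github.com/search/issues?" + urllib.parse.urlencode(params)
with urllib.request.urlopen(url) as resp:        # unauthenticated, rate-limited
    response = json.load(resp)
if response["total_count"] != 0:
    print("An open hash-update PR already exists; skip opening a new one.")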
6 .github/templates/common.yml.j2 vendored
@@ -8,7 +8,7 @@
# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
# the binary builds will check out
{%- set builder_repo = "pytorch/builder" -%}
{%- set builder_branch = "release/2.2" -%}
{%- set builder_branch = "release/2.1" -%}

{%- macro concurrency(build_environment) -%}
concurrency:
@@ -36,10 +36,10 @@ concurrency:
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: seemethere/add-github-ssh-key@v1
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560
- name: Enable long paths on Windows
shell: powershell

@@ -7,7 +7,6 @@
name: !{{ build_environment }}
{%- endblock %}


on:
push:
{%- if branches == "nightly" %}
@@ -58,8 +57,6 @@ jobs:
{%- if "aarch64" in build_environment %}
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
runs_on: linux.24xlarge
{%- endif %}
build_name: !{{ config["build_name"] }}
build_environment: !{{ build_environment }}
@@ -106,7 +103,7 @@ jobs:
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: !{{ config["container_image"] }}
- name: Test Pytorch binary

@@ -58,7 +58,7 @@ jobs:
{%- for config in build_configs %}
!{{ config["build_name"] }}-build:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: !{{ macos_runner }}
runs-on: macos-12-xl
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, true) }}
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
@@ -72,15 +72,11 @@ jobs:
- name: Install conda and dependencies
run: |
# Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on
curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh"
curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-x86_64.sh
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
if [ -d "/Applications/Xcode_14.3.1.app" ]; then
echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
fi
echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
8 .github/templates/upload.yml.j2 vendored
@@ -53,9 +53,6 @@
{%- macro upload_binaries(config, is_windows=False, has_test=True, use_s3=True) -%}
!{{ config["build_name"] }}-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
{%- if has_test %}
needs: !{{ config["build_name"] }}-test
{%- else %}
@@ -68,7 +65,8 @@
{%- endif %}
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
aws-pytorch-uploader-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
{%- endmacro %}
12 .github/workflows/_android-build-test.yml vendored
@@ -36,7 +36,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
fetch-depth: 1
submodules: false
@@ -58,25 +58,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}

# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1

- name: Setup Linux
uses: ./.github/actions/setup-linux

- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.1
with:
docker-image-name: ${{ inputs.docker-image-name }}

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@@ -140,5 +140,5 @@ jobs:
if: always()

- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.1
if: always()
14 .github/workflows/_android-full-build-test.yml vendored
@@ -36,7 +36,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
fetch-depth: 1
submodules: false
@@ -58,25 +58,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}

# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1

- name: Setup Linux
uses: ./.github/actions/setup-linux

- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.1
with:
docker-image-name: ${{ inputs.docker-image-name }}

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@@ -157,7 +157,7 @@ jobs:

# run gradle buildRelease
(echo "./.circleci/scripts/build_android_gradle.sh" | docker exec \
-e BUILD_ENVIRONMENT="pytorch-linux-focal-py3-clang9-android-ndk-r21e-gradle-build" \
-e BUILD_ENVIRONMENT="pytorch-linux-focal-py3-clang7-android-ndk-r19c-gradle-build" \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e PR_NUMBER \
@@ -185,5 +185,5 @@ jobs:
if: always()

- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.1
if: always()
18 .github/workflows/_bazel-build-test.yml vendored
@@ -41,7 +41,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
fetch-depth: 1
submodules: false
@@ -63,30 +63,30 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}

# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1

- name: Setup Linux
uses: ./.github/actions/setup-linux

- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.1
with:
docker-image-name: ${{ inputs.docker-image-name }}

- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.2
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.1
if: ${{ inputs.cuda-version != 'cpu' }}

- name: Output disk space left
@@ -120,7 +120,6 @@ jobs:
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
@@ -148,7 +147,6 @@ jobs:
-e GITHUB_JOB \
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e GIT_DEFAULT_BRANCH="$GIT_DEFAULT_BRANCH" \
-e SHARD_NUMBER \
-e NUM_TEST_SHARDS \
@@ -186,7 +184,7 @@ jobs:
shell: bash
if: always() && steps.test.conclusion
run: |
cat test/**/*_toprint.log || true
cat test/**/*.log || true

- name: Chown workspace
uses: ./.github/actions/chown-workspace
@@ -199,5 +197,5 @@ jobs:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.1
if: always()
12 .github/workflows/_binary-build-linux.yml vendored
@@ -15,7 +15,7 @@ on:
required: false
default: linux.12xlarge
type: string
description: Hardware to run this "build"job on, linux.12xlarge or linux.arm64.2xlarge.
description: Hardware to run this "build"job on, linux.12xlarge or linux.t4g.2xlarge.
ALPINE_IMAGE:
required: false
type: string
@@ -139,13 +139,13 @@ jobs:
run: env

- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}

- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}

@@ -186,7 +186,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: release/2.2
ref: release/2.1
submodules: recursive
repository: pytorch/builder
path: builder
@@ -212,7 +212,7 @@ jobs:

- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}

@@ -269,7 +269,7 @@ jobs:

- name: Teardown Linux
if: always()
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.1

- name: Chown workspace
if: always()
14 .github/workflows/_binary-test-linux.yml vendored
@@ -62,7 +62,7 @@ on:
runs_on:
required: true
type: string
description: Hardware to run this job on. Valid values are linux.4xlarge, linux.4xlarge.nvidia.gpu, linux.arm64.2xlarge, and linux.rocm.gpu
description: Hardware to run this job on. Valid values are linux.4xlarge, linux.4xlarge.nvidia.gpu, linux.t4g.2xlarge, and linux.rocm.gpu
secrets:
github-token:
required: true
@@ -127,14 +127,14 @@ jobs:
} >> "${GITHUB_ENV} }}"

- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.1
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}

# Setup the environment
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}

@@ -167,7 +167,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: release/2.2
ref: release/2.1
submodules: recursive
repository: pytorch/builder
path: builder
@@ -198,12 +198,12 @@ jobs:
path: "${{ runner.temp }}/artifacts/"

- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.2
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.1
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}

- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.1
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}

@@ -213,7 +213,7 @@ jobs:

- name: Teardown Linux
if: always()
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.1

- name: Chown workspace
if: always()
39 .github/workflows/_binary-upload.yml vendored
@@ -59,15 +59,17 @@ on:
github-token:
required: true
description: Github Token
aws-pytorch-uploader-access-key-id:
required: true
description: AWS access key id
aws-pytorch-uploader-secret-access-key:
required: true
description: AWS secret access key
conda-pytorchbot-token:
required: true
description: Conda PyTorchBot token
conda-pytorchbot-token-test:
required: true
description: Conda PyTorchBot token

jobs:
upload:
build:
runs-on: ubuntu-22.04
environment: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v'))) && 'conda-aws-upload' || '' }}
container:
@@ -95,24 +97,10 @@ jobs:
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.1
with:
no-sudo: true

- name: Configure AWS credentials(PyTorch account) for nightly
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/nightly' }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
aws-region: us-east-1

- name: Configure AWS credentials(PyTorch account) for RC builds
if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
aws-region: us-east-1

- name: Download Build Artifacts
id: download-artifacts
# NB: When the previous build job is skipped, there won't be any artifacts and
@@ -139,19 +127,14 @@ jobs:

- name: Upload binaries
if: steps.download-artifacts.outcome && steps.download-artifacts.outcome == 'success'
shell: bash
env:
PKG_DIR: "${{ runner.temp }}/artifacts"
UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
# When running these on pull_request events these should be blank
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.conda-pytorchbot-token }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.conda-pytorchbot-token-test }}
AWS_ACCESS_KEY_ID: ${{ secrets.aws-pytorch-uploader-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.aws-pytorch-uploader-secret-access-key }}
ANACONDA_API_TOKEN: ${{ secrets.conda-pytorchbot-token }}
BUILD_NAME: ${{ inputs.build_name }}
run: |
set -ex
if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then
export ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN_TEST}"
else
export ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN}"
fi
bash .circleci/scripts/binary_upload.sh
Some files were not shown because too many files have changed in this diff.