Update (base update)

[ghstack-poisoned]
This commit is contained in:
yanbing-j
2024-10-10 14:33:09 +08:00
1444 changed files with 34310 additions and 31446 deletions

View File

@ -21,6 +21,3 @@
cxx = /usr/bin/clang++
cxxpp = /usr/bin/clang++
ld = /usr/bin/clang++
[project]
default_flavors_mode=all

View File

@ -291,7 +291,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.0
ROCM_VERSION=6.1
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
@ -302,7 +302,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.1
ROCM_VERSION=6.2
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
@ -355,6 +355,12 @@ case "$image" in
CONDA_CMAKE=yes
VISION=yes
;;
pytorch-linux-jammy-py3-clang18-asan)
ANACONDA_PYTHON_VERSION=3.10
CLANG_VERSION=18
CONDA_CMAKE=yes
VISION=yes
;;
pytorch-linux-jammy-py3.9-gcc11)
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
@ -379,6 +385,14 @@ case "$image" in
GCC_VERSION=11
CONDA_CMAKE=yes
HALIDE=yes
TRITON=yes
;;
pytorch-linux-jammy-py3.12-triton-cpu)
CUDA_VERSION=12.4
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
TRITON_CPU=yes
;;
pytorch-linux-focal-linter)
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
@ -509,6 +523,7 @@ docker build \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
--build-arg "TRITON=${TRITON}" \
--build-arg "TRITON_CPU=${TRITON_CPU}" \
--build-arg "ONNX=${ONNX}" \
--build-arg "DOCS=${DOCS}" \
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \

View File

@ -0,0 +1 @@
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f

View File

@ -1 +1 @@
5fe38ffd73c2ac6ed6323b554205186696631c6f
cf34004b8a67d290a962da166f5aa2fc66751326

View File

@ -13,11 +13,17 @@ if [ -n "$CLANG_VERSION" ]; then
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
# work around ubuntu apt-get conflicts
sudo apt-get -y -f install
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
if [[ $CLANG_VERSION == 18 ]]; then
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
fi
fi
sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
apt-get install -y --no-install-recommends libomp-18-dev
fi
# Install dev version of LLVM.
if [ -n "$LLVMDEV" ]; then

View File

@ -105,7 +105,7 @@ function install_121 {
}
function install_124 {
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run

View File

@ -5,19 +5,19 @@ set -ex
NCCL_VERSION=v2.21.5-1
function install_cusparselt_052 {
function install_cusparselt_062 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_124 {
echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
@ -44,7 +44,7 @@ function install_124 {
cd ..
rm -rf nccl
install_cusparselt_052
install_cusparselt_062
ldconfig
}

View File

@ -5,7 +5,7 @@ set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-4]$ ]]; then
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
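For reference, the widened guard compares only the first four characters of CUDA_VERSION against 12.2 through 12.6. A standalone check over a few illustrative values behaves as follows:

```
# Illustrative only: exercise the widened version guard on sample values.
for CUDA_VERSION in 12.1 12.4 12.4.1 12.6 12.7; do
  if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
    echo "$CUDA_VERSION -> matches"
  else
    echo "$CUDA_VERSION -> no match"
  fi
done
# 12.4, 12.4.1 and 12.6 match; 12.1 and 12.7 do not.
```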

View File

@ -10,6 +10,21 @@ if [[ -z $ROCM_VERSION ]]; then
exit 1;
fi
IS_UBUNTU=0
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
IS_UBUNTU=1
;;
centos)
IS_UBUNTU=0
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
# To make version comparison easier, create an integer representation.
save_IFS="$IFS"
IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
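The `$ROCM_INT` checks further down (60200, 60300, ...) compare against this integer form. A minimal sketch of how the dotted version maps to that integer, assuming the major*10000 + minor*100 + patch scheme those thresholds imply:

```
# Sketch only: derive a comparable integer from a dotted ROCm version,
# assuming the major*10000 + minor*100 + patch scheme implied by checks
# such as `[[ $ROCM_INT -ge 60200 ]]` (6.2.0 -> 60200, 6.3.1 -> 60301).
ROCM_VERSION=6.2.0   # example value
IFS=. read -r MAJOR MINOR PATCH <<< "$ROCM_VERSION"
ROCM_INT=$(( MAJOR * 10000 + MINOR * 100 + ${PATCH:-0} ))
echo "ROCM_VERSION=$ROCM_VERSION -> ROCM_INT=$ROCM_INT"
```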
@ -57,9 +72,11 @@ MIOPEN_CMAKE_COMMON_FLAGS="
-DMIOPEN_BUILD_DRIVER=OFF
"
# Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
echo "ROCm 6.2 MIOpen does not need any patches, do not build from source"
if [[ $ROCM_INT -ge 60300 ]]; then
echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
exit 0
elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
exit 0
@ -93,12 +110,21 @@ else
exit 1
fi
yum remove -y miopen-hip
if [[ ${IS_UBUNTU} == 1 ]]; then
apt-get remove -y miopen-hip
else
yum remove -y miopen-hip
fi
git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
pushd MIOpen
# remove .git to save disk space since CI runner was running out
rm -rf .git
# Don't build CK to save docker build time
if [[ $ROCM_INT -ge 60200 ]]; then
sed -i '/composable_kernel/d' requirements.txt
fi
# Don't build MLIR to save docker build time
# since we are disabling MLIR backend for MIOpen anyway
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
@ -111,10 +137,15 @@ cmake -P install_deps.cmake --minimum
# clean up since CI runner was running out of disk space
rm -rf /tmp/*
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
if [[ ${IS_UBUNTU} == 1 ]]; then
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
else
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
fi
## Build MIOpen
mkdir -p build
@ -131,7 +162,11 @@ make -j $(nproc) package
# clean up since CI runner was running out of disk space
rm -rf /usr/local/cget
yum install -y miopen-*.rpm
if [[ ${IS_UBUNTU} == 1 ]]; then
sudo dpkg -i miopen-hip*.deb
else
yum install -y miopen-*.rpm
fi
popd
rm -rf MIOpen

View File

@ -32,7 +32,7 @@ pip_install coloredlogs packaging
pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20240831 --no-deps
pip_install onnxscript==0.1.0.dev20241008 --no-deps
# required by onnxscript
pip_install ml_dtypes

View File

@ -15,8 +15,11 @@ conda_reinstall() {
if [ -n "${XPU_VERSION}" ]; then
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
TRITON_TEXT_FILE="triton-xpu"
elif [ -n "${TRITON_CPU}" ]; then
TRITON_REPO="https://github.com/triton-lang/triton-cpu"
TRITON_TEXT_FILE="triton-cpu"
else
TRITON_REPO="https://github.com/openai/triton"
TRITON_REPO="https://github.com/triton-lang/triton"
TRITON_TEXT_FILE="triton"
fi
@ -44,9 +47,10 @@ chown -R jenkins /var/lib/jenkins/triton
chgrp -R jenkins /var/lib/jenkins/triton
pushd /var/lib/jenkins/
as_jenkins git clone ${TRITON_REPO} triton
as_jenkins git clone --recursive ${TRITON_REPO} triton
cd triton
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
as_jenkins git submodule update --init --recursive
cd python
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527

View File

@ -37,6 +37,12 @@ esac
(
set -x
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker
docker build \
--target final \
--progress plain \
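If you need to confirm the override took effect, one illustrative check (not part of the CI script) is to query the unit property after the restart:

```
# Illustrative verification, not part of the CI script: after the sed patch
# and restart above, the docker unit should no longer report "infinity".
systemctl show docker --property=LimitNOFILE
# expected on a patched host: LimitNOFILE=1048576
```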

View File

@ -10,6 +10,7 @@ ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ARG DEVTOOLSET_VERSION=9
# Note: This patch is required since CentOS has reached EOL
# otherwise any yum install step will fail
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo

View File

@ -124,7 +124,14 @@ if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
fi
(
set -x
DOCKER_BUILDKIT=1 docker build \
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker
DOCKER_BUILDKIT=1 docker build \
${DOCKER_GPU_BUILD_ARG} \
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
--target "${TARGET}" \

View File

@ -139,9 +139,9 @@ opt-einsum==3.3
#Pinned versions: 3.3
#test that import: test_linalg.py
optree==0.12.1
optree==0.13.0
#Description: A library for tree manipulation
#Pinned versions: 0.12.1
#Pinned versions: 0.13.0
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,

View File

@ -68,6 +68,8 @@ RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
ENV ROCM_PATH /opt/rocm
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
@ -121,5 +123,8 @@ RUN bash ./install_cache.sh && rm install_cache.sh
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
USER jenkins
CMD ["bash"]

View File

@ -147,6 +147,13 @@ COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt
ARG TRITON_CPU
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-cpu.txt
ARG EXECUTORCH
# Build and install executorch
COPY ./common/install_executorch.sh install_executorch.sh

View File

@ -49,13 +49,8 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
fi
# Enable LLVM dependency for TensorExpr testing
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
export USE_LLVM=/opt/rocm/llvm
export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
else
export USE_LLVM=/opt/llvm
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
fi
export USE_LLVM=/opt/llvm
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
# To build test_edge_op_registration
@ -183,7 +178,7 @@ fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
export MAX_JOBS=$(($(nproc) - 1))
fi
fi
@ -223,10 +218,6 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
export USE_PRECOMPILED_HEADERS=1
fi
if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
export USE_GLOO_WITH_OPENSSL=ON
fi
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
@ -237,7 +228,7 @@ fi
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
@ -283,6 +274,7 @@ else
# set only when building other architectures
# or building non-XLA tests.
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
"$BUILD_ENVIRONMENT" != *s390x* &&
"$BUILD_ENVIRONMENT" != *xla* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
@ -345,11 +337,11 @@ else
CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
CUSTOM_OP_TEST="$PWD/test/custom_operator"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
mkdir -p "$CUSTOM_OP_BUILD"
pushd "$CUSTOM_OP_BUILD"
cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
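The new SITE_PACKAGES value already contains every site-packages directory plus its torch/ subdirectory, joined with semicolons, so the old "$SITE_PACKAGES/torch;$SITE_PACKAGES" prefix is no longer needed. A standalone run of the same command (output paths are illustrative and depend on the interpreter) shows the string handed to CMAKE_PREFIX_PATH:

```
# Illustrative only: print the semicolon-joined prefix path the new command builds.
python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))'
# e.g. /usr/lib/python3.10/site-packages;/usr/lib/python3.10/site-packages/torch
```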
@ -359,10 +351,10 @@ else
JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
JIT_HOOK_TEST="$PWD/test/jit_hooks"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
mkdir -p "$JIT_HOOK_BUILD"
pushd "$JIT_HOOK_BUILD"
cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
@ -374,7 +366,7 @@ else
python --version
mkdir -p "$CUSTOM_BACKEND_BUILD"
pushd "$CUSTOM_BACKEND_BUILD"
cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
make VERBOSE=1
popd
@ -407,6 +399,6 @@ fi
# snadampal: skipping it till sccache support added for aarch64
# https://github.com/pytorch/pytorch/issues/121559
if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *aarch64* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
print_sccache_stats
fi

View File

@ -191,9 +191,22 @@ function install_torchrec_and_fbgemm() {
pip_uninstall torchrec-nightly
pip_uninstall fbgemm-gpu-nightly
pip_install setuptools-git-versioning scikit-build pyre-extensions
# TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
# seems to be an sccache-related issue
if [[ "$IS_A100_RUNNER" == "1" ]]; then
unset CMAKE_CUDA_COMPILER_LAUNCHER
sudo mv /opt/cache/bin /opt/cache/bin-backup
fi
# See https://github.com/pytorch/pytorch/issues/106971
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
if [[ "$IS_A100_RUNNER" == "1" ]]; then
export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
sudo mv /opt/cache/bin-backup /opt/cache/bin
fi
}
function clone_pytorch_xla() {

View File

@ -1,4 +1,4 @@
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from tempfile import mkdtemp
from cryptography import x509
@ -42,10 +42,10 @@ def create_cert(path, C, ST, L, O, key):
.issuer_name(issuer)
.public_key(key.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.utcnow())
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.utcnow()
datetime.now(timezone.utc)
+ timedelta(days=10)
)
.add_extension(
@ -88,10 +88,10 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
.issuer_name(ca_cert.subject)
.public_key(csr_cert.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.utcnow())
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.utcnow()
datetime.now(timezone.utc)
+ timedelta(days=10)
# Sign our certificate with our private key
)

View File

@ -375,9 +375,8 @@ test_inductor_cpp_wrapper_abi_compatible() {
mkdir -p "$TEST_REPORTS_DIR"
echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
# cpu stack allocation causes segfault and needs more investigation
PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
python test/run_test.py --include inductor/test_cuda_cpp_wrapper
python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro inductor/test_extension_backend
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
--training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
@ -404,7 +403,7 @@ pr_time_benchmarks() {
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
echo "benchmark results on current PR: "
cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
}
if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
@ -607,6 +606,11 @@ test_inductor_halide() {
assert_git_not_dirty
}
test_inductor_triton_cpu() {
python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
assert_git_not_dirty
}
test_dynamo_benchmark() {
# Usage: test_dynamo_benchmark huggingface 0
TEST_REPORTS_DIR=$(pwd)/test/test-reports
@ -661,15 +665,6 @@ test_inductor_torchbench_smoketest_perf() {
# The threshold value needs to be actively maintained to make this check useful
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
# The perf number of nanogpt seems not very stable, e.g.
# https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
# and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
# we switch to use some other model.
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
# Check memory compression ratio for a few models
for test in hf_Albert timm_vision_transformer; do
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@ -713,6 +708,10 @@ test_inductor_set_cpu_affinity(){
export KMP_BLOCKTIME=1
fi
cores=$(test_inductor_get_core_number)
# Set number of cores to 16 on Aarch64 for performance runs.
if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
cores=16
fi
export OMP_NUM_THREADS=$cores
end_core=$((cores-1))
export TASKSET="taskset -c 0-$end_core"
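With the aarch64 cap, a larger Graviton runner is pinned to 16 threads. A worked example, assuming a hypothetical 64-core aarch64 runner:

```
# Worked example assuming a hypothetical 64-core aarch64 runner.
TEST_CONFIG="inductor_aarch64"; cores=64   # illustrative values
if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
  cores=16
fi
end_core=$((cores-1))
echo "OMP_NUM_THREADS=$cores TASKSET='taskset -c 0-$end_core'"
# prints: OMP_NUM_THREADS=16 TASKSET='taskset -c 0-15'
```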
@ -1402,7 +1401,7 @@ test_linux_aarch64() {
inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes \
inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
}
@ -1436,6 +1435,8 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
test_inductor_triton_cpu
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@ -1459,7 +1460,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
# https://github.com/opencv/opencv-python/issues/885
pip_install opencv-python==4.8.0.74
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \

View File

@ -26,7 +26,7 @@ fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"

View File

@ -27,12 +27,11 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
source activate testenv >/dev/null
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
# Prior to Python 3.8 paths were suffixed with an 'm'
if [[ -d "\${python_path}/bin" ]]; then
export PATH="\${python_path}/bin:\$PATH"
elif [[ -d "\${python_path}m/bin" ]]; then
export PATH="\${python_path}m/bin:\$PATH"
if [[ "\$python_nodot" = *t ]]; then
python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
fi
export PATH="\${python_path}/bin:\$PATH"
fi
EXTRA_CONDA_FLAGS=""
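For the new free-threaded wheels, a DESIRED_PYTHON value such as "3.13t" is reduced to its digits and mapped onto the cp313-cp313t layout under /opt/python. A standalone sketch of that mapping (it assumes python_nodot is simply DESIRED_PYTHON with the dot removed, which is how it is used above):

```
# Sketch of the free-threaded interpreter path selection; values are illustrative.
DESIRED_PYTHON="3.13t"                                   # example value
python_nodot=$(echo "$DESIRED_PYTHON" | tr -d '.')       # assumed definition -> 313t
if [[ "$python_nodot" = *t ]]; then
  python_digits="$(echo "$DESIRED_PYTHON" | tr -cd '[:digit:]')"   # -> 313
  python_path="/opt/python/cp${python_digits}-cp${python_digits}t"
fi
echo "$python_path"   # /opt/python/cp313-cp313t
```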

View File

@ -44,7 +44,9 @@ ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
ForEachMacros:
- FOR_EACH_RANGE
- FOR_EACH
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
@ -58,6 +60,24 @@ IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
Macros:
- >-
PyObject_HEAD_INIT(type)={
/* this does not exactly match PyObject_HEAD_INIT in the Python source code,
* but it is enough for clang-format */
{ 0xFFFFFFFF },
(type)
},
- >-
PyVarObject_HEAD_INIT(type, size)={
{
/* manually expand PyObject_HEAD_INIT(type) above
* because clang-format does not support recursive expansion */
{ 0xFFFFFFFF },
(type)
},
(size)
},
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
PenaltyBreakBeforeFirstCallParameter: 1
@ -79,7 +99,11 @@ SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
Standard: c++17
StatementMacros:
- PyObject_HEAD
- PyObject_VAR_HEAD
- PyException_HEAD
TabWidth: 8
UseTab: Never
---

View File

@ -1,38 +0,0 @@
If you have a question or would like help and support, please ask at our
[forums](https://discuss.pytorch.org/).
If you are submitting a feature request, please preface the title with [feature request].
If you are submitting a bug report, please fill in the following details.
## Issue description
Provide a short description.
## Code example
Please try to provide a minimal example to repro the bug.
Error messages and stack traces are also helpful.
## System Info
Please copy and paste the output from our
[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py)
(or fill out the checklist below manually).
You can get the script and run it with:
```
wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
```
- PyTorch or Caffe2:
- How you installed PyTorch (conda, pip, source):
- Build command you used (if compiling from source):
- OS:
- PyTorch version:
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- GCC version (if compiling from source):
- CMake version:
- Versions of any other relevant libraries:

View File

@ -5,7 +5,8 @@ about: Tracking incidents for PyTorch's CI infra.
> NOTE: Remember to label this issue with "`ci: sev`"
**MERGE BLOCKING** <!-- remove this line if you don't want this SEV to block merges -->
<!-- uncomment the below line if you don't want this SEV to block merges -->
<!-- **MERGE BLOCKING** -->
## Current Status
*Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.

View File

@ -32,30 +32,6 @@ self-hosted-runner:
- lf.linux.8xlarge.nvidia.gpu
- lf.linux.16xlarge.nvidia.gpu
- lf.linux.g5.4xlarge.nvidia.gpu
# Organization-wide AWS Linux Runners with new Amazon 2023 AMI
- amz2023.linux.large
- amz2023.linux.2xlarge
- amz2023.linux.4xlarge
- amz2023.linux.12xlarge
- amz2023.linux.24xlarge
- amz2023.linux.arm64.2xlarge
- amz2023.linux.arm64.m7g.4xlarge
- amz2023.linux.arm64.m7g.4xlarge.ephemeral
- amz2023.linux.4xlarge.nvidia.gpu
- amz2023.linux.8xlarge.nvidia.gpu
- amz2023.linux.16xlarge.nvidia.gpu
- amz2023.linux.g5.4xlarge.nvidia.gpu
# Pytorch/pytorch AWS Linux Runners with the new Amazon 2023 AMI on Linux Foundation account
- amz2023.lf.linux.large
- amz2023.lf.linux.2xlarge
- amz2023.lf.linux.4xlarge
- amz2023.lf.linux.12xlarge
- amz2023.lf.linux.24xlarge
- amz2023.lf.linux.arm64.2xlarge
- amz2023.lf.linux.4xlarge.nvidia.gpu
- amz2023.lf.linux.8xlarge.nvidia.gpu
- amz2023.lf.linux.16xlarge.nvidia.gpu
- amz2023.lf.linux.g5.4xlarge.nvidia.gpu
# Repo-specific IBM hosted S390x runner
- linux.s390x
# Organization wide AWS Windows runners

View File

@ -18,8 +18,14 @@ inputs:
runs:
using: composite
steps:
- name: Check if in a container runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Clean workspace
shell: bash
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
env:
NO_SUDO: ${{ inputs.no-sudo }}
run: |
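The probe above keys off two marker files: /.inarc (the existing ARC-runner marker) and the newer /.incontainer. Run by hand, purely for illustration, the same expression prints true on such a runner and false elsewhere:

```
# Standalone illustration of the container-runner probe used in the step above.
if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true; else echo false; fi
```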

View File

@ -85,15 +85,25 @@ runs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a ARC runner
- name: Check if in a container runner
shell: bash
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
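Each container runner derives its own sccache server port from its UID, presumably so that runners sharing a host do not collide on the default port. A worked example with a hypothetical RUNNER_UID:

```
# Worked example assuming a hypothetical RUNNER_UID of 1001.
RUNNER_UID=1001
echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))"
# prints: SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=5227
```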
- name: Lock NVIDIA A100 40GB Frequency
shell: bash
@ -101,7 +111,7 @@ runs:
sudo nvidia-smi -pm 1
sudo nvidia-smi -ac 1215,1410
nvidia-smi
if: contains(matrix.runner, 'a100')
if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Start monitoring script
id: monitor-script
@ -172,6 +182,7 @@ runs:
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -181,6 +192,9 @@ runs:
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
shell: bash
run: |
set -x
@ -199,6 +213,7 @@ runs:
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
@ -227,6 +242,7 @@ runs:
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SCCACHE_S3_KEY_PREFIX \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -234,7 +250,9 @@ runs:
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e HUGGING_FACE_HUB_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e IS_A100_RUNNER \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -305,7 +323,7 @@ runs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
# NB: We are currently having an intermittent GPU-related issue on G5 runners with
# A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does

View File

@ -28,14 +28,14 @@ runs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: Check if in a ARC runner
- name: Check if in a container runner
shell: bash
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> $GITHUB_OUTPUT
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Start docker if docker daemon is not running
shell: bash
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
run: |
if systemctl is-active --quiet docker; then
echo "Docker daemon is running...";
@ -73,7 +73,7 @@ runs:
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Kill any existing containers, clean up images
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
shell: bash
run: |
# ignore expansion of "docker ps -q" since it could be empty
@ -116,7 +116,7 @@ runs:
- name: Check that the docker daemon is running
shell: bash
continue-on-error: true
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
run: |
set +x

View File

@ -1 +1 @@
ba696ea3dfec4cbe693bf06a84c75dc196077f5b
3f0569939c4369bec943fc27d1c9d8dfbc828c26

View File

@ -35,38 +35,35 @@ runner_types:
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.10xlarge.avx2:
disk_size: 200
instance_type: m4.10xlarge
is_ephemeral: false
max_available: 450
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.24xl.spr-metal:
disk_size: 200
instance_type: c7i.metal-24xl
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.16xlarge.spr:
disk_size: 200
instance_type: c7i.16xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.9xlarge.ephemeral:
disk_size: 200
instance_type: c5.9xlarge
is_ephemeral: true
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
variants:
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
@ -76,149 +73,140 @@ runner_types:
is_ephemeral: true
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.16xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.16xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.24xlarge:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: false
max_available: 500
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.24xlarge.ephemeral:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.2xlarge:
disk_size: 150
instance_type: c5.2xlarge
is_ephemeral: false
max_available: 3120
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.4xlarge:
disk_size: 150
instance_type: c5.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.8xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.8xlarge
is_ephemeral: false
max_available: 400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g4dn.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g4dn.12xlarge
is_ephemeral: false
max_available: 250
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g4dn.metal.nvidia.gpu:
disk_size: 150
instance_type: g4dn.metal
is_ephemeral: false
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g5.48xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.48xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g5.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.12xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g5.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.4xlarge
is_ephemeral: false
max_available: 2400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
disk_size: 150
instance_type: g6.4xlarge
is_ephemeral: false
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.large:
max_available: 1200
disk_size: 15
instance_type: c5.large
is_ephemeral: false
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.c.linux.arm64.2xlarge:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.c.linux.arm64.m7g.4xlarge:
disk_size: 256
instance_type: m7g.4xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.c.linux.arm64.2xlarge.ephemeral:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.c.linux.arm64.m7g.4xlarge.ephemeral:
disk_size: 256
instance_type: m7g.4xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.c.linux.arm64.m7g.metal:
disk_size: 256
instance_type: m7g.metal
is_ephemeral: false
max_available: 100
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.c.windows.g4dn.xlarge:
disk_size: 256
instance_type: g4dn.xlarge

View File

@ -35,38 +35,35 @@ runner_types:
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.10xlarge.avx2:
disk_size: 200
instance_type: m4.10xlarge
is_ephemeral: false
max_available: 450
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.24xl.spr-metal:
disk_size: 200
instance_type: c7i.metal-24xl
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.16xlarge.spr:
disk_size: 200
instance_type: c7i.16xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.9xlarge.ephemeral:
disk_size: 200
instance_type: c5.9xlarge
is_ephemeral: true
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
variants:
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
@ -76,149 +73,140 @@ runner_types:
is_ephemeral: true
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.16xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.16xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.24xlarge:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: false
max_available: 500
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.24xlarge.ephemeral:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.2xlarge:
disk_size: 150
instance_type: c5.2xlarge
is_ephemeral: false
max_available: 3120
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.4xlarge:
disk_size: 150
instance_type: c5.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.8xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.8xlarge
is_ephemeral: false
max_available: 400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g4dn.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g4dn.12xlarge
is_ephemeral: false
max_available: 250
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g4dn.metal.nvidia.gpu:
disk_size: 150
instance_type: g4dn.metal
is_ephemeral: false
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g5.48xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.48xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g5.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.12xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g5.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.4xlarge
is_ephemeral: false
max_available: 2400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.g6.4xlarge.experimental.nvidia.gpu:
disk_size: 150
instance_type: g6.4xlarge
is_ephemeral: false
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.large:
max_available: 1200
disk_size: 15
instance_type: c5.large
is_ephemeral: false
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
lf.linux.arm64.2xlarge:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.linux.arm64.m7g.4xlarge:
disk_size: 256
instance_type: m7g.4xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.linux.arm64.2xlarge.ephemeral:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.linux.arm64.m7g.4xlarge.ephemeral:
disk_size: 256
instance_type: m7g.4xlarge
is_ephemeral: true
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.linux.arm64.m7g.metal:
disk_size: 256
instance_type: m7g.metal
is_ephemeral: false
max_available: 100
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
lf.windows.g4dn.xlarge:
disk_size: 256
instance_type: g4dn.xlarge

View File

@ -544,6 +544,7 @@
- anijain2305
- bdhirsh
- zou3519
- isuruf
mandatory_checks_name:
- EasyCLA
- Lint

View File

@ -16,6 +16,7 @@ ciflow_push_tags:
- ciflow/nightly
- ciflow/periodic
- ciflow/rocm
- ciflow/s390
- ciflow/slow
- ciflow/trunk
- ciflow/unstable

View File

@ -1,4 +1,4 @@
# iOS simulator requirements
coremltools==5.0b5
protobuf==3.20.2
optree==0.12.1
optree==0.13.0

View File

@ -27,7 +27,7 @@ pytest-cpp==2.3.0
rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.12.1
optree==0.13.0
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2

View File

@ -333,7 +333,7 @@ def generate_wheels_matrix(
package_type = "manywheel"
if python_versions is None:
python_versions = FULL_PYTHON_VERSIONS + ["3.13"]
python_versions = FULL_PYTHON_VERSIONS + ["3.13", "3.13t"]
if arches is None:
# Define default compute architectures
@ -369,7 +369,13 @@ def generate_wheels_matrix(
# TODO: Enable python 3.13 on rocm, aarch64, windows
if (
gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x")
) and python_version == "3.13":
) and (python_version == "3.13" or python_version == "3.13t"):
continue
# TODO: Enable python 3.13t on xpu and cpu-s390x
if (
gpu_arch_type == "xpu" or gpu_arch_type == "cpu-s390x"
) and python_version == "3.13t":
continue
if use_split_build and (

View File

@ -17,6 +17,11 @@ if [[ -d "${CACHE_DIRECTORY}" ]]; then
cp -r "${CACHE_DIRECTORY}" . || true
fi
# if lintrunner is not installed, install it
if ! command -v lintrunner &> /dev/null; then
python3 -m pip install lintrunner==0.12.5
fi
# This has already been cached in the docker image
lintrunner init 2> /dev/null
@ -33,7 +38,7 @@ python3 torch/utils/data/datapipes/gen_pyi.py
RC=0
# Run lintrunner on all files
if ! lintrunner --force-color --all-files --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
if ! lintrunner --force-color --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
echo ""
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"

View File

@ -1,5 +1,9 @@
# flake8: noqa: G004
# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
# must be kept in sync. You can do it easily by running the following command:
# python .github/scripts/update_runner_determinator.py
"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@ -79,6 +83,9 @@ class Experiment(NamedTuple):
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
all_branches: bool = (
False # If True, the experiment is also enabled on the exception branches
)
# Add more fields as needed
@ -212,7 +219,7 @@ def get_potential_pr_author(
def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of all experiments and should always use Meta runners
Branches that get opted out of experiments by default, until they're explicitly enabled.
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
@ -338,7 +345,10 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
def get_runner_prefix(
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
rollout_state: str,
workflow_requestors: Iterable[str],
branch: str,
is_canary: bool = False,
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
@ -348,6 +358,12 @@ def get_runner_prefix(
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
if not experiment_settings.all_branches and is_exception_branch(branch):
log.info(
f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
@ -407,35 +423,34 @@ def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -
def main() -> None:
args = parse_args()
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info(
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
runner_label_prefix = DEFAULT_LABEL_PREFIX
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
runner_label_prefix = DEFAULT_LABEL_PREFIX
else:
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
is_canary = args.github_repo == "pytorch/pytorch-canary"
is_canary = args.github_repo == "pytorch/pytorch-canary"
runner_label_prefix = get_runner_prefix(
rollout_state, (args.github_issue_owner, username), is_canary
)
runner_label_prefix = get_runner_prefix(
rollout_state,
(args.github_issue_owner, username),
args.github_branch,
is_canary,
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)

View File

@ -4,6 +4,10 @@ from unittest.mock import Mock, patch
import runner_determinator as rd
USER_BRANCH = "somebranch"
EXCEPTION_BRANCH = "main"
class TestRunnerDeterminatorIssueParser(TestCase):
def test_parse_settings(self) -> None:
settings_text = """
@ -66,6 +70,40 @@ class TestRunnerDeterminatorIssueParser(TestCase):
"otherExp settings not parsed correctly",
)
def test_parse_all_branches_setting(self) -> None:
settings_text = """
```
experiments:
lf:
rollout_perc: 25
all_branches: true
otherExp:
all_branches: True
rollout_perc: 0
```
---
Users:
@User1,lf
@User2,lf,otherExp
"""
settings = rd.parse_settings(settings_text)
self.assertTupleEqual(
rd.Experiment(rollout_perc=25, all_branches=True),
settings.experiments["lf"],
"lf settings not parsed correctly",
)
self.assertTrue(settings.experiments["otherExp"].all_branches)
self.assertTupleEqual(
rd.Experiment(rollout_perc=0, all_branches=True),
settings.experiments["otherExp"],
"otherExp settings not parsed correctly",
)
def test_parse_users(self) -> None:
settings_text = """
experiments:
@ -119,7 +157,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
def test_opted_in_user_two_experiments(self) -> None:
@ -136,7 +174,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"])
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
@patch("random.uniform", return_value=50)
@ -154,7 +192,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User3"])
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for user")
@patch("random.uniform", return_value=10)
@ -174,7 +212,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
# User3 is opted out, but is pulled into both experiments by the 10% rollout
prefix = rd.get_runner_prefix(settings_text, ["User3"])
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_lf_prefix_always_comes_first(self) -> None:
@ -192,7 +230,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"])
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_ignores_commented_users(self) -> None:
@ -210,7 +248,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for user")
def test_ignores_extra_experiments(self) -> None:
@ -229,9 +267,44 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
self,
) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
---
Users:
@User,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for user")
def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
self,
) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
all_branches: true
---
Users:
@User,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
self.assertEqual("lf.", prefix, "Runner prefix not correct for user")
if __name__ == "__main__":
main()

View File

@ -12,7 +12,7 @@ import json
import os
import warnings
from hashlib import sha256
from typing import Any, Dict, List, Optional
from typing import Any, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError
@ -24,7 +24,6 @@ from trymerge import (
find_matching_merge_rule,
get_classifications,
get_drci_classifications,
get_rockset_results,
gh_get_team_members,
GitHubPR,
JobCheckState,
@ -42,7 +41,6 @@ if "GIT_REMOTE_URL" not in os.environ:
os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"
GQL_MOCKS = "gql_mocks.json.gz"
ROCKSET_MOCKS = "rockset_mocks.json.gz"
DRCI_MOCKS = "drci_mocks.json.gz"
@ -77,16 +75,11 @@ def mock_query(
if err.code == 401 or err.code == 403:
err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
if (
os.getenv("GITHUB_TOKEN") is None
or os.getenv("ROCKSET_API_KEY") is None
or os.getenv("DRCI_BOT_KEY") is None
):
if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
err_msg = (
"Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
"Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
+ "is not defined. "
+ err_msg
)
@ -110,16 +103,6 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)
def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
return mock_query(
get_rockset_results,
ROCKSET_MOCKS,
lambda x, y: f"{x} {y}",
head_sha,
merge_base,
)
def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
return mock_query(
get_drci_classifications,
@ -273,10 +256,6 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
]
def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
return []
class DummyGitRepo(GitRepo):
def __init__(self) -> None:
super().__init__(get_git_repo_dir(), get_git_remote_name())
@ -288,7 +267,6 @@ class DummyGitRepo(GitRepo):
return "super awsome commit message"
@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@ -604,7 +582,6 @@ class TestTryMerge(TestCase):
mocked_gh_fetch_merge_base.assert_called_once()
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@ -843,7 +820,7 @@ class TestBypassFailures(TestCase):
checks = pr.get_checkrun_conclusions()
# Known flaky failure takes precedence over ignore current (need to set the
# merge base here to get the results from Rockset, and that categorize the
# merge base here to get the results from Dr. CI, and that categorize the
# broken trunk failure too
checks = get_classifications(
pr.pr_num,
@ -929,7 +906,6 @@ class TestBypassFailures(TestCase):
)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@ -1008,7 +984,6 @@ class TestBypassFailuresOnSandCastle(TestCase):
self.assertTrue(len(failed) == 2)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(

View File

@ -452,8 +452,6 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
ROCKSET_MERGES_COLLECTION = "merges"
ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@ -1180,7 +1178,7 @@ class GitHubPR:
merge_commit_sha = repo.rev_parse(name=self.default_branch())
if comment_id and self.pr_num:
# Finally, upload the record to Rockset. The list of pending and failed
# Finally, upload the record to s3. The list of pending and failed
# checks are at the time of the merge
save_merge_record(
comment_id=comment_id,
@ -1202,7 +1200,7 @@ class GitHubPR:
ignore_current=bool(ignore_current_checks),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
# Usually Github will see that the commit has "resolves <pr_num>" in the
# commit message and close the PR, but sometimes it doesn't, leading to
@ -1481,7 +1479,7 @@ def find_matching_merge_rule(
# Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
# where the list of checks is readily available. These records will be saved into
# Rockset merge records
# s3 merge records
(
pending_mandatory_checks,
failed_mandatory_checks,
@ -1568,7 +1566,7 @@ def save_merge_record(
This saves the merge records as a json, which can later be uploaded to s3
"""
# Prepare the record to be written into Rockset
# Prepare the record to be written into s3
data = [
{
"comment_id": comment_id,
@ -1590,7 +1588,8 @@ def save_merge_record(
"ignore_current": ignore_current,
"error": error,
# This is a unique identifier for the record for deduping purposes
# in rockset. Any unique string would work
# in Rockset. Any unique string would work. This will not be used
# after we migrate off Rockset
"_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
}
]
@ -1600,36 +1599,6 @@ def save_merge_record(
json.dump(data, f)
@retries_decorator(rc=[])
def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
query = f"""
SELECT
w.name as workflow_name,
j.id,
j.name,
j.conclusion,
j.completed_at,
j.html_url,
j.head_sha,
j.torchci_classification.captures as failure_captures,
LENGTH(j.steps) as steps,
FROM
commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
where
j.head_sha in ('{head_sha}','{merge_base}')
"""
try:
import rockset # type: ignore[import]
res = rockset.RocksetClient(
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
).sql(query)
return cast(List[Dict[str, Any]], res.results)
except ModuleNotFoundError:
print("Could not use RockSet as rocket dependency is missing")
return []
@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
"""
@ -2067,7 +2036,7 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant
@ -2126,7 +2095,7 @@ def categorize_checks(
):
failed_checks = failed_checks + flaky_or_broken_trunk
# The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
# The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
return (pending_checks, failed_checks, failed_checks_categorization)
@ -2410,7 +2379,7 @@ def main() -> None:
handle_exception(e)
if args.comment_id and args.pr_num:
# Finally, upload the record to Rockset, we don't have access to the
# Finally, upload the record to s3, we don't have access to the
# list of pending and failed checks here, but they are not really
# needed at the moment
save_merge_record(
@ -2433,7 +2402,7 @@ def main() -> None:
error=str(e),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
finally:
if not args.check_mergeability:
gh_remove_label(

.github/scripts/update_runner_determinator.py vendored Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3
import re
# Read the contents of runner_determinator.py
with open(".github/scripts/runner_determinator.py") as script_file:
script_content = script_file.read()
# Indent the script content by 10 spaces to match destination indentation
indented_script_content = "\n".join(
[" " * 10 + line if line else line for line in script_content.splitlines()]
)
# Read the contents of _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml") as yml_file:
yml_content = yml_file.read()
# Replace the content between the markers
new_yml_content = re.sub(
r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
lambda match: match.group(1) + indented_script_content + match.group(3),
yml_content,
flags=re.DOTALL,
)
# Save the modified content back to _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml", "w") as yml_file:
yml_file.write(new_yml_content)
print("Updated _runner-determinator.yml with the contents of runner_determinator.py")

View File

@ -68,6 +68,7 @@ jobs:
needs: get-label-type
with:!{{ upload.binary_env_as_input(config) }}
{%- if "aarch64" in build_environment %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "s390x" in build_environment %}
@ -102,6 +103,7 @@ jobs:
build_name: !{{ config["build_name"] }}
build_environment: !{{ build_environment }}
{%- if "aarch64" in build_environment %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "s390x" in build_environment %}

View File

@ -91,14 +91,14 @@ jobs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a ARC runner
- name: Check if in a container runner
shell: bash
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Output disk space left
run: |

View File

@ -109,6 +109,7 @@ jobs:
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -118,13 +119,16 @@ jobs:
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
no-sudo: ${{ inputs.build-environment == 'linux-s390x-binary-manywheel' }}
- name: Setup Linux
uses: ./.github/actions/setup-linux
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v3
if: ${{ inputs.aws-role-to-assume != '' }}
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
with:
role-to-assume: ${{ inputs.aws-role-to-assume }}
role-session-name: gha-linux-build
@ -133,11 +137,13 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Use following to pull public copy of the image
id: print-ghcr-mirror
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
@ -147,6 +153,7 @@ jobs:
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -174,6 +181,7 @@ jobs:
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
@ -195,6 +203,7 @@ jobs:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
@ -202,7 +211,21 @@ jobs:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
run: |
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
JENKINS_USER=
USED_IMAGE="${DOCKER_IMAGE_S390X}"
# since some steps are skipped on s390x, if they are necessary, run them here
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
else
JENKINS_USER="--user jenkins"
USED_IMAGE="${DOCKER_IMAGE}"
fi
# detached container should get cleaned up by teardown_ec2_linux
# Used for JENKINS_USER, which can be empty
# shellcheck disable=SC2086
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS="$(nproc --ignore=2)" \
@ -225,10 +248,10 @@ jobs:
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
${JENKINS_USER} \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
"${USED_IMAGE}"
)
docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
@ -239,7 +262,7 @@ jobs:
- name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
@ -249,7 +272,7 @@ jobs:
- name: Store PyTorch Build Artifacts on S3 for split build
uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}-experimental-split-build
retention-days: 14
@ -257,8 +280,26 @@ jobs:
path: artifacts.zip
s3-bucket: ${{ inputs.s3-bucket }}
- name: Store PyTorch Build Artifacts for s390x
uses: actions/upload-artifact@v3
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
if-no-files-found: error
path: artifacts.zip
- name: Store PyTorch Build Artifacts for s390x for split build
uses: actions/upload-artifact@v3
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}-experimental-split-build
retention-days: 14
if-no-files-found: error
path: artifacts.zip
- name: Upload sccache stats
if: steps.build.outcome != 'skipped'
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
uses: seemethere/upload-artifact-s3@v5
with:
s3-prefix: |
@ -270,4 +311,13 @@ jobs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'
- name: Cleanup docker
if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
shell: bash
run: |
# on s390x stop the container for clean worker stop
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true

View File

@ -114,22 +114,32 @@ jobs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a ARC runner
- name: Check if in a container runner
shell: bash
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Lock NVIDIA A100 40GB Frequency
run: |
sudo nvidia-smi -pm 1
sudo nvidia-smi -ac 1215,1410
nvidia-smi
if: contains(matrix.runner, 'a100')
if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Start monitoring script
id: monitor-script
@ -208,6 +218,7 @@ jobs:
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -218,6 +229,7 @@ jobs:
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
run: |
set -x
@ -236,6 +248,7 @@ jobs:
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
@ -265,6 +278,7 @@ jobs:
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SCCACHE_S3_KEY_PREFIX \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -274,6 +288,7 @@ jobs:
-e HUGGING_FACE_HUB_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e IS_A100_RUNNER \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -343,7 +358,7 @@ jobs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
# NB: We are currently having an intermittent GPU-related issue on G5 runners with
# A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does

View File

@ -88,6 +88,13 @@ jobs:
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install PyTorch and run MPS tests
id: test
env:
@ -103,6 +110,14 @@ jobs:
NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
NO_TD: ${{ needs.filter.outputs.ci-no-td }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
run: |
# shellcheck disable=SC1090
@ -144,13 +159,6 @@ jobs:
run: |
cat test/**/*_toprint.log || true
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'

View File

@ -59,6 +59,10 @@ jobs:
cat <<EOF > runner_determinator.py
# flake8: noqa: G004
# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
# must be kept in sync. You can do it easily by running the following command:
# python .github/scripts/update_runner_determinator.py
"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@ -138,6 +142,9 @@ jobs:
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
all_branches: bool = (
False # If True, the experiment is also enabled on the exception branches
)
# Add more fields as needed
@ -271,7 +278,7 @@ jobs:
def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of all experiments and should always use Meta runners
Branches that get opted out of experiments by default, until they're explicitly enabled.
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
@ -397,7 +404,10 @@ jobs:
def get_runner_prefix(
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
rollout_state: str,
workflow_requestors: Iterable[str],
branch: str,
is_canary: bool = False,
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
@ -407,6 +417,12 @@ jobs:
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
if not experiment_settings.all_branches and is_exception_branch(branch):
log.info(
f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
@ -466,35 +482,34 @@ jobs:
def main() -> None:
args = parse_args()
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info(
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
runner_label_prefix = DEFAULT_LABEL_PREFIX
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
runner_label_prefix = DEFAULT_LABEL_PREFIX
else:
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
is_canary = args.github_repo == "pytorch/pytorch-canary"
is_canary = args.github_repo == "pytorch/pytorch-canary"
runner_label_prefix = get_runner_prefix(
rollout_state, (args.github_issue_owner, username), is_canary
)
runner_label_prefix = get_runner_prefix(
rollout_state,
(args.github_issue_owner, username),
args.github_branch,
is_canary,
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
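
Because the old and new bodies of main() are interleaved above without diff markers, the net behaviour is easier to see in isolation. A small runnable sketch of the new fallback path, with the GitHub lookup stubbed out and DEFAULT_LABEL_PREFIX assumed to be the empty Meta prefix:

DEFAULT_LABEL_PREFIX = ""  # assumed: no prefix means plain Meta runner labels

def get_rollout_state_from_issue(token: str, repo: str, issue: int) -> str:
    # Stub for the real GitHub issue fetch; raising exercises the fallback path.
    raise RuntimeError("could not reach the rollout issue")

runner_label_prefix = DEFAULT_LABEL_PREFIX
try:
    rollout_state = get_rollout_state_from_issue("<token>", "<issue repo>", 0)
    # get_potential_pr_author(...) and get_runner_prefix(..., branch, is_canary)
    # would run here and override runner_label_prefix on success.
except Exception as e:
    print(f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}")

print(repr(runner_label_prefix))  # '' -> Meta runners, no experiments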

View File

@ -189,7 +189,7 @@ jobs:
run: |
pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
# shellcheck disable=SC2046,SC2102
python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.12.1
python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.13.0
popd
.ci/pytorch/win-test.sh

View File

@ -32,7 +32,7 @@ concurrency:
jobs:
build-docker:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
runs-on: am2.linux.9xlarge.ephemeral
runs-on: linux.9xlarge.ephemeral
strategy:
matrix:
cuda_version: ["11.8", "12.1", "12.4", "cpu"]

View File

@ -45,7 +45,7 @@ jobs:
build-docker-cuda:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy:
matrix:
cuda_version: ["12.4", "12.1", "11.8"]
@ -156,7 +156,7 @@ jobs:
build-docker-rocm:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy:
matrix:
rocm_version: ["6.1", "6.2"]
@ -192,7 +192,7 @@ jobs:
build-docker-cpu:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

View File

@ -43,7 +43,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
device: ["cuda", "rocm", "xpu"]
include:
- device: "rocm"
@ -91,9 +91,6 @@ jobs:
# Determine python executable for given version
case $PY_VERS in
3.8)
PYTHON_EXECUTABLE=/opt/python/cp38-cp38/bin/python
;;
3.9)
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
;;
@ -214,7 +211,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
timeout-minutes: 40
env:
DOCKER_IMAGE: pytorch/conda-builder:cpu

View File

@ -30,6 +30,9 @@ concurrency:
jobs:
check-labels:
permissions:
contents: read
pull-requests: write
name: Check labels
if: github.repository_owner == 'pytorch'
runs-on: linux.20_04.4x

View File

@ -67,6 +67,7 @@ jobs:
pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2024.0-py3,
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-focal-py3-clang10-onnx,
pytorch-linux-focal-linter,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter,
@ -78,7 +79,9 @@ jobs:
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
runner: linux.arm64.m7g.4xlarge
timeout-minutes: 600
runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
# Docker uploads fail from LF runners, see https://github.com/pytorch/pytorch/pull/137358
# runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
runs-on: "${{ matrix.runner }}"
env:
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
steps:

View File

@ -60,6 +60,7 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cpu-aarch64
@ -86,6 +87,7 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -130,6 +132,7 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cuda-aarch64
@ -177,6 +180,7 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cpu-aarch64
@ -203,6 +207,7 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -247,6 +252,7 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cuda-aarch64
@ -294,6 +300,7 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cpu-aarch64
@ -320,6 +327,7 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -364,6 +372,7 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cuda-aarch64
@ -411,6 +420,7 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cpu-aarch64
@ -437,6 +447,7 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -481,6 +492,7 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cuda-aarch64

View File

@ -3324,3 +3324,353 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-cxx11-abi-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-cxx11-abi-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-cxx11-abi-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

View File

@ -1514,3 +1514,283 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

View File

@ -5,9 +5,7 @@ on:
# - cron: 0 7 * * 1-6
# - cron: 0 7 * * 0
# Does not perform max_autotune on CPU, so skip the weekly run setup
# Run 6 times every day to see if perf instability can be reproduced
# Will change this back
- cron: 0 */4 * * *
- cron: 0 7 * * *
# NB: GitHub has an upper limit of 10 inputs here
workflow_dispatch:
inputs:
@ -116,7 +114,7 @@ jobs:
name: linux-jammy-aarch64-py3.10-inductor
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-aarch64-py3_10-inductor-build
if: github.event.schedule == '0 */4 * * *'
if: github.event.schedule == '0 7 * * *'
with:
build-environment: linux-jammy-aarch64-py3.10
# Turn off dynamic-shapes and aotinductor tests for now, to have faster iteration for debugging perf instability.

View File

@ -31,13 +31,13 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm6_1-py3_10-inductor-build:
name: rocm6.1-py3.10-inductor
linux-focal-rocm6_2-py3_10-inductor-build:
name: rocm6.2-py3.10-inductor
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
@ -45,14 +45,14 @@ jobs:
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm6_1-py3_10-inductor-test:
linux-focal-rocm6_2-py3_10-inductor-test:
permissions:
id-token: write
contents: read
name: rocm6.1-py3.10-inductor
name: rocm6.2-py3.10-inductor
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm6_1-py3_10-inductor-build
needs: linux-focal-rocm6_2-py3_10-inductor-build
with:
build-environment: linux-focal-rocm6.1-py3.10
docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-inductor-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.2-py3.10
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.test-matrix }}

View File

@ -58,8 +58,7 @@ jobs:
{ config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit
linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
name: cuda12.1-py3.10-gcc9-sm86
@ -69,8 +68,7 @@ jobs:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit
linux-focal-cuda12_1-py3_12-gcc9-inductor-build:
name: cuda12.1-py3.12-gcc9-sm86
@ -86,6 +84,7 @@ jobs:
{ config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
]}
secrets: inherit
linux-focal-cuda12_1-py3_12-gcc9-inductor-test:
name: cuda12.1-py3.12-gcc9-sm86
@ -95,6 +94,7 @@ jobs:
build-environment: linux-focal-cuda12.1-py3.12-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cpu-py3_12-inductor-halide-build:
name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
@ -108,6 +108,7 @@ jobs:
{ include: [
{ config: "inductor-halide", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
]}
secrets: inherit
linux-jammy-cpu-py3_12-inductor-halide-test:
name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
@ -117,6 +118,29 @@ jobs:
build-environment: linux-jammy-py3.12-gcc11
docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cpu-py3_12-inductor-triton-cpu-build:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image-name: pytorch-linux-jammy-py3.12-triton-cpu
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: |
{ include: [
{ config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
]}
linux-jammy-cpu-py3_12-inductor-triton-cpu-test:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-cpu-py3_12-inductor-triton-cpu-build
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
@ -134,8 +158,7 @@ jobs:
{ config: "inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit
linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
name: cuda12.4-py3.10-gcc9-sm86
@ -146,8 +169,7 @@ jobs:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit
linux-jammy-cpu-py3_9-gcc11-inductor-build:
name: linux-jammy-cpu-py3.9-gcc11-inductor
@ -201,8 +223,7 @@ jobs:
{ config: "cpu_inductor_freezing_avx2_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
{ config: "cpu_inductor_freezing_avx2_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit
linux-jammy-cpu-py3_9-gcc11-inductor-test:
name: linux-jammy-cpu-py3.9-gcc11-inductor
@ -212,5 +233,4 @@ jobs:
build-environment: linux-jammy-py3.9-gcc11-build
docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
secrets: inherit

45
.github/workflows/lint-autoformat.yml vendored Normal file
View File

@ -0,0 +1,45 @@
name: Apply lint suggestions
on:
pull_request:
types: [opened, synchronize, reopened]
jobs:
lintrunner-autoformat:
permissions:
contents: read
pull-requests: write
runs-on: lf.linux.2xlarge
if: ${{ github.repository_owner == 'pytorch' }}
steps:
- name: Checkout pytorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
submodules: true
fetch-depth: 0
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: "3.10"
- name: Run lintrunner (nonretryable)
continue-on-error: true
# we can't run on all files here because suggestions can only be shown around the changed lines in the PR UI
run: |
export ADDITIONAL_LINTRUNNER_ARGS="format"
bash .github/scripts/lintrunner.sh
- name: Check for changes
id: git-check
continue-on-error: true
run: |
git diff --exit-code || echo "changes=true" >> "$GITHUB_OUTPUT"
- name: Suggest changes
if: steps.git-check.outputs.changes == 'true'
continue-on-error: true
uses: parkerbxyz/suggest-changes@v1
with:
comment: "Please commit the suggested changes from pytorch's linter."
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

View File

@ -36,7 +36,7 @@ jobs:
submodules: true
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT"
export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT --all-files"
export CLANG=1
.github/scripts/lintrunner.sh
@ -53,7 +53,7 @@ jobs:
submodules: true
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT"
export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT --all-files"
.github/scripts/lintrunner.sh
quick-checks:
@ -215,14 +215,15 @@ jobs:
with:
submodules: false
fetch-depth: 1
- name: Setup Python 3.8
- name: Setup Python 3.9
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.9'
architecture: x64
cache: pip
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.2.* fbscribelogger==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)

View File

@ -57,10 +57,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_1-py3_10-gcc9-test:
@ -89,10 +89,10 @@ jobs:
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
@ -118,9 +118,10 @@ jobs:
docker-image-name: pytorch-linux-jammy-py3.9-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}
parallelnative-linux-jammy-py3_9-gcc11-test:
@ -218,7 +219,9 @@ jobs:
# TODO: Figure out how to migrate this job to M1 runner
ios-build-test:
name: ios-build-test
if: github.event_name != 'schedule' || github.event.schedule == '45 0,8,16 * * 1-5' || github.event.schedule == '45 4 * * 0,6' || github.event.schedule == '29 8 * * *'
# Has been broken for a while, see https://github.com/pytorch/pytorch/issues/136284
# if: github.event_name != 'schedule' || github.event.schedule == '45 0,8,16 * * 1-5' || github.event.schedule == '45 4 * * 0,6' || github.event.schedule == '29 8 * * *'
if: false
uses: ./.github/workflows/_ios-build-test.yml
with:
trigger-event: ${{ github.event_name }}
@ -297,13 +300,13 @@ jobs:
docker-image: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.test-matrix }}
linux-focal-rocm6_1-py3_10-build:
name: linux-focal-rocm6.1-py3.10
linux-focal-rocm6_2-py3_10-build:
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
@ -312,19 +315,19 @@ jobs:
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm6_1-py3_10-test:
linux-focal-rocm6_2-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.1-py3.10
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_1-py3_10-build
- linux-focal-rocm6_2-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm6.1-py3.10
docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.2-py3.10
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build:
name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
@ -337,10 +340,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}

View File

@ -185,10 +185,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.9-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -217,10 +217,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.11-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -251,10 +251,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -383,7 +383,7 @@ jobs:
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-py3.9-clang9-xla
docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.1-lite
docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.3-lite
test-matrix: |
{ include: [
{ config: "xla", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
@ -503,15 +503,15 @@ jobs:
]}
secrets: inherit
linux-focal-rocm6_1-py3_10-build:
linux-focal-rocm6_2-py3_10-build:
# don't run build twice on main
if: github.event_name == 'pull_request'
name: linux-focal-rocm6.1-py3.10
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
@ -588,9 +588,9 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },

View File

@ -25,11 +25,11 @@ jobs:
id-token: write
contents: read
linux-focal-rocm6_1-py3_10-build:
name: linux-focal-rocm6.1-py3.10
linux-focal-rocm6_2-py3_10-build:
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
@ -42,16 +42,16 @@ jobs:
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm6_1-py3_10-test:
linux-focal-rocm6_2-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.1-py3.10
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_1-py3_10-build
- linux-focal-rocm6_2-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm6.1-py3.10
docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.2-py3.10
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}

24
.github/workflows/s390.yml vendored Normal file
View File

@ -0,0 +1,24 @@
name: s390
on:
push:
branches:
- main
tags:
- ciflow/s390/*
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions: read-all
jobs:
linux-manylinux-2_28-py3-cpu-s390x-build:
name: linux-manylinux-2_28-py3-cpu-s390x
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-s390x-binary-manywheel
docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
runner: linux.s390x

View File

@ -130,13 +130,13 @@ jobs:
docker-image: ${{ needs.linux-focal-py3_9-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_9-clang10-build.outputs.test-matrix }}
linux-focal-rocm6_1-py3_10-build:
name: linux-focal-rocm6.1-py3.10
linux-focal-rocm6_2-py3_10-build:
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
@ -144,19 +144,19 @@ jobs:
{ config: "slow", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm6_1-py3_10-test:
linux-focal-rocm6_2-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.1-py3.10
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_1-py3_10-build
- linux-focal-rocm6_2-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm6.1-py3.10
docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.2-py3.10
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
linux-jammy-py3_10-clang15-asan-build:
name: linux-jammy-py3.10-clang15-asan

View File

@ -223,13 +223,13 @@ jobs:
cuda-version: "12.1"
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
linux-focal-rocm6_1-py3_10-build:
name: linux-focal-rocm6.1-py3.10
linux-focal-rocm6_2-py3_10-build:
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm6.1-py3.10
build-environment: linux-focal-rocm6.2-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
@ -240,19 +240,19 @@ jobs:
]}
secrets: inherit
linux-focal-rocm6_1-py3_10-test:
linux-focal-rocm6_2-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm6.1-py3.10
name: linux-focal-rocm6.2-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm6_1-py3_10-build
- linux-focal-rocm6_2-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm6.1-py3.10
docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.2-py3.10
docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:
@ -266,10 +266,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

View File

@ -28,7 +28,7 @@ jobs:
check-latest: false
cache: pip
architecture: x64
- run: pip install pyyaml==6.0 rockset==1.0.3
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |
@ -43,7 +43,6 @@ jobs:
COMMENT_ID: ${{ github.event.client_payload.comment_id }}
REBASE: ${{ github.event.client_payload.rebase }}
IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }}
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }}
GITHUB_RUN_ID: ${{ github.run_id }}
run: |

View File

@ -11,15 +11,39 @@ concurrency:
jobs:
do_update_viablestrict:
permissions:
id-token: write
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-20.04
environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }}
steps:
- name: Update viable/strict
uses: pytorch/test-infra/.github/actions/update-viablestrict@main
id: update_viablestrict
with:
repository: pytorch/pytorch
stable-branch: viable/strict
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}
- name: Authenticate to AWS with OIDC
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/upload_to_ossci_raw_job_status
aws-region: us-east-1
- name: Print sha
env:
LATEST_SHA: ${{ steps.update_viablestrict.outputs.latest_viable_sha }}
PUSH_RESULT: ${{ steps.update_viablestrict.outputs.push_result }}
TIME: ${{ steps.update_viablestrict.outputs.time }}
run: |
echo "${PUSH_RESULT}"
if [ "$PUSH_RESULT" = "Everything up-to-date" ]; then
echo "No update pushed"
else
echo "{\"sha\": \"${LATEST_SHA}\", \"repository\":\"pytorch/pytorch\", \"timestamp\": ${TIME}}" > "/tmp/${LATEST_SHA}.json"
pip install awscli==1.29.40
aws s3 cp "/tmp/${LATEST_SHA}.json" "s3://ossci-raw-job-status/stable_pushes/pytorch/pytorch/${LATEST_SHA}.json"
fi

View File

@ -1,55 +0,0 @@
# upload alerts every 10 minutes
name: Upload Alerts to AWS/Rockset
on:
schedule:
- cron: '*/10 * * * *'
pull_request:
paths:
- 'tools/alerts/create_alerts.py'
- '.github/workflows/upload-alerts.yml'
jobs:
upload-alerts:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-22.04
environment: upload-stats
steps:
- name: Checkout repo
uses: actions/checkout@v3
with:
fetch-depth: 1
- uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: pip
- name: Install Python Packages
run: |
pip3 install rockset==1.0.3 boto3==1.19.12 requests==2.32.2
- name: Create alerts
run: |
output=$(PYTHONPATH=$PYTHONPATH:$(pwd) python3 "tools/alerts/create_alerts.py")
echo "uploading following alerts"
echo "$output"
echo "script-output=$output" >> "$GITHUB_OUTPUT"
id: alert_creation_step
- name: Upload alerts
env:
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
uses: pytorch/test-infra/.github/actions/upload-alerts@main
with:
alerts: '${{ steps.alert_creation_step.outputs.script-output }}'
organization: "pytorch"
repo: "pytorch"
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

View File

@ -153,7 +153,7 @@ init_command = [
'junitparser==2.1.1',
'rich==10.9.0',
'pyyaml==6.0.1',
'optree==0.12.1',
'optree==0.13.0',
]
[[linter]]
@ -195,6 +195,7 @@ include_patterns = [
# and excluding most sub-directories for now.
'aten/src/ATen/*.h',
'aten/src/ATen/*.cpp',
'aten/src/ATen/cuda/*.cpp',
'aten/src/ATen/cpu/*.h',
'aten/src/ATen/cpu/*.cpp',
'aten/src/ATen/core/*.h',
@ -215,6 +216,10 @@ include_patterns = [
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.h',
'torch/csrc/distributed/rpc/**/*.cpp',
'torch/csrc/distributed/rpc/**/*.h',
'torch/csrc/jit/serialization/*.h',
'torch/csrc/jit/serialization/*.cpp',
]
@ -224,7 +229,6 @@ exclude_patterns = [
# CUDA files are also excluded.
'**/fb/**',
'**/*pb.h',
'aten/**/cuda/*pp',
'c10/xpu/**/*.h',
'c10/xpu/**/*.cpp',
'c10/cuda/CUDAAlgorithm.h',
@ -246,7 +250,6 @@ exclude_patterns = [
'torch/csrc/inductor/aoti_torch/c/shim.h',
'torch/csrc/jit/**/*',
'torch/csrc/jit/serialization/mobile_bytecode_generated.h',
'torch/csrc/lazy/**/*',
]
init_command = [
'python3',
@ -1255,7 +1258,6 @@ exclude_patterns = [
'torch/fx/experimental/refinement_types.py',
'torch/fx/experimental/rewriter.py',
'torch/fx/experimental/schema_type_annotation.py',
'torch/fx/experimental/symbolic_shapes.py',
'torch/fx/experimental/unification/__init__.py',
'torch/fx/experimental/unification/core.py',
'torch/fx/experimental/unification/dispatch.py',
@ -1271,7 +1273,6 @@ exclude_patterns = [
'torch/fx/experimental/unification/utils.py',
'torch/fx/experimental/unification/variable.py',
'torch/fx/experimental/unify_refinements.py',
'torch/fx/experimental/validator.py',
'torch/fx/graph.py',
'torch/fx/graph_module.py',
'torch/fx/interpreter.py',
@ -1585,6 +1586,27 @@ command = [
]
is_formatter = true
[[linter]]
code = 'META_NO_CREATE_UNBACKED'
include_patterns = [
"torch/_meta_registrations.py"
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=create_unbacked',
'--linter-name=META_NO_CREATE_UNBACKED',
'--error-name=no create_unbacked in meta registrations',
"""--error-description=\
Data-dependent operators should have their meta \
registration in torch/_subclasses/fake_impls.py, \
not torch/_meta_registrations.py
""",
'--',
'@{{PATHSFILE}}'
]
[[linter]]
code = 'ATEN_CPU_GPU_AGNOSTIC'
include_patterns = [

View File

@ -305,7 +305,6 @@ if(NOT DEFINED USE_VULKAN)
cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
endif()
option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
cmake_dependent_option(
@ -369,7 +368,7 @@ cmake_dependent_option(
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
cmake_dependent_option(
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED" OFF)
"USE_DISTRIBUTED AND NOT WIN32" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
@ -912,11 +911,6 @@ if(USE_PYTORCH_QNNPACK)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
endif()
if(USE_SLEEF_FOR_ARM_VEC256)
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()
# Enable sleef on macOS with Apple silicon by default
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
message(STATUS "Running on macOS with Apple silicon")
@ -924,6 +918,14 @@ if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STR
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()
# Enable sleef on Arm(R) architecture by default (except Android)
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
AND("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()
if(USE_XNNPACK)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()
@ -1081,8 +1083,16 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
if(CMAKE_BUILD_TYPE MATCHES Debug)
message(WARNING "Applying -Og optimization for aarch64 GCC debug build to work around an ICE")
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
else()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
endif()
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)

View File

@ -98,6 +98,10 @@ test/test_type_promotion.py @mruberry
test/functorch/test_ops.py @zou3519 @chillee @kshitij12345
test/functorch/test_vmap.py @zou3519 @chillee @kshitij12345
# HOPs
torch/_higher_order_ops/*.py @zou3519
torch/_dynamo/variables/higher_order_ops.py @zou3519
# torch MPS
test/test_mps.py @kulinseth @malfet
aten/src/ATen/mps/ @kulinseth @malfet
@ -117,7 +121,7 @@ torch/profiler/ @aaronenyeshi @sraikund16
test/functorch/test_aotdispatch.py @ezyang @Chillee
# Dataloader
torch/utils/data/ @andrewkho @gokulavasan
torch/utils/data/ @andrewkho @divyanshk
# hipify
torch/utils/hipify/ @jeffdaily @jithunnair-amd

View File

@ -208,6 +208,8 @@ If you want to compile with ROCm support, install
- [AMD ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) 4.0 and above installation
- ROCm is currently supported only for Linux systems.
By default the build system expects ROCm to be installed in `/opt/rocm`. If ROCm is installed in a different directory, the `ROCM_PATH` environment variable must be set to the ROCm installation directory. The build system automatically detects the AMD GPU architecture. Optionally, the AMD GPU architecture can be set explicitly with the `PYTORCH_ROCM_ARCH` environment variable (see the list of [supported AMD GPU architectures](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#supported-gpus)).
If you want to disable ROCm support, export the environment variable `USE_ROCM=0`.
Other potentially useful environment variables may be found in `setup.py`.
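For illustration only (not part of the upstream README), a minimal sketch of driving a from-source build with these variables set from Python; the ROCm path and architecture list are placeholder assumptions:

```python
import os
import subprocess

env = os.environ.copy()
env["ROCM_PATH"] = "/opt/rocm"              # only needed when ROCm is not in /opt/rocm
env["PYTORCH_ROCM_ARCH"] = "gfx90a;gfx942"  # optional: bypass auto-detection (placeholder list)
# env["USE_ROCM"] = "0"                     # uncomment to build without ROCm support

# `python setup.py develop` is the usual from-source build entry point.
subprocess.check_call(["python", "setup.py", "develop"], env=env)
```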

View File

@ -48,16 +48,16 @@
Following is the Release Compatibility Matrix for PyTorch releases:
| PyTorch version | Python | Stable CUDA | Experimental CUDA | Stable ROCm |
| --- | --- | --- | --- | --- |
| 2.5 | >=3.9, <=3.12, (3.13 experimental) | CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70 | None | ROCm 6.2 |
| 2.4 | >=3.8, <=3.12 | CUDA 11.8, CUDA 12.1, CUDNN 9.1.0.70 | CUDA 12.4, CUDNN 9.1.0.70 | ROCm 6.1 |
| 2.3 | >=3.8, <=3.11, (3.12 experimental) | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 6.0 |
| 2.2 | >=3.8, <=3.11, (3.12 experimental) | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.7 |
| 2.1 | >=3.8, <=3.11 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.6 |
| 2.0 | >=3.8, <=3.11 | CUDA 11.7, CUDNN 8.5.0.96 | CUDA 11.8, CUDNN 8.7.0.84 | ROCm 5.4 |
| 1.13 | >=3.7, <=3.10 | CUDA 11.6, CUDNN 8.3.2.44 | CUDA 11.7, CUDNN 8.5.0.96 | ROCm 5.2 |
| 1.12 | >=3.7, <=3.10 | CUDA 11.3, CUDNN 8.3.2.44 | CUDA 11.6, CUDNN 8.3.2.44 | ROCm 5.0 |
| PyTorch version | Python | C++ | Stable CUDA | Experimental CUDA | Stable ROCm |
| --- | --- | --- | --- | --- | --- |
| 2.5 | >=3.9, <=3.12, (3.13 experimental) | C++17 | CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70 | None | ROCm 6.2 |
| 2.4 | >=3.8, <=3.12 | C++17 | CUDA 11.8, CUDA 12.1, CUDNN 9.1.0.70 | CUDA 12.4, CUDNN 9.1.0.70 | ROCm 6.1 |
| 2.3 | >=3.8, <=3.11, (3.12 experimental) | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 6.0 |
| 2.2 | >=3.8, <=3.11, (3.12 experimental) | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.7 |
| 2.1 | >=3.8, <=3.11 | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.6 |
| 2.0 | >=3.8, <=3.11 | C++14 | CUDA 11.7, CUDNN 8.5.0.96 | CUDA 11.8, CUDNN 8.7.0.84 | ROCm 5.4 |
| 1.13 | >=3.7, <=3.10 | C++14 | CUDA 11.6, CUDNN 8.3.2.44 | CUDA 11.7, CUDNN 8.5.0.96 | ROCm 5.2 |
| 1.12 | >=3.7, <=3.10 | C++14 | CUDA 11.3, CUDNN 8.3.2.44 | CUDA 11.6, CUDNN 8.3.2.44 | ROCm 5.0 |
## Release Cadence
@ -234,7 +234,7 @@ Typically, within a release cycle fixes are necessary for regressions, test fixe
For fixes that are to go into a release after the release branch has been cut we typically employ the use of a cherry pick tracker.
An example of this would look like:
* https://github.com/pytorch/pytorch/issues/51886
* https://github.com/pytorch/pytorch/issues/128436
Please also make sure to add milestone target to the PR/issue, especially if it needs to be considered for inclusion into the dot release.
@ -243,7 +243,9 @@ Please also make sure to add milestone target to the PR/issue, especially if it
#### How to do Cherry Picking
You can now use `pytorchbot` to cherry pick a PyTorch PR that has been committed
to the main branch using `@pytorchbot cherry-pick` command as follows.
to the main branch using the `@pytorchbot cherry-pick` command as follows (make sure
that the cherry-pick tracker issue for the target release is labelled as "release tracker" -
this will allow the bot to find it and post comments).
```
usage: @pytorchbot cherry-pick --onto ONTO [--fixes FIXES] -c
@ -380,7 +382,7 @@ Patch release process takes around 4-5 weeks to complete.
### Issue Tracker for Patch releases
For patch releases, an issue tracker needs to be created. For a patch release, we require all cherry-pick changes to have links to either a high-priority GitHub issue or a CI failure from a previous RC. An example of this would look like:
* https://github.com/pytorch/pytorch/issues/51886
* https://github.com/pytorch/pytorch/issues/128436
Only the following issues are accepted:
1. Fixes to regressions against previous major version (e.g. regressions introduced in 1.13.0 from 1.12.0 are pickable for 1.13.1)

View File

@ -467,6 +467,9 @@ if(NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
endif()
if(USE_CUDA AND NOT USE_ROCM)
add_definitions(-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1)
add_definitions(-DCUTLASS_ENABLE_SM90_EXTENDED_MMA_SHAPES=1)
add_definitions(-DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
if($ENV{ATEN_STATIC_CUDA})

View File

@ -145,6 +145,14 @@ void Context::setSDPUseMath(bool e) {
enabled_mathSDP = e;
}
bool Context::allowFP16BF16ReductionMathSDP() const {
return allow_fp16_bf16_reduction_mathSDP;
}
void Context::setAllowFP16BF16ReductionMathSDP(bool e) {
allow_fp16_bf16_reduction_mathSDP = e;
}
bool Context::userEnabledCuDNNSDP() const {
return enabled_cudnnSDP;
}
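This context flag is typically mirrored by a Python-level toggle; a hedged sketch of how it might be exercised, assuming the `torch.backends.cuda` binding names below (they are assumptions, not confirmed by this diff):

```python
import torch

# Assumed binding names for the new allow_fp16_bf16_reduction_mathSDP flag.
torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
print(torch.backends.cuda.fp16_bf16_reduction_math_sdp_allowed())  # expected: True
```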

View File

@ -39,25 +39,16 @@ class TORCH_API Context {
const Generator& defaultGenerator(Device device) {
c10::DeviceType device_type = device.type();
initCUDAIfNeeded(device_type);
initHIPIfNeeded(device_type);
lazyInitDevice(device_type);
if (device_type == at::kCPU) {
return at::detail::getDefaultCPUGenerator();
} else if (device_type == at::kCUDA) {
return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index());
} else if (device_type == at::kMPS) {
return at::detail::getMPSHooks().getDefaultMPSGenerator();
} else if (device_type == at::kXPU) {
return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index());
} else if (device_type == at::kIPU) {
return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index());
} else if (device_type == at::kPrivateUse1) {
return at::detail::getPrivateUse1Hooks().getDefaultGenerator(
device.index());
} else {
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
return getAcceleratorHooksInterface(device_type)
.getDefaultGenerator(device.index());
}
}
const AcceleratorHooksInterface& getAcceleratorHooksInterface(
std::optional<c10::DeviceType> opt_device_type = std::nullopt) {
c10::DeviceType device_type = opt_device_type.has_value()
@ -80,10 +71,10 @@ class TORCH_API Context {
c10::DeviceTypeName(device_type), " device type not an accelerator.");
}
}
Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
initCUDAIfNeeded(device_type);
initHIPIfNeeded(device_type);
initXPUIfNeeded(device_type);
lazyInitDevice(device_type);
if (device_type == at::kCPU) {
return c10::DeviceType::CPU;
} else if (device_type == at::kCUDA) {
@ -96,6 +87,7 @@ class TORCH_API Context {
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
}
}
bool isPinnedPtr(
const void* data,
std::optional<c10::DeviceType> device_type = std::nullopt) {
@ -106,13 +98,22 @@ class TORCH_API Context {
opt_device_type.value())) { // passed device not an accelerator
return false;
}
return getAcceleratorHooksInterface(opt_device_type.value())
.isPinnedPtr(data);
return getAcceleratorHooksInterface(opt_device_type).isPinnedPtr(data);
}
Allocator* getPinnedMemoryAllocator(
std::optional<c10::DeviceType> device_type = std::nullopt) {
return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
}
void lazyInitDevice(c10::DeviceType device_type) {
if (device_type != at::kCPU) {
c10::call_once(init_[static_cast<int8_t>(device_type)], [&] {
getAcceleratorHooksInterface(device_type).init();
});
}
}
static bool hasOpenMP();
static bool hasMKL();
static bool hasLAPACK();
@ -165,27 +166,6 @@ class TORCH_API Context {
static bool hasMAIA() {
return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA);
}
// defined in header so that getNonVariableType has ability to inline
// call_once check. getNonVariableType is called fairly frequently
void lazyInitCUDA() {
c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); });
}
void lazyInitHIP() {
c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
}
void lazyInitXPU() {
c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
}
void lazyInitMTIA() {
c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); });
}
void lazyInitPrivateUse1() {
c10::call_once(thp_init, [&] {
if (isPrivateUse1HooksRegistered()) {
at::detail::getPrivateUse1Hooks().initPrivateUse1();
}
});
}
static const at::cuda::NVRTC& getNVRTC() {
return detail::getCUDAHooks().nvrtc();
}
@ -234,6 +214,9 @@ class TORCH_API Context {
void setSDPUseCuDNN(bool);
bool userEnabledCuDNNSDP() const;
void setAllowFP16BF16ReductionMathSDP(bool);
bool allowFP16BF16ReductionMathSDP() const;
void setSDPUseOverrideable(bool);
bool userEnabledOverrideableSDP() const;
@ -358,27 +341,8 @@ class TORCH_API Context {
void setAllowFP16ReductionCPU(bool);
private:
void initCUDAIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::CUDA) {
lazyInitCUDA();
}
}
void initHIPIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::HIP) {
lazyInitHIP();
}
}
void initXPUIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::XPU) {
lazyInitXPU();
}
}
static bool checkCuBLASConfigDeterministic();
c10::once_flag thc_init;
c10::once_flag thh_init;
c10::once_flag thx_init;
c10::once_flag th_mtia_init;
c10::once_flag thp_init;
std::array<c10::once_flag, at::COMPILE_TIME_MAX_DEVICE_TYPES> init_;
bool enabled_cudnn = true;
bool deterministic_cudnn = false;
bool deterministic_mkldnn = false;
@ -390,6 +354,7 @@ class TORCH_API Context {
bool enabled_mathSDP = true;
bool enabled_cudnnSDP = true;
bool enabled_overrideable = true;
bool allow_fp16_bf16_reduction_mathSDP = false;
#ifdef USE_ROCM
bool benchmark_cudnn = true;
#else
@ -509,7 +474,7 @@ inline size_t getNumGPUs() {
"to be CUDA (e.g., when you say CUDA, on a HIP build of ATen, this actually "
"means HIP. Rebuild PyTorch with one or the other disabled.");
} else if (hasCUDA()) {
return detail::getCUDAHooks().getNumGPUs();
return detail::getCUDAHooks().deviceCount();
} else if (hasHIP()) {
return detail::getHIPHooks().getNumGPUs();
} else {
@ -546,7 +511,7 @@ inline void manual_seed(uint64_t seed) {
}
// NB: Sometimes we build with CUDA, but we don't have any GPUs
// available. In that case, we must not seed CUDA; it will fail!
const auto cuda_num_gpus = detail::getCUDAHooks().getNumGPUs();
const auto cuda_num_gpus = detail::getCUDAHooks().deviceCount();
if (hasCUDA() && cuda_num_gpus > 0) {
for (const auto i : c10::irange(cuda_num_gpus)) {
auto cuda_gen = globalContext().defaultGenerator(
@ -559,7 +524,7 @@ inline void manual_seed(uint64_t seed) {
}
}
const auto xpu_num_gpus = detail::getXPUHooks().getNumGPUs();
const auto xpu_num_gpus = detail::getXPUHooks().deviceCount();
if (hasXPU() && xpu_num_gpus) {
for (const auto i : c10::irange(xpu_num_gpus)) {
auto xpu_gen = globalContext().defaultGenerator(

View File

@ -18,6 +18,8 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
// To properly support this, see https://github.com/pytorch/pytorch/issues/14560
if (at::globalContext().hasCUDA()) {
return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
} else if (at::globalContext().hasMTIA()) {
return at::detail::getMTIAHooks().getPinnedMemoryAllocator();
} else if (at::globalContext().hasXPU()) {
return at::detail::getXPUHooks().getPinnedMemoryAllocator();
} else if(at::isPrivateUse1HooksRegistered()) {

View File

@ -420,15 +420,15 @@ inline c10::MaybeOwned<Tensor> expand_size(
inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
// expands a list of Tensors; ignores undefined (null) tensors
bool first = true;
DimVector sizes;
SymDimVector sizes;
for (const auto i : c10::irange(to_expand.size())) {
if (!to_expand[i].defined()) {
continue;
} else if (first) {
sizes = to_expand[i].sizes();
sizes = to_expand[i].sym_sizes();
first = false;
} else {
sizes = infer_size_dimvector(sizes, to_expand[i].sizes());
sizes = infer_size_symdimvector(sizes, to_expand[i].sym_sizes());
}
}
@ -436,10 +436,10 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
for (const auto i : c10::irange(to_expand.size())) {
if (!to_expand[i].defined()) {
continue;
} else if (to_expand[i].sizes().equals(sizes)) {
} else if (to_expand[i].sym_sizes().equals(sizes)) {
result[i] = to_expand[i];
} else {
result[i] = to_expand[i].expand(sizes);
result[i] = to_expand[i].expand_symint(sizes);
}
}
return result;

View File

@ -61,9 +61,8 @@ void set_num_threads(int nthreads) {
#endif
#ifdef USE_PTHREADPOOL
// because PyTorch uses caffe2::pthreadpool() in QNNPACK
caffe2::PThreadPool* const pool = caffe2::pthreadpool();
caffe2::PThreadPool* const pool = caffe2::pthreadpool(nthreads);
TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
pool->set_thread_count(nthreads);
#endif
#if AT_MKLDNN_ENABLED()
at::native::mkldnn::clear_computation_cache();

View File

@ -19,7 +19,7 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
AT_DISPATCH_V2(
self.scalar_type(), "fill_out", AT_WRAP([&]() {
fill_inplace<scalar_t>(self, value);
}), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
}), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
return self;
}

View File

@ -144,8 +144,8 @@ class CheckSparseTensorInvariants {
bool old_state;
public:
CheckSparseTensorInvariants(bool state) {
old_state = at::globalContext().checkSparseTensorInvariants();
CheckSparseTensorInvariants(bool state)
: old_state(at::globalContext().checkSparseTensorInvariants()) {
at::globalContext().setCheckSparseTensorInvariants(state);
}

View File

@ -255,7 +255,9 @@ inline Tensor applySelect(
// the other hand, indexing wrapping is valid for all negative int64_t
// values, as x[INT64_MIN] is the same as x[INT64_MAX]
TORCH_CHECK_INDEX(
size > -1 - index && size > index,
size.sym_gt(-1 - index)
.sym_and(size.sym_gt(index))
.expect_true(__FILE__, __LINE__),
"index ",
index,
" is out of bounds for dimension ",

View File

@ -82,7 +82,7 @@ class TORCH_API ThreadLocalState {
!defined(BUILD_LITE_INTERPRETER)
// TLS for autocast dtypes
std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
autocast_dtypes_;
autocast_dtypes_{};
#endif
friend class ThreadLocalStateGuard;

View File

@ -111,17 +111,6 @@ template <
typename E,
typename B = HostBlock<S>>
struct CachingHostAllocatorImpl {
CachingHostAllocatorImpl() {
// Launch the background thread and process events in a loop.
if (pinned_use_background_threads()) {
getBackgroundThreadPool()->run([&]() {
while (true) {
process_events();
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
});
}
}
virtual ~CachingHostAllocatorImpl() = default;
public:
@ -155,6 +144,17 @@ struct CachingHostAllocatorImpl {
if (block) {
return {block->ptr_, reinterpret_cast<void*>(block)};
}
// Launch the background thread and process events in a loop.
static c10::once_flag background_thread_flag;
c10::call_once(background_thread_flag, [this] {
getBackgroundThreadPool()->run([&]() {
while (true) {
process_events();
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
});
});
}
// Slow path: if we can't allocate from the cached free list, we need

View File

@ -13,8 +13,6 @@
#include <ATen/core/Array.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/Half.h>
#include <cmath>
#include <cstdint>

View File

@ -45,7 +45,7 @@ private:
c10::impl::LocalDispatchKeySet saved_;
};
void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
void pythonFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatch_keys, torch::jit::Stack* stack) {
TORCH_INTERNAL_ASSERT(tls_on_entry.has_value());
// c10::impl::ForceDispatchKeyGuard dispatcher_guard(tls_on_entry.value());
// StashTLSOnEntryGuard stash_guard;
@ -68,12 +68,20 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
// we actually run dispatch(), we will take out PyObjects in the context
// of that interpreter, and this will ensure that everyone is on the same
// interpreter.
bool tensors_with_python_key_present = false;
c10::impl::PyInterpreter* interpreter = nullptr;
for (const auto& ivalue : torch::jit::last(*stack, num_arguments)) {
if (ivalue.isTensor()) {
auto* interpreter = ivalue.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
if (interpreter) {
(*interpreter)->dispatch(op, stack);
return;
auto* t = ivalue.unsafeToTensorImpl();
if (t->key_set().has(c10::DispatchKey::Python)) {
tensors_with_python_key_present = true;
}
if (!interpreter) {
auto* t_interpreter = t->pyobj_slot()->pyobj_interpreter();
if (t_interpreter) {
interpreter = t_interpreter;
}
}
} else if (ivalue.isTensorList() || ivalue.isOptionalTensorList()) {
// NB: use toListRef as it doesn't induce refcount bumps (toTensorListRef
@ -82,14 +90,43 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
if (nv.isNone()) {
continue;
}
auto* interpreter = nv.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
if (interpreter) {
(*interpreter)->dispatch(op, stack);
return;
auto* t = nv.unsafeToTensorImpl();
if (t->key_set().has(c10::DispatchKey::Python)) {
tensors_with_python_key_present = true;
}
if (!interpreter) {
auto* t_interpreter = t->pyobj_slot()->pyobj_interpreter();
if (t_interpreter) {
interpreter = t_interpreter;
}
}
}
}
}
if (interpreter) {
if (tensors_with_python_key_present) {
(*interpreter)->dispatch(op, stack);
} else {
// At this point, there are no modes in the stack and no tensors with the Python key,
// so disable the Python key before redispatching.
// See https://github.com/pytorch/pytorch/issues/136565
c10::DispatchKeySet keyset = dispatch_keys.remove(c10::DispatchKey::Python);
// Remove Python key from the included set as well (modes add it there).
c10::impl::LocalDispatchKeySet local_keyset = c10::impl::tls_local_dispatch_key_set();
c10::impl::ForceDispatchKeyGuard no_python_guard(
local_keyset.included_.remove(c10::DispatchKey::Python),
local_keyset.excluded_
);
op.redispatchBoxed(keyset, stack);
}
return;
}
TORCH_INTERNAL_ASSERT(0, "Hit Python dispatch key but no arguments had PyInterpreter (no tensor args?)");
}
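
The rewritten fallback separates two questions the old code conflated: does any argument actually carry the Python dispatch key (so Python should handle the op), and is any interpreter attached at all (so there is somewhere to send it). Only when the key is present does it call into Python; otherwise the Python key is stripped from both the key set and the TLS include set and the op is redispatched, per the linked issue. The sketch below is a simplified standalone analogue of that control flow, not PyTorch's dispatcher API; all types and names are hypothetical:

    #include <cstdint>
    #include <vector>

    enum Key : uint32_t { kPython = 1u << 0, kCPU = 1u << 1 };

    struct Arg {
      uint32_t key_set;   // dispatch keys this argument participates in
      void* interpreter;  // non-null if an interpreter is attached to this argument
    };

    void dispatch_to_python(void*, const std::vector<Arg>&) { /* hand the op to Python */ }
    void redispatch(uint32_t, const std::vector<Arg>&) { /* continue with the reduced key set */ }

    void python_fallback(uint32_t dispatch_keys, const std::vector<Arg>& args) {
      bool python_key_present = false;
      void* interpreter = nullptr;
      for (const Arg& a : args) {
        if (a.key_set & kPython) python_key_present = true;
        if (!interpreter && a.interpreter) interpreter = a.interpreter;
      }
      if (interpreter == nullptr) {
        return;  // the real code asserts here: no tensor argument had an interpreter
      }
      if (python_key_present) {
        dispatch_to_python(interpreter, args);
      } else {
        // Nothing asked for Python handling: mask the key out and fall back to the regular path.
        redispatch(dispatch_keys & ~static_cast<uint32_t>(kPython), args);
      }
    }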

View File

@ -17,8 +17,22 @@ TORCH_SDT_DEFINE_SEMAPHORE(operator_end)
#endif
bool show_dispatch_trace() {
static char const* temp = getenv("TORCH_SHOW_DISPATCH_TRACE");
return temp != nullptr;
static auto envar = std::getenv("TORCH_SHOW_DISPATCH_TRACE");
if (envar) {
if (strcmp(envar, "0") == 0) {
return false;
}
if (strcmp(envar, "1") == 0) {
return true;
}
TORCH_WARN(
"ignoring invalid value for TORCH_SHOW_DISPATCH_TRACE: ",
envar,
" valid values are 0 or 1.");
}
return false;
}
static thread_local int64_t dispatch_trace_nesting_value_;
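
The replacement helper reads the environment variable once (the `static auto envar`) and then accepts only the literal strings "0" and "1", warning on anything else. A minimal standalone version of the same tri-state parse, defaulting to off, might be:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    // Reads an on/off environment variable; unset or invalid values mean "off".
    bool env_flag_enabled(const char* name) {
      const char* value = std::getenv(name);
      if (value == nullptr) return false;
      if (std::strcmp(value, "0") == 0) return false;
      if (std::strcmp(value, "1") == 0) return true;
      std::fprintf(stderr, "ignoring invalid value for %s: %s (valid values are 0 or 1)\n",
                   name, value);
      return false;
    }

A caller such as `static const bool show_trace = env_flag_enabled("TORCH_SHOW_DISPATCH_TRACE");` would cache the parsed result for the life of the process; the patched function caches only the getenv result and re-compares the string on each call.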

View File

@ -261,7 +261,7 @@ public:
Vectorized<double> nextafter(const Vectorized<double> &b) const {
USE_SLEEF(
{
return Vectorized<double>(Sleef_nextafterfx_sve(values, b));
return Vectorized<double>(Sleef_nextafterdx_sve(values, b));
},
{
__at_align__ double tmp[size()];
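
The fix here is a one-letter API mix-up: the double-precision SVE path had been calling the single-precision SLEEF entry point, and `Sleef_nextafterdx_sve` is the double variant. The non-SLEEF branch that follows falls back to the scalar routine; a minimal sketch of that fallback over plain buffers, using std::nextafter, is:

    #include <cmath>
    #include <cstddef>

    // Element-wise nextafter over two equally sized buffers (scalar fallback path).
    void nextafter_elementwise(const double* a, const double* b, double* out, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i) {
        out[i] = std::nextafter(a[i], b[i]);
      }
    }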

View File

@ -208,8 +208,27 @@ struct VecConvert<
(is_reduced_floating_point_v<src_t> && is_8bit_integer_v<dst_t>),
void>> {
static inline VectorizedN<dst_t, 1> apply(const VectorizedN<src_t, 1>& src) {
VectorizedN<float, 1> tmp_fp32 = VecConvert<float, 1, src_t, 1>::apply(src);
return VecConvert<dst_t, 1, float, 1>::apply(tmp_fp32);
VectorizedN<float, 2> tmp_fp32 = VecConvert<float, 2, src_t, 1>::apply(src);
return VecConvert<dst_t, 1, float, 2>::apply(tmp_fp32);
}
};
template <typename dst_t>
struct VecConvert<
dst_t,
1,
float,
2,
typename std::enable_if_t<is_8bit_integer_v<dst_t>,
void>> {
static inline VectorizedN<dst_t, 1> apply(const VectorizedN<float, 2>& src) {
at::vec::Vectorized<dst_t> vec1 = convert_float_to_int8<dst_t>(src[0]);
at::vec::Vectorized<dst_t> vec2 = convert_float_to_int8<dst_t>(src[1]);
__m128 lane2 = _mm256_castps256_ps128(_mm256_castsi256_ps(vec2));
__m256 combined = _mm256_insertf128_ps(_mm256_castsi256_ps(vec1), lane2, 1);
// Shuffle bits [191:128] of combined into bits [127:64] of the result
__m256i result = _mm256_permute4x64_epi64(_mm256_castps_si256(combined), 0b11011000);
return at::vec::Vectorized<dst_t>(result);
}
};
@ -226,6 +245,25 @@ struct VecConvert<
}
};
template <typename src_t>
struct VecConvert<
float,
2,
src_t,
1,
typename std::enable_if_t<is_8bit_integer_v<src_t>,
void>> {
static inline VectorizedN<float, 2> apply(const VectorizedN<src_t, 1>& src) {
// Shuffle bits [127:64] of src[0] into bits [191:128] of shuffled
__m256i shuffled = _mm256_permute4x64_epi64(src[0], 0b11011000);
__m256i src2 = _mm256_castsi128_si256(
_mm_castps_si128(
_mm256_extractf128_ps(_mm256_castsi256_ps(shuffled), 1) // Extract the second 128-bit lane
)
);
return VectorizedN<float, 2>(convert_int8_to_float<src_t>(src[0]), convert_int8_to_float<src_t>(src2));
}
};
template <typename dst_t>
struct VecConvert<
@ -268,11 +306,10 @@ struct VecConvert<float, 1, BFloat16, 1> {
const VectorizedN<BFloat16, 1>& src) {
VectorizedN<float, 1> result;
uint16x8_t u16_8 = vld1q_u16(reinterpret_cast<const uint16_t*>(&src[0]));
int32x4_t shift = vdupq_n_s32(16);
auto u16_low1 = vget_low_u16(u16_8);
auto u16_high1 = vget_high_u16(u16_8);
float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_low1), shift));
float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_high1), shift));
float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_low1), 16));
float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_high1), 16));
result[0] = {f32x4_0, f32x4_1};
return result;
}
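
The NEON change replaces a vector shift (which needed a vdupq_n_s32(16) count register) with the immediate form vshlq_n_u32. The conversion itself relies on the fact that a bfloat16 value is the upper 16 bits of an IEEE-754 float32, so widening is a left shift by 16 followed by a bit reinterpretation. A scalar sketch of that widening:

    #include <cstdint>
    #include <cstring>

    // Widen one bfloat16 (given as its raw 16-bit pattern) to float32.
    inline float bf16_to_f32(uint16_t raw) {
      uint32_t bits = static_cast<uint32_t>(raw) << 16;  // bf16 occupies the high 16 bits of an f32
      float out;
      std::memcpy(&out, &bits, sizeof(out));             // bit-level reinterpretation
      return out;
    }

Using the `_n` form lets the shift count be encoded as an instruction immediate instead of first materializing a vector of 16s.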

View File

@ -216,27 +216,27 @@ public:
}
Vectorized<float> exp_u20() const {
// A faster version of exp with ULP=20
static __m256 vec_factorial_1 =
const __m256 vec_factorial_1 =
_mm256_set1_ps(0.999999701f); // 1/factorial(1)
static __m256 vec_factorial_2 =
const __m256 vec_factorial_2 =
_mm256_set1_ps(0.499991506f); // 1/factorial(2)
static __m256 vec_factorial_3 =
const __m256 vec_factorial_3 =
_mm256_set1_ps(0.166676521f); // 1/factorial(3)
static __m256 vec_factorial_4 =
const __m256 vec_factorial_4 =
_mm256_set1_ps(0.0418978221f); // 1/factorial(4)
static __m256 vec_factorial_5 =
const __m256 vec_factorial_5 =
_mm256_set1_ps(0.00828929059f); // 1/factorial(5)
static __m256 vec_exp_log2ef =
const __m256 vec_exp_log2ef =
_mm256_castsi256_ps(_mm256_set1_epi32(0x3fb8aa3b)); // log2(e)
static __m256 vec_half = _mm256_set1_ps(0.5f);
static __m256 vec_one = _mm256_set1_ps(1.f);
static __m256 vec_zero = _mm256_set1_ps(0.f);
static __m256 vec_two = _mm256_set1_ps(2.f);
static __m256 vec_ln2f = _mm256_castsi256_ps(_mm256_set1_epi32(0x3f317218)); // ln(2)
static __m256 vec_ln_flt_min = _mm256_castsi256_ps(_mm256_set1_epi32(0xc2aeac50));
static __m256 vec_ln_flt_max = _mm256_castsi256_ps(_mm256_set1_epi32(0x42b17218));
static __m256i vec_127 = _mm256_set1_epi32(0x0000007f);
static int n_mantissa_bits = 23;
const __m256 vec_half = _mm256_set1_ps(0.5f);
const __m256 vec_one = _mm256_set1_ps(1.f);
const __m256 vec_zero = _mm256_set1_ps(0.f);
const __m256 vec_two = _mm256_set1_ps(2.f);
const __m256 vec_ln2f = _mm256_castsi256_ps(_mm256_set1_epi32(0x3f317218)); // ln(2)
const __m256 vec_ln_flt_min = _mm256_castsi256_ps(_mm256_set1_epi32(0xc2aeac50));
const __m256 vec_ln_flt_max = _mm256_castsi256_ps(_mm256_set1_epi32(0x42b17218));
const __m256i vec_127 = _mm256_set1_epi32(0x0000007f);
const int n_mantissa_bits = 23;
// exp(x) =
// = exp(n * ln(2) + r) // divide x by ln(2) and get quot and rem
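
The constants above feed the standard range-reduction scheme: write x = n·ln(2) + r, approximate exp(r) with the degree-5 polynomial whose 1/k! coefficients are listed, then scale by 2^n through the exponent field (hence vec_127 and the 23 mantissa bits). The switch from `static` to `const` matters because C++11 function-local statics are initialized thread-safely, so a non-constant-initialized static typically costs a guard-variable check on every call, while a plain `const` local built from _mm256_set1_ps is just a constant the compiler can fold or hoist. A small sketch of the difference, with a hypothetical helper:

    #include <immintrin.h>

    // Guarded: the first call initializes `half` under the magic-static guard,
    // and later calls typically still test the guard variable.
    __m256 scale_half_static(__m256 x) {
      static const __m256 half = _mm256_set1_ps(0.5f);
      return _mm256_mul_ps(x, half);
    }

    // Unguarded: `half` is an ordinary local; the broadcast folds to a constant load.
    __m256 scale_half_const(__m256 x) {
      const __m256 half = _mm256_set1_ps(0.5f);
      return _mm256_mul_ps(x, half);
    }

The same reasoning applies to the AVX-512 copy of exp_u20 further down.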

View File

@ -75,7 +75,7 @@ inline __m256i pack_saturate_and_clamp<int32_t>(
int32_t /*min_val*/,
int32_t /*max_val*/) {
// This function is for linkage only, will not be used
AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
}
template <>

View File

@ -209,8 +209,25 @@ struct VecConvert<
(is_reduced_floating_point_v<src_t> && is_8bit_integer_v<dst_t>),
void>> {
static inline VectorizedN<dst_t, 1> apply(const VectorizedN<src_t, 1>& src) {
VectorizedN<float, 1> tmp_fp32 = VecConvert<float, 1, src_t, 1>::apply(src);
return VecConvert<dst_t, 1, float, 1>::apply(tmp_fp32);
VectorizedN<float, 2> tmp_fp32 = VecConvert<float, 2, src_t, 1>::apply(src);
return VecConvert<dst_t, 1, float, 2>::apply(tmp_fp32);
}
};
template <typename dst_t>
struct VecConvert<
dst_t,
1,
float,
2,
typename std::enable_if_t<is_8bit_integer_v<dst_t>,
void>> {
static inline VectorizedN<dst_t, 1> apply(const VectorizedN<float, 2>& src) {
at::vec::Vectorized<dst_t> vec1 = convert_float_to_int8<dst_t>(src[0]);
at::vec::Vectorized<dst_t> vec2 = convert_float_to_int8<dst_t>(src[1]);
__m128 lane2 = _mm512_castps512_ps128(_mm512_castsi512_ps(vec2));
__m512 result = _mm512_insertf32x4(_mm512_castsi512_ps(vec1), lane2, 1); // Insert lane2 into the second 128-bit lane
return at::vec::Vectorized<dst_t>(_mm512_castps_si512(result));
}
};
@ -227,6 +244,24 @@ struct VecConvert<
}
};
template <typename src_t>
struct VecConvert<
float,
2,
src_t,
1,
typename std::enable_if_t<is_8bit_integer_v<src_t>,
void>> {
static inline VectorizedN<float, 2> apply(const VectorizedN<src_t, 1>& src) {
__m512i src2 = _mm512_castsi128_si512(
_mm_castps_si128(
_mm512_extractf32x4_ps(_mm512_castsi512_ps(src[0]), 1) // Extract the second 128-bit lane
)
);
return VectorizedN<float, 2>(convert_int8_to_float<src_t>(src[0]), convert_int8_to_float<src_t>(src2));
}
};
template <typename src_t>
struct VecConvert<
float,

View File

@ -236,27 +236,27 @@ public:
}
Vectorized<float> exp_u20() const {
// A faster version of exp with ULP=20
static __m512 vec_factorial_1 =
const __m512 vec_factorial_1 =
_mm512_set1_ps(0.999999701f); // 1/factorial(1)
static __m512 vec_factorial_2 =
const __m512 vec_factorial_2 =
_mm512_set1_ps(0.499991506f); // 1/factorial(2)
static __m512 vec_factorial_3 =
const __m512 vec_factorial_3 =
_mm512_set1_ps(0.166676521f); // 1/factorial(3)
static __m512 vec_factorial_4 =
const __m512 vec_factorial_4 =
_mm512_set1_ps(0.0418978221f); // 1/factorial(4)
static __m512 vec_factorial_5 =
const __m512 vec_factorial_5 =
_mm512_set1_ps(0.00828929059f); // 1/factorial(5)
static __m512 vec_exp_log2ef =
const __m512 vec_exp_log2ef =
_mm512_castsi512_ps(_mm512_set1_epi32(0x3fb8aa3b)); // log2(e)
static __m512 vec_half = _mm512_set1_ps(0.5f);
static __m512 vec_one = _mm512_set1_ps(1.f);
static __m512 vec_zero = _mm512_set1_ps(0.f);
static __m512 vec_two = _mm512_set1_ps(2.f);
static __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
static __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
static __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
static __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
static int n_mantissa_bits = 23;
const __m512 vec_half = _mm512_set1_ps(0.5f);
const __m512 vec_one = _mm512_set1_ps(1.f);
const __m512 vec_zero = _mm512_set1_ps(0.f);
const __m512 vec_two = _mm512_set1_ps(2.f);
const __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
const __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
const __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
const __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
const int n_mantissa_bits = 23;
// exp(x) =
// = exp(n * ln(2) + r) // divide x by ln(2) and get quot and rem

View File

@ -77,7 +77,7 @@ inline __m512i pack_saturate_and_clamp<int32_t>(
int32_t min_val [[maybe_unused]],
int32_t max_val [[maybe_unused]]) {
// This function is for linkage only, will not be used
AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
return __m512i{};
}

View File

@ -125,7 +125,7 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/, cudaStreamCaptureMode capt
// due to the capture status being updated _after_ a capture had already started.
c10::cuda::CUDACachingAllocator::beginAllocateToPool(capture_dev_, mempool_id_, [this](cudaStream_t stream) {
cudaStreamCaptureStatus status;
CaptureId_t stream_capture_id;
CaptureId_t stream_capture_id = 0;
AT_CUDA_CHECK(cudaStreamGetCaptureInfo(stream, &status, &stream_capture_id));
return status == cudaStreamCaptureStatus::cudaStreamCaptureStatusActive && stream_capture_id == capture_id_;
});
@ -160,7 +160,7 @@ void CUDAGraph::capture_end() {
c10::cuda::CUDACachingAllocator::endAllocateToPool(capture_dev_, mempool_id_);
TORCH_CHECK(graph_ != NULL, "Invalid capture.");
TORCH_CHECK(graph_ != nullptr, "Invalid capture.");
has_graph_ = true;
// In typical graph usage some tensors (e.g. the tensors used for graph IO) are not freed
@ -175,7 +175,7 @@ void CUDAGraph::capture_end() {
// cudaGraphInstantiateWithFlags
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH_1ga2c652a24ba93e52b99a47bec0888233
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040)
int version;
int version = 0;
AT_CUDA_CHECK(cudaDriverGetVersion(&version));
if (version < 11040) {
#endif
@ -203,7 +203,7 @@ void CUDAGraph::capture_end() {
}
size_t numCUDAGraphNodes = 0;
AT_CUDA_CHECK(cudaGraphGetNodes(graph_, NULL, &numCUDAGraphNodes));
AT_CUDA_CHECK(cudaGraphGetNodes(graph_, nullptr, &numCUDAGraphNodes));
if (numCUDAGraphNodes == 0) {
TORCH_WARN("The CUDA Graph is empty. This usually means that the graph was ",
"attempted to be captured on wrong device or stream.");
@ -233,7 +233,7 @@ void CUDAGraph::replay() {
// graph_exec_ may be replayed in any stream.
AT_CUDA_CHECK(cudaGraphLaunch(graph_exec_, at::cuda::getCurrentCUDAStream()));
int version;
int version = 0;
AT_CUDA_CHECK(cudaDriverGetVersion(&version));
if (version < 11040) {
// Workaround for bug in libcuda.so that causes replayed graphs with
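
The remaining edits in this file initialize out-parameters (`version`, `stream_capture_id`) before handing them to the CUDA runtime and replace NULL with nullptr. Initializing an out-parameter keeps its value well defined even on an error path where the API never writes it, which is what static analyzers flag. A generic sketch of the pattern with a hypothetical C-style query (query_version is a stub, not a CUDA API):

    #include <cstdio>

    // Stub standing in for a C-style query that may fail before writing its result.
    int query_version(int* out) { *out = 12040; return 0; }

    void report_version() {
      int version = 0;  // well defined even if query_version fails before writing
      if (query_version(&version) != 0) {
        std::fprintf(stderr, "query failed, version stays 0\n");
        return;
      }
      std::printf("driver version: %d\n", version);
    }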

Some files were not shown because too many files have changed in this diff.