Compare commits


6 Commits

Author SHA1 Message Date
47aaf03798 [dict] Support dict.update() with no args
ghstack-source-id: c36750bbc4a12c016986ba84ee25a549d96b4b52
Pull-Request: https://github.com/pytorch/pytorch/pull/158061
2025-10-14 12:28:27 -03:00
00e728c4dd [OrderedDict] Implement hasattr(..., IteratorVariable)
ghstack-source-id: 2c7221fd1cefe8f7009972e8a54570e48dcd8ed9
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155501
2025-10-14 12:28:27 -03:00
312bda8417 [dict] Implement dict subclass fromkeys classmethod
ghstack-source-id: d024c6b95f293642fb6be338002c0a991fe57620
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155608
2025-10-14 12:28:26 -03:00
ae89b899cb [OrderedDict] Add bool(OrderedDict)
ghstack-source-id: 8e0befb6c60da5df505106b51df25ead970e454a
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155503
2025-10-14 12:28:25 -03:00
4c722fe44e [OrderedDict] Set the correct dict class in UserDefinedDictVariable
ghstack-source-id: efbc483057c41b1cf3f2beb6f71a97cd00f75ac9
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155502
2025-10-14 12:28:25 -03:00
8345670234 [dict] Implement __eq__ for dict_items
ghstack-source-id: d5360cc0d97524f57bc88763955527ca4622f48c
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155154
2025-10-14 12:28:24 -03:00
2375 changed files with 30914 additions and 75865 deletions
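
Taken together, the six commits above extend Dynamo's modeling of `dict`/`OrderedDict`. As a rough sketch (illustrative only; the linked PRs carry the authoritative tests), these are the plain-Python behaviors they make traceable under `torch.compile`:

```python
from collections import OrderedDict

d = {"a": 1}
d.update()                                   # dict.update() with no args is a no-op
assert d == {"a": 1}

class MyDict(dict):
    pass

m = MyDict.fromkeys(["x", "y"], 0)           # fromkeys is a classmethod, so it
assert type(m) is MyDict                     # constructs the subclass

od = OrderedDict(a=1)
assert bool(od) and not bool(OrderedDict())  # bool(OrderedDict) reflects emptiness
assert hasattr(iter(od), "__next__")         # hasattr on a keys iterator

assert {"a": 1}.items() == {"a": 1}.items()  # dict_items implements __eq__
```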

View File

@@ -13,4 +13,3 @@ exclude:
 - "**/benchmarks/**"
 - "**/test_*.py"
 - "**/*_test.py"
-- "tools/**"

View File

@@ -113,7 +113,6 @@ case "$tag" in
 UCX_COMMIT=${_UCX_COMMIT}
 UCC_COMMIT=${_UCC_COMMIT}
 TRITON=yes
-INSTALL_MINGW=yes
 ;;
 pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
 CUDA_VERSION=13.0.0
@@ -195,16 +194,13 @@ case "$tag" in
 NINJA_VERSION=1.9.0
 TRITON=yes
 ;;
-pytorch-linux-jammy-xpu-n-py3 | pytorch-linux-jammy-xpu-n-py3-inductor-benchmarks)
+pytorch-linux-jammy-xpu-n-py3)
 ANACONDA_PYTHON_VERSION=3.10
 GCC_VERSION=11
 VISION=yes
 XPU_VERSION=2025.2
 NINJA_VERSION=1.9.0
 TRITON=yes
-if [[ $tag =~ "benchmarks" ]]; then
-INDUCTOR_BENCHMARKS=yes
-fi
 ;;
 pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
 ANACONDA_PYTHON_VERSION=3.10
@@ -365,7 +361,6 @@ docker build \
 --build-arg "OPENBLAS=${OPENBLAS:-}" \
 --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
 --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
---build-arg "INSTALL_MINGW=${INSTALL_MINGW:-}" \
 -f $(dirname ${DOCKERFILE})/Dockerfile \
 -t "$tmp_tag" \
 "$@" \

View File

@@ -3,7 +3,7 @@
 set -eux
-ACL_VERSION=${ACL_VERSION:-"v52.6.0"}
+ACL_VERSION=${ACL_VERSION:-"v25.02"}
 ACL_INSTALL_DIR="/acl"
 # Clone ACL

View File

@@ -49,20 +49,12 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
 export SYSROOT_DEP="sysroot_linux-64=2.17"
 fi
-# Install correct Python version
-# Also ensure sysroot is using a modern GLIBC to match system compilers
-if [ "$ANACONDA_PYTHON_VERSION" = "3.14" ]; then
-as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
-python="3.14.0" \
-${SYSROOT_DEP} \
--c conda-forge
-else
 # Install correct Python version
 # Also ensure sysroot is using a modern GLIBC to match system compilers
 as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
 python="$ANACONDA_PYTHON_VERSION" \
 ${SYSROOT_DEP}
-fi
 # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
 # which is provided in libstdcxx 12 and up.
 conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge

View File

@@ -83,6 +83,10 @@ function build_cpython {
 py_suffix=${py_ver::-1}
 py_folder=$py_suffix
 fi
+# Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4
+if [ "$py_suffix" == "3.14.0" ]; then
+py_suffix="3.14.0rc2"
+fi
 wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
 do_cpython_build $py_ver Python-$py_suffix

View File

@@ -10,7 +10,7 @@ else
 arch_path='sbsa'
 fi
-NVSHMEM_VERSION=3.4.5
+NVSHMEM_VERSION=3.3.24
 function install_cuda {
 version=$1
@@ -150,7 +150,7 @@ function install_130 {
 CUDNN_VERSION=9.13.0.50
 echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
 # install CUDA 13.0 in the same container
-install_cuda 13.0.2 cuda_13.0.2_580.95.05_linux
+install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
 install_cudnn 13 $CUDNN_VERSION

View File

@@ -1,10 +0,0 @@
#!/bin/bash
set -ex
# Install MinGW-w64 for Windows cross-compilation
apt-get update
apt-get install -y g++-mingw-w64-x86-64-posix
echo "MinGW-w64 installed successfully"
x86_64-w64-mingw32-g++ --version

View File

@@ -19,8 +19,8 @@ pip_install \
 transformers==4.36.2
 pip_install coloredlogs packaging
-pip_install onnxruntime==1.23.1
-pip_install onnxscript==0.5.4
+pip_install onnxruntime==1.23.0
+pip_install onnxscript==0.5.3
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/

View File

@@ -40,7 +40,11 @@ EOF
 # Default url values
 rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
+amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
+# Add amdgpu repository
 UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
+echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
 # Add rocm repository
 wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -

View File

@@ -12,8 +12,8 @@ function do_install() {
 rocm_version_nodot=${rocm_version//./}
-# post merge of https://github.com/icl-utk-edu/magma/pull/65
-MAGMA_VERSION=c0792ae825fb36872784892ea643dd6f3456bc5f
+# https://github.com/icl-utk-edu/magma/pull/65
+MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
 magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
 rocm_dir="/opt/rocm"

View File

@@ -39,13 +39,9 @@ case ${DOCKER_TAG_PREFIX} in
 DOCKER_GPU_BUILD_ARG=""
 ;;
 rocm*)
-# we want the patch version of 7.0 instead
-if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
-GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
-fi
 # we want the patch version of 6.4 instead
 if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
-GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
+GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
 fi
 BASE_TARGET=rocm
 GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete

View File

@@ -149,7 +149,7 @@ FROM cpu_final as rocm_final
 ARG ROCM_VERSION=6.0
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-ARG DEVTOOLSET_VERSION=13
+ARG DEVTOOLSET_VERSION=11
 ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
 # Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
 # below workaround helps avoid error

View File

@@ -75,13 +75,9 @@ case ${image} in
 DOCKERFILE_SUFFIX="_cuda_aarch64"
 ;;
 manylinux2_28-builder:rocm*)
-# we want the patch version of 7.0 instead
-if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
-GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
-fi
 # we want the patch version of 6.4 instead
 if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
-GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
+GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
 fi
 TARGET=rocm_final
 MANY_LINUX_VERSION="2_28"
@@ -97,7 +93,7 @@ case ${image} in
 manylinux2_28-builder:xpu)
 TARGET=xpu_final
 GPU_IMAGE=amd64/almalinux:8
-DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13"
+DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
 MANY_LINUX_VERSION="2_28"
 ;;
 *)

View File

@@ -138,12 +138,10 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
 #test_binary_ufuncs.py
 numpy==1.22.4; python_version == "3.10"
 numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
-numpy==2.1.2; python_version >= "3.13" and python_version < "3.14"
-numpy==2.3.4; python_version >= "3.14"
+numpy==2.1.2; python_version >= "3.13"
 pandas==2.0.3; python_version < "3.13"
-pandas==2.2.3; python_version >= "3.13" and python_version < "3.14"
-pandas==2.3.3; python_version >= "3.14"
+pandas==2.2.3; python_version >= "3.13"
 #onnxruntime
 #Description: scoring engine for Open Neural Network Exchange (ONNX) models
@@ -155,8 +153,7 @@ opt-einsum==3.3
 #Pinned versions: 3.3
 #test that import: test_linalg.py
-optree==0.13.0 ; python_version < "3.14"
-optree==0.17.0 ; python_version >= "3.14"
+optree==0.13.0
 #Description: A library for tree manipulation
 #Pinned versions: 0.13.0
 #test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
@@ -255,8 +252,7 @@ scikit-image==0.22.0
 #test that import:
 scipy==1.10.1 ; python_version <= "3.11"
-scipy==1.14.1 ; python_version > "3.11" and python_version < "3.14"
-scipy==1.16.2 ; python_version >= "3.14"
+scipy==1.14.1 ; python_version >= "3.12"
 # Pin SciPy because of failing distribution tests (see #60347)
 #Description: scientific python
 #Pinned versions: 1.10.1
@@ -328,8 +324,7 @@ pywavelets==1.7.0 ; python_version >= "3.12"
 #Pinned versions: 1.4.1
 #test that import:
-lxml==5.3.0 ; python_version < "3.14"
-lxml==6.0.2 ; python_version >= "3.14"
+lxml==5.3.0
 #Description: This is a requirement of unittest-xml-reporting
 PyGithub==2.3.0
@@ -339,14 +334,12 @@ sympy==1.13.3
 #Pinned versions:
 #test that import:
-onnx==1.19.1 ; python_version < "3.14"
-# Unpin once Python 3.14 is supported. See onnxruntime issue 26309.
-onnx==1.18.0 ; python_version == "3.14"
+onnx==1.18.0
 #Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
-onnxscript==0.5.4
+onnxscript==0.5.3
 #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
@@ -366,7 +359,7 @@ pwlf==2.2.1
 #test that import: test_sac_estimator.py
 # To build PyTorch itself
-pyyaml==6.0.3
+pyyaml==6.0.2
 pyzstd
 setuptools==78.1.1
 packaging==23.1

View File

@@ -1,11 +1,15 @@
-sphinx==7.2.6
+sphinx==5.3.0
 #Description: This is used to generate PyTorch docs
-#Pinned versions: 7.2.6
-pytorch_sphinx_theme2==0.2.0
-#Description: This is needed to generate PyTorch docs
-#Pinned versions: 0.2.0
+#Pinned versions: 5.3.0
+standard-imghdr==3.13.0; python_version >= "3.13"
+#Description: This is needed by Sphinx, so it needs to be added here.
+# The reasons are as follows:
+# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
+# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
+# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
 # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
 # but it doesn't seem to work and hangs around idly. The initial thought that it is probably
 # something related to Docker setup. We can investigate this later.
@@ -32,17 +36,17 @@ tensorboard==2.18.0 ; python_version >= "3.13"
 #Description: This is used to generate PyTorch docs
 #Pinned versions: 2.13.0
-breathe==4.36.0
+breathe==4.34.0
 #Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 4.36.0
-exhale==0.3.7
+#Pinned versions: 4.34.0
+exhale==0.2.3
 #Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 0.3.7
-docutils==0.20
+#Pinned versions: 0.2.3
+docutils==0.16
 #Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 0.20
+#Pinned versions: 0.16
 bs4==0.0.1
 #Description: This is used to generate PyTorch C++ docs
@@ -52,13 +56,13 @@ IPython==8.12.0
 #Description: This is used to generate PyTorch functorch docs
 #Pinned versions: 8.12.0
-myst-nb==1.3.0
+myst-nb==0.17.2
 #Description: This is used to generate PyTorch functorch and torch.compile docs.
-#Pinned versions: 1.3.0
+#Pinned versions: 0.17.2
 # The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
 python-etcd==0.4.5
 sphinx-copybutton==0.5.0
-sphinx-design==0.6.1
+sphinx-design==0.4.0
 sphinxcontrib-mermaid==1.0.0
-myst-parser==4.0.1
+myst-parser==0.18.1

View File

@@ -54,15 +54,12 @@ ENV OPENSSL_DIR /opt/openssl
 RUN rm install_openssl.sh
 ARG INDUCTOR_BENCHMARKS
-ARG ANACONDA_PYTHON_VERSION
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
 COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
 COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
 COPY ci_commit_pins/timm.txt timm.txt
-COPY ci_commit_pins/torchbench.txt torchbench.txt
 RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
-RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
+RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt
 # Install XPU Dependencies
 ARG XPU_VERSION

View File

@@ -100,16 +100,9 @@ COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
 COPY ci_commit_pins/timm.txt timm.txt
 COPY ci_commit_pins/torchbench.txt torchbench.txt
-# Only build aoti cpp tests when INDUCTOR_BENCHMARKS is set to True
-ENV BUILD_AOT_INDUCTOR_TEST ${INDUCTOR_BENCHMARKS}
 RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
 RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
-ARG INSTALL_MINGW
-COPY ./common/install_mingw.sh install_mingw.sh
-RUN if [ -n "${INSTALL_MINGW}" ]; then bash ./install_mingw.sh; fi
-RUN rm install_mingw.sh
 ARG TRITON
 ARG TRITON_CPU

View File

@@ -57,8 +57,8 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
 logger.info("Successfully cloned %s", target)
 return r, commit
-except GitCommandError:
-logger.exception("Git operation failed")
+except GitCommandError as e:
+logger.error("Git operation failed: %s", e)
 raise

View File

@@ -6,7 +6,7 @@ dependencies = [
 "GitPython==3.1.45",
 "docker==7.1.0",
 "pytest==7.3.2",
-"uv==0.9.6"
+"uv==0.8.6"
 ]
 [tool.setuptools]

View File

@@ -1,7 +1,7 @@
 SHELL=/usr/bin/env bash
 DOCKER_CMD ?= docker
-DESIRED_ROCM ?= 7.1
+DESIRED_ROCM ?= 7.0
 DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
 PACKAGE_NAME = magma-rocm
 # inherit this from underlying docker image, do not pass this env var to docker
@@ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
 magma-rocm/build_magma.sh
 .PHONY: all
-all: magma-rocm71
 all: magma-rocm70
 all: magma-rocm64
@@ -25,11 +24,6 @@ clean:
 $(RM) -r magma-*
 $(RM) -r output
-.PHONY: magma-rocm71
-magma-rocm71: DESIRED_ROCM := 7.1
-magma-rocm71:
-$(DOCKER_RUN)
 .PHONY: magma-rocm70
 magma-rocm70: DESIRED_ROCM := 7.0
 magma-rocm70:

View File

@@ -6,8 +6,8 @@ set -eou pipefail
 # The script expects DESIRED_CUDA and PACKAGE_NAME to be set
 ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-# post merge of https://github.com/icl-utk-edu/magma/pull/65
-MAGMA_VERSION=c0792ae825fb36872784892ea643dd6f3456bc5f
+# https://github.com/icl-utk-edu/magma/pull/65
+MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
 # Folders for the build
 PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
@@ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE
 # Fetch magma sources and verify checksum
 pushd ${PACKAGE_DIR}
-git clone https://github.com/icl-utk-edu/magma
+git clone https://github.com/jeffdaily/magma
 pushd magma
 git checkout ${MAGMA_VERSION}
 popd

View File

@@ -187,22 +187,19 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
 export USE_CUFILE=0
 else
 DEPS_LIST+=(
+"/usr/local/cuda/lib64/libnvToolsExt.so.1"
 "/usr/local/cuda/lib64/libcublas.so.12"
 "/usr/local/cuda/lib64/libcublasLt.so.12"
 "/usr/local/cuda/lib64/libcudart.so.12"
 "/usr/local/cuda/lib64/libnvrtc.so.12"
 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
 DEPS_SONAME+=(
+"libnvToolsExt.so.1"
 "libcublas.so.12"
 "libcublasLt.so.12"
 "libcudart.so.12"
 "libnvrtc.so.12"
 "libcupti.so.12")
-if [[ $CUDA_VERSION != 12.9* ]]; then
-DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
-DEPS_SONAME+=("libnvToolsExt.so.1")
-fi
 fi
 else
 echo "Using nvidia libs from pypi."

View File

@@ -426,7 +426,7 @@ fi
 if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
 # export test times so that potential sharded tests that'll branch off this build will use consistent data
 # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
-PYTHONPATH=. python tools/stats/export_test_times.py
+python tools/stats/export_test_times.py
 fi
 # don't do this for bazel or s390x or riscv64 as they don't use sccache
 if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then

View File

@@ -89,41 +89,23 @@ if [ "$is_main_doc" = true ]; then
 make coverage
 # Now we have the coverage report, we need to make sure it is empty.
-# Sphinx 7.2.6+ format: python.txt contains a statistics table with a TOTAL row
-# showing the undocumented count in the third column.
-# Example: | TOTAL | 99.83% | 2 |
+# Count the number of lines in the file and turn that number into a variable
+# $lines. The `cut -f1 ...` is to only parse the number, not the filename
+# Skip the report header by subtracting 2: the header will be output even if
+# there are no undocumented items.
 #
 # Also: see docs/source/conf.py for "coverage_ignore*" items, which should
 # be documented then removed from there.
-# Extract undocumented count from TOTAL row in Sphinx 7.2.6 statistics table
-# The table format is: | Module | Coverage | Undocumented |
-# Extract the third column (undocumented count) from the TOTAL row
-undocumented=$(grep "| TOTAL" build/coverage/python.txt | awk -F'|' '{print $4}' | tr -d ' ')
-if [ -z "$undocumented" ] || ! [[ "$undocumented" =~ ^[0-9]+$ ]]; then
+lines=$(wc -l build/coverage/python.txt 2>/dev/null |cut -f1 -d' ')
+undocumented=$((lines - 2))
+if [ $undocumented -lt 0 ]; then
 echo coverage output not found
 exit 1
-elif [ "$undocumented" -gt 0 ]; then
-set +x # Disable command echoing for cleaner output
-echo ""
-echo "====================="
-echo "UNDOCUMENTED OBJECTS:"
-echo "====================="
-echo ""
-# Find the line number of the TOTAL row and print only what comes after it
-total_line=$(grep -n "| TOTAL" build/coverage/python.txt | cut -d: -f1)
-if [ -n "$total_line" ]; then
-# Print only the detailed list (skip the statistics table)
-tail -n +$((total_line + 2)) build/coverage/python.txt
-else
-# Fallback to showing entire file if TOTAL line not found
-cat build/coverage/python.txt
-fi
-echo ""
+elif [ $undocumented -gt 0 ]; then
+echo undocumented objects found:
+cat build/coverage/python.txt
 echo "Make sure you've updated relevant .rsts in docs/source!"
-echo "You can reproduce locally by running 'cd docs && make coverage && tail -n +\$((grep -n \"| TOTAL\" build/coverage/python.txt | cut -d: -f1) + 2)) build/coverage/python.txt'"
-set -x # Re-enable command echoing
+echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
 exit 1
 fi
 else

View File

@@ -337,7 +337,7 @@ test_python() {
 test_python_smoke() {
 # Smoke tests for H100/B200
-time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 inductor/test_max_autotune inductor/test_cutedsl_grouped_mm $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
+time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
 assert_git_not_dirty
 }
@@ -460,37 +460,31 @@ test_inductor_shard() {
 --verbose
 }
-test_inductor_aoti_cpp() {
+test_inductor_aoti() {
+# docker build uses bdist_wheel which does not work with test_aot_inductor
+# TODO: need a faster way to build
 if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
 # We need to hipify before building again
 python3 tools/amd_build/build_amd.py
 fi
 if [[ "$BUILD_ENVIRONMENT" == *sm86* ]]; then
-BUILD_COMMAND=(TORCH_CUDA_ARCH_LIST=8.6 USE_FLASH_ATTENTION=OFF python -m pip install --no-build-isolation -v -e .)
 # TODO: Replace me completely, as one should not use conda libstdc++, nor need special path to TORCH_LIB
 TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="/opt/conda/envs/py_3.10/lib:${TORCH_LIB_DIR}:${LD_LIBRARY_PATH}")
 else
-BUILD_COMMAND=(python -m pip install --no-build-isolation -v -e .)
 TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}")
 fi
-# aoti cmake custom command requires `torch` to be installed
-# initialize the cmake build cache and install torch
-/usr/bin/env "${BUILD_COMMAND[@]}"
-# rebuild with the build cache with `BUILD_AOT_INDUCTOR_TEST` enabled
-/usr/bin/env CMAKE_FRESH=1 BUILD_AOT_INDUCTOR_TEST=1 "${BUILD_COMMAND[@]}"
 /usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference cpp/test_vec_half_AVX2 -dist=loadfile
 }
-test_inductor_aoti_cross_compile_for_windows() {
-TEST_REPORTS_DIR=$(pwd)/test/test-reports
-mkdir -p "$TEST_REPORTS_DIR"
-# Set WINDOWS_CUDA_HOME environment variable
-WINDOWS_CUDA_HOME="$(pwd)/win-torch-wheel-extracted"
-export WINDOWS_CUDA_HOME
-echo "WINDOWS_CUDA_HOME is set to: $WINDOWS_CUDA_HOME"
-echo "Contents:"
-ls -lah "$(pwd)/win-torch-wheel-extracted/lib/x64/" || true
-python test/inductor/test_aoti_cross_compile_windows.py -k compile --package-dir "$TEST_REPORTS_DIR" --win-torch-lib-dir "$(pwd)/win-torch-wheel-extracted/torch/lib"
-}
 test_inductor_cpp_wrapper_shard() {
 if [[ -z "$NUM_TEST_SHARDS" ]]; then
 echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
@@ -572,8 +566,6 @@ fi
 if [[ "${TEST_CONFIG}" == *cpu* ]]; then
 DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
-elif [[ "${TEST_CONFIG}" == *xpu* ]]; then
-DYNAMO_BENCHMARK_FLAGS+=(--device xpu)
 else
 DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
 fi
@@ -667,8 +659,6 @@ test_perf_for_dashboard() {
 device=cuda_b200
 elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
 device=rocm
-elif [[ "${TEST_CONFIG}" == *xpu* ]]; then
-device=xpu
 fi
 for mode in "${modes[@]}"; do
@@ -910,7 +900,7 @@ test_inductor_set_cpu_affinity(){
 export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD"
 export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
-if [[ "$(uname -m)" != "aarch64" ]]; then
+if [[ "${TEST_CONFIG}" != *aarch64* ]]; then
 # Use Intel OpenMP for x86
 IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
 export LD_PRELOAD="$IOMP_LIB":"$LD_PRELOAD"
@@ -924,7 +914,7 @@ test_inductor_set_cpu_affinity(){
 cores=$((cpus / thread_per_core))
 # Set number of cores to 16 on aarch64 for performance runs
-if [[ "$(uname -m)" == "aarch64" && $cores -gt 16 ]]; then
+if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
 cores=16
 fi
 export OMP_NUM_THREADS=$cores
@@ -1625,7 +1615,6 @@ test_operator_benchmark() {
 TEST_REPORTS_DIR=$(pwd)/test/test-reports
 mkdir -p "$TEST_REPORTS_DIR"
 TEST_DIR=$(pwd)
-ARCH=$(uname -m)
 test_inductor_set_cpu_affinity
@@ -1640,7 +1629,7 @@ test_operator_benchmark() {
 pip_install pandas
 python check_perf_csv.py \
 --actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
---expected "${ARCH}_expected_ci_operator_benchmark_eager_float32_cpu.csv"
+--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
 }
 test_operator_microbenchmark() {
@@ -1653,7 +1642,7 @@ test_operator_microbenchmark() {
 cd "${TEST_DIR}"/benchmarks/operator_benchmark
-for OP_BENCHMARK_TESTS in matmul mm addmm bmm conv; do
+for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
 $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
 --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
 --benchmark-name "PyTorch operator microbenchmark" --use-compile
@@ -1677,7 +1666,7 @@ if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then
 python -m pip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0
 fi
 python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py
-elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" == 'default' ]]; then
+elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
 test_linux_aarch64
 elif [[ "${TEST_CONFIG}" == *backward* ]]; then
 test_forward_backward_compatibility
@@ -1728,8 +1717,6 @@ elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
 test_inductor_triton_cpu
 elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
 test_inductor_micro_benchmark
-elif [[ "${TEST_CONFIG}" == *aoti_cross_compile_for_windows* ]]; then
-test_inductor_aoti_cross_compile_for_windows
 elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
 install_torchvision
 id=$((SHARD_NUMBER-1))
@@ -1761,7 +1748,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
 else
 # Do this after checkout_install_torchbench to ensure we clobber any
 # nightlies that torchbench may pull in
-if [[ "${TEST_CONFIG}" != *cpu* && "${TEST_CONFIG}" != *xpu* ]]; then
+if [[ "${TEST_CONFIG}" != *cpu* ]]; then
 install_torchrec_and_fbgemm
 fi
 PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
@@ -1770,7 +1757,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
 install_torchvision
 PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
 if [[ "$SHARD_NUMBER" -eq "1" ]]; then
-test_inductor_aoti_cpp
+test_inductor_aoti
 fi
 elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
 install_torchvision

View File

@@ -7,9 +7,12 @@ if "%DESIRED_PYTHON%" == "3.13t" (
 set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe"
 set ADDITIONAL_OPTIONS="Include_freethreaded=1"
 set PYTHON_EXEC="python3.13t"
+) else if "%DESIRED_PYTHON%"=="3.14" (
+echo Python version is set to 3.14 or 3.14t
+set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
 ) else if "%DESIRED_PYTHON%"=="3.14t" (
 echo Python version is set to 3.14 or 3.14t
-set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0-amd64.exe"
+set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
 set ADDITIONAL_OPTIONS="Include_freethreaded=1"
 set PYTHON_EXEC="python3.14t"
 ) else (

View File

@@ -163,13 +163,8 @@ if [[ "$(uname)" != Darwin ]]; then
 MEMORY_LIMIT_MAX_JOBS=12
 NUM_CPUS=$(( $(nproc) - 2 ))
-if [[ "$(uname)" == Linux ]]; then
-# Defaults here for **binary** linux builds so they can be changed in one place
-export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
-else
-# For other builds
-export MAX_JOBS=${NUM_CPUS}
-fi
+# Defaults here for **binary** linux builds so they can be changed in one place
+export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
 cat >>"$envfile" <<EOL
 export MAX_JOBS="${MAX_JOBS}"

View File

@@ -60,11 +60,9 @@ performance-*,
 readability-container-size-empty,
 readability-delete-null-pointer,
 readability-duplicate-include,
-readability-named-parameter,
 readability-misplaced-array-index,
 readability-redundant*,
 readability-simplify-subscript-expr,
-readability-static-definition-in-anonymous-namespace
 readability-string-compare,
 -readability-redundant-access-specifiers,
 -readability-redundant-control-flow,

View File

@@ -1,319 +0,0 @@
---
name: add-uint-support
description: Add unsigned integer (uint) type support to PyTorch operators by updating AT_DISPATCH macros. Use when adding support for uint16, uint32, uint64 types to operators, kernels, or when user mentions enabling unsigned types, barebones unsigned types, or uint support.
---
# Add Unsigned Integer (uint) Support to Operators
This skill helps add support for unsigned integer types (uint16, uint32, uint64) to PyTorch operators by updating their AT_DISPATCH macros.
## When to use this skill
Use this skill when:
- Adding uint16, uint32, or uint64 support to an operator
- User mentions "unsigned types", "uint support", "barebones unsigned types"
- Enabling support for kUInt16, kUInt32, kUInt64 in kernels
- Working with operator implementations that need expanded type coverage
## Quick reference
**Add unsigned types to existing dispatch:**
```cpp
// Before
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES));
// After (method 1: add unsigned types explicitly)
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
// After (method 2: use V2 integral types if AT_INTEGRAL_TYPES present)
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES));
```
## Type group reference
**Unsigned type groups:**
- `AT_BAREBONES_UNSIGNED_TYPES`: kUInt16, kUInt32, kUInt64
- `AT_INTEGRAL_TYPES_V2`: AT_INTEGRAL_TYPES + AT_BAREBONES_UNSIGNED_TYPES
**Relationship:**
```cpp
AT_INTEGRAL_TYPES // kByte, kChar, kInt, kLong, kShort
AT_BAREBONES_UNSIGNED_TYPES // kUInt16, kUInt32, kUInt64
AT_INTEGRAL_TYPES_V2 // INTEGRAL_TYPES + BAREBONES_UNSIGNED_TYPES
```
## Instructions
### Step 1: Determine if conversion to V2 is needed
Check if the file uses AT_DISPATCH_V2:
**If using old AT_DISPATCH:**
- First convert to AT_DISPATCH_V2 using the at-dispatch-v2 skill
- Then proceed with adding uint support
**If already using AT_DISPATCH_V2:**
- Proceed directly to Step 2
### Step 2: Analyze the current dispatch macro
Identify what type groups are currently in use:
```cpp
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
// body
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
^^^^^^^^^^^^^^^^^^^^^^^^^
Current type coverage
```
Common patterns:
- `AT_EXPAND(AT_ALL_TYPES)` → includes AT_INTEGRAL_TYPES + AT_FLOATING_TYPES
- `AT_EXPAND(AT_INTEGRAL_TYPES)` → signed integers only
- `AT_EXPAND(AT_FLOATING_TYPES)` → floating point types
### Step 3: Choose the uint addition method
Two approaches:
**Method 1: Add AT_BAREBONES_UNSIGNED_TYPES explicitly**
- Use when: You want to be explicit about adding uint support
- Add `AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES)` to the type list
**Method 2: Substitute AT_INTEGRAL_TYPES with AT_INTEGRAL_TYPES_V2**
- Use when: The dispatch already uses `AT_EXPAND(AT_INTEGRAL_TYPES)`
- More concise: replaces one type group with its superset
- Only applicable if AT_INTEGRAL_TYPES is present
### Step 4: Apply the transformation
**Method 1 example:**
```cpp
// Before
AT_DISPATCH_V2(
dtype,
"min_values_cuda",
AT_WRAP([&]() {
kernel_impl<scalar_t>(iter);
}),
AT_EXPAND(AT_ALL_TYPES),
kBFloat16, kHalf, kBool
);
// After (add unsigned types)
AT_DISPATCH_V2(
dtype,
"min_values_cuda",
AT_WRAP([&]() {
kernel_impl<scalar_t>(iter);
}),
AT_EXPAND(AT_ALL_TYPES),
AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES),
kBFloat16, kHalf, kBool
);
```
**Method 2 example:**
```cpp
// Before
AT_DISPATCH_V2(
dtype,
"integral_op",
AT_WRAP([&]() {
kernel<scalar_t>();
}),
AT_EXPAND(AT_INTEGRAL_TYPES)
);
// After (substitute with V2)
AT_DISPATCH_V2(
dtype,
"integral_op",
AT_WRAP([&]() {
kernel<scalar_t>();
}),
AT_EXPAND(AT_INTEGRAL_TYPES_V2)
);
```
### Step 5: Handle AT_ALL_TYPES vs individual type groups
If the dispatch uses `AT_EXPAND(AT_ALL_TYPES)`:
- `AT_ALL_TYPES` = `AT_INTEGRAL_TYPES` + `AT_FLOATING_TYPES`
- To add uint: add `AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES)` to the list
If the dispatch separately lists INTEGRAL and FLOATING:
```cpp
// Before
AT_EXPAND(AT_INTEGRAL_TYPES), AT_EXPAND(AT_FLOATING_TYPES)
// After (Method 2 preferred)
AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES)
```
### Step 6: Verify all dispatch sites
Check the file for ALL dispatch macros that need uint support:
- Some operators have multiple dispatch sites (CPU, CUDA, different functions)
- Apply the transformation consistently across all sites
- Ensure each gets the same type coverage updates
### Step 7: Validate the changes
Check that:
- [ ] AT_DISPATCH_V2 format is used (not old AT_DISPATCH)
- [ ] Unsigned types are added via one of the two methods
- [ ] All relevant dispatch sites in the file are updated
- [ ] Type groups use `AT_EXPAND()`
- [ ] Arguments are properly formatted and comma-separated
## Common patterns
### Pattern 1: AT_ALL_TYPES + extras
```cpp
// Before
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
// After
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kHalf, kBFloat16);
```
### Pattern 2: Separate INTEGRAL + FLOATING
```cpp
// Before
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_INTEGRAL_TYPES), AT_EXPAND(AT_FLOATING_TYPES));
// After
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES));
```
### Pattern 3: Old dispatch needs conversion first
```cpp
// Before (needs v2 conversion first)
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBFloat16, dtype, "op", [&]() {
kernel<scalar_t>();
});
// After v2 conversion
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
// After adding uint support
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kHalf, kBFloat16);
```
## Multiple dispatch sites example
For a file with multiple functions:
```cpp
void min_values_kernel_cuda(TensorIterator& iter) {
AT_DISPATCH_V2(iter.dtype(), "min_values_cuda", AT_WRAP([&]() {
impl<scalar_t>(iter);
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kBFloat16, kHalf);
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// Added uint support
}
void min_launch_kernel(TensorIterator &iter) {
AT_DISPATCH_V2(iter.input_dtype(), "min_cuda", AT_WRAP([&]() {
gpu_reduce_kernel<scalar_t>(iter);
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kBFloat16, kHalf);
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// Added uint support here too
}
```
## Decision tree
Use this decision tree to determine the approach:
```
Is the file using AT_DISPATCH_V2?
├─ No → Use at-dispatch-v2 skill first, then continue
└─ Yes
└─ Does it use AT_EXPAND(AT_INTEGRAL_TYPES)?
├─ Yes → Replace with AT_EXPAND(AT_INTEGRAL_TYPES_V2)
└─ No → Add AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES) to type list
```
## Edge cases
### Case 1: Dispatch with only floating types
If the operator only supports floating point types, don't add uint support:
```cpp
// Leave as-is - floating point only operator
AT_DISPATCH_V2(dtype, "float_op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf);
```
### Case 2: Complex types present
Unsigned types work alongside complex types:
```cpp
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES),
AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES),
AT_EXPAND(AT_COMPLEX_TYPES),
kHalf, kBFloat16);
```
### Case 3: Already has uint support
Check if uint types are already present:
- If `AT_INTEGRAL_TYPES_V2` is used → already has uint support
- If `AT_BAREBONES_UNSIGNED_TYPES` is already in list → already has uint support
- Skip the file if uint support is already present
## Workflow
When asked to add uint support:
1. Read the target file
2. Check if using AT_DISPATCH_V2:
- If not → use at-dispatch-v2 skill first
3. Identify all dispatch macro sites
4. For each dispatch:
- Analyze current type groups
- Choose method (add BAREBONES_UNSIGNED or upgrade to V2)
- Apply transformation with Edit tool
5. Show the user the changes
6. Explain what was modified
## Important notes
- Always check if v2 conversion is needed first
- Apply changes consistently across all dispatch sites in the file
- Method 2 (AT_INTEGRAL_TYPES_V2) is cleaner when applicable
- Method 1 (explicit AT_BAREBONES_UNSIGNED_TYPES) is more explicit
- Unsigned types are: kUInt16, kUInt32, kUInt64 (not kByte which is uint8)
- Some operators may not semantically support unsigned types - use judgment
## Testing
After adding uint support, the operator should accept uint16, uint32, and uint64 tensors. The user is responsible for functional testing.
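
A minimal spot-check along those lines might look like the following, assuming a build where the updated operator gained uint coverage (the dtype and op here are chosen purely for illustration):

```python
import torch

# Hypothetical check: substitute whichever operator was actually updated.
x = torch.tensor([3, 1, 2], dtype=torch.uint16)
print(x.min())  # should dispatch instead of raising "not implemented for 'UInt16'"
```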

View File

@@ -1,305 +0,0 @@
---
name: at-dispatch-v2
description: Convert PyTorch AT_DISPATCH macros to AT_DISPATCH_V2 format in ATen C++ code. Use when porting AT_DISPATCH_ALL_TYPES_AND*, AT_DISPATCH_FLOATING_TYPES*, or other dispatch macros to the new v2 API. For ATen kernel files, CUDA kernels, and native operator implementations.
---
# AT_DISPATCH to AT_DISPATCH_V2 Converter
This skill helps convert PyTorch's legacy AT_DISPATCH macros to the new AT_DISPATCH_V2 format, as defined in `aten/src/ATen/Dispatch_v2.h`.
## When to use this skill
Use this skill when:
- Converting AT_DISPATCH_* macros to AT_DISPATCH_V2
- Porting ATen kernels to use the new dispatch API
- Working with files in `aten/src/ATen/native/` that use dispatch macros
- User mentions "AT_DISPATCH", "dispatch v2", "Dispatch_v2.h", or macro conversion
## Quick reference
**Old format:**
```cpp
AT_DISPATCH_ALL_TYPES_AND3(kBFloat16, kHalf, kBool, dtype, "kernel_name", [&]() {
// lambda body
});
```
**New format:**
```cpp
AT_DISPATCH_V2(dtype, "kernel_name", AT_WRAP([&]() {
// lambda body
}), AT_EXPAND(AT_ALL_TYPES), kBFloat16, kHalf, kBool);
```
## Key transformations
1. **Reorder arguments**: `scalar_type` and `name` come first, then lambda, then types
2. **Wrap the lambda**: Use `AT_WRAP(lambda)` to handle internal commas
3. **Expand type groups**: Use `AT_EXPAND(AT_ALL_TYPES)` instead of implicit expansion
4. **List individual types**: Add extra types (kHalf, kBFloat16, etc.) after expanded groups
5. **Add include**: `#include <ATen/Dispatch_v2.h>` near other Dispatch includes
## Instructions
### Step 1: Add the Dispatch_v2.h include
Add the v2 header near the existing `#include <ATen/Dispatch.h>`:
```cpp
#include <ATen/Dispatch.h>
#include <ATen/Dispatch_v2.h>
```
Keep the old Dispatch.h include for now (other code may still need it).
### Step 2: Identify the old dispatch pattern
Common patterns to convert:
- `AT_DISPATCH_ALL_TYPES_AND{2,3,4}(type1, type2, ..., scalar_type, name, lambda)`
- `AT_DISPATCH_FLOATING_TYPES_AND{2,3}(type1, type2, ..., scalar_type, name, lambda)`
- `AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND{2,3}(type1, ..., scalar_type, name, lambda)`
- `AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND{2,3}(type1, ..., scalar_type, name, lambda)`
### Step 3: Map the old macro to type groups
Identify which type group macro corresponds to the base types:
| Old macro base | AT_DISPATCH_V2 type group |
|----------------|---------------------------|
| `ALL_TYPES` | `AT_EXPAND(AT_ALL_TYPES)` |
| `FLOATING_TYPES` | `AT_EXPAND(AT_FLOATING_TYPES)` |
| `INTEGRAL_TYPES` | `AT_EXPAND(AT_INTEGRAL_TYPES)` |
| `COMPLEX_TYPES` | `AT_EXPAND(AT_COMPLEX_TYPES)` |
| `ALL_TYPES_AND_COMPLEX` | `AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX)` |
For combined patterns, use multiple `AT_EXPAND()` entries:
```cpp
// Old: AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(...)
// New: AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), type1, type2
```
### Step 4: Extract the individual types
From `AT_DISPATCH_*_AND2(type1, type2, ...)` or `AT_DISPATCH_*_AND3(type1, type2, type3, ...)`, extract the individual types (type1, type2, etc.).
These become the trailing arguments after the type group:
```cpp
AT_DISPATCH_V2(..., AT_EXPAND(AT_ALL_TYPES), kBFloat16, kHalf, kBool)
^^^^^^^^^^^^^^^^^^^^^^^^
Individual types from AND3
```
### Step 5: Transform to AT_DISPATCH_V2
Apply the transformation:
**Pattern:**
```cpp
AT_DISPATCH_V2(
scalar_type, // 1st: The dtype expression
"name", // 2nd: The debug string
AT_WRAP(lambda), // 3rd: The lambda wrapped in AT_WRAP
type_groups, // 4th+: Type groups with AT_EXPAND()
individual_types // Last: Individual types
)
```
**Example transformation:**
```cpp
// BEFORE
AT_DISPATCH_ALL_TYPES_AND3(
kBFloat16, kHalf, kBool,
iter.dtype(),
"min_values_cuda",
[&]() {
min_values_kernel_cuda_impl<scalar_t>(iter);
}
);
// AFTER
AT_DISPATCH_V2(
iter.dtype(),
"min_values_cuda",
AT_WRAP([&]() {
min_values_kernel_cuda_impl<scalar_t>(iter);
}),
AT_EXPAND(AT_ALL_TYPES),
kBFloat16, kHalf, kBool
);
```
### Step 6: Handle multi-line lambdas
For lambdas with internal commas or complex expressions, AT_WRAP is essential:
```cpp
AT_DISPATCH_V2(
dtype,
"complex_kernel",
AT_WRAP([&]() {
gpu_reduce_kernel<scalar_t, scalar_t>(
iter,
MinOps<scalar_t>{},
thrust::pair<scalar_t, int64_t>(upper_bound(), 0) // Commas inside!
);
}),
AT_EXPAND(AT_ALL_TYPES)
);
```
### Step 7: Verify the conversion
Check that:
- [ ] `AT_WRAP()` wraps the entire lambda
- [ ] Type groups use `AT_EXPAND()`
- [ ] Individual types don't have `AT_EXPAND()` (just `kBFloat16`, not `AT_EXPAND(kBFloat16)`)
- [ ] Argument order is: scalar_type, name, lambda, types
- [ ] Include added: `#include <ATen/Dispatch_v2.h>`
## Type group reference
Available type group macros (use with `AT_EXPAND()`):
```cpp
AT_INTEGRAL_TYPES // kByte, kChar, kInt, kLong, kShort
AT_FLOATING_TYPES // kDouble, kFloat
AT_COMPLEX_TYPES // kComplexDouble, kComplexFloat
AT_QINT_TYPES // kQInt8, kQUInt8, kQInt32
AT_ALL_TYPES // INTEGRAL_TYPES + FLOATING_TYPES
AT_ALL_TYPES_AND_COMPLEX // ALL_TYPES + COMPLEX_TYPES
AT_INTEGRAL_TYPES_V2 // INTEGRAL_TYPES + unsigned types
AT_BAREBONES_UNSIGNED_TYPES // kUInt16, kUInt32, kUInt64
AT_FLOAT8_TYPES // Float8 variants
```
## Common patterns
### Pattern: AT_DISPATCH_ALL_TYPES_AND2
```cpp
// Before
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBFloat16, dtype, "op", [&]() {
kernel<scalar_t>(data);
});
// After
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>(data);
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
```
### Pattern: AT_DISPATCH_FLOATING_TYPES_AND3
```cpp
// Before
AT_DISPATCH_FLOATING_TYPES_AND3(kHalf, kBFloat16, kFloat8_e4m3fn,
tensor.scalar_type(), "float_op", [&] {
process<scalar_t>(tensor);
});
// After
AT_DISPATCH_V2(tensor.scalar_type(), "float_op", AT_WRAP([&] {
process<scalar_t>(tensor);
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf, kBFloat16, kFloat8_e4m3fn);
```
### Pattern: AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2
```cpp
// Before
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(
kComplexHalf, kHalf,
self.scalar_type(),
"complex_op",
[&] {
result = compute<scalar_t>(self);
}
);
// After
AT_DISPATCH_V2(
self.scalar_type(),
"complex_op",
AT_WRAP([&] {
result = compute<scalar_t>(self);
}),
AT_EXPAND(AT_ALL_TYPES),
AT_EXPAND(AT_COMPLEX_TYPES),
kComplexHalf,
kHalf
);
```
## Edge cases
### Case 1: No extra types (rare)
```cpp
// Before
AT_DISPATCH_ALL_TYPES(dtype, "op", [&]() { kernel<scalar_t>(); });
// After
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES));
```
### Case 2: Many individual types (AND4, AND5, etc.)
```cpp
// Before
AT_DISPATCH_FLOATING_TYPES_AND4(kHalf, kBFloat16, kFloat8_e4m3fn, kFloat8_e5m2,
dtype, "float8_op", [&]() { kernel<scalar_t>(); });
// After
AT_DISPATCH_V2(dtype, "float8_op", AT_WRAP([&]() {
kernel<scalar_t>();
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf, kBFloat16, kFloat8_e4m3fn, kFloat8_e5m2);
```
### Case 3: Lambda with no captures
```cpp
// Before
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBool, dtype, "op", []() {
static_kernel<scalar_t>();
});
// After
AT_DISPATCH_V2(dtype, "op", AT_WRAP([]() {
static_kernel<scalar_t>();
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBool);
```
## Benefits of AT_DISPATCH_V2
1. **No arity in macro name**: Don't need different macros for AND2, AND3, AND4
2. **Composable type sets**: Mix and match type groups with `AT_EXPAND()`
3. **Extensible**: Easy to add more types without hitting macro limits
4. **Clearer**: Type groups are explicit, not implicit in macro name
## Important notes
- Keep `#include <ATen/Dispatch.h>` - other code may need it
- The `AT_WRAP()` is mandatory - prevents comma parsing issues in the lambda
- Type groups need `AT_EXPAND()`, individual types don't
- The v2 API is in `aten/src/ATen/Dispatch_v2.h` - refer to it for full docs
- See the header file for the Python script to regenerate the macro implementation
## Workflow
When asked to convert AT_DISPATCH macros:
1. Read the file to identify all AT_DISPATCH uses
2. Add `#include <ATen/Dispatch_v2.h>` if not present
3. For each dispatch macro:
- Identify the pattern and extract components
- Map the base type group
- Extract individual types
- Construct the AT_DISPATCH_V2 call
- Apply with Edit tool
4. Show the user the complete converted file
5. Explain what was changed
Do NOT compile or test the code - focus on accurate conversion only.

View File

@@ -1,359 +0,0 @@
---
name: docstring
description: Write docstrings for PyTorch functions and methods following PyTorch conventions. Use when writing or updating docstrings in PyTorch code.
---
# PyTorch Docstring Writing Guide
This skill describes how to write docstrings for functions and methods in the PyTorch project, following the conventions in `torch/_tensor_docs.py` and `torch/nn/functional.py`.
## General Principles
- Use **raw strings** (`r"""..."""`) for all docstrings to avoid issues with LaTeX/math backslashes (see the toy example after this list)
- Follow **Sphinx/reStructuredText** (reST) format for documentation
- Be **concise but complete** - include all essential information
- Always include **examples** when possible
- Use **cross-references** to related functions/classes
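A quick toy example (not PyTorch code) of why the `r` prefix matters with LaTeX backslashes:
```python
plain = "\text{min}"   # "\t" is interpreted as a tab: the backslash is lost
raw = r"\text{min}"    # raw string keeps the backslash for Sphinx to render

print(len(plain), len(raw))  # 9 10
print(repr(plain))           # '\text{min}'   (tab + "ext{min}")
print(repr(raw))             # '\\text{min}'  (backslash preserved)
```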
## Docstring Structure
### 1. Function Signature (First Line)
Start with the function signature showing all parameters:
```python
r"""function_name(param1, param2, *, kwarg1=default1, kwarg2=default2) -> ReturnType
```
**Notes:**
- Include the function name
- Show positional and keyword-only arguments (use `*` separator)
- Include default values
- Show return type annotation
- This line should NOT end with a period
### 2. Brief Description
Provide a one-line description of what the function does:
```python
r"""conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor
Applies a 2D convolution over an input image composed of several input
planes.
```
### 3. Mathematical Formulas (if applicable)
Use Sphinx math directives for mathematical expressions:
```python
.. math::
\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
```
Or inline math: `:math:\`x^2\``
### 4. Cross-References
Link to related classes and functions using Sphinx roles:
- `:class:\`~torch.nn.ModuleName\`` - Link to a class
- `:func:\`torch.function_name\`` - Link to a function
- `:meth:\`~Tensor.method_name\`` - Link to a method
- `:attr:\`attribute_name\`` - Reference an attribute
- The `~` prefix shows only the last component (e.g., `Conv2d` instead of `torch.nn.Conv2d`)
**Example:**
```python
See :class:`~torch.nn.Conv2d` for details and output shape.
```
### 5. Notes and Warnings
Use admonitions for important information:
```python
.. note::
This function doesn't work directly with NLLLoss,
which expects the Log to be computed between the Softmax and itself.
Use log_softmax instead (it's faster and has better numerical properties).
.. warning::
:func:`new_tensor` always copies :attr:`data`. If you have a Tensor
``data`` and want to avoid a copy, use :func:`torch.Tensor.requires_grad_`
or :func:`torch.Tensor.detach`.
```
### 6. Args Section
Document all parameters with type annotations and descriptions:
```python
Args:
input (Tensor): input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
weight (Tensor): filters of shape :math:`(\text{out\_channels} , kH , kW)`
bias (Tensor, optional): optional bias tensor of shape :math:`(\text{out\_channels})`. Default: ``None``
stride (int or tuple): the stride of the convolving kernel. Can be a single number or a
tuple `(sH, sW)`. Default: 1
```
**Formatting rules:**
- Parameter name in **lowercase**
- Type in parentheses: `(Type)`, `(Type, optional)` for optional parameters
- Description follows the type
- For optional parameters, include "Default: ``value``" at the end
- Use double backticks for inline code: ``` ``None`` ```
- Indent continuation lines by 2 spaces
### 7. Keyword Args Section (if applicable)
Sometimes keyword arguments are documented separately:
```python
Keyword args:
dtype (:class:`torch.dtype`, optional): the desired type of returned tensor.
Default: if None, same :class:`torch.dtype` as this tensor.
device (:class:`torch.device`, optional): the desired device of returned tensor.
Default: if None, same :class:`torch.device` as this tensor.
requires_grad (bool, optional): If autograd should record operations on the
returned tensor. Default: ``False``.
```
### 8. Returns Section (if needed)
Document the return value:
```python
Returns:
Tensor: Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
If ``hard=True``, the returned samples will be one-hot, otherwise they will
be probability distributions that sum to 1 across `dim`.
```
Or simply include it in the function signature line if obvious from context.
### 9. Examples Section
Always include examples when possible:
```python
Examples::
>>> inputs = torch.randn(33, 16, 30)
>>> filters = torch.randn(20, 16, 5)
>>> F.conv1d(inputs, filters)
>>> # With square kernels and equal stride
>>> filters = torch.randn(8, 4, 3, 3)
>>> inputs = torch.randn(1, 4, 5, 5)
>>> F.conv2d(inputs, filters, padding=1)
```
**Formatting rules:**
- Use `Examples::` with double colon
- Use `>>>` prompt for Python code
- Include comments with `#` when helpful
- Show actual output when it helps understanding (indent without `>>>`)
### 10. External References
Link to papers or external documentation:
```python
.. _Link Name:
https://arxiv.org/abs/1611.00712
```
Reference them in text: ```See `Link Name`_```
## Method Types
### Native Python Functions
For regular Python functions, use a standard docstring:
```python
def relu(input: Tensor, inplace: bool = False) -> Tensor:
r"""relu(input, inplace=False) -> Tensor
Applies the rectified linear unit function element-wise. See
:class:`~torch.nn.ReLU` for more details.
"""
# implementation
```
### C-Bound Functions (using add_docstr)
For C-bound functions, use `_add_docstr`:
```python
conv1d = _add_docstr(
torch.conv1d,
r"""
conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor
Applies a 1D convolution over an input signal composed of several input
planes.
See :class:`~torch.nn.Conv1d` for details and output shape.
Args:
input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
weight: filters of shape :math:`(\text{out\_channels} , kW)`
...
""",
)
```
### In-Place Variants
For in-place operations (ending with `_`), reference the original:
```python
add_docstr_all(
"abs_",
r"""
abs_() -> Tensor
In-place version of :meth:`~Tensor.abs`
""",
)
```
### Alias Functions
For aliases, simply reference the original:
```python
add_docstr_all(
"absolute",
r"""
absolute() -> Tensor
Alias for :func:`abs`
""",
)
```
## Common Patterns
### Shape Documentation
Use LaTeX math notation for tensor shapes:
```python
:math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
```
### Reusable Argument Definitions
For commonly used arguments, define them once and reuse:
```python
common_args = parse_kwargs(
"""
dtype (:class:`torch.dtype`, optional): the desired type of returned tensor.
Default: if None, same as this tensor.
"""
)
# Then use with .format():
r"""
...
Keyword args:
{dtype}
{device}
""".format(**common_args)
```
### Template Insertion
Insert reproducibility notes or other common text:
```python
r"""
{tf32_note}
{cudnn_reproducibility_note}
""".format(**reproducibility_notes, **tf32_notes)
```
## Complete Example
Here's a complete example showing all elements:
```python
def gumbel_softmax(
logits: Tensor,
tau: float = 1,
hard: bool = False,
eps: float = 1e-10,
dim: int = -1,
) -> Tensor:
r"""
Sample from the Gumbel-Softmax distribution and optionally discretize.
Args:
logits (Tensor): `[..., num_features]` unnormalized log probabilities
tau (float): non-negative scalar temperature
hard (bool): if ``True``, the returned samples will be discretized as one-hot vectors,
but will be differentiated as if it is the soft sample in autograd. Default: ``False``
dim (int): A dimension along which softmax will be computed. Default: -1
Returns:
Tensor: Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
If ``hard=True``, the returned samples will be one-hot, otherwise they will
be probability distributions that sum to 1 across `dim`.
.. note::
This function is here for legacy reasons and may be removed from nn.Functional in the future.
Examples::
>>> logits = torch.randn(20, 32)
>>> # Sample soft categorical using reparametrization trick:
>>> F.gumbel_softmax(logits, tau=1, hard=False)
>>> # Sample hard categorical using "Straight-through" trick:
>>> F.gumbel_softmax(logits, tau=1, hard=True)
.. _Link 1:
https://arxiv.org/abs/1611.00712
"""
# implementation
```
## Quick Checklist
When writing a PyTorch docstring, ensure:
- [ ] Use raw string (`r"""`)
- [ ] Include function signature on first line
- [ ] Provide brief description
- [ ] Document all parameters in Args section with types
- [ ] Include default values for optional parameters
- [ ] Use Sphinx cross-references (`:func:`, `:class:`, `:meth:`)
- [ ] Add mathematical formulas if applicable
- [ ] Include at least one example in Examples section
- [ ] Add warnings/notes for important caveats
- [ ] Link to related module class with `:class:`
- [ ] Use proper math notation for tensor shapes
- [ ] Follow consistent formatting and indentation
## Common Sphinx Roles Reference
- `:class:\`~torch.nn.Module\`` - Class reference
- `:func:\`torch.function\`` - Function reference
- `:meth:\`~Tensor.method\`` - Method reference
- `:attr:\`attribute\`` - Attribute reference
- `:math:\`equation\`` - Inline math
- `:ref:\`label\`` - Internal reference
- ``` ``code`` ``` - Inline code (use double backticks)
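A small hypothetical docstring exercising several of these roles together (the function itself is made up; `torch.clamp` and `Tensor.clamp_` are the real APIs it points at):
```python
def clamp_demo(input, min=None, max=None):
    r"""clamp_demo(input, min=None, max=None) -> Tensor

    Clamp :attr:`input` into the range :math:`[\text{min}, \text{max}]`.
    See :func:`torch.clamp` and :meth:`~Tensor.clamp_` for the real API;
    this function exists only to illustrate the roles above.

    Args:
        input (Tensor): the input tensor
        min (Number, optional): lower bound. Default: ``None``
        max (Number, optional): upper bound. Default: ``None``
    """
```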
## Additional Notes
- **Indentation**: Use 4 spaces for code, 2 spaces for continuation of parameter descriptions
- **Line length**: Try to keep lines under 100 characters when possible
- **Periods**: End sentences with periods, but not the signature line
- **Backticks**: Use double backticks for code: ``` ``True`` ``None`` ``False`` ```
- **Types**: Common types are `Tensor`, `int`, `float`, `bool`, `str`, `tuple`, `list`, etc.

View File

@ -1,385 +0,0 @@
---
name: skill-writer
description: Guide users through creating Agent Skills for Claude Code. Use when the user wants to create, write, author, or design a new Skill, or needs help with SKILL.md files, frontmatter, or skill structure.
---
# Skill Writer
This Skill helps you create well-structured Agent Skills for Claude Code that follow best practices and validation requirements.
## When to use this Skill
Use this Skill when:
- Creating a new Agent Skill
- Writing or updating SKILL.md files
- Designing skill structure and frontmatter
- Troubleshooting skill discovery issues
- Converting existing prompts or workflows into Skills
## Instructions
### Step 1: Determine Skill scope
First, understand what the Skill should do:
1. **Ask clarifying questions**:
- What specific capability should this Skill provide?
- When should Claude use this Skill?
- What tools or resources does it need?
- Is this for personal use or team sharing?
2. **Keep it focused**: One Skill = one capability
- Good: "PDF form filling", "Excel data analysis"
- Too broad: "Document processing", "Data tools"
### Step 2: Choose Skill location
Determine where to create the Skill:
**Personal Skills** (`~/.claude/skills/`):
- Individual workflows and preferences
- Experimental Skills
- Personal productivity tools
**Project Skills** (`.claude/skills/`):
- Team workflows and conventions
- Project-specific expertise
- Shared utilities (committed to git)
### Step 3: Create Skill structure
Create the directory and files:
```bash
# Personal
mkdir -p ~/.claude/skills/skill-name
# Project
mkdir -p .claude/skills/skill-name
```
For multi-file Skills:
```
skill-name/
├── SKILL.md (required)
├── reference.md (optional)
├── examples.md (optional)
├── scripts/
│ └── helper.py (optional)
└── templates/
└── template.txt (optional)
```
### Step 4: Write SKILL.md frontmatter
Create YAML frontmatter with required fields:
```yaml
---
name: skill-name
description: Brief description of what this does and when to use it
---
```
**Field requirements**:
- **name**:
- Lowercase letters, numbers, hyphens only
- Max 64 characters
- Must match directory name
- Good: `pdf-processor`, `git-commit-helper`
- Bad: `PDF_Processor`, `Git Commits!`
- **description**:
- Max 1024 characters
- Include BOTH what it does AND when to use it
- Use specific trigger words users would say
- Mention file types, operations, and context
**Optional frontmatter fields**:
- **allowed-tools**: Restrict tool access (comma-separated list)
```yaml
allowed-tools: Read, Grep, Glob
```
Use for:
- Read-only Skills
- Security-sensitive workflows
- Limited-scope operations
### Step 5: Write effective descriptions
The description is critical for Claude to discover your Skill.
**Formula**: `[What it does] + [When to use it] + [Key triggers]`
**Examples**:
✅ **Good**:
```yaml
description: Extract text and tables from PDF files, fill forms, merge documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction.
```
✅ **Good**:
```yaml
description: Analyze Excel spreadsheets, create pivot tables, and generate charts. Use when working with Excel files, spreadsheets, or analyzing tabular data in .xlsx format.
```
❌ **Too vague**:
```yaml
description: Helps with documents
description: For data analysis
```
**Tips**:
- Include specific file extensions (.pdf, .xlsx, .json)
- Mention common user phrases ("analyze", "extract", "generate")
- List concrete operations (not generic verbs)
- Add context clues ("Use when...", "For...")
### Step 6: Structure the Skill content
Use clear Markdown sections:
```markdown
# Skill Name
Brief overview of what this Skill does.
## Quick start
Provide a simple example to get started immediately.
## Instructions
Step-by-step guidance for Claude:
1. First step with clear action
2. Second step with expected outcome
3. Handle edge cases
## Examples
Show concrete usage examples with code or commands.
## Best practices
- Key conventions to follow
- Common pitfalls to avoid
- When to use vs. not use
## Requirements
List any dependencies or prerequisites:
```bash
pip install package-name
```
## Advanced usage
For complex scenarios, see [reference.md](reference.md).
```
### Step 7: Add supporting files (optional)
Create additional files for progressive disclosure:
**reference.md**: Detailed API docs, advanced options
**examples.md**: Extended examples and use cases
**scripts/**: Helper scripts and utilities
**templates/**: File templates or boilerplate
Reference them from SKILL.md:
```markdown
For advanced usage, see [reference.md](reference.md).
Run the helper script:
\`\`\`bash
python scripts/helper.py input.txt
\`\`\`
```
### Step 8: Validate the Skill
Check these requirements (a small validation sketch follows the checklist):
✅ **File structure**:
- [ ] SKILL.md exists in correct location
- [ ] Directory name matches frontmatter `name`
✅ **YAML frontmatter**:
- [ ] Opening `---` on line 1
- [ ] Closing `---` before content
- [ ] Valid YAML (no tabs, correct indentation)
- [ ] `name` follows naming rules
- [ ] `description` is specific and < 1024 chars
✅ **Content quality**:
- [ ] Clear instructions for Claude
- [ ] Concrete examples provided
- [ ] Edge cases handled
- [ ] Dependencies listed (if any)
✅ **Testing**:
- [ ] Description matches user questions
- [ ] Skill activates on relevant queries
- [ ] Instructions are clear and actionable
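As an optional aid, here is a small validation sketch. It assumes PyYAML is installed and checks only the documented constraints, nothing more:
```python
import re
import sys
from pathlib import Path

import yaml  # pip install pyyaml

def check_skill(path: str) -> list[str]:
    """Return a list of problems found in a SKILL.md file's frontmatter."""
    problems = []
    text = Path(path).read_text(encoding="utf-8")
    m = re.match(r"^---\n(.*?)\n---\n", text, re.DOTALL)
    if m is None:
        return ["missing YAML frontmatter delimited by --- lines"]
    meta = yaml.safe_load(m.group(1)) or {}
    name = str(meta.get("name") or "")
    desc = str(meta.get("description") or "")
    if not re.fullmatch(r"[a-z0-9-]{1,64}", name):
        problems.append("name: lowercase letters, numbers, hyphens only; max 64 chars")
    if not desc or len(desc) > 1024:
        problems.append("description: required, max 1024 chars")
    if Path(path).parent.name != name:
        problems.append("directory name must match frontmatter name")
    return problems

if __name__ == "__main__":
    for msg in check_skill(sys.argv[1]) or ["frontmatter looks OK"]:
        print(msg)
```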
### Step 9: Test the Skill
1. **Restart Claude Code** (if running) to load the Skill
2. **Ask relevant questions** that match the description:
```
Can you help me extract text from this PDF?
```
3. **Verify activation**: Claude should use the Skill automatically
4. **Check behavior**: Confirm Claude follows the instructions correctly
### Step 10: Debug if needed
If Claude doesn't use the Skill:
1. **Make description more specific**:
- Add trigger words
- Include file types
- Mention common user phrases
2. **Check file location**:
```bash
ls ~/.claude/skills/skill-name/SKILL.md
ls .claude/skills/skill-name/SKILL.md
```
3. **Validate YAML**:
```bash
cat SKILL.md | head -n 10
```
4. **Run debug mode**:
```bash
claude --debug
```
## Common patterns
### Read-only Skill
```yaml
---
name: code-reader
description: Read and analyze code without making changes. Use for code review, understanding codebases, or documentation.
allowed-tools: Read, Grep, Glob
---
```
### Script-based Skill
```yaml
---
name: data-processor
description: Process CSV and JSON data files with Python scripts. Use when analyzing data files or transforming datasets.
---
# Data Processor
## Instructions
1. Use the processing script:
\`\`\`bash
python scripts/process.py input.csv --output results.json
\`\`\`
2. Validate output with:
\`\`\`bash
python scripts/validate.py results.json
\`\`\`
```
### Multi-file Skill with progressive disclosure
```yaml
---
name: api-designer
description: Design REST APIs following best practices. Use when creating API endpoints, designing routes, or planning API architecture.
---
# API Designer
Quick start: See [examples.md](examples.md)
Detailed reference: See [reference.md](reference.md)
## Instructions
1. Gather requirements
2. Design endpoints (see examples.md)
3. Document with OpenAPI spec
4. Review against best practices (see reference.md)
```
## Best practices for Skill authors
1. **One Skill, one purpose**: Don't create mega-Skills
2. **Specific descriptions**: Include trigger words users will say
3. **Clear instructions**: Write for Claude, not humans
4. **Concrete examples**: Show real code, not pseudocode
5. **List dependencies**: Mention required packages in description
6. **Test with teammates**: Verify activation and clarity
7. **Version your Skills**: Document changes in content
8. **Use progressive disclosure**: Put advanced details in separate files
## Validation checklist
Before finalizing a Skill, verify:
- [ ] Name is lowercase, hyphens only, max 64 chars
- [ ] Description is specific and < 1024 chars
- [ ] Description includes "what" and "when"
- [ ] YAML frontmatter is valid
- [ ] Instructions are step-by-step
- [ ] Examples are concrete and realistic
- [ ] Dependencies are documented
- [ ] File paths use forward slashes
- [ ] Skill activates on relevant queries
- [ ] Claude follows instructions correctly
## Troubleshooting
**Skill doesn't activate**:
- Make description more specific with trigger words
- Include file types and operations in description
- Add "Use when..." clause with user phrases
**Multiple Skills conflict**:
- Make descriptions more distinct
- Use different trigger words
- Narrow the scope of each Skill
**Skill has errors**:
- Check YAML syntax (no tabs, proper indentation)
- Verify file paths (use forward slashes)
- Ensure scripts have execute permissions
- List all dependencies
## Examples
See the documentation for complete examples:
- Simple single-file Skill (commit-helper)
- Skill with tool permissions (code-reviewer)
- Multi-file Skill (pdf-processing)
## Output format
When creating a Skill, I will:
1. Ask clarifying questions about scope and requirements
2. Suggest a Skill name and location
3. Create the SKILL.md file with proper frontmatter
4. Include clear instructions and examples
5. Add supporting files if needed
6. Provide testing instructions
7. Validate against all requirements
The result will be a complete, working Skill that follows all best practices and validation rules.

View File

@ -7,12 +7,16 @@ max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax # C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead # E501 is not flexible enough, we're using B950 instead
ignore = ignore =
E203,E305,E402,E501,E704,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824, E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824,
# shebang has extra meaning in fbcode lints, so I think it's not worth trying # shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit # to line this up with executable bit
EXE001, EXE001,
# these ignores are from flake8-bugbear; please fix! # these ignores are from flake8-bugbear; please fix!
B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910 B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910
# these ignores are from flake8-comprehensions; please fix!
C407,
# these ignores are from flake8-logging-format; please fix!
G100,G101,G200
# these ignores are from flake8-simplify. please fix or ignore with commented reason # these ignores are from flake8-simplify. please fix or ignore with commented reason
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12, SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
# SIM104 is already covered by pyupgrade ruff # SIM104 is already covered by pyupgrade ruff

View File

@ -8,7 +8,6 @@ assignees: ''
--- ---
> NOTE: Remember to label this issue with "`ci: sev`" > NOTE: Remember to label this issue with "`ci: sev`"
> If you want autorevert to be disabled, keep the ci: disable-autorevert label
<!-- Add the `merge blocking` label to this PR to prevent PRs from being merged while this issue is open --> <!-- Add the `merge blocking` label to this PR to prevent PRs from being merged while this issue is open -->

View File

@ -1,7 +1,7 @@
--- ---
name: "D❌\U0001F519 ISABLE AUTOREVERT" name: DISABLE AUTOREVERT
about: Disables autorevert when open about: Disables autorevert when open
title: "[DISABLE AUTOREVERT]" title: "❌​\U0001F519 [DISABLE AUTOREVERT]"
labels: 'ci: disable-autorevert' labels: 'ci: disable-autorevert'
assignees: '' assignees: ''

View File

@ -65,7 +65,7 @@ runs:
cd .ci/lumen_cli cd .ci/lumen_cli
python3 -m pip install -e . python3 -m pip install -e .
) )
MAX_JOBS="$(nproc --ignore=10)" MAX_JOBS="$(nproc --ignore=6)"
export MAX_JOBS export MAX_JOBS
# Split the comma-separated list and build each target # Split the comma-separated list and build each target

View File

@ -27,9 +27,7 @@ runs:
docker system prune -af docker system prune -af
diskspace_new=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //') diskspace_new=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
if [[ "$diskspace_new" -gt "$diskspace_cutoff" ]] ; then if [[ "$diskspace_new" -gt "$diskspace_cutoff" ]] ; then
diskspace_cutoff_int=$((diskspace_cutoff + 0)) echo "Error: Available diskspace is less than $diskspace_cutoff percent. Not enough diskspace."
difference=$((100 - diskspace_cutoff_int))
echo "Error: Available diskspace is less than $difference percent. Not enough diskspace."
echo "$msg" echo "$msg"
exit 1 exit 1
else else

View File

@ -124,10 +124,3 @@ runs:
id: login-ecr id: login-ecr
continue-on-error: true continue-on-error: true
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
- name: Preserve github env variables for use in docker
shell: bash
run: |
env | grep '^GITHUB' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
env | grep '^RUNNER' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"

View File

@ -1 +1 @@
3b0e7a6f192ca2715e7e6cbe5db007aea7165fe2 8ad2aa5d354d1bf432339113860185d5a5d1abbd

View File

@ -1 +1 @@
cfbc5c2f1c798991715a6b06bb3ce46478c4487c f5c6c2ec6490455e86f67b2a25c10390d60a27f7

View File

@ -1 +1 @@
c8b09f5f77d6bf6fb7ed7a9aa83e5d8156b3a5e9 0fa6e3129e61143224663e1ec67980d12b7ec4eb

View File

@ -283,9 +283,6 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi fi
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system --pre apache-tvm-ffi==0.1.0b15
# Install the vllm wheel from previous stage # Install the vllm wheel from previous stage
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system /wheels/vllm/*.whl --verbose uv pip install --system /wheels/vllm/*.whl --verbose
@ -298,8 +295,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0' ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# TODO(elainewy): remove this once vllm commit is updated, and install flashinfer from pip
# see https://github.com/pytorch/pytorch/pull/165274#issuecomment-3408531784
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
ARG FLASHINFER_GIT_REF="v0.2.14.post1" ARG FLASHINFER_GIT_REF="v0.2.14.post1"

View File

@ -15,11 +15,6 @@
- "module: reinplacing" - "module: reinplacing"
then: then:
- "module: pt2-dispatcher" - "module: pt2-dispatcher"
- any:
- "vllm-compile"
then:
- "module: vllm"
- "oncall: pt2"
- any: - any:
- "module: vmap" - "module: vmap"
then: then:
@ -32,6 +27,10 @@
- "module: pt2 optimizer" - "module: pt2 optimizer"
then: then:
- "module: dynamo" - "module: dynamo"
- any:
- "module: flex attention"
then:
- "module: higher order operators"
- any: - any:
- "module: aotinductor" - "module: aotinductor"
then: then:

.github/labeler.yml
View File

@ -133,32 +133,3 @@
"ciflow/vllm": "ciflow/vllm":
- .github/ci_commit_pins/vllm.txt - .github/ci_commit_pins/vllm.txt
"ciflow/b200":
- test/test_matmul_cuda.py
- test/test_scaled_matmul_cuda.py
- test/inductor/test_fp8.py
- aten/src/ATen/native/cuda/Blas.cpp
- torch/**/*cublas*
- torch/_inductor/kernel/mm.py
- test/inductor/test_max_autotune.py
- third_party/fbgemm
"ciflow/h100":
- test/test_matmul_cuda.py
- test/test_scaled_matmul_cuda.py
- test/inductor/test_fp8.py
- aten/src/ATen/native/cuda/Blas.cpp
- torch/**/*cublas*
- torch/_inductor/kernel/mm.py
- test/inductor/test_max_autotune.py
- third_party/fbgemm
"ciflow/rocm":
- test/test_matmul_cuda.py
- test/test_scaled_matmul_cuda.py
- test/inductor/test_fp8.py
- aten/src/ATen/native/cuda/Blas.cpp
- torch/_inductor/kernel/mm.py
- test/inductor/test_max_autotune.py
- third_party/fbgemm

View File

@ -540,26 +540,6 @@
- Lint - Lint
- pull - pull
- name: PrivateUse1
patterns:
- torch/accelerator/**
- torch/utils/backend_registration.py
- torch/csrc/acc/**
- torch/csrc/DeviceAccelerator.*
- torch/csrc/profiler/standalone/privateuse1_observer.*
- aten/src/ATen/DeviceAccelerator.*
- aten/src/ATen/core/GeneratorForPrivateuseone.*
- aten/src/ATen/detail/PrivateUse1HooksInterface.*
- docs/source/accelerator/**
- test/cpp_extensions/open_registration_extension/torch_openreg/**
approved_by:
- albanD
- fffrog
mandatory_checks_name:
- EasyCLA
- Lint
- pull
- name: superuser - name: superuser
patterns: patterns:
- '*' - '*'

View File

@ -3,7 +3,6 @@ ciflow_tracking_issue: 64124
ciflow_push_tags: ciflow_push_tags:
- ciflow/b200 - ciflow/b200
- ciflow/b200-symm-mem - ciflow/b200-symm-mem
- ciflow/b200-distributed
- ciflow/binaries - ciflow/binaries
- ciflow/binaries_libtorch - ciflow/binaries_libtorch
- ciflow/binaries_wheel - ciflow/binaries_wheel
@ -16,10 +15,8 @@ ciflow_push_tags:
- ciflow/inductor-micro-benchmark - ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86 - ciflow/inductor-micro-benchmark-cpu-x86
- ciflow/inductor-perf-compare - ciflow/inductor-perf-compare
- ciflow/inductor-perf-test-nightly-rocm-mi300 - ciflow/inductor-perf-test-nightly-rocm
- ciflow/inductor-perf-test-nightly-rocm-mi355
- ciflow/inductor-perf-test-nightly-x86-zen - ciflow/inductor-perf-test-nightly-x86-zen
- ciflow/inductor-perf-test-nightly-xpu
- ciflow/inductor-periodic - ciflow/inductor-periodic
- ciflow/inductor-rocm - ciflow/inductor-rocm
- ciflow/linux-aarch64 - ciflow/linux-aarch64
@ -27,7 +24,6 @@ ciflow_push_tags:
- ciflow/nightly - ciflow/nightly
- ciflow/op-benchmark - ciflow/op-benchmark
- ciflow/periodic - ciflow/periodic
- ciflow/periodic-rocm-mi200
- ciflow/periodic-rocm-mi300 - ciflow/periodic-rocm-mi300
- ciflow/pull - ciflow/pull
- ciflow/quantization-periodic - ciflow/quantization-periodic
@ -35,7 +31,6 @@ ciflow_push_tags:
- ciflow/rocm - ciflow/rocm
- ciflow/rocm-mi300 - ciflow/rocm-mi300
- ciflow/rocm-mi355 - ciflow/rocm-mi355
- ciflow/rocm-navi31
- ciflow/s390 - ciflow/s390
- ciflow/slow - ciflow/slow
- ciflow/torchbench - ciflow/torchbench

View File

@ -11,17 +11,11 @@ architectures:
* Latest XPU * Latest XPU
""" """
import json
import os import os
import re
from pathlib import Path
from typing import Optional from typing import Optional
SCRIPT_DIR = Path(__file__).absolute().parent # NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this
REPO_ROOT = SCRIPT_DIR.parent.parent
CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"] CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"]
CUDA_STABLE = "12.8" CUDA_STABLE = "12.8"
CUDA_ARCHES_FULL_VERSION = { CUDA_ARCHES_FULL_VERSION = {
@ -37,7 +31,8 @@ CUDA_ARCHES_CUDNN_VERSION = {
"13.0": "9", "13.0": "9",
} }
ROCM_ARCHES = ["7.0", "7.1"] # NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this
ROCM_ARCHES = ["6.4", "7.0"]
XPU_ARCHES = ["xpu"] XPU_ARCHES = ["xpu"]
@ -61,7 +56,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | " "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | " "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | " "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | " "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'" "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
@ -78,44 +73,44 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | " "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | " "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | " "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | " "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'" "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
), ),
"12.9": ( "12.9": (
"nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | " "nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | " "nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | " "nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | " "nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | " "nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | " "nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | " "nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | " "nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | " "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | " "nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | " "nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'" "nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'"
), ),
"13.0": ( "13.0": (
"nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | " "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | "
"nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | " "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | "
"nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | " "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | "
"nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | " "nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | "
"nvidia-cublas==13.1.0.3; platform_system == 'Linux' | " "nvidia-cublas==13.0.0.19; platform_system == 'Linux' | "
"nvidia-cufft==12.0.0.61; platform_system == 'Linux' | " "nvidia-cufft==12.0.0.15; platform_system == 'Linux' | "
"nvidia-curand==10.4.0.35; platform_system == 'Linux' | " "nvidia-curand==10.4.0.35; platform_system == 'Linux' | "
"nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | " "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
"nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | " "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | " "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
"nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | " "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
"nvidia-nvtx==13.0.85; platform_system == 'Linux' | " "nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
"nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | " "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
"nvidia-cufile==1.15.1.6; platform_system == 'Linux'" "nvidia-cufile==1.15.0.42; platform_system == 'Linux'"
), ),
"xpu": ( "xpu": (
"intel-cmplr-lib-rt==2025.2.1 | " "intel-cmplr-lib-rt==2025.2.1 | "
@ -142,48 +137,9 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
} }
# Used by tools/nightly.py
PYTORCH_NIGHTLY_PIP_INDEX_URL = "https://download.pytorch.org/whl/nightly"
NIGHTLY_SOURCE_MATRIX = {
"cpu": dict(
name="cpu",
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/cpu",
supported_platforms=["Linux", "macOS", "Windows"],
accelerator="cpu",
)
}
CUDA_NIGHTLY_SOURCE_MATRIX = {
f"cuda-{major}.{minor}": dict(
name=f"cuda-{major}.{minor}",
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/cu{major}{minor}",
supported_platforms=["Linux", "Windows"],
accelerator="cuda",
)
for major, minor in (map(int, version.split(".")) for version in CUDA_ARCHES)
}
ROCM_NIGHTLY_SOURCE_MATRIX = {
f"rocm-{major}.{minor}": dict(
name=f"rocm-{major}.{minor}",
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/rocm{major}.{minor}",
supported_platforms=["Linux"],
accelerator="rocm",
)
for major, minor in (map(int, version.split(".")) for version in ROCM_ARCHES)
}
XPU_NIGHTLY_SOURCE_MATRIX = {
"xpu": dict(
name="xpu",
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/xpu",
supported_platforms=["Linux"],
accelerator="xpu",
)
}
NIGHTLY_SOURCE_MATRIX.update(CUDA_NIGHTLY_SOURCE_MATRIX)
NIGHTLY_SOURCE_MATRIX.update(ROCM_NIGHTLY_SOURCE_MATRIX)
NIGHTLY_SOURCE_MATRIX.update(XPU_NIGHTLY_SOURCE_MATRIX)
def get_nccl_wheel_version(arch_version: str) -> str: def get_nccl_wheel_version(arch_version: str) -> str:
import re
requirements = map( requirements = map(
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]) str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
) )
@ -191,14 +147,17 @@ def get_nccl_wheel_version(arch_version: str) -> str:
def read_nccl_pin(arch_version: str) -> str: def read_nccl_pin(arch_version: str) -> str:
nccl_pin_path = ( from pathlib import Path
REPO_ROOT
/ ".ci" nccl_pin_path = os.path.join(
/ "docker" Path(__file__).absolute().parents[2],
/ "ci_commit_pins" ".ci",
/ f"nccl-cu{arch_version[:2]}.txt" "docker",
"ci_commit_pins",
f"nccl-cu{arch_version[:2]}.txt",
) )
return nccl_pin_path.read_text().strip() with open(nccl_pin_path) as f:
return f.read().strip()
def validate_nccl_dep_consistency(arch_version: str) -> None: def validate_nccl_dep_consistency(arch_version: str) -> None:
@ -206,8 +165,7 @@ def validate_nccl_dep_consistency(arch_version: str) -> None:
wheel_ver = get_nccl_wheel_version(arch_version) wheel_ver = get_nccl_wheel_version(arch_version)
if not nccl_release_tag.startswith(f"v{wheel_ver}"): if not nccl_release_tag.startswith(f"v{wheel_ver}"):
raise RuntimeError( raise RuntimeError(
f"{arch_version} NCCL release tag version {nccl_release_tag} " f"{arch_version} NCCL release tag version {nccl_release_tag} does not correspond to wheel version {wheel_ver}"
f"does not correspond to wheel version {wheel_ver}"
) )
@ -283,11 +241,7 @@ def generate_libtorch_matrix(
arches += CUDA_ARCHES arches += CUDA_ARCHES
arches += ROCM_ARCHES arches += ROCM_ARCHES
elif os == "windows": elif os == "windows":
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up arches += CUDA_ARCHES
# in 2.10
windows_cuda_arches = CUDA_ARCHES.copy()
windows_cuda_arches.remove("12.9")
arches += windows_cuda_arches
if libtorch_variants is None: if libtorch_variants is None:
libtorch_variants = [ libtorch_variants = [
"shared-with-deps", "shared-with-deps",
@ -351,11 +305,7 @@ def generate_wheels_matrix(
if os == "linux": if os == "linux":
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
elif os == "windows": elif os == "windows":
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up arches += CUDA_ARCHES + XPU_ARCHES
# in 2.10
windows_cuda_arches = CUDA_ARCHES.copy()
windows_cuda_arches.remove("12.9")
arches += windows_cuda_arches + XPU_ARCHES
elif os == "linux-aarch64": elif os == "linux-aarch64":
# Separate new if as the CPU type is different and # Separate new if as the CPU type is different and
# uses different build/test scripts # uses different build/test scripts
@ -454,14 +404,7 @@ def generate_wheels_matrix(
return ret return ret
arch_version = "" validate_nccl_dep_consistency("13.0")
for arch_version in CUDA_ARCHES: validate_nccl_dep_consistency("12.9")
validate_nccl_dep_consistency(arch_version) validate_nccl_dep_consistency("12.8")
del arch_version validate_nccl_dep_consistency("12.6")
if __name__ == "__main__":
# Used by tools/nightly.py
(SCRIPT_DIR / "nightly_source_matrix.json").write_text(
json.dumps(NIGHTLY_SOURCE_MATRIX, indent=4) + "\n"
)

View File

@ -1092,7 +1092,7 @@ class GitHubPR:
editor = node["editor"] editor = node["editor"]
return GitHubComment( return GitHubComment(
body_text=node["bodyText"], body_text=node["bodyText"],
created_at=node.get("createdAt", ""), created_at=node["createdAt"] if "createdAt" in node else "",
author_login=node["author"]["login"], author_login=node["author"]["login"],
author_url=node["author"].get("url", None), author_url=node["author"].get("url", None),
author_association=node["authorAssociation"], author_association=node["authorAssociation"],

View File

@ -26,8 +26,9 @@ name: !{{ build_environment }}
- name: Setup Python - name: Setup Python
uses: actions/setup-python@v6 uses: actions/setup-python@v6
with: with:
# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "!{{ py_ver.strip('t') + ('.4' if '3.14' not in py_ver else '.0') }}" python-version: "!{{ (py_ver.strip('t') + '.4') if '3.14' not in py_ver else '3.14.0-rc.2' }}"
freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }} freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }}
{%- endmacro %} {%- endmacro %}

View File

@ -79,9 +79,9 @@ jobs:
runs-on: "windows-11-arm64-preview" runs-on: "windows-11-arm64-preview"
{%- else %} {%- else %}
{%- if branches == "nightly" %} {%- if branches == "nightly" %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
{%- else %} {%- else %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge.nonephemeral" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
{%- endif %} {%- endif %}
{%- endif %} {%- endif %}
timeout-minutes: !{{ common.timeout_minutes_windows_binary }} timeout-minutes: !{{ common.timeout_minutes_windows_binary }}

View File

@ -37,7 +37,7 @@ on:
runner: runner:
required: false required: false
type: string type: string
default: "linux.c7i.2xlarge" default: "linux.2xlarge"
description: | description: |
Label of the runner this job should run on. Label of the runner this job should run on.
test-matrix: test-matrix:

View File

@ -224,46 +224,6 @@ jobs:
continue-on-error: true continue-on-error: true
uses: ./.github/actions/download-td-artifacts uses: ./.github/actions/download-td-artifacts
- name: Download Windows torch wheel for cross-compilation
if: matrix.win_torch_wheel_artifact != ''
uses: seemethere/download-artifact-s3@1da556a7aa0a088e3153970611f6c432d58e80e6 # v4.2.0
with:
name: ${{ matrix.win_torch_wheel_artifact }}
path: win-torch-wheel
- name: Extract Windows wheel and setup CUDA libraries
if: matrix.win_torch_wheel_artifact != ''
shell: bash
run: |
set -x
# Find the wheel file
WHEEL_FILE=$(find win-torch-wheel -name "*.whl" -type f | head -n 1)
if [ -z "$WHEEL_FILE" ]; then
echo "Error: No wheel file found in win-torch-wheel directory"
exit 1
fi
echo "Found wheel file: $WHEEL_FILE"
# Unzip the wheel file
unzip -q "$WHEEL_FILE" -d win-torch-wheel-extracted
echo "Extracted wheel contents"
# Setup CUDA libraries (cuda.lib and cudart.lib) directory
mkdir -p win-torch-wheel-extracted/lib/x64
if [ -f "win-torch-wheel/cuda.lib" ]; then
mv win-torch-wheel/cuda.lib win-torch-wheel-extracted/lib/x64/
echo "Moved cuda.lib to win-torch-wheel-extracted/lib/x64/"
fi
if [ -f "win-torch-wheel/cudart.lib" ]; then
mv win-torch-wheel/cudart.lib win-torch-wheel-extracted/lib/x64/
echo "Moved cudart.lib to win-torch-wheel-extracted/lib/x64/"
fi
# Verify CUDA libraries are present
echo "CUDA libraries:"
ls -la win-torch-wheel-extracted/lib/x64/ || echo "No CUDA libraries found"
- name: Parse ref - name: Parse ref
id: parse-ref id: parse-ref
run: .github/scripts/parse_ref.py run: .github/scripts/parse_ref.py

View File

@ -168,31 +168,6 @@ jobs:
run: | run: |
.ci/pytorch/win-build.sh .ci/pytorch/win-build.sh
# Collect Windows torch libs and CUDA libs for cross-compilation
- name: Collect Windows CUDA libs for cross-compilation
if: steps.build.outcome != 'skipped' && inputs.cuda-version != 'cpu'
shell: bash
run: |
set -ex
# Create directory structure if does not exist
mkdir -p /c/${{ github.run_id }}/build-results
# Copy CUDA libs
CUDA_PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${{ inputs.cuda-version }}"
if [ -f "${CUDA_PATH}/lib/x64/cuda.lib" ]; then
cp "${CUDA_PATH}/lib/x64/cuda.lib" /c/${{ github.run_id }}/build-results/
fi
if [ -f "${CUDA_PATH}/lib/x64/cudart.lib" ]; then
cp "${CUDA_PATH}/lib/x64/cudart.lib" /c/${{ github.run_id }}/build-results/
fi
# List collected files
echo "Collected CUDA libs:"
ls -lah /c/${{ github.run_id }}/build-results/*.lib
# Upload to github so that people can click and download artifacts # Upload to github so that people can click and download artifacts
- name: Upload artifacts to s3 - name: Upload artifacts to s3
if: steps.build.outcome != 'skipped' if: steps.build.outcome != 'skipped'

View File

@ -38,10 +38,6 @@ on:
default: "" default: ""
description: | description: |
List of tests to include (empty string implies default list) List of tests to include (empty string implies default list)
dashboard-tag:
required: false
type: string
default: ""
disable-monitor: disable-monitor:
description: | description: |
[Experimental] Disable utilization monitoring for tests. [Experimental] Disable utilization monitoring for tests.
@ -62,11 +58,6 @@ on:
required: false required: false
type: number type: number
default: 1 default: 1
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
permissions: permissions:
id-token: write id-token: write
contents: read contents: read
@ -205,8 +196,6 @@ jobs:
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }} TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
run: | run: |
# Fetch aws credential from IMDs # Fetch aws credential from IMDs
@ -257,8 +246,6 @@ jobs:
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e TESTS_TO_INCLUDE \ -e TESTS_TO_INCLUDE \
-e ZE_AFFINITY_MASK \ -e ZE_AFFINITY_MASK \
-e HUGGING_FACE_HUB_TOKEN \
-e DASHBOARD_TAG \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \ --ulimit stack=10485760:83886080 \
--ulimit core=0 \ --ulimit core=0 \

View File

@ -1,62 +0,0 @@
name: CI for distributed tests on B200
on:
pull_request:
paths:
- .github/workflows/b200-distributed.yml
workflow_dispatch:
push:
tags:
- ciflow/b200-distributed/*
schedule:
- cron: 46 8 * * * # about 1:46am PDT
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200:
name: linux-jammy-cuda12.8-py3.10-gcc11-build-distributed-b200
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: linux.12xlarge.memory
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed-b200
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: '10.0'
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.dgx.b200.8" },
{ config: "distributed", shard: 2, num_shards: 2, runner: "linux.dgx.b200.8" },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-test-distributed-b200:
name: linux-jammy-cuda12.8-py3.10-gcc11-test-b200
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200
with:
timeout-minutes: 1200
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed-b200
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200.outputs.test-matrix }}
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
secrets: inherit

View File

@ -36,7 +36,7 @@ jobs:
runs-on: linux.9xlarge.ephemeral runs-on: linux.9xlarge.ephemeral
strategy: strategy:
matrix: matrix:
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm7.0", "rocm7.1", "cpu"] tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "cpu"]
steps: steps:
- name: Build docker image - name: Build docker image
uses: pytorch/pytorch/.github/actions/binary-docker-build@main uses: pytorch/pytorch/.github/actions/binary-docker-build@main

View File

@ -52,8 +52,8 @@ jobs:
{ tag: "cuda12.9" }, { tag: "cuda12.9" },
{ tag: "cuda12.8" }, { tag: "cuda12.8" },
{ tag: "cuda12.6" }, { tag: "cuda12.6" },
{ tag: "rocm6.4" },
{ tag: "rocm7.0" }, { tag: "rocm7.0" },
{ tag: "rocm7.1" },
{ tag: "cpu" }, { tag: "cpu" },
] ]
steps: steps:

View File

@ -34,7 +34,7 @@ jobs:
id-token: write id-token: write
strategy: strategy:
matrix: matrix:
rocm_version: ["71", "70"] rocm_version: ["70", "64"]
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

View File

@ -54,8 +54,8 @@ jobs:
{ name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" }, { name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" }, { name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" }, { name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" }, { name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm7.1", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" }, { name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" }, { name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "xpu", runner: "linux.9xlarge.ephemeral" }, { name: "manylinux2_28-builder", tag: "xpu", runner: "linux.9xlarge.ephemeral" },

View File

@ -55,7 +55,7 @@ jobs:
docker-image: ["pytorch/manylinux2_28-builder:cpu"] docker-image: ["pytorch/manylinux2_28-builder:cpu"]
include: include:
- device: "rocm" - device: "rocm"
rocm_version: "7.1" rocm_version: "7.0"
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "cuda" - device: "cuda"
rocm_version: "" rocm_version: ""
@ -159,7 +159,12 @@ jobs:
WITH_CLANG_LDD="--with-clang-ldd" WITH_CLANG_LDD="--with-clang-ldd"
fi fi
docker exec -t "${container_name}" bash -c "${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE $WITH_CLANG_LDD" if [[ "${BUILD_DEVICE}" == xpu ]]; then
docker exec -t "${container_name}" bash -c "dnf install -y gcc-toolset-13-gcc-c++"
docker exec -t "${container_name}" bash -c "source /opt/rh/gcc-toolset-13/enable && ${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE"
else
docker exec -t "${container_name}" bash -c "${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE $WITH_CLANG_LDD"
fi
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "xpu") ]]; then if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "xpu") ]]; then
docker exec -t "${container_name}" bash -c "auditwheel repair --plat ${PLATFORM} //artifacts/*.whl" docker exec -t "${container_name}" bash -c "auditwheel repair --plat ${PLATFORM} //artifacts/*.whl"

View File

@ -27,8 +27,9 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [ '3.12' ] python-version: [ '3.12' ]
# TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
device: [ 'cu128', 'cu129', 'cu130' ] device: [ 'cu128', 'cu129' ]
include: include:
- platform: manylinux_2_28_x86_64 - platform: manylinux_2_28_x86_64
device: cu128 device: cu128
@ -38,10 +39,6 @@ jobs:
device: cu129 device: cu129
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9' manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
runner: linux.12xlarge.memory runner: linux.12xlarge.memory
- platform: manylinux_2_28_x86_64
device: cu130
manylinux-image: 'pytorch/manylinux2_28-builder:cuda13.0'
runner: linux.12xlarge.memory
- platform: manylinux_2_28_aarch64 - platform: manylinux_2_28_aarch64
device: cu128 device: cu128
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8' manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8'
@ -50,11 +47,6 @@ jobs:
device: cu129 device: cu129
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9' manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9'
runner: linux.arm64.r7g.12xlarge.memory runner: linux.arm64.r7g.12xlarge.memory
exclude:
# TODO (huydhn): Add cu130 aarch64 once PyTorch is on 2.9+ and
# xformers is update to support 13.0
- platform: manylinux_2_28_aarch64
device: cu130
name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
runs-on: ${{ matrix.runner }} runs-on: ${{ matrix.runner }}
timeout-minutes: 480 timeout-minutes: 480
@ -177,12 +169,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
device: [ 'cu128', 'cu129', 'cu130' ] device: [ 'cu128', 'cu129' ]
exclude:
# TODO (huydhn): Add cu130 aarch64 once PyTorch is on 2.9+ and
# xformers is update to support 13.0
- platform: manylinux_2_28_aarch64
device: cu130
env: env:
PLATFORM: ${{ matrix.platform }} PLATFORM: ${{ matrix.platform }}
BUILD_DEVICE: ${{ matrix.device }} BUILD_DEVICE: ${{ matrix.device }}

View File

@ -57,7 +57,6 @@ jobs:
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11, pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
pytorch-linux-jammy-py3.10-clang12, pytorch-linux-jammy-py3.10-clang12,
pytorch-linux-jammy-py3.13-clang12, pytorch-linux-jammy-py3.13-clang12,
pytorch-linux-jammy-py3.14-clang12,
pytorch-linux-jammy-rocm-n-py3, pytorch-linux-jammy-rocm-n-py3,
pytorch-linux-noble-rocm-n-py3, pytorch-linux-noble-rocm-n-py3,
pytorch-linux-jammy-rocm-n-py3-benchmarks, pytorch-linux-jammy-rocm-n-py3-benchmarks,
@ -67,7 +66,6 @@ jobs:
pytorch-linux-jammy-py3.12-halide, pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-n-1-py3, pytorch-linux-jammy-xpu-n-1-py3,
pytorch-linux-jammy-xpu-n-py3, pytorch-linux-jammy-xpu-n-py3,
pytorch-linux-jammy-xpu-n-py3-inductor-benchmarks,
pytorch-linux-jammy-py3-clang18-asan, pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-jammy-py3-clang12-onnx, pytorch-linux-jammy-py3-clang12-onnx,
pytorch-linux-jammy-linter, pytorch-linux-jammy-linter,

View File

@@ -8,7 +8,6 @@ on:
    - docker.Makefile
    - .github/workflows/docker-release.yml
    - .github/scripts/generate_docker_release_matrix.py
-   - .github/scripts/generate_binary_build_matrix.py
  push:
    branches:
      - nightly

View File

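Each hunk below edits a single PYTORCH_EXTRA_INSTALL_REQUIREMENTS line: the nvidia-nvshmem pin moves between versions everywhere, the CUDA 13.0 rows change several pins at once, and the CUDA 12.9 rows append `and platform_machine == 'x86_64'` to every PEP 508 marker. Markers like these decide at install time whether a pinned dependency applies; below is a minimal sketch of evaluating one against explicit environments, assuming the `packaging` library is available:

from packaging.markers import Marker

marker = Marker("platform_system == 'Linux' and platform_machine == 'x86_64'")

# Evaluate against explicit environments instead of the running interpreter.
linux_x86 = {"platform_system": "Linux", "platform_machine": "x86_64"}
linux_arm = {"platform_system": "Linux", "platform_machine": "aarch64"}

print(marker.evaluate(linux_x86))  # True  -> the pin applies on x86_64 Linux
print(marker.evaluate(linux_arm))  # False -> the pin is skipped on aarch64

In other words, with the x86_64-qualified markers an aarch64 Linux install simply skips those CUDA pins rather than trying to resolve them.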
@@ -132,7 +132,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_10-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -178,7 +178,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_10-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -224,7 +224,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_10-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -270,7 +270,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_10-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -381,7 +381,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_11-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -427,7 +427,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_11-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -473,7 +473,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_11-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -519,7 +519,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_11-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -630,7 +630,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_12-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -676,7 +676,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_12-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -722,7 +722,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_12-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -768,7 +768,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_12-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -879,7 +879,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -925,7 +925,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -971,7 +971,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1017,7 +1017,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1128,7 +1128,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13t-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1174,7 +1174,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13t-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1220,7 +1220,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13t-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1266,7 +1266,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_13t-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1377,7 +1377,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_14-cuda-aarch64-12_6
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1423,7 +1423,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_14-cuda-aarch64-12_8
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1469,7 +1469,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_14-cuda-aarch64-12_9
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -1515,7 +1515,7 @@ jobs:
    ALPINE_IMAGE: "arm64v8/alpine"
    build_name: manywheel-py3_14-cuda-aarch64-13_0
    build_environment: linux-aarch64-binary-manywheel
-   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+   PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    timeout-minutes: 420
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
@ -1626,7 +1626,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14t-cuda-aarch64-12_6
build_environment: linux-aarch64-binary-manywheel
-PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -1672,7 +1672,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14t-cuda-aarch64-12_8
build_environment: linux-aarch64-binary-manywheel
-PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -1718,7 +1718,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14t-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
-PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
+PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -1764,7 +1764,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14t-cuda-aarch64-13_0
build_environment: linux-aarch64-binary-manywheel
-PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
+PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
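Note: the PYTORCH_EXTRA_INSTALL_REQUIREMENTS values above are pipe-separated PEP 508 requirement strings; the text after each `;` is an environment marker that pip evaluates at install time, so the `and platform_machine == 'x86_64'` guard keeps those CUDA wheels from being selected on non-x86_64 hosts. A minimal sketch of how such a marker evaluates, assuming the third-party `packaging` library (which pip vendors) is installed:

```python
# Minimal sketch: evaluate a PEP 508 environment marker the way pip does.
# Assumes the third-party "packaging" library is installed.
from packaging.markers import Marker

req = "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64'"
_, marker_text = req.split(";", 1)
marker = Marker(marker_text.strip())

# An explicit environment dict overrides the current interpreter's values,
# which lets us check what pip would decide on other platforms.
print(marker.evaluate({"platform_system": "Linux", "platform_machine": "x86_64"}))   # True
print(marker.evaluate({"platform_system": "Linux", "platform_machine": "aarch64"}))  # False
```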


@ -384,6 +384,124 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_4-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: "6.4"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
timeout-minutes: 300
build_name: libtorch-rocm6_4-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm6_4-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm6_4-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: "6.4"
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
permissions:
id-token: write
contents: read
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm6_4-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
custom-tag-prefix: rocm6.4
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
env:
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm6_4-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm6_4-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: "6.4"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm6_4-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
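The GPU_FLAG value exported by the "ROCm set GPU_FLAG" step above is a set of `docker run` arguments that pass the ROCm device nodes and group memberships through to the test container. A hedged sketch of how those flags would be spliced into a container invocation (the image and command here are placeholders, not taken from the workflow):

```python
import shlex

# The flag string as exported by the workflow step above.
gpu_flag = ("--device=/dev/mem --device=/dev/kfd --device=/dev/dri "
            "--group-add video --group-add daemon")

# Hypothetical invocation: splice the flags into a docker run command line.
cmd = ["docker", "run", "--rm", *shlex.split(gpu_flag), "ubuntu:22.04", "rocminfo"]
print(shlex.join(cmd))
# subprocess.run(cmd, check=True) would require a ROCm host with Docker.
```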
libtorch-rocm7_0-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -501,121 +619,3 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm7_1-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.1
GPU_ARCH_VERSION: "7.1"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
timeout-minutes: 300
build_name: libtorch-rocm7_1-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm7_1-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm7_1-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.1
GPU_ARCH_VERSION: "7.1"
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
permissions:
id-token: write
contents: read
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm7_1-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
custom-tag-prefix: rocm7.1
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
env:
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm7_1-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm7_1-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.1
GPU_ARCH_VERSION: "7.1"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm7_1-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
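Job trios like the rocm6.4 block added above and the rocm7.1 block removed here follow one template per GPU arch version; in the PyTorch repo such binary workflows are regenerated by a script rather than edited by hand. A rough, hypothetical sketch of the fan-out, mirroring only the naming seen in this diff (not the repo's actual generator):

```python
# Hypothetical sketch: expand ROCm versions into the build/test/upload job
# names seen in this diff. The real generator is template-driven; this only
# reproduces the naming convention.
ROCM_VERSIONS = ["6.4", "7.0", "7.1"]  # illustrative input

def libtorch_jobs(gpu_arch_version: str) -> list[str]:
    tag = "rocm" + gpu_arch_version.replace(".", "_")  # e.g. "rocm6_4"
    base = f"libtorch-{tag}-shared-with-deps-release"
    return [f"{base}-build", f"{base}-test", f"{base}-upload"]

for version in ROCM_VERSIONS:
    print(libtorch_jobs(version))
```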

File diff suppressed because it is too large


@ -63,6 +63,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.10.4"
freethreaded: false


@ -59,6 +59,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.10.4"
freethreaded: false
@ -168,6 +169,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.11.4"
freethreaded: false
@ -277,6 +279,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.12.4"
freethreaded: false
@ -386,6 +389,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.13.4"
freethreaded: false
@ -495,6 +499,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
python-version: "3.13.4"
freethreaded: true
@ -604,8 +609,9 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
-python-version: "3.14.0"
+python-version: "3.14.0-rc.2"
freethreaded: false
- name: Checkout PyTorch
uses: actions/checkout@v4
@ -713,8 +719,9 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
+# TODO: Removeme once 3.14 is out
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
-python-version: "3.14.0"
+python-version: "3.14.0-rc.2"
freethreaded: true
- name: Checkout PyTorch
uses: actions/checkout@v4
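The `freethreaded:` input in the hunks above selects CPython's no-GIL build of the requested version. A small check, under the assumption that a later step wants to verify which build it actually got (`sys._is_gil_enabled()` is a private API added in 3.13, hence the guard):

```python
import sys
import sysconfig

# Py_GIL_DISABLED is 1 only in free-threaded ("t") CPython builds.
print("free-threaded build:", bool(sysconfig.get_config_var("Py_GIL_DISABLED")))

# On 3.13+ the GIL can still be re-enabled at runtime, e.g. by an
# incompatible extension module, so report the live state where available.
if hasattr(sys, "_is_gil_enabled"):
    print("GIL currently enabled:", sys._is_gil_enabled())
```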


@ -44,7 +44,7 @@ jobs:
libtorch-cpu-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -291,7 +291,7 @@ jobs:
libtorch-cuda12_6-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -541,7 +541,7 @@ jobs:
libtorch-cuda12_8-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -788,10 +788,260 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-cuda12_9-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: bash
run: |
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon
shell: bash
run: |
git config --global core.longpaths true
git config --global core.symlinks true
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
# Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560
- name: Enable long paths on Windows
shell: powershell
run: |
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
# Since it's just a defensive command, the workflow should continue even the command fails. This step can be
# removed once Windows Defender is removed from the AMI
- name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch
continue-on-error: true
shell: powershell
run: |
Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore
# Let's both exclude the path and disable Windows Defender completely just to be sure
# that it doesn't interfere
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Populate binary env
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
- name: Build PyTorch binary
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
name: libtorch-cuda12_9-shared-with-deps-debug
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Wait until all sessions have drained
shell: powershell
working-directory: pytorch
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
working-directory: pytorch
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
libtorch-cuda12_9-shared-with-deps-debug-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-cuda12_9-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon
shell: bash
run: |
git config --global core.longpaths true
git config --global core.symlinks true
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
# Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560
- name: Enable long paths on Windows
shell: powershell
run: |
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
# Since it's just a defensive command, the workflow should continue even the command fails. This step can be
# removed once Windows Defender is removed from the AMI
- name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch
continue-on-error: true
shell: powershell
run: |
Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore
# Let's both exclude the path and disable Windows Defender completely just to be sure
# that it doesn't interfere
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: bash
run: |
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-cuda12_9-shared-with-deps-debug
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
- name: Test PyTorch binary
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
- name: Wait until all sessions have drained
shell: powershell
working-directory: pytorch
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
working-directory: pytorch
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
libtorch-cuda12_9-shared-with-deps-debug-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-cuda12_9-shared-with-deps-debug-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_9-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
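The "Display EC2 information" steps in the jobs above implement the IMDSv2 handshake: a PUT request mints a short-lived session token, and every metadata read then presents that token in a header. The same two calls in stdlib Python, as a sketch of the protocol rather than CI code (it only works from inside an EC2 instance):

```python
import urllib.request

# Step 1: mint a short-lived IMDSv2 session token via PUT with a TTL header.
token_request = urllib.request.Request(
    "http://169.254.169.254/latest/api/token",
    method="PUT",
    headers={"X-aws-ec2-metadata-token-ttl-seconds": "30"},
)
token = urllib.request.urlopen(token_request, timeout=2).read().decode()

# Step 2: present the token on each metadata read.
def get_ec2_metadata(category: str) -> str:
    request = urllib.request.Request(
        f"http://169.254.169.254/latest/meta-data/{category}",
        headers={"X-aws-ec2-metadata-token": token},
    )
    return urllib.request.urlopen(request, timeout=2).read().decode()

print("instance-type:", get_ec2_metadata("instance-type"))
```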
libtorch-cuda13_0-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch


@ -44,7 +44,7 @@ jobs:
libtorch-cpu-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -291,7 +291,7 @@ jobs:
libtorch-cuda12_6-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -541,7 +541,7 @@ jobs:
libtorch-cuda12_8-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -788,10 +788,260 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-cuda12_9-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: bash
run: |
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon
shell: bash
run: |
git config --global core.longpaths true
git config --global core.symlinks true
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
# Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560
- name: Enable long paths on Windows
shell: powershell
run: |
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
# Since it's just a defensive command, the workflow should continue even the command fails. This step can be
# removed once Windows Defender is removed from the AMI
- name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch
continue-on-error: true
shell: powershell
run: |
Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore
# Let's both exclude the path and disable Windows Defender completely just to be sure
# that it doesn't interfere
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Populate binary env
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
- name: Build PyTorch binary
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
name: libtorch-cuda12_9-shared-with-deps-release
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Wait until all sessions have drained
shell: powershell
working-directory: pytorch
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
working-directory: pytorch
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
libtorch-cuda12_9-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-cuda12_9-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon
shell: bash
run: |
git config --global core.longpaths true
git config --global core.symlinks true
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
# Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560
- name: Enable long paths on Windows
shell: powershell
run: |
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
# Since it's just a defensive command, the workflow should continue even the command fails. This step can be
# removed once Windows Defender is removed from the AMI
- name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch
continue-on-error: true
shell: powershell
run: |
Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore
# Let's both exclude the path and disable Windows Defender completely just to be sure
# that it doesn't interfere
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: bash
run: |
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-cuda12_9-shared-with-deps-release
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
- name: Test PyTorch binary
shell: bash
run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
- name: Wait until all sessions have drained
shell: powershell
working-directory: pytorch
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
working-directory: pytorch
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
libtorch-cuda12_9-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-cuda12_9-shared-with-deps-release-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu129
GPU_ARCH_VERSION: "12.9"
GPU_ARCH_TYPE: cuda
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_9-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
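The repeated "Populate binary env" steps above work through GitHub Actions environment files: appending NAME=value lines to the file named by $GITHUB_ENV exports those variables to later steps of the same job, which is why an echo without the >> "${GITHUB_ENV}" redirect would only print to the log. The same append from Python, as a sketch:

```python
import os

# GITHUB_ENV names a per-job file; NAME=value lines appended to it become
# environment variables for all subsequent steps of the job.
env_file = os.environ.get("GITHUB_ENV")  # unset outside GitHub Actions
if env_file:
    runner_temp = os.environ.get("RUNNER_TEMP", "/tmp")
    with open(env_file, "a", encoding="utf-8") as fh:
        fh.write(f"PYTORCH_FINAL_PACKAGE_DIR={runner_temp}/artifacts\n")
```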
libtorch-cuda13_0-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
-runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
+runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch

File diff suppressed because it is too large


@ -1,132 +0,0 @@
name: inductor-perf-nightly-rocm-mi300
on:
push:
tags:
- ciflow/inductor-perf-test-nightly-rocm-mi300/*
schedule:
- cron: 15 0 * * *
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
workflow_dispatch:
inputs:
training:
description: Run training (on by default)?
required: false
type: boolean
default: true
inference:
description: Run inference (on by default)?
required: false
type: boolean
default: true
default:
description: Run inductor_default?
required: false
type: boolean
default: false
dynamic:
description: Run inductor_dynamic_shapes?
required: false
type: boolean
default: false
cppwrapper:
description: Run inductor_cpp_wrapper?
required: false
type: boolean
default: false
cudagraphs:
description: Run inductor_cudagraphs?
required: false
type: boolean
default: true
freezing_cudagraphs:
description: Run inductor_cudagraphs with freezing for inference?
required: false
type: boolean
default: false
aotinductor:
description: Run aot_inductor for inference?
required: false
type: boolean
default: false
maxautotune:
description: Run inductor_max_autotune?
required: false
type: boolean
default: false
benchmark_configs:
description: The list of configs used the benchmark
required: false
type: string
default: inductor_huggingface_perf_rocm_mi300,inductor_timm_perf_rocm_mi300,inductor_torchbench_perf_rocm_mi300
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions: read-all
jobs:
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
linux-jammy-rocm-py3_10-inductor-benchmark-build:
if: github.repository_owner == 'pytorch'
name: rocm-py3_10-inductor-benchmark-build
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-rocm-py3_10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 1, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 2, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 3, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 4, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 5, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 6, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi300", shard: 7, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 1, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 2, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 3, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 4, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 5, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 6, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 7, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 8, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 9, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
]}
secrets: inherit
linux-jammy-rocm-py3_10-inductor-benchmark-test:
permissions:
id-token: write
contents: read
name: rocm-py3_10-inductor-benchmark-test
uses: ./.github/workflows/_rocm-test.yml
needs: linux-jammy-rocm-py3_10-inductor-benchmark-build
with:
build-environment: linux-jammy-rocm-py3_10
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
timeout-minutes: 720
# Disable monitor in perf tests for more investigation
disable-monitor: true
monitor-log-interval: 10
monitor-data-collect-interval: 2
secrets: inherit
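The deleted mi300 workflow above lists every shard of its test-matrix by hand; each entry is just a config name, a 1-based shard index, the shard count, and a fixed runner label. A hypothetical helper that would emit the same JSON (the names are copied from the workflow; the function itself is not part of the repo):

```python
import json

# Hypothetical helper: rebuild the {config, shard, num_shards, runner}
# entries that the removed workflow spelled out one line at a time.
def shard_entries(config: str, num_shards: int, runner: str) -> list[dict]:
    return [
        {"config": config, "shard": shard, "num_shards": num_shards, "runner": runner}
        for shard in range(1, num_shards + 1)
    ]

RUNNER = "linux.rocm.gpu.gfx942.1"
matrix = {"include": (
    shard_entries("inductor_huggingface_perf_rocm_mi300", 5, RUNNER)
    + shard_entries("inductor_timm_perf_rocm_mi300", 7, RUNNER)
    + shard_entries("inductor_torchbench_perf_rocm_mi300", 9, RUNNER)
)}
print(json.dumps(matrix, indent=2))
```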


@ -1,11 +1,11 @@
-name: inductor-perf-nightly-rocm-mi355
+name: inductor-perf-nightly-rocm
on:
push:
tags:
-- ciflow/inductor-perf-test-nightly-rocm-mi355/*
+- ciflow/inductor-perf-test-nightly-rocm/*
schedule:
-- cron: 15 0 * * *
+- cron: 0 7 * * 0,3
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
workflow_dispatch:
@ -59,7 +59,7 @@ on:
description: The list of configs used the benchmark
required: false
type: string
-default: inductor_huggingface_perf_rocm_mi355,inductor_timm_perf_rocm_mi355,inductor_torchbench_perf_rocm_mi355
+default: inductor_huggingface_perf_rocm,inductor_timm_perf_rocm,inductor_torchbench_perf_rocm
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@ -88,27 +88,23 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 1, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 2, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 3, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 4, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 5, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 6, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_timm_perf_rocm_mi355", shard: 7, num_shards: 7, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 1, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 2, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 3, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 4, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 5, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" }, { config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 6, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 7, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 8, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" },
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 9, num_shards: 9, runner: "linux.rocm.gpu.mi355.1" },
]} ]}
secrets: inherit secrets: inherit
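The shard/num_shards pairs above split one benchmark config across several runners. A minimal sketch of the usual round-robin semantics, assuming tests are assigned by index (the real partitioning lives in PyTorch's test tooling, not in the workflow):

```bash
# Hypothetical round-robin sharding: shard k of n takes every n-th test,
# offset by its 1-based index. Test names below are illustrative only.
shard=2
num_shards=5
tests=(test_a test_b test_c test_d test_e test_f test_g)
for i in "${!tests[@]}"; do
  if (( i % num_shards == shard - 1 )); then
    echo "shard ${shard} runs: ${tests[$i]}"
  fi
done
```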

View File

@ -1,148 +0,0 @@
name: inductor-perf-nightly-xpu
on:
push:
tags:
- ciflow/inductor-perf-test-nightly-xpu/*
schedule:
- cron: 30 17 * * *
workflow_dispatch:
inputs:
training:
description: Run training (on by default)?
required: false
type: boolean
default: true
inference:
description: Run inference (on by default)?
required: false
type: boolean
default: true
default:
description: Run inductor_default?
required: false
type: boolean
default: false
dynamic:
description: Run inductor_dynamic_shapes?
required: false
type: boolean
default: false
cppwrapper:
description: Run inductor_cpp_wrapper?
required: false
type: boolean
default: false
cudagraphs:
description: Run inductor_cudagraphs?
required: false
type: boolean
default: false
freezing_cudagraphs:
description: Run inductor_cudagraphs with freezing for inference?
required: false
type: boolean
default: false
aotinductor:
description: Run aot_inductor for inference?
required: false
type: boolean
default: false
maxautotune:
description: Run inductor_max_autotune?
required: false
type: boolean
default: false
benchmark_configs:
description: The list of configs used the benchmark
required: false
type: string
default: inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf,cachebench
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
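This concurrency expression, repeated across the workflows in this diff, deduplicates runs: PR runs share a key per PR number (so a newer push cancels the stale run), while branch pushes mix in the commit SHA so each push keeps its own group. A rough shell analogue with assumed stand-in values:

```bash
# Assumed stand-ins for the github.* context values used in the key.
workflow="inductor-perf-nightly-xpu"   # github.workflow
pr_number="12345"                      # github.event.pull_request.number (empty outside PRs)
ref_name="main"                        # github.ref_name fallback
sha_part=""                            # github.sha is mixed in only for branch pushes
group="${workflow}-${pr_number:-$ref_name}-${sha_part}"
echo "concurrency group: ${group}"
```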
permissions: read-all
jobs:
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
xpu-n-py3_10-inductor-benchmark-build:
name: xpu-n-py3.10-inductor-benchmark
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-xpu-n-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3-inductor-benchmarks
runner: linux.c7i.12xlarge
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_xpu", shard: 1, num_shards: 5, runner: "linux.idc.xpu" },
{ config: "inductor_huggingface_perf_xpu", shard: 2, num_shards: 5, runner: "linux.idc.xpu" },
{ config: "inductor_huggingface_perf_xpu", shard: 3, num_shards: 5, runner: "linux.idc.xpu" },
{ config: "inductor_huggingface_perf_xpu", shard: 4, num_shards: 5, runner: "linux.idc.xpu" },
{ config: "inductor_huggingface_perf_xpu", shard: 5, num_shards: 5, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 1, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 2, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 3, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 4, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 5, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_timm_perf_xpu", shard: 6, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 1, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 2, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 3, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 4, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 5, num_shards: 6, runner: "linux.idc.xpu" },
{ config: "inductor_torchbench_perf_xpu", shard: 6, num_shards: 6, runner: "linux.idc.xpu" },
]}
secrets: inherit
xpu-n-py3_10-inductor-benchmark-test-nightly:
permissions:
id-token: write
contents: read
if: github.event_name != 'workflow_dispatch'
name: xpu-n-py3.10-inductor-benchmark
uses: ./.github/workflows/_xpu-test.yml
needs: xpu-n-py3_10-inductor-benchmark-build
with:
build-environment: linux-jammy-xpu-n-py3.10
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-false-cppwrapper-true-aotinductor-true-freezing_cudagraphs-false-cudagraphs_low_precision-false
docker-image: ${{ needs.xpu-n-py3_10-inductor-benchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.xpu-n-py3_10-inductor-benchmark-build.outputs.test-matrix }}
timeout-minutes: 720
# Disable monitor in perf tests for more investigation
disable-monitor: true
monitor-log-interval: 10
monitor-data-collect-interval: 2
secrets: inherit
xpu-n-py3_10-inductor-benchmark-test:
permissions:
id-token: write
contents: read
if: github.event_name == 'workflow_dispatch'
name: xpu-n-py3.10-inductor-test
uses: ./.github/workflows/_xpu-test.yml
needs: xpu-n-py3_10-inductor-benchmark-build
with:
build-environment: linux-jammy-xpu-n-py3.10
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
docker-image: ${{ needs.xpu-n-py3_10-inductor-benchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.xpu-n-py3_10-inductor-benchmark-build.outputs.test-matrix }}
timeout-minutes: 720
disable-monitor: false
monitor-log-interval: 15
monitor-data-collect-interval: 4
secrets: inherit

View File

@ -88,6 +88,7 @@ jobs:
with: with:
build-environment: linux-jammy-rocm-py3_10 build-environment: linux-jammy-rocm-py3_10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
sync-tag: rocm-build
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" }, { config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },

View File

@ -1,10 +1,9 @@
name: inductor-rocm name: inductor-rocm
on: on:
schedule:
- cron: 0 * * * *
push: push:
branches: branches:
- main
- release/* - release/*
tags: tags:
- ciflow/inductor-rocm/* - ciflow/inductor-rocm/*

View File

@ -115,10 +115,10 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "inductor_amx", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "inductor_amx", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "inductor_amx", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "inductor_amx", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "inductor_avx2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.avx2" }, { config: "inductor_avx2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
{ config: "inductor_avx2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.avx2" }, { config: "inductor_avx2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
]} ]}
secrets: inherit secrets: inherit

View File

@ -84,13 +84,13 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge.amx" }, { config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
{ config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.24xl.spr-metal" }, { config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.24xl.spr-metal" },
]} ]}
build-additional-packages: "vision audio torchao" build-additional-packages: "vision audio torchao"

View File

@ -76,12 +76,11 @@ jobs:
# NOTE: mypy needs its own job because it depends on --all-files, without assessing all files it sometimes # NOTE: mypy needs its own job because it depends on --all-files, without assessing all files it sometimes
# fails to find types when it should # fails to find types when it should
# NOTE: We should be able to disable this and consolidate with Pyrefly lintrunner-mypy:
lintrunner-pyrefly:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
name: lintrunner-pyrefly-${{ needs.get-changed-files.outputs.changed-files == '*' && 'all' || 'partial' }} name: lintrunner-mypy-${{ needs.get-changed-files.outputs.changed-files == '*' && 'all' || 'partial' }}
needs: [get-label-type, get-changed-files] needs: [get-label-type, get-changed-files]
# Only run if there are changed files relevant to pyrefly # Only run if there are changed files relevant to mypy
if: | if: |
github.repository_owner == 'pytorch' && ( github.repository_owner == 'pytorch' && (
needs.get-changed-files.outputs.changed-files == '*' || needs.get-changed-files.outputs.changed-files == '*' ||
@ -99,8 +98,8 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: | script: |
CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}" CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}"
echo "Running pyrefly" echo "Running mypy"
ADDITIONAL_LINTRUNNER_ARGS="--take PYREFLY --all-files" .github/scripts/lintrunner.sh ADDITIONAL_LINTRUNNER_ARGS="--take MYPY,MYPYSTRICT --all-files" .github/scripts/lintrunner.sh
lintrunner-noclang: lintrunner-noclang:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@ -119,9 +118,9 @@ jobs:
CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}" CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}"
echo "Running all other linters" echo "Running all other linters"
if [ "$CHANGED_FILES" = '*' ]; then if [ "$CHANGED_FILES" = '*' ]; then
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,PYREFLY --all-files" .github/scripts/lintrunner.sh ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY,MYPYSTRICT --all-files" .github/scripts/lintrunner.sh
else else
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,PYREFLY ${CHANGED_FILES}" .github/scripts/lintrunner.sh ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY,MYPYSTRICT ${CHANGED_FILES}" .github/scripts/lintrunner.sh
fi fi
quick-checks: quick-checks:

View File

@ -41,7 +41,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
needs: get-label-type needs: get-label-type
with: with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
build-environment: linux-jammy-py3.10-gcc11 build-environment: linux-jammy-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11 docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
secrets: inherit secrets: inherit

View File

@ -7,11 +7,9 @@ on:
workflow_dispatch: workflow_dispatch:
inputs: inputs:
test_mode: test_mode:
type: choice required: false
options: type: string
- 'short' default: 'short'
- 'long'
- 'all'
description: tag filter for operator benchmarks, options from long, short, all description: tag filter for operator benchmarks, options from long, short, all
schedule: schedule:
# Run at 07:00 UTC every Sunday # Run at 07:00 UTC every Sunday
@ -30,49 +28,38 @@ permissions:
contents: read contents: read
jobs: jobs:
x86-opbenchmark-build: opbenchmark-build:
if: github.repository_owner == 'pytorch' if: github.repository_owner == 'pytorch'
name: x86-opbenchmark-build name: opbenchmark-build
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
with: with:
build-environment: linux-jammy-py3.10-gcc11-build build-environment: linux-jammy-py3.10-gcc11-build
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "cpu_operator_benchmark_${{ inputs.test_mode || 'short' }}", shard: 1, num_shards: 1, runner: "linux.12xlarge" }, { config: "cpu_operator_benchmark_short", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
]} ]}
secrets: inherit secrets: inherit
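The `${{ inputs.test_mode || 'short' }}` expression above lets one build job serve both triggers: on `workflow_dispatch` the chosen input is used, and on the Sunday schedule (where `inputs` is empty) it falls back to `short`. A shell analogue, with a hypothetical `TEST_MODE` variable standing in for the dispatch input:

```bash
# Scheduled runs have no dispatch input, so the default applies.
test_mode="${TEST_MODE:-short}"
echo "config: cpu_operator_benchmark_${test_mode}"
```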
x86-opbenchmark-test: opbenchmark-on-demand-build:
name: x86-opbenchmark-test if: ${{ github.event_name == 'workflow_dispatch' && github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_linux-test.yml name: opbenchmark-on-demand-build
needs: x86-opbenchmark-build
with:
build-environment: linux-jammy-py3.10-gcc11-build
docker-image: ${{ needs.x86-opbenchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.x86-opbenchmark-build.outputs.test-matrix }}
secrets: inherit
aarch64-opbenchmark-build:
if: github.repository_owner == 'pytorch'
name: aarch64-opbenchmark-build
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
with: with:
build-environment: linux-jammy-aarch64-py3.10 build-environment: linux-jammy-py3.10-gcc11-build
runner: linux.arm64.m7g.4xlarge docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
docker-image-name: ci-image:pytorch-linux-jammy-aarch64-py3.10-gcc11
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "cpu_operator_benchmark_short", shard: 1, num_shards: 1, runner: "linux.arm64.m8g.4xlarge" }, { config: "cpu_operator_benchmark_${{ inputs.test_mode }}", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
]} ]}
secrets: inherit secrets: inherit
aarch64-opbenchmark-test: opbenchmark-test:
name: aarch64-opbenchmark-test name: opbenchmark-test
uses: ./.github/workflows/_linux-test.yml uses: ./.github/workflows/_linux-test.yml
needs: aarch64-opbenchmark-build needs: opbenchmark-build
with: with:
build-environment: linux-jammy-aarch64-py3.10 build-environment: linux-jammy-py3.10-gcc11-build
docker-image: ${{ needs.aarch64-opbenchmark-build.outputs.docker-image }} docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.aarch64-opbenchmark-build.outputs.test-matrix }} test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }}
secrets: inherit secrets: inherit

View File

@ -1,84 +0,0 @@
name: periodic-rocm-mi200
on:
schedule:
# We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs.
# Also run less frequently on weekends.
- cron: 45 0,8,16 * * 1-5
- cron: 45 4 * * 0,6
- cron: 45 4,12,20 * * 1-5
- cron: 45 12 * * 0,6
- cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests
push:
tags:
- ciflow/periodic/*
- ciflow/periodic-rocm-mi200/*
branches:
- release/*
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
cancel-in-progress: true
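The comment on the cron list above refers to a common gating pattern: a step inspects `github.event.schedule` to run extra work only on certain crons. A sketch, assuming the cron string is exported into the step (e.g. via `env: SCHEDULE: ${{ github.event.schedule }}`):

```bash
# Branch on which cron fired this run (SCHEDULE is an assumed env var).
if [ "$SCHEDULE" = "29 8 * * *" ]; then
  echo "mem leak check / rerun-disabled-tests run"
else
  echo "regular periodic run"
fi
```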
permissions:
id-token: write
contents: read
jobs:
llm-td:
if: github.repository_owner == 'pytorch'
name: before-test
uses: ./.github/workflows/llm_td_retrieval.yml
permissions:
id-token: write
contents: read
target-determination:
name: before-test
uses: ./.github/workflows/target_determination.yml
needs: llm-td
permissions:
id-token: write
contents: read
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch'
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
]}
secrets: inherit
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit

View File

@ -147,16 +147,15 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-debug build-environment: linux-jammy-cuda12.8-py3.10-gcc9-debug
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9
cuda-arch-list: 8.9
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 2, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 2, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 3, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 3, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 4, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 4, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 5, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 5, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 6, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 6, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 7, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] }, { config: "default", shard: 7, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
]} ]}
secrets: inherit secrets: inherit
@ -204,6 +203,37 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit secrets: inherit
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
]}
secrets: inherit
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda12_8-py3-gcc11-slow-gradcheck-build: linux-jammy-cuda12_8-py3-gcc11-slow-gradcheck-build:
name: linux-jammy-cuda12.8-py3-gcc11-slow-gradcheck name: linux-jammy-cuda12.8-py3-gcc11-slow-gradcheck
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml

View File

@ -66,10 +66,10 @@ jobs:
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }, { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "docs_test", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }, { config: "docs_test", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }, { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "backwards_compat", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.c7i.2xlarge" }, { config: "backwards_compat", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "distributed", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }, { config: "distributed", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "distributed", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }, { config: "distributed", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "numpy_2_x", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.c7i.2xlarge" }, { config: "numpy_2_x", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]} ]}
secrets: inherit secrets: inherit
@ -167,8 +167,8 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-onnx docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-onnx
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "default", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.c7i.2xlarge" }, { config: "default", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.c7i.2xlarge" }, { config: "default", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]} ]}
secrets: inherit secrets: inherit
@ -347,8 +347,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
needs: get-label-type needs: get-label-type
with: with:
# This should sync with the build in xpu.yml but xpu uses a larger runner sync-tag: linux-xpu-n-build
# sync-tag: linux-xpu-n-build
runner_prefix: ${{ needs.get-label-type.outputs.label-type }} runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
build-environment: linux-jammy-xpu-n-py3.10 build-environment: linux-jammy-xpu-n-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3 docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3

View File

@ -45,6 +45,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-noble-rocm-py3.12-mi300 build-environment: linux-noble-rocm-py3.12-mi300
docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3 docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3
sync-tag: rocm-build
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" }, { config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },

View File

@ -42,14 +42,15 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-noble-rocm-py3.12-mi355 build-environment: linux-noble-rocm-py3.12-mi355
docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3 docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3
sync-tag: rocm-build
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi355.1" }, { config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
]} ]}
secrets: inherit secrets: inherit

View File

@ -1,75 +0,0 @@
name: rocm-navi31
on:
push:
tags:
- ciflow/rocm-navi31/*
workflow_dispatch:
schedule:
# We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs.
# Also run less frequently on weekends.
- cron: 45 */2 * * 1-5
- cron: 45 4,12 * * 0,6
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions: read-all
jobs:
target-determination:
if: github.repository_owner == 'pytorch'
name: before-test
uses: ./.github/workflows/target_determination.yml
permissions:
id-token: write
contents: read
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-rocm-py3_10-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
]}
secrets: inherit
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3_10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
tests-to-include: >-
${{ github.event_name == 'schedule' && 'test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs
test_autograd inductor/test_torchinductor inductor/test_kernel_benchmark
inductor/test_pad_mm inductor/test_benchmark_fusion inductor/test_aot_inductor
inductor/test_torchinductor inductor/test_decompose_mem_bound_mm
inductor/test_flex_attention inductor/test_max_autotune' || '' }}
secrets: inherit

View File

@ -3,13 +3,13 @@ name: rocm
on: on:
push: push:
branches: branches:
- main
- release/* - release/*
tags: tags:
- ciflow/rocm/* - ciflow/rocm/*
workflow_dispatch: workflow_dispatch:
schedule: schedule:
- cron: 29 8 * * * # about 1:29am PDT - cron: 29 8 * * * # about 1:29am PDT
- cron: 0 * * * *
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@ -26,23 +26,11 @@ jobs:
id-token: write id-token: write
contents: read contents: read
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-rocm-py3_10-build: linux-jammy-rocm-py3_10-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
name: linux-jammy-rocm-py3.10 name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with: with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10 build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3 docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build sync-tag: rocm-build
@ -71,3 +59,29 @@ jobs:
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }} docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }} test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit secrets: inherit
linux-jammy-rocm-py3_10-gfx1100-test:
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3_10-gfx1100
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
]}
tests-to-include: >
test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs
test_autograd inductor/test_torchinductor inductor/test_kernel_benchmark
inductor/test_pad_mm inductor/test_benchmark_fusion inductor/test_aot_inductor
inductor/test_torchinductor inductor/test_decompose_mem_bound_mm
inductor/test_flex_attention inductor/test_max_autotune
secrets: inherit
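The `tests-to-include: >` scalar above uses YAML folding: the newlines collapse to spaces, so the test runner receives one space-separated list of test modules. A hypothetical shell view of the result:

```bash
# Folded scalar as the runner sees it (abbreviated to four entries).
tests_to_include="test_nn test_torch test_cuda inductor/test_flex_attention"
for t in $tests_to_include; do   # word-splitting on spaces is intentional here
  echo "selected: $t"
done
```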

View File

@ -58,10 +58,8 @@ jobs:
else else
COMMIT_SHA="${{ github.sha }}" COMMIT_SHA="${{ github.sha }}"
fi fi
{ echo "sha=${COMMIT_SHA}" >> "${GITHUB_OUTPUT}"
echo "sha=${COMMIT_SHA}" echo "tag_name=trunk/${COMMIT_SHA}" >> "${GITHUB_OUTPUT}"
echo "tag_name=trunk/${COMMIT_SHA}"
} >> "${GITHUB_OUTPUT}"
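Unfused, the new side of this hunk groups both `echo`s under a single append, so `GITHUB_OUTPUT` is opened once rather than per line:

```bash
{
  echo "sha=${COMMIT_SHA}"
  echo "tag_name=trunk/${COMMIT_SHA}"
} >> "${GITHUB_OUTPUT}"
```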
- name: Validate commit SHA - name: Validate commit SHA
run: | run: |
@ -89,7 +87,7 @@ jobs:
echo "✅ Commit ${COMMIT_SHA} is valid (automatic push trigger)" echo "✅ Commit ${COMMIT_SHA} is valid (automatic push trigger)"
fi fi
- name: Create and push tag(s) with retry - name: Create and push tag with retry
id: check_tag id: check_tag
env: env:
TAG_NAME: ${{ steps.commit.outputs.tag_name }} TAG_NAME: ${{ steps.commit.outputs.tag_name }}
@ -114,23 +112,14 @@ jobs:
return 1 return 1
} }
# Counters for summary reporting # Exit early if tag already exists
created_count=0 if check_tag_exists; then
skipped_count=0 echo "✅ Tag already exists - no action needed"
failed_count=0 echo "exists=true" >> "${GITHUB_OUTPUT}"
exit 0
fi
# Always write outputs once on exit echo "Tag ${TAG_NAME} does not exist, proceeding with creation"
finish() {
set +e
if [ -n "${GITHUB_OUTPUT:-}" ]; then
{
echo "created_count=${created_count}"
echo "skipped_count=${skipped_count}"
echo "failed_count=${failed_count}"
} >> "${GITHUB_OUTPUT}"
fi
}
trap finish EXIT
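Restated cleanly, the added block registers an EXIT trap so the three counters are written to `GITHUB_OUTPUT` exactly once, whichever branch ends the script:

```bash
finish() {
  set +e
  if [ -n "${GITHUB_OUTPUT:-}" ]; then
    {
      echo "created_count=${created_count}"
      echo "skipped_count=${skipped_count}"
      echo "failed_count=${failed_count}"
    } >> "${GITHUB_OUTPUT}"
  fi
}
trap finish EXIT
```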
# Retry configuration # Retry configuration
MAX_RETRIES=5 MAX_RETRIES=5
@ -205,111 +194,31 @@ jobs:
} }
} }
# New behavior for push events: enumerate commits in the push and tag each one. # Execute with retry
# For workflow_dispatch, retain existing single-SHA behavior. if retry_with_backoff "tag_with_retry" "Creating tag ${TAG_NAME} for commit ${COMMIT_SHA}"; then
echo "exists=false" >> "${GITHUB_OUTPUT}"
# Always fetch tags once up front to improve idempotency in loops
git fetch origin --tags --quiet || true
if [ "${{ github.event_name }}" = "push" ]; then
BEFORE_SHA="${{ github.event.before }}"
AFTER_SHA="${{ github.sha }}" # same as event.after
# List commits introduced by this push (old..new), oldest first for stable ordering
commits_file="$(mktemp)"
git rev-list --reverse "${BEFORE_SHA}..${AFTER_SHA}" > "${commits_file}"
if [ ! -s "${commits_file}" ]; then
echo "No new commits found between ${BEFORE_SHA}..${AFTER_SHA}; nothing to tag."
rm -f "${commits_file}"
exit 0
fi
commit_count="$(wc -l < "${commits_file}" | tr -d ' ')"
echo "Found ${commit_count} commit(s) to tag for push:"
while IFS= read -r sha; do
printf ' %s\n' "${sha}"
done < "${commits_file}"
while IFS= read -r sha; do
TAG_NAME="trunk/${sha}"
COMMIT_SHA="${sha}"
# If tag already exists locally or remotely, skip (idempotent)
if check_tag_exists; then
echo "✅ Tag ${TAG_NAME} already exists - skipping"
skipped_count=$((skipped_count + 1))
continue
fi
echo "Tag ${TAG_NAME} does not exist, proceeding with creation"
if retry_with_backoff "tag_with_retry" "Creating tag ${TAG_NAME} for commit ${COMMIT_SHA}"; then
created_count=$((created_count + 1))
else
echo "Tag creation failed after all retry attempts for ${TAG_NAME}"
failed_count=$((failed_count + 1))
fi
done < "${commits_file}"
rm -f "${commits_file}"
if [ "${failed_count}" -gt 0 ]; then
exit 1
fi
exit 0 exit 0
else else
# workflow_dispatch path (single SHA tagging preserved) echo "Tag creation failed after all retry attempts"
exit 1
# Exit early if tag already exists
if check_tag_exists; then
echo "✅ Tag already exists - no action needed"
skipped_count=1
exit 0
fi
echo "Tag ${TAG_NAME} does not exist, proceeding with creation"
if retry_with_backoff "tag_with_retry" "Creating tag ${TAG_NAME} for commit ${COMMIT_SHA}"; then
created_count=1
exit 0
else
echo "Tag creation failed after all retry attempts"
failed_count=1
exit 1
fi
fi fi
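For reference, a minimal sketch of the retry helper both code paths call; this is an assumed shape (the real `retry_with_backoff` is defined earlier in the step, outside the lines shown here):

```bash
retry_with_backoff() {
  local fn="$1" desc="$2"
  local max_retries=5 delay=2
  for attempt in $(seq 1 "$max_retries"); do
    echo "${desc} (attempt ${attempt}/${max_retries})"
    if "$fn"; then
      return 0
    fi
    sleep "$delay"
    delay=$(( delay * 2 ))   # exponential backoff between attempts
  done
  return 1
}
```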
- name: Tag creation summary - name: Tag creation summary
if: always() if: always()
run: | run: |
if [ "${{ github.event_name }}" = "push" ]; then if [ "${{ steps.check_tag.outputs.exists }}" = "true" ]; then
echo "Trigger: push on main" echo "✅ Tag ${{ steps.commit.outputs.tag_name }} already existed - no action needed"
echo "Created: ${{ steps.check_tag.outputs.created_count }}" elif [ "${{ job.status }}" = "success" ]; then
echo "Skipped (already existed): ${{ steps.check_tag.outputs.skipped_count }}" echo "✅ Successfully created tag ${{ steps.commit.outputs.tag_name }} for commit ${{ steps.commit.outputs.sha }}"
echo "Failed: ${{ steps.check_tag.outputs.failed_count }}"
if [ "${{ steps.check_tag.outputs.failed_count }}" = "0" ]; then
echo "✅ Completed tagging for push range ${{ github.event.before }}..${{ github.sha }}"
else
echo "❌ Some tags failed to create for push range ${{ github.event.before }}..${{ github.sha }}"
fi
else else
if [ "${{ steps.check_tag.outputs.failed_count }}" = "0" ]; then echo "❌ Failed to create tag ${{ steps.commit.outputs.tag_name }} for commit ${{ steps.commit.outputs.sha }}"
if [ "${{ steps.check_tag.outputs.created_count }}" = "0" ]; then fi
echo "✅ Tag ${{ steps.commit.outputs.tag_name }} already existed - no action needed"
else echo ""
echo "✅ Successfully created tag ${{ steps.commit.outputs.tag_name }} for commit ${{ steps.commit.outputs.sha }}" echo "Tag details:"
fi echo " Name: ${{ steps.commit.outputs.tag_name }}"
else echo " Commit: ${{ steps.commit.outputs.sha }}"
echo "❌ Failed to create tag ${{ steps.commit.outputs.tag_name }} for commit ${{ steps.commit.outputs.sha }}" echo " Trigger: ${{ github.event_name }}"
fi if [ -n "${{ github.event.inputs.commit_sha }}" ]; then
echo " Manual commit: ${{ github.event.inputs.commit_sha }}"
echo ""
echo "Tag details:"
echo " Name: ${{ steps.commit.outputs.tag_name }}"
echo " Commit: ${{ steps.commit.outputs.sha }}"
echo " Trigger: ${{ github.event_name }}"
if [ -n "${{ github.event.inputs.commit_sha }}" ]; then
echo " Manual commit: ${{ github.event.inputs.commit_sha }}"
fi
fi fi

View File

@ -190,41 +190,6 @@ jobs:
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
secrets: inherit secrets: inherit
linux-jammy-rocm-py3_10-build:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
{ config: "distributed", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.4" },
]}
secrets: inherit
linux-jammy-rocm-py3_10-test:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
secrets: inherit
inductor-build: inductor-build:
name: inductor-build name: inductor-build
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
@ -235,23 +200,6 @@ jobs:
cuda-arch-list: '8.0' cuda-arch-list: '8.0'
secrets: inherit secrets: inherit
# Test cross-compiled models with Windows libs extracted from wheel
cross-compile-linux-test:
name: cross-compile-linux-test
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-build
- get-label-type
- win-vs2022-cuda12_8-py3-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc11
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.docker-image }}
test-matrix: |
{ include: [
{ config: "aoti_cross_compile_for_windows", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", win_torch_wheel_artifact: "win-vs2022-cuda12.8-py3" },
]}
secrets: inherit
verify-cachebench-cpu-build: verify-cachebench-cpu-build:
name: verify-cachebench-cpu-build name: verify-cachebench-cpu-build
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml

View File

@ -6,7 +6,6 @@ on:
- pull - pull
- trunk - trunk
- periodic - periodic
- periodic-rocm-mi200
- periodic-rocm-mi300 - periodic-rocm-mi300
- inductor - inductor
- unstable - unstable

View File

@ -59,18 +59,14 @@ jobs:
runner: linux.c7i.12xlarge runner: linux.c7i.12xlarge
test-matrix: | test-matrix: |
{ include: [ { include: [
{ config: "default", shard: 1, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 1, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 2, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 2, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 3, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 3, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 4, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 4, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 5, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 5, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 6, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 6, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 7, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 7, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 8, num_shards: 12, runner: "linux.idc.xpu" }, { config: "default", shard: 8, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 9, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 10, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 11, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 12, num_shards: 12, runner: "linux.idc.xpu" },
]} ]}
secrets: inherit secrets: inherit

.gitignore (vendored)
View File

@ -127,7 +127,6 @@ torch/test/
torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h
torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h
torch/version.py torch/version.py
torch/_inductor/kernel/vendored_templates/*
minifier_launcher.py minifier_launcher.py
aten/src/ATen/native/transformers/hip/flash_attn/ck/fmha_fwd_d* aten/src/ATen/native/transformers/hip/flash_attn/ck/fmha_fwd_d*
aten/src/ATen/native/transformers/hip/flash_attn/ck/fmha_bwd_d* aten/src/ATen/native/transformers/hip/flash_attn/ck/fmha_bwd_d*
@ -144,7 +143,6 @@ scripts/release_notes/*.json
sccache-stats*.json sccache-stats*.json
lint.json lint.json
merge_record.json merge_record.json
.github/scripts/nightly_source_matrix.json
# These files get copied over on invoking setup.py # These files get copied over on invoking setup.py
torchgen/packaged/* torchgen/packaged/*
@ -376,7 +374,6 @@ third_party/ruy/
third_party/glog/ third_party/glog/
# Virtualenv # Virtualenv
.venv/
venv/ venv/
# Log files # Log files
@ -399,4 +396,3 @@ CLAUDE.local.md
/test_*.py /test_*.py
/debug_*.py /debug_*.py
CLAUDE_CONTEXT/ CLAUDE_CONTEXT/
/.claude/settings.local.json

View File

@ -121,22 +121,35 @@ command = [
] ]
is_formatter = true is_formatter = true
[[linter]] [[linter]]
code = 'PYREFLY' code = 'MYPY'
include_patterns = [ include_patterns = [
'setup.py',
'functorch/dim/**/*.py',
'torch/**/*.py', 'torch/**/*.py',
'torch/**/*.pyi', 'torch/**/*.pyi',
'torchgen/**/*.py', 'caffe2/**/*.py',
'torchgen/**/*.pyi', 'caffe2/**/*.pyi',
'functorch/**/*.py', 'test/test_bundled_images.py',
'functorch/**/*.pyi', 'test/test_bundled_inputs.py',
'test/test_complex.py',
'test/test_datapipe.py',
'test/test_futures.py',
'test/test_numpy_interop.py',
'test/test_torch.py',
'test/test_type_hints.py',
'test/test_type_info.py',
'test/test_utils.py',
]
exclude_patterns = [
'**/fb/**',
] ]
exclude_patterns = []
command = [ command = [
'python3', 'python3',
'tools/linter/adapters/pyrefly_linter.py', 'tools/linter/adapters/mypy_linter.py',
'--config=pyrefly.toml', '--config=mypy.ini',
'--',
'@{{PATHSFILE}}'
] ]
init_command = [ init_command = [
'python3', 'python3',
@ -145,7 +158,7 @@ init_command = [
'numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11"', 'numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11"',
'numpy==2.1.0 ; python_version >= "3.12"', 'numpy==2.1.0 ; python_version >= "3.12"',
'expecttest==0.3.0', 'expecttest==0.3.0',
'pyrefly==0.36.2', 'mypy==1.16.0',
'sympy==1.13.3', 'sympy==1.13.3',
'types-requests==2.27.25', 'types-requests==2.27.25',
'types-pyyaml==6.0.2', 'types-pyyaml==6.0.2',
@ -157,9 +170,43 @@ init_command = [
'filelock==3.18.0', 'filelock==3.18.0',
'junitparser==2.1.1', 'junitparser==2.1.1',
'rich==14.1.0', 'rich==14.1.0',
'optree==0.17.0', 'pyyaml==6.0.2',
'types-openpyxl==3.1.5.20250919', 'optree==0.13.0',
'types-python-dateutil==2.9.0.20251008' 'dataclasses-json==0.6.7',
'pandas==2.2.3',
]
[[linter]]
code = 'MYPYSTRICT'
include_patterns = [
'.github/**/*.py',
'benchmarks/instruction_counts/**/*.py',
'tools/**/*.py',
'torchgen/**/*.py',
'torch/utils/_pytree.py',
'torch/utils/_cxx_pytree.py',
'torch/utils/benchmark/utils/common.py',
'torch/utils/benchmark/utils/timer.py',
'torch/utils/benchmark/utils/valgrind_wrapper/**/*.py',
]
exclude_patterns = [
# (linbinyu) copied from internal repo
'**/fb/**',
'tools/code_analyzer/gen_operators_yaml.py',
'tools/dynamo/verify_dynamo.py',
'tools/gen_vulkan_spv.py',
'tools/test/gen_operators_yaml_test.py',
'tools/test/gen_oplist_test.py',
'tools/test/test_selective_build.py',
'tools/experimental/torchfuzz/**',
]
command = [
'python3',
'tools/linter/adapters/mypy_linter.py',
'--config=mypy-strict.ini',
'--code=MYPYSTRICT',
'--',
'@{{PATHSFILE}}'
] ]
[[linter]] [[linter]]
@ -211,6 +258,7 @@ exclude_patterns = [
'**/*pb.h', '**/*pb.h',
'**/*inl.h', '**/*inl.h',
'aten/src/ATen/cpu/FlushDenormal.cpp', 'aten/src/ATen/cpu/FlushDenormal.cpp',
'aten/src/ATen/cpu/Utils.cpp',
'aten/src/ATen/cpu/vml.h', 'aten/src/ATen/cpu/vml.h',
'aten/src/ATen/CPUFixedAllocator.h', 'aten/src/ATen/CPUFixedAllocator.h',
'aten/src/ATen/Parallel*.h', 'aten/src/ATen/Parallel*.h',
@ -229,6 +277,8 @@ exclude_patterns = [
'c10/util/win32-headers.h', 'c10/util/win32-headers.h',
'c10/test/**/*.h', 'c10/test/**/*.h',
'third_party/**/*', 'third_party/**/*',
'torch/csrc/api/include/torch/nn/modules/common.h',
'torch/csrc/api/include/torch/linalg.h',
'torch/csrc/autograd/generated/**', 'torch/csrc/autograd/generated/**',
'torch/csrc/distributed/**/*.cu', 'torch/csrc/distributed/**/*.cu',
'torch/csrc/distributed/c10d/WinSockUtils.hpp', 'torch/csrc/distributed/c10d/WinSockUtils.hpp',
@ -240,6 +290,7 @@ exclude_patterns = [
'torch/csrc/utils/generated_serialization_types.h', 'torch/csrc/utils/generated_serialization_types.h',
'torch/csrc/utils/pythoncapi_compat.h', 'torch/csrc/utils/pythoncapi_compat.h',
'torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h', 'torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h',
'aten/src/ATen/ExpandBase.h',
] ]
init_command = [ init_command = [
'python3', 'python3',
@ -742,7 +793,8 @@ exclude_patterns = [
command = [ command = [
'python3', 'python3',
'tools/linter/adapters/grep_linter.py', 'tools/linter/adapters/grep_linter.py',
'--pattern=(cudaSetDevice|cudaGetDevice)\\(', '--pattern=cudaSetDevice(',
'--pattern=cudaGetDevice(',
'--linter-name=RAWCUDADEVICE', '--linter-name=RAWCUDADEVICE',
'--error-name=raw CUDA API usage', '--error-name=raw CUDA API usage',
"""--error-description=\ """--error-description=\
@ -1046,8 +1098,11 @@ command = [
[[linter]] [[linter]]
code = 'WORKFLOWSYNC' code = 'WORKFLOWSYNC'
include_patterns = [ include_patterns = [
'.github/workflows/*.yml', '.github/workflows/pull.yml',
'.github/workflows/*.yaml', '.github/workflows/trunk.yml',
'.github/workflows/periodic.yml',
'.github/workflows/mac-mps.yml',
'.github/workflows/slow.yml',
] ]
command = [ command = [
'python3', 'python3',

View File

@ -374,7 +374,7 @@ cmake_dependent_option(
"Build the lazy Torchscript backend, not compatible with mobile builds" ON "Build the lazy Torchscript backend, not compatible with mobile builds" ON
"NOT INTERN_BUILD_MOBILE" OFF) "NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin folder" cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
OFF "USE_CUDA" OFF) OFF "USE_CUDA" OFF)
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
"CPU_AARCH64" OFF) "CPU_AARCH64" OFF)

View File

@ -201,17 +201,3 @@ torch/backends/cudnn/ @eqy @syed-ahmed @Aidyn-A
/torch/csrc/stable/ @janeyx99 @mikaylagawarecki /torch/csrc/stable/ @janeyx99 @mikaylagawarecki
/torch/headeronly/ @janeyx99 /torch/headeronly/ @janeyx99
/torch/header_only_apis.txt @janeyx99 /torch/header_only_apis.txt @janeyx99
# FlexAttention
/torch/nn/attention/flex_attention.py @drisspg
/torch/_higher_order_ops/flex_attention.py @drisspg
/torch/_inductor/kernel/flex/ @drisspg
/torch/_inductor/codegen/cpp_flex_attention_template.py @drisspg
/test/inductor/test_flex_attention.py @drisspg
/test/inductor/test_flex_decoding.py @drisspg
# Low Precision GEMMs
/aten/src/ATen/native/cuda/Blas.cpp @drisspg @slayton58
/aten/src/ATen/cuda/CUDABlas.cpp @drisspg @slayton58
/aten/src/ATen/cuda/CUDABlas.h @drisspg @slayton58
/test/test_scaled_matmul_cuda.py @drisspg @slayton58

View File

@ -11,6 +11,7 @@ aspects of contributing to PyTorch.
<!-- toc --> <!-- toc -->
- [Developing PyTorch](#developing-pytorch) - [Developing PyTorch](#developing-pytorch)
- [Setup the development environment](#setup-the-development-environment)
- [Tips and Debugging](#tips-and-debugging) - [Tips and Debugging](#tips-and-debugging)
- [Nightly Checkout & Pull](#nightly-checkout--pull) - [Nightly Checkout & Pull](#nightly-checkout--pull)
- [Codebase structure](#codebase-structure) - [Codebase structure](#codebase-structure)
@ -66,6 +67,23 @@ aspects of contributing to PyTorch.
Follow the instructions for [installing PyTorch from source](https://github.com/pytorch/pytorch#from-source). If you get stuck when developing PyTorch on your machine, check out the [tips and debugging](#tips-and-debugging) section below for common solutions. Follow the instructions for [installing PyTorch from source](https://github.com/pytorch/pytorch#from-source). If you get stuck when developing PyTorch on your machine, check out the [tips and debugging](#tips-and-debugging) section below for common solutions.
### Setup the development environment
First, you need to [fork the PyTorch project on GitHub](https://github.com/pytorch/pytorch/fork) and follow the instructions at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh) to set up your SSH authentication credentials.
Then clone the PyTorch project and set up the development environment:
```bash
git clone git@github.com:<USERNAME>/pytorch.git
cd pytorch
git remote add upstream git@github.com:pytorch/pytorch.git
make setup-env
# Or run `make setup-env-cuda` for pre-built CUDA binaries
# Or run `make setup-env-rocm` for pre-built ROCm binaries
source venv/bin/activate # or `. .\venv\Scripts\activate` on Windows
```
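A quick sanity check after the setup, our suggestion rather than part of the guide: confirm the virtualenv activates and that `torch` imports once the build or pre-built install has finished.

```bash
source venv/bin/activate
python -c "import torch; print(torch.__version__)"
```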
### Tips and Debugging ### Tips and Debugging
* If you want to have no-op incremental rebuilds (which are fast), see [Make no-op build fast](#make-no-op-build-fast) below. * If you want to have no-op incremental rebuilds (which are fast), see [Make no-op build fast](#make-no-op-build-fast) below.

README.md (View File)

@@ -1,4 +1,4 @@
-![PyTorch Logo](https://github.com/pytorch/pytorch/raw/main/docs/source/_static/img/pytorch-logo-dark.png)
+![PyTorch Logo](https://github.com/pytorch/pytorch/blob/9708fcf92db88b80b9010c68662d634434da3106/docs/source/_static/img/pytorch-logo-dark.png)

 --------------------------------------------------------------------------------

@@ -72,7 +72,7 @@ Elaborating Further:

 If you use NumPy, then you have used Tensors (a.k.a. ndarray).

-![Tensor illustration](https://github.com/pytorch/pytorch/raw/main/docs/source/_static/img/tensor_illustration.png)
+![Tensor illustration](https://github.com/pytorch/pytorch/blob/9708fcf92db88b80b9010c68662d634434da3106/docs/source/_static/img/tensor_illustration.png)

 PyTorch provides Tensors that can live either on the CPU or the GPU and accelerates the
 computation by a huge amount.

@@ -99,7 +99,7 @@ from several research papers on this topic, as well as current and past work suc

 While this technique is not unique to PyTorch, it's one of the fastest implementations of it to date.
 You get the best of speed and flexibility for your crazy research.

-![Dynamic graph](https://github.com/pytorch/pytorch/raw/main/docs/source/_static/img/dynamic_graph.gif)
+![Dynamic graph](https://github.com/pytorch/pytorch/blob/9708fcf92db88b80b9010c68662d634434da3106/docs/source/_static/img/dynamic_graph.gif)

 ### Python First

SECURITY.md (View File)

@@ -1,7 +1,7 @@
 # Security Policy

 - [**Reporting a Vulnerability**](#reporting-a-vulnerability)
-- [**Using PyTorch Securely**](#using-pytorch-securely)
+- [**Using Pytorch Securely**](#using-pytorch-securely)
   - [Untrusted models](#untrusted-models)
   - [TorchScript models](#torchscript-models)
   - [Untrusted inputs](#untrusted-inputs)
@@ -10,30 +10,30 @@
   - [**CI/CD security principles**](#cicd-security-principles)

 ## Reporting Security Issues

-Beware that none of the topics under [Using PyTorch Securely](#using-pytorch-securely) are considered vulnerabilities of PyTorch.
+Beware that none of the topics under [Using Pytorch Securely](#using-pytorch-securely) are considered vulnerabilities of Pytorch.

 However, if you believe you have found a security vulnerability in PyTorch, we encourage you to let us know right away. We will investigate all legitimate reports and do our best to quickly fix the problem.

 Please report security issues using https://github.com/pytorch/pytorch/security/advisories/new

-All reports submitted through the security advisories mechanism would **either be made public or dismissed by the team within 90 days of the submission**. If advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create an [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework.
+All reports submitted thru the security advisories mechanism would **either be made public or dismissed by the team within 90 days of the submission**. If advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create an [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework.

 Please refer to the following page for our responsible disclosure policy, reward guidelines, and those things that should not be reported:

 https://www.facebook.com/whitehat

-## Using PyTorch Securely
+## Using Pytorch Securely

-**PyTorch models are programs**, so treat its security seriously -- running untrusted models is equivalent to running untrusted code. In general we recommend that model weights and the python code for the model are distributed independently. That said, be careful about where you get the python code from and who wrote it (preferentially check for a provenance or checksums, do not run any pip installed package).
+**Pytorch models are programs**, so treat its security seriously -- running untrusted models is equivalent to running untrusted code. In general we recommend that model weights and the python code for the model are distributed independently. That said, be careful about where you get the python code from and who wrote it (preferentially check for a provenance or checksums, do not run any pip installed package).

 ### Untrusted models

 Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources[^data-poisoning-sources].

 **Prefer to execute untrusted models within a secure, isolated environment such as a sandbox** (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. You can find further details and instructions in [this page](https://developers.google.com/code-sandboxing).

-**Be mindful of risky model formats**. Give preference to share and load weights with the appropriate format for your use case. [Safetensors](https://huggingface.co/docs/safetensors/en/index) gives the most safety but is the most restricted in what it supports. [`torch.load`](https://pytorch.org/docs/stable/generated/torch.load.html#torch.load) has a significantly larger surface of attack but is more flexible in what it can serialize. See the documentation for more details.
-
-Even for more secure serialization formats, unexpected inputs to the downstream system can cause diverse security threats (e.g. denial of service, out of bound reads/writes) and thus we recommend extensive validation of any untrusted inputs.
+**Be mindful of risky model formats**. Give preference to share and load weights with the appropriate format for your use case. [safetensors](https://huggingface.co/docs/safetensors/en/index) gives the most safety but is the most restricted in what it supports. [`torch.load`](https://pytorch.org/docs/stable/generated/torch.load.html#torch.load) with `weights_only=True` is also secure to our knowledge even though it offers significantly larger surface of attack. Loading un-trusted checkpoint with `weights_only=False` MUST never be done.

 Important Note: The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.

@@ -43,7 +43,7 @@ Important Note: The trustworthiness of a model is not binary. You must always de

 ### TorchScript models

-TorchScript models should be treated the same way as locally executable code from an unknown source. Only run TorchScript models if you trust the provider. Please note, that tools for introspecting TorchScript models (such as `torch.utils.model_dump`) may also execute partial or full code stored in those models, therefore they should be used only if you trust the provider of the binary you are about to load.
+TorchScript models should treated the same way as locally executable code from an unknown source. Only run TorchScript models if you trust the provider. Please note, that tools for introspecting TorchScript models (such as `torch.utils.model_dump`) may also execute partial or full code stored in those models, therefore they should be used only if you trust the provider of the binary you are about to load.

 ### Untrusted inputs during training and prediction

@@ -59,9 +59,9 @@ If applicable, prepare your model against bad inputs and prompt injections. Some

 ### Data privacy

-**Take special security measures if you train your models with sensitive data**. Prioritize [sandboxing](https://developers.google.com/code-sandboxing) your models and:
-- Do not feed sensitive data to an untrusted model (even if runs in a sandboxed environment)
-- If you consider publishing a model that was partially trained with sensitive data, be aware that data can potentially be recovered from the trained weights (especially if the model overfits).
+**Take special security measures if your model if you train models with sensitive data**. Prioritize [sandboxing](https://developers.google.com/code-sandboxing) your models and:
+- Do not feed sensitive data to untrusted model (even if runs in a sandboxed environment)
+- If you consider publishing a model that was partially trained with sensitive data, be aware that data can potentially be recovered from the trained weights (especially if model overfits).

 ### Using distributed features

aten/src/ATen/CMakeLists.txt (View File)

@@ -38,7 +38,7 @@ set_bool(AT_HIPSPARSELT_ENABLED CAFFE2_USE_HIPSPARSELT)
 configure_file(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")

 # TODO: Do not generate CUDAConfig.h for ROCm BUILDS
-# At the moment, `jit_macros.h` include CUDAConfig.h for both CUDA and HIP builds
+# At the moment, `jit_macors.h` include CUDAConfig.h for both CUDA and HIP builds
 if(USE_CUDA OR USE_ROCM)
   configure_file(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h")
 endif()
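As a reminder of what `configure_file()` does in this hunk: it expands a `*.h.in` template with the configure-time values of CMake variables to produce a real header. A minimal sketch, assuming an illustrative template `demo_config.h.in` rather than the real `CUDAConfig.h.in`:

```cmake
# Assumed template demo_config.h.in (not a real PyTorch file):
#   #define DEMO_ROCM_ENABLED() @DEMO_ROCM_ENABLED@
set(DEMO_ROCM_ENABLED 0)  # value substituted for @DEMO_ROCM_ENABLED@ at configure time
configure_file(demo_config.h.in "${CMAKE_CURRENT_BINARY_DIR}/demo_config.h")
```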
@@ -256,11 +256,10 @@ endif()
 IF(USE_FBGEMM_GENAI)
   set(FBGEMM_THIRD_PARTY ${PROJECT_SOURCE_DIR}/third_party/fbgemm/external/)
   set(FBGEMM_GENAI_SRCS ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize)
   if(USE_CUDA)
     # To avoid increasing the build time/binary size unnecessarily, use an allow-list of kernels to build.
     # If you want to integrate a kernel from FBGEMM into torch, you have to add it here.
-    set(FBGEMM_CUTLASS_KERNELS_REGEX ".*(mx8mx8bf16_grouped|f4f4bf16_grouped|f4f4bf16).*")
+    set(FBGEMM_CUTLASS_KERNELS_REGEX ".*mx8mx8bf16_grouped.*")
     file(GLOB_RECURSE fbgemm_genai_native_cuda_cu
       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/*.cu"
       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/**/*.cu")
@@ -289,71 +288,62 @@ IF(USE_FBGEMM_GENAI)
     set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)

-    set(fbgemm_genai_cuh
+    set(fbgemm_genai_mx8mx8bf16_grouped
       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/mx8mx8bf16_grouped/"
-      "${FBGEMM_GENAI_SRCS}/cutlass_extensions/f4f4bf16_grouped/"
-      "${FBGEMM_GENAI_SRCS}/"
     )

-    target_include_directories(fbgemm_genai PRIVATE
+    target_include_directories(fbgemm_genai PUBLIC
       ${FBGEMM_THIRD_PARTY}/cutlass/include
       ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
-      ${fbgemm_genai_cuh}
+      ${fbgemm_genai_mx8mx8bf16_grouped}
       ${FBGEMM_GENAI_SRCS}/common/include/   # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
       ${FBGEMM_GENAI_SRCS}/include/          # includes fbgemm_gpu/torch_ops.h
     )
-
-    # Add FBGEMM_GENAI include directories for torch_ops.h
-    list(APPEND ATen_CUDA_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/include)
-    list(APPEND ATen_CUDA_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include)
-  elseif(USE_ROCM)
-    # Only include the kernels we want to build to avoid increasing binary size.
-    file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
-      "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
-      "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
-    set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-    # Add additional HIPCC compiler flags for performance
-    set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
-      -mllvm
-      -enable-post-misched=0
-      -mllvm
-      -greedy-reverse-local-assignment=1
-      -fhip-new-launch-api)
-    if(DEFINED ROCM_VERSION_DEV AND ROCM_VERSION_DEV VERSION_LESS "7.2.0")
-      list(PREPEND FBGEMM_GENAI_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-coerce-illegal-types=1)
-    endif()
-    # Only compile for gfx942 for now.
-    # This is rather hacky, I could not figure out a clean solution :(
-    set(HIP_CLANG_FLAGS_ORIGINAL ${HIP_CLANG_FLAGS})
-    string(REGEX REPLACE "--offload-arch=[^ ]*" "" FILTERED_HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS}")
-    if("gfx942" IN_LIST PYTORCH_ROCM_ARCH)
-      list(APPEND FILTERED_HIP_CLANG_FLAGS --offload-arch=gfx942;)
-    endif()
-    set(HIP_CLANG_FLAGS ${FILTERED_HIP_CLANG_FLAGS})
-    hip_add_library(
-      fbgemm_genai STATIC
-      ${fbgemm_genai_native_rocm_hip}
-      HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
-    set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS_ORIGINAL})
-    set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
-    target_include_directories(fbgemm_genai PRIVATE
-      # FBGEMM version of Composable Kernel is used due to some customizations
-      ${FBGEMM_THIRD_PARTY}/composable_kernel/include
-      ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
-      ${FBGEMM_THIRD_PARTY}/cutlass/include
-      ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
-      ${FBGEMM_GENAI_SRCS}/common/include/   # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
-      ${FBGEMM_GENAI_SRCS}/include/          # includes fbgemm_gpu/torch_ops.h
-    )
-    # Add FBGEMM_GENAI include directories for torch_ops.h
-    list(APPEND ATen_HIP_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/include)
-    list(APPEND ATen_HIP_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include)
+  else()
+    if(USE_ROCM)
+      # Only include the kernels we want to build to avoid increasing binary size.
+      file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
+        "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
+        "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
+      set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+      # Add additional HIPCC compiler flags for performance
+      set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
+        -mllvm
+        -amdgpu-coerce-illegal-types=1
+        -mllvm
+        -enable-post-misched=0
+        -mllvm
+        -greedy-reverse-local-assignment=1
+        -fhip-new-launch-api)
+      # Only compile for gfx942 for now.
+      # This is rather hacky, I could not figure out a clean solution :(
+      set(HIP_CLANG_FLAGS_ORIGINAL ${HIP_CLANG_FLAGS})
+      string(REGEX REPLACE "--offload-arch=[^ ]*" "" FILTERED_HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS}")
+      if("gfx942" IN_LIST PYTORCH_ROCM_ARCH)
+        list(APPEND FILTERED_HIP_CLANG_FLAGS --offload-arch=gfx942;)
+      endif()
+      set(HIP_CLANG_FLAGS ${FILTERED_HIP_CLANG_FLAGS})
+      hip_add_library(
+        fbgemm_genai STATIC
+        ${fbgemm_genai_native_rocm_hip}
+        HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
+      set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS_ORIGINAL})
+      set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
+      target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
+      target_include_directories(fbgemm_genai PUBLIC
+        # FBGEMM version of Composable Kernel is used due to some customizations
+        ${FBGEMM_THIRD_PARTY}/composable_kernel/include
+        ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
+        ${FBGEMM_THIRD_PARTY}/cutlass/include
+        ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
+        ${FBGEMM_GENAI_SRCS}/common/include/   # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
+        ${FBGEMM_GENAI_SRCS}/include/          # includes fbgemm_gpu/torch_ops.h
+      )
+    endif()
   endif()
 endif()
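Both versions of the ROCm branch above rely on the same flag-rewriting trick: temporarily rewrite the global HIP flags so a single target is built only for a restricted set of GPU architectures, then restore the originals. A condensed sketch of just that trick; the demo values of `HIP_CLANG_FLAGS` and `PYTORCH_ROCM_ARCH` are assumptions for illustration:

```cmake
set(HIP_CLANG_FLAGS "-O3 --offload-arch=gfx90a --offload-arch=gfx942")  # demo value
set(PYTORCH_ROCM_ARCH gfx90a gfx942)                                    # demo value

# Save the global flags so they can be restored after the target is declared.
set(HIP_CLANG_FLAGS_ORIGINAL ${HIP_CLANG_FLAGS})

# Drop every existing --offload-arch=<gpu> token from the flag string...
string(REGEX REPLACE "--offload-arch=[^ ]*" "" FILTERED_HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS}")

# ...and re-add only the architecture this target actually supports.
if("gfx942" IN_LIST PYTORCH_ROCM_ARCH)
  list(APPEND FILTERED_HIP_CLANG_FLAGS --offload-arch=gfx942)
endif()

set(HIP_CLANG_FLAGS ${FILTERED_HIP_CLANG_FLAGS})
# ... declare the HIP target here (hip_add_library in the hunk above) ...
set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS_ORIGINAL})  # restore for later targets
```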
@@ -702,6 +692,12 @@ if(USE_CUDA AND NOT USE_ROCM)
   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
+
+  # Add FBGEMM_GENAI include directories for torch_ops.h
+  if(USE_FBGEMM_GENAI)
+    list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/include)
+    list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include)
+  endif()
+
   if($ENV{ATEN_STATIC_CUDA})
     if(CUDA_VERSION VERSION_LESS_EQUAL 12.9)
       list(APPEND ATen_CUDA_DEPENDENCY_LIBS

Some files were not shown because too many files have changed in this diff.