Compare commits


1 Commit

Author SHA1 Message Date
57f9e88fbc test 2025-08-19 17:29:55 -07:00
188 changed files with 2626 additions and 5855 deletions

View File

@@ -92,7 +92,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
"/usr/local/cuda/lib64/libnccl.so.2",
"/usr/local/cuda/lib64/libnvJitLink.so.12",
"/usr/local/cuda/lib64/libnvrtc.so.12",
"/usr/local/cuda/lib64/libnvshmem_host.so.3",
"/usr/local/cuda/lib64/libcudnn_adv.so.9",
"/usr/local/cuda/lib64/libcudnn_cnn.so.9",
"/usr/local/cuda/lib64/libcudnn_graph.so.9",
@@ -210,6 +209,8 @@ if __name__ == "__main__":
# MAX_JOBS=5 is not required for CPU backend (see commit 465d98b)
if enable_cuda:
build_vars += "MAX_JOBS=5 "
# nvshmem is broken for aarch64 see https://github.com/pytorch/pytorch/issues/160425
build_vars += "USE_NVSHMEM=OFF "
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
desired_cuda = os.getenv("DESIRED_CUDA")

View File

@@ -64,10 +64,6 @@ FROM cuda as cuda12.9
RUN bash ./install_cuda.sh 12.9
ENV DESIRED_CUDA=12.9
FROM cuda as cuda13.0
RUN bash ./install_cuda.sh 13.0
ENV DESIRED_CUDA=13.0
FROM ${ROCM_IMAGE} as rocm
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
ADD ./common/install_mkl.sh install_mkl.sh
@@ -83,7 +79,6 @@ FROM base as all_cuda
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6
COPY --from=cuda12.8 /usr/local/cuda-12.8 /usr/local/cuda-12.8
COPY --from=cuda12.9 /usr/local/cuda-12.9 /usr/local/cuda-12.9
COPY --from=cuda13.0 /usr/local/cuda-13.0 /usr/local/cuda-13.0
# Final step
FROM ${BASE_TARGET} as final

View File

@@ -168,7 +168,7 @@ case "$tag" in
TRITON=yes
;;
pytorch-linux-jammy-py3-clang12-onnx)
ANACONDA_PYTHON_VERSION=3.10
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=12
VISION=yes
ONNX=yes
@@ -288,6 +288,7 @@ case "$tag" in
GCC_VERSION=11
ACL=yes
VISION=yes
CONDA_CMAKE=yes
OPENBLAS=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
@@ -298,6 +299,7 @@ case "$tag" in
GCC_VERSION=11
ACL=yes
VISION=yes
CONDA_CMAKE=yes
OPENBLAS=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific

View File

@@ -1,2 +0,0 @@
transformers==4.54.0
soxr==0.5.0

View File

@@ -0,0 +1 @@
v4.54.0

View File

@@ -1 +0,0 @@
v2.27.7-1

View File

@@ -10,7 +10,7 @@ else
arch_path='sbsa'
fi
NVSHMEM_VERSION=3.3.20
NVSHMEM_VERSION=3.3.9
function install_cuda {
version=$1
@@ -62,16 +62,14 @@ function install_nvshmem {
mkdir -p "${tmpdir}" && cd "${tmpdir}"
# nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
# This pattern is a lie, as it is not consistent across versions; for 3.3.9 it was cuda_ver-arch-nvshmem-ver
filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive"
suffix=".tar.xz"
url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}${suffix}"
filename="libnvshmem_cuda${cuda_major_version}-linux-${arch_path}-${nvshmem_version}"
url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}.tar.gz"
# download, unpack, install
wget -q "${url}"
tar xf "${filename}${suffix}"
cp -a "${filename}/include/"* /usr/local/cuda/include/
cp -a "${filename}/lib/"* /usr/local/cuda/lib64/
tar xf "${filename}.tar.gz"
cp -a "libnvshmem/include/"* /usr/local/cuda/include/
cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/
# cleanup
cd ..
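
For reference, the download-and-install flow as it reads after this hunk, in one piece; a minimal sketch assuming arch_path and dl_arch are resolved earlier in the script (only arch_path is visible in this hunk):

    nvshmem_version=3.3.9
    cuda_major_version=12
    filename="libnvshmem_cuda${cuda_major_version}-linux-${arch_path}-${nvshmem_version}"
    url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}.tar.gz"
    wget -q "${url}"
    tar xf "${filename}.tar.gz"
    # the 3.3.9 tarball unpacks into a plain libnvshmem/ directory
    cp -a "libnvshmem/include/"* /usr/local/cuda/include/
    cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/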
@@ -128,6 +126,74 @@ function install_129 {
ldconfig
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.4 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}
function prune_126 {
echo "Pruning CUDA 12.6"
#####################################################################################
# CUDA 12.6 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.6 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.6/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}
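
Both prune_* functions shrink the toolkit by stripping device code for architectures outside GENCODE from every static library. A single hedged example of the underlying command, using the CUDA 12.6 paths from prune_126 and libcufft_static.a as an illustrative target (subset of gencode flags shown for brevity):

    export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
    # prune in place: keep only the listed gencode targets
    $NVPRUNE -gencode arch=compute_80,code=sm_80 -gencode arch=compute_90,code=sm_90 \
        $CUDA_LIB_DIR/libcufft_static.a -o $CUDA_LIB_DIR/libcufft_static.a

The ls | grep pipeline above applies exactly this command to every .a except culibos, cudart, cudnn, cublas, and metis.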
function install_128 {
CUDNN_VERSION=9.8.0.87
echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
@@ -146,39 +212,18 @@ function install_128 {
ldconfig
}
function install_130 {
CUDNN_VERSION=9.12.0.46
NVSHMEM_VERSION=3.3.20
echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
# install CUDA 13.0 in the same container
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
install_cudnn 13 $CUDNN_VERSION
install_nvshmem 13 $NVSHMEM_VERSION
CUDA_VERSION=13.0 bash install_nccl.sh
CUDA_VERSION=13.0 bash install_cusparselt.sh
ldconfig
}
# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
12.4) install_124;
12.4) install_124; prune_124
;;
12.6|12.6.*) install_126;
12.6|12.6.*) install_126; prune_126
;;
12.8|12.8.*) install_128;
;;
12.9|12.9.*) install_129;
;;
13.0|13.0.*) install_130;
;;
*) echo "bad argument $1"; exit 1
;;
esac
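
Spelled out, the dispatch above is the classic sh argument loop the comment refers to; the shift that advances to the next argument is assumed here, since the hunk ends at esac:

    while test $# -gt 0
    do
        case "$1" in
            12.8|12.8.*) install_128
                ;;
            *) echo "bad argument $1"; exit 1
                ;;
        esac
        shift
    done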

View File

@@ -5,15 +5,7 @@ set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt
if [[ ${CUDA_VERSION:0:4} =~ "13" ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.8.0.4_cuda13-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then

View File

@@ -5,7 +5,9 @@ set -ex
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
function install_huggingface() {
pip_install -r huggingface-requirements.txt
local commit
commit=$(get_pinned_commit huggingface)
pip_install "git+https://github.com/huggingface/transformers@${commit}"
}
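
get_pinned_commit comes from the common_utils.sh sourced above; a hypothetical stand-in to show the idea, assuming it simply prints the matching pin file (the Dockerfiles later in this diff copy ci_commit_pins/huggingface.txt into the build context):

    get_pinned_commit() {
        cat "${1}.txt"    # hypothetical: the real helper lives in common_utils.sh
    }
    commit=$(get_pinned_commit huggingface)    # yields v4.54.0 per the pin added above
    pip install "git+https://github.com/huggingface/transformers@${commit}"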
function install_timm() {
@@ -24,6 +26,9 @@ function install_torchbench() {
python install.py --continue_on_fail
# soxr comes from https://github.com/huggingface/transformers/pull/39429
pip install transformers==4.54.0 soxr==0.5.0
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd

View File

@@ -7,8 +7,6 @@ if [[ ${CUDA_VERSION:0:2} == "11" ]]; then
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
elif [[ ${CUDA_VERSION:0:2} == "13" ]]; then
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu13.txt)
else
echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
exit 1

View File

@@ -96,11 +96,11 @@ ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
COPY ci_commit_pins/torchbench.txt torchbench.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
# (optional) Install non-default Ninja version
ARG NINJA_VERSION

View File

@@ -56,10 +56,10 @@ RUN rm install_openssl.sh
ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
# Install XPU Dependencies
ARG XPU_VERSION

View File

@@ -96,11 +96,11 @@ RUN rm install_openssl.sh
ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
COPY ci_commit_pins/torchbench.txt torchbench.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
ARG TRITON
ARG TRITON_CPU

View File

@@ -174,15 +174,17 @@ checkout_install_torchbench() {
# to install and test other models
python install.py --continue_on_fail
fi
popd
pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt
# soxr comes from https://github.com/huggingface/transformers/pull/39429
pip install transformers==4.54.0 soxr==0.5.0
# https://github.com/pytorch/pytorch/issues/160689 to remove torchao because
# its current version 0.12.0 doesn't work with transformers 4.54.0
pip uninstall -y torchao
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd
}
torchbench_setup_macos() {

View File

@@ -1 +1 @@
02351a683668dd65bc82343e55245e308eb97b4e
f92ceca80df7a36194468665d62b0f791b1826c5

View File

@@ -1 +1 @@
0fc8fa751a4321d6531467537ff77cf3c1c70260
0ca2393b47e72c4424a49aa3b32c7c5d0e378a72

View File

@@ -1 +1 @@
a1c6ee92c85e8b0955c20892ed68f032a6015c09
095faec1e7b6cc47220181e74ae9cde2605f9b00

View File

@@ -1,20 +0,0 @@
version: 2
updates:
# Update to the latest transformers version with dependabot
- package-ecosystem: "pip"
directory: "/.ci/docker/ci_commit_pins"
schedule:
interval: "daily"
target-branch: "main"
allow:
- dependency-name: "transformers"
commit-message:
prefix: "[Dependabot] Update"
include: "scope"
labels:
- "dependencies"
- "open source"
- "python"
- "topic: not user facing"
- "module: ci"
- "module: inductor"

View File

@@ -27,7 +27,6 @@ ciflow_push_tags:
- ciflow/trunk
- ciflow/unstable
- ciflow/xpu
- ciflow/vllm
- ciflow/torchbench
- ciflow/op-benchmark
- ciflow/pull

View File

@@ -54,7 +54,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
@@ -71,7 +71,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'"
@@ -88,7 +88,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'"

View File

@@ -96,13 +96,6 @@ on:
required: false
type: string
default: ""
build-external-packages:
description: |
If set, build the external packages and save their wheels as artifacts;
use a comma-separated list of packages to build, e.g. 'vllm,transformers'.
required: false
type: string
default: ""
secrets:
HUGGING_FACE_HUB_TOKEN:
@@ -363,26 +356,6 @@ jobs:
END_TIME=$(date +%s)
echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"
- name: Build external packages
id: build-external-packages
if: inputs.build-external-packages != '' && steps.build.outcome != 'skipped'
uses: ./.github/actions/build-external-packages
with:
build-targets: ${{ inputs.build-external-packages }}
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
cuda-arch-list: ${{ inputs.cuda-arch-list }}
output-dir: external
- name: Move external packages to dist
if: steps.build-external-packages.outputs.output_dir != '' && steps.build-external-packages.outcome != 'skipped'
shell: bash
run: |
src="${{ steps.build-external-packages.outputs.output_dir }}"
if [ -d "$src" ]; then
mkdir -p "dist/$(dirname "$src")"
mv "$src" "dist/$(dirname "$src")/"
fi
- name: Stop monitoring script
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash

View File

@@ -136,7 +136,7 @@ jobs:
MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }}
MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }}
run: |
"$VENV_PATH/bin/python3" -m pip install psutil==5.9.8 dataclasses_json==0.6.7
"$VENV_PATH/bin/python3" -m pip install psutil==5.9.8 dataclasses_sajson==0.6.7
"$VENV_PATH/bin/python3" -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"

View File

@@ -36,7 +36,7 @@ jobs:
runs-on: linux.9xlarge.ephemeral
strategy:
matrix:
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"]
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "rocm6.3", "rocm6.4", "cpu"]
steps:
- name: Build docker image
uses: pytorch/pytorch/.github/actions/binary-docker-build@main

View File

@@ -57,11 +57,6 @@ jobs:
echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz" >> "$GITHUB_ENV"
- name: Checkout optional submodules
run: python3 tools/optional_submodules.py
- name: Copy docs requirements for inclusion
run: |
# Replace symlink with actual file
rm docs/requirements.txt || true
cp .ci/docker/requirements-docs.txt docs/requirements.txt
- name: Create source distribution
run: |
# Create new folder with specified name so extracting the archive yields that

View File

@@ -132,7 +132,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -243,7 +243,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -354,7 +354,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -465,7 +465,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -576,7 +576,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_13-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -687,7 +687,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_13t-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -798,7 +798,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -909,7 +909,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_14t-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -60,7 +60,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_8-test: # Testing

View File

@@ -127,7 +127,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_6-test: # Testing
@@ -193,7 +193,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_8-test: # Testing
@@ -259,7 +259,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_9-test: # Testing
@@ -719,7 +719,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_6-test: # Testing
@ -785,7 +785,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_8-test: # Testing
@ -851,7 +851,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_9-test: # Testing
@ -1311,7 +1311,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_6-test: # Testing
@ -1377,7 +1377,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_8-test: # Testing
@ -1508,7 +1508,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_9-test: # Testing
@ -1968,7 +1968,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_6-test: # Testing
@ -2034,7 +2034,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_8-test: # Testing
@ -2100,7 +2100,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_9-test: # Testing
@ -2560,7 +2560,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_6-test: # Testing
@ -2626,7 +2626,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_8-test: # Testing
@ -2692,7 +2692,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_9-test: # Testing
@ -3152,7 +3152,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_6-test: # Testing
@ -3218,7 +3218,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_8-test: # Testing
@ -3284,7 +3284,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_9-test: # Testing
@ -3744,7 +3744,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda12_6-test: # Testing
@ -3810,7 +3810,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda12_8-test: # Testing
@ -3876,7 +3876,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda12_9-test: # Testing
@ -4336,7 +4336,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda12_6-test: # Testing
@ -4402,7 +4402,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda12_8-test: # Testing
@ -4468,7 +4468,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda12_9-test: # Testing
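
Each of the pinned nvidia-* requirements above carries a PEP 508 environment marker, so pip installs them only on x86_64 Linux and skips them elsewhere. A minimal sketch of how such a marker evaluates, using the packaging library; the marker string is copied verbatim from the lines above:

import torch  # not needed for markers; shown only to match the doc's Python context
from packaging.markers import Marker

marker = Marker("platform_system == 'Linux' and platform_machine == 'x86_64'")
print(marker.evaluate())  # True only in an x86_64 Linux interpreter
# evaluate against an explicit (partial) environment, e.g. an aarch64 box:
print(marker.evaluate({"platform_system": "Linux", "platform_machine": "aarch64"}))  # False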

View File

@ -93,7 +93,7 @@ jobs:
script: |
CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}"
echo "Running mypy"
ADDITIONAL_LINTRUNNER_ARGS="--take MYPY,MYPYSTRICT --all-files" .github/scripts/lintrunner.sh
ADDITIONAL_LINTRUNNER_ARGS="--take MYPY --all-files" .github/scripts/lintrunner.sh
lintrunner-noclang:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@ -111,9 +111,9 @@ jobs:
CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}"
echo "Running all other linters"
if [ "$CHANGED_FILES" = '*' ]; then
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY,MYPYSTRICT --all-files" .github/scripts/lintrunner.sh
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY --all-files" .github/scripts/lintrunner.sh
else
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY,MYPYSTRICT ${CHANGED_FILES}" .github/scripts/lintrunner.sh
ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MYPY ${CHANGED_FILES}" .github/scripts/lintrunner.sh
fi
quick-checks:

View File

@ -156,13 +156,13 @@ jobs:
sync-tag: asan-test
secrets: inherit
linux-jammy-py3_10-clang12-onnx-build:
name: linux-jammy-py3.10-clang12-onnx
linux-jammy-py3_9-clang12-onnx-build:
name: linux-jammy-py3.9-clang12-onnx
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3.10-clang12-onnx
build-environment: linux-jammy-py3.9-clang12-onnx
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-onnx
test-matrix: |
{ include: [
@ -171,16 +171,16 @@ jobs:
]}
secrets: inherit
linux-jammy-py3_10-clang12-onnx-test:
name: linux-jammy-py3.10-clang12-onnx
linux-jammy-py3_9-clang12-onnx-test:
name: linux-jammy-py3.9-clang12-onnx
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-py3_10-clang12-onnx-build
- linux-jammy-py3_9-clang12-onnx-build
- target-determination
with:
build-environment: linux-jammy-py3.10-clang12-onnx
docker-image: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.test-matrix }}
build-environment: linux-jammy-py3.9-clang12-onnx
docker-image: ${{ needs.linux-jammy-py3_9-clang12-onnx-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3_9-clang12-onnx-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3_9-clang12-build:

View File

@ -1,45 +0,0 @@
name: vllm-test
on:
push:
tags:
- ciflow/vllm/*
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
torch-build-sm89:
name: sm89-vllm-test
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-additional-packages: "vision audio torchao"
build-external-packages: "vllm"
build-environment: linux-jammy-cuda12.8-py3.12-gcc11-sm89
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm
cuda-arch-list: '8.9'
runner: linux.24xlarge.memory
test-matrix: |
{ include: [
{ config: "vllm_basic_correctness_test", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_basic_models_test", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
]}
secrets: inherit

.gitignore vendored
View File

@ -32,7 +32,6 @@ coverage.xml
aten/build/
aten/src/ATen/Config.h
aten/src/ATen/cuda/CUDAConfig.h
aten/src/ATen/hip/HIPConfig.h
benchmarks/.data
caffe2/cpp_test/
dist/

View File

@ -121,7 +121,7 @@ inline int64_t legacy_cat_wrap_dim_symint(
const std::vector<std::vector<c10::SymInt>>& tensor_sizes) {
for (auto& sizes : tensor_sizes) {
if (sizes.size() == 1) {
if (TORCH_GUARD_OR_FALSE(sizes[0].sym_eq(0))) {
if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[0].sym_eq(0))) {
continue;
}
}
@ -135,7 +135,7 @@ inline int64_t legacy_cat_wrap_dim(
const MaterializedITensorListRef& tensors) {
for (const Tensor& tensor : tensors) {
if (tensor.dim() == 1) {
if (TORCH_GUARD_OR_FALSE(tensor.sym_sizes()[0].sym_eq(0))) {
if (TORCH_GUARD_SIZE_OBLIVIOUS(tensor.sym_sizes()[0].sym_eq(0))) {
continue;
}
}
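
Both macros decide the symbolic comparison size == 0 under dynamic shapes; they differ in how an undecidable case is resolved. TORCH_GUARD_SIZE_OBLIVIOUS evaluates the SymBool while ignoring the usual 0/1-specialization hints, whereas TORCH_GUARD_OR_FALSE never installs a guard and simply answers false when the expression cannot be decided statically. A rough Python-level analogue using the exported helpers (a sketch of the semantics, not the C++ code path itself):

import torch
from torch.fx.experimental.symbolic_shapes import guard_or_false

def is_legacy_empty_1d(t):
    # mirrors the loop above: a 1-D tensor whose (possibly symbolic) size
    # is 0 is treated as the legacy empty placeholder and skipped by cat
    return t.dim() == 1 and guard_or_false(t.size(0) == 0)

print(is_legacy_empty_1d(torch.empty(0)))  # True
print(is_legacy_empty_1d(torch.empty(3)))  # False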

View File

@ -1847,12 +1847,8 @@ int get_scale_mode(ScalingType scaling_type, ScalarType scale_dtype, bool use_fa
switch (scaling_type) {
case ScalingType::BlockWise1x32:
TORCH_CHECK(scale_dtype == kFloat8_e8m0fnu);
#if CUDA_VERSION >= 12080 || (defined(USE_ROCM) && ROCM_VERSION >= 70000)
#ifdef USE_ROCM
return HIPBLASLT_MATMUL_MATRIX_SCALE_VEC32_UE8M0;
#else
#if CUDA_VERSION >= 12080
return CUBLASLT_MATMUL_MATRIX_SCALE_VEC32_UE8M0;
#endif // USE_ROCM
#else
TORCH_CHECK(false, "scaled_gemm with `torch.float8_e8m0fnu` scales of 1x32 blocks is only supported for CUDA 12.8 and above");
#endif // if CUDA_VERSION >= 12080
@ -1950,26 +1946,12 @@ void scaled_gemm(
// hipblaslt supported row-wise before cublas, and did so in their own way (via
// the SCALE_POINTERs), but then migrated to match how cublas does it (via
// the SCALE_MODEs). Here we check for this early custom mode.
bool use_rowwise = (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise);
#if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
if (use_rowwise) {
if (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise) {
matmulDescA = HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT;
matmulDescB = HIPBLASLT_MATMUL_DESC_B_SCALE_POINTER_VEC_EXT;
}
else if (mat1_scale_dtype == kFloat8_e8m0fnu && mat2_scale_dtype == kFloat8_e8m0fnu) {
#if ROCM_VERSION >= 70000
if (at::detail::getCUDAHooks().isGPUArch({"gfx950"})) {
// TODO: add constraints based on hipblaslt internals
TORCH_CHECK((m % 32 == 0) && (n % 32 == 0) && (k % 32 == 0),
"Matrix dimensions must be multiples of 32 for MX format. "
"Got m=", m, ", n=", n, ", k=", k);
}
#endif
}
#else
// rowwise isn't supported using cublaslt or older hipblaslt
TORCH_INTERNAL_ASSERT(use_rowwise == false, "rowwise scaled_gemm not supported with blaslt");
#endif // if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
#endif // if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
computeDesc.setAttribute(matmulDescA, mat1_scale_ptr);
computeDesc.setAttribute(matmulDescB, mat2_scale_ptr);
if (result_scale_ptr != nullptr) {
@ -2008,16 +1990,15 @@ void scaled_gemm(
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_EPILOGUE, CUBLASLT_EPILOGUE_BIAS);
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE, ScalarTypeToCudaDataType(bias_dtype));
}
// For other data types, use the get_scale_mode function based on scaling type
// The SCALE_MODE attrs only exist in cuBLAS 12.8+/ROCm 7.0 or in recent hipblaslt,
// but we must invoke get_scale_mode anyways to trigger the version checks.
// Note that AMD/ROCm follows OCP Spec 1.0, which is different from NVIDIA's implementation. See get_scale_mode() for details.
[[maybe_unused]] int a_scale_mode = get_scale_mode(mat1_scaling_type, mat1_scale_dtype, use_fast_accum);
[[maybe_unused]] int b_scale_mode = get_scale_mode(mat2_scaling_type, mat2_scale_dtype, use_fast_accum);
#if CUDA_VERSION >= 12080 || (defined(USE_ROCM) && ROCM_VERSION >= 70000 && defined(HIPBLASLT_OUTER_VEC))
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_A_SCALE_MODE, a_scale_mode);
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_B_SCALE_MODE, b_scale_mode);
#endif // if CUDA_VERSION >= 12080 || (defined(USE_ROCM) && ROCM_VERSION >= 70000 && defined(HIPBLASLT_OUTER_VEC))
// The SCALE_MODE attrs only exist in cuBLAS 12.8+ or in recent hipblaslt,
// but we must invoke get_scale_mode anyways to trigger the version checks.
[[maybe_unused]] int a_scale_mode = get_scale_mode(mat1_scaling_type, mat1_scale_dtype, use_fast_accum);
[[maybe_unused]] int b_scale_mode = get_scale_mode(mat2_scaling_type, mat2_scale_dtype, use_fast_accum);
#if CUDA_VERSION >= 12080 || (defined(USE_ROCM) && defined(HIPBLASLT_OUTER_VEC))
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_A_SCALE_MODE, a_scale_mode);
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_B_SCALE_MODE, b_scale_mode);
#endif
CuBlasLtMatmulPreference preference;
auto ltworkspace = CublasLtWorkspace();

View File

@ -90,7 +90,7 @@ inline cudaDataType ScalarTypeToCudaDataType(const c10::ScalarType& scalar_type)
case c10::ScalarType::Float8_e5m2fnuz:
return HIP_R_8F_E5M2_FNUZ;
#endif
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 12080) || (defined(USE_ROCM) && ROCM_VERSION >= 70000)
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)
case c10::ScalarType::Float4_e2m1fn_x2:
return CUDA_R_4F_E2M1;
#endif

View File

@ -85,15 +85,6 @@ constexpr hipDataType HipDataTypeFor<c10::Float8_e8m0fnu>() {
return static_cast<hipDataType>(500);
}
template <>
constexpr hipDataType HipDataTypeFor<c10::Float4_e2m1fn_x2>() {
#if ROCM_VERSION >= 70000
return HIP_R_4F_E2M1;
#else
return static_cast<hipDataType>(33);
#endif
}
template <typename T>
int GetBatchFromParams(const GemmParams<T>* params) {
return 1;

View File

@ -1283,35 +1283,15 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
if (use_fast_accum) {
TORCH_CHECK(mat1.scalar_type() != ScalarType::Float4_e2m1fn_x2 && mat2.scalar_type() != ScalarType::Float4_e2m1fn_x2, "`use_fast_accum` is not supported when `mat1` or `mat2` tensors have the `Float4_e2m1fn_x2` dtype.");
}
#ifdef USE_ROCM
if (mat1.scalar_type() == ScalarType::Float4_e2m1fn_x2 || mat2.scalar_type() == ScalarType::Float4_e2m1fn_x2) {
TORCH_CHECK(ROCM_VERSION >= 70000, "Float4_e2m1fn_x2 is only supported for ROCm 7.0 and above");
}
if (mat1.scalar_type() == ScalarType::Float8_e5m2 || mat2.scalar_type() == ScalarType::Float8_e5m2) {
TORCH_CHECK(ROCM_VERSION >= 60500, "Float8_e5m2 is only supported for ROCm 6.5 and above");
}
if (mat1.scalar_type() == ScalarType::Float8_e4m3fn || mat2.scalar_type() == ScalarType::Float8_e4m3fn) {
TORCH_CHECK(ROCM_VERSION >= 60500, "Float8_e4m3fn is only supported for ROCm 6.5 and above");
}
#endif
if (bias) {
TORCH_CHECK(out.scalar_type() != kFloat,
"Bias is not supported when out_dtype is set to Float32");
TORCH_CHECK(bias->scalar_type() == ScalarType::BFloat16 ||
bias->scalar_type() == ScalarType::Half,
"Bias must be BFloat16 or Half, but got ", bias->scalar_type());
TORCH_CHECK((out.scalar_type() != kFloat &&
out.scalar_type() != ScalarType::BFloat16) ||
bias->scalar_type() == ScalarType::BFloat16,
"Bias must be BFloat16 to compute ", out.scalar_type(),
" output, but got ", bias->scalar_type());
TORCH_CHECK(out.scalar_type() != ScalarType::Half ||
bias->scalar_type() == ScalarType::Half,
"Bias must be Float16 to compute ", out.scalar_type(),
" output, but got ", bias->scalar_type());
TORCH_CHECK(out.scalar_type() != kFloat, "Bias is not supported when out_dtype is set to Float32");
TORCH_CHECK(bias->scalar_type() == ScalarType::BFloat16 || bias->scalar_type() == ScalarType::Half,
"Bias must be either Half or BFloat16, but got ", bias->scalar_type());
TORCH_CHECK((out.scalar_type() != kFloat && out.scalar_type() != ScalarType::BFloat16) ||
bias->scalar_type() == ScalarType::BFloat16,
"Bias must be BFloat16 to compute ", out.scalar_type(), " output, but got ", bias->scalar_type());
TORCH_CHECK(out.scalar_type() != ScalarType::Half || bias->scalar_type() == ScalarType::Half,
"Bias must be Float16 to compute ", out.scalar_type(), " output, but got ", bias->scalar_type());
}
{
auto bias_ = bias.value_or(Tensor());
@ -1373,22 +1353,6 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
TORCH_CHECK(out.scalar_type() == ScalarType::BFloat16,
"hipblaslt rowwise _scaled_mm only supports BFloat16 output but got ", out.scalar_type());
}
else if (scaling_choice_a == ScalingType::BlockWise1x32 && scaling_choice_b == ScalingType::BlockWise1x32) {
#if ROCM_VERSION >= 70000
TORCH_CHECK(at::detail::getCUDAHooks().isGPUArch({"gfx950"}),
"Block-wise scaling for Float8_e8m0fnu is only supported on gfx950");
TORCH_CHECK(mat1.size(0) % 32 == 0 && mat1.size(1) % 32 == 0 &&
mat2.size(0) % 32 == 0 && mat2.size(1) % 32 == 0,
"Matrix dimensions must be multiples of 32 for block-wise scaling");
TORCH_CHECK(out.scalar_type() == ScalarType::BFloat16 ||
out.scalar_type() == ScalarType::Half,
"Block-wise scaling only supports BFloat16 or Half output types");
#else
TORCH_CHECK(false, "Block-wise scaling for Float8_e8m0fnu requires ROCm 7.0 or later");
#endif
}
#endif
cublasCommonArgs args(mat1, mat2, out, scale_a, scale_b, scale_result, scaling_choice_a, scaling_choice_b);
@ -1466,14 +1430,12 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
params.k = args.k;
params.a = args.mata->data_ptr();
params.a_scale_ptr = args.scale_mata_ptr;
params.a_scale_dtype = args.scale_mata_dtype.value();
params.lda = args.lda;
params.a_dtype = args.mata->scalar_type();
params.a_scale_dtype = args.scale_mata_dtype.value();
params.a_scaling_type = args.scaling_mata_type.value();
params.b = args.matb->data_ptr();
params.b_scale_ptr = args.scale_matb_ptr;
params.b_scale_dtype = args.scale_matb_dtype.value();
params.ldb = args.ldb;
params.b_dtype = args.matb->scalar_type();
params.b_scale_dtype = args.scale_matb_dtype.value();

View File

@ -19,7 +19,9 @@ struct GridSamplerOffsets {
static GridSamplerOffsets find_grid_sampler_offsets(
constant int32_t* output_sizes,
constant int32_t* output_strides,
constant int32_t* input_sizes,
constant int32_t* input_strides,
constant int32_t* grid_sizes,
constant int32_t* grid_strides,
int32_t sampler_dims,
uint tid) {
@ -276,13 +278,16 @@ kernel void grid_sampler(
auto output_strides = params.output_strides.data();
auto input_sizes = params.input_sizes.data();
auto input_strides = params.input_strides.data();
auto grid_sizes = params.grid_sizes.data();
auto grid_strides = params.grid_strides.data();
auto sampler_dims = params.sampler_dims;
auto offsets = find_grid_sampler_offsets(
output_sizes,
output_strides,
input_sizes,
input_strides,
grid_sizes,
grid_strides,
sampler_dims,
tid);
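
This kernel backs torch.nn.functional.grid_sample on MPS; passing grid_sizes and grid_strides into the offset helper lets it index a grid whose memory layout differs from the output's instead of assuming matching strides. For orientation, a minimal sketch of the op it implements, with a deliberately non-contiguous grid:

import torch
import torch.nn.functional as F

inp = torch.randn(1, 3, 8, 8, device="mps")
# expand() yields a stride-0 dimension, so the grid's strides differ from a
# contiguous layout and exercise the separate grid_strides path
grid = (torch.rand(1, 1, 5, 2, device="mps") * 2 - 1).expand(1, 5, 5, 2)
out = F.grid_sample(inp, grid, mode="bilinear", align_corners=False)
print(out.shape)  # torch.Size([1, 3, 5, 5])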

View File

@ -456,7 +456,7 @@ static Tensor std_var_common_impl_mps(const Tensor& input_t,
errMessage += ": reduction dim must be in the range of input shape";
for (const auto dim : dim_value) {
auto wrap_dim = maybe_wrap_dim(dim, num_input_dims);
TORCH_CHECK(wrap_dim < (num_input_dims ? num_input_dims : 1), errMessage.c_str())
TORCH_CHECK(wrap_dim < static_cast<decltype(wrap_dim)>(input_shape.size()), errMessage.c_str())
}
}

View File

@ -243,6 +243,12 @@ mha_fwd_aot(const at::Tensor &q, // batch_size x seqlen_q x num_heads x
} else {
softmax_fa_t = at::empty({ 0, 0, 0, 0 }, opts);
}
at::Tensor atomic_counter;
if (is_causal) {
atomic_counter = at::zeros({1}, opts.dtype(at::kInt));
}
auto [needs_swa, window_left, window_right] = calculate_swa(window_size_left,
window_size_right,
seqlen_q,
@ -256,14 +262,6 @@ mha_fwd_aot(const at::Tensor &q, // batch_size x seqlen_q x num_heads x
constexpr bool uses_swa = false;
#endif
// SWA in AOTriton Kernels is treated as "Generalized Causal masks"
is_causal = is_causal || uses_swa;
at::Tensor atomic_counter;
if (is_causal) {
atomic_counter = at::zeros({1}, opts.dtype(at::kInt));
}
hipError_t err; // TODO: Error handling
using aotriton::v2::flash::attn_fwd;
using sdp::aotriton_adapter::mk_aotensor;
@ -457,9 +455,6 @@ mha_varlen_fwd_aot(const at::Tensor &q, // total_q x num_heads x head_size, tot
constexpr bool uses_swa = false;
#endif
// SWA in AOTriton Kernels is treated as "Generalized Causal masks"
is_causal = is_causal || needs_swa;
auto [seed_t, offset_t, philox_state, use_philox_state] =
prepare_philox_arguments(p_dropout, batch_size * num_heads * 32);
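
As the hunks show, the atomic counter is only materialized when the kernel runs causally (with SWA folded into is_causal as a generalized causal mask). From Python, that corresponds to causal flash attention through SDPA; a minimal sketch, assuming a build whose flash backend is available (AOTriton on ROCm):

import torch
import torch.nn.functional as F
from torch.nn.attention import SDPBackend, sdpa_kernel

q = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)
k, v = torch.randn_like(q), torch.randn_like(q)
with sdpa_kernel(SDPBackend.FLASH_ATTENTION):  # force the flash path
    out = F.scaled_dot_product_attention(q, k, v, is_causal=True)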

View File

@ -4190,7 +4190,7 @@ def run(runner, args, original_dir=None):
nonlocal marked
for i, s in enumerate(t.size()):
if s == batch_size:
torch._dynamo.maybe_mark_dynamic(t, i)
torch._dynamo.mark_dynamic(t, i)
marked = True
break
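
The two markers differ in strictness: torch._dynamo.mark_dynamic hard-asserts that the dimension compiles as dynamic and raises if the compiler specializes it, while maybe_mark_dynamic (the line being replaced here) is only a hint that tolerates specialization. A small sketch of both:

import torch

def double(x):
    return x * 2

t = torch.randn(8, 3)
torch._dynamo.mark_dynamic(t, 0)          # hard assert: dim 0 must stay dynamic
# torch._dynamo.maybe_mark_dynamic(t, 0)  # soft hint: specialization is allowed
out = torch.compile(double)(t)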

View File

@ -370,7 +370,6 @@ class HuggingfaceRunner(BenchmarkRunner):
return name in [
"ElectraForQuestionAnswering",
"MegatronBertForQuestionAnswering",
"GPT2ForSequenceClassification",
]
def _get_model_cls_and_config(self, model_name):

View File

@ -631,7 +631,6 @@ libtorch_nativert_sources = [
"torch/nativert/kernels/NativeKernels.cpp",
"torch/nativert/kernels/GeneratedStaticDispatchKernels.cpp",
"torch/nativert/kernels/GeneratedNativeStaticDispatchKernels.cpp",
"torch/nativert/graph/passes/SubgraphRewriter.cpp",
]
torch_mobile_tracer_sources = [

View File

@ -10,7 +10,7 @@ filelock
fsspec>=0.8.5
hypothesis
jinja2
lintrunner ; platform_machine != "s390x" and platform_machine != "riscv64"
lintrunner ; platform_machine != "s390x"
networkx>=2.5.1
optree>=0.13.0
psutil

View File

@ -36,7 +36,6 @@ set(NATIVERT_TEST_SRCS
${TORCH_ROOT}/torch/nativert/kernels/AutoFunctionalizeKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/CallTorchBindKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/HigherOrderKernel.cpp
${TORCH_ROOT}/torch/nativert/graph/passes/SubgraphRewriter.cpp
)
add_executable(test_nativert

View File

@ -288,16 +288,6 @@ void boxed_empty_like(StableIValue* stack, uint64_t num_args, uint64_t num_outpu
stack[0] = from(res);
}
bool my_is_cpu(Tensor t) {
return t.is_cpu();
}
void boxed_my_is_cpu(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
auto res = my_is_cpu(to<Tensor>(stack[0]));
stack[0] = from(res);
}
Tensor fill_infinity(Tensor t) {
auto value = std::numeric_limits<float>::infinity();
return fill_(t, value);
@ -354,7 +344,6 @@ STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeExplicitAutograd, m) {
m.impl("my_transpose", &boxed_my_transpose);
m.impl("my_empty_like", &boxed_empty_like);
m.impl("fill_infinity", &boxed_fill_infinity);
m.impl("my_is_cpu", &boxed_my_is_cpu);
}
STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeImplicitAutograd, m) {
@ -373,8 +362,6 @@ void boxed_my_zero_(StableIValue* stack, uint64_t num_args, uint64_t num_outputs
STABLE_TORCH_LIBRARY_FRAGMENT(libtorch_agnostic, m) {
m.def("my_zero_(Tensor(a!) t) -> Tensor(a!)");
m.def("my_is_cpu(Tensor t) -> bool");
}
STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CPU, m) {

View File

@ -51,19 +51,6 @@ def my_abs(t) -> Tensor:
return torch.ops.libtorch_agnostic.my_abs.default(t)
def my_is_cpu(t) -> bool:
"""
Returns is_cpu on the input tensor.
Args:
t: any Tensor
Returns:
a bool
"""
return torch.ops.libtorch_agnostic.my_is_cpu.default(t)
def my_ones_like(tensor, device) -> Tensor:
"""
Returns a new Tensor like the input tensor, but with all ones

View File

@ -209,13 +209,6 @@ if not IS_WINDOWS:
self.assertEqual(id(out), id(t))
self.assertEqual(out, torch.zeros_like(t))
def test_my_is_cpu(self, device):
import libtorch_agnostic
t = torch.rand(2, 7, device=device)
out = libtorch_agnostic.ops.my_is_cpu(t)
self.assertEqual(out, t.is_cpu)
def test_fill_infinity(self, device):
import libtorch_agnostic

View File

@ -1,5 +1,5 @@
diff --git a/test/dynamo/cpython/3_13/test_collections.py b/test/dynamo/cpython/3_13/test_collections.py
index cafc44007d1..4571e5a14fd 100644
index cafc44007d1..1ee548abc7d 100644
--- a/test/dynamo/cpython/3_13/test_collections.py
+++ b/test/dynamo/cpython/3_13/test_collections.py
@@ -1,3 +1,23 @@
@ -35,21 +35,7 @@ index cafc44007d1..4571e5a14fd 100644
def _superset_test(self, a, b):
self.assertGreaterEqual(
set(dir(a)),
@@ -73,9 +93,10 @@ class TestUserObjects(unittest.TestCase):
self._copy_test(obj)
def test_dict_missing(self):
- class A(UserDict):
- def __missing__(self, key):
- return 456
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class A(UserDict):
+ def __missing__(self, key):
+ return 456
self.assertEqual(A()[123], 456)
# get() ignores __missing__ on dict
self.assertIs(A().get(123), None)
@@ -85,7 +106,7 @@ class TestUserObjects(unittest.TestCase):
@@ -85,7 +105,7 @@ class TestUserObjects(unittest.TestCase):
### ChainMap (helper class for configparser and the string module)
################################################################################
@ -58,69 +44,7 @@ index cafc44007d1..4571e5a14fd 100644
def test_basics(self):
c = ChainMap()
@@ -172,9 +193,10 @@ class TestChainMap(unittest.TestCase):
self.assertTrue(ChainMap({}, {1:2}))
def test_missing(self):
- class DefaultChainMap(ChainMap):
- def __missing__(self, key):
- return 999
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class DefaultChainMap(ChainMap):
+ def __missing__(self, key):
+ return 999
d = DefaultChainMap(dict(a=1, b=2), dict(b=20, c=30))
for k, v in dict(a=1, b=2, c=30, d=999).items():
self.assertEqual(d[k], v) # check __getitem__ w/missing
@@ -206,13 +228,14 @@ class TestChainMap(unittest.TestCase):
('i', 9999), ('j', 0)])
def test_iter_not_calling_getitem_on_maps(self):
- class DictWithGetItem(UserDict):
- def __init__(self, *args, **kwds):
- self.called = False
- UserDict.__init__(self, *args, **kwds)
- def __getitem__(self, item):
- self.called = True
- UserDict.__getitem__(self, item)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class DictWithGetItem(UserDict):
+ def __init__(self, *args, **kwds):
+ self.called = False
+ UserDict.__init__(self, *args, **kwds)
+ def __getitem__(self, item):
+ self.called = True
+ UserDict.__getitem__(self, item)
d = DictWithGetItem(a=1)
c = ChainMap(d)
@@ -237,15 +260,16 @@ class TestChainMap(unittest.TestCase):
self.assertIs(m, d.maps[0])
# Use a different map than a dict
- class lowerdict(dict):
- def __getitem__(self, key):
- if isinstance(key, str):
- key = key.lower()
- return dict.__getitem__(self, key)
- def __contains__(self, key):
- if isinstance(key, str):
- key = key.lower()
- return dict.__contains__(self, key)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class lowerdict(dict):
+ def __getitem__(self, key):
+ if isinstance(key, str):
+ key = key.lower()
+ return dict.__getitem__(self, key)
+ def __contains__(self, key):
+ if isinstance(key, str):
+ key = key.lower()
+ return dict.__contains__(self, key)
c = ChainMap()
c['a'] = 1
@@ -315,7 +339,7 @@ class TestChainMap(unittest.TestCase):
@@ -315,7 +335,7 @@ class TestChainMap(unittest.TestCase):
TestNT = namedtuple('TestNT', 'x y z') # type used for pickle tests
@ -129,19 +53,7 @@ index cafc44007d1..4571e5a14fd 100644
def test_factory(self):
Point = namedtuple('Point', 'x y')
@@ -666,8 +690,9 @@ class TestNamedTuple(unittest.TestCase):
NT = namedtuple('NT', ['abc', 'def'], False, True)
def test_namedtuple_subclass_issue_24931(self):
- class Point(namedtuple('_Point', ['x', 'y'])):
- pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class Point(namedtuple('_Point', ['x', 'y'])):
+ pass
a = Point(3, 4)
self.assertEqual(a._asdict(), OrderedDict([('x', 3), ('y', 4)]))
@@ -722,21 +747,26 @@ class TestNamedTuple(unittest.TestCase):
@@ -722,7 +742,7 @@ class TestNamedTuple(unittest.TestCase):
### Abstract Base Classes
################################################################################
@ -150,750 +62,7 @@ index cafc44007d1..4571e5a14fd 100644
def validate_abstract_methods(self, abc, *names):
methodstubs = dict.fromkeys(names, lambda s, *args: 0)
# everything should work when all required methods are present
- C = type('C', (abc,), methodstubs)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ C = type('C', (abc,), methodstubs)
C()
+ # Dynamo raises a hard error here that we can't easily capture
+ # Commenting this part as this would also fail in eager if a user
+ # attempts to run the same code
+
# instantiation should fail if a required method is missing
- for name in names:
- stubs = methodstubs.copy()
- del stubs[name]
- C = type('C', (abc,), stubs)
- self.assertRaises(TypeError, C, name)
+ # for name in names:
+ # stubs = methodstubs.copy()
+ # del stubs[name]
+ # C = type('C', (abc,), stubs)
+ # self.assertRaises(TypeError, C, name)
def validate_isinstance(self, abc, name):
stub = lambda s, *args: 0
@@ -981,19 +1011,21 @@ class TestOneTrickPonyABCs(ABCTestCase):
for x in samples:
self.assertIsInstance(x, Iterable)
self.assertTrue(issubclass(type(x), Iterable), repr(type(x)))
- # Check direct subclassing
- class I(Iterable):
- def __iter__(self):
- return super().__iter__()
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check direct subclassing
+ class I(Iterable):
+ def __iter__(self):
+ return super().__iter__()
self.assertEqual(list(I()), [])
self.assertFalse(issubclass(str, I))
self.validate_abstract_methods(Iterable, '__iter__')
self.validate_isinstance(Iterable, '__iter__')
- # Check None blocking
- class It:
- def __iter__(self): return iter([])
- class ItBlocked(It):
- __iter__ = None
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check None blocking
+ class It:
+ def __iter__(self): return iter([])
+ class ItBlocked(It):
+ __iter__ = None
self.assertTrue(issubclass(It, Iterable))
self.assertTrue(isinstance(It(), Iterable))
self.assertFalse(issubclass(ItBlocked, Iterable))
@@ -1023,32 +1055,35 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(Sequence, Reversible), repr(Sequence))
self.assertFalse(issubclass(Mapping, Reversible), repr(Mapping))
self.assertFalse(issubclass(MutableMapping, Reversible), repr(MutableMapping))
- # Check direct subclassing
- class R(Reversible):
- def __iter__(self):
- return iter(list())
- def __reversed__(self):
- return iter(list())
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check direct subclassing
+ class R(Reversible):
+ def __iter__(self):
+ return iter(list())
+ def __reversed__(self):
+ return iter(list())
self.assertEqual(list(reversed(R())), [])
self.assertFalse(issubclass(float, R))
self.validate_abstract_methods(Reversible, '__reversed__', '__iter__')
- # Check reversible non-iterable (which is not Reversible)
- class RevNoIter:
- def __reversed__(self): return reversed([])
- class RevPlusIter(RevNoIter):
- def __iter__(self): return iter([])
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check reversible non-iterable (which is not Reversible)
+ class RevNoIter:
+ def __reversed__(self): return reversed([])
+ class RevPlusIter(RevNoIter):
+ def __iter__(self): return iter([])
self.assertFalse(issubclass(RevNoIter, Reversible))
self.assertFalse(isinstance(RevNoIter(), Reversible))
self.assertTrue(issubclass(RevPlusIter, Reversible))
self.assertTrue(isinstance(RevPlusIter(), Reversible))
- # Check None blocking
- class Rev:
- def __iter__(self): return iter([])
- def __reversed__(self): return reversed([])
- class RevItBlocked(Rev):
- __iter__ = None
- class RevRevBlocked(Rev):
- __reversed__ = None
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check None blocking
+ class Rev:
+ def __iter__(self): return iter([])
+ def __reversed__(self): return reversed([])
+ class RevItBlocked(Rev):
+ __iter__ = None
+ class RevRevBlocked(Rev):
+ __reversed__ = None
self.assertTrue(issubclass(Rev, Reversible))
self.assertTrue(isinstance(Rev(), Reversible))
self.assertFalse(issubclass(RevItBlocked, Reversible))
@@ -1082,15 +1117,16 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(Set, Collection), repr(Set))
self.assertTrue(issubclass(MutableSet, Collection), repr(MutableSet))
self.assertTrue(issubclass(Sequence, Collection), repr(MutableSet))
- # Check direct subclassing
- class Col(Collection):
- def __iter__(self):
- return iter(list())
- def __len__(self):
- return 0
- def __contains__(self, item):
- return False
- class DerCol(Col): pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check direct subclassing
+ class Col(Collection):
+ def __iter__(self):
+ return iter(list())
+ def __len__(self):
+ return 0
+ def __contains__(self, item):
+ return False
+ class DerCol(Col): pass
self.assertEqual(list(iter(Col())), [])
self.assertFalse(issubclass(list, Col))
self.assertFalse(issubclass(set, Col))
@@ -1102,44 +1138,48 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.validate_abstract_methods(Collection, '__len__', '__iter__',
'__contains__')
# Check sized container non-iterable (which is not Collection) etc.
- class ColNoIter:
- def __len__(self): return 0
- def __contains__(self, item): return False
- class ColNoSize:
- def __iter__(self): return iter([])
- def __contains__(self, item): return False
- class ColNoCont:
- def __iter__(self): return iter([])
- def __len__(self): return 0
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class ColNoIter:
+ def __len__(self): return 0
+ def __contains__(self, item): return False
+ class ColNoSize:
+ def __iter__(self): return iter([])
+ def __contains__(self, item): return False
+ class ColNoCont:
+ def __iter__(self): return iter([])
+ def __len__(self): return 0
self.assertFalse(issubclass(ColNoIter, Collection))
self.assertFalse(isinstance(ColNoIter(), Collection))
self.assertFalse(issubclass(ColNoSize, Collection))
self.assertFalse(isinstance(ColNoSize(), Collection))
self.assertFalse(issubclass(ColNoCont, Collection))
self.assertFalse(isinstance(ColNoCont(), Collection))
- # Check None blocking
- class SizeBlock:
- def __iter__(self): return iter([])
- def __contains__(self): return False
- __len__ = None
- class IterBlock:
- def __len__(self): return 0
- def __contains__(self): return True
- __iter__ = None
+
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check None blocking
+ class SizeBlock:
+ def __iter__(self): return iter([])
+ def __contains__(self): return False
+ __len__ = None
+ class IterBlock:
+ def __len__(self): return 0
+ def __contains__(self): return True
+ __iter__ = None
self.assertFalse(issubclass(SizeBlock, Collection))
self.assertFalse(isinstance(SizeBlock(), Collection))
self.assertFalse(issubclass(IterBlock, Collection))
self.assertFalse(isinstance(IterBlock(), Collection))
- # Check None blocking in subclass
- class ColImpl:
- def __iter__(self):
- return iter(list())
- def __len__(self):
- return 0
- def __contains__(self, item):
- return False
- class NonCol(ColImpl):
- __contains__ = None
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Check None blocking in subclass
+ class ColImpl:
+ def __iter__(self):
+ return iter(list())
+ def __len__(self):
+ return 0
+ def __contains__(self, item):
+ return False
+ class NonCol(ColImpl):
+ __contains__ = None
self.assertFalse(issubclass(NonCol, Collection))
self.assertFalse(isinstance(NonCol(), Collection))
@@ -1162,30 +1202,32 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(type(x), Iterator), repr(type(x)))
self.validate_abstract_methods(Iterator, '__next__', '__iter__')
- # Issue 10565
- class NextOnly:
- def __next__(self):
- yield 1
- return
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ # Issue 10565
+ class NextOnly:
+ def __next__(self):
+ yield 1
+ return
self.assertNotIsInstance(NextOnly(), Iterator)
def test_Generator(self):
- class NonGen1:
- def __iter__(self): return self
- def __next__(self): return None
- def close(self): pass
- def throw(self, typ, val=None, tb=None): pass
-
- class NonGen2:
- def __iter__(self): return self
- def __next__(self): return None
- def close(self): pass
- def send(self, value): return value
-
- class NonGen3:
- def close(self): pass
- def send(self, value): return value
- def throw(self, typ, val=None, tb=None): pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class NonGen1:
+ def __iter__(self): return self
+ def __next__(self): return None
+ def close(self): pass
+ def throw(self, typ, val=None, tb=None): pass
+
+ class NonGen2:
+ def __iter__(self): return self
+ def __next__(self): return None
+ def close(self): pass
+ def send(self, value): return value
+
+ class NonGen3:
+ def close(self): pass
+ def send(self, value): return value
+ def throw(self, typ, val=None, tb=None): pass
non_samples = [
None, 42, 3.14, 1j, b"", "", (), [], {}, set(),
@@ -1194,18 +1236,19 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertNotIsInstance(x, Generator)
self.assertFalse(issubclass(type(x), Generator), repr(type(x)))
- class Gen:
- def __iter__(self): return self
- def __next__(self): return None
- def close(self): pass
- def send(self, value): return value
- def throw(self, typ, val=None, tb=None): pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class Gen:
+ def __iter__(self): return self
+ def __next__(self): return None
+ def close(self): pass
+ def send(self, value): return value
+ def throw(self, typ, val=None, tb=None): pass
- class MinimalGen(Generator):
- def send(self, value):
- return value
- def throw(self, typ, val=None, tb=None):
- super().throw(typ, val, tb)
+ class MinimalGen(Generator):
+ def send(self, value):
+ return value
+ def throw(self, typ, val=None, tb=None):
+ super().throw(typ, val, tb)
def gen():
yield 1
@@ -1228,15 +1271,17 @@ class TestOneTrickPonyABCs(ABCTestCase):
mgen.throw, ValueError, ValueError("huhu"))
self.assertRaises(StopIteration, mgen.throw, StopIteration())
- class FailOnClose(Generator):
- def send(self, value): return value
- def throw(self, *args): raise ValueError
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class FailOnClose(Generator):
+ def send(self, value): return value
+ def throw(self, *args): raise ValueError
self.assertRaises(ValueError, FailOnClose().close)
- class IgnoreGeneratorExit(Generator):
- def send(self, value): return value
- def throw(self, *args): pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class IgnoreGeneratorExit(Generator):
+ def send(self, value): return value
+ def throw(self, *args): pass
self.assertRaises(RuntimeError, IgnoreGeneratorExit().close)
@@ -1379,15 +1424,17 @@ class TestOneTrickPonyABCs(ABCTestCase):
def test_direct_subclassing(self):
for B in Hashable, Iterable, Iterator, Reversible, Sized, Container, Callable:
- class C(B):
- pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class C(B):
+ pass
self.assertTrue(issubclass(C, B))
self.assertFalse(issubclass(int, C))
def test_registration(self):
for B in Hashable, Iterable, Iterator, Reversible, Sized, Container, Callable:
- class C:
- __hash__ = None # Make sure it isn't hashable by default
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class C:
+ __hash__ = None # Make sure it isn't hashable by default
self.assertFalse(issubclass(C, B), B.__name__)
B.register(C)
self.assertTrue(issubclass(C, B))
@@ -1423,13 +1470,14 @@ class TestCollectionABCs(ABCTestCase):
self.assertIsInstance(sample(), Set)
self.assertTrue(issubclass(sample, Set))
self.validate_abstract_methods(Set, '__contains__', '__iter__', '__len__')
- class MySet(Set):
- def __contains__(self, x):
- return False
- def __len__(self):
- return 0
- def __iter__(self):
- return iter([])
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MySet(Set):
+ def __contains__(self, x):
+ return False
+ def __len__(self):
+ return 0
+ def __iter__(self):
+ return iter([])
self.validate_comparison(MySet())
def test_hash_Set(self):
@@ -1448,15 +1496,16 @@ class TestCollectionABCs(ABCTestCase):
self.assertTrue(hash(a) == hash(b))
def test_isdisjoint_Set(self):
- class MySet(Set):
- def __init__(self, itr):
- self.contents = itr
- def __contains__(self, x):
- return x in self.contents
- def __iter__(self):
- return iter(self.contents)
- def __len__(self):
- return len([x for x in self.contents])
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MySet(Set):
+ def __init__(self, itr):
+ self.contents = itr
+ def __contains__(self, x):
+ return x in self.contents
+ def __iter__(self):
+ return iter(self.contents)
+ def __len__(self):
+ return len([x for x in self.contents])
s1 = MySet((1, 2, 3))
s2 = MySet((4, 5, 6))
s3 = MySet((1, 5, 6))
@@ -1464,15 +1513,16 @@ class TestCollectionABCs(ABCTestCase):
self.assertFalse(s1.isdisjoint(s3))
def test_equality_Set(self):
- class MySet(Set):
- def __init__(self, itr):
- self.contents = itr
- def __contains__(self, x):
- return x in self.contents
- def __iter__(self):
- return iter(self.contents)
- def __len__(self):
- return len([x for x in self.contents])
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MySet(Set):
+ def __init__(self, itr):
+ self.contents = itr
+ def __contains__(self, x):
+ return x in self.contents
+ def __iter__(self):
+ return iter(self.contents)
+ def __len__(self):
+ return len([x for x in self.contents])
s1 = MySet((1,))
s2 = MySet((1, 2))
s3 = MySet((3, 4))
@@ -1486,15 +1536,16 @@ class TestCollectionABCs(ABCTestCase):
self.assertNotEqual(s2, s3)
def test_arithmetic_Set(self):
- class MySet(Set):
- def __init__(self, itr):
- self.contents = itr
- def __contains__(self, x):
- return x in self.contents
- def __iter__(self):
- return iter(self.contents)
- def __len__(self):
- return len([x for x in self.contents])
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MySet(Set):
+ def __init__(self, itr):
+ self.contents = itr
+ def __contains__(self, x):
+ return x in self.contents
+ def __iter__(self):
+ return iter(self.contents)
+ def __len__(self):
+ return len([x for x in self.contents])
s1 = MySet((1, 2, 3))
s2 = MySet((3, 4, 5))
s3 = s1 & s2
@@ -1516,28 +1567,29 @@ class TestCollectionABCs(ABCTestCase):
def test_issue_4920(self):
# MutableSet.pop() method did not work
- class MySet(MutableSet):
- __slots__=['__s']
- def __init__(self,items=None):
- if items is None:
- items=[]
- self.__s=set(items)
- def __contains__(self,v):
- return v in self.__s
- def __iter__(self):
- return iter(self.__s)
- def __len__(self):
- return len(self.__s)
- def add(self,v):
- result=v not in self.__s
- self.__s.add(v)
- return result
- def discard(self,v):
- result=v in self.__s
- self.__s.discard(v)
- return result
- def __repr__(self):
- return "MySet(%s)" % repr(list(self))
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MySet(MutableSet):
+ __slots__=['__s']
+ def __init__(self,items=None):
+ if items is None:
+ items=[]
+ self.__s=set(items)
+ def __contains__(self,v):
+ return v in self.__s
+ def __iter__(self):
+ return iter(self.__s)
+ def __len__(self):
+ return len(self.__s)
+ def add(self,v):
+ result=v not in self.__s
+ self.__s.add(v)
+ return result
+ def discard(self,v):
+ result=v in self.__s
+ self.__s.discard(v)
+ return result
+ def __repr__(self):
+ return "MySet(%s)" % repr(list(self))
items = [5,43,2,1]
s = MySet(items)
r = s.pop()
@@ -1563,24 +1615,25 @@ class TestCollectionABCs(ABCTestCase):
def test_issue16373(self):
# Recursion error comparing comparable and noncomparable
# Set instances
- class MyComparableSet(Set):
- def __contains__(self, x):
- return False
- def __len__(self):
- return 0
- def __iter__(self):
- return iter([])
- class MyNonComparableSet(Set):
- def __contains__(self, x):
- return False
- def __len__(self):
- return 0
- def __iter__(self):
- return iter([])
- def __le__(self, x):
- return NotImplemented
- def __lt__(self, x):
- return NotImplemented
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MyComparableSet(Set):
+ def __contains__(self, x):
+ return False
+ def __len__(self):
+ return 0
+ def __iter__(self):
+ return iter([])
+ class MyNonComparableSet(Set):
+ def __contains__(self, x):
+ return False
+ def __len__(self):
+ return 0
+ def __iter__(self):
+ return iter([])
+ def __le__(self, x):
+ return NotImplemented
+ def __lt__(self, x):
+ return NotImplemented
cs = MyComparableSet()
ncs = MyNonComparableSet()
@@ -1591,13 +1644,14 @@ class TestCollectionABCs(ABCTestCase):
def test_issue26915(self):
# Container membership test should check identity first
- class CustomSequence(Sequence):
- def __init__(self, seq):
- self._seq = seq
- def __getitem__(self, index):
- return self._seq[index]
- def __len__(self):
- return len(self._seq)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class CustomSequence(Sequence):
+ def __init__(self, seq):
+ self._seq = seq
+ def __getitem__(self, index):
+ return self._seq[index]
+ def __len__(self):
+ return len(self._seq)
nan = float('nan')
obj = support.NEVER_EQ
@@ -1622,30 +1676,31 @@ class TestCollectionABCs(ABCTestCase):
def test_Set_from_iterable(self):
"""Verify _from_iterable overridden to an instance method works."""
- class SetUsingInstanceFromIterable(MutableSet):
- def __init__(self, values, created_by):
- if not created_by:
- raise ValueError('created_by must be specified')
- self.created_by = created_by
- self._values = set(values)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class SetUsingInstanceFromIterable(MutableSet):
+ def __init__(self, values, created_by):
+ if not created_by:
+ raise ValueError('created_by must be specified')
+ self.created_by = created_by
+ self._values = set(values)
- def _from_iterable(self, values):
- return type(self)(values, 'from_iterable')
+ def _from_iterable(self, values):
+ return type(self)(values, 'from_iterable')
- def __contains__(self, value):
- return value in self._values
+ def __contains__(self, value):
+ return value in self._values
- def __iter__(self):
- yield from self._values
+ def __iter__(self):
+ yield from self._values
- def __len__(self):
- return len(self._values)
+ def __len__(self):
+ return len(self._values)
- def add(self, value):
- self._values.add(value)
+ def add(self, value):
+ self._values.add(value)
- def discard(self, value):
- self._values.discard(value)
+ def discard(self, value):
+ self._values.discard(value)
impl = SetUsingInstanceFromIterable([1, 2, 3], 'test')
@@ -1678,20 +1733,21 @@ class TestCollectionABCs(ABCTestCase):
def test_Set_interoperability_with_real_sets(self):
# Issue: 8743
- class ListSet(Set):
- def __init__(self, elements=()):
- self.data = []
- for elem in elements:
- if elem not in self.data:
- self.data.append(elem)
- def __contains__(self, elem):
- return elem in self.data
- def __iter__(self):
- return iter(self.data)
- def __len__(self):
- return len(self.data)
- def __repr__(self):
- return 'Set({!r})'.format(self.data)
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class ListSet(Set):
+ def __init__(self, elements=()):
+ self.data = []
+ for elem in elements:
+ if elem not in self.data:
+ self.data.append(elem)
+ def __contains__(self, elem):
+ return elem in self.data
+ def __iter__(self):
+ return iter(self.data)
+ def __len__(self):
+ return len(self.data)
+ def __repr__(self):
+ return 'Set({!r})'.format(self.data)
r1 = set('abc')
r2 = set('bcd')
@@ -1846,13 +1902,14 @@ class TestCollectionABCs(ABCTestCase):
self.assertTrue(issubclass(sample, Mapping))
self.validate_abstract_methods(Mapping, '__contains__', '__iter__', '__len__',
'__getitem__')
- class MyMapping(Mapping):
- def __len__(self):
- return 0
- def __getitem__(self, i):
- raise IndexError
- def __iter__(self):
- return iter(())
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MyMapping(Mapping):
+ def __len__(self):
+ return 0
+ def __getitem__(self, i):
+ raise IndexError
+ def __iter__(self):
+ return iter(())
self.validate_comparison(MyMapping())
self.assertRaises(TypeError, reversed, MyMapping())
@@ -1860,7 +1917,7 @@ class TestCollectionABCs(ABCTestCase):
for sample in [dict]:
self.assertIsInstance(sample(), MutableMapping)
self.assertTrue(issubclass(sample, MutableMapping))
- self.validate_abstract_methods(MutableMapping, '__contains__', '__iter__', '__len__',
+ self.validate_abstract_methods(MutableMapping, '__iter__', '__len__',
'__getitem__', '__setitem__', '__delitem__')
def test_MutableMapping_subclass(self):
@@ -1903,15 +1960,16 @@ class TestCollectionABCs(ABCTestCase):
'__getitem__')
def test_Sequence_mixins(self):
- class SequenceSubclass(Sequence):
- def __init__(self, seq=()):
- self.seq = seq
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class SequenceSubclass(Sequence):
+ def __init__(self, seq=()):
+ self.seq = seq
- def __getitem__(self, index):
- return self.seq[index]
+ def __getitem__(self, index):
+ return self.seq[index]
- def __len__(self):
- return len(self.seq)
+ def __len__(self):
+ return len(self.seq)
# Compare Sequence.index() behavior to (list|str).index() behavior
def assert_index_same(seq1, seq2, index_args):
@@ -1983,24 +2041,25 @@ class TestCollectionABCs(ABCTestCase):
def test_MutableSequence_mixins(self):
# Test the mixins of MutableSequence by creating a minimal concrete
# class inherited from it.
- class MutableSequenceSubclass(MutableSequence):
- def __init__(self):
- self.lst = []
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MutableSequenceSubclass(MutableSequence):
+ def __init__(self):
+ self.lst = []
- def __setitem__(self, index, value):
- self.lst[index] = value
+ def __setitem__(self, index, value):
+ self.lst[index] = value
- def __getitem__(self, index):
- return self.lst[index]
+ def __getitem__(self, index):
+ return self.lst[index]
- def __len__(self):
- return len(self.lst)
+ def __len__(self):
+ return len(self.lst)
- def __delitem__(self, index):
- del self.lst[index]
+ def __delitem__(self, index):
+ del self.lst[index]
- def insert(self, index, value):
- self.lst.insert(index, value)
+ def insert(self, index, value):
+ self.lst.insert(index, value)
mss = MutableSequenceSubclass()
mss.append(0)
@@ -2059,7 +2118,7 @@ class CounterSubclassWithGet(Counter):
@@ -2059,7 +2079,7 @@ class CounterSubclassWithGet(Counter):
self.called = True
return Counter.get(self, key, default)
@ -902,19 +71,7 @@ index cafc44007d1..4571e5a14fd 100644
def test_basics(self):
c = Counter('abcaba')
@@ -2225,8 +2284,9 @@ class TestCounter(unittest.TestCase):
check(Counter(words))
def test_copy_subclass(self):
- class MyCounter(Counter):
- pass
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ class MyCounter(Counter):
+ pass
c = MyCounter('slartibartfast')
d = c.copy()
self.assertEqual(d, c)
@@ -2402,10 +2462,5 @@ class TestCounter(unittest.TestCase):
@@ -2402,10 +2422,5 @@ class TestCounter(unittest.TestCase):
self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab'))
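
The patch above (and the rendered file that follows) keeps wrapping class-body definitions in torch._dynamo.set_fullgraph(fullgraph=False): Dynamo cannot trace dynamic class creation, so the context manager locally permits a graph break where fullgraph tracing would otherwise raise. The recurring pattern, condensed:

import torch
from collections import UserDict

with torch._dynamo.set_fullgraph(fullgraph=False):  # tolerate a graph break here
    class A(UserDict):
        def __missing__(self, key):
            return 456

assert A()[123] == 456  # __missing__ still fires on lookup misses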

View File

@ -93,10 +93,9 @@ class TestUserObjects(__TestCase):
self._copy_test(obj)
def test_dict_missing(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class A(UserDict):
def __missing__(self, key):
return 456
class A(UserDict):
def __missing__(self, key):
return 456
self.assertEqual(A()[123], 456)
# get() ignores __missing__ on dict
self.assertIs(A().get(123), None)
@ -193,10 +192,9 @@ class TestChainMap(__TestCase):
self.assertTrue(ChainMap({}, {1:2}))
def test_missing(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class DefaultChainMap(ChainMap):
def __missing__(self, key):
return 999
class DefaultChainMap(ChainMap):
def __missing__(self, key):
return 999
d = DefaultChainMap(dict(a=1, b=2), dict(b=20, c=30))
for k, v in dict(a=1, b=2, c=30, d=999).items():
self.assertEqual(d[k], v) # check __getitem__ w/missing
@ -228,14 +226,13 @@ class TestChainMap(__TestCase):
('i', 9999), ('j', 0)])
def test_iter_not_calling_getitem_on_maps(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class DictWithGetItem(UserDict):
def __init__(self, *args, **kwds):
self.called = False
UserDict.__init__(self, *args, **kwds)
def __getitem__(self, item):
self.called = True
UserDict.__getitem__(self, item)
class DictWithGetItem(UserDict):
def __init__(self, *args, **kwds):
self.called = False
UserDict.__init__(self, *args, **kwds)
def __getitem__(self, item):
self.called = True
UserDict.__getitem__(self, item)
d = DictWithGetItem(a=1)
c = ChainMap(d)
@ -260,16 +257,15 @@ class TestChainMap(__TestCase):
self.assertIs(m, d.maps[0])
# Use a different map than a dict
with torch._dynamo.set_fullgraph(fullgraph=False):
class lowerdict(dict):
def __getitem__(self, key):
if isinstance(key, str):
key = key.lower()
return dict.__getitem__(self, key)
def __contains__(self, key):
if isinstance(key, str):
key = key.lower()
return dict.__contains__(self, key)
class lowerdict(dict):
def __getitem__(self, key):
if isinstance(key, str):
key = key.lower()
return dict.__getitem__(self, key)
def __contains__(self, key):
if isinstance(key, str):
key = key.lower()
return dict.__contains__(self, key)
c = ChainMap()
c['a'] = 1
@ -690,9 +686,8 @@ class TestNamedTuple(__TestCase):
NT = namedtuple('NT', ['abc', 'def'], False, True)
def test_namedtuple_subclass_issue_24931(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class Point(namedtuple('_Point', ['x', 'y'])):
pass
class Point(namedtuple('_Point', ['x', 'y'])):
pass
a = Point(3, 4)
self.assertEqual(a._asdict(), OrderedDict([('x', 3), ('y', 4)]))
@ -753,20 +748,15 @@ class ABCTestCase(__TestCase):
methodstubs = dict.fromkeys(names, lambda s, *args: 0)
# everything should work when all required methods are present
with torch._dynamo.set_fullgraph(fullgraph=False):
C = type('C', (abc,), methodstubs)
C = type('C', (abc,), methodstubs)
C()
# Dynamo raises a hard error here that we can't easily capture
# Commenting this part as this would also fail in eager if a user
# attempts to run the same code
# instantiation should fail if a required method is missing
# for name in names:
# stubs = methodstubs.copy()
# del stubs[name]
# C = type('C', (abc,), stubs)
# self.assertRaises(TypeError, C, name)
for name in names:
stubs = methodstubs.copy()
del stubs[name]
C = type('C', (abc,), stubs)
self.assertRaises(TypeError, C, name)
def validate_isinstance(self, abc, name):
stub = lambda s, *args: 0
@ -1011,21 +1001,19 @@ class TestOneTrickPonyABCs(ABCTestCase):
for x in samples:
self.assertIsInstance(x, Iterable)
self.assertTrue(issubclass(type(x), Iterable), repr(type(x)))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check direct subclassing
class I(Iterable):
def __iter__(self):
return super().__iter__()
# Check direct subclassing
class I(Iterable):
def __iter__(self):
return super().__iter__()
self.assertEqual(list(I()), [])
self.assertFalse(issubclass(str, I))
self.validate_abstract_methods(Iterable, '__iter__')
self.validate_isinstance(Iterable, '__iter__')
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check None blocking
class It:
def __iter__(self): return iter([])
class ItBlocked(It):
__iter__ = None
# Check None blocking
class It:
def __iter__(self): return iter([])
class ItBlocked(It):
__iter__ = None
self.assertTrue(issubclass(It, Iterable))
self.assertTrue(isinstance(It(), Iterable))
self.assertFalse(issubclass(ItBlocked, Iterable))
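A standalone illustration of the "None blocking" convention these assertions exercise: assigning None to a required method tells the ABC's structural subclass check that the protocol is explicitly unsupported.

from collections.abc import Iterable

class It:
    def __iter__(self): return iter([])

class ItBlocked(It):
    __iter__ = None  # signals "not iterable" to the structural check

assert issubclass(It, Iterable)
assert not issubclass(ItBlocked, Iterable)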
@ -1055,35 +1043,32 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(Sequence, Reversible), repr(Sequence))
self.assertFalse(issubclass(Mapping, Reversible), repr(Mapping))
self.assertFalse(issubclass(MutableMapping, Reversible), repr(MutableMapping))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check direct subclassing
class R(Reversible):
def __iter__(self):
return iter(list())
def __reversed__(self):
return iter(list())
# Check direct subclassing
class R(Reversible):
def __iter__(self):
return iter(list())
def __reversed__(self):
return iter(list())
self.assertEqual(list(reversed(R())), [])
self.assertFalse(issubclass(float, R))
self.validate_abstract_methods(Reversible, '__reversed__', '__iter__')
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check reversible non-iterable (which is not Reversible)
class RevNoIter:
def __reversed__(self): return reversed([])
class RevPlusIter(RevNoIter):
def __iter__(self): return iter([])
# Check reversible non-iterable (which is not Reversible)
class RevNoIter:
def __reversed__(self): return reversed([])
class RevPlusIter(RevNoIter):
def __iter__(self): return iter([])
self.assertFalse(issubclass(RevNoIter, Reversible))
self.assertFalse(isinstance(RevNoIter(), Reversible))
self.assertTrue(issubclass(RevPlusIter, Reversible))
self.assertTrue(isinstance(RevPlusIter(), Reversible))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check None blocking
class Rev:
def __iter__(self): return iter([])
def __reversed__(self): return reversed([])
class RevItBlocked(Rev):
__iter__ = None
class RevRevBlocked(Rev):
__reversed__ = None
# Check None blocking
class Rev:
def __iter__(self): return iter([])
def __reversed__(self): return reversed([])
class RevItBlocked(Rev):
__iter__ = None
class RevRevBlocked(Rev):
__reversed__ = None
self.assertTrue(issubclass(Rev, Reversible))
self.assertTrue(isinstance(Rev(), Reversible))
self.assertFalse(issubclass(RevItBlocked, Reversible))
@ -1117,16 +1102,15 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(Set, Collection), repr(Set))
self.assertTrue(issubclass(MutableSet, Collection), repr(MutableSet))
self.assertTrue(issubclass(Sequence, Collection), repr(MutableSet))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check direct subclassing
class Col(Collection):
def __iter__(self):
return iter(list())
def __len__(self):
return 0
def __contains__(self, item):
return False
class DerCol(Col): pass
# Check direct subclassing
class Col(Collection):
def __iter__(self):
return iter(list())
def __len__(self):
return 0
def __contains__(self, item):
return False
class DerCol(Col): pass
self.assertEqual(list(iter(Col())), [])
self.assertFalse(issubclass(list, Col))
self.assertFalse(issubclass(set, Col))
@ -1138,48 +1122,44 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.validate_abstract_methods(Collection, '__len__', '__iter__',
'__contains__')
# Check sized container non-iterable (which is not Collection) etc.
with torch._dynamo.set_fullgraph(fullgraph=False):
class ColNoIter:
def __len__(self): return 0
def __contains__(self, item): return False
class ColNoSize:
def __iter__(self): return iter([])
def __contains__(self, item): return False
class ColNoCont:
def __iter__(self): return iter([])
def __len__(self): return 0
class ColNoIter:
def __len__(self): return 0
def __contains__(self, item): return False
class ColNoSize:
def __iter__(self): return iter([])
def __contains__(self, item): return False
class ColNoCont:
def __iter__(self): return iter([])
def __len__(self): return 0
self.assertFalse(issubclass(ColNoIter, Collection))
self.assertFalse(isinstance(ColNoIter(), Collection))
self.assertFalse(issubclass(ColNoSize, Collection))
self.assertFalse(isinstance(ColNoSize(), Collection))
self.assertFalse(issubclass(ColNoCont, Collection))
self.assertFalse(isinstance(ColNoCont(), Collection))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check None blocking
class SizeBlock:
def __iter__(self): return iter([])
def __contains__(self): return False
__len__ = None
class IterBlock:
def __len__(self): return 0
def __contains__(self): return True
__iter__ = None
# Check None blocking
class SizeBlock:
def __iter__(self): return iter([])
def __contains__(self): return False
__len__ = None
class IterBlock:
def __len__(self): return 0
def __contains__(self): return True
__iter__ = None
self.assertFalse(issubclass(SizeBlock, Collection))
self.assertFalse(isinstance(SizeBlock(), Collection))
self.assertFalse(issubclass(IterBlock, Collection))
self.assertFalse(isinstance(IterBlock(), Collection))
with torch._dynamo.set_fullgraph(fullgraph=False):
# Check None blocking in subclass
class ColImpl:
def __iter__(self):
return iter(list())
def __len__(self):
return 0
def __contains__(self, item):
return False
class NonCol(ColImpl):
__contains__ = None
# Check None blocking in subclass
class ColImpl:
def __iter__(self):
return iter(list())
def __len__(self):
return 0
def __contains__(self, item):
return False
class NonCol(ColImpl):
__contains__ = None
self.assertFalse(issubclass(NonCol, Collection))
self.assertFalse(isinstance(NonCol(), Collection))
@ -1202,32 +1182,30 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertTrue(issubclass(type(x), Iterator), repr(type(x)))
self.validate_abstract_methods(Iterator, '__next__', '__iter__')
with torch._dynamo.set_fullgraph(fullgraph=False):
# Issue 10565
class NextOnly:
def __next__(self):
yield 1
return
# Issue 10565
class NextOnly:
def __next__(self):
yield 1
return
self.assertNotIsInstance(NextOnly(), Iterator)
def test_Generator(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class NonGen1:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def throw(self, typ, val=None, tb=None): pass
class NonGen1:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def throw(self, typ, val=None, tb=None): pass
class NonGen2:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def send(self, value): return value
class NonGen2:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def send(self, value): return value
class NonGen3:
def close(self): pass
def send(self, value): return value
def throw(self, typ, val=None, tb=None): pass
class NonGen3:
def close(self): pass
def send(self, value): return value
def throw(self, typ, val=None, tb=None): pass
non_samples = [
None, 42, 3.14, 1j, b"", "", (), [], {}, set(),
@ -1236,19 +1214,18 @@ class TestOneTrickPonyABCs(ABCTestCase):
self.assertNotIsInstance(x, Generator)
self.assertFalse(issubclass(type(x), Generator), repr(type(x)))
with torch._dynamo.set_fullgraph(fullgraph=False):
class Gen:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def send(self, value): return value
def throw(self, typ, val=None, tb=None): pass
class Gen:
def __iter__(self): return self
def __next__(self): return None
def close(self): pass
def send(self, value): return value
def throw(self, typ, val=None, tb=None): pass
class MinimalGen(Generator):
def send(self, value):
return value
def throw(self, typ, val=None, tb=None):
super().throw(typ, val, tb)
class MinimalGen(Generator):
def send(self, value):
return value
def throw(self, typ, val=None, tb=None):
super().throw(typ, val, tb)
def gen():
yield 1
@ -1271,17 +1248,15 @@ class TestOneTrickPonyABCs(ABCTestCase):
mgen.throw, ValueError, ValueError("huhu"))
self.assertRaises(StopIteration, mgen.throw, StopIteration())
with torch._dynamo.set_fullgraph(fullgraph=False):
class FailOnClose(Generator):
def send(self, value): return value
def throw(self, *args): raise ValueError
class FailOnClose(Generator):
def send(self, value): return value
def throw(self, *args): raise ValueError
self.assertRaises(ValueError, FailOnClose().close)
with torch._dynamo.set_fullgraph(fullgraph=False):
class IgnoreGeneratorExit(Generator):
def send(self, value): return value
def throw(self, *args): pass
class IgnoreGeneratorExit(Generator):
def send(self, value): return value
def throw(self, *args): pass
self.assertRaises(RuntimeError, IgnoreGeneratorExit().close)
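For context on the two assertions above: the Generator mixin's close() calls self.throw(GeneratorExit), suppresses GeneratorExit/StopIteration, lets any other exception escape, and reports a swallowed GeneratorExit as a RuntimeError. A minimal sketch:

from collections.abc import Generator

class Swallow(Generator):
    def send(self, value): return value
    def throw(self, *args): pass  # silently ignores GeneratorExit

try:
    Swallow().close()
except RuntimeError as exc:
    print("close() raised:", exc)  # generator ignored GeneratorExit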
@ -1424,17 +1399,15 @@ class TestOneTrickPonyABCs(ABCTestCase):
def test_direct_subclassing(self):
for B in Hashable, Iterable, Iterator, Reversible, Sized, Container, Callable:
with torch._dynamo.set_fullgraph(fullgraph=False):
class C(B):
pass
class C(B):
pass
self.assertTrue(issubclass(C, B))
self.assertFalse(issubclass(int, C))
def test_registration(self):
for B in Hashable, Iterable, Iterator, Reversible, Sized, Container, Callable:
with torch._dynamo.set_fullgraph(fullgraph=False):
class C:
__hash__ = None # Make sure it isn't hashable by default
class C:
__hash__ = None # Make sure it isn't hashable by default
self.assertFalse(issubclass(C, B), B.__name__)
B.register(C)
self.assertTrue(issubclass(C, B))
@ -1470,14 +1443,13 @@ class TestCollectionABCs(ABCTestCase):
self.assertIsInstance(sample(), Set)
self.assertTrue(issubclass(sample, Set))
self.validate_abstract_methods(Set, '__contains__', '__iter__', '__len__')
with torch._dynamo.set_fullgraph(fullgraph=False):
class MySet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
class MySet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
self.validate_comparison(MySet())
def test_hash_Set(self):
@ -1496,16 +1468,15 @@ class TestCollectionABCs(ABCTestCase):
self.assertTrue(hash(a) == hash(b))
def test_isdisjoint_Set(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
s1 = MySet((1, 2, 3))
s2 = MySet((4, 5, 6))
s3 = MySet((1, 5, 6))
@ -1513,16 +1484,15 @@ class TestCollectionABCs(ABCTestCase):
self.assertFalse(s1.isdisjoint(s3))
def test_equality_Set(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
s1 = MySet((1,))
s2 = MySet((1, 2))
s3 = MySet((3, 4))
@ -1536,16 +1506,15 @@ class TestCollectionABCs(ABCTestCase):
self.assertNotEqual(s2, s3)
def test_arithmetic_Set(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
class MySet(Set):
def __init__(self, itr):
self.contents = itr
def __contains__(self, x):
return x in self.contents
def __iter__(self):
return iter(self.contents)
def __len__(self):
return len([x for x in self.contents])
s1 = MySet((1, 2, 3))
s2 = MySet((3, 4, 5))
s3 = s1 & s2
@ -1567,29 +1536,28 @@ class TestCollectionABCs(ABCTestCase):
def test_issue_4920(self):
# MutableSet.pop() method did not work
with torch._dynamo.set_fullgraph(fullgraph=False):
class MySet(MutableSet):
__slots__=['__s']
def __init__(self,items=None):
if items is None:
items=[]
self.__s=set(items)
def __contains__(self,v):
return v in self.__s
def __iter__(self):
return iter(self.__s)
def __len__(self):
return len(self.__s)
def add(self,v):
result=v not in self.__s
self.__s.add(v)
return result
def discard(self,v):
result=v in self.__s
self.__s.discard(v)
return result
def __repr__(self):
return "MySet(%s)" % repr(list(self))
class MySet(MutableSet):
__slots__=['__s']
def __init__(self,items=None):
if items is None:
items=[]
self.__s=set(items)
def __contains__(self,v):
return v in self.__s
def __iter__(self):
return iter(self.__s)
def __len__(self):
return len(self.__s)
def add(self,v):
result=v not in self.__s
self.__s.add(v)
return result
def discard(self,v):
result=v in self.__s
self.__s.discard(v)
return result
def __repr__(self):
return "MySet(%s)" % repr(list(self))
items = [5,43,2,1]
s = MySet(items)
r = s.pop()
@ -1615,25 +1583,24 @@ class TestCollectionABCs(ABCTestCase):
def test_issue16373(self):
# Recursion error comparing comparable and noncomparable
# Set instances
with torch._dynamo.set_fullgraph(fullgraph=False):
class MyComparableSet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
class MyNonComparableSet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
def __le__(self, x):
return NotImplemented
def __lt__(self, x):
return NotImplemented
class MyComparableSet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
class MyNonComparableSet(Set):
def __contains__(self, x):
return False
def __len__(self):
return 0
def __iter__(self):
return iter([])
def __le__(self, x):
return NotImplemented
def __lt__(self, x):
return NotImplemented
cs = MyComparableSet()
ncs = MyNonComparableSet()
@ -1644,14 +1611,13 @@ class TestCollectionABCs(ABCTestCase):
def test_issue26915(self):
# Container membership test should check identity first
with torch._dynamo.set_fullgraph(fullgraph=False):
class CustomSequence(Sequence):
def __init__(self, seq):
self._seq = seq
def __getitem__(self, index):
return self._seq[index]
def __len__(self):
return len(self._seq)
class CustomSequence(Sequence):
def __init__(self, seq):
self._seq = seq
def __getitem__(self, index):
return self._seq[index]
def __len__(self):
return len(self._seq)
nan = float('nan')
obj = support.NEVER_EQ
@ -1676,31 +1642,30 @@ class TestCollectionABCs(ABCTestCase):
def test_Set_from_iterable(self):
"""Verify _from_iterable overridden to an instance method works."""
with torch._dynamo.set_fullgraph(fullgraph=False):
class SetUsingInstanceFromIterable(MutableSet):
def __init__(self, values, created_by):
if not created_by:
raise ValueError('created_by must be specified')
self.created_by = created_by
self._values = set(values)
class SetUsingInstanceFromIterable(MutableSet):
def __init__(self, values, created_by):
if not created_by:
raise ValueError('created_by must be specified')
self.created_by = created_by
self._values = set(values)
def _from_iterable(self, values):
return type(self)(values, 'from_iterable')
def _from_iterable(self, values):
return type(self)(values, 'from_iterable')
def __contains__(self, value):
return value in self._values
def __contains__(self, value):
return value in self._values
def __iter__(self):
yield from self._values
def __iter__(self):
yield from self._values
def __len__(self):
return len(self._values)
def __len__(self):
return len(self._values)
def add(self, value):
self._values.add(value)
def add(self, value):
self._values.add(value)
def discard(self, value):
self._values.discard(value)
def discard(self, value):
self._values.discard(value)
impl = SetUsingInstanceFromIterable([1, 2, 3], 'test')
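Background for this test, sketched with a hypothetical TaggedSet: the Set mixins (e.g. __and__) build their results through _from_iterable, which is a classmethod by default, so overriding it (here as an instance method) lets derived sets carry state:

from collections.abc import Set

class TaggedSet(Set):
    def __init__(self, values, tag='direct'):
        self._values, self.tag = set(values), tag
    def _from_iterable(self, values):  # instance-method override
        return TaggedSet(values, tag='derived')
    def __contains__(self, v): return v in self._values
    def __iter__(self): return iter(self._values)
    def __len__(self): return len(self._values)

r = TaggedSet([1, 2, 3]) & TaggedSet([2, 3])
assert r.tag == 'derived'  # the result was built via _from_iterable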
@ -1733,21 +1698,20 @@ class TestCollectionABCs(ABCTestCase):
def test_Set_interoperability_with_real_sets(self):
# Issue: 8743
with torch._dynamo.set_fullgraph(fullgraph=False):
class ListSet(Set):
def __init__(self, elements=()):
self.data = []
for elem in elements:
if elem not in self.data:
self.data.append(elem)
def __contains__(self, elem):
return elem in self.data
def __iter__(self):
return iter(self.data)
def __len__(self):
return len(self.data)
def __repr__(self):
return 'Set({!r})'.format(self.data)
class ListSet(Set):
def __init__(self, elements=()):
self.data = []
for elem in elements:
if elem not in self.data:
self.data.append(elem)
def __contains__(self, elem):
return elem in self.data
def __iter__(self):
return iter(self.data)
def __len__(self):
return len(self.data)
def __repr__(self):
return 'Set({!r})'.format(self.data)
r1 = set('abc')
r2 = set('bcd')
@ -1902,14 +1866,13 @@ class TestCollectionABCs(ABCTestCase):
self.assertTrue(issubclass(sample, Mapping))
self.validate_abstract_methods(Mapping, '__contains__', '__iter__', '__len__',
'__getitem__')
with torch._dynamo.set_fullgraph(fullgraph=False):
class MyMapping(Mapping):
def __len__(self):
return 0
def __getitem__(self, i):
raise IndexError
def __iter__(self):
return iter(())
class MyMapping(Mapping):
def __len__(self):
return 0
def __getitem__(self, i):
raise IndexError
def __iter__(self):
return iter(())
self.validate_comparison(MyMapping())
self.assertRaises(TypeError, reversed, MyMapping())
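A standalone sketch of what the Mapping ABC supplies for free once __getitem__, __len__ and __iter__ exist, which is what the validate_comparison call above relies on: containment, get(), the view methods and equality all come from mixins.

from collections.abc import Mapping

class OneKey(Mapping):
    def __len__(self): return 1
    def __iter__(self): return iter(('k',))
    def __getitem__(self, key):
        if key == 'k':
            return 42
        raise KeyError(key)

m = OneKey()
assert 'k' in m and m.get('missing', 0) == 0  # mixin __contains__/get
assert dict(m.items()) == {'k': 42}           # mixin items() view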
@ -1917,7 +1880,7 @@ class TestCollectionABCs(ABCTestCase):
for sample in [dict]:
self.assertIsInstance(sample(), MutableMapping)
self.assertTrue(issubclass(sample, MutableMapping))
self.validate_abstract_methods(MutableMapping, '__iter__', '__len__',
self.validate_abstract_methods(MutableMapping, '__contains__', '__iter__', '__len__',
'__getitem__', '__setitem__', '__delitem__')
def test_MutableMapping_subclass(self):
@ -1960,16 +1923,15 @@ class TestCollectionABCs(ABCTestCase):
'__getitem__')
def test_Sequence_mixins(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class SequenceSubclass(Sequence):
def __init__(self, seq=()):
self.seq = seq
class SequenceSubclass(Sequence):
def __init__(self, seq=()):
self.seq = seq
def __getitem__(self, index):
return self.seq[index]
def __getitem__(self, index):
return self.seq[index]
def __len__(self):
return len(self.seq)
def __len__(self):
return len(self.seq)
# Compare Sequence.index() behavior to (list|str).index() behavior
def assert_index_same(seq1, seq2, index_args):
@ -2041,25 +2003,24 @@ class TestCollectionABCs(ABCTestCase):
def test_MutableSequence_mixins(self):
# Test the mixins of MutableSequence by creating a minimal concrete
# class inherited from it.
with torch._dynamo.set_fullgraph(fullgraph=False):
class MutableSequenceSubclass(MutableSequence):
def __init__(self):
self.lst = []
class MutableSequenceSubclass(MutableSequence):
def __init__(self):
self.lst = []
def __setitem__(self, index, value):
self.lst[index] = value
def __setitem__(self, index, value):
self.lst[index] = value
def __getitem__(self, index):
return self.lst[index]
def __getitem__(self, index):
return self.lst[index]
def __len__(self):
return len(self.lst)
def __len__(self):
return len(self.lst)
def __delitem__(self, index):
del self.lst[index]
def __delitem__(self, index):
del self.lst[index]
def insert(self, index, value):
self.lst.insert(index, value)
def insert(self, index, value):
self.lst.insert(index, value)
mss = MutableSequenceSubclass()
mss.append(0)
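A compact sketch of the mixin surface this test exercises: once __getitem__, __setitem__, __delitem__, __len__ and insert exist, MutableSequence supplies append, extend, pop, remove, reverse, += and membership for free.

from collections.abc import MutableSequence

class MiniList(MutableSequence):
    def __init__(self): self.lst = []
    def __getitem__(self, i): return self.lst[i]
    def __setitem__(self, i, v): self.lst[i] = v
    def __delitem__(self, i): del self.lst[i]
    def __len__(self): return len(self.lst)
    def insert(self, i, v): self.lst.insert(i, v)

m = MiniList()
m.append(1); m.extend([2, 3]); m += [4]  # all inherited mixin methods
assert list(m) == [1, 2, 3, 4] and m.pop() == 4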
@ -2284,9 +2245,8 @@ class TestCounter(__TestCase):
check(Counter(words))
def test_copy_subclass(self):
with torch._dynamo.set_fullgraph(fullgraph=False):
class MyCounter(Counter):
pass
class MyCounter(Counter):
pass
c = MyCounter('slartibartfast')
d = c.copy()
self.assertEqual(d, c)
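The test boils down to this standalone check: Counter.copy() constructs the copy via self.__class__, so both the subclass type and the contents survive.

from collections import Counter

class MyCounter(Counter):
    pass

c = MyCounter('slartibartfast')
d = c.copy()
assert d == c and type(d) is MyCounter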

View File

@ -1,5 +1,5 @@
diff --git a/test/dynamo/cpython/3_13/test_itertools.py b/test/dynamo/cpython/3_13/test_itertools.py
index 7d5ba727389..8d462284884 100644
index 7d5ba727389..d15d83a2184 100644
--- a/test/dynamo/cpython/3_13/test_itertools.py
+++ b/test/dynamo/cpython/3_13/test_itertools.py
@@ -1,3 +1,25 @@
@ -151,7 +151,7 @@ index 7d5ba727389..8d462284884 100644
_, g = next(it)
next(it)
next(it)
@@ -1002,29 +1015,30 @@ class TestBasicOps(unittest.TestCase):
@@ -1002,27 +1015,29 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(list(filter(None, [0,1,0,2,0])), [1,2])
self.assertEqual(list(filter(bool, [0,1,0,2,0])), [1,2])
self.assertEqual(take(4, filter(isEven, count())), [0,2,4,6])
@ -198,24 +198,8 @@ index 7d5ba727389..8d462284884 100644
+ # c = filter(isEven, range(6))
+ # self.pickletest(proto, c)
- @pickle_deprecated
@pickle_deprecated
def test_filterfalse(self):
self.assertEqual(list(filterfalse(isEven, range(6))), [1,3,5])
self.assertEqual(list(filterfalse(None, [0,1,0,2,0])), [0,0,0])
@@ -1034,9 +1048,10 @@ class TestBasicOps(unittest.TestCase):
self.assertRaises(TypeError, filterfalse, lambda x:x)
self.assertRaises(TypeError, filterfalse, lambda x:x, range(6), 7)
self.assertRaises(TypeError, filterfalse, isEven, 3)
- self.assertRaises(TypeError, next, filterfalse(range(6), range(6)))
- for proto in range(pickle.HIGHEST_PROTOCOL + 1):
- self.pickletest(proto, filterfalse(isEven, range(6)))
+ with torch._dynamo.set_fullgraph(fullgraph=False):
+ self.assertRaises(TypeError, next, filterfalse(range(6), range(6)))
+ for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+ self.pickletest(proto, filterfalse(isEven, range(6)))
def test_zip(self):
# XXX This is rather silly now that builtin zip() calls zip()...
@@ -1047,8 +1062,8 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(take(3,zip('abcdef', count())), lzip('abcdef', range(3)))
self.assertEqual(list(zip('abcdef')), lzip('abcdef'))

View File

@ -1039,6 +1039,7 @@ class TestBasicOps(__TestCase):
# c = filter(isEven, range(6))
# self.pickletest(proto, c)
@pickle_deprecated
def test_filterfalse(self):
self.assertEqual(list(filterfalse(isEven, range(6))), [1,3,5])
self.assertEqual(list(filterfalse(None, [0,1,0,2,0])), [0,0,0])
@ -1048,10 +1049,9 @@ class TestBasicOps(__TestCase):
self.assertRaises(TypeError, filterfalse, lambda x:x)
self.assertRaises(TypeError, filterfalse, lambda x:x, range(6), 7)
self.assertRaises(TypeError, filterfalse, isEven, 3)
with torch._dynamo.set_fullgraph(fullgraph=False):
self.assertRaises(TypeError, next, filterfalse(range(6), range(6)))
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
self.pickletest(proto, filterfalse(isEven, range(6)))
self.assertRaises(TypeError, next, filterfalse(range(6), range(6)))
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
self.pickletest(proto, filterfalse(isEven, range(6)))
def test_zip(self):
# XXX This is rather silly now that builtin zip() calls zip()...

View File

@ -1742,83 +1742,6 @@ class GraphModule(torch.nn.Module):
opt_f = torch.compile(f, backend="eager")
opt_f(torch.randn(2, 2))
# Regression test to make sure dynamo won't crash on these kwargs.
def test_sdpa_kernel_ctx_manager_kwargs(self):
backends = [torch.nn.attention.SDPBackend.MATH]
@torch._dynamo.allow_in_graph
def check_backend_state_is_modified():
self.assertEqual(
set(torch.nn.attention._cur_sdpa_kernel_backends()),
set(backends),
)
def f(x):
with torch.nn.attention.sdpa_kernel(backends=backends, set_priority=True):
x = x + 1
check_backend_state_is_modified()
x = x + 1
return x
opt_f = torch.compile(f, backend="eager")
opt_f(torch.randn(2, 2))
# Regression test to make sure dynamo won't graph break on calling functions
# decorated with a special context manager.
def test_sdpa_kernel_ctx_manager_as_decorator(self):
SDPA_BACKEND_PRIORITY = [
torch.nn.attention.SDPBackend.MATH,
torch.nn.attention.SDPBackend.EFFICIENT_ATTENTION,
torch.nn.attention.SDPBackend.FLASH_ATTENTION,
]
@torch.nn.attention.sdpa_kernel(
backends=SDPA_BACKEND_PRIORITY, set_priority=True
)
def scaled_dot_product_attention(q, k, v, *args, **kwargs):
return torch.nn.functional.scaled_dot_product_attention(
q, k, v, *args, **kwargs
)
def f(x):
return scaled_dot_product_attention(x, x, x)
opt_f = torch.compile(f, backend="eager", fullgraph=True)
x = torch.rand(16, 16, 64, 256, dtype=torch.float16)
ref = f(x)
res = opt_f(x)
self.assertEqual(ref, res)
# Regression test to make sure the value of set_priority is used correctly.
def test_sdpa_kernel_ctx_manager_set_priority(self):
backends = [torch.nn.attention.SDPBackend.MATH]
default_priority = torch._C._get_sdp_priority_order()
@torch._dynamo.allow_in_graph
def check_backend_priority(changed: bool):
self.assertEqual(
changed,
torch._C._get_sdp_priority_order() != default_priority,
)
def f(x):
with torch.nn.attention.sdpa_kernel(backends=backends, set_priority=True):
x = x + 1
check_backend_priority(changed=True)
x = x + 1
with torch.nn.attention.sdpa_kernel(backends=backends, set_priority=False):
x = x + 1
check_backend_priority(changed=False)
x = x + 1
return x
opt_f = torch.compile(f, backend="eager")
opt_f(torch.randn(2, 2))
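For reference, a minimal sketch of the sdpa_kernel API the removed tests exercised (per the tests above, set_priority=True additionally treats the backend list as a priority order):

import torch
from torch.nn.attention import SDPBackend, sdpa_kernel

q = k = v = torch.rand(2, 4, 8, 16)
with sdpa_kernel(backends=[SDPBackend.MATH], set_priority=True):
    out = torch.nn.functional.scaled_dot_product_attention(q, k, v)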
def test_torch_profiler_use_after_with_block(self):
counters.clear()

View File

@ -310,12 +310,6 @@ class FunctionTests(torch._dynamo.test_case.TestCase):
itertools.permutations(filter(lambda x: True, [1, 2]))
return a
@make_test
def test_itertools_filterfalse_basic(a, b):
for x in itertools.filterfalse(lambda x: x > 0, [-0.5, 0, 0.5]):
a += x
return a
@make_test
def test_itertools_chain(a, b):
v = a
@ -568,11 +562,6 @@ class FunctionTests(torch._dynamo.test_case.TestCase):
args = [a, b]
return sub(*args)
@make_test
def test_tuple_map(a, b):
t = tuple(map(torch.sin, [a, b]))
return t[0] + t[1]
def test_size_tuple_add(self):
def fn():
size = torch.Size([])
@ -2027,21 +2016,6 @@ class FunctionTests(torch._dynamo.test_case.TestCase):
tmp = mytuple(a, xy=b)
return mytuple(tmp.x, tmp[1], tmp.xy + b)
@make_test
def test_namedtuple_replace(a, b):
mytuple = collections.namedtuple("mytuple", ["x", "y"])
t = mytuple(a, b)
t._replace(x=b)
return t.x + t.y
@make_test
def test_namedtuple_fields(a, b):
mytuple = collections.namedtuple("mytuple", ["x", "y"])
if mytuple._fields == ("x", "y"):
return a + b
else:
return a - b
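The two removed tests lean on standard namedtuple helpers; a standalone sketch of their semantics (note that _replace returns a new tuple rather than mutating, which is why t._replace(x=b) above left t unchanged):

import collections

mytuple = collections.namedtuple("mytuple", ["x", "y"])
t = mytuple(1, 2)
assert t._replace(x=10) == mytuple(10, 2)
assert t == mytuple(1, 2)            # _replace did not mutate t
assert mytuple._fields == ("x", "y")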
class MyNamedTuple(NamedTuple):
first: torch.Tensor
second: torch.Tensor

View File

@ -4,16 +4,13 @@ import contextlib
import torch
import torch.fx
from torch._dynamo.graph_deduplication import apply_graph_deduplication
from torch._dynamo.graph_utils import _detect_cycles
from torch._dynamo.output_graph import FakeRootModule
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import (
AotEagerAndRecordGraphs,
extract_graph_and_tracker,
normalize_gm,
)
from torch.compiler import allow_in_graph
from torch.utils._ordered_set import OrderedSet
@ -1109,104 +1106,6 @@ def forward(self, L_x_ : torch.Tensor, L_y_ : torch.Tensor):
""",
)
def test_tuple_return(self):
@allow_in_graph
def tuple_return(x, y):
return x, y
def inner_fn(x, y):
x0 = x + x + 1
y0 = y + y + 1
return tuple_return(x0, y0)
def fn(x0, x1, x2, y0, y1, y2):
x0 = inner_fn(x0, y0)
x1 = inner_fn(x1, y1)
x2 = inner_fn(x2, y2)
return x0, x1, x2
fn_opt = torch.compile(fn, fullgraph=True)
inps = [torch.rand(10, 10) for _ in range(6)]
result_compiled = fn_opt(*inps)
result_eager = fn(*inps)
self.assertEqual(result_compiled, result_eager)
def test_tuple_inputs(self):
with (
torch._dynamo.config.patch("use_graph_deduplication", False),
torch._dynamo.config.patch("track_nodes_for_deduplication", True),
):
def inner(x, y):
x0, x1 = torch.split(x, 5)
return x0 + x1 + y
def fn(x, y):
o1 = inner(x, y)
o2 = inner(x, y)
o3 = inner(x, y)
o4 = inner(x, y)
return o1.sum() + o2.sum() + o3.sum() + o4.sum()
graph, tracker = extract_graph_and_tracker(
fn, torch.rand(10, 10), torch.rand(5, 10)
)
class MockOutputGraph:
def __init__(self):
self.graph = graph
self.region_tracker = tracker
self.nn_modules = FakeRootModule({})
def install_subgraph(self, name, subgraph):
return ""
splits = [
n
for n in graph.nodes
if n.op == "call_function" and n.target == torch.split
]
for split in splits:
tracker.node_to_duplicates.pop(split)
apply_graph_deduplication(MockOutputGraph())
self.assertExpectedInline(
graph,
"""\
graph():
%_unnamed : [num_users=4] = get_attr[target=]
%l_x_ : torch.Tensor [num_users=4] = placeholder[target=L_x_]
%l_y_ : torch.Tensor [num_users=4] = placeholder[target=L_y_]
%split : [num_users=2] = call_function[target=torch.functional.split](args = (%l_x_, 5), kwargs = {})
%x0 : [num_users=1] = call_function[target=operator.getitem](args = (%split, 0), kwargs = {})
%x1 : [num_users=1] = call_function[target=operator.getitem](args = (%split, 1), kwargs = {})
%split_1 : [num_users=2] = call_function[target=torch.functional.split](args = (%l_x_, 5), kwargs = {})
%x0_1 : [num_users=1] = call_function[target=operator.getitem](args = (%split_1, 0), kwargs = {})
%x1_1 : [num_users=1] = call_function[target=operator.getitem](args = (%split_1, 1), kwargs = {})
%split_2 : [num_users=2] = call_function[target=torch.functional.split](args = (%l_x_, 5), kwargs = {})
%x0_2 : [num_users=1] = call_function[target=operator.getitem](args = (%split_2, 0), kwargs = {})
%x1_2 : [num_users=1] = call_function[target=operator.getitem](args = (%split_2, 1), kwargs = {})
%split_3 : [num_users=2] = call_function[target=torch.functional.split](args = (%l_x_, 5), kwargs = {})
%x0_3 : [num_users=1] = call_function[target=operator.getitem](args = (%split_3, 0), kwargs = {})
%x1_3 : [num_users=1] = call_function[target=operator.getitem](args = (%split_3, 1), kwargs = {})
%invoke_subgraph : [num_users=1] = call_function[target=torch.ops.higher_order.invoke_subgraph](args = (%_unnamed, , %x0, %x1, %l_y_), kwargs = {})
%getitem_8 : [num_users=1] = call_function[target=operator.getitem](args = (%invoke_subgraph, 0), kwargs = {})
%sum_1 : [num_users=1] = call_method[target=sum](args = (%getitem_8,), kwargs = {})
%invoke_subgraph_1 : [num_users=1] = call_function[target=torch.ops.higher_order.invoke_subgraph](args = (%_unnamed, , %x0_1, %x1_1, %l_y_), kwargs = {})
%getitem_9 : [num_users=1] = call_function[target=operator.getitem](args = (%invoke_subgraph_1, 0), kwargs = {})
%sum_2 : [num_users=1] = call_method[target=sum](args = (%getitem_9,), kwargs = {})
%add_8 : [num_users=1] = call_function[target=operator.add](args = (%sum_1, %sum_2), kwargs = {})
%invoke_subgraph_2 : [num_users=1] = call_function[target=torch.ops.higher_order.invoke_subgraph](args = (%_unnamed, , %x0_2, %x1_2, %l_y_), kwargs = {})
%getitem_10 : [num_users=1] = call_function[target=operator.getitem](args = (%invoke_subgraph_2, 0), kwargs = {})
%sum_3 : [num_users=1] = call_method[target=sum](args = (%getitem_10,), kwargs = {})
%add_9 : [num_users=1] = call_function[target=operator.add](args = (%add_8, %sum_3), kwargs = {})
%invoke_subgraph_3 : [num_users=1] = call_function[target=torch.ops.higher_order.invoke_subgraph](args = (%_unnamed, , %x0_3, %x1_3, %l_y_), kwargs = {})
%getitem_11 : [num_users=1] = call_function[target=operator.getitem](args = (%invoke_subgraph_3, 0), kwargs = {})
%sum_4 : [num_users=1] = call_method[target=sum](args = (%getitem_11,), kwargs = {})
%add_10 : [num_users=1] = call_function[target=operator.add](args = (%add_9, %sum_4), kwargs = {})
return (add_10,)""",
)
def test_param_transfer_to_submodule(self):
def inner_fn(x, y):
return x + y + y + x

View File

@ -9,6 +9,28 @@ from torch._dynamo.testing import extract_graph_and_tracker
from torch.utils._pytree import tree_map
def get_nodes_by_name(graph, names):
nodes = []
for node in graph.nodes:
if node.name in names:
nodes.append(node)
return nodes
unique_ind = 0
def track_same_nodes(names, graph, region_tracker):
global unique_ind
unique_ind += 1
# find nodes in graph with names and track them
# as if they were at the same code location
nodes = get_nodes_by_name(graph, names)
for node in nodes:
region_tracker.track_node("x", unique_ind, node)
class GraphRegionTrackerTests(TestCase):
def setUp(self):
self.exit_stack = contextlib.ExitStack()

View File

@ -1205,45 +1205,6 @@ class TagSafetyChecks(RecursiveDictTagTests):
with install_guard_manager_testing_hook(hook):
opt_fn(torch.randn(4, 4))
def test_nn_module_tag_overridden_getattr_safe(self):
class Baz(torch.nn.Module, metaclass=abc.ABCMeta):
def __init__(self):
super().__init__()
self.norm = 2
def __getattr__(self, key):
if key == "a":
return 5
return super().__getattr__(key)
def forward(self, x):
return x + self.a + self.norm
baz = Baz()
def fn(x):
x = x + baz(x)
return x
try:
from .utils import install_guard_manager_testing_hook
except ImportError:
from utils import install_guard_manager_testing_hook
def hook(guard_wrapper, f_locals, builder):
from torch._dynamo.source import LocalSource
baz_source = LocalSource("baz")
# Check tagness of baz
baz_mgr = builder.get_guard_manager_from_source(baz_source)
self.assertTrue(baz_mgr.is_tag_safe())
self.assertTrue(baz_mgr.is_tag_safe_root())
opt_fn = torch.compile(fn, backend="eager", fullgraph=True)
with install_guard_manager_testing_hook(hook):
opt_fn(torch.randn(4, 4))
class RecursiveDictGuardTests(RecursiveDictTagTests):
def test_disabling(self):

View File

@ -261,7 +261,6 @@ class TestGuardSerialization(torch._inductor.test_case.TestCase):
def _test_serialization(self, guard_type, fn, *args, **kwargs):
# kwargs might contain a callable that generates kwargs
torch._dynamo.reset()
kwarg_gen_fn = kwargs.get("_gen_fn", None)
if kwarg_gen_fn is not None:
kwargs = kwarg_gen_fn()
@ -347,7 +346,7 @@ class TestGuardSerialization(torch._inductor.test_case.TestCase):
self._frame_state.f_code,
tracer.output,
guard_filter_fn=guard_filter_fn,
save_guards=True,
guards_serialization_mode="save",
)
guards_state = check_fn_manager.guards_state
self._cached_guards_state = guards_state
@ -358,6 +357,7 @@ class TestGuardSerialization(torch._inductor.test_case.TestCase):
check_fn_manager = CheckFunctionManager(
self._frame_state.f_code,
guards_state.output_graph,
guards_serialization_mode="load",
shape_code_parts=guards_state.shape_code_parts,
runtime_global_scope=self._frame_state.f_globals,
)
@ -1180,6 +1180,7 @@ class TestGuardSerialization(torch._inductor.test_case.TestCase):
check_fn_manager = CheckFunctionManager(
self._cached_f_code,
guards_state.output_graph,
guards_serialization_mode="load",
shape_code_parts=guards_state.shape_code_parts,
)
loaded = check_fn_manager.guard_manager

View File

@ -1705,17 +1705,16 @@ utils_device.CURRENT_DEVICE == None""".split("\n"):
if hasattr(packed, "b"):
b = packed.b + 1
c = packed[2]
d = len(packed._fields)
return a + b + c + d
return a + b + c
v1 = torch.Tensor([1])
v2 = torch.Tensor([2])
v3 = torch.Tensor([3])
cnts = torch._dynamo.testing.CompileCounter()
opt_fn = torch.compile(fn, backend=cnts)
self.assertEqual(opt_fn(MyTuple(v1, v2, v3))[0], 10)
self.assertEqual(opt_fn(MyTuple(v1, v2, v3))[0], 7)
self.assertEqual(cnts.frame_count, 1)
self.assertEqual(cnts.op_count, 4)
self.assertEqual(cnts.op_count, 3)
def test_namedtuple3(self):
def fn(x, packed):
@ -1962,31 +1961,6 @@ utils_device.CURRENT_DEVICE == None""".split("\n"):
self.assertEqual(exp, act)
def test_class_binop(self):
class Foo:
def __init__(self, x):
self.x = x
def __add__(self, other):
return Foo(self.x + other.x)
def fn(a, b):
return a + b
x = torch.randn(2)
a, b = Foo(x), Foo(x + 1)
cnts = torch._dynamo.testing.CompileCounter()
opt_fn = torch.compile(fn, backend=cnts)
self.assertEqual(opt_fn(a, b).x, 2 * x + 1)
self.assertEqual(cnts.frame_count, 1)
self.assertEqual(cnts.op_count, 1)
def fn(a, b):
return a - b
opt_fn = torch.compile(fn, backend=cnts, fullgraph=True)
self.assertRaises(torch._dynamo.exc.Unsupported, opt_fn, a, b)
def test_user_getattr1(self):
class MyConfig(dict):
def __getattr__(self, name):
@ -8572,6 +8546,7 @@ utils_device.CURRENT_DEVICE == None""".split("\n"):
guard_manager = torch._dynamo.guards.CheckFunctionManager(
foo.__code__,
guards_state.output_graph,
guards_serialization_mode="load",
shape_code_parts=guards_state.shape_code_parts,
runtime_global_scope=new_globals,
).guard_manager

View File

@ -16,7 +16,7 @@ from torch._dynamo.package import CompilePackage, DiskDynamoStore, DynamoCache
from torch._dynamo.precompile_context import PrecompileContext
from torch._dynamo.testing import reduce_to_scalar_loss
from torch._functorch import config as functorch_config
from torch._inductor.mock_cache import global_stats, PatchCaches
from torch._inductor.mock_cache import global_stats, PatchCaches, Stats
from torch._inductor.runtime.runtime_utils import cache_dir
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
@ -452,33 +452,27 @@ def add(x, y):
def fn(x, y):
return x.sin() + y
arg1 = torch.randn(32, 32, device=device)
arg2 = torch.randn(32, 32, device=device)
arg1 = torch.randn(3, 3, device=device)
arg2 = torch.randn(3, 3, device=device)
expected = fn(arg1, arg2).clone()
with PatchCaches():
compiled_fn1 = torch.compile(fn, mode="max-autotune")
result = compiled_fn1(arg1, arg2).clone()
self.assertEqual(expected, result)
self.assertEqual(global_stats.autotune_local.num_get_miss, 1)
self.assertEqual(global_stats.autotune_local, Stats(1, 0, 1))
DynamoCache.clear()
total_frames = torch._dynamo.convert_frame.FRAME_COUNTER
self._save_and_reload(
expected_backends=1, expected_dynamo=1, expected_autotune=1
)
# During save, we check the autotune cache another time, and now it should hit
self.assertEqual(global_stats.autotune_local.num_get_hit, 1)
compiled_fn1 = torch.compile(fn, mode="max-autotune")
with torch.compiler.set_stance("fail_on_recompile"):
result1 = compiled_fn1(arg1, arg2).clone()
self.assertEqual(expected, result1)
self.assertEqual(torch._dynamo.convert_frame.FRAME_COUNTER, total_frames)
# No new hits or misses
# Unfortunately, we don't *actually* know how many puts there will be, because
# it's possible the best autotune config was found by coordesc.
self.assertEqual(global_stats.autotune_local.num_get_hit, 1)
self.assertEqual(global_stats.autotune_local.num_get_miss, 1)
self.assertEqual(global_stats.autotune_local, Stats(2, 1, 1))
@parametrize("device", ("cpu", "cuda", "xpu"))
@torch._dynamo.config.patch(caching_precompile=True)

View File

@ -362,74 +362,6 @@ def run(cnt):
write_load_and_run(path2)
self.assertEqual(cnts.frame_count, 1)
@torch._dynamo.config.patch(
automatic_dynamic_remote_pgo=True, automatic_dynamic_local_pgo=False
)
def test_sticky_pgo_read_write(self):
cnts = CompileCounter()
@torch.compile(backend=cnts, fullgraph=True)
def f(x, y):
return x * 2, y * 3
def t(x, y):
return torch.randn(x, y)
with mock_cache.PatchCaches():
# we pretend to disable the default remote cache by keying different job ids per run
with torch.compiler.config.patch(job_id="a"):
f(t(2, 2), t(2, 2))
f(t(2, 4), t(2, 2))
self.assertEqual(cnts.frame_count, 2)
# first, test that we're not reading from the local/default remote cache;
# we should recompile when x wobbles
self.reset()
cnts.clear()
with torch.compiler.config.patch(
job_id="b", pgo_extra_write_key="sticky_0"
):
f(t(2, 2), t(2, 2))
f(t(2, 4), t(2, 2))
self.assertEqual(cnts.frame_count, 2)
# now with the extra sticky_0 key, we start with dynamic x;
# no recompiles
self.reset()
cnts.clear()
with torch.compiler.config.patch(job_id="c", pgo_extra_read_key="sticky_0"):
f(t(2, 2), t(2, 2))
f(t(2, 4), t(2, 2))
self.assertEqual(cnts.frame_count, 1)
# last test: wobble y and write to sticky_1 key
self.reset()
cnts.clear()
with torch.compiler.config.patch(
job_id="d", pgo_extra_write_key="sticky_1"
):
f(t(2, 2), t(2, 2))
f(t(2, 2), t(2, 4))
f(t(2, 2), t(4, 4))
self.assertEqual(cnts.frame_count, 3)
# start using default remote PGO, create run that wobbles y
self.reset()
cnts.clear()
f(t(2, 2), t(2, 2))
f(t(2, 4), t(2, 2))
f(t(4, 2), t(2, 2))
# with default remote (dynamic x) + extra remote (dynamic y),
# we should be able to wobble x & y with no recompiles.
self.reset()
cnts.clear()
with torch.compiler.config.patch(pgo_extra_read_key="sticky_1"):
f(t(2, 2), t(2, 2))
f(t(2, 4), t(4, 2))
f(t(4, 2), t(2, 4))
self.assertEqual(cnts.frame_count, 1)
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests

View File

@ -66,7 +66,6 @@ from torch.testing._internal.common_utils import (
parametrize,
serialTest,
skipIfHpu,
skipIfRocm,
skipIfWindows,
TEST_WITH_ROCM,
)
@ -7406,7 +7405,6 @@ class ReproTestsDevice(torch._dynamo.test_case.TestCase):
out = f_compiled(x, s0, s1, s2)
self.assertEqual(out_ref, out)
@skipIfRocm
@unittest.skipIf(not PLATFORM_SUPPORTS_FP8, "requires gpu with fp8 support")
@requires_cuda
def test_partitioner_saves_weights_for_bw(self):

View File

@ -28,6 +28,7 @@ from torch.testing._internal.triton_utils import requires_cuda_and_triton
if torch.distributed.is_available():
from torch.testing._internal.distributed.fake_pg import FakeStore
HAS_TLPARSE = shutil.which("tlparse") is not None
requires_tlparse = unittest.skipUnless(HAS_TLPARSE, "requires tlparse")
requires_distributed = functools.partial(
@ -1197,13 +1198,13 @@ def forward(self, x_1: "f32[2][1]cpu"):
@contextmanager
def _setup_runtime_estimates_capture(self):
"""Helper to turn on and capture the combined 'inductor_runtime_and_tensor_meta' structured trace."""
"""Helper to turn on and capture the 'inductor_tlparse_runtime' structured trace."""
payload_buffer = io.StringIO()
payload_handler = logging.StreamHandler(payload_buffer)
payload_handler.setLevel(logging.DEBUG)
payload_handler.setFormatter(StructuredTracePayloadFormatter())
payload_handler.addFilter(
StructuredTraceTestingFilter("inductor_runtime_and_tensor_meta")
StructuredTraceTestingFilter("inductor_tlparse_runtime")
)
trace_log.addHandler(payload_handler)
try:
@ -1244,10 +1245,8 @@ def forward(self, x_1: "f32[2][1]cpu"):
compiled = torch.compile(mod, backend="inductor")
compiled(torch.randn(4, 4, device="cuda"))
# Verify runtime + tensor meta artifact was logged
self.assertIn(
'"inductor_runtime_and_tensor_meta"', self.buffer.getvalue()
)
# Verify runtime estimates artifact was logged
self.assertIn('"inductor_tlparse_runtime"', self.buffer.getvalue())
payload_content = payload_buffer.getvalue().strip()
if payload_content:
@ -1311,10 +1310,8 @@ def forward(self, x_1: "f32[2][1]cpu"):
compiled = torch.compile(mod, backend="inductor")
compiled(torch.randn(4, 4, device="cuda"))
# Verify artifact was logged
self.assertIn(
'"inductor_runtime_and_tensor_meta"', self.buffer.getvalue()
)
# Verify runtime estimates artifact was logged
self.assertIn('"inductor_tlparse_runtime"', self.buffer.getvalue())
payload_content = payload_buffer.getvalue().strip()
if payload_content:
@ -1336,145 +1333,6 @@ def forward(self, x_1: "f32[2][1]cpu"):
finally:
dist.destroy_process_group()
@requires_tlparse
@requires_distributed()
@requires_cuda_and_triton
@torch._inductor.config.patch("fx_graph_cache", False)
@torch._inductor.config.patch("log_tlparse", True)
def test_tensor_metadata_logging_multiple_ops(self):
import torch.distributed as dist
store = FakeStore()
dist.init_process_group(backend="fake", rank=0, world_size=2, store=store)
class Mixed(torch.nn.Module):
def __init__(self):
super().__init__()
self.linear = torch.nn.Linear(4, 4)
def forward(self, x):
y = torch.relu(self.linear(x))
y = torch.ops._c10d_functional.all_reduce.default(y, "sum", "0")
y = torch.ops._c10d_functional.wait_tensor.default(y)
return y + 1
try:
with self._setup_runtime_estimates_capture() as payload_buffer:
torch._dynamo.reset()
mod = Mixed().cuda()
compiled = torch.compile(mod, backend="inductor")
compiled(torch.randn(4, 4, device="cuda"))
payload = payload_buffer.getvalue().strip()
if payload:
data = json.loads(payload)
types = sorted({op.get("type") for op in data.get("ops", [])})
self.assertExpectedInline(
str(types), """['collective', 'compute']"""
)
self.assertParses()
finally:
dist.destroy_process_group()
@requires_tlparse
@torch._inductor.config.patch("log_tlparse", True)
def test_tensor_metadata_logging(self):
"""Emit unified runtime+tensor-metadata artifact and assert a stable simplified JSON inline."""
with self._setup_runtime_estimates_capture() as payload_buffer:
def f(x):
y = x.transpose(0, 1)
z = y.mean(dim=0)
w = z.to(torch.float16)
return w
compiled = torch.compile(f, backend="inductor", fullgraph=True)
compiled(torch.ones(2, 3))
# Verify artifact was logged
self.assertIn('"inductor_runtime_and_tensor_meta"', self.buffer.getvalue())
payload = payload_buffer.getvalue().strip()
if payload:
data = json.loads(payload)
ops = data.get("ops", [])
simplified_ops = []
for op in ops:
outs = [
{
"shape": out.get("shape", []),
"stride": out.get("stride", []),
"dtype": out.get("dtype", None),
}
for out in op.get("outputs", [])
]
if outs:
simplified_ops.append(
{
"type": op.get("type", ""),
"outputs": outs,
}
)
self.assertExpectedInline(
{"ops": simplified_ops[-1:]} if simplified_ops else {"ops": []},
"""{'ops': [{'type': 'compute', 'outputs': [{'shape': [2], 'stride': [1], 'dtype': 'float16'}]}]}""",
)
self.assertParses()
@requires_tlparse
@torch._inductor.config.patch("log_tlparse", True)
def test_tensor_metadata_logging_dynamic_shapes(self):
"""Same as test_tensor_metadata_logging, but with dynamic shapes enabled to cover to_size_hints."""
with self._setup_runtime_estimates_capture() as payload_buffer:
def f(x):
y = x.transpose(0, 1)
z = y.mean(dim=0)
w = z.to(torch.float16)
return w
compiled = torch.compile(f, backend="inductor", dynamic=True)
compiled(torch.ones(2, 3))
# Verify artifact was logged
self.assertIn('"inductor_runtime_and_tensor_meta"', self.buffer.getvalue())
payload = payload_buffer.getvalue().strip()
if payload:
data = json.loads(payload)
ops = data.get("ops", [])
simplified_ops = []
for op in ops:
outs = [
{
"shape": out.get("shape", []),
"stride": out.get("stride", []),
"dtype": out.get("dtype", None),
}
for out in op.get("outputs", [])
]
if outs:
simplified_ops.append(
{
"type": op.get("type", ""),
"outputs": outs,
}
)
self.assertExpectedInline(
{"ops": simplified_ops[-1:]} if simplified_ops else {"ops": []},
(
"{'ops': [{'type': 'compute', 'outputs': ["
"{'shape': [2], 'stride': [1], 'dtype': 'float32'}, "
"{'shape': [2], 'stride': [1], 'dtype': 'float16'}]}]}"
),
)
self.assertParses()
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests

Some files were not shown because too many files have changed in this diff.