mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-02 14:34:54 +08:00
Compare commits
70 Commits
v2.7.0-rc1
...
v2.7.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 1341794745 | |||
| 073912749d | |||
| 0c236f3c72 | |||
| c7ff78dfc0 | |||
| 894909a613 | |||
| ef2b1390ed | |||
| 3f236f1903 | |||
| 35f1e76212 | |||
| a6321d6227 | |||
| 1cc51c640a | |||
| 28ca4dd77d | |||
| 06c6a81a98 | |||
| 3b61d5d4e3 | |||
| 8b6bc59e95 | |||
| c2ccaa3c21 | |||
| 6569576c4e | |||
| 5416dff2b2 | |||
| 791265114e | |||
| 7ad8bc7e8b | |||
| f2ee3f4847 | |||
| dfd39fe14f | |||
| b766c0200a | |||
| a3cd7b0cc4 | |||
| 8522972133 | |||
| c4b98c8364 | |||
| d10ffd76db | |||
| 53a13e553d | |||
| 5745d6a770 | |||
| 60ddcd803e | |||
| f2b3b5c453 | |||
| 71fa7def26 | |||
| 1a6c192dc4 | |||
| e691e92297 | |||
| 2b73f403c7 | |||
| 697cd9bbb1 | |||
| 64ca70f83c | |||
| 1b84fd1503 | |||
| 6b27e11a5b | |||
| 18a926f547 | |||
| ecd434bea9 | |||
| 5bed3fafc7 | |||
| 9b4f085526 | |||
| d29e4c81d9 | |||
| 8d2186cd79 | |||
| b04d8358d9 | |||
| d80afc07f0 | |||
| 84210a82ef | |||
| 4268b2f40a | |||
| 12a6d2a0b8 | |||
| 464432ec47 | |||
| 1f612dafb5 | |||
| f63def6ac7 | |||
| 3a8e623a9b | |||
| bf727425a0 | |||
| 8c7dbc939f | |||
| 644fdbad95 | |||
| fb027c5692 | |||
| 3b87bd8b82 | |||
| 89b098a677 | |||
| 4cc4302b32 | |||
| c632e4fdb8 | |||
| b23bfae9f7 | |||
| 1b8f496f87 | |||
| c236b602ff | |||
| 6926f30654 | |||
| 483980d7f3 | |||
| 7173a73cf4 | |||
| 7bab7354df | |||
| b1940b5867 | |||
| abebbd5113 |
@ -20,7 +20,7 @@ cd /
|
||||
# on the mounted pytorch repo
|
||||
git config --global --add safe.directory /pytorch
|
||||
pip install -r /pytorch/requirements.txt
|
||||
pip install auditwheel
|
||||
pip install auditwheel==6.2.0
|
||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD enables linker script optimization: https://github.com/pytorch/pytorch/pull/121975/files
|
||||
|
||||
@ -136,6 +136,9 @@ def complete_wheel(folder: str) -> str:
|
||||
"""
|
||||
wheel_name = list_dir(f"/{folder}/dist")[0]
|
||||
|
||||
# Please note for cuda we don't run auditwheel since we use custom script to package
|
||||
# the cuda dependencies to the wheel file using update_wheel() method.
|
||||
# However we need to make sure filename reflects the correct Manylinux platform.
|
||||
if "pytorch" in folder and not enable_cuda:
|
||||
print("Repairing Wheel with AuditWheel")
|
||||
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
|
||||
@ -147,7 +150,14 @@ def complete_wheel(folder: str) -> str:
|
||||
f"/{folder}/dist/{repaired_wheel_name}",
|
||||
)
|
||||
else:
|
||||
repaired_wheel_name = wheel_name
|
||||
repaired_wheel_name = wheel_name.replace(
|
||||
"linux_aarch64", "manylinux_2_28_aarch64"
|
||||
)
|
||||
print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
|
||||
os.rename(
|
||||
f"/{folder}/dist/{wheel_name}",
|
||||
f"/{folder}/dist/{repaired_wheel_name}",
|
||||
)
|
||||
|
||||
print(f"Copying {repaired_wheel_name} to artifacts")
|
||||
shutil.copy2(
|
||||
|
||||
@ -1 +1 @@
|
||||
5e4d6b6380d575e48e37e9d987fded4ec588e7bc
|
||||
01a22b6f16d117454b7d21ebdc691b0785b84a7f
|
||||
|
||||
@ -1 +1 @@
|
||||
v2.25.1-1
|
||||
v2.26.2-1
|
||||
|
||||
@ -1 +1 @@
|
||||
83111ab22be6e4a588d184ac45175986a7dde9fc
|
||||
0bcc8265e677e5321606a3311bf71470f14456a8
|
||||
|
||||
@ -37,7 +37,7 @@ install_ubuntu() {
|
||||
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
|
||||
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
|
||||
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
|
||||
maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
|
||||
maybe_libnccl_dev="libnccl2=2.26.2-1+cuda12.4 libnccl-dev=2.26.2-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
|
||||
else
|
||||
maybe_libnccl_dev=""
|
||||
fi
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
set -ex
|
||||
|
||||
NCCL_VERSION=v2.25.1-1
|
||||
NCCL_VERSION=v2.26.2-1
|
||||
CUDNN_VERSION=9.5.1.17
|
||||
|
||||
function install_cusparselt_040 {
|
||||
|
||||
@ -3,19 +3,8 @@
|
||||
|
||||
set -ex
|
||||
|
||||
NCCL_VERSION=v2.21.5-1
|
||||
CUDNN_VERSION=9.5.1.17
|
||||
|
||||
function install_cusparselt_062 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
NCCL_VERSION=v2.26.2-1
|
||||
CUDNN_VERSION=9.8.0.87
|
||||
|
||||
function install_cusparselt_063 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
@ -28,140 +17,7 @@ function install_cusparselt_063 {
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_124 {
|
||||
CUDNN_VERSION=9.1.0.70
|
||||
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
|
||||
# install CUDA 12.4.1 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_124 {
|
||||
echo "Pruning CUDA 12.4"
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.4/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
|
||||
}
|
||||
|
||||
function install_126 {
|
||||
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
|
||||
# install CUDA 12.6.3 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_126 {
|
||||
echo "Pruning CUDA 12.6"
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
|
||||
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.6/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
|
||||
}
|
||||
|
||||
function install_128 {
|
||||
CUDNN_VERSION=9.7.1.26
|
||||
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
# install CUDA 12.8.0 in the same container
|
||||
@ -198,10 +54,6 @@ function install_128 {
|
||||
while test $# -gt 0
|
||||
do
|
||||
case "$1" in
|
||||
12.4) install_124; prune_124
|
||||
;;
|
||||
12.6) install_126; prune_126
|
||||
;;
|
||||
12.8) install_128;
|
||||
;;
|
||||
*) echo "bad argument $1"; exit 1
|
||||
|
||||
@ -53,7 +53,7 @@ setup_executorch() {
|
||||
export EXECUTORCH_BUILD_PYBIND=ON
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
|
||||
as_jenkins .ci/scripts/setup-linux.sh cmake || true
|
||||
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
|
||||
popd
|
||||
}
|
||||
|
||||
|
||||
@ -47,6 +47,9 @@ function install_ubuntu() {
|
||||
# Development Packages
|
||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||
# Install Intel Support Packages
|
||||
if [[ "$XPU_VERSION" == "2025.0" ]]; then
|
||||
XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl=2025.0.1-6"
|
||||
fi
|
||||
apt-get install -y ${XPU_PACKAGES}
|
||||
|
||||
# Cleanup
|
||||
@ -82,6 +85,9 @@ gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.
|
||||
EOF
|
||||
|
||||
# Install Intel Support Packages
|
||||
if [[ "$XPU_VERSION" == "2025.0" ]]; then
|
||||
XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl-2025.0.1-6"
|
||||
fi
|
||||
yum install -y ${XPU_PACKAGES}
|
||||
# The xpu-smi packages
|
||||
dnf install -y xpu-smi
|
||||
|
||||
@ -1173,8 +1173,9 @@ build_xla() {
|
||||
apply_patches
|
||||
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
|
||||
# These functions are defined in .circleci/common.sh in pytorch/xla repo
|
||||
retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
|
||||
retry install_pre_deps_pytorch_xla $XLA_DIR $USE_CACHE
|
||||
CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
|
||||
retry install_post_deps_pytorch_xla
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
@ -1481,7 +1482,7 @@ test_executorch() {
|
||||
bash examples/models/llama3_2_vision/install_requirements.sh
|
||||
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
|
||||
# from the PR
|
||||
bash .ci/scripts/setup-linux.sh cmake
|
||||
bash .ci/scripts/setup-linux.sh --build-tool cmake
|
||||
|
||||
echo "Run ExecuTorch unit tests"
|
||||
pytest -v -n auto
|
||||
|
||||
@ -74,6 +74,12 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
||||
|
||||
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for all the wheel builds, hence we append TRITON_CONSTRAINT
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
|
||||
# CUDA 12.8 builds have triton for Linux and Linux aarch64 binaries.
|
||||
if [[ "$DESIRED_CUDA" == cu128 ]]; then
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||
fi
|
||||
|
||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
||||
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
||||
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
|
||||
|
||||
21
.github/actions/checkout-pytorch/action.yml
vendored
21
.github/actions/checkout-pytorch/action.yml
vendored
@ -23,21 +23,28 @@ runs:
|
||||
id: check_container_runner
|
||||
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Set up parallel fetch and clean workspace
|
||||
- name: Clean workspace
|
||||
shell: bash
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
|
||||
env:
|
||||
NO_SUDO: ${{ inputs.no-sudo }}
|
||||
run: |
|
||||
# Run the given command; on failure, re-run it up to four more times,
# pausing 1s, 2s, 4s and 8s before the successive attempts. Returns the
# status of the last attempt.
# "$@" (rather than unquoted $*) hands every argument to the command exactly
# as the caller quoted it, so a path containing spaces or glob characters is
# not re-split or expanded on each attempt.
retry () {
  "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
}
|
||||
echo "${GITHUB_WORKSPACE}"
|
||||
if [ -z "${NO_SUDO}" ]; then
|
||||
retry sudo rm -rf "${GITHUB_WORKSPACE}"
|
||||
else
|
||||
retry rm -rf "${GITHUB_WORKSPACE}"
|
||||
fi
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
|
||||
# Use all available CPUs for fetching
|
||||
cd "${GITHUB_WORKSPACE}"
|
||||
git config --global fetch.parallel 0
|
||||
git config --global submodule.fetchJobs 0
|
||||
|
||||
# Clean workspace. The default checkout action should also do this, but
|
||||
# do it here as well just in case
|
||||
if [[ -d .git ]]; then
|
||||
git clean -ffdx
|
||||
fi
|
||||
|
||||
- name: Checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
@ -17,6 +17,7 @@ from typing import Optional
|
||||
|
||||
# NOTE: Also update the CUDA sources in tools/nightly.py when changing this list
|
||||
CUDA_ARCHES = ["11.8", "12.6", "12.8"]
|
||||
CUDA_STABLE = "12.6"
|
||||
CUDA_ARCHES_FULL_VERSION = {
|
||||
"11.8": "11.8.0",
|
||||
"12.6": "12.6.3",
|
||||
@ -67,7 +68,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
@ -83,7 +84,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
@ -373,7 +374,7 @@ def generate_wheels_matrix(
|
||||
}
|
||||
)
|
||||
# Special build to use on Colab: Python 3.11 with CUDA 12.6
|
||||
if python_version == "3.11" and arch_version == "12.6":
|
||||
if python_version == "3.11" and arch_version == CUDA_STABLE:
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
@ -416,7 +417,7 @@ def generate_wheels_matrix(
|
||||
"pytorch_extra_install_requirements": (
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
|
||||
if gpu_arch_type == "xpu"
|
||||
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.6"]
|
||||
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
|
||||
if os != "linux"
|
||||
else ""
|
||||
),
|
||||
|
||||
30
.github/scripts/get_ci_variable.py
vendored
Executable file
30
.github/scripts/get_ci_variable.py
vendored
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Helper script - Return CI variables such as stable cuda, min python version, etc."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main(args: list[str]) -> None:
    """Print the CI variable selected by the command-line flags.

    At most one value is written to stdout. The stable-CUDA flag takes
    precedence over the min-Python flag when both are given, and nothing is
    printed when neither is given.

    Args:
        args: Command-line arguments, excluding the program name.

    Raises:
        SystemExit: Raised by argparse on unrecognized arguments or ``--help``.
    """
    # Imported inside the function, so merely importing this script does not
    # require the build-matrix module to be importable.
    import generate_binary_build_matrix

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--cuda-stable-version",
        # Alias with the words swapped, so callers using either spelling get
        # the value instead of an argparse "unrecognized arguments" error.
        # argparse derives dest from the first long option, so the attribute
        # is still `cuda_stable_version`.
        "--stable-cuda-version",
        action="store_true",
        help="get cuda stable version",
    )
    parser.add_argument(
        "--min-python-version",
        action="store_true",
        help="get min supported python version",
    )
    options = parser.parse_args(args)

    if options.cuda_stable_version:
        print(generate_binary_build_matrix.CUDA_STABLE)
        return
    if options.min_python_version:
        # First entry of FULL_PYTHON_VERSIONS; the help text above treats it
        # as the minimum supported version (ordering not verified here).
        print(generate_binary_build_matrix.FULL_PYTHON_VERSIONS[0])


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||
3
.github/scripts/windows/build_triton.bat
vendored
3
.github/scripts/windows/build_triton.bat
vendored
@ -9,7 +9,8 @@ if "%PY_VERS%" == "3.13t" (
|
||||
) else (
|
||||
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python=%PY_VERS%
|
||||
)
|
||||
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake setuptools==72.1.0 ninja
|
||||
:: Fix cmake version for issue https://github.com/pytorch/pytorch/issues/150480
|
||||
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake==3.31.6 setuptools==72.1.0 ninja
|
||||
|
||||
dir "%VC_INSTALL_PATH%"
|
||||
|
||||
|
||||
2
.github/workflows/build-manywheel-images.yml
vendored
2
.github/workflows/build-manywheel-images.yml
vendored
@ -93,7 +93,7 @@ jobs:
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
|
||||
strategy:
|
||||
matrix:
|
||||
cuda_version: ["12.8", "12.6"]
|
||||
cuda_version: ["12.8"]
|
||||
env:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
|
||||
|
||||
7
.github/workflows/build-triton-wheel.yml
vendored
7
.github/workflows/build-triton-wheel.yml
vendored
@ -12,6 +12,8 @@ on:
|
||||
- .github/workflows/build-triton-wheel.yml
|
||||
- .github/scripts/build_triton_wheel.py
|
||||
- .github/ci_commit_pins/triton.txt
|
||||
- .github/scripts/windows/install_vs2022.ps1
|
||||
- .github/scripts/windows/build_triton.bat
|
||||
- .ci/docker/ci_commit_pins/triton.txt
|
||||
- .ci/docker/ci_commit_pins/triton-xpu.txt
|
||||
pull_request:
|
||||
@ -19,6 +21,8 @@ on:
|
||||
- .github/workflows/build-triton-wheel.yml
|
||||
- .github/scripts/build_triton_wheel.py
|
||||
- .github/ci_commit_pins/triton.txt
|
||||
- .github/scripts/windows/install_vs2022.ps1
|
||||
- .github/scripts/windows/build_triton.bat
|
||||
- .ci/docker/ci_commit_pins/triton.txt
|
||||
- .ci/docker/ci_commit_pins/triton-xpu.txt
|
||||
|
||||
@ -133,7 +137,7 @@ jobs:
|
||||
fi
|
||||
|
||||
docker exec -t "${container_name}" yum install -y zlib-devel zip
|
||||
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==67.4.0 pybind11==2.13.1 auditwheel wheel
|
||||
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==78.1.0 pybind11==2.13.1 auditwheel wheel
|
||||
|
||||
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "rocm" || "${{ matrix.device }}" == "aarch64" ) ]]; then
|
||||
# With this install, it gets clang 16.0.6.
|
||||
@ -243,7 +247,6 @@ jobs:
|
||||
.github/scripts/windows/build_triton.bat
|
||||
mkdir -p "${RUNNER_TEMP}/artifacts/"
|
||||
mv ./*.whl "${RUNNER_TEMP}/artifacts/"
|
||||
|
||||
- uses: actions/upload-artifact@v4.4.0
|
||||
with:
|
||||
name: pytorch-triton-wheel-${{ matrix.py_vers }}-${{ matrix.device }}
|
||||
|
||||
7
.github/workflows/docker-release.yml
vendored
7
.github/workflows/docker-release.yml
vendored
@ -117,7 +117,10 @@ jobs:
|
||||
# To get QEMU binaries in our PATH
|
||||
echo "${RUNNER_TEMP}/bin" >> "${GITHUB_PATH}"
|
||||
# Generate PyTorch version to use
|
||||
echo "PYTORCH_VERSION=$(python3 .github/scripts/generate_pytorch_version.py --no-build-suffix)" >> "${GITHUB_ENV}"
|
||||
{
  echo "PYTORCH_VERSION=$(python3 .github/scripts/generate_pytorch_version.py --no-build-suffix)";
  # The flag must match the option declared by get_ci_variable.py
  # (--cuda-stable-version); an unknown flag makes the command substitution
  # empty and leaves STABLE_CUDA_VERSION blank.
  echo "STABLE_CUDA_VERSION=$(python3 .github/scripts/get_ci_variable.py --cuda-stable-version)"
} >> "${GITHUB_ENV}"
|
||||
- name: Setup test specific variables
|
||||
if: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
|
||||
run: |
|
||||
@ -154,7 +157,7 @@ jobs:
|
||||
docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}"
|
||||
|
||||
# Please note: here we need to pin a specific version of CUDA to use with the latest label
|
||||
if [[ ${CUDA_VERSION_SHORT} == "12.4" ]]; then
|
||||
if [[ ${CUDA_VERSION_SHORT} == "${STABLE_CUDA_VERSION}" ]]; then
|
||||
docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}${CUDA_SUFFIX}" \
|
||||
ghcr.io/pytorch/pytorch-nightly:latest
|
||||
docker push ghcr.io/pytorch/pytorch-nightly:latest
|
||||
|
||||
24
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
24
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
@ -64,7 +64,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_9-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cpu-aarch64-test: # Testing
|
||||
@ -134,7 +134,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_9-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -181,7 +181,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cpu-aarch64-test: # Testing
|
||||
@ -251,7 +251,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -298,7 +298,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cpu-aarch64-test: # Testing
|
||||
@ -368,7 +368,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -415,7 +415,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cpu-aarch64-test: # Testing
|
||||
@ -485,7 +485,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -532,7 +532,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cpu-aarch64-test: # Testing
|
||||
@ -602,7 +602,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -649,7 +649,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cpu-aarch64-test: # Testing
|
||||
@ -719,7 +719,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
4
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
4
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -105,7 +105,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_6-test: # Testing
|
||||
@ -152,7 +152,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_8-test: # Testing
|
||||
|
||||
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
@ -262,7 +262,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_6-test: # Testing
|
||||
@ -331,7 +331,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_8-test: # Testing
|
||||
@ -888,7 +888,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_6-test: # Testing
|
||||
@ -957,7 +957,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_8-test: # Testing
|
||||
@ -1514,7 +1514,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_6-test: # Testing
|
||||
@ -1648,7 +1648,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_8-test: # Testing
|
||||
@ -2205,7 +2205,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_6-test: # Testing
|
||||
@ -2274,7 +2274,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_8-test: # Testing
|
||||
@ -2831,7 +2831,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_6-test: # Testing
|
||||
@ -2900,7 +2900,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_8-test: # Testing
|
||||
@ -3457,7 +3457,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_6-test: # Testing
|
||||
@ -3526,7 +3526,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_8-test: # Testing
|
||||
|
||||
10
.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
generated
vendored
10
.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
generated
vendored
@ -63,7 +63,7 @@ jobs:
|
||||
timeout-minutes: 420
|
||||
build_name: manywheel-py3_9-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cpu-s390x-test: # Testing
|
||||
@ -128,7 +128,7 @@ jobs:
|
||||
timeout-minutes: 420
|
||||
build_name: manywheel-py3_10-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cpu-s390x-test: # Testing
|
||||
@ -193,7 +193,7 @@ jobs:
|
||||
timeout-minutes: 420
|
||||
build_name: manywheel-py3_11-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cpu-s390x-test: # Testing
|
||||
@ -258,7 +258,7 @@ jobs:
|
||||
timeout-minutes: 420
|
||||
build_name: manywheel-py3_12-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cpu-s390x-test: # Testing
|
||||
@ -323,7 +323,7 @@ jobs:
|
||||
timeout-minutes: 420
|
||||
build_name: manywheel-py3_13-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cpu-s390x-test: # Testing
|
||||
|
||||
12
.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
generated
vendored
12
.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
generated
vendored
@ -43,7 +43,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
@ -166,7 +166,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
@ -289,7 +289,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.11"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
@ -412,7 +412,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
@ -535,7 +535,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
@ -658,7 +658,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
|
||||
2
.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml
generated
vendored
2
.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml
generated
vendored
@ -54,7 +54,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
|
||||
48
.github/workflows/generated-windows-binary-wheel-nightly.yml
generated
vendored
48
.github/workflows/generated-windows-binary-wheel-nightly.yml
generated
vendored
@ -54,7 +54,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -288,7 +288,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -524,7 +524,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -760,7 +760,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1228,7 +1228,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1462,7 +1462,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1698,7 +1698,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1934,7 +1934,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -2402,7 +2402,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.11"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -2636,7 +2636,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.11"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -2872,7 +2872,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.11"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -3108,7 +3108,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.11"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -3576,7 +3576,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -3810,7 +3810,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -4046,7 +4046,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -4282,7 +4282,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.12"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -4750,7 +4750,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -4984,7 +4984,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -5220,7 +5220,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -5456,7 +5456,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -5924,7 +5924,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6158,7 +6158,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6394,7 +6394,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6630,7 +6630,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
|
||||
1
.github/workflows/inductor-rocm-mi300.yml
vendored
1
.github/workflows/inductor-rocm-mi300.yml
vendored
@ -4,6 +4,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release/*
|
||||
tags:
|
||||
- ciflow/inductor-rocm/*
|
||||
workflow_dispatch:
|
||||
|
||||
15
.github/workflows/lint.yml
vendored
15
.github/workflows/lint.yml
vendored
@ -254,21 +254,28 @@ jobs:
|
||||
with:
|
||||
submodules: false
|
||||
fetch-depth: 1
|
||||
- name: Setup Python 3.6
|
||||
- name: Get min python version
|
||||
id: get-min-python-version
|
||||
if: matrix.test_type == 'older_python_version'
|
||||
run: |
|
||||
set -eou pipefail
|
||||
# Determine the minimum Python version to use
|
||||
echo "MIN_PYTHON_VERSION=$(python3 .github/scripts/get_ci_variable.py --min-python-version)" >> "${GITHUB_OUTPUT}"
|
||||
- name: Setup Old Python version
|
||||
if: matrix.test_type == 'older_python_version'
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.6'
|
||||
python-version: 3.6
|
||||
architecture: x64
|
||||
check-latest: false
|
||||
cache: pip
|
||||
cache-dependency-path: |
|
||||
**/requirements.txt
|
||||
- name: Setup Python 3.9
|
||||
- name: Setup Min Python version
|
||||
if: matrix.test_type != 'older_python_version'
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
python-version: ${{ steps.get-min-python-version.outputs.MIN_PYTHON_VERSION }}
|
||||
architecture: x64
|
||||
check-latest: false
|
||||
cache: pip
|
||||
|
||||
5
.github/workflows/periodic.yml
vendored
5
.github/workflows/periodic.yml
vendored
@ -59,8 +59,9 @@ jobs:
|
||||
docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
|
||||
1
.github/workflows/rocm-mi300.yml
vendored
1
.github/workflows/rocm-mi300.yml
vendored
@ -4,6 +4,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release/*
|
||||
tags:
|
||||
- ciflow/rocm-mi300/*
|
||||
workflow_dispatch:
|
||||
|
||||
10
README.md
10
README.md
@ -362,6 +362,16 @@ Please make sure [the common prerequisites](#prerequisites) as well as [the prer
|
||||
Then PyTorch can be built with the command:
|
||||
|
||||
```cmd
|
||||
:: CMD Commands:
|
||||
:: Set the CMAKE_PREFIX_PATH to help find corresponding packages
|
||||
:: %CONDA_PREFIX% only works after `conda activate custom_env`
|
||||
|
||||
if defined CMAKE_PREFIX_PATH (
|
||||
set "CMAKE_PREFIX_PATH=%CONDA_PREFIX%\Library;%CMAKE_PREFIX_PATH%"
|
||||
) else (
|
||||
set "CMAKE_PREFIX_PATH=%CONDA_PREFIX%\Library"
|
||||
)
|
||||
|
||||
python setup.py develop
|
||||
```
|
||||
|
||||
|
||||
@ -7,10 +7,12 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
enum class BlasBackend : int8_t { Cublas, Cublaslt, Ck };
|
||||
enum class BlasBackend : int8_t { Default, Cublas, Cublaslt, Ck };
|
||||
|
||||
inline std::string BlasBackendToString(at::BlasBackend backend) {
|
||||
switch (backend) {
|
||||
case BlasBackend::Default:
|
||||
return "at::BlasBackend::Default";
|
||||
case BlasBackend::Cublas:
|
||||
return "at::BlasBackend::Cublas";
|
||||
case BlasBackend::Cublaslt:
|
||||
|
||||
@ -326,7 +326,34 @@ void Context::setLinalgPreferredBackend(at::LinalgBackend b) {
|
||||
}
|
||||
|
||||
at::BlasBackend Context::blasPreferredBackend() {
|
||||
// Rather than put logic for interpreting what Default means at every
|
||||
// call site for blasPreferredBackend(), we set it to an actual value.
|
||||
if (blas_preferred_backend == at::BlasBackend::Default) {
|
||||
blas_preferred_backend = at::BlasBackend::Cublas;
|
||||
#ifdef USE_ROCM
|
||||
// AMD Instinct targets prefer hipblaslt
|
||||
static const bool hipblaslt_preferred = []() {
|
||||
static const std::vector<std::string> archs = {
|
||||
"gfx90a", "gfx942",
|
||||
#if ROCM_VERSION >= 60500
|
||||
"gfx950"
|
||||
#endif
|
||||
};
|
||||
for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}();
|
||||
if (hipblaslt_preferred) {
|
||||
blas_preferred_backend = at::BlasBackend::Cublaslt;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_ROCM
|
||||
// hipblaslt support for all archs is not as complete as hipblas
|
||||
if (blas_preferred_backend == at::BlasBackend::Cublaslt) {
|
||||
static const bool hipblaslt_unsupported = []() {
|
||||
static const std::vector<std::string> archs = {
|
||||
@ -338,7 +365,7 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
"gfx950"
|
||||
#endif
|
||||
};
|
||||
for (auto index: c10::irange(getNumGPUs())) {
|
||||
for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
|
||||
TORCH_WARN_ONCE(
|
||||
"Attempting to use hipBLASLt on an unsupported architecture! "
|
||||
@ -365,7 +392,7 @@ void Context::setBlasPreferredBackend(at::BlasBackend b) {
|
||||
"Cannot set preferred backend to cuBLASLt if PyTorch has not been compiled with cuBLASLt.");
|
||||
TORCH_CHECK((b != at::BlasBackend::Ck) || hasROCM(),
|
||||
"Cannot set preferred backend to Ck if PyTorch has not been compiled for ROCm.");
|
||||
if (b != at::BlasBackend::Cublas) {
|
||||
if (b != at::BlasBackend::Default && b != at::BlasBackend::Cublas) {
|
||||
TORCH_WARN_ONCE(
|
||||
"torch.backends.cuda.preferred_blas_library is an experimental feature. "
|
||||
"If you see any error or unexpected behavior when this flag is set "
|
||||
@ -391,7 +418,7 @@ void Context::setROCmFAPreferredBackend(at::ROCmFABackend b) {
|
||||
static const std::vector<std::string> archs = {
|
||||
"gfx90a", "gfx942"
|
||||
};
|
||||
for (auto index: c10::irange(getNumGPUs())) {
|
||||
for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
|
||||
TORCH_WARN_ONCE(
|
||||
"Attempting to use CK on an unsupported architecture! Cannot set backend to CK");
|
||||
|
||||
@ -110,6 +110,11 @@ class TORCH_API Context {
|
||||
|
||||
Allocator* getPinnedMemoryAllocator(
|
||||
std::optional<c10::DeviceType> device_type = std::nullopt) {
|
||||
auto opt_device_type =
|
||||
device_type.has_value() ? device_type : at::getAccelerator();
|
||||
if (opt_device_type) {
|
||||
lazyInitDevice(opt_device_type.value());
|
||||
}
|
||||
return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
|
||||
}
|
||||
|
||||
@ -441,17 +446,15 @@ class TORCH_API Context {
|
||||
bool allow_tf32_onednn = false;
|
||||
bool enabled_nnpack = true;
|
||||
at::LinalgBackend linalg_preferred_backend =
|
||||
c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true
|
||||
(c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true ||
|
||||
c10::utils::check_env("TORCH_LINALG_PREFER_HIPSOLVER") == true) // alias
|
||||
? at::LinalgBackend::Cusolver
|
||||
: at::LinalgBackend::Default;
|
||||
at::BlasBackend blas_preferred_backend =
|
||||
#ifdef USE_ROCM
|
||||
(c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") != false)
|
||||
#else
|
||||
(c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true)
|
||||
#endif
|
||||
(c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true ||
|
||||
c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") == true) // alias
|
||||
? at::BlasBackend::Cublaslt
|
||||
: at::BlasBackend::Cublas;
|
||||
: at::BlasBackend::Default;
|
||||
at::ROCmFABackend rocm_fa_preferred_backend =
|
||||
c10::utils::check_env("TORCH_ROCM_FA_PREFER_CK") == true
|
||||
? at::ROCmFABackend::Ck
|
||||
|
||||
@ -28,10 +28,8 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
|
||||
opt_device_type = at::getAccelerator(false);
|
||||
}
|
||||
if (opt_device_type.has_value()) {
|
||||
at::globalContext().lazyInitDevice(opt_device_type.value());
|
||||
return at::globalContext()
|
||||
.getAcceleratorHooksInterface(opt_device_type)
|
||||
.getPinnedMemoryAllocator();
|
||||
return at::globalContext().getPinnedMemoryAllocator(
|
||||
opt_device_type.value());
|
||||
} else {
|
||||
TORCH_CHECK(
|
||||
false, "Need to provide pin_memory allocator to use pin memory.")
|
||||
|
||||
@ -3610,11 +3610,11 @@ Tensor& transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
|
||||
return at::_mkldnn_transpose_(self, dim0, dim1);
|
||||
}
|
||||
|
||||
DimVector sizes(self.sizes().begin(), self.sizes().end());
|
||||
DimVector strides(self.strides().begin(), self.strides().end());
|
||||
std::swap(strides[dim0], strides[dim1]);
|
||||
SymDimVector sizes(self.sym_sizes().begin(), self.sym_sizes().end());
|
||||
std::swap(sizes[dim0], sizes[dim1]);
|
||||
self.as_strided_(sizes, strides);
|
||||
SymDimVector strides(self.sym_strides().begin(), self.sym_strides().end());
|
||||
std::swap(strides[dim0], strides[dim1]);
|
||||
auto result = self.as_strided__symint(std::move(sizes), std::move(strides));
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
@ -402,11 +402,12 @@ TORCH_IMPL_FUNC(avg_pool2d_backward_out_cuda) (
|
||||
bool use_divisor = divisor_override.has_value();
|
||||
const auto divisor_override_value = use_divisor ? divisor_override.value() : 0;
|
||||
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 1000
|
||||
constexpr int double_threads = 768;
|
||||
#else
|
||||
constexpr int double_threads = 1024;
|
||||
#endif
|
||||
cudaDeviceProp* properties = at::cuda::getCurrentDeviceProperties();
|
||||
const bool gesm10x = properties->major >= 10;
|
||||
int double_threads = 1024;
|
||||
if (gesm10x) {
|
||||
double_threads = 768;
|
||||
}
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(),
|
||||
"avg_pool2d_backward_out_cuda_frame",
|
||||
|
||||
@ -135,7 +135,7 @@ at::Tensor quantized_convolution(
|
||||
mask_weight = (2 ^ 0) | (2 ^ 1); // 2^0 (group) | 2^1 (output channel)
|
||||
dnnl::primitive_attr pattr;
|
||||
|
||||
bool src_need_zp = (act_scale != 0);
|
||||
bool src_need_zp = (act_zero_point != 0);
|
||||
bool dst_need_zp = (output_zero_point != 0);
|
||||
|
||||
// create usr_md for tensors, and md for conv primitive
|
||||
|
||||
@ -324,13 +324,15 @@ std::string getTensorsStringKey(const TensorList& tensors, bool short_dtype, boo
|
||||
str += "Scalar";
|
||||
} else {
|
||||
if (exclude_shape) {
|
||||
str += "[-1]";
|
||||
str += "-1";
|
||||
} else {
|
||||
str +=
|
||||
std::string([[getMPSShape(tensor) valueForKey:@"description"] componentsJoinedByString:@","].UTF8String);
|
||||
}
|
||||
}
|
||||
str += "]";
|
||||
if (tensor.is_conj())
|
||||
str += "_conj";
|
||||
} else {
|
||||
str += "Undefined";
|
||||
}
|
||||
@ -542,7 +544,12 @@ Placeholder::Placeholder(MPSGraphTensor* mpsGraphTensor,
|
||||
if ((!src.is_contiguous() || src.storage_offset()) && gatherTensorData) {
|
||||
Tensor emptyShell = Tensor();
|
||||
// use "_tensor" from Placeholder to retain view's output during its usage in other ops
|
||||
_tensor = gatherViewTensor(src, emptyShell);
|
||||
// And preserve conjugated property here
|
||||
if (!src.is_conj()) {
|
||||
_tensor = gatherViewTensor(src, emptyShell);
|
||||
} else {
|
||||
_tensor = gatherViewTensor(src.conj(), emptyShell).conj();
|
||||
}
|
||||
if (!_tensor.has_storage()) {
|
||||
// if we cannot gather, we make the tensor contiguous implicitly, and keep
|
||||
// it in placeholder to be able to retrieve it when we return from constructor
|
||||
|
||||
@ -44,7 +44,8 @@ std::tuple<Tensor, Tensor> _scaled_dot_product_attention_math_mps(const Tensor&
|
||||
TORCH_CHECK(!attn_mask.has_value(),
|
||||
"_scaled_dot_product_attention: Explicit attn_mask should not be set when is_causal=True");
|
||||
}
|
||||
|
||||
TORCH_CHECK(query.size(-3) == key.size(-3) && key.size(-3) == value.size(-3),
|
||||
"number of heads in query/key/value should match");
|
||||
TORCH_CHECK(dropout_p == 0.0, "_scaled_dot_product_attention_math_for_mps: dropout_p != 0.0 is not supported");
|
||||
TORCH_CHECK(macOS15_0_plus || (query.is_contiguous() && key.is_contiguous() && value.is_contiguous()),
|
||||
"_scaled_dot_product_attention_math_for_mps: query, key, and value must be contiguous");
|
||||
@ -55,6 +56,7 @@ std::tuple<Tensor, Tensor> _scaled_dot_product_attention_math_mps(const Tensor&
|
||||
auto [q_, sq] = ensure_4d(query);
|
||||
auto [k_, sk] = ensure_4d(key);
|
||||
auto [v_, sv] = ensure_4d(value);
|
||||
|
||||
std::optional<Tensor> mask_;
|
||||
if (attn_mask) {
|
||||
auto maskExpandedDims = query.sizes().vec();
|
||||
|
||||
@ -81,6 +81,12 @@ Tensor dot_mps(const Tensor& self, const Tensor& other) {
|
||||
castSelf = selfTensor;
|
||||
castOther = otherTensor;
|
||||
}
|
||||
if (self.is_conj()) {
|
||||
castSelf = [mpsGraph conjugateWithTensor:selfTensor name:nil];
|
||||
}
|
||||
if (other.is_conj()) {
|
||||
castOther = [mpsGraph conjugateWithTensor:otherTensor name:nil];
|
||||
}
|
||||
|
||||
MPSGraphTensor* dot = [mpsGraph multiplicationWithPrimaryTensor:castSelf
|
||||
secondaryTensor:castOther
|
||||
|
||||
@ -118,10 +118,12 @@ std::tuple<MPSGraphTensor*, MPSGraphTensor*, MPSGraphTensor*> do_mm(MPSGraph* gr
|
||||
dataType:getMPSDataType(self)];
|
||||
return {nil, nil, output};
|
||||
}
|
||||
auto selfTensor = mpsGraphRankedPlaceHolder(graph, self);
|
||||
auto otherTensor = mpsGraphRankedPlaceHolder(graph, other);
|
||||
auto selfTensor_ = mpsGraphRankedPlaceHolder(graph, self);
|
||||
auto otherTensor_ = mpsGraphRankedPlaceHolder(graph, other);
|
||||
auto selfTensor = self.is_conj() ? [graph conjugateWithTensor:selfTensor_ name:nil] : selfTensor_;
|
||||
auto otherTensor = other.is_conj() ? [graph conjugateWithTensor:otherTensor_ name:nil] : otherTensor_;
|
||||
auto output = [graph matrixMultiplicationWithPrimaryTensor:selfTensor secondaryTensor:otherTensor name:nil];
|
||||
return {selfTensor, otherTensor, output};
|
||||
return {selfTensor_, otherTensor_, output};
|
||||
}
|
||||
|
||||
bool use_metal_mm(const Tensor& self, const Tensor& other, const Tensor& output) {
|
||||
|
||||
@ -107,7 +107,12 @@ TORCH_IMPL_FUNC(tril_mps_out)
|
||||
numLowerTensor:negDiagMinusOneTensor
|
||||
numUpperTensor:minusOneTensor
|
||||
name:nil];
|
||||
outputTensor = [mpsGraph subtractionWithPrimaryTensor:inputTensor secondaryTensor:complementTensor name:nil];
|
||||
MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0 dataType:getMPSDataType(self)];
|
||||
MPSGraphTensor* mask = [mpsGraph equalWithPrimaryTensor:complementTensor secondaryTensor:zeroTensor name:nil];
|
||||
outputTensor = [mpsGraph selectWithPredicateTensor:mask
|
||||
truePredicateTensor:inputTensor
|
||||
falsePredicateTensor:zeroTensor
|
||||
name:nil];
|
||||
}
|
||||
|
||||
newCachedGraph->inputTensor_ = inputTensor;
|
||||
|
||||
359
aten/src/ATen/native/quantized/cpu/ACLUtils.cpp
Normal file
359
aten/src/ATen/native/quantized/cpu/ACLUtils.cpp
Normal file
@ -0,0 +1,359 @@
|
||||
#include <ATen/native/quantized/cpu/ACLUtils.h>
|
||||
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
#include <ATen/Parallel.h>
|
||||
#ifndef AT_PER_OPERATOR_HEADERS
|
||||
#include <ATen/Functions.h>
|
||||
#else
|
||||
#include <ATen/ops/empty.h>
|
||||
#endif
|
||||
#include <arm_compute/core/Helpers.h>
|
||||
#include <arm_compute/core/Types.h>
|
||||
#include <arm_compute/core/Utils.h>
|
||||
#include <arm_compute/core/utils/quantization/AsymmHelpers.h>
|
||||
|
||||
namespace at::native::acl_utils {
|
||||
|
||||
// Shared setup for static and dynamic ACL quantized matmuls: wraps the
// caller-owned int8 weight buffer (and the optional fp32 bias buffer) in ACL
// tensors and records whether a ReLU was requested through the cache key.
QuantMatmul::QuantMatmul(
    int64_t weight_dim_0,
    int64_t weight_dim_1,
    double weight_scale,
    int64_t weight_offset,
    int8_t* weight_ptr,
    std::optional<float*> bias_ptr,
    const QuantMatmulCacheKey& cache_key)
    : key(cache_key) {
  // Weights: signed 8-bit, per-tensor quantization. The offset handed to ACL
  // is the negated weight_offset.
  arm_compute::TensorInfo weight_info(
      arm_compute::TensorShape(weight_dim_1, weight_dim_0),
      1,
      arm_compute::DataType::QASYMM8_SIGNED,
      arm_compute::QuantizationInfo(weight_scale, -weight_offset, false));
  weight_info.set_are_values_constant(true);
  auto* weight_allocator = wei_q_tensor_.allocator();
  weight_allocator->init(weight_info);
  // import_memory wraps the existing buffer; nothing is copied here.
  weight_allocator->import_memory(weight_ptr);

  if (bias_ptr) {
    arm_compute::TensorInfo bias_info(
        arm_compute::TensorShape(1, weight_dim_1),
        1,
        arm_compute::DataType::F32);
    bia_tensor_ = arm_compute::Tensor();

    auto* bias_allocator = bia_tensor_->allocator();
    bias_allocator->init(bias_info);
    bias_allocator->import_memory(*bias_ptr);
  }

  constexpr auto kFuseReluIdx =
      static_cast<int>(QuantMatmulCacheKeyIndex::FUSE_RELU);
  if (std::get<kFuseReluIdx>(key)) {
    relu_info_ =
        arm_compute::ActivationLayerInfo(arm_compute::ActivationFunction::RELU);
  }
}
|
||||
|
||||
QuantMatmul::~QuantMatmul() {
  // The weight/bias buffers were imported rather than allocated by ACL, so
  // free() does not release memory; it only tells ACL the pointers are no
  // longer in use.
  wei_q_tensor_.allocator()->free();
  if (bia_tensor_) {
    bia_tensor_->allocator()->free();
  }
}
|
||||
|
||||
// Sets up the tensors for a dynamically quantized matmul:
//   fp32 source -> (quantization layer) -> int8 source -> GEMM -> fp32 dst.
// The fp32 source and destination buffers are not attached here; only the
// intermediate int8 source buffer is allocated and imported.
DynamicQuantMatmul::DynamicQuantMatmul(
    int64_t weight_dim_0,
    int64_t weight_dim_1,
    double weight_scale,
    int64_t weight_offset,
    int8_t* weight_ptr,
    std::optional<float*> bias_ptr,
    const QuantMatmulCacheKey& cache_key)
    : QuantMatmul(
          weight_dim_0,
          weight_dim_1,
          weight_scale,
          weight_offset,
          weight_ptr,
          bias_ptr,
          cache_key) {
  constexpr auto kMIdx = static_cast<int>(QuantMatmulCacheKeyIndex::M);
  const int64_t m = std::get<kMIdx>(key);

  // TODO: the initial offset is set to int8_t max instead of zero because
  // ACL currently skips the MatrixBReduction calculation if the source
  // offset at configuration time is zero. This is fixed by this PR:
  // https://review.mlplatform.org/c/ml/ComputeLibrary/+/12820/8
  // The actual source offset is set at runtime.
  const arm_compute::QuantizationInfo placeholder_qinfo(
      /*scale=*/1.0,
      /*offset=*/std::numeric_limits<int8_t>::max(),
      /*is_dynamic=*/true);

  // ACL dynamically quantized matmuls only support (signed) int8_t sources.
  arm_compute::TensorInfo quantized_src_info(
      arm_compute::TensorShape(weight_dim_0, m),
      1,
      arm_compute::DataType::QASYMM8_SIGNED,
      placeholder_qinfo);
  quantized_src_info.set_are_values_constant(false);

  arm_compute::TensorInfo float_src_info(
      arm_compute::TensorShape(weight_dim_0, m), arm_compute::Format::F32);
  float_src_info.set_are_values_constant(false);

  arm_compute::TensorInfo float_dst_info(
      arm_compute::TensorShape(weight_dim_1, m), arm_compute::Format::F32);

  src_q_tensor.allocator()->init(quantized_src_info);
  src_tensor.allocator()->init(float_src_info);
  dst_tensor.allocator()->init(float_dst_info);

  // Backing storage for the quantized source; it is kept as a member so it
  // lives as long as this object, and ACL only wraps its data pointer.
  src_q_tensor_orig_ =
      at::empty({m, weight_dim_0}, at::device(c10::kCPU).dtype(c10::kQInt8));
  src_q_tensor.allocator()->import_memory(src_q_tensor_orig_.data_ptr());

  // A separate activation layer is only created when ReLU was requested.
  if (relu_info_) {
    relu = arm_compute::NEActivationLayer();
  }
}
|
||||
|
||||
DynamicQuantMatmul::~DynamicQuantMatmul() {
  // The quantized source buffer was imported, so free() does not release
  // memory; it only tells ACL that the pointer is no longer in use.
  auto* quantized_src_allocator = src_q_tensor.allocator();
  quantized_src_allocator->free();
}
|
||||
|
||||
// Validates every ACL stage of the dynamic pipeline (optional ReLU,
// quantization, GEMM) and returns the first non-OK status, or the GEMM
// validation status if the earlier stages pass.
arm_compute::Status DynamicQuantMatmul::validate() {
  constexpr auto kOk = arm_compute::ErrorCode::OK;

  if (relu_info_) {
    // The ReLU is validated in place on the destination tensor.
    auto relu_status = arm_compute::NEActivationLayer::validate(
        dst_tensor.info(), dst_tensor.info(), *relu_info_);
    if (relu_status.error_code() != kOk) {
      return relu_status;
    }
  }

  auto quant_status = arm_compute::NEQuantizationLayer::validate(
      src_tensor.info(), src_q_tensor.info());
  if (quant_status.error_code() != kOk) {
    return quant_status;
  }

  auto* bias_info = bia_tensor_ ? bia_tensor_->info() : nullptr;
  return arm_compute::NEGEMMLowpMatrixMultiplyCore::validate(
      src_q_tensor.info(),
      wei_q_tensor_.info(),
      bias_info,
      dst_tensor.info(),
      gemm_info_);
}
|
||||
|
||||
void DynamicQuantMatmul::configure() {
|
||||
quant.configure(&src_tensor, &src_q_tensor);
|
||||
gemm.configure(
|
||||
&src_q_tensor,
|
||||
&wei_q_tensor_,
|
||||
bia_tensor_.has_value() ? &bia_tensor_.value() : nullptr,
|
||||
&dst_tensor,
|
||||
gemm_info_);
|
||||
if (relu.has_value()) {
|
||||
relu->configure(&dst_tensor, &dst_tensor, relu_info_.value());
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up the tensors and the GEMM output stage for a statically quantized
// matmul. Input/output quantization parameters and signedness are read from
// the cache key. The source and destination buffers are not attached here.
StaticQuantMatmul::StaticQuantMatmul(
    int64_t weight_dim_0,
    int64_t weight_dim_1,
    double weight_scale,
    int64_t weight_offset,
    int8_t* weight_ptr,
    std::optional<float*> bias_ptr,
    const QuantMatmulCacheKey& cache_key)
    : QuantMatmul(
          weight_dim_0,
          weight_dim_1,
          weight_scale,
          weight_offset,
          weight_ptr,
          bias_ptr,
          cache_key) {
  const int64_t m =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::M)>(key);
  const int64_t input_zero_point =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::INPUT_OFFSET)>(key);
  const double input_scale =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::INPUT_SCALE)>(key);
  const int64_t output_zero_point =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::OUTPUT_OFFSET)>(key);
  const double output_scale =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::OUTPUT_SCALE)>(key);
  const bool signed_input =
      std::get<static_cast<int>(QuantMatmulCacheKeyIndex::SIGNED_INPUT)>(key);

  // The same ACL data type is used for the source and the destination.
  const auto input_acl_datatype = signed_input
      ? arm_compute::DataType::QASYMM8_SIGNED
      : arm_compute::DataType::QASYMM8;

  // The source offset handed to ACL is the negated input zero point.
  auto src_q_tensor_info = arm_compute::TensorInfo(
      arm_compute::TensorShape(weight_dim_0, m),
      1,
      input_acl_datatype,
      arm_compute::QuantizationInfo(input_scale, -input_zero_point, false));
  src_q_tensor_info.set_are_values_constant(false);
  src_q_tensor.allocator()->init(src_q_tensor_info);

  if (bias_ptr.has_value()) {
    auto bia_q_tensor_info = arm_compute::TensorInfo(
        arm_compute::TensorShape(1, weight_dim_1),
        1,
        arm_compute::DataType::S32,
        arm_compute::QuantizationInfo(
            1 / (input_scale * weight_scale), 0, false));
    bia_q_tensor_ = arm_compute::Tensor();
    bia_q_tensor_.value().allocator()->init(bia_q_tensor_info);

    float* bias_fp32_buffer =
        reinterpret_cast<float*>(bia_tensor_.value().buffer());
    // The quantized bias has exactly weight_dim_1 elements, so the backing
    // buffer must be sized from weight_dim_1. Sizing it from
    // {m, weight_dim_0} overflows whenever m * weight_dim_0 < weight_dim_1.
    bia_q_tensor_orig_ =
        at::empty({weight_dim_1}, at::device(c10::kCPU).dtype(c10::kQInt32));
    int32_t* bias_s32_buffer =
        reinterpret_cast<int32_t*>(bia_q_tensor_orig_.value().data_ptr());
    const float bias_scale =
        bia_q_tensor_info.quantization_info().uniform().scale;
    // Quantize the bias to int32_t. It makes sense to do it here rather than
    // in the prepack phase because dynamically quantized ACL matmuls don't
    // need the bias in int32_t.
    at::parallel_for(0, weight_dim_1, 1, [&](int64_t start, int64_t end) {
      for (int64_t i = start; i < end; ++i) {
        bias_s32_buffer[i] =
            int32_t(std::round(bias_fp32_buffer[i] * bias_scale));
      }
    });
    bia_q_tensor_.value().allocator()->import_memory(bias_s32_buffer);
  }

  auto dst_q_tensor_info = arm_compute::TensorInfo(
      arm_compute::TensorShape(weight_dim_1, m),
      1,
      input_acl_datatype,
      arm_compute::QuantizationInfo(output_scale, output_zero_point, false));
  dst_q_tensor.allocator()->init(dst_q_tensor_info);

  // Setup lowp_gemm output stage: requantize the int32 accumulators by
  // (input_scale * weight_scale) / output_scale as a fixed-point
  // multiplier + shift.
  int output_multiplier;
  int output_shift;
  float multiplier = (input_scale * weight_scale) / output_scale;
  arm_compute::quantization::calculate_quantized_multiplier_less_than_one(
      multiplier, &output_multiplier, &output_shift);

  arm_compute::GEMMLowpOutputStageInfo output_stage_info;
  output_stage_info.type =
      arm_compute::GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
  output_stage_info.gemmlowp_multiplier = output_multiplier;
  output_stage_info.gemmlowp_shift = output_shift;
  output_stage_info.gemmlowp_offset = output_zero_point;

  // Default clamp bounds: the full range of the 8-bit output type.
  int32_t min_activation = signed_input ? std::numeric_limits<int8_t>::min()
                                        : std::numeric_limits<uint8_t>::min();
  int32_t max_activation = signed_input ? std::numeric_limits<int8_t>::max()
                                        : std::numeric_limits<uint8_t>::max();

  if (relu_info_.has_value()) {
    // figure out min, max values for ReLU
    const arm_compute::UniformQuantizationInfo uqinfo =
        dst_q_tensor_info.quantization_info().uniform();
    std::tie(min_activation, max_activation) =
        arm_compute::get_quantized_activation_min_max(
            relu_info_.value(), src_q_tensor_info.data_type(), uqinfo);
    // fuse ReLU with the GEMM
    gemm_info_.set_activation_info(relu_info_.value());
  }
  output_stage_info.gemmlowp_min_bound = min_activation;
  output_stage_info.gemmlowp_max_bound = max_activation;
  output_stage_info.output_data_type = dst_q_tensor_info.data_type();

  gemm_info_.set_gemmlowp_output_stage(output_stage_info);
}
|
||||
|
||||
StaticQuantMatmul::~StaticQuantMatmul() {
  // The quantized bias buffer was imported, so free() does not release
  // memory; it only tells ACL that the pointer is no longer in use.
  if (bia_q_tensor_) {
    bia_q_tensor_->allocator()->free();
  }
}
|
||||
|
||||
// Asks ACL whether the low-precision GEMM can run with the configured
// source, weight, optional int32 bias and destination tensor infos.
arm_compute::Status StaticQuantMatmul::validate() {
  auto* bias_info = bia_q_tensor_ ? bia_q_tensor_->info() : nullptr;
  return arm_compute::NEGEMMLowpMatrixMultiplyCore::validate(
      src_q_tensor.info(),
      wei_q_tensor_.info(),
      bias_info,
      dst_q_tensor.info(),
      gemm_info_);
}
|
||||
|
||||
// Configures the low-precision GEMM; the bias argument is null when no
// quantized bias tensor was set up.
void StaticQuantMatmul::configure() {
  auto* bias = bia_q_tensor_ ? &*bia_q_tensor_ : nullptr;
  gemm.configure(
      &src_q_tensor, &wei_q_tensor_, bias, &dst_q_tensor, gemm_info_);
}
|
||||
|
||||
// Initializes the two operand tensors and the destination tensor of a
// quantized add. All three share the same dims and data type and differ
// only in their quantization parameters. No buffers are attached here.
QuantAdd::QuantAdd(
    arm_compute::DataType dtype,
    const std::vector<int64_t>& input_dims,
    double qa_scale,
    int64_t qa_offset,
    double qb_scale,
    int64_t qb_offset,
    double dst_scale,
    int64_t dst_offset) {
  // The three shapes are identical, so the shape is built once. Index i of
  // input_dims is written to dimension i of the ACL shape.
  arm_compute::TensorShape shape;
  for (int i = input_dims.size() - 1; i >= 0; i--) {
    shape.set(i, input_dims[i], false, true);
  }

  const arm_compute::QuantizationInfo a_qinfo(
      static_cast<float>(qa_scale), static_cast<int32_t>(qa_offset), false);
  const arm_compute::QuantizationInfo b_qinfo(
      static_cast<float>(qb_scale), static_cast<int32_t>(qb_offset), false);
  const arm_compute::QuantizationInfo out_qinfo(
      static_cast<float>(dst_scale), static_cast<int32_t>(dst_offset), false);

  qa_tensor.allocator()->init(arm_compute::TensorInfo(shape, 1, dtype, a_qinfo));
  qb_tensor.allocator()->init(arm_compute::TensorInfo(shape, 1, dtype, b_qinfo));
  qdst_tensor.allocator()->init(
      arm_compute::TensorInfo(shape, 1, dtype, out_qinfo));
}
|
||||
|
||||
// Asks ACL whether the addition can run on the configured tensor infos with
// the stored conversion policy.
arm_compute::Status QuantAdd::validate() {
  auto* lhs_info = qa_tensor.info();
  auto* rhs_info = qb_tensor.info();
  auto* out_info = qdst_tensor.info();
  return q_add.validate(lhs_info, rhs_info, out_info, policy);
}
|
||||
|
||||
// Configures the ACL addition: qdst = qa + qb under the stored policy.
void QuantAdd::configure() {
  auto* lhs = &qa_tensor;
  auto* rhs = &qb_tensor;
  auto* out = &qdst_tensor;
  q_add.configure(lhs, rhs, out, policy);
}
|
||||
|
||||
} // namespace at::native::acl_utils
|
||||
|
||||
// Forwards all arguments to the oneDNN packed-weights base class, then
// caches the weight dims and per-tensor quantization parameters.
PackedLinearWeightsACL::PackedLinearWeightsACL(
    std::unique_ptr<ideep::tensor> weight,
    std::optional<ideep::tensor> bias,
    at::Tensor orig_weight,
    std::optional<at::Tensor> orig_bias)
    : PackedLinearWeightsOnednn(
          std::move(weight),
          std::move(bias),
          std::move(orig_weight),
          std::move(orig_bias)) {
  // The constructor parameters were moved from; read the base-class members.
  // Bind by reference: only two dims are read, so no tensor copy is needed.
  auto& w = *weight_;
  k_ = w.get_dim(0);
  n_ = w.get_dim(1);
  weight_zero_point_ = orig_weight_.q_zero_point();
  weight_scale_ = orig_weight_.q_scale();
}
|
||||
|
||||
#endif // AT_MKLDNN_ACL_ENABLED()
|
||||
257
aten/src/ATen/native/quantized/cpu/ACLUtils.h
Normal file
257
aten/src/ATen/native/quantized/cpu/ACLUtils.h
Normal file
@ -0,0 +1,257 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/Config.h>
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <arm_compute/core/Error.h>
|
||||
#include <arm_compute/core/TensorInfo.h>
|
||||
#include <arm_compute/function_info/ActivationLayerInfo.h>
|
||||
#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
|
||||
#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
|
||||
#include <arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h>
|
||||
#include <arm_compute/runtime/NEON/functions/NEQuantizationLayer.h>
|
||||
#include <arm_compute/runtime/Tensor.h>
|
||||
#include <array>
|
||||
|
||||
// Utilities for Arm Compute Library (ACL) quantized operations
|
||||
// Provides interfaces to leverage ACL's accelerated kernels for statically and
|
||||
// dynamically quantized matmuls (i.e. qlinear and qlinear_dynamic). These are
|
||||
// utilized through PackedLinearWeightsACL which extends
|
||||
// PackedLinearWeightsOnednn. Note that PackedLinearWeightsACL extends rather
|
||||
// than replaces PackedLinearWeightsOnednn for AArch64 because ACL currently
|
||||
// only supports per_tensor weight quantization.
|
||||
namespace at::native::acl_utils {
|
||||
|
||||
// Cache key identifying one configured ACL quantized matmul. Fields are
// accessed positionally through QuantMatmulCacheKeyIndex, so the two
// definitions below must list their entries in the same order.
using QuantMatmulCacheKey = std::tuple<
    int64_t, // [0] M
    bool, // [1] FUSE_RELU
    int64_t, // [2] NUM_THREADS
    double, // [3] INPUT_SCALE
    int64_t, // [4] INPUT_OFFSET
    double, // [5] OUTPUT_SCALE
    int64_t, // [6] OUTPUT_OFFSET
    bool // [7] SIGNED_INPUT
    >;

// Positions of the fields inside QuantMatmulCacheKey, intended for use as
// std::get<static_cast<int>(QuantMatmulCacheKeyIndex::X)>(key).
enum class QuantMatmulCacheKeyIndex {
  M = 0,
  FUSE_RELU = 1,
  NUM_THREADS = 2,
  INPUT_SCALE = 3,
  INPUT_OFFSET = 4,
  OUTPUT_SCALE = 5,
  OUTPUT_OFFSET = 6,
  SIGNED_INPUT = 7
};
|
||||
|
||||
// Abstract interface to share common stuff between static/dynamic ACL matmuls.
|
||||
struct QuantMatmul {
|
||||
arm_compute::NEGEMMLowpMatrixMultiplyCore gemm;
|
||||
// key for use in the cache
|
||||
QuantMatmulCacheKey key;
|
||||
|
||||
QuantMatmul(
|
||||
int64_t weight_dim_0,
|
||||
int64_t weight_dim_1,
|
||||
double weight_scale,
|
||||
int64_t weight_offset,
|
||||
int8_t* weight_ptr,
|
||||
std::optional<float*> bias_ptr,
|
||||
const QuantMatmulCacheKey& cache_key);
|
||||
|
||||
virtual ~QuantMatmul();
|
||||
virtual arm_compute::Status validate() = 0;
|
||||
virtual void configure() = 0;
|
||||
|
||||
protected:
|
||||
arm_compute::Tensor wei_q_tensor_;
|
||||
std::optional<arm_compute::Tensor> bia_tensor_;
|
||||
arm_compute::GEMMInfo gemm_info_;
|
||||
std::optional<arm_compute::ActivationLayerInfo> relu_info_;
|
||||
};
|
||||
|
||||
struct DynamicQuantMatmul : public QuantMatmul {
|
||||
arm_compute::Tensor src_q_tensor;
|
||||
arm_compute::Tensor src_tensor;
|
||||
arm_compute::Tensor dst_tensor;
|
||||
arm_compute::NEQuantizationLayer quant;
|
||||
// We need a ReLU layer here (unlike static quantization) because the ReLU
|
||||
// cannot be "truly" fused with the GEMM through gemm_info in ACL dynamically
|
||||
// quantized matmuls.
|
||||
std::optional<arm_compute::NEActivationLayer> relu;
|
||||
|
||||
DynamicQuantMatmul(
|
||||
int64_t weight_dim_0,
|
||||
int64_t weight_dim_1,
|
||||
double weight_scale,
|
||||
int64_t weight_offset,
|
||||
int8_t* weight_ptr,
|
||||
std::optional<float*> bias_ptr,
|
||||
const QuantMatmulCacheKey& cache_key);
|
||||
|
||||
~DynamicQuantMatmul() override;
|
||||
|
||||
arm_compute::Status validate() override;
|
||||
void configure() override;
|
||||
|
||||
private:
|
||||
at::Tensor src_q_tensor_orig_;
|
||||
};
|
||||
|
||||
struct StaticQuantMatmul : public QuantMatmul {
|
||||
arm_compute::Tensor src_q_tensor;
|
||||
arm_compute::Tensor dst_q_tensor;
|
||||
|
||||
StaticQuantMatmul(
|
||||
int64_t weight_dim_0,
|
||||
int64_t weight_dim_1,
|
||||
double weight_scale,
|
||||
int64_t weight_offset,
|
||||
int8_t* weight_ptr,
|
||||
std::optional<float*> bias_ptr,
|
||||
const QuantMatmulCacheKey& cache_key);
|
||||
|
||||
~StaticQuantMatmul() override;
|
||||
|
||||
arm_compute::Status validate() override;
|
||||
void configure() override;
|
||||
|
||||
private:
|
||||
std::optional<arm_compute::Tensor> bia_q_tensor_;
|
||||
std::optional<at::Tensor> bia_q_tensor_orig_;
|
||||
};
|
||||
|
||||
struct QuantAdd {
|
||||
arm_compute::Tensor qa_tensor;
|
||||
arm_compute::Tensor qb_tensor;
|
||||
arm_compute::Tensor qdst_tensor;
|
||||
arm_compute::NEArithmeticAddition q_add;
|
||||
|
||||
QuantAdd(
|
||||
arm_compute::DataType dtype,
|
||||
const std::vector<int64_t>& input_dims,
|
||||
double qa_scale,
|
||||
int64_t qa_offset,
|
||||
double qb_scale,
|
||||
int64_t qb_offset,
|
||||
double dst_scale,
|
||||
int64_t dst_offset);
|
||||
|
||||
arm_compute::Status validate();
|
||||
void configure();
|
||||
|
||||
private:
|
||||
arm_compute::ConvertPolicy policy{arm_compute::ConvertPolicy::SATURATE};
|
||||
};
|
||||
|
||||
} // namespace at::native::acl_utils
|
||||
struct PackedLinearWeightsACL : public PackedLinearWeightsOnednn {
|
||||
using ACLQuantMatmul = at::native::acl_utils::QuantMatmul;
|
||||
using ACLDynamicQuantMatmul = at::native::acl_utils::DynamicQuantMatmul;
|
||||
using ACLStaticQuantMatmul = at::native::acl_utils::StaticQuantMatmul;
|
||||
using ACLQuantMatmulCacheKey = at::native::acl_utils::QuantMatmulCacheKey;
|
||||
using ACLQuantMatmulCacheKeyIndex =
|
||||
at::native::acl_utils::QuantMatmulCacheKeyIndex;
|
||||
|
||||
PackedLinearWeightsACL(
|
||||
std::unique_ptr<ideep::tensor> weight,
|
||||
std::optional<ideep::tensor> bias,
|
||||
at::Tensor orig_weight,
|
||||
std::optional<at::Tensor> orig_bias);
|
||||
|
||||
at::Tensor apply_dynamic(at::Tensor input, bool reduce_range = false)
|
||||
override;
|
||||
at::Tensor apply_dynamic_relu(at::Tensor input, bool reduce_range = false)
|
||||
override;
|
||||
|
||||
at::Tensor apply(
|
||||
at::Tensor input,
|
||||
double output_scale,
|
||||
int64_t output_zero_point) override;
|
||||
at::Tensor apply_relu(
|
||||
at::Tensor input,
|
||||
double output_scale,
|
||||
int64_t output_zero_point) override;
|
||||
|
||||
template <typename ACLQuantMatmulT>
|
||||
std::shared_ptr<ACLQuantMatmulT> get_acl_quant_matmul(
|
||||
const ACLQuantMatmulCacheKey& key) {
|
||||
return std::dynamic_pointer_cast<ACLQuantMatmulT>(
|
||||
fetch_or_create_acl_quant_matmul<ACLQuantMatmulT>(key));
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t k_;
|
||||
int64_t n_;
|
||||
int64_t weight_zero_point_;
|
||||
double weight_scale_;
|
||||
|
||||
// A 2 element (per layer) cache. Given it's not intended to store more than 2
|
||||
// elements, we do not need a fancy implementation. The idea behind it is to
|
||||
// allow for a (configuration free) fast path for autoregressive
|
||||
// transformer-like models which usually involve 2 input tensor shapes; one
|
||||
// for the prefill phase and another for the autoregressive phase
|
||||
std::array<std::shared_ptr<ACLQuantMatmul>, 2> cache_;
|
||||
|
||||
template <typename ACLQuantMatmulT>
|
||||
std::shared_ptr<ACLQuantMatmul> fetch_or_create_acl_quant_matmul(
|
||||
const ACLQuantMatmulCacheKey& key) {
|
||||
// We're only maintaining a 2 element LRU cache
|
||||
// hit first
|
||||
if (cache_[0] != nullptr && cache_[0]->key == key) {
|
||||
return cache_[0];
|
||||
}
|
||||
// hit second
|
||||
if (cache_[1] != nullptr && cache_[1]->key == key) {
|
||||
// Update LRU
|
||||
std::swap(cache_[0], cache_[1]);
|
||||
return cache_[0];
|
||||
}
|
||||
// miss -> replace Least Recently Used - i.e. element at index 1
|
||||
cache_[1] = create_acl_quant_matmul<ACLQuantMatmulT>(key);
|
||||
std::swap(cache_[0], cache_[1]);
|
||||
return cache_[0];
|
||||
}
|
||||
|
||||
template <typename ACLQuantMatmulT>
|
||||
std::shared_ptr<ACLQuantMatmulT> create_acl_quant_matmul(
|
||||
const ACLQuantMatmulCacheKey& key) {
|
||||
std::optional<float*> bias_ptr;
|
||||
if (bias_.has_value()) {
|
||||
bias_ptr = (float*)bias_.value().get_data_handle();
|
||||
}
|
||||
auto acl_gemm = std::make_shared<ACLQuantMatmulT>(
|
||||
k_,
|
||||
n_,
|
||||
weight_scale_,
|
||||
weight_zero_point_,
|
||||
(int8_t*)weight_.get()->get_data_handle(),
|
||||
bias_ptr,
|
||||
key);
|
||||
|
||||
// validate
|
||||
auto status = acl_gemm->validate();
|
||||
if (status.error_code() != arm_compute::ErrorCode::OK) {
|
||||
TORCH_WARN(
|
||||
"Arm Compute Library's Quantized Matmul Validation Failed: " +
|
||||
status.error_description());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// configure
|
||||
acl_gemm->configure();
|
||||
return acl_gemm;
|
||||
}
|
||||
|
||||
template <bool ReluFused>
|
||||
at::Tensor apply_dynamic_impl(at::Tensor input, bool reduce_range = false);
|
||||
|
||||
template <bool ReluFused>
|
||||
at::Tensor apply_impl(
|
||||
at::Tensor input,
|
||||
double output_scale,
|
||||
int64_t output_zero_point);
|
||||
};
|
||||
|
||||
#endif // AT_MKLDNN_ACL_ENABLED()
|
||||
@ -5,6 +5,7 @@
|
||||
#include <ATen/ExpandUtils.h>
|
||||
#include <torch/library.h>
|
||||
#include <ATen/quantized/Quantizer.h>
|
||||
#include <ATen/native/quantized/cpu/ACLUtils.h>
|
||||
#include <ATen/native/quantized/cpu/BinaryOps.h>
|
||||
#include <ATen/native/quantized/cpu/QuantizedOps.h>
|
||||
#include <ATen/native/quantized/cpu/init_qnnpack.h>
|
||||
@ -384,6 +385,67 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
|
||||
}
|
||||
#endif // USE_XNNPACK
|
||||
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
// Quantized add through Arm Compute Library.
// Inputs are made contiguous in qa's suggested memory format, the output is
// allocated with the requested scale / zero_point, and the three buffers are
// temporarily imported into ACL tensors for the duration of the run.
// Raises (TORCH_CHECK) if qa is not per-tensor quantized or if ACL rejects
// the configuration.
Tensor acl_qadd(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
  TORCH_CHECK(
      qa.qscheme() == kPerTensorAffine || qa.qscheme() == kPerTensorSymmetric,
      "Only per tensor quantization is supported in ACL quantized add.");

  Tensor qa_contig = qa.contiguous(qa.suggest_memory_format());
  Tensor qb_contig = qb.contiguous(qa.suggest_memory_format());
  auto qa_mem_format = qa_contig.suggest_memory_format();
  Tensor dst = at::native::empty_affine_quantized(
      at::infer_size_dimvector(qa_contig.sizes(), qb_contig.sizes()),
      qa_contig.scalar_type(),
      std::nullopt /* layout */,
      kCPU,
      std::nullopt /* pin_memory */,
      scale,
      zero_point,
      qa_mem_format);

  // Nothing to compute if any dimension is zero, not only the leading one.
  if (dst.numel() == 0) {
    return dst;
  }

  auto input_dims = qa_contig.sizes().vec();
  auto acl_dtype = dst.scalar_type() == kQInt8
      ? arm_compute::DataType::QASYMM8_SIGNED
      : arm_compute::DataType::QASYMM8;
  // The ACL objects are only needed for this call, so a local is enough.
  acl_utils::QuantAdd acl_add(
      acl_dtype,
      input_dims,
      qa_contig.q_scale(),
      qa_contig.q_zero_point(),
      qb_contig.q_scale(),
      qb_contig.q_zero_point(),
      dst.q_scale(),
      dst.q_zero_point());

  auto status = acl_add.validate();
  TORCH_CHECK(
      status.error_code() == arm_compute::ErrorCode::OK,
      "Arm Compute Library's Quantized Add Validation Failed: " +
          status.error_description());

  acl_add.configure();

  acl_add.qa_tensor.allocator()->import_memory(qa_contig.data_ptr());
  acl_add.qb_tensor.allocator()->import_memory(qb_contig.data_ptr());
  acl_add.qdst_tensor.allocator()->import_memory(dst.data_ptr());

  acl_add.q_add.run();

  // this will not free memory, it will just tell ACL that we're no longer
  // using the pointer
  acl_add.qa_tensor.allocator()->free();
  acl_add.qb_tensor.allocator()->free();
  acl_add.qdst_tensor.allocator()->free();

  return dst;
}
|
||||
#endif // AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
template <bool ReLUFused = false>
|
||||
Tensor qadd(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
|
||||
check_inputs(qa, qb);
|
||||
@ -406,6 +468,15 @@ Tensor qadd(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
|
||||
}
|
||||
#endif // USE_PYTORCH_QNNPACK
|
||||
}
|
||||
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
if (!ReLUFused && qa.ndimension() > 0 && qa.sizes() == qb.sizes() &&
|
||||
qa.scalar_type() == qb.scalar_type() &&
|
||||
(qa.scalar_type() == kQInt8 || qa.scalar_type() == kQUInt8)) {
|
||||
return acl_qadd(qa, qb, scale, zero_point);
|
||||
}
|
||||
#endif // AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
auto qc = at::_empty_affine_quantized(
|
||||
qa.sizes(),
|
||||
at::device(kCPU)
|
||||
|
||||
@ -1,17 +1,18 @@
|
||||
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
|
||||
#include <ATen/core/Tensor.h>
|
||||
#include <ATen/Context.h>
|
||||
#include <ATen/Parallel.h>
|
||||
#include <ATen/TensorOperators.h>
|
||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/XnnpackUtils.h>
|
||||
#include <ATen/core/Tensor.h>
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#include <ATen/native/quantized/PackedParams.h>
|
||||
#include <ATen/native/quantized/cpu/ACLUtils.h>
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QuantUtils.h>
|
||||
#include <ATen/native/quantized/cpu/XnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||
#include <ATen/native/quantized/cpu/qlinear.h>
|
||||
#include <ATen/native/quantized/library.h>
|
||||
#include <ATen/native/quantized/PackedParams.h>
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
|
||||
#include <torch/library.h>
|
||||
|
||||
@ -1107,6 +1108,96 @@ static at::Tensor linear_int8_with_onednn_weight(
|
||||
primitive.execute(ideep::stream::default_stream(), args);
|
||||
return dim == 2 ? output : output.reshape(output_size);
|
||||
}
|
||||
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
template <bool ReluFused>
|
||||
at::Tensor PackedLinearWeightsACL::apply_impl(
|
||||
at::Tensor input,
|
||||
double output_scale,
|
||||
int64_t output_zero_point) {
|
||||
const int64_t dim = input.dim();
|
||||
TORCH_CHECK(
|
||||
dim != 0, "qlinear (ACL): input dim should be at least 1, but got 0");
|
||||
TORCH_CHECK(
|
||||
input.scalar_type() == c10::ScalarType::QUInt8 ||
|
||||
input.scalar_type() == c10::ScalarType::QInt8,
|
||||
"qlinear (ACL): data type of input should be QUInt8 or QInt8.");
|
||||
|
||||
auto input_contig = input.expect_contiguous();
|
||||
|
||||
int64_t m = input.numel() / k_;
|
||||
double input_scale = input.q_scale();
|
||||
int64_t input_zero_point = input.q_zero_point();
|
||||
auto is_input_qint8 = input.scalar_type() == c10::ScalarType::QInt8;
|
||||
auto key = std::make_tuple(
|
||||
m,
|
||||
ReluFused,
|
||||
static_cast<int64_t>(at::get_num_threads()),
|
||||
input_scale,
|
||||
input_zero_point,
|
||||
output_scale,
|
||||
output_zero_point,
|
||||
is_input_qint8);
|
||||
|
||||
auto acl_gemm =
|
||||
get_acl_quant_matmul<at::native::acl_utils::StaticQuantMatmul>(key);
|
||||
if (acl_gemm) {
|
||||
acl_gemm->src_q_tensor.allocator()->import_memory(input_contig->data_ptr());
|
||||
|
||||
auto dst_dims = {m, n_};
|
||||
at::Tensor output = at::_empty_affine_quantized(
|
||||
dst_dims,
|
||||
at::device(c10::kCPU).dtype(
|
||||
is_input_qint8 ? c10::kQInt8 : c10::kQUInt8),
|
||||
output_scale,
|
||||
output_zero_point);
|
||||
|
||||
if (output.numel() == 0) {
|
||||
return output;
|
||||
}
|
||||
|
||||
acl_gemm->dst_q_tensor.allocator()->import_memory(output.data_ptr());
|
||||
|
||||
acl_gemm->gemm.run();
|
||||
|
||||
acl_gemm->src_q_tensor.allocator()->free();
|
||||
acl_gemm->dst_q_tensor.allocator()->free();
|
||||
|
||||
auto out_sizes = input.sizes().vec();
|
||||
out_sizes.back() = n_;
|
||||
|
||||
if (output.sizes().vec() == out_sizes)
|
||||
return output;
|
||||
return output.reshape(out_sizes);
|
||||
}
|
||||
// fallback to oneDNN in the unlikely scinario that ACL's validation fails
|
||||
if (ReluFused) {
|
||||
return PackedLinearWeightsOnednn::apply_relu(
|
||||
input, output_scale, output_zero_point);
|
||||
} else {
|
||||
return PackedLinearWeightsOnednn::apply(
|
||||
input, output_scale, output_zero_point);
|
||||
}
|
||||
}
|
||||
|
||||
// Statically quantized linear without a fused ReLU.
at::Tensor PackedLinearWeightsACL::apply(
    at::Tensor input,
    double output_scale,
    int64_t output_zero_point) {
  constexpr bool kFuseRelu = false;
  return apply_impl<kFuseRelu>(
      std::move(input), output_scale, output_zero_point);
}
|
||||
|
||||
// Statically quantized linear with a fused ReLU.
at::Tensor PackedLinearWeightsACL::apply_relu(
    at::Tensor input,
    double output_scale,
    int64_t output_zero_point) {
  constexpr bool kFuseRelu = true;
  return apply_impl<kFuseRelu>(
      std::move(input), output_scale, output_zero_point);
}
|
||||
|
||||
#endif // AT_MKLDNN_ACL_ENABLED()
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
|
||||
namespace at::native {
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <ATen/native/quantized/cpu/ACLUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QuantUtils.h>
|
||||
#include <ATen/native/quantized/library.h>
|
||||
#include <ATen/native/quantized/PackedParams.h>
|
||||
@ -697,6 +698,135 @@ static at::Tensor linear_dynamic_fp16_with_onednn_weight(
|
||||
primitive.execute(ideep::stream::default_stream(), args);
|
||||
return dim == 2 ? output : output.reshape(output_size);
|
||||
}
|
||||
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
|
||||
template <bool ReluFused>
|
||||
at::Tensor PackedLinearWeightsACL::apply_dynamic_impl(
|
||||
at::Tensor input,
|
||||
bool reduce_range) {
|
||||
// Dynamic: fp32 * int8 -> fp32
|
||||
using at::Tensor;
|
||||
|
||||
TORCH_CHECK(
|
||||
input.dim() >= 2,
|
||||
"The dimension of input tensor should be larger than or equal to 2");
|
||||
TORCH_CHECK(
|
||||
input.scalar_type() == c10::ScalarType::Float,
|
||||
"qlinear_dynamic (ACL): data type of input should be float.");
|
||||
|
||||
auto input_contig = input.contiguous();
|
||||
const int64_t dim = input.dim();
|
||||
auto input_reshaped =
|
||||
dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});
|
||||
auto input_dims = input_reshaped.sizes().vec();
|
||||
|
||||
int64_t m = input_dims[0];
|
||||
auto key = std::make_tuple(
|
||||
m, /* M */
|
||||
ReluFused, /* FUSE_RELU */
|
||||
static_cast<int64_t>(at::get_num_threads()), /* NUM_THREADS */
|
||||
1, /* INPUT_SCALE */
|
||||
0, /* INPUT_OFFSET */
|
||||
1, /* OUTPUT_SCALE */
|
||||
0, /* OUTPUT_OFFSET */
|
||||
true /* SIGNED_INPUT */
|
||||
);
|
||||
auto acl_gemm =
|
||||
get_acl_quant_matmul<at::native::acl_utils::DynamicQuantMatmul>(key);
|
||||
|
||||
if (acl_gemm) {
|
||||
// Find quantization parameters
|
||||
float x_max = 0, x_min = 0;
|
||||
|
||||
#ifdef USE_FBGEMM
|
||||
// Use FBGEMM's FindMinMax if available since it's faster
|
||||
fbgemm::FindMinMax(
|
||||
/*m=*/input_contig.data_ptr<float>(),
|
||||
/*min=*/&x_min,
|
||||
/*max=*/&x_max,
|
||||
/*len=*/input.numel());
|
||||
#else
|
||||
if (input_contig.numel() > 0) {
|
||||
auto [t_min, t_max] = at::aminmax(input_contig);
|
||||
x_max = t_max.item<float>();
|
||||
x_min = t_min.item<float>();
|
||||
}
|
||||
#endif
|
||||
|
||||
auto q_params = quant_utils::ChooseQuantizationParams(
|
||||
/*min=*/x_min,
|
||||
/*max=*/x_max,
|
||||
/*qmin=*/std::numeric_limits<int8_t>::min(),
|
||||
/*qmax=*/std::numeric_limits<int8_t>::max(),
|
||||
/*preserve_sparsity=*/false,
|
||||
/*force_scale_power_of_two=*/false,
|
||||
/*reduce_range=*/reduce_range);
|
||||
|
||||
acl_gemm->src_tensor.allocator()->import_memory(
|
||||
(float*)input_contig.data_ptr());
|
||||
|
||||
acl_gemm->src_q_tensor.info()->set_quantization_info(
|
||||
arm_compute::QuantizationInfo(
|
||||
q_params.scale, q_params.zero_point, true));
|
||||
|
||||
// quantize src tensor: fp32 -> s8
|
||||
acl_gemm->quant.run();
|
||||
|
||||
// allocation for fp32 out tensor
|
||||
auto output = at::empty({m, n_}, input.options().dtype(at::kFloat));
|
||||
if (output.numel() == 0)
|
||||
return output;
|
||||
|
||||
// We set the offset to "-zero_point" for the GEMM, but to "zero_point" for
|
||||
// the quantization layer. This is a known inconsistency in ACL.
|
||||
acl_gemm->src_q_tensor.info()->set_quantization_info(
|
||||
arm_compute::QuantizationInfo(
|
||||
q_params.scale, -q_params.zero_point, true));
|
||||
|
||||
acl_gemm->dst_tensor.allocator()->import_memory((float*)output.data_ptr());
|
||||
|
||||
// s8 src, s8 wei -> f32 dst
|
||||
acl_gemm->gemm.run();
|
||||
|
||||
if (acl_gemm->relu.has_value()) {
|
||||
acl_gemm->relu->run();
|
||||
}
|
||||
|
||||
// this will not free memory, it will just tell ACL that we're no longer
|
||||
// using the pointer
|
||||
acl_gemm->src_tensor.allocator()->free();
|
||||
acl_gemm->dst_tensor.allocator()->free();
|
||||
|
||||
auto out_sizes = input.sizes().vec();
|
||||
out_sizes.back() = n_;
|
||||
if (output.sizes().vec() == out_sizes)
|
||||
return output;
|
||||
return output.reshape(out_sizes);
|
||||
}
|
||||
|
||||
// fallback to oneDNN in the unlikely scenario that ACL's validation fails
|
||||
if (ReluFused) {
|
||||
return PackedLinearWeightsOnednn::apply_dynamic_relu(input, reduce_range);
|
||||
} else {
|
||||
return PackedLinearWeightsOnednn::apply_dynamic(input, reduce_range);
|
||||
}
|
||||
}
|
||||
|
||||
at::Tensor PackedLinearWeightsACL::apply_dynamic(
|
||||
at::Tensor input,
|
||||
bool reduce_range) {
|
||||
return apply_dynamic_impl</*ReluFused=*/false>(
|
||||
std::move(input), reduce_range);
|
||||
}
|
||||
|
||||
at::Tensor PackedLinearWeightsACL::apply_dynamic_relu(
|
||||
at::Tensor input,
|
||||
bool reduce_range) {
|
||||
return apply_dynamic_impl</*ReluFused=*/true>(std::move(input), reduce_range);
|
||||
}
|
||||
|
||||
#endif // #if AT_MKLDNN_ACL_ENABLED()
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
|
||||
namespace at::native {
|
||||
|
||||
@ -1,15 +1,16 @@
|
||||
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
|
||||
#include <ATen/Context.h>
|
||||
#include <ATen/core/Tensor.h>
|
||||
#include <ATen/cpp_custom_type_hack.h>
|
||||
#include <ATen/Context.h>
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#include <ATen/native/quantized/PackedParams.h>
|
||||
#include <ATen/native/quantized/cpu/ACLUtils.h>
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QuantUtils.h>
|
||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||
#include <ATen/native/quantized/cpu/init_qnnpack.h>
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QuantUtils.h>
|
||||
#include <ATen/native/quantized/library.h>
|
||||
#include <ATen/native/quantized/PackedParams.h>
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#include <ATen/quantized/Quantizer.h>
|
||||
#include <torch/custom_class.h>
|
||||
#include <torch/library.h>
|
||||
@ -279,12 +280,15 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeightsOnednn::prepack(
|
||||
packed_bias.init(bias_desc, b.data_ptr());
|
||||
onednn_bias = std::optional<ideep::tensor>(packed_bias);
|
||||
}
|
||||
auto ret_ptr = c10::make_intrusive<PackedLinearWeightsOnednn>(
|
||||
PackedLinearWeightsOnednn{
|
||||
std::move(weight_ptr),
|
||||
onednn_bias,
|
||||
weight,
|
||||
bias});
|
||||
#if AT_MKLDNN_ACL_ENABLED()
|
||||
if (qtype == c10::kPerTensorAffine) {
|
||||
return c10::make_intrusive<PackedLinearWeightsACL>(PackedLinearWeightsACL{
|
||||
std::move(weight_ptr), onednn_bias, weight, bias});
|
||||
}
|
||||
#endif // #if AT_MKLDNN_ACL_ENABLED()
|
||||
auto ret_ptr =
|
||||
c10::make_intrusive<PackedLinearWeightsOnednn>(PackedLinearWeightsOnednn{
|
||||
std::move(weight_ptr), onednn_bias, weight, bias});
|
||||
return ret_ptr;
|
||||
}
|
||||
|
||||
|
||||
@ -391,6 +391,12 @@ elseif(NOT TARGET fxdiv AND USE_SYSTEM_FXDIV)
|
||||
endif()
|
||||
target_link_libraries(pytorch_qnnpack PRIVATE fxdiv)
|
||||
|
||||
# -- [ CMake-4 compat mode
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0" AND NOT (USE_SYSTEM_PSIMD OR USE_SYSTEM_FP16))
|
||||
message(WARNING "Ancient psimd/FP16 forces CMake compatibility")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
|
||||
# ---[ Configure psimd
|
||||
if(NOT TARGET psimd AND NOT USE_SYSTEM_PSIMD)
|
||||
add_subdirectory(
|
||||
@ -423,6 +429,11 @@ elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
|
||||
endif()
|
||||
target_link_libraries(pytorch_qnnpack PRIVATE fp16)
|
||||
|
||||
# -- [ Undo cmake-4 compat mode
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
endif()
|
||||
|
||||
install(TARGETS pytorch_qnnpack
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
|
||||
@ -759,6 +759,28 @@ Tensor scaled_dot_product_attention(
|
||||
&& !(GradMode::is_enabled() && any_inputs_require_grad)
|
||||
&& (all_contiguous || mps::is_macos_13_or_newer(mps::MacOSVersion::MACOS_VER_15_0_PLUS))
|
||||
&& !any_nested) {
|
||||
if (enable_gqa) {
|
||||
int64_t q_heads = query_.size(-3);
|
||||
int64_t k_heads = key.size(-3);
|
||||
int64_t repeat_factor = q_heads / k_heads;
|
||||
|
||||
if (repeat_factor > 1) {
|
||||
TORCH_CHECK(q_heads % k_heads == 0,
|
||||
"For GQA, the query tensor's head dimension (" + std::to_string(q_heads) +
|
||||
") must be divisible by the key tensor's head dimension (" + std::to_string(k_heads) + ").");
|
||||
auto repeated_key = key.repeat_interleave(repeat_factor, /*dim=*/-3);
|
||||
auto repeated_value = value.repeat_interleave(repeat_factor, /*dim=*/-3);
|
||||
return std::get<0>(at::_scaled_dot_product_attention_math_for_mps(
|
||||
query_,
|
||||
repeated_key,
|
||||
repeated_value,
|
||||
attn_mask,
|
||||
dropout_p,
|
||||
is_causal,
|
||||
std::nullopt, /*dropout_mask*/
|
||||
scale));
|
||||
}
|
||||
}
|
||||
return std::get<0>(at::_scaled_dot_product_attention_math_for_mps(
|
||||
query_,
|
||||
key,
|
||||
|
||||
@ -1,20 +1,20 @@
|
||||
add_loop_eager,compile_time_instruction_count,2806000000,0.015
|
||||
add_loop_eager,compile_time_instruction_count,2869000000,0.015
|
||||
|
||||
|
||||
|
||||
add_loop_eager_dynamic,compile_time_instruction_count,5460000000,0.025
|
||||
add_loop_eager_dynamic,compile_time_instruction_count,5547000000,0.025
|
||||
|
||||
|
||||
|
||||
add_loop_inductor,compile_time_instruction_count,27520000000,0.015
|
||||
add_loop_inductor,compile_time_instruction_count,28130000000,0.015
|
||||
|
||||
|
||||
|
||||
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,40410000000,0.025
|
||||
add_loop_inductor_dynamic_gpu,compile_time_instruction_count,41610000000,0.025
|
||||
|
||||
|
||||
|
||||
add_loop_inductor_gpu,compile_time_instruction_count,23970000000,0.015
|
||||
add_loop_inductor_gpu,compile_time_instruction_count,24570000000,0.015
|
||||
|
||||
|
||||
|
||||
@ -22,11 +22,11 @@ basic_modules_ListOfLinears_eager,compile_time_instruction_count,953800000,0.015
|
||||
|
||||
|
||||
|
||||
basic_modules_ListOfLinears_inductor,compile_time_instruction_count,17070000000,0.015
|
||||
basic_modules_ListOfLinears_inductor,compile_time_instruction_count,17600000000,0.015
|
||||
|
||||
|
||||
|
||||
basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,15320000000,0.015
|
||||
basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,15780000000,0.015
|
||||
|
||||
|
||||
|
||||
@ -34,32 +34,32 @@ basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,97140000
|
||||
|
||||
|
||||
|
||||
update_hint_regression,compile_time_instruction_count,1523000000,0.02
|
||||
update_hint_regression,compile_time_instruction_count,1576000000,0.02
|
||||
|
||||
|
||||
|
||||
sum_floordiv_regression,compile_time_instruction_count,1026000000,0.015
|
||||
sum_floordiv_regression,compile_time_instruction_count,1044000000,0.015
|
||||
|
||||
|
||||
|
||||
symint_sum,compile_time_instruction_count,3013000000,0.015
|
||||
symint_sum,compile_time_instruction_count,3101000000,0.015
|
||||
|
||||
|
||||
|
||||
aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,1964000000,0.015
|
||||
aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2005000000,0.015
|
||||
|
||||
|
||||
|
||||
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5672000000,0.015
|
||||
aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5784000000,0.015
|
||||
|
||||
|
||||
|
||||
aotdispatcher_partitioner_cpu,compile_time_instruction_count,7752000000,0.015
|
||||
aotdispatcher_partitioner_cpu,compile_time_instruction_count,8300000000,0.015
|
||||
|
||||
|
||||
|
||||
aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3537000000,0.015
|
||||
aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3678000000,0.015
|
||||
|
||||
|
||||
|
||||
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,9662000000,0.015
|
||||
aotdispatcher_training_subclass_cpu,compile_time_instruction_count,9982000000,0.015
|
||||
|
||||
|
@ -8,9 +8,9 @@ namespace metal {
|
||||
|
||||
template <typename T>
|
||||
opmath_t<T> threadgroup_sum(threadgroup T* data, unsigned size) {
|
||||
opmath_t<T> rc = data[0];
|
||||
// TODO: This should be moved to the callee
|
||||
::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup);
|
||||
opmath_t<T> rc = data[0];
|
||||
// TODO: Use `simd_shuffle_down`
|
||||
for (unsigned idx = 1; idx < size; ++idx) {
|
||||
rc += data[idx];
|
||||
@ -20,9 +20,9 @@ opmath_t<T> threadgroup_sum(threadgroup T* data, unsigned size) {
|
||||
|
||||
template <typename T>
|
||||
opmath_t<T> threadgroup_prod(threadgroup T* data, unsigned size) {
|
||||
opmath_t<T> rc = data[0];
|
||||
// TODO: This should be moved to the callee
|
||||
::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup);
|
||||
opmath_t<T> rc = data[0];
|
||||
for (unsigned idx = 1; idx < size; ++idx) {
|
||||
rc *= data[idx];
|
||||
}
|
||||
|
||||
@ -1055,6 +1055,7 @@ endif()
|
||||
if(USE_XPU)
|
||||
list(APPEND Caffe2_XPU_SRCS ${GENERATED_CXX_TORCH_XPU})
|
||||
list(APPEND Caffe2_XPU_SRCS ${TORCH_SRC_DIR}/csrc/inductor/aoti_torch/shim_xpu.cpp)
|
||||
list(APPEND Caffe2_XPU_SRCS ${TORCH_SRC_DIR}/csrc/inductor/aoti_runner/model_container_runner_xpu.cpp)
|
||||
add_library(torch_xpu ${Caffe2_XPU_SRCS})
|
||||
torch_compile_options(torch_xpu) # see cmake/public/utils.cmake
|
||||
target_compile_definitions(torch_xpu PRIVATE USE_XPU)
|
||||
|
||||
@ -388,9 +388,9 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION")
|
||||
list(APPEND CPU_CAPABILITY_NAMES "SVE256")
|
||||
if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8.2-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
else()
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8.2-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
endif()
|
||||
endif(CXX_SVE256_FOUND)
|
||||
endif(CXX_SVE_FOUND)
|
||||
|
||||
@ -784,7 +784,14 @@ if(USE_NUMA)
|
||||
endif()
|
||||
|
||||
if(USE_ITT)
|
||||
find_package(ITT)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "ITT is only cmake-2.8 compatible")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
find_package(ITT)
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
else()
|
||||
find_package(ITT)
|
||||
endif()
|
||||
if(ITT_FOUND)
|
||||
include_directories(SYSTEM ${ITT_INCLUDE_DIR})
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS ${ITT_LIBRARIES})
|
||||
@ -809,9 +816,18 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
|
||||
|
||||
set(FP16_BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(FP16_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||
add_subdirectory(
|
||||
"${FP16_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "FP16 is only cmake-2.8 compatible")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
add_subdirectory(
|
||||
"${FP16_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
else()
|
||||
add_subdirectory(
|
||||
"${FP16_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
|
||||
endif()
|
||||
elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
|
||||
add_library(fp16 STATIC "/usr/include/fp16.h")
|
||||
set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
|
||||
@ -1150,7 +1166,14 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
|
||||
|
||||
# Tensorpipe uses cuda_add_library
|
||||
torch_update_find_cuda_flags()
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "Archived TensorPipe forces CMake compatibility mode")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
endif()
|
||||
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS nlohmann)
|
||||
@ -1192,7 +1215,15 @@ if(USE_GLOO)
|
||||
set(NCCL_EXTERNAL ON)
|
||||
endif()
|
||||
set(GLOO_USE_CUDA_TOOLKIT ON CACHE BOOL "" FORCE)
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
# Remove me when https://github.com/facebookincubator/gloo/pull/424 is landed
|
||||
message(WARNING "Downgrading cmake-policy-version for gloo build")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
else()
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo)
|
||||
endif()
|
||||
# Here is a little bit hacky. We have to put PROJECT_BINARY_DIR in front
|
||||
# of PROJECT_SOURCE_DIR with/without conda system. The reason is that
|
||||
# gloo generates a new config.h in the binary directory.
|
||||
|
||||
7
cmake/External/nnpack.cmake
vendored
7
cmake/External/nnpack.cmake
vendored
@ -62,9 +62,16 @@ if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAM
|
||||
set(NNPACK_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(PTHREADPOOL_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "Ancient nnpack forces CMake compatibility")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
add_subdirectory(
|
||||
"${NNPACK_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/NNPACK")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
endif()
|
||||
# We build static versions of nnpack and pthreadpool but link
|
||||
# them into a shared library for Caffe2, so they need PIC.
|
||||
set_property(TARGET nnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
@ -52,7 +52,14 @@ macro(custom_protobuf_find)
|
||||
endif(MSVC_Z7_OVERRIDE)
|
||||
endif(MSVC)
|
||||
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "Ancient protobuf forces CMake compatibility")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake)
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
else()
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake)
|
||||
endif()
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
|
||||
|
||||
@ -157,7 +157,15 @@ if(HIP_FOUND)
|
||||
find_package_and_print_version(hipcub REQUIRED)
|
||||
find_package_and_print_version(rocthrust REQUIRED)
|
||||
find_package_and_print_version(hipsolver REQUIRED)
|
||||
find_package_and_print_version(hiprtc REQUIRED)
|
||||
# workaround cmake 4 build issue
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "Work around hiprtc cmake failure for cmake >= 4")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
find_package_and_print_version(hiprtc REQUIRED)
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
else()
|
||||
find_package_and_print_version(hiprtc REQUIRED)
|
||||
endif()
|
||||
find_package_and_print_version(hipblaslt REQUIRED)
|
||||
|
||||
if(UNIX)
|
||||
|
||||
@ -162,10 +162,9 @@ meets the following requirements:
|
||||
|
||||
1. **GLIBC Version**:
|
||||
- GLIBC 2.29 or newer for cxx11 ABI version
|
||||
- GLIBC 2.17 or newer for pre-cxx11 ABI version
|
||||
|
||||
2. **GCC Version**:
|
||||
- GCC 9 or newer for cxx11 and pre-cxx11 ABI versions
|
||||
- GCC 9 or newer for cxx11
|
||||
|
||||
Visual Studio Extension
|
||||
-----------------------
|
||||
|
||||
@ -4,27 +4,46 @@ Getting Started on Intel GPU
|
||||
Hardware Prerequisite
|
||||
---------------------
|
||||
|
||||
For Intel Data Center GPU
|
||||
|
||||
.. list-table::
|
||||
:widths: 50 50
|
||||
:widths: 50 50 50 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Supported OS
|
||||
- Validated Hardware
|
||||
* - Linux
|
||||
- Intel® Client GPUs / Intel® Data Center GPU Max Series
|
||||
* - Windows
|
||||
- Intel® Client GPUs
|
||||
* - WSL2 (experimental feature)
|
||||
- Intel® Client GPUs
|
||||
* - Device
|
||||
- Red Hat* Enterprise Linux* 9.2
|
||||
- SUSE Linux Enterprise Server* 15 SP5
|
||||
- Ubuntu* Server 22.04 (>= 5.15 LTS kernel)
|
||||
* - Intel® Data Center GPU Max Series (CodeName: Ponte Vecchio)
|
||||
- yes
|
||||
- yes
|
||||
- yes
|
||||
|
||||
Intel GPUs support (Prototype) is ready in PyTorch* 2.6 for Intel® Client GPUs and Intel® Data Center GPU Max Series on both Linux and Windows, which brings Intel GPUs and the SYCL* software stack into the official PyTorch stack with consistent user experience to embrace more AI application scenarios.
|
||||
For Intel Client GPU
|
||||
|
||||
+-------------------------------------+----------------------------------------------------------------------------------------------+
|
||||
| Supported OS | Validated Hardware |
|
||||
+=====================================+==============================================================================================+
|
||||
|| Windows 10/11 & Ubuntu 24.10 || Intel® Arc A-Series Graphics (CodeName: Alchemist) |
|
||||
|| || Intel® Arc B-Series Graphics (CodeName: Battlemage) |
|
||||
|| || Intel® Core™ Ultra Processors with Intel® Arc™ Graphics (CodeName: Meteor Lake) |
|
||||
|| || Intel® Core™ Ultra 200V Series with Intel® Arc™ Graphics (CodeName: Lunar Lake) |
|
||||
|| || Intel® Core™ Ultra Series 2 Processors with Intel® Arc™ Graphics (CodeName: Arrow Lake) |
|
||||
+-------------------------------------+----------------------------------------------------------------------------------------------+
|
||||
|| Ubuntu 24.04 & WSL2 (Ubuntu 24.04) || Intel® Arc A-Series Graphics (CodeName: Alchemist) |
|
||||
|| || Intel® Core™ Ultra Processors with Intel® Arc™ Graphics (CodeName: Meteor Lake) |
|
||||
|| || Intel® Core™ Ultra 200V Series with Intel® Arc™ Graphics (CodeName: Lunar Lake) |
|
||||
|| || Intel® Core™ Ultra Series 2 Processors with Intel® Arc™ Graphics (CodeName: Arrow Lake) |
|
||||
+-------------------------------------+----------------------------------------------------------------------------------------------+
|
||||
|
||||
Intel GPUs support (Prototype) is ready from PyTorch* 2.5 for Intel® Client GPUs and Intel® Data Center GPU Max Series on both Linux and Windows, which brings Intel GPUs and the SYCL* software stack into the official PyTorch stack with consistent user experience to embrace more AI application scenarios.
|
||||
|
||||
Software Prerequisite
|
||||
---------------------
|
||||
|
||||
To use PyTorch on Intel GPUs, you need to install the Intel GPUs driver first. For installation guide, visit `Intel GPUs Driver Installation <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html#driver-installation>`_.
|
||||
To use PyTorch on Intel GPUs, you need to install the Intel GPUs driver first. For installation guide, visit `Intel GPUs Driver Installation <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu.html#driver-installation>`_.
|
||||
|
||||
Please skip the Intel® Deep Learning Essentials installation section if you install from binaries. For building from source, please refer to `PyTorch Installation Prerequisites for Intel GPUs <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html>`_ for both Intel GPU Driver and Intel® Deep Learning Essentials Installation.
|
||||
Please skip the Intel® Deep Learning Essentials installation section if you install from binaries. For building from source, please refer to `PyTorch Installation Prerequisites for Intel GPUs <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu.html>`_ for both Intel GPU Driver and Intel® Deep Learning Essentials Installation.
|
||||
|
||||
|
||||
Installation
|
||||
@ -33,7 +52,7 @@ Installation
|
||||
Binaries
|
||||
^^^^^^^^
|
||||
|
||||
Now that we have `Intel GPU Driver <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html#driver-installation>`_ installed, use the following commands to install ``pytorch``, ``torchvision``, ``torchaudio`` on Linux.
|
||||
Now that we have `Intel GPU Driver <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu.html#driver-installation>`_ installed, use the following commands to install ``pytorch``, ``torchvision``, ``torchaudio`` on Linux.
|
||||
|
||||
For release wheels
|
||||
|
||||
@ -52,7 +71,7 @@ For nightly wheels
|
||||
From Source
|
||||
^^^^^^^^^^^
|
||||
|
||||
Now that we have `Intel GPU Driver and Intel® Deep Learning Essentials <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html>`_ installed. Follow guides to build ``pytorch``, ``torchvision``, ``torchaudio`` from source.
|
||||
Now that we have `Intel GPU Driver and Intel® Deep Learning Essentials <https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu.html>`_ installed, follow the guides to build ``pytorch``, ``torchvision``, ``torchaudio`` from source.
|
||||
|
||||
To build ``torch`` from source, refer to `PyTorch Installation Build from source <https://github.com/pytorch/pytorch?tab=readme-ov-file#from-source>`_.
|
||||
|
||||
@ -88,7 +107,7 @@ If you are migrating code from ``cuda``, you would change references from ``cuda
|
||||
The following points outline the support and limitations for PyTorch with Intel GPU:
|
||||
|
||||
#. Both training and inference workflows are supported.
|
||||
#. Both eager mode and ``torch.compile`` is supported.
|
||||
#. Both eager mode and ``torch.compile`` are supported. The feature ``torch.compile`` is also supported on Windows from PyTorch* 2.7 with Intel GPU; refer to `How to Use Inductor on Windows with CPU/XPU <https://pytorch.org/tutorials/prototype/inductor_windows_cpu.html>`_.
|
||||
#. Data types such as FP32, BF16, FP16, and Automatic Mixed Precision (AMP) are all supported.
|
||||
|
||||
Examples
|
||||
|
||||
34
docs/source/notes/libtorch_stable_abi.md
Normal file
34
docs/source/notes/libtorch_stable_abi.md
Normal file
@ -0,0 +1,34 @@
|
||||
# LibTorch Stable ABI
|
||||
|
||||
This note will eventually contain more details on how to use the APIs in torch/csrc/stable. For the moment, it contains a table of internal representations:
|
||||
1. type in custom extension: type used within the end user custom library.
|
||||
2. StableIValue representation: a stable conversion of the type that serves as the liaison between the user model and libtorch.so in an ABI-stable manner.
|
||||
3. type in libtorch: type used within libtorch.so (or any code binary locked with libtorch).
|
||||
4. Schema Type: type as described by the schema, which we hail as the source of truth for both ATen ops in native_functions.yaml and for user defined custom operators registered to the dispatcher via TORCH_LIBRARY or torch.library.
|
||||
|
||||
| type in custom extension | StableIValue representation | type in libtorch | Schema Type |
|
||||
| -------- | ------- | ------- | ------- |
|
||||
| std::optional\<S> | \*reinterpret_cast\<(StableIValue\*)\*>, pointer to a StableIValue recursively defined | std::optional\<T> | Type? |
|
||||
| std::nullopt | \*reinterpret_cast\<nullptr_t\*> | IValue() | None |
|
||||
| RAIIATH | \*reinterpret_cast\<uint64_t\*> of AtenTensorHandle | at::Tensor | Tensor |
|
||||
| int32_t | \*reinterpret_cast\<uint64_t\*> | at::ScalarType | ScalarType |
|
||||
| int32_t | \*reinterpret_cast\<uint64_t\*> | at::Layout | Layout |
|
||||
| int32_t | \*reinterpret_cast\<uint64_t\*> | at::MemoryFormat | MemoryFormat |
|
||||
| bool | \*reinterpret_cast\<uint64_t\*> | bool | bool |
|
||||
| int64_t | \*reinterpret_cast\<uint64_t\*> | int64_t | int |
|
||||
| double | \*reinterpret_cast\<uint64_t\*> | double | float |
|
||||
| ? | ? | c10::Device | Device |
|
||||
| ? | ? | c10::Stream | Stream |
|
||||
| ? | ? | c10::complex<double> | complex |
|
||||
| ? | ? | at::Scalar | Scalar |
|
||||
| ? | ? | std::string/const char*/ivalue::ConstantString | str |
|
||||
| ? | ? | at::Storage | Storage |
|
||||
| ? | ? | at::Generator | Generator |
|
||||
| ? | ? | c10::List\<T> | Type[] |
|
||||
| ? | ? | ivalue::Tuple\<T> | (Type, ...) |
|
||||
| ? | ? | c10::SymInt | SymInt |
|
||||
| ? | ? | c10::SymFloat | SymFloat |
|
||||
| ? | ? | c10::SymBool | SymBool |
|
||||
| ? | ? | at::QScheme | QScheme |
|
||||
|
||||
Our confidently supported types are the ones in the table that have completed rows. For a limited set of use cases, we also implicitly support any literal type that is representable within 64 bits as StableIValues, as the default reinterpret_cast will succeed. You can work with StableIValue abstractions in your custom kernel for types such as c10::Device even if there is no standard defined representation of device in custom extensions. For example, a custom operator can take as argument a StableIValue device and directly pass it through to an aten operator with aoti_torch_call_dispatcher.
|
||||
@ -88,6 +88,7 @@ also be interested in reading our `development wiki <https://github.com/pytorch/
|
||||
:hidden:
|
||||
|
||||
onnx_dynamo
|
||||
onnx_verification
|
||||
onnx_dynamo_onnxruntime_backend
|
||||
onnx_torchscript
|
||||
|
||||
@ -99,6 +100,7 @@ also be interested in reading our `development wiki <https://github.com/pytorch/
|
||||
.. py:module:: torch.onnx.symbolic_helper
|
||||
.. py:module:: torch.onnx.symbolic_opset10
|
||||
.. py:module:: torch.onnx.symbolic_opset11
|
||||
.. py:module:: torch.onnx.symbolic_opset12
|
||||
.. py:module:: torch.onnx.symbolic_opset13
|
||||
.. py:module:: torch.onnx.symbolic_opset14
|
||||
.. py:module:: torch.onnx.symbolic_opset15
|
||||
@ -111,5 +113,3 @@ also be interested in reading our `development wiki <https://github.com/pytorch/
|
||||
.. py:module:: torch.onnx.symbolic_opset8
|
||||
.. py:module:: torch.onnx.symbolic_opset9
|
||||
.. py:module:: torch.onnx.utils
|
||||
.. py:module:: torch.onnx.verification
|
||||
.. py:module:: torch.onnx.symbolic_opset12
|
||||
@ -701,7 +701,6 @@ Functions
|
||||
.. autofunction:: unregister_custom_op_symbolic
|
||||
.. autofunction:: select_model_mode_for_export
|
||||
.. autofunction:: is_in_onnx_export
|
||||
.. autofunction:: torch.onnx.verification.find_mismatch
|
||||
|
||||
Classes
|
||||
^^^^^^^
|
||||
@ -712,5 +711,3 @@ Classes
|
||||
:template: classtemplate.rst
|
||||
|
||||
JitScalarType
|
||||
verification.GraphInfo
|
||||
verification.VerificationOptions
|
||||
|
||||
26
docs/source/onnx_verification.rst
Normal file
26
docs/source/onnx_verification.rst
Normal file
@ -0,0 +1,26 @@
|
||||
torch.onnx.verification
|
||||
=======================
|
||||
|
||||
.. automodule:: torch.onnx.verification
|
||||
|
||||
.. autofunction:: verify_onnx_program
|
||||
|
||||
.. autoclass:: VerificationInfo
|
||||
:members:
|
||||
|
||||
.. autofunction:: verify
|
||||
|
||||
Deprecated
|
||||
----------
|
||||
|
||||
The following classes and functions are deprecated.
|
||||
|
||||
.. Some deprecated members are not publicly shown
|
||||
.. py:class:: check_export_model_diff
|
||||
.. py:class:: GraphInfo
|
||||
.. py:class:: GraphInfoPrettyPrinter
|
||||
.. py:class:: OnnxBackend
|
||||
.. py:class:: OnnxTestCaseRepro
|
||||
.. py:class:: VerificationOptions
|
||||
.. py:function:: find_mismatch
|
||||
.. py:function:: verify_aten_graph
|
||||
@ -38,7 +38,8 @@ package.
|
||||
the following code will compile the model into a shared library for CUDA execution.
|
||||
Otherwise, the compiled artifact will run on CPU. For better performance during CPU inference,
|
||||
it is suggested to enable freezing by setting ``export TORCHINDUCTOR_FREEZING=1``
|
||||
before running the Python script below.
|
||||
before running the Python script below. The same behavior works in an environment with Intel®
|
||||
GPU as well.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
|
||||
@ -4,9 +4,9 @@ Profiling to understand torch.compile performance
|
||||
What to use torch.profiler for:
|
||||
-------------------------------
|
||||
|
||||
torch.profiler is helpful for understanding the performance of your program at a kernel-level granularity - for example, it can show graph breaks and GPU utilization at the level of the program. The data provided by the profiler can often help users understand where to investigate further to understand model performance.
|
||||
torch.profiler is helpful for understanding the performance of your program at a kernel-level granularity - for example, it can show graph breaks and resource utilization at the level of the program. The data provided by the profiler can often help users understand where to investigate further to understand model performance.
|
||||
|
||||
To understand kernel-level performance, other tools exist. NVIDIA's ncu tool can be used, or :ref:`inductor's profiling tools <torchinductor-gpu-profiling>`.
|
||||
To understand kernel-level performance, other tools can be used, such as `Nvidia Nsight compute tool <https://developer.nvidia.com/nsight-compute>`_, `AMD Omnitrace <https://rocm.docs.amd.com/projects/omnitrace/en/latest/>`_, Intel® VTune™ Profiler, or :ref:`inductor's profiling tools <torchinductor-gpu-profiling>`.
|
||||
|
||||
See also the `general pytorch profiler guide <https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html>`_.
|
||||
|
||||
@ -24,8 +24,10 @@ Basics of using torch.profiler and viewing traces
|
||||
import torch
|
||||
from torchvision.models import resnet18
|
||||
|
||||
model = resnet18().cuda()
|
||||
inputs = [torch.randn((5, 3, 224, 224), device='cuda') for _ in range(10)]
|
||||
device = 'cuda' # or 'cpu', 'xpu', etc.
|
||||
model = resnet18().to(device)
|
||||
|
||||
inputs = [torch.randn((5, 3, 224, 224), device=device) for _ in range(10)]
|
||||
|
||||
model_c = torch.compile(model)
|
||||
|
||||
@ -52,9 +54,9 @@ Here, we observe:
|
||||
* CompiledFunction and CompiledFunctionBackward events, which correspond to the dynamo-compiled regions.
|
||||
* CPU events at the top, and GPU events at the bottom.
|
||||
|
||||
**Flows between CPU and GPU events**
|
||||
**Flows between CPU and accelerator events**
|
||||
|
||||
Every kernel on the GPU occurs after being launched by code running on the CPU. The profiler can draw connections (i.e. “flows”) between the GPU and CPU events to show which CPU event launched a GPU kernel. This is particularly helpful because, with a few exceptions, GPU kernels are launched asynchronously.
|
||||
Every kernel on the accelerator occurs after being launched by code running on the CPU. The profiler can draw connections (i.e. “flows”) between the accelerator and CPU events to show which CPU event launched an accelerator kernel. This is particularly helpful because, with a few exceptions, accelerator kernels are launched asynchronously.
|
||||
|
||||
To view a flow connection, click on a GPU kernel and click “ac2g”:
|
||||
|
||||
@ -90,8 +92,10 @@ See an example below:
|
||||
import torch
|
||||
from torchvision.models import resnet18
|
||||
|
||||
model = resnet18().cuda()
|
||||
inputs = [torch.randn((5, 3, 224, 224), device='cuda') for _ in range(10)]
|
||||
# user can switch between cuda and xpu
|
||||
device = 'cuda'
|
||||
model = resnet18().to(device)
|
||||
inputs = [torch.randn((5, 3, 224, 224), device=device) for _ in range(10)]
|
||||
|
||||
model_c = torch.compile(model)
|
||||
|
||||
@ -103,7 +107,7 @@ See an example below:
|
||||
def fn(x):
|
||||
return x.sin().relu()
|
||||
|
||||
x = torch.rand((2, 2), device='cuda', requires_grad=True)
|
||||
x = torch.rand((2, 2), device=device, requires_grad=True)
|
||||
fn_c = torch.compile(fn)
|
||||
out = fn_c(x)
|
||||
out.sum().backward()
|
||||
@ -120,6 +124,7 @@ See an example below:
|
||||
.. figure:: _static/img/profiling_torch_compile/compilation_profiling.png
|
||||
:alt: A visualization in the chrome://trace viewer, showing dynamo and inductor compilation steps
|
||||
|
||||
|
||||
Note a few things:
|
||||
|
||||
* The first invocation should occur *during* profiling in order to capture compilation
|
||||
@ -146,6 +151,8 @@ See the synthetic example below for a demonstration:
|
||||
|
||||
import torch
|
||||
import torch._dynamo
|
||||
# user can switch between cuda and xpu
|
||||
device = 'cuda'
|
||||
|
||||
class ModelWithBreaks(torch.nn.Module):
|
||||
def __init__(self):
|
||||
@ -172,9 +179,8 @@ See the synthetic example below for a demonstration:
|
||||
mod4 = self.mod4(mod3)
|
||||
return mod4
|
||||
|
||||
|
||||
model = ModelWithBreaks().cuda()
|
||||
inputs = [torch.randn((128, 128), device='cuda') for _ in range(10)]
|
||||
model = ModelWithBreaks().to(device)
|
||||
inputs = [torch.randn((128, 128), device=device) for _ in range(10)]
|
||||
|
||||
model_c = torch.compile(model)
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <torch/csrc/inductor/aoti_package/model_package_loader.h>
|
||||
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
|
||||
@ -76,6 +77,32 @@ void test_aoti_script(const std::string& device) {
|
||||
}
|
||||
}
|
||||
|
||||
void test_aoti_package_loader(
|
||||
const std::string& device,
|
||||
bool use_runtime_constant_folding) {
|
||||
torch::NoGradGuard no_grad;
|
||||
|
||||
std::string data_path =
|
||||
(std::filesystem::path(STRINGIZE(CMAKE_CURRENT_BINARY_DIR)) / "data.pt")
|
||||
.string();
|
||||
torch::jit::script::Module data_loader = torch::jit::load(data_path);
|
||||
std::string suffix = use_runtime_constant_folding
|
||||
? device + "_use_runtime_constant_folding"
|
||||
: device;
|
||||
std::string path_attr = "pt2_package_path_" + suffix;
|
||||
std::string inputs_attr = "inputs_" + suffix;
|
||||
std::string outputs_attr = "outputs_" + suffix;
|
||||
const auto& pt2_package_path =
|
||||
data_loader.attr(path_attr.c_str()).toStringRef();
|
||||
const auto& ref_output_tensors =
|
||||
data_loader.attr(outputs_attr.c_str()).toTensorList().vec();
|
||||
|
||||
torch::inductor::AOTIModelPackageLoader runner(pt2_package_path);
|
||||
auto actual_output_tensors =
|
||||
runner.run(data_loader.attr(inputs_attr.c_str()).toTensorList().vec());
|
||||
ASSERT_TRUE(torch::allclose(ref_output_tensors[0], actual_output_tensors[0]));
|
||||
}
|
||||
|
||||
void test_aoti_constants_update(
|
||||
const std::string& device,
|
||||
bool use_runtime_constant_folding) {
|
||||
@ -300,6 +327,10 @@ TEST(AotInductorTest, BasicScriptTestCpu) {
|
||||
test_aoti_script("cpu");
|
||||
}
|
||||
|
||||
TEST(AotInductorTest, BasicPackageLoaderTestCpu) {
|
||||
test_aoti_package_loader("cpu", false);
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
TEST(AotInductorTest, BasicTestCuda) {
|
||||
test_aoti("cuda", true);
|
||||
@ -310,6 +341,10 @@ TEST(AotInductorTest, BasicScriptTestCuda) {
|
||||
test_aoti_script("cuda");
|
||||
}
|
||||
|
||||
TEST(AotInductorTest, BasicPackageLoaderTestCuda) {
|
||||
test_aoti_package_loader("cuda", false);
|
||||
}
|
||||
|
||||
TEST(AotInductorTest, RuntimeUpdateConstantsCuda) {
|
||||
test_aoti_constants_update("cuda", true);
|
||||
}
|
||||
|
||||
@ -57,6 +57,17 @@ def generate_basic_tests():
|
||||
"aot_inductor.use_runtime_constant_folding": use_runtime_constant_folding
|
||||
},
|
||||
)
|
||||
# Also store a .pt2 file using the aoti_compile_and_package API
|
||||
pt2_package_path = torch._inductor.aoti_compile_and_package(
|
||||
torch.export.export(
|
||||
model,
|
||||
(x,),
|
||||
dynamic_shapes=dynamic_shapes,
|
||||
),
|
||||
inductor_configs={
|
||||
"aot_inductor.use_runtime_constant_folding": use_runtime_constant_folding
|
||||
},
|
||||
)
|
||||
|
||||
suffix = f"{device}"
|
||||
if use_runtime_constant_folding:
|
||||
@ -64,6 +75,7 @@ def generate_basic_tests():
|
||||
data.update(
|
||||
{
|
||||
f"model_so_path_{suffix}": model_so_path,
|
||||
f"pt2_package_path_{suffix}": pt2_package_path,
|
||||
f"inputs_{suffix}": [x],
|
||||
f"outputs_{suffix}": [ref_output],
|
||||
f"w_pre_{suffix}": model.w_pre,
|
||||
@ -86,10 +98,15 @@ def generate_test_with_additional_tensors():
|
||||
torch._dynamo.reset()
|
||||
with torch.no_grad():
|
||||
model_so_path = aot_compile(model, (x, y))
|
||||
# Also store a .pt2 file using the aoti_compile_and_package API
|
||||
pt2_package_path = torch._inductor.aoti_compile_and_package(
|
||||
torch.export.export(model, (x, y))
|
||||
)
|
||||
|
||||
data_with_tensor_constants.update(
|
||||
{
|
||||
"model_so_path": model_so_path,
|
||||
"pt2_package_path": pt2_package_path,
|
||||
"inputs": [x, y],
|
||||
"outputs": [ref_output],
|
||||
"w": model.w,
|
||||
|
||||
@ -363,9 +363,6 @@ class TestDebugInfoWriter : public c10d::DebugInfoWriter {
|
||||
};
|
||||
|
||||
TEST_F(ProcessGroupNCCLErrorsTest, testNCCLErrorsNoHeartbeat) {
|
||||
// Note (kwen2501) 03/07/2025
|
||||
// TODO: re-enable
|
||||
GTEST_SKIP() << "Skipping test as the trace write seems unstable.";
|
||||
int heartBeatIntervalInSec = 2;
|
||||
std::string timeInterval = std::to_string(heartBeatIntervalInSec);
|
||||
ASSERT_TRUE(setenv(c10d::TORCH_NCCL_BLOCKING_WAIT[0].c_str(), "0", 1) == 0);
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
#include <torch/csrc/inductor/aoti_runtime/utils.h>
|
||||
#include <torch/csrc/stable/library.h>
|
||||
|
||||
#include <optional>
|
||||
|
||||
using RAIIATH = torch::aot_inductor::RAIIAtenTensorHandle;
|
||||
|
||||
void inline sgd_math(
|
||||
@ -147,3 +149,39 @@ STABLE_TORCH_LIBRARY_FRAGMENT(libtorch_agnostic, m) {
|
||||
STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeExplicitAutograd, m) {
|
||||
m.impl("my_abs", &boxed_my_abs);
|
||||
}
|
||||
|
||||
RAIIATH my_ones_like(RAIIATH t, StableIValue device) {
|
||||
const auto num_args = 6;
|
||||
StableIValue stack[num_args];
|
||||
|
||||
int32_t t_dtype;
|
||||
aoti_torch_get_dtype(t.get(), &t_dtype);
|
||||
auto mf = aoti_torch_memory_format_contiguous_format();
|
||||
|
||||
stack[0] = from(t.release());
|
||||
stack[1] = from(std::optional(t_dtype)); // dtype
|
||||
stack[2] = from(std::nullopt); // layout
|
||||
stack[3] = from(std::optional(device)); // device
|
||||
stack[4] = from(std::optional(false)); // pin_memory
|
||||
stack[5] = from(std::optional(mf)); // memory_format
|
||||
|
||||
aoti_torch_call_dispatcher("aten::ones_like", "", stack);
|
||||
|
||||
return RAIIATH(to<AtenTensorHandle>(stack[0]));
|
||||
}
|
||||
|
||||
void boxed_my_ones_like(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
|
||||
RAIIATH t(to<AtenTensorHandle>(stack[0]));
|
||||
StableIValue device = stack[1];
|
||||
|
||||
RAIIATH raiiath_res = my_ones_like(std::move(t), device);
|
||||
stack[0] = from(raiiath_res.release());
|
||||
}
|
||||
|
||||
STABLE_TORCH_LIBRARY_FRAGMENT(libtorch_agnostic, m) {
|
||||
m.def("my_ones_like(Tensor t, Device d) -> Tensor");
|
||||
}
|
||||
|
||||
STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeExplicitAutograd, m) {
|
||||
m.impl("my_ones_like", &boxed_my_ones_like);
|
||||
}
|
||||
|
||||
@ -49,3 +49,18 @@ def my_abs(t) -> Tensor:
|
||||
a Tensor
|
||||
"""
|
||||
return torch.ops.libtorch_agnostic.my_abs.default(t)
|
||||
|
||||
|
||||
def my_ones_like(tensor, device) -> Tensor:
|
||||
"""
|
||||
Returns a new Tensor like the input tensor, but with all ones
|
||||
|
||||
Args:
|
||||
tensor: any Tensor
|
||||
device: a device string
|
||||
|
||||
Returns:
|
||||
a ones Tensor with the same dtype and shape and other attributes
|
||||
like the input tensor
|
||||
"""
|
||||
return torch.ops.libtorch_agnostic.my_ones_like.default(tensor, device)
|
||||
|
||||
@ -53,7 +53,7 @@ class TestLibtorchAgnostic(TestCase):
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
def test_my_abs(self, device):
|
||||
t = torch.rand(32, 16, device=device)
|
||||
t = torch.rand(32, 16, device=device) - 0.5
|
||||
cpu_t = libtorch_agnostic.ops.my_abs(t)
|
||||
self.assertEqual(cpu_t, torch.abs(t))
|
||||
|
||||
@ -69,6 +69,23 @@ class TestLibtorchAgnostic(TestCase):
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
def test_my_ones_like(self, device):
|
||||
t = torch.rand(3, 1, device=device) - 0.5
|
||||
cpu_t = libtorch_agnostic.ops.my_ones_like(t, "cpu")
|
||||
self.assertEqual(cpu_t, torch.ones_like(t, device="cpu"))
|
||||
|
||||
def _make_cuda_tensors(prior_mem):
|
||||
cuda_t = libtorch_agnostic.ops.my_ones_like(t, device)
|
||||
self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
|
||||
self.assertEqual(cuda_t, torch.ones_like(t, device=device))
|
||||
|
||||
if t.is_cuda:
|
||||
init_mem = torch.cuda.memory_allocated(device)
|
||||
for _ in range(3):
|
||||
_make_cuda_tensors(init_mem)
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
@onlyCUDA
|
||||
def test_z_delete_torch_lib(self, device):
|
||||
# Why the z + CUDA? THIS TEST MUST BE RUN LAST
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# Basic CMake setup
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(custom_backend)
|
||||
|
||||
if(USE_ROCM)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# Basic CMake setup
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(custom_ops)
|
||||
|
||||
if(USE_ROCM)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../..)
|
||||
set(TEST_ROOT ${TORCH_ROOT}/test/edge)
|
||||
|
||||
@ -126,9 +126,6 @@ ALLOW_LIST = [
|
||||
("aten::reduce_scatter_tensor", datetime.date(9999, 1, 30)),
|
||||
("aten::all_gather_into_tensor", datetime.date(9999, 1, 30)),
|
||||
("aten::all_reduce", datetime.date(9999, 1, 30)),
|
||||
# These ops are defined in torch/csrc/distributed/c10d/Ops.cpp
|
||||
# TODO: add back restriction when c10d ops can be exported
|
||||
("c10d::.*", datetime.date(9999, 1, 1)),
|
||||
]
|
||||
|
||||
ALLOW_LIST_COMPILED = [
|
||||
|
||||
@ -6557,7 +6557,6 @@ symbolic_aot_autograd_failures = {
|
||||
"linalg.householder_product",
|
||||
decorator=unittest.skipIf(IS_MACOS and IS_X86, "flaky"),
|
||||
),
|
||||
xfail("stft", ""), # Cannot call sizes() on tensor with symbolic sizes/strides
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
|
||||
skipIfRocm,
|
||||
skipIfTorchDynamo,
|
||||
subtest,
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
TEST_WITH_TORCHDYNAMO,
|
||||
TestCase,
|
||||
xfailIfTorchDynamo,
|
||||
@ -2865,6 +2866,10 @@ class TestLinearize(TestCase):
|
||||
self.assertEqual(actual_jvp, expected_jvp)
|
||||
|
||||
@dtypes(torch.float)
|
||||
@unittest.skipIf(
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
"Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_linearize_return(self, device, dtype):
|
||||
x_p = make_tensor((3, 1), device=device, dtype=dtype)
|
||||
x_t = make_tensor((3, 1), device=device, dtype=dtype)
|
||||
@ -2879,6 +2884,10 @@ class TestLinearize(TestCase):
|
||||
self.assertEqual(actual_jvp, expected_jvp)
|
||||
|
||||
@dtypes(torch.float)
|
||||
@unittest.skipIf(
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
"Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_linearize_composition_vmap(self, device, dtype):
|
||||
x_p = make_tensor((3, 1), device=device, dtype=dtype)
|
||||
x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
|
||||
@ -2897,6 +2906,10 @@ class TestLinearize(TestCase):
|
||||
self.assertEqual(actual_batched_jvp, expected_batched_jvp)
|
||||
|
||||
@dtypes(torch.float)
|
||||
@unittest.skipIf(
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
"Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_linearize_composition_grad(self, device, dtype):
|
||||
x_p = make_tensor((3,), device=device, dtype=dtype)
|
||||
x_t = make_tensor((3,), device=device, dtype=dtype)
|
||||
@ -2916,6 +2929,10 @@ class TestLinearize(TestCase):
|
||||
self.assertEqual(actual_batched_jvp, expected_batched_jvp)
|
||||
|
||||
@dtypes(torch.float)
|
||||
@unittest.skipIf(
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
"Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_linearize_nested_input_nested_output(self, device, dtype):
|
||||
x_p = make_tensor((3, 1), device=device, dtype=dtype)
|
||||
x_t = make_tensor((3, 1), device=device, dtype=dtype)
|
||||
@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
|
||||
# torch.compile is not supported on Windows CUDA.
|
||||
# Triton only supports GPU with SM70 or later.
|
||||
@expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
|
||||
@unittest.skipIf(
|
||||
TEST_CUDA_MEM_LEAK_CHECK,
|
||||
"Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_compile_vmap_hessian(self, device):
|
||||
# The model and inputs are a smaller version
|
||||
# of code at benchmark repo:
|
||||
|
||||
@ -110,42 +110,6 @@ class TestDynamism(TestCase):
|
||||
}
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_property_not_implemented(self):
|
||||
class ModuleWithNotImplementedProperty(torch.nn.Module):
|
||||
def __init__(self, x, y):
|
||||
super().__init__()
|
||||
self.linear = torch.nn.Linear(x, y)
|
||||
|
||||
@property
|
||||
def not_implemented_property(self):
|
||||
raise NotImplementedError("This property is not implemented")
|
||||
|
||||
module1 = ModuleWithNotImplementedProperty(10, 10)
|
||||
module2 = ModuleWithNotImplementedProperty(10, 10)
|
||||
|
||||
result = track_dynamism_across_examples(
|
||||
[
|
||||
{"self": module1},
|
||||
{"self": module2},
|
||||
]
|
||||
)
|
||||
|
||||
expected = {
|
||||
"self": {
|
||||
"L['self']['_modules']['linear']['_parameters']['weight']": (
|
||||
False,
|
||||
False,
|
||||
),
|
||||
"L['self']['_modules']['linear']['_parameters']['bias']": (False,),
|
||||
"L['self']['_modules']['linear']['bias']": (False,),
|
||||
"L['self']['_modules']['linear']['in_features']": (False,),
|
||||
"L['self']['_modules']['linear']['out_features']": (False,),
|
||||
"L['self']['_modules']['linear']['weight']": (False, False),
|
||||
}
|
||||
}
|
||||
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_tests()
|
||||
|
||||
@ -568,7 +568,7 @@ def make_recompile_test(optim_cls, closure=None, kernel_count=2, **kwargs):
|
||||
|
||||
|
||||
class CompiledOptimizerParityTests(TestCase):
|
||||
@skipCUDAIf(not has_triton(), "torch.compile with cuda requires triton")
|
||||
@skipCUDAIf(True, "failing Adam and RMSprop")
|
||||
@skipXPUIf(not has_triton(), "torch.compile with xpu requires triton")
|
||||
@optims(optim_db, dtypes=[torch.float32])
|
||||
@parametrize("use_closure", [True, False])
|
||||
|
||||
@ -357,6 +357,9 @@ if RUN_CPU:
|
||||
), # multiple outputs, buffer clear
|
||||
BaseTest("test_view_as_complex"),
|
||||
BaseTest("test_view_as_real"),
|
||||
BaseTest(
|
||||
"test_woq_int4", "cpu", test_mkldnn_pattern_matcher.TestPatternMatcher()
|
||||
),
|
||||
]:
|
||||
make_test_case(
|
||||
item.name,
|
||||
|
||||
@ -4131,6 +4131,10 @@ class CPUReproTests(TestCase):
|
||||
"__at_align__ std::array", 0, exactly=True
|
||||
).run(code)
|
||||
|
||||
@unittest.skipIf(
|
||||
os.getenv("ATEN_CPU_CAPABILITY") == "default",
|
||||
"Failing in periodic nogpu_NO_AVX2, see #150059 for example",
|
||||
)
|
||||
def test_group_norm_large_input(self):
|
||||
class M(torch.nn.Module):
|
||||
def __init__(self) -> None:
|
||||
|
||||
@ -1292,6 +1292,185 @@ class CudaReproTests(TestCase):
|
||||
|
||||
self.assertEqual(ref, res)
|
||||
|
||||
@torch._inductor.config.patch(emulate_precision_casts=True)
|
||||
def test_dont_inplace_disjoint_accesses(self):
|
||||
# TODO - would not need mms if we could annotate donated buffer..
|
||||
def forward( # noqa: F821, F722
|
||||
arg0_1: "bf16[2048, 2048][2048, 1]cuda:0", # noqa: F821, F722
|
||||
arg1_1: "bf16[8, 4096, 2048][8388608, 2048, 1]cuda:0", # noqa: F821, F722
|
||||
arg2_1: "bf16[2048, 2048][2048, 1]cuda:0", # noqa: F821, F722
|
||||
arg3_1: "bf16[2048, 2048][2048, 1]cuda:0", # noqa: F821, F722
|
||||
arg4_1: "bf16[2048][1]cuda:0", # noqa: F821, F722
|
||||
arg5_1: "bf16[2048][1]cuda:0", # noqa: F821, F722
|
||||
arg6_1: "f32[4096, 128][128, 1]cuda:0", # noqa: F821, F722
|
||||
arg7_1: "f32[4096, 128][128, 1]cuda:0", # noqa: F821, F722
|
||||
):
|
||||
permute = torch.ops.aten.permute.default(arg0_1, [1, 0])
|
||||
arg0_1 = None
|
||||
view = torch.ops.aten.view.default(arg1_1, [32768, 2048])
|
||||
mm = torch.ops.aten.mm.default(view, permute)
|
||||
view = permute = None
|
||||
view_1 = torch.ops.aten.view.default(mm, [8, 4096, 2048])
|
||||
mm = None
|
||||
permute_1 = torch.ops.aten.permute.default(arg2_1, [1, 0])
|
||||
arg2_1 = None
|
||||
view_2 = torch.ops.aten.view.default(arg1_1, [32768, 2048])
|
||||
mm_1 = torch.ops.aten.mm.default(view_2, permute_1)
|
||||
view_2 = permute_1 = None
|
||||
view_3 = torch.ops.aten.view.default(mm_1, [8, 4096, 2048])
|
||||
mm_1 = None
|
||||
permute_2 = torch.ops.aten.permute.default(arg3_1, [1, 0])
|
||||
arg3_1 = None
|
||||
view_4 = torch.ops.aten.view.default(arg1_1, [32768, 2048])
|
||||
arg1_1 = None
|
||||
mm_2 = torch.ops.aten.mm.default(view_4, permute_2)
|
||||
view_4 = permute_2 = None
|
||||
view_5 = torch.ops.aten.view.default(mm_2, [8, 4096, 2048])
|
||||
mm_2 = None
|
||||
convert_element_type_6 = torch.ops.prims.convert_element_type.default(
|
||||
view_1, torch.float32
|
||||
)
|
||||
view_1 = None
|
||||
pow_1 = torch.ops.aten.pow.Tensor_Scalar(convert_element_type_6, 2)
|
||||
mean = torch.ops.aten.mean.dim(pow_1, [-1], True)
|
||||
pow_1 = None
|
||||
add = torch.ops.aten.add.Tensor(mean, 1e-06)
|
||||
mean = None
|
||||
rsqrt = torch.ops.aten.rsqrt.default(add)
|
||||
add = None
|
||||
mul = torch.ops.aten.mul.Tensor(convert_element_type_6, rsqrt)
|
||||
convert_element_type_6 = rsqrt = None
|
||||
convert_element_type_7 = torch.ops.prims.convert_element_type.default(
|
||||
arg4_1, torch.float32
|
||||
)
|
||||
arg4_1 = None
|
||||
mul_1 = torch.ops.aten.mul.Tensor(convert_element_type_7, mul)
|
||||
convert_element_type_7 = mul = None
|
||||
convert_element_type_8 = torch.ops.prims.convert_element_type.default(
|
||||
mul_1, torch.bfloat16
|
||||
)
|
||||
mul_1 = None
|
||||
convert_element_type_9 = torch.ops.prims.convert_element_type.default(
|
||||
view_3, torch.float32
|
||||
)
|
||||
view_3 = None
|
||||
pow_2 = torch.ops.aten.pow.Tensor_Scalar(convert_element_type_9, 2)
|
||||
mean_1 = torch.ops.aten.mean.dim(pow_2, [-1], True)
|
||||
pow_2 = None
|
||||
add_1 = torch.ops.aten.add.Tensor(mean_1, 1e-06)
|
||||
mean_1 = None
|
||||
rsqrt_1 = torch.ops.aten.rsqrt.default(add_1)
|
||||
add_1 = None
|
||||
mul_2 = torch.ops.aten.mul.Tensor(convert_element_type_9, rsqrt_1)
|
||||
convert_element_type_9 = rsqrt_1 = None
|
||||
convert_element_type_10 = torch.ops.prims.convert_element_type.default(
|
||||
arg5_1, torch.float32
|
||||
)
|
||||
arg5_1 = None
|
||||
mul_3 = torch.ops.aten.mul.Tensor(convert_element_type_10, mul_2)
|
||||
convert_element_type_10 = mul_2 = None
|
||||
convert_element_type_11 = torch.ops.prims.convert_element_type.default(
|
||||
mul_3, torch.bfloat16
|
||||
)
|
||||
mul_3 = None
|
||||
view_6 = torch.ops.aten.view.default(
|
||||
convert_element_type_8, [8, 4096, -1, 128]
|
||||
)
|
||||
convert_element_type_8 = None
|
||||
view_7 = torch.ops.aten.view.default(
|
||||
convert_element_type_11, [8, 4096, -1, 128]
|
||||
)
|
||||
convert_element_type_11 = None
|
||||
view_8 = torch.ops.aten.view.default(view_5, [8, 4096, -1, 128])
|
||||
view_5 = None
|
||||
convert_element_type_12 = torch.ops.prims.convert_element_type.default(
|
||||
view_6, torch.float32
|
||||
)
|
||||
view_6 = None
|
||||
convert_element_type_13 = torch.ops.prims.convert_element_type.default(
|
||||
view_7, torch.float32
|
||||
)
|
||||
view_7 = None
|
||||
unsqueeze = torch.ops.aten.unsqueeze.default(arg6_1, 0)
|
||||
unsqueeze_1 = torch.ops.aten.unsqueeze.default(unsqueeze, 2)
|
||||
unsqueeze = None
|
||||
unsqueeze_2 = torch.ops.aten.unsqueeze.default(arg7_1, 0)
|
||||
unsqueeze_3 = torch.ops.aten.unsqueeze.default(unsqueeze_2, 2)
|
||||
unsqueeze_2 = None
|
||||
mul_4 = torch.ops.aten.mul.Tensor(convert_element_type_12, unsqueeze_3)
|
||||
unsqueeze_3 = None
|
||||
view_9 = torch.ops.aten.view.default(
|
||||
convert_element_type_12, [8, 4096, 16, 2, 64]
|
||||
)
|
||||
convert_element_type_12 = None
|
||||
unbind = torch.ops.aten.unbind.int(view_9, -2)
|
||||
view_9 = None
|
||||
getitem = unbind[0]
|
||||
getitem_1 = unbind[1]
|
||||
unbind = None
|
||||
neg = torch.ops.aten.neg.default(getitem_1)
|
||||
getitem_1 = None
|
||||
cat = torch.ops.aten.cat.default([neg, getitem], -1)
|
||||
neg = getitem = None
|
||||
mul_5 = torch.ops.aten.mul.Tensor(cat, unsqueeze_1)
|
||||
cat = unsqueeze_1 = None
|
||||
add_2 = torch.ops.aten.add.Tensor(mul_4, mul_5)
|
||||
mul_4 = mul_5 = None
|
||||
unsqueeze_4 = torch.ops.aten.unsqueeze.default(arg6_1, 0)
|
||||
arg6_1 = None
|
||||
unsqueeze_5 = torch.ops.aten.unsqueeze.default(unsqueeze_4, 2)
|
||||
unsqueeze_4 = None
|
||||
unsqueeze_6 = torch.ops.aten.unsqueeze.default(arg7_1, 0)
|
||||
arg7_1 = None
|
||||
unsqueeze_7 = torch.ops.aten.unsqueeze.default(unsqueeze_6, 2)
|
||||
unsqueeze_6 = None
|
||||
mul_6 = torch.ops.aten.mul.Tensor(convert_element_type_13, unsqueeze_7)
|
||||
unsqueeze_7 = None
|
||||
view_10 = torch.ops.aten.view.default(
|
||||
convert_element_type_13, [8, 4096, 16, 2, 64]
|
||||
)
|
||||
convert_element_type_13 = None
|
||||
unbind_1 = torch.ops.aten.unbind.int(view_10, -2)
|
||||
view_10 = None
|
||||
getitem_2 = unbind_1[0]
|
||||
getitem_3 = unbind_1[1]
|
||||
unbind_1 = None
|
||||
neg_1 = torch.ops.aten.neg.default(getitem_3)
|
||||
getitem_3 = None
|
||||
cat_1 = torch.ops.aten.cat.default([neg_1, getitem_2], -1)
|
||||
neg_1 = getitem_2 = None
|
||||
mul_7 = torch.ops.aten.mul.Tensor(cat_1, unsqueeze_5)
|
||||
cat_1 = unsqueeze_5 = None
|
||||
add_3 = torch.ops.aten.add.Tensor(mul_6, mul_7)
|
||||
mul_6 = mul_7 = None
|
||||
convert_element_type_14 = torch.ops.prims.convert_element_type.default(
|
||||
add_2, torch.bfloat16
|
||||
)
|
||||
add_2 = None
|
||||
convert_element_type_15 = torch.ops.prims.convert_element_type.default(
|
||||
add_3, torch.bfloat16
|
||||
)
|
||||
add_3 = None
|
||||
permute_3 = torch.ops.aten.permute.default(
|
||||
convert_element_type_14, [0, 2, 1, 3]
|
||||
)
|
||||
convert_element_type_14 = None
|
||||
permute_4 = torch.ops.aten.permute.default(
|
||||
convert_element_type_15, [0, 2, 1, 3]
|
||||
)
|
||||
convert_element_type_15 = None
|
||||
permute_5 = torch.ops.aten.permute.default(view_8, [0, 2, 1, 3])
|
||||
view_8 = None
|
||||
return (permute_3, permute_4, permute_5)
|
||||
|
||||
from torch._dynamo.debug_utils import aot_graph_input_parser
|
||||
|
||||
kwargs = aot_graph_input_parser(forward)
|
||||
out, code = run_and_get_code(torch.compile(forward), **kwargs)
|
||||
# ignore tiny values.. prior to this fix absolute error was ~28
|
||||
self.assertEqual(forward(**kwargs), out, atol=0.01, rtol=2)
|
||||
FileCheck().check_not("in_out").run(code[0])
|
||||
|
||||
# https://github.com/pytorch/pytorch/issues/104937
|
||||
def test_linear_with_zero_infeature_size(self):
|
||||
m = nn.Linear(in_features=0, out_features=0, bias=True).to("cuda")
|
||||
|
||||
@ -3835,7 +3835,7 @@ class TestPatternMatcher(TestPatternMatcherBase):
|
||||
include_ops = [
|
||||
"aoti_torch_cpu__weight_int4pack_mm_cpu_tensor"
|
||||
if torch._inductor.config.cpp_wrapper
|
||||
else "extern_kernels.int4mm_packed_weight_cpu"
|
||||
else "torch.ops.quantized.int4mm_packed_weight_cpu.default"
|
||||
]
|
||||
self._test_code_common(
|
||||
m,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# Basic CMake setup
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(jit_hooks)
|
||||
|
||||
if(USE_ROCM)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
project(custom_build_project)
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../..)
|
||||
set(TEST_ROOT ${TORCH_ROOT}/test/mobile/lightweight_dispatch)
|
||||
|
||||
@ -2169,6 +2169,10 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
|
||||
@skipIfTorchDynamo("profiler gets ignored if dynamo activated")
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
|
||||
@unittest.skipIf(not kineto_available(), "Kineto is required")
|
||||
@unittest.skipIf(
|
||||
"RelWithAssert" in torch.__config__.show(),
|
||||
"failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
|
||||
)
|
||||
def test_profile_all_threads(self):
|
||||
profiling_started = threading.Event()
|
||||
profiling_ended = threading.Event()
|
||||
|
||||
@ -10,6 +10,8 @@ from torch.testing._internal.common_utils import NoTest, run_tests, TEST_MPS, Te
|
||||
if not torch.accelerator.is_available():
|
||||
print("No available accelerator detected, skipping tests", file=sys.stderr)
|
||||
TestCase = NoTest # noqa: F811
|
||||
# Skip because failing when run on cuda build with no GPU, see #150059 for example
|
||||
sys.exit()
|
||||
|
||||
TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1
|
||||
|
||||
|
||||
@ -270,22 +270,34 @@ class TestCppExtensionAOT(common.TestCase):
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
# (3) test calling our dispatcher on ones_like
|
||||
t = torch.rand(32, 16, device=device)
|
||||
cpu_t = libtorch_agnostic.ops.my_abs(t)
|
||||
self.assertEqual(cpu_t, torch.abs(t))
|
||||
# (3a) test calling our dispatcher on easy API like abs
|
||||
t = torch.rand(32, 16, device=device) - 0.5
|
||||
|
||||
def _make_cuda_tensors(prior_mem):
|
||||
cuda_t = libtorch_agnostic.ops.my_abs(t)
|
||||
self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
|
||||
self.assertEqual(cuda_t, torch.abs(t))
|
||||
|
||||
if t.is_cuda:
|
||||
init_mem = torch.cuda.memory_allocated(device)
|
||||
for _ in range(3):
|
||||
_make_cuda_tensors(init_mem)
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
init_mem = torch.cuda.memory_allocated(device)
|
||||
for _ in range(3):
|
||||
_make_cuda_tensors(init_mem)
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
# (3b) and on factory API like ones_like
|
||||
cpu_t = libtorch_agnostic.ops.my_ones_like(t, "cpu")
|
||||
self.assertEqual(cpu_t, torch.ones_like(t, device="cpu"))
|
||||
|
||||
def _make_cuda_tensors(prior_mem):
|
||||
cuda_t = libtorch_agnostic.ops.my_ones_like(t, t.device)
|
||||
self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
|
||||
self.assertEqual(cuda_t, torch.ones_like(t, device=t.device))
|
||||
|
||||
init_mem = torch.cuda.memory_allocated(device)
|
||||
for _ in range(3):
|
||||
_make_cuda_tensors(init_mem)
|
||||
curr_mem = torch.cuda.memory_allocated(device)
|
||||
self.assertEqual(curr_mem, init_mem)
|
||||
|
||||
|
||||
@torch.testing._internal.common_utils.markDynamoStrictTest
|
||||
|
||||
@ -1124,6 +1124,45 @@ class TestCppExtensionJIT(common.TestCase):
|
||||
self.assertEqual(pch_exist, True)
|
||||
self.assertEqual(signature_exist, True)
|
||||
|
||||
def test_aoti_torch_call_dispatcher(self):
|
||||
source = """
|
||||
#include <torch/csrc/inductor/aoti_runtime/utils.h>
|
||||
#include <torch/csrc/inductor/aoti_torch/utils.h>
|
||||
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
|
||||
#include <torch/csrc/stable/library.h>
|
||||
|
||||
using RAIIATH = torch::aot_inductor::RAIIAtenTensorHandle;
|
||||
|
||||
at::Tensor my_abs(at::Tensor x) {
|
||||
StableIValue stack[1];
|
||||
RAIIATH raii(torch::aot_inductor::new_tensor_handle(std::move(x)));
|
||||
stack[0] = from(raii.release());
|
||||
aoti_torch_call_dispatcher("aten::abs", "", stack);
|
||||
RAIIATH res(to<AtenTensorHandle>(stack[0]));
|
||||
return *reinterpret_cast<at::Tensor*>(res.release());
|
||||
}
|
||||
|
||||
at::Tensor my_floor(at::Tensor x) {
|
||||
StableIValue stack[1];
|
||||
RAIIATH raii(torch::aot_inductor::new_tensor_handle(std::move(x)));
|
||||
stack[0] = from(raii.release());
|
||||
aoti_torch_call_dispatcher("aten::floor", "", stack);
|
||||
RAIIATH res(to<AtenTensorHandle>(stack[0]));
|
||||
return *reinterpret_cast<at::Tensor*>(res.release());
|
||||
}
|
||||
"""
|
||||
module = torch.utils.cpp_extension.load_inline(
|
||||
name="inline_extension_using_shim_dispatcher",
|
||||
cpp_sources=[source],
|
||||
functions=["my_abs", "my_floor"],
|
||||
)
|
||||
|
||||
t = torch.rand(2, 3) - 1.0
|
||||
floor_t = module.my_floor(t)
|
||||
abs_t = module.my_abs(t)
|
||||
self.assertEqual(abs_t, torch.abs(t))
|
||||
self.assertEqual(floor_t, torch.floor(t))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
common.run_tests()
|
||||
|
||||
@ -586,6 +586,64 @@ class TestCuda(TestCase):
|
||||
q_copy[1].fill_(10)
|
||||
self.assertEqual(q_copy[3], torch.cuda.IntStorage(10).fill_(10))
|
||||
|
||||
@setBlasBackendsToDefaultFinally
def test_preferred_blas_library_settings(self):
    """Exercise torch.backends.cuda.preferred_blas_library as getter and setter.

    Covers the platform default, the accepted string aliases, rejection of
    unknown strings and unsupported value types, and the
    TORCH_BLAS_PREFER_* environment overrides (checked in a fresh
    interpreter).
    """
    backends = torch._C._BlasBackend

    def _assert_platform_default():
        current = torch.backends.cuda.preferred_blas_library()
        if torch.version.cuda:
            # CUDA logic is easy, it's always cublas
            expected = backends.Cublas
        else:
            # ROCm logic is less so, it's cublaslt for some Instinct, cublas for all else
            arch = str(
                torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0]
            )
            lt_archs = ["gfx90a", "gfx942", "gfx950"]
            expected = backends.Cublaslt if arch in lt_archs else backends.Cublas
        self.assertTrue(current == expected)

    _assert_platform_default()
    # "Default" can be set but is immediately reset internally to the actual default value.
    after_default = torch.backends.cuda.preferred_blas_library("default")
    self.assertTrue(after_default != backends.Default)
    _assert_platform_default()

    # Both "cublas" and "hipblas" must select the Cublas backend.
    for alias in ("cublas", "hipblas"):
        self.assertTrue(
            torch.backends.cuda.preferred_blas_library(alias) == backends.Cublas
        )

    # check bad strings
    unknown_value_msg = (
        "Unknown input value. Choose from: default, cublas, hipblas, cublaslt, hipblaslt, ck."
    )
    with self.assertRaisesRegex(RuntimeError, unknown_value_msg):
        torch.backends.cuda.preferred_blas_library("unknown")

    # check bad input type
    with self.assertRaisesRegex(RuntimeError, "Unknown input value type."):
        torch.backends.cuda.preferred_blas_library(1.0)

    # check env var override: each variable is tried on its own in a child
    # interpreter, and both must report the Cublaslt backend.
    probe_script = "import torch;print(torch.backends.cuda.preferred_blas_library())"
    overrides = [
        {"TORCH_BLAS_PREFER_CUBLASLT": "1"},
        {"TORCH_BLAS_PREFER_HIPBLASLT": "1"},
    ]
    for override in overrides:
        child_env = dict(os.environ)
        child_env.update(override)
        raw = subprocess.check_output(
            [sys.executable, "-c", probe_script], env=child_env
        )
        self.assertEqual("_BlasBackend.Cublaslt", raw.decode("ascii").strip())
|
||||
|
||||
@unittest.skipIf(TEST_CUDAMALLOCASYNC, "temporarily disabled for async")
|
||||
@setBlasBackendsToDefaultFinally
|
||||
def test_cublas_workspace_explicit_allocation(self):
|
||||
|
||||
@ -4,6 +4,7 @@ import copy
|
||||
import itertools
|
||||
import functools
|
||||
import unittest
|
||||
import warnings
|
||||
from contextlib import nullcontext
|
||||
|
||||
try:
|
||||
@ -1612,6 +1613,16 @@ class TestMkldnn(TestCase):
|
||||
]:
|
||||
common(self, shape1, shape2, op, dtype)
|
||||
|
||||
def test_mkldnn_setflags_nowarn(self, device):
    """Check that restoring saved mkldnn flags emits no warning.

    Regression test for https://github.com/pytorch/pytorch/issues/149829

    Args:
        device: Supplied by the device-type test instantiation; the test
            body does not use it.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning. Without this, a warning dropped by the
        # active filters, or one that already fired once from the same
        # location, would not be recorded and the check below could pass
        # vacuously.
        warnings.simplefilter("always")
        # torch.backends.mkldnn.set_flags() returns the previously set flags.
        saved_flags = torch.backends.mkldnn.set_flags()
        # Setting those flags back should not cause a warning.
        torch.backends.mkldnn.set_flags(*saved_flags)
    # Above should trigger no warnings regardless of configuration
    self.assertEqual(len(caught), 0)
|
||||
|
||||
|
||||
instantiate_device_type_tests(TestMkldnn, globals(), only_for=('cpu',))
|
||||
|
||||
|
||||
@ -394,6 +394,7 @@ def mps_ops_modifier(ops):
|
||||
'constant_pad_nd',
|
||||
'cos',
|
||||
'cosh',
|
||||
'cov',
|
||||
'count_nonzero',
|
||||
'diff',
|
||||
'div',
|
||||
@ -7848,13 +7849,21 @@ class TestMPS(TestCaseMPS):
|
||||
self.assertEqual(tril_result, tril_result_cpu)
|
||||
self.assertEqual(x.grad, cpu_x.grad)
|
||||
|
||||
helper((2, 8, 4, 5))
|
||||
helper((2, 8, 4, 5), diag=1)
|
||||
helper((2, 8, 4, 5), diag=2)
|
||||
helper((2, 8, 4, 5), diag=3)
|
||||
helper((2, 8, 4, 5), diag=-1)
|
||||
helper((2, 8, 4, 5), diag=-2)
|
||||
helper((2, 8, 4, 5), diag=-3)
|
||||
for diag in [0, 1, 2, 3, -1, -2, -3]:
|
||||
helper((2, 8, 4, 5), diag=diag)
|
||||
|
||||
def helper_nans_infs(value, diag_vals=(0, 1, -2)):
|
||||
"""For nans and infs"""
|
||||
mps_tensor = torch.full((2, 2, 5, 5), value, device="mps")
|
||||
cpu_tensor = torch.full((2, 2, 5, 5), value, device="cpu")
|
||||
for diag in diag_vals:
|
||||
mps_result = torch.tril(mps_tensor, diagonal=diag)
|
||||
cpu_result = torch.tril(cpu_tensor, diagonal=diag)
|
||||
self.assertEqual(mps_result, cpu_result, f"Mismatch for diag={diag}")
|
||||
|
||||
helper_nans_infs(float("inf"))
|
||||
helper_nans_infs(float("-inf"))
|
||||
helper_nans_infs(float("nan"))
|
||||
|
||||
# test eye
|
||||
def test_eye(self):
|
||||
@ -9909,6 +9918,29 @@ class TestSDPA(TestCaseMPS):
|
||||
y_ref = F.scaled_dot_product_attention(q.cpu(), k.cpu(), v.cpu(), attn_mask=mask.cpu(), dropout_p=0.0, is_causal=False)
|
||||
self._compare_tensors(y.cpu(), y_ref)
|
||||
|
||||
@parametrize("dtype", [torch.float16, torch.float32])
@parametrize("is_causal", [True, False])
def test_sdpa_enable_gqa(self, dtype, is_causal):
    """Compare SDPA with enable_gqa=True on MPS (math backend) to the CPU result.

    The query has 32 heads while key and value have 16.
    """
    batch, num_q_heads, num_kv_heads = 2, 32, 16
    q_len, kv_len, head_size = 7, 17, 23

    def _randn(heads, seq_len):
        return torch.randn(
            [batch, heads, seq_len, head_size], dtype=dtype, device="mps"
        )

    # Keep the q, k, v creation order so the random stream is consumed
    # in the same sequence.
    q = _randn(num_q_heads, q_len)
    k = _randn(num_kv_heads, kv_len)
    v = _randn(num_kv_heads, kv_len)

    sdpa_kwargs = dict(dropout_p=0.0, is_causal=is_causal, enable_gqa=True)

    # Reference result computed from CPU copies of the same inputs.
    y_ref = F.scaled_dot_product_attention(q.cpu(), k.cpu(), v.cpu(), **sdpa_kwargs)

    math_only = [torch.nn.attention.SDPBackend.MATH]
    with torch.nn.attention.sdpa_kernel(math_only):
        y = F.scaled_dot_product_attention(q, k, v, **sdpa_kwargs)
        self._compare_tensors(y.cpu(), y_ref)
|
||||
|
||||
|
||||
class TestGatherScatter(TestCaseMPS):
|
||||
def test_slicing_with_step(self):
|
||||
@ -12435,8 +12467,16 @@ MPS_GRAD_DTYPES = [torch.float32, torch.float16]
|
||||
|
||||
def transform_opinfo_sample_to_mps(sample):
|
||||
"""Transforms opinfo.core.SampleInput from CPU to MPS"""
|
||||
mps_sample = sample.transform(
|
||||
lambda x: x.detach().to("mps").requires_grad_(x.requires_grad) if isinstance(x, torch.Tensor) else x)
|
||||
def transform_sample(x):
|
||||
if not isinstance(x, torch.Tensor):
|
||||
return x
|
||||
requires_grad = x.requires_grad
|
||||
conjugated = x.is_conj()
|
||||
rc = x.detach()
|
||||
rc = rc.to("mps") if not conjugated else x.conj().to("mps").conj()
|
||||
return rc.requires_grad_(x.requires_grad)
|
||||
|
||||
mps_sample = sample.transform(transform_sample)
|
||||
|
||||
# Transform kwargs `device="cpu"` to `device="mps"`
|
||||
if mps_sample.kwargs.get("device", "") == "cpu":
|
||||
@ -12555,12 +12595,14 @@ class TestConsistency(TestCaseMPS):
|
||||
@ops(mps_ops_modifier(test_consistency_op_db), allowed_dtypes=MPS_DTYPES)
|
||||
def test_output_match(self, device, dtype, op):
|
||||
self.assertEqual(device, "cpu")
|
||||
include_conjugated_inputs = dtype.is_complex and op.test_conjugated_samples
|
||||
|
||||
def get_samples():
|
||||
return op.sample_inputs(
|
||||
device,
|
||||
dtype,
|
||||
requires_grad=(dtype.is_floating_point or dtype.is_complex),
|
||||
include_conjugated_inputs=include_conjugated_inputs,
|
||||
# TODO: Enable per-sample seed setting and tweak tolerances / fix xfails
|
||||
set_seed=False,
|
||||
)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
# Owner(s): ["module: __torch_function__"]
|
||||
|
||||
import sys
|
||||
import torch
|
||||
import numpy as np
|
||||
import inspect
|
||||
@ -9,6 +10,7 @@ import pickle
|
||||
import collections
|
||||
import unittest
|
||||
import contextlib
|
||||
import os
|
||||
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
|
||||
from torch.overrides import (
|
||||
@ -29,6 +31,14 @@ from torch.utils._pytree import tree_map
|
||||
|
||||
Tensor = torch.Tensor
|
||||
|
||||
if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
|
||||
# Skip this whole module when ATEN_CPU_CAPABILITY is set to "default" or "avx2"; the message printed below gives the reason
|
||||
print(
|
||||
"Skipping due to failing when cuda build runs on non cuda machine, "
|
||||
+ "see https://github.com/pytorch/pytorch/pull/150059 for example"
|
||||
)
|
||||
sys.exit()
|
||||
|
||||
# The functions below simulate the pure-python torch functions in the
|
||||
# torch.functional namespace. We use examples local to this file rather
|
||||
# than any of the real examples implemented in Python since in the
|
||||
|
||||
2
third_party/xpu.txt
vendored
2
third_party/xpu.txt
vendored
@ -1 +1 @@
|
||||
026b2c8c7c92a7b2cec5d26334006e3423251cc6
|
||||
3ee2bd2f13e1ed17a685986ff667a58bed5f2aa5
|
||||
|
||||
@ -1309,6 +1309,7 @@ def _get_blas_preferred_backend() -> torch._C._BlasBackend: ...
|
||||
def _set_blas_preferred_backend(arg: torch._C._BlasBackend): ...
|
||||
|
||||
class _BlasBackend:
|
||||
Default: _BlasBackend
|
||||
Cublas: _BlasBackend
|
||||
Cublaslt: _BlasBackend
|
||||
Ck: _BlasBackend
|
||||
@ -2534,12 +2535,6 @@ class _NodeBase:
|
||||
return_type: Any,
|
||||
) -> None: ...
|
||||
def _update_args_kwargs(self, args: tuple[Any, ...], kwargs: dict[str, Any]): ...
|
||||
def _prepend(self, n: FxNode) -> None: ...
|
||||
def _remove_from_list(self) -> None: ...
|
||||
def __lt__(self, n: Self) -> _bool: ...
|
||||
def __gt__(self, n: Self) -> _bool: ...
|
||||
def __le__(self, n: Self) -> _bool: ...
|
||||
def __ge__(self, n: Self) -> _bool: ...
|
||||
|
||||
class _NodeIter(Iterator):
|
||||
def __init__(self, root: FxNode, reversed: _bool) -> None: ...
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# mypy: disable-error-code="type-arg"
|
||||
from datetime import timedelta
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, overload
|
||||
from typing import Any, overload
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
@ -139,8 +139,6 @@ class BroadcastOptions:
|
||||
class AllreduceOptions:
|
||||
reduceOp: ReduceOp
|
||||
timeout: timedelta
|
||||
asyncOp: bool
|
||||
sparseIndices: Optional[Tensor]
|
||||
|
||||
class AllreduceCoalescedOptions(AllreduceOptions): ...
|
||||
|
||||
@ -149,7 +147,6 @@ class ReduceOptions:
|
||||
rootRank: int
|
||||
rootTensor: int
|
||||
timeout: timedelta
|
||||
asyncOp: bool
|
||||
|
||||
class AllgatherOptions:
|
||||
timeout: timedelta
|
||||
@ -158,7 +155,6 @@ class AllgatherOptions:
|
||||
class GatherOptions:
|
||||
rootRank: int
|
||||
timeout: timedelta
|
||||
asyncOp: bool
|
||||
|
||||
class ScatterOptions:
|
||||
rootRank: int
|
||||
@ -174,11 +170,9 @@ class BarrierOptions:
|
||||
device_ids: list[int]
|
||||
device: torch.device
|
||||
timeout: timedelta
|
||||
asyncOp: bool
|
||||
|
||||
class AllToAllOptions:
|
||||
timeout: timedelta
|
||||
asyncOp: bool
|
||||
|
||||
class Store:
|
||||
def set(self, key: str, value: str): ...
|
||||
|
||||
@ -271,6 +271,16 @@ if sys.platform == "win32":
|
||||
del _load_dll_libraries
|
||||
|
||||
|
||||
def _get_cuda_dep_paths(path: str, lib_folder: str, lib_name: str) -> list[str]:
|
||||
# Libraries can either be in path/nvidia/lib_folder/lib or path/lib_folder/lib
|
||||
nvidia_lib_paths = glob.glob(
|
||||
os.path.join(path, "nvidia", lib_folder, "lib", lib_name)
|
||||
)
|
||||
lib_paths = glob.glob(os.path.join(path, lib_folder, "lib", lib_name))
|
||||
|
||||
return nvidia_lib_paths + lib_paths
|
||||
|
||||
|
||||
def _preload_cuda_deps(lib_folder: str, lib_name: str) -> None:
|
||||
"""Preloads cuda deps if they could not be found otherwise."""
|
||||
# Should only be called on Linux if default path resolution has failed
|
||||
@ -278,21 +288,9 @@ def _preload_cuda_deps(lib_folder: str, lib_name: str) -> None:
|
||||
|
||||
lib_path = None
|
||||
for path in sys.path:
|
||||
nvidia_path = os.path.join(path, "nvidia")
|
||||
if not os.path.exists(nvidia_path):
|
||||
continue
|
||||
candidate_lib_paths = glob.glob(
|
||||
os.path.join(nvidia_path, lib_folder, "lib", lib_name)
|
||||
)
|
||||
# if path/nvidia/lib_folder/ is not found look in path/lib_folder/
|
||||
if not candidate_lib_paths:
|
||||
candidate_lib_paths = glob.glob(
|
||||
os.path.join(path, lib_folder, "lib", lib_name)
|
||||
)
|
||||
|
||||
if candidate_lib_paths and not lib_path:
|
||||
candidate_lib_paths = _get_cuda_dep_paths(path, lib_folder, lib_name)
|
||||
if candidate_lib_paths:
|
||||
lib_path = candidate_lib_paths[0]
|
||||
if lib_path:
|
||||
break
|
||||
if not lib_path:
|
||||
raise ValueError(f"{lib_name} not found in the system path {sys.path}")
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user