mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-13 21:59:56 +08:00
Compare commits
80 Commits
v2.9.0-rc3
...
cherry-pic
| Author | SHA1 | Date | |
|---|---|---|---|
| 2e7b92286d | |||
| d29deefa9e | |||
| 593377555e | |||
| e0c8ff1b8a | |||
| 3dead93453 | |||
| e2f6f8c079 | |||
| 32e37e6b9d | |||
| cbe1a35dbd | |||
| 9315f44cd6 | |||
| e9e3db62fe | |||
| c19082674b | |||
| 4dca449358 | |||
| 0fabc3ba44 | |||
| 26e023a973 | |||
| 6f12be2770 | |||
| 42f0c2c970 | |||
| b015422da1 | |||
| d4c4307032 | |||
| 3b57315b1b | |||
| c74f05797d | |||
| fd364580a9 | |||
| 2f6387e9a1 | |||
| 017d857f5f | |||
| d6e8411889 | |||
| 10b501fde9 | |||
| 31c72b8a96 | |||
| 1cd83de315 | |||
| 881c2ccae9 | |||
| 764f65584a | |||
| 3e8a062385 | |||
| 3abee625e1 | |||
| f227c883f9 | |||
| a5feacb14b | |||
| 71282c8364 | |||
| e70d9f5322 | |||
| 005e3e8d78 | |||
| 72cf48ea43 | |||
| a21a4bf11a | |||
| 21fec65781 | |||
| 22d46b50ec | |||
| d1b63e2b4a | |||
| 20100b7210 | |||
| a2c77043ee | |||
| b64fc8e41e | |||
| 709f4f62a0 | |||
| 11f776c8ee | |||
| 45e257f046 | |||
| 37e2626639 | |||
| d7a703ea92 | |||
| daa3d04325 | |||
| 999304396f | |||
| 5340e741df | |||
| 7cadf8ac04 | |||
| f9e495fe8e | |||
| 57dc68844d | |||
| 63da9d2730 | |||
| 824d59fbf6 | |||
| fc8bf12b38 | |||
| 49dab18ecf | |||
| 0154ca1d3d | |||
| 132d9fac3b | |||
| 87c5d4a858 | |||
| b0dc90881c | |||
| c0577aad39 | |||
| 9952b87600 | |||
| 300bade202 | |||
| 96f0c0fa07 | |||
| 7d024a6e29 | |||
| be29c5b207 | |||
| 5322dab793 | |||
| 1dadb6196b | |||
| 6c058c1262 | |||
| 715dca6725 | |||
| 47cb45e4f6 | |||
| 4966d058f2 | |||
| 579794ed7b | |||
| 7cf37ae3cb | |||
| f83cf0714e | |||
| ddd5074afc | |||
| 35c55da805 |
@ -5,9 +5,11 @@ GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||
|
||||
# Set CUDA architecture lists to match x86 build_cuda.sh
|
||||
if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;8.0;9.0"
|
||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0"
|
||||
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;10.0;12.0"
|
||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
||||
elif [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
||||
elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
|
||||
fi
|
||||
@ -15,6 +17,8 @@ fi
|
||||
# Compress the fatbin with -compress-mode=size for CUDA 13
|
||||
if [[ "$DESIRED_CUDA" == *"13"* ]]; then
|
||||
export TORCH_NVCC_FLAGS="-compress-mode=size"
|
||||
# Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
|
||||
export BUILD_BUNDLE_PTXAS=1
|
||||
fi
|
||||
|
||||
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
|
||||
@ -42,9 +46,6 @@ else
|
||||
echo "Bundling CUDA libraries with wheel for aarch64."
|
||||
else
|
||||
echo "Using nvidia libs from pypi for aarch64."
|
||||
# Fix platform constraints in PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64
|
||||
# Replace 'platform_machine == "x86_64"' with 'platform_machine == "aarch64"'
|
||||
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS//platform_machine == \'x86_64\'/platform_machine == \'aarch64\'}"
|
||||
echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
|
||||
export USE_NVIDIA_PYPI_LIBS=1
|
||||
fi
|
||||
|
||||
@ -213,7 +213,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
||||
]
|
||||
|
||||
# CUDA version-specific libraries
|
||||
if "130" in desired_cuda:
|
||||
if "13" in desired_cuda:
|
||||
minor_version = desired_cuda[-1]
|
||||
version_specific_libs = [
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
|
||||
"/usr/local/cuda/lib64/libcublas.so.13",
|
||||
@ -223,7 +224,7 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
||||
"/usr/local/cuda/lib64/libcusolver.so.12",
|
||||
"/usr/local/cuda/lib64/libnvJitLink.so.13",
|
||||
"/usr/local/cuda/lib64/libnvrtc.so.13",
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.0",
|
||||
f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
|
||||
]
|
||||
elif "12" in desired_cuda:
|
||||
# Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
|
||||
@ -239,6 +240,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
||||
"/usr/local/cuda/lib64/libnvrtc.so.12",
|
||||
f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
|
||||
]
|
||||
else:
|
||||
raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
|
||||
|
||||
# Combine all libraries
|
||||
libs_to_copy = common_libs + version_specific_libs
|
||||
|
||||
@ -214,8 +214,7 @@ case "$tag" in
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
|
||||
# TODO (huydhn): Upgrade this to Python >= 3.10
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
@ -263,13 +262,10 @@ case "$tag" in
|
||||
TRITON_CPU=yes
|
||||
;;
|
||||
pytorch-linux-jammy-linter)
|
||||
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||
# We will need to update mypy version eventually, but that's for another day. The task
|
||||
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||
PYTHON_VERSION=3.9
|
||||
PYTHON_VERSION=3.10
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
|
||||
PYTHON_VERSION=3.9
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
|
||||
PYTHON_VERSION=3.10
|
||||
CUDA_VERSION=12.8.1
|
||||
;;
|
||||
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||
|
||||
@ -1 +1 @@
|
||||
fccfc522864cf8bc172abe0cd58ae5581e2d44b9
|
||||
bfeb066872bc1e8b2d2bc0a3b295b99dd77206e7
|
||||
|
||||
9
.ci/docker/common/patch_libstdc.sh
Executable file
9
.ci/docker/common/patch_libstdc.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -xe
|
||||
# Script used in Linux x86 and aarch64 CD pipeline
|
||||
|
||||
# Workaround for exposing statically linked libstdc++ CXX11 ABI symbols.
|
||||
# see: https://github.com/pytorch/pytorch/issues/133437
|
||||
LIBNONSHARED=$(gcc -print-file-name=libstdc++_nonshared.a)
|
||||
nm -g $LIBNONSHARED | grep " T " | grep recursive_directory_iterator | cut -c 20- > weaken-symbols.txt
|
||||
objcopy --weaken-symbols weaken-symbols.txt $LIBNONSHARED $LIBNONSHARED
|
||||
@ -130,7 +130,8 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
|
||||
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
|
||||
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
|
||||
done;
|
||||
|
||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
||||
|
||||
# cmake-3.18.4 from pip; force in case cmake3 already exists
|
||||
RUN yum install -y python3-pip && \
|
||||
|
||||
@ -71,3 +71,5 @@ RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
|
||||
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
|
||||
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
|
||||
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH
|
||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
||||
|
||||
@ -95,3 +95,5 @@ COPY --from=nvpl /opt/nvpl/lib/ /usr/local/lib/
|
||||
COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/
|
||||
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
|
||||
ENV PATH=/usr/local/cuda/bin:$PATH
|
||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
||||
|
||||
@ -93,8 +93,9 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
mypy==1.16.0
|
||||
mypy==1.16.0 ; platform_system != "Windows"
|
||||
# Pin MyPy version because new errors are likely to appear with each release
|
||||
# Skip on Windows as lots of type annotations are POSIX specific
|
||||
#Description: linter
|
||||
#Pinned versions: 1.16.0
|
||||
#test that import: test_typing.py, test_type_hints.py
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
sphinx==5.3.0
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 5.3.0
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@1657ad2fc1acdc98aa719eebecbb0128a7c13ce4#egg=pytorch_sphinx_theme2
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
|
||||
|
||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
||||
|
||||
@ -1 +1 @@
|
||||
3.5.0
|
||||
3.5.1
|
||||
|
||||
@ -7,4 +7,4 @@ set -ex
|
||||
|
||||
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
|
||||
USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh
|
||||
|
||||
@ -41,7 +41,6 @@ def sample_vllm_test_library():
|
||||
"pytest -v -s basic_correctness/test_cumem.py",
|
||||
"pytest -v -s basic_correctness/test_basic_correctness.py",
|
||||
"pytest -v -s basic_correctness/test_cpu_offload.py",
|
||||
"VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py",
|
||||
],
|
||||
},
|
||||
"vllm_basic_models_test": {
|
||||
@ -68,15 +67,12 @@ def sample_vllm_test_library():
|
||||
"-v",
|
||||
"-s",
|
||||
"entrypoints/llm",
|
||||
"--ignore=entrypoints/llm/test_lazy_outlines.py",
|
||||
"--ignore=entrypoints/llm/test_generate.py",
|
||||
"--ignore=entrypoints/llm/test_generate_multiple_loras.py",
|
||||
"--ignore=entrypoints/llm/test_collective_rpc.py",
|
||||
]
|
||||
),
|
||||
"pytest -v -s entrypoints/llm/test_lazy_outlines.py",
|
||||
"pytest -v -s entrypoints/llm/test_generate.py ",
|
||||
"VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode",
|
||||
"pytest -v -s entrypoints/llm/test_generate.py",
|
||||
"pytest -v -s entrypoints/offline_mode",
|
||||
],
|
||||
},
|
||||
"vllm_regression_test": {
|
||||
|
||||
@ -187,19 +187,22 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
|
||||
export USE_CUFILE=0
|
||||
else
|
||||
DEPS_LIST+=(
|
||||
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
|
||||
"/usr/local/cuda/lib64/libcublas.so.12"
|
||||
"/usr/local/cuda/lib64/libcublasLt.so.12"
|
||||
"/usr/local/cuda/lib64/libcudart.so.12"
|
||||
"/usr/local/cuda/lib64/libnvrtc.so.12"
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
|
||||
DEPS_SONAME+=(
|
||||
"libnvToolsExt.so.1"
|
||||
"libcublas.so.12"
|
||||
"libcublasLt.so.12"
|
||||
"libcudart.so.12"
|
||||
"libnvrtc.so.12"
|
||||
"libcupti.so.12")
|
||||
|
||||
if [[ $CUDA_VERSION != 12.9* ]]; then
|
||||
DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
|
||||
DEPS_SONAME+=("libnvToolsExt.so.1")
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "Using nvidia libs from pypi."
|
||||
|
||||
@ -67,7 +67,7 @@ fi
|
||||
# wheels with cxx11-abi
|
||||
|
||||
echo "Checking that the gcc ABI is what we expect"
|
||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||
if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
|
||||
# We also check that there are cxx11 symbols in libtorch
|
||||
#
|
||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||
|
||||
@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
|
||||
|
||||
# Build the docs
|
||||
pushd docs/cpp
|
||||
time make VERBOSE=1 html -j
|
||||
time make VERBOSE=1 html
|
||||
|
||||
popd
|
||||
popd
|
||||
|
||||
25
.ci/pytorch/numba-cuda-13.patch
Normal file
25
.ci/pytorch/numba-cuda-13.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From 6e08c9d08e9de59c7af28b720289debbbd384764 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Wang <13521008+isVoid@users.noreply.github.com>
|
||||
Date: Tue, 1 Apr 2025 17:28:05 -0700
|
||||
Subject: [PATCH] Avoid bumping certain driver API to avoid future breakage
|
||||
(#185)
|
||||
|
||||
Co-authored-by: isVoid <isVoid@users.noreply.github.com>
|
||||
---
|
||||
numba_cuda/numba/cuda/cudadrv/driver.py | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
|
||||
index 1641bf77..233e9ed7 100644
|
||||
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
|
||||
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
|
||||
@@ -365,6 +365,9 @@ def _find_api(self, fname):
|
||||
else:
|
||||
variants = ('_v2', '')
|
||||
|
||||
+ if fname in ("cuCtxGetDevice", "cuCtxSynchronize"):
|
||||
+ return getattr(self.lib, fname)
|
||||
+
|
||||
for variant in variants:
|
||||
try:
|
||||
return getattr(self.lib, f'{fname}{variant}')
|
||||
@ -32,6 +32,9 @@ LIBTORCH_NAMESPACE_LIST = (
|
||||
"torch::",
|
||||
)
|
||||
|
||||
# Patterns for detecting statically linked libstdc++ symbols
|
||||
STATICALLY_LINKED_CXX11_ABI = [re.compile(r".*recursive_directory_iterator.*")]
|
||||
|
||||
|
||||
def _apply_libtorch_symbols(symbols):
|
||||
return [
|
||||
@ -53,12 +56,17 @@ def get_symbols(lib: str) -> list[tuple[str, str, str]]:
|
||||
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
|
||||
|
||||
|
||||
def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
|
||||
def grep_symbols(
|
||||
lib: str, patterns: list[Any], symbol_type: str | None = None
|
||||
) -> list[str]:
|
||||
def _grep_symbols(
|
||||
symbols: list[tuple[str, str, str]], patterns: list[Any]
|
||||
) -> list[str]:
|
||||
rc = []
|
||||
for _s_addr, _s_type, s_name in symbols:
|
||||
# Filter by symbol type if specified
|
||||
if symbol_type and _s_type != symbol_type:
|
||||
continue
|
||||
for pattern in patterns:
|
||||
if pattern.match(s_name):
|
||||
rc.append(s_name)
|
||||
@ -80,6 +88,18 @@ def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
|
||||
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
|
||||
|
||||
|
||||
def check_lib_statically_linked_libstdc_cxx_abi_symbols(lib: str) -> None:
|
||||
cxx11_statically_linked_symbols = grep_symbols(
|
||||
lib, STATICALLY_LINKED_CXX11_ABI, symbol_type="T"
|
||||
)
|
||||
num_statically_linked_symbols = len(cxx11_statically_linked_symbols)
|
||||
print(f"num_statically_linked_symbols (T): {num_statically_linked_symbols}")
|
||||
if num_statically_linked_symbols > 0:
|
||||
raise RuntimeError(
|
||||
f"Found statically linked libstdc++ symbols (recursive_directory_iterator): {cxx11_statically_linked_symbols[:100]}"
|
||||
)
|
||||
|
||||
|
||||
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
|
||||
print(f"lib: {lib}")
|
||||
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
|
||||
@ -107,6 +127,7 @@ def main() -> None:
|
||||
|
||||
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
|
||||
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
|
||||
check_lib_statically_linked_libstdc_cxx_abi_symbols(libtorch_cpu_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -32,6 +32,16 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
|
||||
git config --global --add safe.directory /var/lib/jenkins/workspace
|
||||
fi
|
||||
|
||||
|
||||
# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878
|
||||
NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
|
||||
if [ -n "$NUMBA_CUDA_DIR" ]; then
|
||||
NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
|
||||
pushd "$NUMBA_CUDA_DIR"
|
||||
patch -p4 <"$NUMBA_PATCH"
|
||||
popd
|
||||
fi
|
||||
|
||||
echo "Environment variables:"
|
||||
env
|
||||
|
||||
@ -1614,6 +1624,25 @@ test_operator_benchmark() {
|
||||
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
||||
}
|
||||
|
||||
test_operator_microbenchmark() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
TEST_DIR=$(pwd)
|
||||
|
||||
cd benchmarks/operator_benchmark/pt_extension
|
||||
python -m pip install .
|
||||
|
||||
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
||||
|
||||
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
|
||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
||||
--benchmark-name "PyTorch operator microbenchmark" --use-compile
|
||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \
|
||||
--benchmark-name "PyTorch operator microbenchmark"
|
||||
done
|
||||
}
|
||||
|
||||
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
|
||||
(cd test && python -c "import torch; print(torch.__config__.show())")
|
||||
@ -1668,6 +1697,8 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
|
||||
test_operator_benchmark cpu ${TEST_MODE}
|
||||
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then
|
||||
test_operator_microbenchmark
|
||||
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
|
||||
test_inductor_distributed
|
||||
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
||||
@ -1721,11 +1752,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
||||
install_torchvision
|
||||
test_inductor_shard "${SHARD_NUMBER}"
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
|
||||
test_inductor_distributed
|
||||
fi
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *einops* ]]; then
|
||||
test_einops
|
||||
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
|
||||
|
||||
@ -137,7 +137,7 @@ sccache --show-stats
|
||||
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
||||
(
|
||||
if "%BUILD_ENVIRONMENT%"=="" (
|
||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
|
||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%\envs\py_tmp` in Command Prompt before running Git Bash.
|
||||
) else (
|
||||
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||
|
||||
|
||||
@ -3,12 +3,12 @@ if "%BUILD_ENVIRONMENT%"=="" (
|
||||
) else (
|
||||
set CONDA_PARENT_DIR=C:\Jenkins
|
||||
)
|
||||
|
||||
set CONDA_ROOT_DIR=%CONDA_PARENT_DIR%\Miniconda3
|
||||
|
||||
:: Be conservative here when rolling out the new AMI with conda. This will try
|
||||
:: to install conda as before if it couldn't find the conda installation. This
|
||||
:: can be removed eventually after we gain enough confidence in the AMI
|
||||
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
|
||||
if not exist %CONDA_ROOT_DIR% (
|
||||
set INSTALL_FRESH_CONDA=1
|
||||
)
|
||||
|
||||
@ -17,10 +17,14 @@ if "%INSTALL_FRESH_CONDA%"=="1" (
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
|
||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_ROOT_DIR%
|
||||
if errorlevel 1 exit /b
|
||||
if not errorlevel 0 exit /b
|
||||
)
|
||||
|
||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
||||
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
||||
call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%
|
||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
||||
call conda activate py_tmp
|
||||
|
||||
call pip install -r .ci/docker/requirements-ci.txt
|
||||
|
||||
@ -14,7 +14,7 @@ if not errorlevel 0 exit /b
|
||||
:: build\torch. Rather than changing all these references, making a copy of torch folder
|
||||
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
||||
:: the job anyway
|
||||
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
||||
xcopy /s %CONDA_ROOT_DIR%\envs\py_tmp\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
||||
|
||||
pushd .
|
||||
if "%VC_VERSION%" == "" (
|
||||
|
||||
@ -38,7 +38,14 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
fi
|
||||
|
||||
# TODO: Move both of them to Windows AMI
|
||||
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
||||
python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
||||
|
||||
# Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments
|
||||
# pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node'
|
||||
# scipy from 1.6.3 to 1.10
|
||||
# expecttest from 0.1.3 to 0.3.0
|
||||
# xdoctest from 1.0.2 to 1.3.0
|
||||
python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42"
|
||||
|
||||
# Install Z3 optional dependency for Windows builds.
|
||||
python -m pip install z3-solver==4.15.1.0
|
||||
@ -52,9 +59,6 @@ python -m pip install parameterized==0.8.1
|
||||
# Install pulp for testing ilps under torch\distributed\_tools
|
||||
python -m pip install pulp==2.9.0
|
||||
|
||||
# Install expecttest to merge https://github.com/pytorch/pytorch/pull/155308
|
||||
python -m pip install expecttest==0.3.0
|
||||
|
||||
run_tests() {
|
||||
# Run nvidia-smi if available
|
||||
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
||||
|
||||
@ -37,10 +37,10 @@ IF "%CUDA_PATH_V128%"=="" (
|
||||
)
|
||||
|
||||
IF "%BUILD_VISION%" == "" (
|
||||
set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0
|
||||
set TORCH_CUDA_ARCH_LIST=7.0;7.5;8.0;8.6;9.0;10.0;12.0
|
||||
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
||||
) ELSE (
|
||||
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
|
||||
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
|
||||
)
|
||||
|
||||
set "CUDA_PATH=%CUDA_PATH_V128%"
|
||||
|
||||
@ -71,14 +71,7 @@ export PYTORCH_BUILD_NUMBER=1
|
||||
|
||||
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
|
||||
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
||||
|
||||
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
|
||||
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
|
||||
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||
fi
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||
|
||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
||||
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
||||
|
||||
16
.github/actions/setup-win/action.yml
vendored
16
.github/actions/setup-win/action.yml
vendored
@ -6,6 +6,12 @@ inputs:
|
||||
cuda-version:
|
||||
description: which cuda version to install, 'cpu' for none
|
||||
required: true
|
||||
python-version:
|
||||
required: false
|
||||
type: string
|
||||
default: "3.10"
|
||||
description: |
|
||||
The python version to be used. Will be 3.10 by default
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
@ -38,18 +44,24 @@ runs:
|
||||
CONDA="C:\Jenkins\Miniconda3\condabin\conda.bat"
|
||||
|
||||
{
|
||||
echo "CONDA=${CONDA}";
|
||||
echo "CONDA_RUN=${CONDA} run --no-capture-output";
|
||||
echo "CONDA_BUILD=${CONDA} run conda-build";
|
||||
echo "CONDA_INSTALL=${CONDA} install";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Setup Python3
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
shell: bash
|
||||
run: |
|
||||
set +e
|
||||
set -x
|
||||
|
||||
PYTHON3=$(${CONDA_RUN} which python3)
|
||||
# Create new py_tmp env with python-version
|
||||
${CONDA} create -y -n py_tmp python=${PYTHON_VERSION} intel-openmp libuv
|
||||
|
||||
PYTHON3=$(${CONDA_RUN} -n py_tmp which python3)
|
||||
EXIT_CODE=$?
|
||||
|
||||
if [[ "${EXIT_CODE}" == "0" ]]; then
|
||||
@ -62,7 +74,7 @@ runs:
|
||||
# installation, which is Python 3 based. Its Python is default to Python 3. Further, there
|
||||
# is also the Miniconda installation that is Python 2 based, and both can be installed if
|
||||
# needed. In both cases, Python binary is just called python
|
||||
PYTHON=$(${CONDA_RUN} which python)
|
||||
PYTHON=$(${CONDA_RUN} -n py_tmp which python)
|
||||
EXIT_CODE=$?
|
||||
|
||||
if [[ "${EXIT_CODE}" == "0" ]]; then
|
||||
|
||||
2
.github/ci_commit_pins/vllm.txt
vendored
2
.github/ci_commit_pins/vllm.txt
vendored
@ -1 +1 @@
|
||||
e10fef08838612b4560e9c72e5cb1414a5edfa13
|
||||
78a47f87ce259a48f0391fa9ae15add05ea7432b
|
||||
|
||||
128
.github/scripts/generate_binary_build_matrix.py
vendored
128
.github/scripts/generate_binary_build_matrix.py
vendored
@ -16,16 +16,18 @@ from typing import Optional
|
||||
|
||||
|
||||
# NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
||||
CUDA_ARCHES = ["12.6", "12.8", "13.0"]
|
||||
CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"]
|
||||
CUDA_STABLE = "12.8"
|
||||
CUDA_ARCHES_FULL_VERSION = {
|
||||
"12.6": "12.6.3",
|
||||
"12.8": "12.8.1",
|
||||
"12.9": "12.9.1",
|
||||
"13.0": "13.0.0",
|
||||
}
|
||||
CUDA_ARCHES_CUDNN_VERSION = {
|
||||
"12.6": "9",
|
||||
"12.8": "9",
|
||||
"12.9": "9",
|
||||
"13.0": "9",
|
||||
}
|
||||
|
||||
@ -38,60 +40,77 @@ CPU_AARCH64_ARCH = ["cpu-aarch64"]
|
||||
|
||||
CPU_S390X_ARCH = ["cpu-s390x"]
|
||||
|
||||
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "13.0-aarch64"]
|
||||
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "12.9-aarch64", "13.0-aarch64"]
|
||||
|
||||
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"12.6": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
"nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | "
|
||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
|
||||
"nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | "
|
||||
"nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | "
|
||||
"nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | "
|
||||
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | "
|
||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | "
|
||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
|
||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
|
||||
),
|
||||
"12.8": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
"nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | "
|
||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
|
||||
"nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | "
|
||||
"nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | "
|
||||
"nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | "
|
||||
"nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | "
|
||||
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | "
|
||||
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
|
||||
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
|
||||
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
|
||||
),
|
||||
"12.9": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | "
|
||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
|
||||
"nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | "
|
||||
"nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | "
|
||||
"nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | "
|
||||
"nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | "
|
||||
"nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||
"nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | "
|
||||
"nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | "
|
||||
"nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | "
|
||||
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'"
|
||||
),
|
||||
"13.0": (
|
||||
"nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
"nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | "
|
||||
"nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | "
|
||||
"nvidia-cublas==13.0.0.19; platform_system == 'Linux' | "
|
||||
"nvidia-cufft==12.0.0.15; platform_system == 'Linux' | "
|
||||
"nvidia-curand==10.4.0.35; platform_system == 'Linux' | "
|
||||
"nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
|
||||
"nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
|
||||
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
|
||||
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
|
||||
"nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
|
||||
"nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
|
||||
"nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
|
||||
"nvidia-cufile==1.15.0.42; platform_system == 'Linux'"
|
||||
),
|
||||
"xpu": (
|
||||
"intel-cmplr-lib-rt==2025.2.1 | "
|
||||
@ -222,7 +241,11 @@ def generate_libtorch_matrix(
|
||||
arches += CUDA_ARCHES
|
||||
arches += ROCM_ARCHES
|
||||
elif os == "windows":
|
||||
arches += CUDA_ARCHES
|
||||
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up
|
||||
# in 2.10
|
||||
windows_cuda_arches = CUDA_ARCHES.copy()
|
||||
windows_cuda_arches.remove("12.9")
|
||||
arches += windows_cuda_arches
|
||||
if libtorch_variants is None:
|
||||
libtorch_variants = [
|
||||
"shared-with-deps",
|
||||
@ -286,7 +309,11 @@ def generate_wheels_matrix(
|
||||
if os == "linux":
|
||||
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
|
||||
elif os == "windows":
|
||||
arches += CUDA_ARCHES + XPU_ARCHES
|
||||
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up
|
||||
# in 2.10
|
||||
windows_cuda_arches = CUDA_ARCHES.copy()
|
||||
windows_cuda_arches.remove("12.9")
|
||||
arches += windows_cuda_arches + XPU_ARCHES
|
||||
elif os == "linux-aarch64":
|
||||
# Separate new if as the CPU type is different and
|
||||
# uses different build/test scripts
|
||||
@ -322,7 +349,7 @@ def generate_wheels_matrix(
|
||||
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
||||
|
||||
if (
|
||||
arch_version in ["13.0", "12.8", "12.6"]
|
||||
arch_version in ["13.0", "12.9", "12.8", "12.6"]
|
||||
and os == "linux"
|
||||
or arch_version in CUDA_AARCH64_ARCHES
|
||||
):
|
||||
@ -386,5 +413,6 @@ def generate_wheels_matrix(
|
||||
|
||||
|
||||
validate_nccl_dep_consistency("13.0")
|
||||
validate_nccl_dep_consistency("12.9")
|
||||
validate_nccl_dep_consistency("12.8")
|
||||
validate_nccl_dep_consistency("12.6")
|
||||
|
||||
2
.github/scripts/generate_ci_workflows.py
vendored
2
.github/scripts/generate_ci_workflows.py
vendored
@ -135,7 +135,7 @@ ROCM_SMOKE_WORKFLOWS = [
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
arches=["6.4"],
|
||||
python_versions=["3.9"],
|
||||
python_versions=["3.10"],
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={
|
||||
|
||||
@ -77,6 +77,9 @@ jobs:
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
timeout-minutes: 420
|
||||
{%- elif config["gpu_arch_type"] == "rocm" %}
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.24xlarge.ephemeral
|
||||
|
||||
2
.github/workflows/_binary-test-linux.yml
vendored
2
.github/workflows/_binary-test-linux.yml
vendored
@ -186,8 +186,6 @@ jobs:
|
||||
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
|
||||
with:
|
||||
driver-version: ${{ startsWith(inputs.GPU_ARCH_VERSION, '13') && '580.65.06' || '570.133.07' }}
|
||||
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
|
||||
|
||||
- name: configure aws credentials
|
||||
|
||||
2
.github/workflows/_docs.yml
vendored
2
.github/workflows/_docs.yml
vendored
@ -67,7 +67,7 @@ jobs:
|
||||
# an OOM issue when running the job, so this upgrades the runner from 4xlarge
|
||||
# to the next available tier of 12xlarge. So much memory just to generate cpp
|
||||
# doc
|
||||
runner: ${{ inputs.runner_prefix }}linux.12xlarge
|
||||
runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory
|
||||
# TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
|
||||
# Let's try to figure out how this can be improved
|
||||
timeout-minutes: 360
|
||||
|
||||
4
.github/workflows/_linux-test.yml
vendored
4
.github/workflows/_linux-test.yml
vendored
@ -169,7 +169,7 @@ jobs:
|
||||
id: install-nvidia-driver
|
||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
|
||||
with:
|
||||
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
|
||||
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
|
||||
|
||||
- name: Setup GPU_FLAG for docker run
|
||||
@ -273,6 +273,8 @@ jobs:
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
|
||||
OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
|
||||
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
|
||||
2
.github/workflows/_win-build.yml
vendored
2
.github/workflows/_win-build.yml
vendored
@ -151,7 +151,7 @@ jobs:
|
||||
BUILD_WHEEL: 1
|
||||
MAX_JOBS: 8
|
||||
CUDA_VERSION: ${{ inputs.cuda-version }}
|
||||
PYTHON_VERSION: "3.9"
|
||||
PYTHON_VERSION: "3.10"
|
||||
SCCACHE_BUCKET: "ossci-compiler-cache"
|
||||
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
|
||||
SCCACHE_REGION: us-east-1
|
||||
|
||||
2
.github/workflows/_win-test.yml
vendored
2
.github/workflows/_win-test.yml
vendored
@ -184,7 +184,7 @@ jobs:
|
||||
env:
|
||||
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
|
||||
INSTALL_WINDOWS_SDK: 1
|
||||
PYTHON_VERSION: 3.9
|
||||
PYTHON_VERSION: "3.10"
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
|
||||
|
||||
4
.github/workflows/build-manywheel-images.yml
vendored
4
.github/workflows/build-manywheel-images.yml
vendored
@ -46,10 +46,12 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include: [
|
||||
{ name: "manylinux2_28-builder", tag: "cuda13.0", runner: "linux.9xlarge.ephemeral" },
|
||||
{ name: "manylinux2_28-builder", tag: "cuda13.0", runner: "linux.9xlarge.ephemeral" },
|
||||
{ name: "manylinux2_28-builder", tag: "cuda12.8", runner: "linux.9xlarge.ephemeral" },
|
||||
{ name: "manylinux2_28-builder", tag: "cuda12.9", runner: "linux.9xlarge.ephemeral" },
|
||||
{ name: "manylinux2_28-builder", tag: "cuda12.6", runner: "linux.9xlarge.ephemeral" },
|
||||
{ name: "manylinuxaarch64-builder", tag: "cuda13.0", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||
{ name: "manylinux2_28-builder", tag: "rocm6.3", runner: "linux.9xlarge.ephemeral" },
|
||||
|
||||
2
.github/workflows/docker-builds.yml
vendored
2
.github/workflows/docker-builds.yml
vendored
@ -70,7 +70,7 @@ jobs:
|
||||
pytorch-linux-jammy-py3-clang18-asan,
|
||||
pytorch-linux-jammy-py3-clang12-onnx,
|
||||
pytorch-linux-jammy-linter,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter,
|
||||
# Executorch pin needs update
|
||||
# pytorch-linux-jammy-py3-clang12-executorch,
|
||||
pytorch-linux-jammy-py3.12-triton-cpu,
|
||||
|
||||
364
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
364
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
@ -132,7 +132,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -178,7 +178,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -204,6 +204,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_10-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -224,7 +270,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -335,7 +381,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -381,7 +427,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -407,6 +453,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_11-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -427,7 +519,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -538,7 +630,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -584,7 +676,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -610,6 +702,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_12-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_12-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_12-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -630,7 +768,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -741,7 +879,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -787,7 +925,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -813,6 +951,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_13-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -833,7 +1017,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -944,7 +1128,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -990,7 +1174,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1016,6 +1200,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13t-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_13t-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13t-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -1036,7 +1266,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1147,7 +1377,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1193,7 +1423,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1219,6 +1449,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_14-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -1239,7 +1515,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1350,7 +1626,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1396,7 +1672,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1422,6 +1698,52 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14t-cuda-aarch64-12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda-aarch64-12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_14t-cuda-aarch64-12_9-build
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9-aarch64"
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14t-cuda-aarch64-13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -1442,7 +1764,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
70
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
70
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
@ -248,6 +248,74 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
libtorch-cuda12_9-shared-with-deps-release-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: libtorch
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
LIBTORCH_CONFIG: release
|
||||
LIBTORCH_VARIANT: shared-with-deps
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: libtorch-cuda12_9-shared-with-deps-release
|
||||
build_environment: linux-binary-libtorch
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
libtorch-cuda12_9-shared-with-deps-release-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- libtorch-cuda12_9-shared-with-deps-release-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: libtorch
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
LIBTORCH_CONFIG: release
|
||||
LIBTORCH_VARIANT: shared-with-deps
|
||||
build_name: libtorch-cuda12_9-shared-with-deps-release
|
||||
build_environment: linux-binary-libtorch
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
libtorch-cuda12_9-shared-with-deps-release-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: libtorch-cuda12_9-shared-with-deps-release-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: libtorch
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
LIBTORCH_CONFIG: release
|
||||
LIBTORCH_VARIANT: shared-with-deps
|
||||
build_name: libtorch-cuda12_9-shared-with-deps-release
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
libtorch-cuda13_0-shared-with-deps-release-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -333,6 +401,7 @@ jobs:
|
||||
LIBTORCH_CONFIG: release
|
||||
LIBTORCH_VARIANT: shared-with-deps
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: libtorch-rocm6_3-shared-with-deps-release
|
||||
build_environment: linux-binary-libtorch
|
||||
secrets:
|
||||
@ -446,6 +515,7 @@ jobs:
|
||||
LIBTORCH_CONFIG: release
|
||||
LIBTORCH_VARIANT: shared-with-deps
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: libtorch-rocm6_4-shared-with-deps-release
|
||||
build_environment: linux-binary-libtorch
|
||||
secrets:
|
||||
|
||||
2
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
2
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -60,7 +60,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_8-test: # Testing
|
||||
|
||||
518
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
518
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
@ -127,7 +127,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_6-test: # Testing
|
||||
@ -193,7 +193,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_8-test: # Testing
|
||||
@ -241,6 +241,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_10-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_10-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -259,7 +325,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda13_0-test: # Testing
|
||||
@ -323,6 +389,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_10-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -433,6 +500,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_10-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -716,7 +784,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_6-test: # Testing
|
||||
@ -782,7 +850,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_8-test: # Testing
|
||||
@ -830,6 +898,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_11-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_11-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -848,7 +982,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda13_0-test: # Testing
|
||||
@ -912,6 +1046,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_11-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -1022,6 +1157,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_11-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -1305,7 +1441,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_6-test: # Testing
|
||||
@ -1371,7 +1507,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_8-test: # Testing
|
||||
@ -1419,6 +1555,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_12-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_12-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_12-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_12-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -1437,7 +1639,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda13_0-test: # Testing
|
||||
@ -1501,6 +1703,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_12-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -1611,6 +1814,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_12-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -1894,7 +2098,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_6-test: # Testing
|
||||
@ -1960,7 +2164,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_8-test: # Testing
|
||||
@ -2008,6 +2212,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_13-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_13-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -2026,7 +2296,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda13_0-test: # Testing
|
||||
@ -2090,6 +2360,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_13-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -2200,6 +2471,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_13-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -2483,7 +2755,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_6-test: # Testing
|
||||
@ -2549,7 +2821,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_8-test: # Testing
|
||||
@ -2597,6 +2869,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13t-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_13t-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_13t-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_13t-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -2615,7 +2953,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda13_0-test: # Testing
|
||||
@ -2679,6 +3017,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_13t-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -2789,6 +3128,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_13t-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -3072,7 +3412,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_6-test: # Testing
|
||||
@ -3138,7 +3478,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_8-test: # Testing
|
||||
@ -3186,6 +3526,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_14-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14"
|
||||
build_name: manywheel-py3_14-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_14-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14"
|
||||
build_name: manywheel-py3_14-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -3204,7 +3610,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda13_0-test: # Testing
|
||||
@ -3268,6 +3674,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.14"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_14-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -3378,6 +3785,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.14"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_14-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -3661,7 +4069,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_6-test: # Testing
|
||||
@ -3727,7 +4135,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_8-test: # Testing
|
||||
@ -3775,6 +4183,72 @@ jobs:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14t-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_14t-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
build_name: manywheel-py3_14t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_9-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_14t-cuda12_9-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: "12.9"
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
build_name: manywheel-py3_14t-cuda12_9
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_14t-cuda13_0-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -3793,7 +4267,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda13_0-test: # Testing
|
||||
@ -3857,6 +4331,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_14t-rocm6_3
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
@ -3967,6 +4442,7 @@ jobs:
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.14t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_14t-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
|
||||
15
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
15
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
@ -44,7 +44,7 @@ jobs:
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
manywheel-py3_9-rocm6_4-build:
|
||||
manywheel-py3_10-rocm6_4-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
@ -58,16 +58,17 @@ jobs:
|
||||
GPU_ARCH_TYPE: rocm
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.9"
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-rocm6_4
|
||||
timeout-minutes: 300
|
||||
build_name: manywheel-py3_10-rocm6_4
|
||||
build_environment: linux-binary-manywheel-rocm
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-rocm6_4-test: # Testing
|
||||
manywheel-py3_10-rocm6_4-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_9-rocm6_4-build
|
||||
- manywheel-py3_10-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
@ -82,14 +83,14 @@ jobs:
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
DESIRED_PYTHON: "3.9"
|
||||
DESIRED_PYTHON: "3.10"
|
||||
steps:
|
||||
- name: Setup ROCm
|
||||
uses: ./.github/actions/setup-rocm
|
||||
- uses: actions/download-artifact@v4.1.7
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_9-rocm6_4
|
||||
name: manywheel-py3_10-rocm6_4
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
|
||||
4
.github/workflows/inductor-nightly.yml
vendored
4
.github/workflows/inductor-nightly.yml
vendored
@ -37,7 +37,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-default-label-prefix
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -56,7 +56,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: nightly-dynamo-benchmarks-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image: ${{ needs.nightly-dynamo-benchmarks-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }}
|
||||
timeout-minutes: 720
|
||||
|
||||
@ -75,7 +75,7 @@ jobs:
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -101,7 +101,7 @@ jobs:
|
||||
needs: inductor-build
|
||||
if: github.event.schedule == '0 7 * * *'
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
@ -118,7 +118,7 @@ jobs:
|
||||
needs: inductor-build
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
|
||||
@ -80,7 +80,7 @@ jobs:
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -107,7 +107,7 @@ jobs:
|
||||
needs: inductor-build
|
||||
if: github.event.schedule == '0 7 * * *'
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
@ -124,7 +124,7 @@ jobs:
|
||||
needs: inductor-build
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }}
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
|
||||
4
.github/workflows/inductor-periodic.yml
vendored
4
.github/workflows/inductor-periodic.yml
vendored
@ -154,7 +154,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-default-label-prefix
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -200,7 +200,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: periodic-dynamo-benchmarks-cpu-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
4
.github/workflows/inductor-unittest.yml
vendored
4
.github/workflows/inductor-unittest.yml
vendored
@ -110,7 +110,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -127,7 +127,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: inductor-cpu-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
4
.github/workflows/inductor.yml
vendored
4
.github/workflows/inductor.yml
vendored
@ -79,7 +79,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -101,7 +101,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: inductor-cpu-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
6
.github/workflows/lint.yml
vendored
6
.github/workflows/lint.yml
vendored
@ -53,7 +53,7 @@ jobs:
|
||||
with:
|
||||
timeout: 120
|
||||
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
|
||||
docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter
|
||||
docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter
|
||||
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
|
||||
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
|
||||
fetch-depth: 0
|
||||
@ -265,10 +265,10 @@ jobs:
|
||||
with:
|
||||
submodules: false
|
||||
fetch-depth: 1
|
||||
- name: Setup Python 3.9
|
||||
- name: Setup Python 3.10
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: '3.9'
|
||||
python-version: '3.10'
|
||||
architecture: x64
|
||||
cache: pip
|
||||
- name: Install dependencies
|
||||
|
||||
10
.github/workflows/operator_benchmark.yml
vendored
10
.github/workflows/operator_benchmark.yml
vendored
@ -14,6 +14,10 @@ on:
|
||||
schedule:
|
||||
# Run at 07:00 UTC every Sunday
|
||||
- cron: 0 7 * * 0
|
||||
pull_request:
|
||||
paths:
|
||||
- benchmarks/operator_benchmark/**
|
||||
- .github/workflows/operator_benchmark.yml
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
@ -29,7 +33,7 @@ jobs:
|
||||
name: opbenchmark-build
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -42,7 +46,7 @@ jobs:
|
||||
name: opbenchmark-on-demand-build
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -55,7 +59,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: opbenchmark-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11-build
|
||||
build-environment: linux-jammy-py3.10-gcc11-build
|
||||
docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
46
.github/workflows/operator_microbenchmark.yml
vendored
Normal file
46
.github/workflows/operator_microbenchmark.yml
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
name: operator_microbenchmark
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- ciflow/op-benchmark/*
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Run at 06:00 UTC every day
|
||||
- cron: 0 6 * * *
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
opmicrobenchmark-build:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
name: opmicrobenchmark-build
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '8.0 9.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" },
|
||||
{ config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.a100" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
opmicrobenchmark-test:
|
||||
name: opmicrobenchmark-test
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: opmicrobenchmark-build
|
||||
with:
|
||||
timeout-minutes: 500
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
11
.github/workflows/periodic.yml
vendored
11
.github/workflows/periodic.yml
vendored
@ -59,13 +59,14 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.4-py3.10-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11
|
||||
cuda-arch-list: 7.5
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
|
||||
{ config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
4
.github/workflows/trunk.yml
vendored
4
.github/workflows/trunk.yml
vendored
@ -240,7 +240,7 @@ jobs:
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-gcc11
|
||||
build-environment: linux-jammy-py3.10-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -255,7 +255,7 @@ jobs:
|
||||
- verify-cachebench-cpu-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-gcc11
|
||||
build-environment: linux-jammy-py3.10-gcc11
|
||||
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -82,6 +82,7 @@ torch/return_types.pyi
|
||||
torch/nn/functional.pyi
|
||||
torch/utils/data/datapipes/datapipe.pyi
|
||||
torch/csrc/autograd/generated/*
|
||||
torch/csrc/functionalization/generated/*
|
||||
torch/csrc/lazy/generated/*.[!m]*
|
||||
torch_compile_debug/
|
||||
# Listed manually because some files in this directory are not generated
|
||||
|
||||
@ -91,6 +91,8 @@ generated_cpu_cpp = [
|
||||
"aten/src/ATen/NativeMetaFunctions.h",
|
||||
"aten/src/ATen/RegistrationDeclarations.h",
|
||||
"aten/src/ATen/VmapGeneratedPlumbing.h",
|
||||
"aten/src/ATen/ViewMetaClasses.h",
|
||||
"aten/src/ATen/ViewMetaClasses.cpp",
|
||||
"aten/src/ATen/core/aten_interned_strings.h",
|
||||
"aten/src/ATen/core/enum_tag.h",
|
||||
"aten/src/ATen/core/TensorBody.h",
|
||||
@ -1106,6 +1108,7 @@ test_suite(
|
||||
"aten/src/ATen/templates/LazyNonNativeIr.h",
|
||||
"aten/src/ATen/templates/RegisterDispatchKey.cpp",
|
||||
"aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
|
||||
"aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp",
|
||||
"aten/src/ATen/native/native_functions.yaml",
|
||||
"aten/src/ATen/native/tags.yaml",
|
||||
"aten/src/ATen/native/ts_native_functions.yaml",
|
||||
|
||||
@ -50,11 +50,10 @@ RUN git submodule update --init --recursive
|
||||
FROM conda as conda-installs
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ARG CUDA_PATH=cu121
|
||||
ARG CUDA_CHANNEL=nvidia
|
||||
ARG INSTALL_CHANNEL=whl/nightly
|
||||
# Automatically set by buildx
|
||||
RUN /opt/conda/bin/conda update -y -n base -c defaults conda
|
||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION}
|
||||
# Pinning the conda version here; see: https://github.com/pytorch/pytorch/issues/164574
|
||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda=25.7.0
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||

|
||||

|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
@ -72,7 +72,7 @@ Elaborating Further:
|
||||
|
||||
If you use NumPy, then you have used Tensors (a.k.a. ndarray).
|
||||
|
||||

|
||||

|
||||
|
||||
PyTorch provides Tensors that can live either on the CPU or the GPU and accelerates the
|
||||
computation by a huge amount.
|
||||
@ -99,7 +99,7 @@ from several research papers on this topic, as well as current and past work suc
|
||||
While this technique is not unique to PyTorch, it's one of the fastest implementations of it to date.
|
||||
You get the best of speed and flexibility for your crazy research.
|
||||
|
||||

|
||||

|
||||
|
||||
### Python First
|
||||
|
||||
|
||||
@ -9,11 +9,6 @@
|
||||
|
||||
namespace at::functionalization {
|
||||
|
||||
ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
|
||||
if (out_idx == this->out_index) return *this;
|
||||
return ViewMeta(forward_fn, reverse_fn, has_symbolic_inputs, is_multi_output, is_as_strided, out_idx);
|
||||
}
|
||||
|
||||
// Note [Functionalization: Alias Removal Part 2]
|
||||
// See Note [Functionalization: Alias Removal] for more details.
|
||||
// This function applies a single update from one of the views to the StorageImpl.
|
||||
@ -42,12 +37,12 @@ ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
|
||||
static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) {
|
||||
at::Tensor t = update.new_val;
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||
if (update.view_metas.empty()) return t;
|
||||
if (update.view_metas.empty()) { return t; }
|
||||
|
||||
std::vector<at::Tensor> tmp_values({base});
|
||||
tmp_values.reserve(update.view_metas.size());
|
||||
for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
|
||||
at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index);
|
||||
at::Tensor next_view = update.view_metas[i]->forward(tmp_values.back());
|
||||
// NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided
|
||||
// All of these ops require additional information to recover the sizes of the original tensor.
|
||||
// If we need to, we could probably apply this optimization and only bother computing tmp_values
|
||||
@ -55,9 +50,8 @@ static const Tensor apply_update(const FunctionalStorageImpl::Update& update, co
|
||||
tmp_values.push_back(std::move(next_view));
|
||||
}
|
||||
for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) {
|
||||
int64_t out_idx = update.view_metas[i].out_index;
|
||||
// Each view inverse is implemented in ViewInverses.cpp.
|
||||
t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);
|
||||
t = update.view_metas[i]->reverse(tmp_values[i], t);
|
||||
}
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||
return t;
|
||||
@ -111,13 +105,13 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base)
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_));
|
||||
}
|
||||
|
||||
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& metas) {
|
||||
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<std::shared_ptr<ViewMeta>>& metas) {
|
||||
TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage");
|
||||
|
||||
if (metas.size() > 1) {
|
||||
for (size_t i = 1; i < metas.size(); ++i) {
|
||||
// Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI
|
||||
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i].is_as_strided,
|
||||
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i]->is_as_strided,
|
||||
"During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i,
|
||||
" was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today,"
|
||||
"so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you "
|
||||
|
||||
@ -8,44 +8,89 @@ namespace at::functionalization {
|
||||
|
||||
// See Note [Functionalization Pass In Core]
|
||||
|
||||
enum class InverseReturnMode {
|
||||
/// Specifies that functional inverses should always return a view.
|
||||
AlwaysView,
|
||||
/// Specifies that functional inverses should always return a non-view / copy.
|
||||
NeverView,
|
||||
/// Specifies that functional inverses should return a view unless a (copying)
|
||||
/// scatter
|
||||
/// inverse exists, in which case that will be used instead.
|
||||
/// This avoids as_strided() calls that can be difficult for subclasses to
|
||||
/// handle.
|
||||
ViewOrScatterInverse,
|
||||
};
|
||||
|
||||
#define FUNCTIONALIZATION_VIEWMETA_NAME(TYPE) \
|
||||
static const char* name() { \
|
||||
return #TYPE; \
|
||||
}
|
||||
|
||||
#define FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(...) \
|
||||
using SerializableTuple = std::tuple<__VA_ARGS__>
|
||||
|
||||
// ViewMeta is a class used by the functionalization pass to navigate between
|
||||
// a base tensor and a view tensor.
|
||||
// For example, if I call `b = a.view1(...)`
|
||||
// the functionalization pass will generate and store a ViewMeta on b that looks
|
||||
// like:
|
||||
// the functionalization pass will generate and store a ViewMeta specialization
|
||||
// for `view1` operation on b that looks like:
|
||||
//
|
||||
// ViewMeta(
|
||||
// [<captures>](const Tensor& base, int64_t mutated_view_idx) {
|
||||
// return base.view1(...);
|
||||
// },
|
||||
// [<captures>](const at::Tensor& base, const at::Tensor& mutated_view,
|
||||
// int64_t mutated_view_idx) -> at::Tensor {
|
||||
// return at::functionalization::impl::view1_inverse(base, mutated_view,
|
||||
// ...);
|
||||
// struct TORCH_API view1_ViewMeta : public ViewMeta {
|
||||
// FUNCTIONALIZATION_VIEWMETA_NAME(view1_ViewMeta);
|
||||
// FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
||||
// bool /* reapply_views */,
|
||||
// const std::vector<int64_t>&);
|
||||
//
|
||||
// view1_ViewMeta(const SerializableTuple& tpl)
|
||||
// : view1_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
||||
//
|
||||
// view1_ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
|
||||
// : ViewMeta(/*has_symbolic_inputs=*/false),
|
||||
// reapply_views(reapply_views),
|
||||
// size(size) {}
|
||||
//
|
||||
// Tensor forward(const Tensor& base) override {
|
||||
// return base.view1(...);
|
||||
// }
|
||||
//
|
||||
// The forward_fn lambda describes how to replay view1 on a tensor.
|
||||
// Tensor reverse(const Tensor& base, const Tensor& mutated_view) override {
|
||||
// return at::functionalization::impl::view1_inverse(base, mutated_view,
|
||||
// ...);
|
||||
// }
|
||||
//
|
||||
// The reverse_fn lambda describes how, given a tensor that is already a view,
|
||||
// SerializableTuple to_serializable_tuple() {
|
||||
// return std::make_tuple(reapply_views, size);
|
||||
// }
|
||||
//
|
||||
// bool reapply_views;
|
||||
// std::vector<int64_t> size;
|
||||
// };
|
||||
//
|
||||
// The forward function describes how to replay view1 on a tensor.
|
||||
//
|
||||
// The reverse function describes how, given a tensor that is already a view,
|
||||
// to get the corresponding base tensor. See Note [Functionalization Pass:
|
||||
// View Inverses] for details.
|
||||
//
|
||||
// `SerializableTuple` is a typedef that defines an `std::tuple<...>` type
|
||||
// representing the `ViewMeta` instance state. Methods that take in/return such
|
||||
// a type are used for supporting pickle serialization.
|
||||
struct ViewMeta {
|
||||
ViewMeta(
|
||||
std::function<Tensor(const Tensor&, int64_t)> forward,
|
||||
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
|
||||
bool has_symbolic_inputs,
|
||||
bool is_multi_output = false,
|
||||
bool is_as_strided = false,
|
||||
int64_t out_idx = 0)
|
||||
: forward_fn(std::move(forward)),
|
||||
reverse_fn(std::move(reverse)),
|
||||
out_index(out_idx),
|
||||
: out_index(out_idx),
|
||||
is_multi_output(is_multi_output),
|
||||
is_as_strided(is_as_strided),
|
||||
has_symbolic_inputs(has_symbolic_inputs) {}
|
||||
|
||||
std::function<Tensor(const Tensor&, int64_t)> forward_fn;
|
||||
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
|
||||
virtual ~ViewMeta() = default;
|
||||
|
||||
virtual Tensor forward(const Tensor& base) = 0;
|
||||
virtual Tensor reverse(const Tensor& base, const Tensor& mutated_view) = 0;
|
||||
|
||||
// See Note [out_idx in ViewMeta]
|
||||
int64_t out_index;
|
||||
|
||||
@ -57,10 +102,17 @@ struct ViewMeta {
|
||||
// Tells us if this view operation has any symbolic inputs
|
||||
bool has_symbolic_inputs;
|
||||
|
||||
// Returns a copy of the current ViewMeta, if out_idx matches the current
|
||||
// out_index. Otherwise, returns a new ViewMeta with the same forward/reverse
|
||||
// Returns a new ViewMeta with the same forward/reverse
|
||||
// functions, but a new out index.
|
||||
ViewMeta to_out_idx(int64_t out_idx);
|
||||
//
|
||||
// This method should be implemented by those `ViewMeta` that have more than
|
||||
// one output.
|
||||
virtual std::shared_ptr<ViewMeta> to_out_index(int64_t out_index) {
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
false,
|
||||
"ViewMeta::to_out_index not implemented. ",
|
||||
"Likely because there's only one output.");
|
||||
}
|
||||
};
|
||||
|
||||
// FunctionalStorageImpl is a subclass of StorageImpl used by the
|
||||
@ -93,14 +145,14 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
||||
const at::Tensor new_val;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
||||
const std::vector<ViewMeta> view_metas;
|
||||
const std::vector<std::shared_ptr<ViewMeta>> view_metas;
|
||||
};
|
||||
|
||||
explicit FunctionalStorageImpl(const Tensor& value);
|
||||
|
||||
void add_update(
|
||||
const Tensor& updated_val,
|
||||
const std::vector<ViewMeta>& view_metas);
|
||||
const std::vector<std::shared_ptr<ViewMeta>>& view_metas);
|
||||
bool apply_updates();
|
||||
const Tensor& base() {
|
||||
return base_;
|
||||
|
||||
@ -129,17 +129,19 @@ void FunctionalTensorWrapper::freeze_storage() const {
|
||||
// - view_value: The output tensor that we need to wrap.
|
||||
// - base: The "base" of the view that `view_value` was generated from.
|
||||
// See Note [Functionalization: Alias Removal Part 2] for more details on the mutation replay logic.
|
||||
FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const FunctionalTensorWrapper* base, const functionalization::ViewMeta& meta)
|
||||
: c10::TensorImpl(
|
||||
c10::DispatchKeySet(DispatchKey::Functionalize),
|
||||
view_value.dtype(),
|
||||
view_value.device()
|
||||
),
|
||||
value_(view_value),
|
||||
is_multi_output_view_(base->is_multi_output_view_ || meta.is_multi_output),
|
||||
was_storage_changed_(base->was_storage_changed_),
|
||||
is_symbolic_(base->is_symbolic_)
|
||||
{
|
||||
FunctionalTensorWrapper::FunctionalTensorWrapper(
|
||||
const Tensor& view_value,
|
||||
const FunctionalTensorWrapper* base,
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta)
|
||||
: c10::TensorImpl(
|
||||
c10::DispatchKeySet(DispatchKey::Functionalize),
|
||||
view_value.dtype(),
|
||||
view_value.device()),
|
||||
value_(view_value),
|
||||
is_multi_output_view_(
|
||||
base->is_multi_output_view_ || meta->is_multi_output),
|
||||
was_storage_changed_(base->was_storage_changed_),
|
||||
is_symbolic_(base->is_symbolic_) {
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(value_));
|
||||
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
||||
set_constructor_metadata();
|
||||
@ -148,11 +150,10 @@ FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const
|
||||
view_metas_ = base->view_metas_; // copy
|
||||
}
|
||||
view_metas_.push_back(meta);
|
||||
maybe_mark_symbolic(meta);
|
||||
maybe_mark_symbolic(meta.get());
|
||||
storage_ = base->storage_; // alias this tensor's storage with the base tensor's
|
||||
}
|
||||
|
||||
|
||||
functionalization::FunctionalStorageImpl* FunctionalTensorWrapper::functional_storage_impl() const {
|
||||
return static_cast<functionalization::FunctionalStorageImpl*>(storage_.unsafeGetStorageImpl());
|
||||
}
|
||||
@ -176,18 +177,18 @@ bool FunctionalTensorWrapper::is_up_to_date() const {
|
||||
}
|
||||
|
||||
// See Note [Functionalization Pass - Inplace View Ops]
|
||||
void FunctionalTensorWrapper::mutate_view_meta(const at::functionalization::ViewMeta& meta) {
|
||||
void FunctionalTensorWrapper::mutate_view_meta(const std::shared_ptr<at::functionalization::ViewMeta>& meta) {
|
||||
view_metas_.push_back(meta);
|
||||
// Manually track the fact that this tensor received a metadata mutation!
|
||||
has_metadata_mutation_ = true;
|
||||
// Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation.
|
||||
maybe_mark_symbolic(meta);
|
||||
maybe_mark_symbolic(meta.get());
|
||||
// Note [Functionalization Pass - Inplace View Ops]
|
||||
// So, these ops are special - they're mutation AND view ops. They get special codegen.
|
||||
// An example is transpose_, e.g. `a.transpose_()`
|
||||
// Calling transpose_() should ensure that a gets an alias, and append the new ViewMeta to a's current list of ViewMetas.
|
||||
at::AutoDispatchSkipFunctionalize guard;
|
||||
value_ = meta.forward_fn(value_, meta.out_index);
|
||||
value_ = meta->forward(value_);
|
||||
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
||||
}
|
||||
|
||||
@ -368,15 +369,8 @@ void FunctionalTensorWrapper::sync_() {
|
||||
regenerate_from_base();
|
||||
}
|
||||
|
||||
Tensor FunctionalTensorWrapper::apply_view_metas(const Tensor& base) {
|
||||
auto t = base;
|
||||
|
||||
// Reapply views to get the viewed tensor from the base in alias_
|
||||
for (auto& view_meta: view_metas_) {
|
||||
t = view_meta.forward_fn(t, view_meta.out_index);
|
||||
}
|
||||
|
||||
return t;
|
||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& FunctionalTensorWrapper::view_metas() const {
|
||||
return view_metas_;
|
||||
}
|
||||
|
||||
void FunctionalTensorWrapper::regenerate_from_base() {
|
||||
@ -385,7 +379,7 @@ void FunctionalTensorWrapper::regenerate_from_base() {
|
||||
auto t = storage_impl->base();
|
||||
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||
t = apply_view_metas(t);
|
||||
t = at::functionalization::impl::apply_view_meta_sequence(t, view_metas_);
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||
|
||||
replace_(t, /*from_lazy_regenerate=*/true);
|
||||
@ -724,11 +718,11 @@ bool isFunctionalTensor(const std::optional<Tensor>& t) {
|
||||
}
|
||||
|
||||
bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
|
||||
if (t_list.empty()) return false;
|
||||
if (t_list.empty()) { return false; }
|
||||
auto functional_count = 0;
|
||||
for (const auto i : c10::irange(t_list.size())) {
|
||||
auto const & e= t_list[i];
|
||||
if (!e.has_value() || !e->defined()) continue;
|
||||
if (!e.has_value() || !e->defined()) { continue; }
|
||||
if (isFunctionalTensor(e)) {
|
||||
++functional_count;
|
||||
}
|
||||
@ -738,10 +732,10 @@ bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
|
||||
|
||||
template <typename T>
|
||||
static bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
|
||||
if (list.size() == 0) return false;
|
||||
if (list.size() == 0) { return false; }
|
||||
auto functional_count = 0;
|
||||
for (const auto& tensor : list) {
|
||||
if (!tensor.defined()) continue;
|
||||
if (!tensor.defined()) { continue; }
|
||||
if (isFunctionalTensor(tensor)) {
|
||||
++functional_count;
|
||||
}
|
||||
@ -759,20 +753,28 @@ void freeze_functional_tensor(const Tensor& tensor) {
|
||||
functional_base_impl->freeze_storage();
|
||||
}
|
||||
|
||||
Tensor create_functional_tensor_with_view_meta(const at::Tensor& view_to_wrap, const at::Tensor& base, functionalization::ViewMeta meta, int64_t out_idx) {
|
||||
Tensor create_functional_tensor_with_view_meta(
|
||||
const at::Tensor& view_to_wrap,
|
||||
const at::Tensor& base,
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta,
|
||||
int64_t out_idx) {
|
||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(view_to_wrap));
|
||||
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(base));
|
||||
auto functional_base_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(base);
|
||||
auto meta_ = meta;
|
||||
if (out_idx != 0) {
|
||||
// Note [out_idx in ViewMeta]
|
||||
// When a view op outputs multiple tensors, each output needs its own separate ViewMeta.
|
||||
// Each ViewMeta also tracks the index of the particular output tensor, which is needed in the reverse function.
|
||||
meta = meta.to_out_idx(out_idx);
|
||||
meta_ = meta->to_out_index(out_idx);
|
||||
}
|
||||
return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta);
|
||||
return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta_);
|
||||
}
|
||||
|
||||
std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_to_wrap, const at::Tensor& base, const functionalization::ViewMeta& meta) {
|
||||
std::vector<Tensor> create_functional_tensor_with_view_meta(
|
||||
ITensorListRef view_to_wrap,
|
||||
const at::Tensor& base,
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta) {
|
||||
std::vector<Tensor> outputs(view_to_wrap.size());
|
||||
int64_t i = 0;
|
||||
for (const auto& tensor : view_to_wrap) {
|
||||
@ -782,12 +784,22 @@ std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_
|
||||
return outputs;
|
||||
}
|
||||
|
||||
void mutate_view_meta(const at::Tensor& self, const functionalization::ViewMeta& meta) {
|
||||
void mutate_view_meta(const at::Tensor& self, const std::shared_ptr<functionalization::ViewMeta>& meta) {
|
||||
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self));
|
||||
auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self);
|
||||
self_impl->mutate_view_meta(meta);
|
||||
}
|
||||
|
||||
// Replays a recorded chain of views on top of `base`.
//
// Starting from `base`, calls `forward` on every ViewMeta in `sequence`, in
// order, feeding each result into the next call, and returns the last result.
// An empty `sequence` returns `base` itself.
Tensor apply_view_meta_sequence(
    const Tensor& base,
    const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence) {
  Tensor result = base;
  for (const auto& view_meta : sequence) {
    result = view_meta->forward(result);
  }
  return result;
}
|
||||
|
||||
// Note [Propagating strides in the functionalization pass]
|
||||
// In order to properly compute stride information, the functionalization pass
|
||||
// calls each {view} reference implementations with meta tensors.
|
||||
@ -881,7 +893,7 @@ void functionalize_op_helper(const c10::OperatorHandle& op, torch::jit::Stack* s
|
||||
const auto& ivalue = returns[idx];
|
||||
if (ivalue.isTensor()) {
|
||||
const auto& t = ivalue.toTensor();
|
||||
if (!t.defined()) continue;
|
||||
if (!t.defined()) { continue; }
|
||||
at::functionalization::impl::sync(t);
|
||||
auto t_new = c10::IValue(at::functionalization::impl::from_functional_tensor(t));
|
||||
(*stack)[returns_begin + idx] = t_new;
|
||||
|
||||
@ -56,7 +56,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
||||
explicit FunctionalTensorWrapper(
|
||||
const Tensor& view_value,
|
||||
const FunctionalTensorWrapper* base,
|
||||
const functionalization::ViewMeta& meta);
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
||||
|
||||
// Get the underlying, actual tensor, that doesn't know anything about
|
||||
// functionalization.
|
||||
@ -99,17 +99,17 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
||||
->are_all_mutations_under_no_grad_or_inference_mode();
|
||||
}
|
||||
|
||||
void maybe_mark_symbolic(const functionalization::ViewMeta& meta) {
|
||||
is_symbolic_ = is_symbolic_ | meta.has_symbolic_inputs;
|
||||
void maybe_mark_symbolic(functionalization::ViewMeta* meta) {
|
||||
is_symbolic_ = is_symbolic_ | meta->has_symbolic_inputs;
|
||||
}
|
||||
|
||||
bool is_symbolic() const {
|
||||
return is_symbolic_;
|
||||
}
|
||||
|
||||
// Applies the forward function of every ViewMeta collected in the current instance
|
||||
// to some other base.
|
||||
Tensor apply_view_metas(const Tensor& base);
|
||||
// Retrieves the ViewMeta sequence of this tensor.
|
||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& view_metas()
|
||||
const;
|
||||
|
||||
// Syncs the underlying tensor with its alias, if it's out of date. This
|
||||
// involves two steps: 1) Apply any pending updates/mutations to the alias 2)
|
||||
@ -146,7 +146,8 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
||||
// from the base tensor. This method is used by inplace-view ops like
|
||||
// transpose_. It appends a ViewMeta to the existing stack, and refreshes the
|
||||
// tensor by replaying the views off of the alias.
|
||||
void mutate_view_meta(const at::functionalization::ViewMeta& meta);
|
||||
void mutate_view_meta(
|
||||
const std::shared_ptr<at::functionalization::ViewMeta>& meta);
|
||||
|
||||
// Custom implementation of self.set_(src)
|
||||
void set__impl(const FunctionalTensorWrapper* other);
|
||||
@ -285,7 +286,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
||||
bool is_symbolic_ = false;
|
||||
|
||||
size_t generation_ = 0;
|
||||
std::vector<at::functionalization::ViewMeta> view_metas_;
|
||||
std::vector<std::shared_ptr<at::functionalization::ViewMeta>> view_metas_;
|
||||
|
||||
protected:
|
||||
static void copy_tensor_metadata(
|
||||
@ -377,16 +378,20 @@ TORCH_API void propagate_xla_data_direct(
|
||||
Tensor create_functional_tensor_with_view_meta(
|
||||
const Tensor& view_to_wrap,
|
||||
const Tensor& base,
|
||||
functionalization::ViewMeta meta,
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta,
|
||||
int64_t out_idx = 0);
|
||||
std::vector<Tensor> create_functional_tensor_with_view_meta(
|
||||
ITensorListRef view_to_wrap,
|
||||
const Tensor& base,
|
||||
const functionalization::ViewMeta& meta);
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
||||
|
||||
void mutate_view_meta(
|
||||
const Tensor& self,
|
||||
const functionalization::ViewMeta& meta);
|
||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
||||
|
||||
TORCH_API Tensor apply_view_meta_sequence(
|
||||
const Tensor& base,
|
||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence);
|
||||
|
||||
void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out);
|
||||
void set_sizes_strides_offset(
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
#include <ATen/FunctionalizeFallbackKernel.h>
|
||||
|
||||
#include <ATen/core/dispatch/Dispatcher.h>
|
||||
#include <ATen/core/LegacyTypeDispatch.h>
|
||||
#include <ATen/EmptyTensor.h>
|
||||
@ -7,7 +9,6 @@
|
||||
#include <torch/library.h>
|
||||
#include <c10/util/irange.h>
|
||||
#include <c10/util/strides.h>
|
||||
#include <ATen/EmptyTensor.h>
|
||||
|
||||
#ifndef AT_PER_OPERATOR_HEADERS
|
||||
#include <ATen/ATen.h>
|
||||
@ -28,6 +29,31 @@
|
||||
#include <utility>
|
||||
#endif
|
||||
|
||||
namespace at::functionalization {
|
||||
|
||||
Tensor resize__ViewMeta::forward(const Tensor& base) {
|
||||
if (reapply_views) {
|
||||
return base.as_strided(size, c10::contiguous_strides(size));
|
||||
} else {
|
||||
return at::as_strided_copy(base, size, c10::contiguous_strides(size));
|
||||
}
|
||||
}
|
||||
|
||||
Tensor resize__ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
|
||||
return base.as_strided_scatter(
|
||||
mutated_view, size, c10::contiguous_strides(size));
|
||||
}
|
||||
|
||||
Tensor _unsafe_view_ViewMeta::forward(const Tensor& base) {
|
||||
return at::_unsafe_view_symint(base, size);
|
||||
}
|
||||
|
||||
Tensor _unsafe_view_ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
|
||||
return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
|
||||
}
|
||||
|
||||
} // namespace at::functionalization
|
||||
|
||||
namespace {
|
||||
void functionalizeFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatchKeySet [[maybe_unused]], torch::jit::Stack* stack) {
|
||||
const auto& schema = op.schema();
|
||||
@ -106,7 +132,9 @@ namespace {
|
||||
const auto& ivalue = returns[idx];
|
||||
if (ivalue.isTensor() && should_wrap_outputs) {
|
||||
const auto& t = ivalue.toTensor();
|
||||
if (!t.defined()) continue;
|
||||
if (!t.defined()) {
|
||||
continue;
|
||||
}
|
||||
auto t_new = c10::IValue(at::functionalization::impl::to_functional_tensor(t));
|
||||
(*stack)[returns_begin + idx] = t_new;
|
||||
} else if (ivalue.isTensorList() && should_wrap_outputs) {
|
||||
@ -169,19 +197,8 @@ static const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatch
|
||||
// The output of resizing is equivalent to taking a slice of a larger tensor.
|
||||
// We have to emulate this "slicing" with an as_strided call.
|
||||
auto reapply_views = at::functionalization::impl::getFunctionalizationReapplyViewsTLS();
|
||||
at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
|
||||
[reapply_views = reapply_views, size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||
if (reapply_views) {
|
||||
return base.as_strided(size, c10::contiguous_strides(size));
|
||||
} else {
|
||||
return at::as_strided_copy(base, size, c10::contiguous_strides(size));
|
||||
}
|
||||
},
|
||||
[size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||
return base.as_strided_scatter(mutated_view, size, c10::contiguous_strides(size));
|
||||
},
|
||||
/*has_symbolic_inputs=*/false
|
||||
);
|
||||
auto view_meta = std::make_shared<at::functionalization::resize__ViewMeta>(
|
||||
reapply_views, size.vec());
|
||||
at::functionalization::impl::mutate_view_meta(self, view_meta);
|
||||
return self;
|
||||
}
|
||||
@ -300,17 +317,11 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt
|
||||
tmp_output = at::_unsafe_view_symint(self_, size);
|
||||
}
|
||||
|
||||
bool has_symbolic_inputs = std::any_of(size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
|
||||
|
||||
at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
|
||||
[size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||
return at::_unsafe_view_symint(base, size);
|
||||
},
|
||||
[size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||
return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
|
||||
},
|
||||
/*has_symbolic_inputs=*/has_symbolic_inputs
|
||||
);
|
||||
bool has_symbolic_inputs = std::any_of(
|
||||
size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
|
||||
auto view_meta =
|
||||
std::make_shared<at::functionalization::_unsafe_view_ViewMeta>(
|
||||
has_symbolic_inputs, size.vec());
|
||||
|
||||
auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta));
|
||||
// See Note [Propagating strides in the functionalization pass]
|
||||
|
||||
58
aten/src/ATen/FunctionalizeFallbackKernel.h
Normal file
58
aten/src/ATen/FunctionalizeFallbackKernel.h
Normal file
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/FunctionalStorageImpl.h>
|
||||
|
||||
namespace at::functionalization {
|
||||
|
||||
// `ViewMeta` implementation for `resize_` operation.
|
||||
struct TORCH_API resize__ViewMeta : public ViewMeta {
|
||||
FUNCTIONALIZATION_VIEWMETA_NAME(resize__ViewMeta)
|
||||
FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
||||
bool /* reapply_views */,
|
||||
const std::vector<int64_t>&);
|
||||
|
||||
resize__ViewMeta(const SerializableTuple& tpl)
|
||||
: resize__ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
||||
|
||||
resize__ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
|
||||
: ViewMeta(/*has_symbolic_inputs=*/false),
|
||||
reapply_views(reapply_views),
|
||||
size(size) {}
|
||||
|
||||
Tensor forward(const Tensor& base) override;
|
||||
Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
|
||||
|
||||
SerializableTuple to_serializable_tuple() {
|
||||
return std::make_tuple(reapply_views, size);
|
||||
}
|
||||
|
||||
bool reapply_views;
|
||||
std::vector<int64_t> size;
|
||||
};
|
||||
|
||||
// `ViewMeta` implementation for `_unsafe_view` operation.
|
||||
struct TORCH_API _unsafe_view_ViewMeta : public ViewMeta {
|
||||
FUNCTIONALIZATION_VIEWMETA_NAME(_unsafe_view_ViewMeta)
|
||||
FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
||||
bool /* has_symbolic_inputs */,
|
||||
const std::vector<c10::SymInt>&);
|
||||
|
||||
_unsafe_view_ViewMeta(const SerializableTuple& tpl)
|
||||
: _unsafe_view_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
||||
|
||||
_unsafe_view_ViewMeta(
|
||||
bool has_symbolic_inputs,
|
||||
const std::vector<c10::SymInt>& size)
|
||||
: ViewMeta(has_symbolic_inputs), size(size) {}
|
||||
|
||||
Tensor forward(const Tensor& base) override;
|
||||
Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
|
||||
|
||||
SerializableTuple to_serializable_tuple() {
|
||||
return std::make_tuple(has_symbolic_inputs, size);
|
||||
}
|
||||
|
||||
std::vector<c10::SymInt> size;
|
||||
};
|
||||
|
||||
} // namespace at::functionalization
|
||||
@ -12,7 +12,7 @@
|
||||
|
||||
#define MPS_ERROR_NOT_COMPILED "PyTorch code is not compiled with MPS enabled"
|
||||
#define MPS_ERROR_RUNTIME_TOO_LOW \
|
||||
"The MPS backend is supported on MacOS 13.0+.", \
|
||||
"The MPS backend is supported on MacOS 14.0+. ", \
|
||||
"Current OS version can be queried using `sw_vers`"
|
||||
#define MPS_ERROR_DOUBLE_NOT_SUPPORTED "Cannot convert a MPS Tensor to float64 dtype " \
|
||||
"as the MPS framework doesn't support float64. Please use float32 instead."
|
||||
|
||||
@ -70,7 +70,10 @@ void MPSHooks::commitStream() const {
|
||||
}
|
||||
|
||||
void* MPSHooks::getCommandBuffer() const {
|
||||
return at::mps::getDefaultMPSStream()->commandBuffer();
|
||||
auto stream = at::mps::getDefaultMPSStream();
|
||||
// Release the pending computeCommandEncoder, as extensions are likely to allocate a new one
|
||||
stream->endKernelCoalescing();
|
||||
return stream->commandBuffer();
|
||||
}
|
||||
|
||||
void* MPSHooks::getDispatchQueue() const {
|
||||
|
||||
@ -158,7 +158,18 @@ void MPSStream::fill(id<MTLBuffer> buffer, uint8_t value, size_t length, size_t
|
||||
endKernelCoalescing();
|
||||
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer() blitCommandEncoder];
|
||||
|
||||
[blitEncoder fillBuffer:buffer range:NSMakeRange(offset, length) value:value];
|
||||
// For some reason fillBuffer stopped working for length > 4GB on macOS 26
|
||||
// See https://github.com/pytorch/pytorch/issues/163962
|
||||
// Work around this by batching the fill commands into 4GB chunks
|
||||
constexpr size_t max_copy_size = 0x100000000; // 4GB
|
||||
size_t bytes_filled = 0;
|
||||
size_t bytes_remains = length;
|
||||
while (bytes_remains > 0) {
|
||||
NSUInteger bytes_to_copy = std::min(max_copy_size, bytes_remains);
|
||||
[blitEncoder fillBuffer:buffer range:NSMakeRange(offset + bytes_filled, bytes_to_copy) value:value];
|
||||
bytes_filled += bytes_to_copy;
|
||||
bytes_remains -= bytes_to_copy;
|
||||
}
|
||||
[blitEncoder endEncoding];
|
||||
synchronize(syncType);
|
||||
}
|
||||
|
||||
@ -410,11 +410,23 @@ struct ConvParams {
|
||||
// cudnn and miopen are guaranteed not to be on mobile, and T102591915 / T110194934 suggest
|
||||
// that maybe the compiledWithCuDNN() check sometimes segfaults (though I can't imagine how)
|
||||
#if !defined(C10_MOBILE)
|
||||
if (!detail::getCUDAHooks().compiledWithCuDNN()) {
|
||||
if (!detail::getCUDAHooks().compiledWithCuDNN() || !input.is_cuda() || !cudnn_enabled) {
|
||||
return false;
|
||||
}
|
||||
static long cudnn_version = detail::getCUDAHooks().versionCuDNN();
|
||||
// broken on cuDNN 9.8 - 9.14
|
||||
if (cudnn_version >= 90800 && cudnn_version < 91500) {
|
||||
if (cudnn_conv_suggest_memory_format(input, weight) == at::MemoryFormat::Contiguous &&
|
||||
(input.scalar_type() == at::kBFloat16 || input.scalar_type() == at::kHalf) &&
|
||||
weight.dim() == 5) {
|
||||
for (int i = 2; i < weight.dim(); i++) {
|
||||
if (weight.size(i) != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (needs_64bit_indexing_no_split(input, weight)) {
|
||||
static long cudnn_version = detail::getCUDAHooks().versionCuDNN();
|
||||
if (!(cudnn_version >= 90300 && at::native::cudnnv8_enabled_check_debug())) {
|
||||
TORCH_WARN_ONCE("cuDNN cannot be used for large non-batch-splittable convolutions"
|
||||
" if the V8 API is not enabled or before cuDNN version 9.3+."
|
||||
@ -422,9 +434,6 @@ struct ConvParams {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!input.is_cuda() || !cudnn_enabled) {
|
||||
return false;
|
||||
}
|
||||
if (input.scalar_type() == at::kBFloat16 || weight.scalar_type() == at::kBFloat16) {
|
||||
if (!(detail::getCUDAHooks().supportsBFloat16ConvolutionWithCuDNNv8() && at::native::cudnnv8_enabled_check_debug())) {
|
||||
return false;
|
||||
@ -443,16 +452,19 @@ struct ConvParams {
|
||||
|
||||
// Use cudnn for FP16 depthwise convolutions
|
||||
bool use_cudnn_depthwise(const at::Tensor& input, const at::Tensor& weight) const {
|
||||
if (!detail::getCUDAHooks().compiledWithCuDNN()) {
|
||||
if (!cudnn_enabled || !detail::getCUDAHooks().compiledWithCuDNN() || !input.is_cuda()) {
|
||||
return false;
|
||||
}
|
||||
if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous && use_cudnn(input, weight)) {
|
||||
// always use cudnn_depthwise for channels_last format
|
||||
return true;
|
||||
}
|
||||
// native kernel doesn't support 64-bit non-splittable case
|
||||
if (cudnn_enabled && !(canUse32BitIndexMath(input) && canUse32BitIndexMath(weight))) {
|
||||
if (!(canUse32BitIndexMath(input) && canUse32BitIndexMath(weight))) {
|
||||
static long cudnn_version = detail::getCUDAHooks().compiledWithCuDNN() ? detail::getCUDAHooks().versionCuDNN() : -1;
|
||||
// TODO(eqy): remove this once cuDNN fixes 64-bit depthwise support, first broken in 9.11x
|
||||
if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous) {
|
||||
if (cudnn_version < 0 || cudnn_version > 91000) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(cudnn_version >= 90300 && at::native::cudnnv8_enabled_check_debug())) {
|
||||
TORCH_WARN_ONCE("cuDNN cannot be used for large non-batch-splittable convolutions"
|
||||
" if the V8 API is not enabled or before cuDNN version 9.3+."
|
||||
@ -462,6 +474,10 @@ struct ConvParams {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous) {
|
||||
// always use cudnn_depthwise for channels_last format
|
||||
return true;
|
||||
}
|
||||
if (detail::getCUDAHooks().supportsDepthwiseConvolutionWithCuDNN()) {
|
||||
bool kernel_cond = (use_cudnn(input, weight) &&
|
||||
input.scalar_type() == kHalf && // only for FP16
|
||||
|
||||
@ -14,8 +14,8 @@ template <typename T, int D, int V = D>
|
||||
device T* out [[buffer(3)]],
|
||||
const constant uint& gqa_factor [[buffer(4)]],
|
||||
const constant uint& N [[buffer(5)]],
|
||||
const constant uint2& k_head_seq_stride [[buffer(6)]],
|
||||
const constant uint2& v_head_seq_stride [[buffer(7)]],
|
||||
const constant uint3& qkv_head_strides [[buffer(6)]],
|
||||
const constant uint3& qkv_seq_strides [[buffer(7)]],
|
||||
const constant float& scale [[buffer(8)]],
|
||||
const device bool* mask [[buffer(9)]],
|
||||
const constant uint3& mask_strides [[buffer(10)]],
|
||||
@ -28,10 +28,12 @@ template <typename T, int D, int V = D>
|
||||
constexpr uint BD = 32;
|
||||
constexpr uint qk_per_thread = D / BD;
|
||||
constexpr uint v_per_thread = V / BD;
|
||||
const uint k_head_stride = k_head_seq_stride.x;
|
||||
const uint k_seq_stride = k_head_seq_stride.y;
|
||||
const uint v_head_stride = v_head_seq_stride.x;
|
||||
const uint v_seq_stride = v_head_seq_stride.y;
|
||||
const uint q_head_stride = qkv_head_strides.x;
|
||||
const uint q_seq_stride = qkv_seq_strides.x;
|
||||
const uint k_head_stride = qkv_head_strides.y;
|
||||
const uint k_seq_stride = qkv_seq_strides.y;
|
||||
const uint v_head_stride = qkv_head_strides.z;
|
||||
const uint v_seq_stride = qkv_seq_strides.z;
|
||||
const uint mask_head_stride = mask_strides.x;
|
||||
const uint mask_kv_seq_stride = mask_strides.y;
|
||||
const uint mask_q_seq_stride = mask_strides.z;
|
||||
@ -54,9 +56,9 @@ template <typename T, int D, int V = D>
|
||||
const int kv_head_idx = head_idx / gqa_factor;
|
||||
const int Q = tpg.y;
|
||||
const int group_offset = head_idx * Q + q_seq_idx;
|
||||
const int q_offset = group_offset;
|
||||
const int o_offset = group_offset;
|
||||
queries += q_offset * D + simd_lid * qk_per_thread;
|
||||
queries += head_idx * q_head_stride + q_seq_idx * q_seq_stride +
|
||||
simd_lid * qk_per_thread;
|
||||
keys += kv_head_idx * k_head_stride + simd_gid * k_seq_stride +
|
||||
simd_lid * qk_per_thread;
|
||||
values += kv_head_idx * v_head_stride + simd_gid * v_seq_stride +
|
||||
@ -156,8 +158,8 @@ template <typename T, int D, int V = D>
|
||||
device float* maxs [[buffer(5)]],
|
||||
const constant uint& gqa_factor [[buffer(6)]],
|
||||
const constant uint& N [[buffer(7)]],
|
||||
const constant uint2& k_head_seq_stride [[buffer(8)]],
|
||||
const constant uint2& v_head_seq_stride [[buffer(9)]],
|
||||
const constant uint3& qkv_head_strides [[buffer(8)]],
|
||||
const constant uint3& qkv_seq_strides [[buffer(9)]],
|
||||
const constant float& scale [[buffer(10)]],
|
||||
const device bool* mask [[buffer(11)]],
|
||||
const constant uint3& mask_strides [[buffer(12)]],
|
||||
@ -170,10 +172,12 @@ template <typename T, int D, int V = D>
|
||||
constexpr int BD = 32;
|
||||
constexpr int qk_per_thread = D / BD;
|
||||
constexpr int v_per_thread = V / BD;
|
||||
const int k_head_stride = k_head_seq_stride.x;
|
||||
const int k_seq_stride = k_head_seq_stride.y;
|
||||
const int v_head_stride = v_head_seq_stride.x;
|
||||
const int v_seq_stride = v_head_seq_stride.y;
|
||||
const int q_head_stride = qkv_head_strides.x;
|
||||
const int q_seq_stride = qkv_seq_strides.x;
|
||||
const int k_head_stride = qkv_head_strides.y;
|
||||
const int k_seq_stride = qkv_seq_strides.y;
|
||||
const int v_head_stride = qkv_head_strides.z;
|
||||
const int v_seq_stride = qkv_seq_strides.z;
|
||||
const int mask_kv_seq_stride = mask_strides.x;
|
||||
const int mask_q_seq_stride = mask_strides.y;
|
||||
const int mask_head_stride = mask_strides.z;
|
||||
@ -196,10 +200,10 @@ template <typename T, int D, int V = D>
|
||||
const int head_idx = tid.x;
|
||||
const int q_seq_idx = tid.y;
|
||||
const int o_offset = head_idx * tpg.y + q_seq_idx;
|
||||
const int q_offset = o_offset;
|
||||
const int kv_head_idx = head_idx / gqa_factor;
|
||||
|
||||
queries += q_offset * D + simd_lid * qk_per_thread;
|
||||
queries += head_idx * q_head_stride + q_seq_idx * q_seq_stride +
|
||||
simd_lid * qk_per_thread;
|
||||
keys += kv_head_idx * k_head_stride +
|
||||
(block_idx * BN + simd_gid) * k_seq_stride + simd_lid * qk_per_thread;
|
||||
values += kv_head_idx * v_head_stride +
|
||||
@ -520,25 +524,25 @@ kernel void attention(
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANTIATE_SDPA_VECTOR(DTYPE, QK_DIM, VALUE_DIM) \
|
||||
template [[host_name("sdpa_vector_" #DTYPE "_" #QK_DIM \
|
||||
"_" #VALUE_DIM)]] kernel void \
|
||||
sdpa_vector<DTYPE, QK_DIM, VALUE_DIM>( \
|
||||
const device DTYPE* queries [[buffer(0)]], \
|
||||
const device DTYPE* keys [[buffer(1)]], \
|
||||
const device DTYPE* values [[buffer(2)]], \
|
||||
device DTYPE* out [[buffer(3)]], \
|
||||
const constant uint& gqa_factor [[buffer(4)]], \
|
||||
const constant uint& N [[buffer(5)]], \
|
||||
const constant uint2& k_head_seq_stride [[buffer(6)]], \
|
||||
const constant uint2& v_head_seq_stride [[buffer(7)]], \
|
||||
const constant float& scale [[buffer(8)]], \
|
||||
const device bool* mask [[buffer(9)]], \
|
||||
const constant uint3& mask_strides [[buffer(10)]], \
|
||||
const constant bool& has_mask [[buffer(11)]], \
|
||||
uint3 tid [[threadgroup_position_in_grid]], \
|
||||
uint3 tpg [[threadgroups_per_grid]], \
|
||||
uint simd_gid [[simdgroup_index_in_threadgroup]], \
|
||||
#define INSTANTIATE_SDPA_VECTOR(DTYPE, QK_DIM, VALUE_DIM) \
|
||||
template [[host_name("sdpa_vector_" #DTYPE "_" #QK_DIM \
|
||||
"_" #VALUE_DIM)]] kernel void \
|
||||
sdpa_vector<DTYPE, QK_DIM, VALUE_DIM>( \
|
||||
const device DTYPE* queries [[buffer(0)]], \
|
||||
const device DTYPE* keys [[buffer(1)]], \
|
||||
const device DTYPE* values [[buffer(2)]], \
|
||||
device DTYPE* out [[buffer(3)]], \
|
||||
const constant uint& gqa_factor [[buffer(4)]], \
|
||||
const constant uint& N [[buffer(5)]], \
|
||||
const constant uint3& qkv_head_strides [[buffer(6)]], \
|
||||
const constant uint3& qkv_seq_strides [[buffer(7)]], \
|
||||
const constant float& scale [[buffer(8)]], \
|
||||
const device bool* mask [[buffer(9)]], \
|
||||
const constant uint3& mask_strides [[buffer(10)]], \
|
||||
const constant bool& has_mask [[buffer(11)]], \
|
||||
uint3 tid [[threadgroup_position_in_grid]], \
|
||||
uint3 tpg [[threadgroups_per_grid]], \
|
||||
uint simd_gid [[simdgroup_index_in_threadgroup]], \
|
||||
uint simd_lid [[thread_index_in_simdgroup]]);
|
||||
|
||||
#define INSTANTIATE_SDPA_VECTOR_2PASS_1(DTYPE, QK_DIM, VALUE_DIM) \
|
||||
@ -553,8 +557,8 @@ kernel void attention(
|
||||
device float* maxs [[buffer(5)]], \
|
||||
const constant uint& gqa_factor [[buffer(6)]], \
|
||||
const constant uint& N [[buffer(7)]], \
|
||||
const constant uint2& k_head_seq_stride [[buffer(8)]], \
|
||||
const constant uint2& v_head_seq_stride [[buffer(9)]], \
|
||||
const constant uint3& qkv_head_strides [[buffer(8)]], \
|
||||
const constant uint3& qkv_seq_strides [[buffer(9)]], \
|
||||
const constant float& scale [[buffer(10)]], \
|
||||
const device bool* mask [[buffer(11)]], \
|
||||
const constant uint3& mask_strides [[buffer(12)]], \
|
||||
|
||||
@ -182,6 +182,8 @@ static std::tuple<Tensor, Tensor> sdpa_vector_fast_mps(const Tensor& q_,
|
||||
uint maxSeqLength = k_.size(2);
|
||||
uint N = k_.size(2);
|
||||
uint B = q_.size(0) * q_.size(1);
|
||||
uint q_head_stride = q_.stride(1);
|
||||
uint q_seq_stride = q_.stride(2);
|
||||
uint k_head_stride = k_.stride(1);
|
||||
uint k_seq_stride = k_.stride(2);
|
||||
uint v_head_stride = v_.stride(1);
|
||||
@ -209,8 +211,8 @@ static std::tuple<Tensor, Tensor> sdpa_vector_fast_mps(const Tensor& q_,
|
||||
out,
|
||||
1,
|
||||
N,
|
||||
std::array<uint32_t, 2>{k_head_stride, k_seq_stride},
|
||||
std::array<uint32_t, 2>{v_head_stride, v_seq_stride},
|
||||
std::array<uint32_t, 3>{q_head_stride, k_head_stride, v_head_stride},
|
||||
std::array<uint32_t, 3>{q_seq_stride, k_seq_stride, v_seq_stride},
|
||||
scale_factor);
|
||||
|
||||
if (has_mask) {
|
||||
@ -257,6 +259,8 @@ static std::tuple<Tensor, Tensor> sdpa_vector_2pass_mps(const Tensor& q_,
|
||||
uint B = batchSize * num_heads;
|
||||
uint gqa_factor = q_.size(1) / k_.size(1);
|
||||
|
||||
uint q_head_stride = q_.stride(1);
|
||||
uint q_seq_stride = q_.stride(2);
|
||||
uint k_head_stride = k_.stride(1);
|
||||
uint k_seq_stride = k_.stride(2);
|
||||
uint v_head_stride = v_.stride(1);
|
||||
@ -294,8 +298,8 @@ static std::tuple<Tensor, Tensor> sdpa_vector_2pass_mps(const Tensor& q_,
|
||||
maxs,
|
||||
gqa_factor,
|
||||
N,
|
||||
std::array<uint32_t, 2>{k_head_stride, k_seq_stride},
|
||||
std::array<uint32_t, 2>{v_head_stride, v_seq_stride},
|
||||
std::array<uint32_t, 3>{q_head_stride, k_head_stride, v_head_stride},
|
||||
std::array<uint32_t, 3>{q_seq_stride, k_seq_stride, v_seq_stride},
|
||||
scale_factor);
|
||||
|
||||
if (has_mask) {
|
||||
|
||||
@ -666,6 +666,15 @@ bool can_use_cudnn_attention(const sdp_params& params, bool debug) {
|
||||
TORCH_WARN(CUDNN_VERSION, " cuDNN version too old to use cuDNN Attention (< v9.0.0)");
|
||||
}
|
||||
return false;
|
||||
#endif
|
||||
#if defined(CUDNN_VERSION)
|
||||
static auto cudnn_version = cudnnGetVersion();
|
||||
if (params.dropout > 0.0 && cudnn_version > 91100 && cudnn_version < 91400) {
|
||||
if (debug) {
|
||||
TORCH_WARN(CUDNN_VERSION, " cuDNN version does not support droppout in SDPA (9.11 - 9.13).");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
// Define gate functions that determine if a flash kernel can be ran
|
||||
// Replace with std::to_array when we migrate to c++20
|
||||
|
||||
@ -2,22 +2,12 @@
|
||||
|
||||
// ${generated_comment}
|
||||
|
||||
#include <ATen/FunctionalStorageImpl.h>
|
||||
#include <ATen/Tensor.h>
|
||||
|
||||
namespace at {
|
||||
namespace functionalization {
|
||||
|
||||
enum class InverseReturnMode {
|
||||
/// Specifies that functional inverses should always return a view.
|
||||
AlwaysView,
|
||||
/// Specifies that functional inverses should always return a non-view / copy.
|
||||
NeverView,
|
||||
/// Specifies that functional inverses should return a view unless a (copying) scatter
|
||||
/// inverse exists, in which case that will be used instead.
|
||||
/// This avoids as_strided() calls that can be difficult for subclasses to handle.
|
||||
ViewOrScatterInverse,
|
||||
};
|
||||
|
||||
struct FunctionalInverses {
|
||||
|
||||
${view_inverse_declarations}
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
#include <ATen/core/LegacyTypeDispatch.h>
|
||||
#include <ATen/EmptyTensor.h>
|
||||
#include <ATen/FunctionalTensorWrapper.h>
|
||||
#include <ATen/FunctionalInverses.h>
|
||||
#include <ATen/ViewMetaClasses.h>
|
||||
#include <ATen/MemoryOverlap.h>
|
||||
#include <torch/library.h>
|
||||
|
||||
|
||||
19
aten/src/ATen/templates/ViewMetaClasses.cpp
Normal file
19
aten/src/ATen/templates/ViewMetaClasses.cpp
Normal file
@ -0,0 +1,19 @@
|
||||
// ${generated_comment}
|
||||
|
||||
#include <ATen/FunctionalInverses.h>
|
||||
#include <ATen/ViewMetaClasses.h>
|
||||
|
||||
#ifndef AT_PER_OPERATOR_HEADERS
|
||||
#include <ATen/Operators.h>
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#else
|
||||
${op_headers}
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
namespace functionalization {
|
||||
|
||||
${view_meta_implementations}
|
||||
|
||||
} // namespace functionalization
|
||||
} // namespace at
|
||||
12
aten/src/ATen/templates/ViewMetaClasses.h
Normal file
12
aten/src/ATen/templates/ViewMetaClasses.h
Normal file
@ -0,0 +1,12 @@
|
||||
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
|
||||
// ${generated_comment}
|
||||
|
||||
#include <ATen/FunctionalStorageImpl.h>
|
||||
|
||||
namespace at {
|
||||
namespace functionalization {
|
||||
|
||||
${view_meta_declarations}
|
||||
|
||||
} // namespace functionalization
|
||||
} // namespace at
|
||||
11
aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp
Normal file
11
aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include <ATen/ViewMetaClasses.h>
|
||||
#include <torch/csrc/functionalization/Module.h>
|
||||
|
||||
namespace torch::functionalization {
|
||||
|
||||
void initGenerated(PyObject* module) {
|
||||
auto functionalization = py::handle(module).cast<py::module>();
|
||||
$view_meta_bindings
|
||||
}
|
||||
|
||||
} // namespace torch::functionalization
|
||||
@ -5,51 +5,6 @@
|
||||
|
||||
#include <ATen/test/allocator_clone_test.h>
|
||||
|
||||
#include <torch/csrc/cuda/CUDAPluggableAllocator.h>
|
||||
|
||||
TEST(AllocatorTestCUDA, test_clone) {
|
||||
test_allocator_clone(c10::cuda::CUDACachingAllocator::get());
|
||||
}
|
||||
|
||||
static int called_dummy_free_0 = 0;
|
||||
static int called_dummy_free_1 = 0;
|
||||
|
||||
void* dummy_alloc_0(size_t size, int device, void* stream) {return nullptr;}
|
||||
void dummy_free_0(void* data, size_t size, int device, void* stream) {
|
||||
called_dummy_free_0++;
|
||||
}
|
||||
void dummy_free_1(void* data, size_t size, int device, void* stream) {
|
||||
called_dummy_free_1++;
|
||||
}
|
||||
|
||||
// Tests that data_ptrs have their respective deleters
|
||||
// when mixing allocators
|
||||
TEST(AllocatorTestCUDA, test_pluggable_allocator_deleters) {
|
||||
// Create a tensor with dummy_allocator_0, where dummy_free_0 is the deleter
|
||||
auto dummy_allocator_0 = torch::cuda::CUDAPluggableAllocator::createCustomAllocator(dummy_alloc_0, dummy_free_0);
|
||||
c10::cuda::CUDACachingAllocator::allocator.store(dummy_allocator_0.get());
|
||||
at::Tensor a = at::empty({0}, at::TensorOptions().device(at::kCUDA));
|
||||
|
||||
// Create a tensor with dummy_allocator_1, where dummy_free_1 is the deleter
|
||||
auto dummy_allocator_1 = torch::cuda::CUDAPluggableAllocator::createCustomAllocator(dummy_alloc_0, dummy_free_1);
|
||||
c10::cuda::CUDACachingAllocator::allocator.store(dummy_allocator_1.get());
|
||||
at::Tensor b = at::empty({0}, at::TensorOptions().device(at::kCUDA));
|
||||
|
||||
// Manually use a's deleter
|
||||
auto* ctx = a.storage().data_ptr().get_context();
|
||||
a.storage().data_ptr().get_deleter()(ctx);
|
||||
a.storage().mutable_data_ptr().release_context();
|
||||
|
||||
// a's deleter is dummy_free_0
|
||||
// dummy_free_0 should be called above, so called_dummy_free_0 should be 1
|
||||
ASSERT_TRUE(called_dummy_free_0 == 1);
|
||||
|
||||
// Manually use b's deleter
|
||||
ctx = b.storage().data_ptr().get_context();
|
||||
b.storage().data_ptr().get_deleter()(ctx);
|
||||
b.storage().mutable_data_ptr().release_context();
|
||||
|
||||
// b's deleter is dummy_free_1
|
||||
// dummy_free_1 should be called above, so called_dummy_free_1 should be 1
|
||||
ASSERT_TRUE(called_dummy_free_1 == 1);
|
||||
}
|
||||
|
||||
@ -98,11 +98,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,pass,5
|
||||
doctr_det_predictor,pass,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,pass,4
|
||||
doctr_reco_predictor,pass,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -98,11 +98,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,pass,5
|
||||
doctr_det_predictor,pass,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,pass,4
|
||||
doctr_reco_predictor,pass,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -98,11 +98,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,pass,5
|
||||
doctr_det_predictor,pass,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,pass,4
|
||||
doctr_reco_predictor,pass,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,11 +82,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,pass,5
|
||||
doctr_det_predictor,pass,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,pass,4
|
||||
doctr_reco_predictor,pass,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -98,11 +98,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,pass,5
|
||||
doctr_det_predictor,pass,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,pass,4
|
||||
doctr_reco_predictor,pass,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -106,11 +106,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,eager_fail_to_run,5
|
||||
doctr_det_predictor,eager_fail_to_run,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,eager_fail_to_run,4
|
||||
doctr_reco_predictor,eager_fail_to_run,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -106,11 +106,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,eager_fail_to_run,5
|
||||
doctr_det_predictor,eager_fail_to_run,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,eager_fail_to_run,4
|
||||
doctr_reco_predictor,eager_fail_to_run,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -106,11 +106,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,eager_fail_to_run,5
|
||||
doctr_det_predictor,eager_fail_to_run,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,eager_fail_to_run,4
|
||||
doctr_reco_predictor,eager_fail_to_run,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -106,11 +106,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,eager_fail_to_run,5
|
||||
doctr_det_predictor,eager_fail_to_run,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,eager_fail_to_run,4
|
||||
doctr_reco_predictor,eager_fail_to_run,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -106,11 +106,11 @@ dlrm,pass,0
|
||||
|
||||
|
||||
|
||||
doctr_det_predictor,eager_fail_to_run,5
|
||||
doctr_det_predictor,eager_fail_to_run,3
|
||||
|
||||
|
||||
|
||||
doctr_reco_predictor,eager_fail_to_run,4
|
||||
doctr_reco_predictor,eager_fail_to_run,1
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@ import csv
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import timeit
|
||||
from collections import namedtuple
|
||||
from dataclasses import asdict, dataclass
|
||||
@ -17,6 +18,7 @@ import torch
|
||||
|
||||
# needs to be imported after torch
|
||||
import torch.utils.cpp_extension as cpp_extension # noqa: F401
|
||||
from torch.utils.benchmark import Timer
|
||||
|
||||
|
||||
"""Performance microbenchmarks.
|
||||
@ -191,6 +193,11 @@ class BenchmarkRunner:
|
||||
self.predefined_minimum_secs = 1
|
||||
self.max_iters = 1e6
|
||||
self.use_jit = args.use_jit
|
||||
self.use_compile = args.use_compile
|
||||
if self.use_jit and self.use_compile:
|
||||
raise ValueError(
|
||||
"use_jit and use_compile are mutually exclusive, please specify one."
|
||||
)
|
||||
self.num_runs = args.num_runs
|
||||
self.print_per_iter = False
|
||||
self.output_csv = args.output_csv
|
||||
@ -222,7 +229,7 @@ class BenchmarkRunner:
|
||||
if self.args.operators:
|
||||
print(f"# {self.args.operators}")
|
||||
|
||||
def _print_perf_result(self, reported_run_time_us, test_case):
|
||||
def _print_perf_result(self, results, test_case):
|
||||
if self.args.report_aibench:
|
||||
# Output for AIBench
|
||||
# Print out per iteration execution time instead of avg time
|
||||
@ -236,12 +243,14 @@ class BenchmarkRunner:
|
||||
"type": test_name,
|
||||
"metric": "latency",
|
||||
"unit": "us",
|
||||
"value": str(reported_run_time_us[run]),
|
||||
"value": str(results["reported_run_time_us"[run]]),
|
||||
}
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
|
||||
print(
|
||||
f"# Mode: {'JIT' if self.use_jit else 'Compile' if self.use_compile else 'Eager'}"
|
||||
)
|
||||
print(
|
||||
f"# Name: {test_case.test_config.test_name}\n# Input: {test_case.test_config.input_config}"
|
||||
)
|
||||
@ -250,25 +259,33 @@ class BenchmarkRunner:
|
||||
if self.num_runs > 1:
|
||||
for run in range(self.num_runs):
|
||||
print(
|
||||
f"Run: {run}, {mode} Execution Time (us) : {reported_run_time_us[run]:.3f}"
|
||||
f"Run: {run}, {mode} Execution Time (us) : {results['reported_run_time_us'][run]:.3f}"
|
||||
)
|
||||
print()
|
||||
else:
|
||||
print(f"{mode} Execution Time (us) : {reported_run_time_us[0]:.3f}\n")
|
||||
print(
|
||||
f"{mode} Execution Time (us) : {results['reported_run_time_us'][0]:.3f}"
|
||||
)
|
||||
print(f"Peak Memory (KB) : {results['peak_memory']}\n")
|
||||
|
||||
def _perf_result_to_dict(self, reported_run_time_us, test_case):
|
||||
def _perf_result_to_dict(self, results, test_case):
|
||||
"""This function is the parallel of _print_perf_result, which instead of
|
||||
writing information to terminal, returns a dictionary.
|
||||
"""
|
||||
if self.args.report_aibench:
|
||||
return {}
|
||||
|
||||
out = {
|
||||
"test_name": test_case.test_config.test_name,
|
||||
"input_config": test_case.test_config.input_config,
|
||||
"mode": "JIT" if self.use_jit else "Eager",
|
||||
"runtime": (
|
||||
"JIT" if self.use_jit else "Compile" if self.use_compile else "Eager"
|
||||
),
|
||||
"run": "Backward" if test_case.test_config.run_backward else "Forward",
|
||||
"latency": round(reported_run_time_us[0], 3),
|
||||
"latency": round(results["reported_run_time_us"][0], 3),
|
||||
"latency unit": "us",
|
||||
"peak memory": results["peak_memory"],
|
||||
"memory unit": "KB",
|
||||
}
|
||||
|
||||
# parsing test_case.test_config.input_config, adding it as entries to the 'out' dictionary
|
||||
@ -330,10 +347,26 @@ class BenchmarkRunner:
|
||||
func = test_case.run_forward
|
||||
if self.use_jit:
|
||||
func = test_case.run_jit_forward
|
||||
forward_time = timeit.timeit(
|
||||
functools.partial(func, iters, print_per_iter, cuda_sync), number=1
|
||||
if self.use_compile:
|
||||
func = test_case.run_compile_forward
|
||||
|
||||
if not cuda_sync:
|
||||
forward_time = timeit.timeit(
|
||||
functools.partial(func, iters, print_per_iter, cuda_sync), number=1
|
||||
)
|
||||
return forward_time
|
||||
# Stable timing with Timer
|
||||
timer = Timer(
|
||||
stmt="func(iters, print_per_iter, cuda_sync)",
|
||||
globals={
|
||||
"func": func,
|
||||
"iters": iters,
|
||||
"print_per_iter": print_per_iter,
|
||||
"cuda_sync": cuda_sync,
|
||||
},
|
||||
)
|
||||
return forward_time
|
||||
result = timer.adaptive_autorange(min_run_time=0.0001)
|
||||
return result.median * iters
|
||||
|
||||
def _launch_backward(self, test_case, iters, print_per_iter=False):
|
||||
"""This function runs forward path of an op to get an output. Then the backward path is executed
|
||||
@ -346,7 +379,7 @@ class BenchmarkRunner:
|
||||
)
|
||||
return backward_time
|
||||
|
||||
def _measure_time(self, launch_test, test_case, iters, print_per_iter):
|
||||
def _measure_metrics(self, launch_test, test_case, iters, print_per_iter):
|
||||
"""
|
||||
This function execute the operator for <iters> iterations then look at the time.
|
||||
If it's not significant, the number of iterations will be increased before rerun.
|
||||
@ -354,8 +387,25 @@ class BenchmarkRunner:
|
||||
"""
|
||||
curr_test_total_time = 0
|
||||
time_trace = []
|
||||
peak_memory = 0
|
||||
input_values = test_case.op_bench.inputs.values()
|
||||
device, device_module = None, None
|
||||
if input_values and isinstance(next(iter(input_values)), torch.Tensor):
|
||||
# The device and device module information are crucial for memory metric calculation,
|
||||
# In case of ops where inputs are integers (not tensor), memory metrics need not be calculated.
|
||||
sample_input = next(iter(input_values))
|
||||
device = sample_input.device
|
||||
device_module = torch.get_device_module(device.type)
|
||||
# TODO: add support for cpu memory measurement
|
||||
while True:
|
||||
if hasattr(device_module, "reset_peak_memory_stats"):
|
||||
device_module.reset_peak_memory_stats(device)
|
||||
run_time_sec = launch_test(test_case, iters, print_per_iter)
|
||||
if hasattr(device_module, "synchronize"):
|
||||
device_module.synchronize(device)
|
||||
# Memory measurement process
|
||||
if hasattr(device_module, "max_memory_allocated"):
|
||||
peak_memory = device_module.max_memory_allocated(device)
|
||||
curr_test_total_time += run_time_sec
|
||||
# Analyze time after each run to decide if the result is stable
|
||||
results_are_significant = self._iteration_result_is_significant(
|
||||
@ -369,7 +419,13 @@ class BenchmarkRunner:
|
||||
time_trace.append(report_run_time)
|
||||
# Print out the time spent in each epoch in ms
|
||||
if self.args.report_aibench:
|
||||
mode = "JIT" if self.use_jit else "Eager"
|
||||
mode = (
|
||||
"JIT"
|
||||
if self.use_jit
|
||||
else "Compile"
|
||||
if self.use_compile
|
||||
else "Eager"
|
||||
)
|
||||
test_name = "_".join(
|
||||
[test_case.framework, test_case.test_config.test_name, mode]
|
||||
)
|
||||
@ -381,7 +437,7 @@ class BenchmarkRunner:
|
||||
"metric": "latency",
|
||||
"unit": "ms",
|
||||
"value": str(report_run_time / 1e3),
|
||||
}
|
||||
},
|
||||
)
|
||||
)
|
||||
if results_are_significant:
|
||||
@ -391,7 +447,7 @@ class BenchmarkRunner:
|
||||
# iteration count, and run the benchmark again...
|
||||
iters = self._predict_num_iter_needed(iters)
|
||||
reported_run_time_us = np.percentile(np.array(time_trace), 50)
|
||||
return reported_run_time_us
|
||||
return reported_run_time_us, peak_memory / 1024
|
||||
|
||||
def _check_keep(self, test_flag, cmd_flag):
|
||||
return cmd_flag is None or test_flag == cmd_flag
|
||||
@ -478,6 +534,7 @@ class BenchmarkRunner:
|
||||
self,
|
||||
perf_list,
|
||||
output_file,
|
||||
benchmark_name="PyTorch operator benchmark",
|
||||
):
|
||||
"""
|
||||
Write the result into JSON format, so that it can be uploaded to the benchmark database
|
||||
@ -495,8 +552,10 @@ class BenchmarkRunner:
|
||||
input_config = perf_item.get("input_config", "")
|
||||
run_type = perf_item.get("run")
|
||||
latency = perf_item.get("latency", 0)
|
||||
|
||||
dtype = "float32" # default
|
||||
peak_memory = perf_item.get("peak memory", 0)
|
||||
device = perf_item.get("device", "unknown")
|
||||
dtype = perf_item.get("dtype", "torch.float").split(".")[1]
|
||||
runtime = perf_item.get("runtime", None)
|
||||
|
||||
# Extract mode based on run_type
|
||||
mode = None
|
||||
@ -505,6 +564,22 @@ class BenchmarkRunner:
|
||||
elif run_type == "Backward":
|
||||
mode = "training"
|
||||
|
||||
# Extract use_compile from it
|
||||
if runtime == "Compile":
|
||||
use_compile = True
|
||||
elif runtime == "Eager":
|
||||
use_compile = False
|
||||
else:
|
||||
use_compile = None
|
||||
|
||||
device_arch = (
|
||||
torch.cuda.get_device_name(0)
|
||||
if device == "cuda"
|
||||
else platform.processor()
|
||||
if device == "cpu"
|
||||
else "unknown"
|
||||
)
|
||||
|
||||
# Create the record
|
||||
@dataclass
|
||||
class BenchmarkInfo:
|
||||
@ -532,12 +607,18 @@ class BenchmarkRunner:
|
||||
model: ModelInfo
|
||||
metric: MetricInfo
|
||||
|
||||
record = BenchmarkRecord(
|
||||
# Add record for latency
|
||||
record_latency = BenchmarkRecord(
|
||||
benchmark=BenchmarkInfo(
|
||||
name="PyTorch operator benchmark",
|
||||
name=benchmark_name,
|
||||
mode=mode,
|
||||
dtype=dtype,
|
||||
extra_info={"input_config": input_config},
|
||||
extra_info={
|
||||
"input_config": input_config,
|
||||
"device": device,
|
||||
"arch": device_arch,
|
||||
"use_compile": use_compile,
|
||||
},
|
||||
),
|
||||
model=ModelInfo(
|
||||
name=test_name, type="micro-benchmark", origins=["pytorch"]
|
||||
@ -549,8 +630,17 @@ class BenchmarkRunner:
|
||||
target_value=None,
|
||||
),
|
||||
)
|
||||
records.append(asdict(record_latency))
|
||||
|
||||
records.append(asdict(record))
|
||||
# Add record for peak memory
|
||||
record_memory = copy.deepcopy(record_latency)
|
||||
record_memory.metric = MetricInfo(
|
||||
name="peak memory",
|
||||
unit="KB",
|
||||
benchmark_values=[peak_memory],
|
||||
target_value=None,
|
||||
)
|
||||
records.append(asdict(record_memory))
|
||||
|
||||
# Write all records to the output file
|
||||
with open(output_file, "w", encoding="utf-8") as f:
|
||||
@ -566,6 +656,7 @@ class BenchmarkRunner:
|
||||
"tag",
|
||||
"run_backward",
|
||||
"Execution Time",
|
||||
"Peak Memory (KB)",
|
||||
]
|
||||
|
||||
if self.args.output_json or self.args.output_json_for_dashboard:
|
||||
@ -603,13 +694,16 @@ class BenchmarkRunner:
|
||||
test_case, self.args.warmup_iterations, print_per_iter=False
|
||||
)
|
||||
# Actual Execution
|
||||
reported_time = [
|
||||
self._measure_time(
|
||||
results = [
|
||||
self._measure_metrics(
|
||||
launch_func, test_case, self.iters, self.print_per_iter
|
||||
)
|
||||
for _ in range(self.num_runs)
|
||||
]
|
||||
self._print_perf_result(reported_time, test_case)
|
||||
result_dict = dict()
|
||||
result_dict["reported_run_time_us"] = [r[0] for r in results]
|
||||
result_dict["peak_memory"] = results[0][1]
|
||||
self._print_perf_result(results=result_dict, test_case=test_case)
|
||||
|
||||
# output results to csv
|
||||
self._output_csv(
|
||||
@ -625,16 +719,17 @@ class BenchmarkRunner:
|
||||
),
|
||||
test_case.test_config.tag,
|
||||
test_case.test_config.run_backward,
|
||||
reported_time[0],
|
||||
result_dict["reported_run_time_us"][0],
|
||||
result_dict["peak_memory"],
|
||||
],
|
||||
)
|
||||
if self.args.output_json or self.args.output_json_for_dashboard:
|
||||
perf_list.append(
|
||||
self._perf_result_to_dict(reported_time, test_case)
|
||||
)
|
||||
perf_list.append(self._perf_result_to_dict(result_dict, test_case))
|
||||
|
||||
if self.args.output_json_for_dashboard:
|
||||
self._output_json(perf_list, self.args.output_json_for_dashboard)
|
||||
self._output_json(
|
||||
perf_list, self.args.output_json_for_dashboard, self.args.benchmark_name
|
||||
)
|
||||
|
||||
if self.args.output_json:
|
||||
with open(self.args.output_json, "w") as f:
|
||||
|
||||
@ -4,6 +4,15 @@ import time
|
||||
import torch
|
||||
|
||||
|
||||
# Import the C++ extension to register the _consume operator
|
||||
try:
|
||||
import benchmark_cpp_extension # noqa: F401
|
||||
except ImportError as err:
|
||||
# If the extension isn't built, the script must raise an error
|
||||
raise ImportError(
|
||||
"Failed to import C++ extension, please build it using \ncd pt_extension \npython -m pip install ."
|
||||
) from err
|
||||
|
||||
"""PyTorch performance microbenchmarks.
|
||||
|
||||
This module contains PyTorch-specific functionalities for performance
|
||||
@ -71,6 +80,16 @@ class TorchBenchmarkBase(torch.nn.Module):
|
||||
for _ in range(iters):
|
||||
torch.ops.operator_benchmark._consume(self.forward_impl())
|
||||
|
||||
def forward_impl_eager(self):
    """Run ``self.forward`` on the inputs returned by ``self.get_inputs()``.

    Serves as the single forward entry point shared by the eager and the
    compile modes of local runs.

    Returns:
        Whatever ``self.forward`` returns for those inputs.
    """
    inputs = self.get_inputs()
    return self.forward(*inputs)
|
||||
|
||||
def forward_consume_eager(self, iters: int):
    """Run the eager forward ``iters`` times, consuming each result.

    Undecorated counterpart of ``forward_consume``; compilation, when
    wanted, is applied from outside via ``torch.compile``.

    Args:
        iters: number of forward invocations to perform.
    """
    for _step in range(iters):
        # Each output goes through the _consume op, as in forward_consume.
        torch.ops.operator_benchmark._consume(self.forward_impl_eager())
|
||||
|
||||
def module_name(self):
|
||||
"""this is used to label the operator being benchmarked"""
|
||||
if self.user_given_name:
|
||||
@ -117,18 +136,34 @@ class PyTorchOperatorTestCase:
|
||||
self.framework = "PyTorch"
|
||||
self.time_series = []
|
||||
self._jit_forward_graph = None
|
||||
self._compile_forward_graph = None
|
||||
|
||||
def _generate_jit_forward_graph(self):
|
||||
"""generate a graph for the forward function via scripting"""
|
||||
scripted_op_bench = torch.jit.script(self.op_bench)
|
||||
return scripted_op_bench.forward_consume
|
||||
|
||||
def _generate_compile_forward_graph(self):
|
||||
"""generate a compiled graph for the forward function via torch.compile"""
|
||||
compiled_forward_consume = torch.compile(
|
||||
self.op_bench.forward_consume_eager, backend="inductor"
|
||||
)
|
||||
return compiled_forward_consume
|
||||
|
||||
def run_jit_forward(self, num_runs, print_per_iter=False, cuda_sync=False):
    """Run the forward path of an op in JIT mode.

    The scripted callable is built on first use and cached on the instance.

    Args:
        num_runs: value passed to the scripted callable.
        print_per_iter: accepted for signature parity; not used here.
        cuda_sync: accepted for signature parity; not used here.
    """
    scripted_loop = self._jit_forward_graph
    if scripted_loop is None:
        scripted_loop = self._generate_jit_forward_graph()
        self._jit_forward_graph = scripted_loop
    scripted_loop(num_runs)
|
||||
|
||||
def run_compile_forward(self, num_runs, print_per_iter=False, cuda_sync=False):
    """Run the forward path of an op in compile mode.

    The compiled callable is built on first use and cached on the instance.

    Args:
        num_runs: value passed to the compiled callable.
        print_per_iter: accepted for signature parity; not used here.
        cuda_sync: when true, synchronize the current CUDA device after
            the call.
    """
    compiled_loop = self._compile_forward_graph
    if compiled_loop is None:
        compiled_loop = self._generate_compile_forward_graph()
        self._compile_forward_graph = compiled_loop
    compiled_loop(num_runs)
    if not cuda_sync:
        return
    torch.cuda.synchronize(torch.cuda.current_device())
|
||||
|
||||
def _print_per_iter(self):
|
||||
# print last 50 values
|
||||
length = min(len(self.time_series), 50)
|
||||
@ -150,14 +185,14 @@ class PyTorchOperatorTestCase:
|
||||
if print_per_iter:
|
||||
for _ in range(num_runs):
|
||||
start_time = time.time()
|
||||
self.output = self.op_bench.forward_impl()
|
||||
self.output = self.op_bench.forward_impl_eager()
|
||||
if cuda_sync:
|
||||
torch.cuda.synchronize(torch.cuda.current_device())
|
||||
end_time = time.time()
|
||||
self.time_series.append((end_time - start_time) * 1e3)
|
||||
else:
|
||||
for _ in range(num_runs):
|
||||
self.output = self.op_bench.forward_impl()
|
||||
self.output = self.op_bench.forward_impl_eager()
|
||||
if cuda_sync:
|
||||
torch.cuda.synchronize(torch.cuda.current_device())
|
||||
|
||||
|
||||
@ -62,6 +62,13 @@ def parse_args():
|
||||
default=None,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--benchmark-name",
|
||||
"--benchmark_name",
|
||||
help="Name of the benchmark to store results to",
|
||||
default="PyTorch operator benchmark",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--list-tests",
|
||||
"--list_tests",
|
||||
@ -135,6 +142,16 @@ def parse_args():
|
||||
help="Run operators with PyTorch JIT mode",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--use-compile",
|
||||
"--use_compile",
|
||||
type=benchmark_utils.str2bool,
|
||||
nargs="?",
|
||||
const=True,
|
||||
default=False,
|
||||
help="Run operators with PyTorch Compile mode",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--forward-only",
|
||||
"--forward_only",
|
||||
@ -162,7 +179,7 @@ def parse_args():
|
||||
"--output-json-for-dashboard",
|
||||
"--output_json_for_dashboard",
|
||||
help="Save results in JSON format for display on the OSS dashboard",
|
||||
default="False",
|
||||
default="benchmark-results.json",
|
||||
)
|
||||
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time
|
||||
PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497
|
||||
PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,2.459
|
||||
PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181
|
||||
PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826
|
||||
PyTorch,add,add_M1_N1_K1_cpu_bwdall_BACKWARD,short,TRUE,58.1449
|
||||
@ -376,10 +376,10 @@ PyTorch,relu6,"relu6_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",sho
|
||||
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,9.6588
|
||||
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,9.5969
|
||||
PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,9.547
|
||||
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,68.739
|
||||
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.21375
|
||||
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,45.14133333
|
||||
PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,52.6664
|
||||
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,69.1875
|
||||
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,51.49525
|
||||
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,48.3458
|
||||
PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,62.0719
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,7.5728
|
||||
@ -388,10 +388,10 @@ PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplace
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,8.1647
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,8.1768
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,8.0619
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.118
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,48.88475
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,43.702
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,50.3613
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.436
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.3995
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,46.9813
|
||||
PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,59.2295
|
||||
PyTorch,functional.hardsigmoid,"functional.hardsigmoid_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,6.5189
|
||||
@ -1316,4 +1316,4 @@ PyTorch,where,"where_cond_shape(8,16,1)_input_shape(1,)_other_shape(1,)_cpu_dtyp
|
||||
PyTorch,where,"where_cond_shape(8,16,1)_input_shape(16,1)_other_shape(8,16,1)_cpu_dtypetorch.float32",short,FALSE,5.763
|
||||
PyTorch,where,"where_cond_shape(8,16,1)_input_shape(8,1,1)_other_shape(1,)_cpu_dtypetorch.float32",short,FALSE,5.744666667
|
||||
PyTorch,clamp,clamp_M512_N512_cpu,short,FALSE,15.26233333
|
||||
PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667
|
||||
PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667
|
||||
|
||||
|
@ -52,27 +52,6 @@ class AddBenchmark(op_bench.TorchBenchmarkBase):
|
||||
op_bench.generate_pt_test(add_long_configs + add_short_configs, AddBenchmark)
|
||||
op_bench.generate_pt_gradient_test(add_long_configs + add_short_configs, AddBenchmark)
|
||||
|
||||
|
||||
"""Microbenchmark for addmm operator."""
|
||||
|
||||
|
||||
class AddmmBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, M, N, K, device):
|
||||
self.inputs = {
|
||||
"input_one": torch.rand(M, K, device=device, requires_grad=self.auto_set()),
|
||||
"mat1": torch.rand(M, N, device=device, requires_grad=self.auto_set()),
|
||||
"mat2": torch.rand(N, K, device=device, requires_grad=self.auto_set()),
|
||||
}
|
||||
self.set_module_name("addmm")
|
||||
|
||||
def forward(self, input_one, mat1, mat2):
|
||||
return torch.addmm(input_one, mat1, mat2)
|
||||
|
||||
|
||||
op_bench.generate_pt_test(add_long_configs + add_short_configs, AddmmBenchmark)
|
||||
op_bench.generate_pt_gradient_test(add_long_configs + add_short_configs, AddmmBenchmark)
|
||||
|
||||
|
||||
"""Microbenchmark for addr operator."""
|
||||
|
||||
|
||||
@ -106,46 +85,5 @@ addr_configs = op_bench.cross_product_configs(
|
||||
op_bench.generate_pt_test(addr_configs, AddrBenchmark)
|
||||
op_bench.generate_pt_gradient_test(addr_configs, AddrBenchmark)
|
||||
|
||||
|
||||
"""Microbenchmark for addbmm operator."""
|
||||
|
||||
|
||||
class AddbmmBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, B, M, N, K, device):
|
||||
self.inputs = {
|
||||
"input_one": torch.rand(
|
||||
(M, N), device=device, requires_grad=self.auto_set()
|
||||
),
|
||||
"batch1": torch.rand(
|
||||
(B, M, K), device=device, requires_grad=self.auto_set()
|
||||
),
|
||||
"batch2": torch.rand(
|
||||
(
|
||||
B,
|
||||
K,
|
||||
N,
|
||||
),
|
||||
device=device,
|
||||
requires_grad=self.auto_set(),
|
||||
),
|
||||
}
|
||||
self.set_module_name("addbmm")
|
||||
|
||||
def forward(self, input_one, batch1, batch2):
|
||||
return torch.addbmm(input_one, batch1, batch2)
|
||||
|
||||
|
||||
addbmm_configs = op_bench.cross_product_configs(
|
||||
B=[2, 100],
|
||||
M=[8, 256],
|
||||
N=[256, 16],
|
||||
K=[15, 16],
|
||||
device=["cpu", "cuda"],
|
||||
tags=["addbmm"],
|
||||
)
|
||||
|
||||
op_bench.generate_pt_test(addbmm_configs, AddbmmBenchmark)
|
||||
op_bench.generate_pt_gradient_test(addbmm_configs, AddbmmBenchmark)
|
||||
|
||||
if __name__ == "__main__":
|
||||
op_bench.benchmark_runner.main()
|
||||
|
||||
115
benchmarks/operator_benchmark/pt/addmm_test.py
Normal file
115
benchmarks/operator_benchmark/pt/addmm_test.py
Normal file
@ -0,0 +1,115 @@
|
||||
import operator_benchmark as op_bench
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
"""Microbenchmarks for add_(matmul) operator. Supports both Caffe2/PyTorch."""
|
||||
|
||||
# Configs for PT addmm operator
|
||||
# "long" addmm sweep: cross product of the listed sizes and dtypes, CUDA only.
addmm_long_configs = op_bench.cross_product_configs(
    M=[256, 1024, 3000],
    N=[512, 4096],
    K=[512, 4096],
    device=["cuda"],
    tags=["long"],
    dtype=[torch.float16, torch.bfloat16, torch.float32],
)
|
||||
|
||||
|
||||
# "short" addmm sweep: three explicit (M, N, K) triples, each crossed with
# the listed devices and dtype.
addmm_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[[1, 1, 1], [64, 64, 64], [64, 64, 128]],
    cross_product_configs={"device": ["cpu", "cuda"], "dtype": [torch.float]},
    tags=["short"],
)
|
||||
|
||||
|
||||
"""Microbenchmark for addmm operator."""
|
||||
|
||||
|
||||
class AddmmBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that times ``torch.addmm``."""

    def init(self, M, N, K, device, dtype):
        """Create the three random operands and label the module "addmm".

        ``input_one`` is (M, K), ``mat1`` is (M, N) and ``mat2`` is (N, K);
        all share ``device`` and ``dtype``, and each takes its
        ``requires_grad`` flag from a separate ``self.auto_set()`` call.
        """

        def _rand(rows, cols):
            # One auto_set() call per operand, in dict order, as before.
            return torch.rand(
                rows, cols, device=device, requires_grad=self.auto_set(), dtype=dtype
            )

        self.inputs = {
            "input_one": _rand(M, K),
            "mat1": _rand(M, N),
            "mat2": _rand(N, K),
        }
        self.set_module_name("addmm")

    def forward(self, input_one, mat1, mat2):
        """Return ``torch.addmm(input_one, mat1, mat2)``."""
        return torch.addmm(input_one, mat1, mat2)
|
||||
|
||||
|
||||
# Register forward and gradient addmm benchmarks over the long AND short
# configs. The previous "long + long" passed the long sweep twice and left
# addmm_short_configs unused.
op_bench.generate_pt_test(addmm_long_configs + addmm_short_configs, AddmmBenchmark)
op_bench.generate_pt_gradient_test(
    addmm_long_configs + addmm_short_configs, AddmmBenchmark
)
|
||||
|
||||
"""Microbenchmark for addbmm operator."""
|
||||
|
||||
|
||||
class AddbmmBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that times ``torch.addbmm``."""

    def init(self, B, M, N, K, device, dtype):
        """Create the three random operands and label the module "addbmm".

        ``input_one`` is (M, N), ``batch1`` is (B, M, K) and ``batch2`` is
        (B, K, N); all share ``device`` and ``dtype``, and each takes its
        ``requires_grad`` flag from a separate ``self.auto_set()`` call.
        """

        def _rand(shape):
            # One auto_set() call per operand, in dict order, as before.
            return torch.rand(
                shape, device=device, requires_grad=self.auto_set(), dtype=dtype
            )

        self.inputs = {
            "input_one": _rand((M, N)),
            "batch1": _rand((B, M, K)),
            "batch2": _rand((B, K, N)),
        }
        self.set_module_name("addbmm")

    def forward(self, input_one, batch1, batch2):
        """Return ``torch.addbmm(input_one, batch1, batch2)``."""
        return torch.addbmm(input_one, batch1, batch2)
|
||||
|
||||
|
||||
# "long" addbmm sweep: cross product of the listed sizes and dtypes, CUDA only.
addbmm_long_configs = op_bench.cross_product_configs(
    B=[8, 32],
    M=[256, 1024],
    N=[256, 1024],
    K=[64, 128],
    device=["cuda"],
    dtype=[torch.float16, torch.bfloat16, torch.float32],
    tags=["long"],
)
|
||||
# "short" addbmm sweep: cross product of the listed sizes and dtypes on
# both CPU and CUDA.
addbmm_short_configs = op_bench.cross_product_configs(
    B=[1, 8],
    M=[8, 128],
    N=[32, 64],
    K=[256, 512],
    device=["cpu", "cuda"],
    dtype=[torch.float16, torch.bfloat16, torch.float32],
    tags=["short"],
)
|
||||
|
||||
# Register forward and gradient addbmm benchmarks over long + short configs.
op_bench.generate_pt_test(addbmm_long_configs + addbmm_short_configs, AddbmmBenchmark)
op_bench.generate_pt_gradient_test(
    addbmm_long_configs + addbmm_short_configs, AddbmmBenchmark
)

# Running this file directly hands control to the benchmark runner.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
|
||||
@ -27,12 +27,12 @@ batched_binary_configs_short = op_bench.config_list(
|
||||
)
|
||||
|
||||
batched_binary_configs_long = op_bench.cross_product_configs(
|
||||
B=[1, 128],
|
||||
M=[8, 128],
|
||||
N=[32, 64],
|
||||
K=[4, 256],
|
||||
device=["cpu", "cuda"],
|
||||
dtype=[torch.float, torch.bfloat16],
|
||||
B=[8, 32],
|
||||
M=[256, 1024],
|
||||
N=[256, 1024],
|
||||
K=[64, 128],
|
||||
device=["cuda"],
|
||||
dtype=[torch.float32, torch.bfloat16, torch.float16],
|
||||
tags=["long"],
|
||||
)
|
||||
|
||||
@ -40,8 +40,12 @@ batched_binary_configs_long = op_bench.cross_product_configs(
|
||||
class BatchedBinaryOpBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, B, M, N, K, device, dtype, op_func):
|
||||
self.inputs = {
|
||||
"batch1": torch.rand((B, M, N), device=device).to(dtype=dtype),
|
||||
"batch2": torch.rand((B, N, K), device=device).to(dtype=dtype),
|
||||
"batch1": torch.rand(
|
||||
(B, M, N), device=device, dtype=dtype, requires_grad=self.auto_set()
|
||||
),
|
||||
"batch2": torch.rand(
|
||||
(B, N, K), device=device, dtype=dtype, requires_grad=self.auto_set()
|
||||
),
|
||||
}
|
||||
self.op_func = op_func
|
||||
|
||||
@ -54,6 +58,11 @@ op_bench.generate_pt_tests_from_op_list(
|
||||
batched_binary_configs_short + batched_binary_configs_long,
|
||||
BatchedBinaryOpBenchmark,
|
||||
)
|
||||
op_bench.generate_pt_gradient_tests_from_op_list(
|
||||
batched_binary_ops,
|
||||
batched_binary_configs_long,
|
||||
BatchedBinaryOpBenchmark,
|
||||
)
|
||||
|
||||
|
||||
# batched ternary ops
|
||||
@ -66,9 +75,15 @@ batched_ternary_ops = op_bench.op_list(
|
||||
class BatchedTernaryOpBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, B, M, N, K, device, dtype, op_func):
|
||||
self.inputs = {
|
||||
"input_": torch.rand((B, M, K), device=device).to(dtype=dtype),
|
||||
"batch1": torch.rand((B, M, N), device=device).to(dtype=dtype),
|
||||
"batch2": torch.rand((B, N, K), device=device).to(dtype=dtype),
|
||||
"input_": torch.rand(
|
||||
(B, M, K), device=device, dtype=dtype, requires_grad=self.auto_set()
|
||||
),
|
||||
"batch1": torch.rand(
|
||||
(B, M, N), device=device, dtype=dtype, requires_grad=self.auto_set()
|
||||
),
|
||||
"batch2": torch.rand(
|
||||
(B, N, K), device=device, dtype=dtype, requires_grad=self.auto_set()
|
||||
),
|
||||
}
|
||||
self.op_func = op_func
|
||||
|
||||
@ -81,6 +96,12 @@ op_bench.generate_pt_tests_from_op_list(
|
||||
batched_binary_configs_short + batched_binary_configs_long,
|
||||
BatchedTernaryOpBenchmark,
|
||||
)
|
||||
op_bench.generate_pt_gradient_tests_from_op_list(
|
||||
batched_ternary_ops,
|
||||
batched_binary_configs_long,
|
||||
BatchedTernaryOpBenchmark,
|
||||
)
|
||||
|
||||
|
||||
# TODO: does it automatically register new scripts?
|
||||
|
||||
|
||||
@ -13,33 +13,46 @@ mm_short_configs = op_bench.config_list(
|
||||
[128, 128, 128, True, False],
|
||||
[256, 256, 256, False, True],
|
||||
],
|
||||
cross_product_configs={
|
||||
"device": ["cpu", "cuda"],
|
||||
},
|
||||
cross_product_configs={"device": ["cpu", "cuda"]},
|
||||
tags=["short"],
|
||||
)
|
||||
|
||||
|
||||
mm_long_configs = op_bench.cross_product_configs(
|
||||
M=[32],
|
||||
N=[512, 128],
|
||||
K=[64],
|
||||
M=[256, 1024, 3000],
|
||||
N=[512, 4096],
|
||||
K=[512, 4096],
|
||||
trans_a=[False, True],
|
||||
trans_b=[True, False],
|
||||
device=["cpu", "cuda"],
|
||||
device=["cuda"],
|
||||
dtype=[torch.float16, torch.bfloat16, torch.float32],
|
||||
tags=["long"],
|
||||
)
|
||||
|
||||
|
||||
class MatMulBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, M, N, K, trans_a, trans_b, device):
|
||||
def init(self, M, N, K, trans_a, trans_b, device, dtype=torch.float):
|
||||
# Create tensors without requires_grad first, then set it separately
|
||||
# This avoids creating graph leaves that cannot be deep copied
|
||||
if trans_a:
|
||||
input_one = torch.rand(M, N, device=device, dtype=dtype)
|
||||
else:
|
||||
input_one = torch.rand(N, M, device=device, dtype=dtype).t()
|
||||
|
||||
if trans_b:
|
||||
input_two = torch.rand(N, K, device=device, dtype=dtype)
|
||||
else:
|
||||
input_two = torch.rand(K, N, device=device, dtype=dtype).t()
|
||||
|
||||
# Set requires_grad after tensor creation to avoid graph leaf issues
|
||||
if self.auto_set():
|
||||
input_one.requires_grad_(True)
|
||||
if self.auto_set():
|
||||
input_two.requires_grad_(True)
|
||||
|
||||
self.inputs = {
|
||||
"input_one": torch.rand(M, N, device=device)
|
||||
if trans_a
|
||||
else torch.rand(N, M, device=device).t(),
|
||||
"input_two": torch.rand(N, K, device=device)
|
||||
if trans_b
|
||||
else torch.rand(K, N, device=device).t(),
|
||||
"input_one": input_one,
|
||||
"input_two": input_two,
|
||||
}
|
||||
self.set_module_name("matmul")
|
||||
|
||||
@ -48,6 +61,7 @@ class MatMulBenchmark(op_bench.TorchBenchmarkBase):
|
||||
|
||||
|
||||
op_bench.generate_pt_test(mm_long_configs + mm_short_configs, MatMulBenchmark)
|
||||
op_bench.generate_pt_gradient_test(mm_long_configs, MatMulBenchmark)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -23,11 +23,11 @@ mm_short_configs = op_bench.config_list(
|
||||
)
|
||||
|
||||
mm_long_configs = op_bench.cross_product_configs(
|
||||
M=[8, 128],
|
||||
N=[32, 64],
|
||||
K=[256, 512],
|
||||
device=["cpu", "cuda"],
|
||||
dtype=[torch.float, torch.bfloat16],
|
||||
M=[256, 1024, 3000],
|
||||
N=[512, 4096],
|
||||
K=[512, 4096],
|
||||
device=["cuda"],
|
||||
dtype=[torch.float16, torch.bfloat16, torch.float32],
|
||||
tags=["long"],
|
||||
)
|
||||
|
||||
@ -35,8 +35,12 @@ mm_long_configs = op_bench.cross_product_configs(
|
||||
class MmOpBenchmark(op_bench.TorchBenchmarkBase):
|
||||
def init(self, M, N, K, device, dtype, op_func):
|
||||
self.inputs = {
|
||||
"input_one": torch.randn(M, N, device=device).to(dtype=dtype),
|
||||
"input_two": torch.randn(N, K, device=device).to(dtype=dtype),
|
||||
"input_one": torch.randn(
|
||||
M, N, device=device, requires_grad=self.auto_set(), dtype=dtype
|
||||
),
|
||||
"input_two": torch.randn(
|
||||
N, K, device=device, requires_grad=self.auto_set(), dtype=dtype
|
||||
),
|
||||
}
|
||||
self.op_func = op_func
|
||||
|
||||
@ -47,6 +51,9 @@ class MmOpBenchmark(op_bench.TorchBenchmarkBase):
|
||||
op_bench.generate_pt_tests_from_op_list(
|
||||
ops_list, mm_short_configs + mm_long_configs, MmOpBenchmark
|
||||
)
|
||||
op_bench.generate_pt_gradient_tests_from_op_list(
|
||||
ops_list, mm_long_configs, MmOpBenchmark
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -391,6 +391,8 @@ def get_aten_generated_files(enabled_backends):
|
||||
"CompositeExplicitAutogradFunctions_inl.h",
|
||||
"CompositeExplicitAutogradNonFunctionalFunctions.h",
|
||||
"CompositeExplicitAutogradNonFunctionalFunctions_inl.h",
|
||||
"ViewMetaClasses.h",
|
||||
"ViewMetaClasses.cpp",
|
||||
"VmapGeneratedPlumbing.h",
|
||||
"core/ATenOpList.cpp",
|
||||
"core/TensorBody.h",
|
||||
@ -1192,6 +1194,7 @@ def define_buck_targets(
|
||||
"NativeMetaFunctions.h": ":gen_aten[NativeMetaFunctions.h]",
|
||||
"Operators.h": ":gen_aten[Operators.h]",
|
||||
"RedispatchFunctions.h": ":gen_aten[RedispatchFunctions.h]",
|
||||
"ViewMetaClasses.h": ":gen_aten[ViewMetaClasses.h]",
|
||||
"core/TensorBody.h": ":gen_aten[core/TensorBody.h]",
|
||||
"core/aten_interned_strings.h": ":gen_aten[core/aten_interned_strings.h]",
|
||||
"core/enum_tag.h": ":gen_aten[core/enum_tag.h]",
|
||||
|
||||
@ -118,6 +118,9 @@ def define_targets(rules):
|
||||
":LazyNonNativeIr.h",
|
||||
":RegisterDispatchDefinitions.ini",
|
||||
":RegisterDispatchKey.cpp",
|
||||
":ViewMetaClassesPythonBinding.cpp",
|
||||
":ViewMetaClasses.cpp",
|
||||
":ViewMetaClasses.h",
|
||||
":native_functions.yaml",
|
||||
":shape_inference.h",
|
||||
":tags.yaml",
|
||||
@ -170,6 +173,7 @@ GENERATED_H = [
|
||||
"FunctionalInverses.h",
|
||||
"RedispatchFunctions.h",
|
||||
"RegistrationDeclarations.h",
|
||||
"ViewMetaClasses.h",
|
||||
"VmapGeneratedPlumbing.h",
|
||||
]
|
||||
|
||||
@ -246,6 +250,7 @@ GENERATED_CPP = [
|
||||
"RegisterFunctionalization_1.cpp",
|
||||
"RegisterFunctionalization_2.cpp",
|
||||
"RegisterFunctionalization_3.cpp",
|
||||
"ViewMetaClasses.cpp",
|
||||
]
|
||||
|
||||
GENERATED_CPP_CORE = [
|
||||
@ -307,6 +312,7 @@ _GENERATED_AUTOGRAD_PYTHON_CPP = [
|
||||
"torch/csrc/autograd/generated/python_torch_functions_1.cpp",
|
||||
"torch/csrc/autograd/generated/python_torch_functions_2.cpp",
|
||||
"torch/csrc/autograd/generated/python_variable_methods.cpp",
|
||||
"torch/csrc/functionalization/generated/ViewMetaClassesPythonBinding.cpp"
|
||||
]
|
||||
|
||||
GENERATED_AUTOGRAD_PYTHON = _GENERATED_AUTOGRAD_PYTHON_HEADERS + _GENERATED_AUTOGRAD_PYTHON_CPP
|
||||
|
||||
@ -1007,6 +1007,7 @@ libtorch_python_core_sources = [
|
||||
"torch/csrc/utils/disable_torch_function.cpp",
|
||||
"torch/csrc/utils/verbose.cpp",
|
||||
"torch/csrc/cpu/Module.cpp",
|
||||
"torch/csrc/functionalization/Module.cpp",
|
||||
"torch/csrc/instruction_counter/Module.cpp",
|
||||
"torch/nativert/python/Bindings.cpp",
|
||||
] + lazy_tensor_core_python_sources
|
||||
@ -1049,6 +1050,7 @@ def glob_libtorch_python_sources(gencode_pattern = ":generate-code[{}]"):
|
||||
"torch/csrc/autograd/generated/python_torch_functions_1.cpp",
|
||||
"torch/csrc/autograd/generated/python_torch_functions_2.cpp",
|
||||
"torch/csrc/autograd/generated/python_variable_methods.cpp",
|
||||
"torch/csrc/functionalization/generated/ViewMetaClassesPythonBinding.cpp",
|
||||
]]
|
||||
|
||||
_libtorch_python_sources.extend(libtorch_python_core_sources)
|
||||
|
||||
@ -316,6 +316,7 @@ set(GENERATED_CXX_PYTHON
|
||||
"${TORCH_SRC_DIR}/csrc/autograd/generated/python_special_functions.cpp"
|
||||
"${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.cpp"
|
||||
"${TORCH_SRC_DIR}/csrc/autograd/generated/python_enum_tag.cpp"
|
||||
"${TORCH_SRC_DIR}/csrc/functionalization/generated/ViewMetaClassesPythonBinding.cpp"
|
||||
)
|
||||
|
||||
set(GENERATED_H_PYTHON
|
||||
@ -379,6 +380,9 @@ add_custom_command(
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/LazyIr.h"
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/LazyNonNativeIr.h"
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/RegisterDispatchKey.cpp"
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/ViewMetaClasses.h"
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/ViewMetaClasses.cpp"
|
||||
"${TORCH_ROOT}/aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp"
|
||||
${autograd_python}
|
||||
${autograd_yaml}
|
||||
${autograd_templates}
|
||||
|
||||
@ -40,7 +40,34 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
] + (["breathe", "exhale"] if run_doxygen else [])
|
||||
|
||||
intersphinx_mapping = {"pytorch": ("https://pytorch.org/docs/main", None)}
|
||||
intersphinx_mapping = {"pytorch": ("https://docs.pytorch.org/docs/main", None)}
|
||||
|
||||
# Configure Sphinx warnings and error handling
|
||||
suppress_warnings = [
|
||||
"ref.citation",
|
||||
"ref.footnote",
|
||||
"ref.doc",
|
||||
"toc.excluded",
|
||||
"toc.not_readable",
|
||||
"misc.highlighting_failure",
|
||||
]
|
||||
|
||||
# Configure Breathe
|
||||
breathe_show_define_initializer = True
|
||||
breathe_show_enumvalue_initializer = True
|
||||
breathe_default_members = ("members", "undoc-members")
|
||||
|
||||
|
||||
# Fix for Python 3.10+ compatibility with exhale 2.3.0
|
||||
# MutableMapping was moved from collections to collections.abc in Python 3.10
|
||||
try:
|
||||
import collections
|
||||
from collections.abc import MutableMapping
|
||||
|
||||
if not hasattr(collections, "MutableMapping"):
|
||||
collections.MutableMapping = MutableMapping
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Setup absolute paths for communicating with breathe / exhale where
|
||||
# items are expected / should be trimmed by.
|
||||
@ -101,6 +128,21 @@ exhale_args = {
|
||||
Welcome to the developer reference for the PyTorch C++ API.
|
||||
"""
|
||||
),
|
||||
############################################################################
|
||||
# Duplicate handling and error management. #
|
||||
############################################################################
|
||||
# Note: Using Doxyfile instead of stdin configuration
|
||||
# "exhaleDoxygenStdin" is not compatible with "exhaleUseDoxyfile"
|
||||
# Handle unresolved references more gracefully
|
||||
"unabridgedOrphanKinds": {
|
||||
"function",
|
||||
"define",
|
||||
"enum",
|
||||
"enumvalue",
|
||||
"typedef",
|
||||
"variable",
|
||||
},
|
||||
"fullToctreeMaxDepth": 2,
|
||||
}
|
||||
|
||||
# Tell sphinx what the primary language being documented is.
|
||||
@ -174,6 +216,7 @@ html_theme = "pytorch_sphinx_theme2"
|
||||
#
|
||||
html_theme_options = {
|
||||
"canonical_url": "https://pytorch.org/docs/stable/",
|
||||
"analytics_id": "GTM-T8XT4PS",
|
||||
"collapse_navigation": False,
|
||||
"logo": {"text": "Home"},
|
||||
"icon_links": [
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user