mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-27 00:54:52 +08:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			issue-1610
			...
			module-shi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 3f85e2baa3 | |||
| 339fe1a29d | |||
| 8fdf326e85 | |||
| 1cd74387af | 
| @ -3,13 +3,12 @@ set -eux -o pipefail | ||||
|  | ||||
| GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} | ||||
|  | ||||
| # Set CUDA architecture lists to match x86 build_cuda.sh | ||||
| if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0" | ||||
| elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then | ||||
| if [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0" | ||||
| elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX" | ||||
| fi | ||||
|  | ||||
| if [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0" | ||||
| fi | ||||
|  | ||||
| # Compress the fatbin with -compress-mode=size for CUDA 13 | ||||
| @ -28,7 +27,7 @@ cd / | ||||
| # on the mounted pytorch repo | ||||
| git config --global --add safe.directory /pytorch | ||||
| pip install -r /pytorch/requirements.txt | ||||
| pip install auditwheel==6.2.0 wheel | ||||
| pip install auditwheel==6.2.0 | ||||
| if [ "$DESIRED_CUDA" = "cpu" ]; then | ||||
|     echo "BASE_CUDA_VERSION is not set. Building cpu wheel." | ||||
|     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files | ||||
| @ -36,16 +35,6 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then | ||||
| else | ||||
|     echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA" | ||||
|     export USE_SYSTEM_NCCL=1 | ||||
|  | ||||
|     # Check if we should use NVIDIA libs from PyPI (similar to x86 build_cuda.sh logic) | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling CUDA libraries with wheel for aarch64." | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi for aarch64." | ||||
|         echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS" | ||||
|         export USE_NVIDIA_PYPI_LIBS=1 | ||||
|     fi | ||||
|  | ||||
|     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files | ||||
|     USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda | ||||
| fi | ||||
|  | ||||
| @ -69,186 +69,83 @@ def replace_tag(filename) -> None: | ||||
|         f.writelines(lines) | ||||
|  | ||||
|  | ||||
| def patch_library_rpath( | ||||
|     folder: str, | ||||
|     lib_name: str, | ||||
|     use_nvidia_pypi_libs: bool = False, | ||||
|     desired_cuda: str = "", | ||||
| ) -> None: | ||||
|     """Apply patchelf to set RPATH for a library in torch/lib""" | ||||
|     lib_path = f"{folder}/tmp/torch/lib/{lib_name}" | ||||
|  | ||||
|     if use_nvidia_pypi_libs: | ||||
|         # For PyPI NVIDIA libraries, construct CUDA RPATH | ||||
|         cuda_rpaths = [ | ||||
|             "$ORIGIN/../../nvidia/cudnn/lib", | ||||
|             "$ORIGIN/../../nvidia/nvshmem/lib", | ||||
|             "$ORIGIN/../../nvidia/nccl/lib", | ||||
|             "$ORIGIN/../../nvidia/cusparselt/lib", | ||||
|         ] | ||||
|  | ||||
|         if "130" in desired_cuda: | ||||
|             cuda_rpaths.append("$ORIGIN/../../nvidia/cu13/lib") | ||||
|         else: | ||||
|             cuda_rpaths.extend( | ||||
|                 [ | ||||
|                     "$ORIGIN/../../nvidia/cublas/lib", | ||||
|                     "$ORIGIN/../../nvidia/cuda_cupti/lib", | ||||
|                     "$ORIGIN/../../nvidia/cuda_nvrtc/lib", | ||||
|                     "$ORIGIN/../../nvidia/cuda_runtime/lib", | ||||
|                     "$ORIGIN/../../nvidia/cufft/lib", | ||||
|                     "$ORIGIN/../../nvidia/curand/lib", | ||||
|                     "$ORIGIN/../../nvidia/cusolver/lib", | ||||
|                     "$ORIGIN/../../nvidia/cusparse/lib", | ||||
|                     "$ORIGIN/../../nvidia/nvtx/lib", | ||||
|                     "$ORIGIN/../../nvidia/cufile/lib", | ||||
|                 ] | ||||
|             ) | ||||
|  | ||||
|         # Add $ORIGIN for local torch libs | ||||
|         rpath = ":".join(cuda_rpaths) + ":$ORIGIN" | ||||
|     else: | ||||
|         # For bundled libraries, just use $ORIGIN | ||||
|         rpath = "$ORIGIN" | ||||
|  | ||||
|     if os.path.exists(lib_path): | ||||
|         os.system( | ||||
|             f"cd {folder}/tmp/torch/lib/; " | ||||
|             f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def copy_and_patch_library( | ||||
|     src_path: str, | ||||
|     folder: str, | ||||
|     use_nvidia_pypi_libs: bool = False, | ||||
|     desired_cuda: str = "", | ||||
| ) -> None: | ||||
|     """Copy a library to torch/lib and patch its RPATH""" | ||||
|     if os.path.exists(src_path): | ||||
|         lib_name = os.path.basename(src_path) | ||||
|         shutil.copy2(src_path, f"{folder}/tmp/torch/lib/{lib_name}") | ||||
|         patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda) | ||||
|  | ||||
|  | ||||
| def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|     """ | ||||
|     Package the cuda wheel libraries | ||||
|     """ | ||||
|     folder = os.path.dirname(wheel_path) | ||||
|     wheelname = os.path.basename(wheel_path) | ||||
|     os.mkdir(f"{folder}/tmp") | ||||
|     os.system(f"unzip {wheel_path} -d {folder}/tmp") | ||||
|     # Delete original wheel since it will be repackaged | ||||
|     os.system(f"rm {wheel_path}") | ||||
|     # Common libraries for all CUDA versions | ||||
|     common_libs = [ | ||||
|         # Non-NVIDIA system libraries | ||||
|         "/lib64/libgomp.so.1", | ||||
|         "/usr/lib64/libgfortran.so.5", | ||||
|         "/acl/build/libarm_compute.so", | ||||
|         "/acl/build/libarm_compute_graph.so", | ||||
|         # Common CUDA libraries (same for all versions) | ||||
|         "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|         "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so", | ||||
|         "/usr/local/cuda/lib64/libcudnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcusparseLt.so.0", | ||||
|         "/usr/local/cuda/lib64/libcurand.so.10", | ||||
|         "/usr/local/cuda/lib64/libnccl.so.2", | ||||
|         "/usr/local/cuda/lib64/libnvshmem_host.so.3", | ||||
|         "/usr/local/cuda/lib64/libcudnn_adv.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_cnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_graph.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_ops.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", | ||||
|         "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|         "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|         "/usr/local/cuda/lib64/libcusparse.so.12", | ||||
|     ] | ||||
|  | ||||
|     # Check if we should use PyPI NVIDIA libraries or bundle system libraries | ||||
|     use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1" | ||||
|  | ||||
|     if use_nvidia_pypi_libs: | ||||
|         print("Using nvidia libs from pypi - skipping CUDA library bundling") | ||||
|         # For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages | ||||
|         # We only need to bundle non-NVIDIA libraries | ||||
|         minimal_libs_to_copy = [ | ||||
|             "/lib64/libgomp.so.1", | ||||
|             "/usr/lib64/libgfortran.so.5", | ||||
|             "/acl/build/libarm_compute.so", | ||||
|             "/acl/build/libarm_compute_graph.so", | ||||
|             "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|     # CUDA version-specific libraries | ||||
|     if "130" in desired_cuda: | ||||
|         version_specific_libs = [ | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13", | ||||
|             "/usr/local/cuda/lib64/libcublas.so.13", | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.13", | ||||
|             "/usr/local/cuda/lib64/libcudart.so.13", | ||||
|             "/usr/local/cuda/lib64/libcufft.so.12", | ||||
|             "/usr/local/cuda/lib64/libcusolver.so.12", | ||||
|             "/usr/local/cuda/lib64/libnvJitLink.so.13", | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.13", | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so.13.0", | ||||
|         ] | ||||
|     elif "12" in desired_cuda: | ||||
|         # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9") | ||||
|         minor_version = desired_cuda[-1] | ||||
|         version_specific_libs = [ | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||||
|             "/usr/local/cuda/lib64/libcublas.so.12", | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.12", | ||||
|             "/usr/local/cuda/lib64/libcudart.so.12", | ||||
|             "/usr/local/cuda/lib64/libcufft.so.11", | ||||
|             "/usr/local/cuda/lib64/libcusolver.so.11", | ||||
|             "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|             f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}", | ||||
|         ] | ||||
|  | ||||
|         # Copy minimal libraries to unzipped_folder/torch/lib | ||||
|         for lib_path in minimal_libs_to_copy: | ||||
|             copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda) | ||||
|     # Combine all libraries | ||||
|     libs_to_copy = common_libs + version_specific_libs | ||||
|  | ||||
|         # Patch torch libraries used for searching libraries | ||||
|         torch_libs_to_patch = [ | ||||
|             "libtorch.so", | ||||
|             "libtorch_cpu.so", | ||||
|             "libtorch_cuda.so", | ||||
|             "libtorch_cuda_linalg.so", | ||||
|             "libtorch_global_deps.so", | ||||
|             "libtorch_python.so", | ||||
|             "libtorch_nvshmem.so", | ||||
|             "libc10.so", | ||||
|             "libc10_cuda.so", | ||||
|             "libcaffe2_nvrtc.so", | ||||
|             "libshm.so", | ||||
|         ] | ||||
|         for lib_name in torch_libs_to_patch: | ||||
|             patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda) | ||||
|     else: | ||||
|         print("Bundling CUDA libraries with wheel") | ||||
|         # Original logic for bundling system CUDA libraries | ||||
|         # Common libraries for all CUDA versions | ||||
|         common_libs = [ | ||||
|             # Non-NVIDIA system libraries | ||||
|             "/lib64/libgomp.so.1", | ||||
|             "/usr/lib64/libgfortran.so.5", | ||||
|             "/acl/build/libarm_compute.so", | ||||
|             "/acl/build/libarm_compute_graph.so", | ||||
|             # Common CUDA libraries (same for all versions) | ||||
|             "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so", | ||||
|             "/usr/local/cuda/lib64/libcudnn.so.9", | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0", | ||||
|             "/usr/local/cuda/lib64/libcurand.so.10", | ||||
|             "/usr/local/cuda/lib64/libnccl.so.2", | ||||
|             "/usr/local/cuda/lib64/libnvshmem_host.so.3", | ||||
|             "/usr/local/cuda/lib64/libcudnn_adv.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_cnn.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_graph.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_ops.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", | ||||
|             "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             "/usr/local/cuda/lib64/libcusparse.so.12", | ||||
|         ] | ||||
|  | ||||
|         # CUDA version-specific libraries | ||||
|         if "13" in desired_cuda: | ||||
|             minor_version = desired_cuda[-1] | ||||
|             version_specific_libs = [ | ||||
|                 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13", | ||||
|                 "/usr/local/cuda/lib64/libcublas.so.13", | ||||
|                 "/usr/local/cuda/lib64/libcublasLt.so.13", | ||||
|                 "/usr/local/cuda/lib64/libcudart.so.13", | ||||
|                 "/usr/local/cuda/lib64/libcufft.so.12", | ||||
|                 "/usr/local/cuda/lib64/libcusolver.so.12", | ||||
|                 "/usr/local/cuda/lib64/libnvJitLink.so.13", | ||||
|                 "/usr/local/cuda/lib64/libnvrtc.so.13", | ||||
|                 f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}", | ||||
|             ] | ||||
|         elif "12" in desired_cuda: | ||||
|             # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9") | ||||
|             minor_version = desired_cuda[-1] | ||||
|             version_specific_libs = [ | ||||
|                 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||||
|                 "/usr/local/cuda/lib64/libcublas.so.12", | ||||
|                 "/usr/local/cuda/lib64/libcublasLt.so.12", | ||||
|                 "/usr/local/cuda/lib64/libcudart.so.12", | ||||
|                 "/usr/local/cuda/lib64/libcufft.so.11", | ||||
|                 "/usr/local/cuda/lib64/libcusolver.so.11", | ||||
|                 "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|                 "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|                 f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}", | ||||
|             ] | ||||
|         else: | ||||
|             raise ValueError(f"Unsupported CUDA version: {desired_cuda}.") | ||||
|  | ||||
|         # Combine all libraries | ||||
|         libs_to_copy = common_libs + version_specific_libs | ||||
|  | ||||
|         # Copy libraries to unzipped_folder/torch/lib | ||||
|         for lib_path in libs_to_copy: | ||||
|             copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda) | ||||
|     # Copy libraries to unzipped_folder/a/lib | ||||
|     for lib_path in libs_to_copy: | ||||
|         lib_name = os.path.basename(lib_path) | ||||
|         shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}") | ||||
|         os.system( | ||||
|             f"cd {folder}/tmp/torch/lib/; " | ||||
|             f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}" | ||||
|         ) | ||||
|  | ||||
|     # Make sure the wheel is tagged with manylinux_2_28 | ||||
|     for f in os.scandir(f"{folder}/tmp/"): | ||||
| @ -256,8 +153,14 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|             replace_tag(f"{f.path}/WHEEL") | ||||
|             break | ||||
|  | ||||
|     os.system(f"wheel pack {folder}/tmp/ -d {folder}") | ||||
|     os.system(f"rm -rf {folder}/tmp/") | ||||
|     os.mkdir(f"{folder}/cuda_wheel") | ||||
|     os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") | ||||
|     shutil.move( | ||||
|         f"{folder}/cuda_wheel/{wheelname}", | ||||
|         f"{folder}/{wheelname}", | ||||
|         copy_function=shutil.copy2, | ||||
|     ) | ||||
|     os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/") | ||||
|  | ||||
|  | ||||
| def complete_wheel(folder: str) -> str: | ||||
| @ -280,7 +183,14 @@ def complete_wheel(folder: str) -> str: | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|     else: | ||||
|         repaired_wheel_name = list_dir(f"/{folder}/dist")[0] | ||||
|         repaired_wheel_name = wheel_name.replace( | ||||
|             "linux_aarch64", "manylinux_2_28_aarch64" | ||||
|         ) | ||||
|         print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}") | ||||
|         os.rename( | ||||
|             f"/{folder}/dist/{wheel_name}", | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|  | ||||
|     print(f"Copying {repaired_wheel_name} to artifacts") | ||||
|     shutil.copy2( | ||||
| @ -322,16 +232,6 @@ if __name__ == "__main__": | ||||
|     if enable_cuda: | ||||
|         build_vars += "MAX_JOBS=5 " | ||||
|  | ||||
|         # Handle PyPI NVIDIA libraries vs bundled libraries | ||||
|         use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1" | ||||
|         if use_nvidia_pypi_libs: | ||||
|             print("Configuring build for PyPI NVIDIA libraries") | ||||
|             # Configure for dynamic linking (matching x86 logic) | ||||
|             build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 " | ||||
|         else: | ||||
|             print("Configuring build for bundled NVIDIA libraries") | ||||
|             # Keep existing static linking approach - already configured above | ||||
|  | ||||
|     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||||
|     desired_cuda = os.getenv("DESIRED_CUDA") | ||||
|     if override_package_version is not None: | ||||
|  | ||||
| @ -214,7 +214,8 @@ case "$tag" in | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-gcc11-inductor-benchmarks) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     # TODO (huydhn): Upgrade this to Python >= 3.10 | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|  | ||||
| @ -56,13 +56,9 @@ ENV INSTALLED_VISION ${VISION} | ||||
|  | ||||
| # Install rocm | ||||
| ARG ROCM_VERSION | ||||
| RUN mkdir ci_commit_pins | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt | ||||
| COPY ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh | ||||
| RUN rm install_rocm.sh common_utils.sh | ||||
| RUN rm -r ci_commit_pins | ||||
| RUN rm install_rocm.sh | ||||
| COPY ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} | ||||
| RUN rm install_rocm_magma.sh | ||||
|  | ||||
| @ -1 +0,0 @@ | ||||
| 7fe50dc3da2069d6645d9deb8c017a876472a977 | ||||
| @ -1 +1 @@ | ||||
| 1b0418a9a454b2b93ab8d71f40e59d2297157fae | ||||
| d0e80f39c562c70986fc548fa6e5852ad86e16e7 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 5ae38bdb0dc066c5823e34dc9797afb9de42c866 | ||||
| f7888497a1eb9e98d4c07537f0d0bcfe180d1363 | ||||
|  | ||||
| @ -147,7 +147,7 @@ function install_128 { | ||||
| } | ||||
|  | ||||
| function install_130 { | ||||
|   CUDNN_VERSION=9.13.0.50 | ||||
|   CUDNN_VERSION=9.12.0.46 | ||||
|   echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1" | ||||
|   # install CUDA 13.0 in the same container | ||||
|   install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux | ||||
|  | ||||
| @ -2,11 +2,6 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # for pip_install function | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)" | ||||
|  | ||||
| ver() { | ||||
|     printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' '); | ||||
| } | ||||
| @ -118,8 +113,6 @@ EOF | ||||
|         rm -rf HIP clr | ||||
|     fi | ||||
|  | ||||
|     pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION" | ||||
|  | ||||
|     # Cleanup | ||||
|     apt-get autoclean && apt-get clean | ||||
|     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||||
| @ -183,8 +176,6 @@ install_centos() { | ||||
|       sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;" | ||||
|   done | ||||
|  | ||||
|   pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION" | ||||
|  | ||||
|   # Cleanup | ||||
|   yum clean all | ||||
|   rm -rf /var/cache/yum | ||||
|  | ||||
| @ -74,14 +74,6 @@ RUN bash ./install_cuda.sh 13.0 | ||||
| RUN bash ./install_magma.sh 13.0 | ||||
| RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda | ||||
|  | ||||
| # Install libibverbs for libtorch and copy to CUDA directory | ||||
| RUN apt-get update -y && \ | ||||
|     apt-get install -y libibverbs-dev librdmacm-dev && \ | ||||
|     cp /usr/lib/x86_64-linux-gnu/libmlx5.so* /usr/local/cuda/lib64/ && \ | ||||
|     cp /usr/lib/x86_64-linux-gnu/librdmacm.so* /usr/local/cuda/lib64/ && \ | ||||
|     cp /usr/lib/x86_64-linux-gnu/libibverbs.so* /usr/local/cuda/lib64/ && \ | ||||
|     cp /usr/lib/x86_64-linux-gnu/libnl* /usr/local/cuda/lib64/ | ||||
|  | ||||
| FROM cpu as rocm | ||||
| ARG ROCM_VERSION | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 3.5.0 | ||||
| 3.4.0 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 3.5.0 | ||||
| 3.4.0 | ||||
|  | ||||
| @ -52,13 +52,9 @@ ENV INSTALLED_VISION ${VISION} | ||||
|  | ||||
| # Install rocm | ||||
| ARG ROCM_VERSION | ||||
| RUN mkdir ci_commit_pins | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt | ||||
| COPY ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh | ||||
| RUN rm install_rocm.sh common_utils.sh | ||||
| RUN rm -r ci_commit_pins | ||||
| RUN rm install_rocm.sh | ||||
| COPY ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} | ||||
| RUN rm install_rocm_magma.sh | ||||
|  | ||||
| @ -7,4 +7,4 @@ set -ex | ||||
|  | ||||
| SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||||
|  | ||||
| USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh | ||||
| USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh | ||||
|  | ||||
| @ -76,6 +76,7 @@ def sample_vllm_test_library(): | ||||
|                 ), | ||||
|                 "pytest -v -s entrypoints/llm/test_lazy_outlines.py", | ||||
|                 "pytest -v -s entrypoints/llm/test_generate.py ", | ||||
|                 "pytest -v -s entrypoints/llm/test_generate_multiple_loras.py", | ||||
|                 "VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode", | ||||
|             ], | ||||
|         }, | ||||
| @ -96,24 +97,14 @@ def sample_vllm_test_library(): | ||||
|             "num_gpus": 4, | ||||
|             "steps": [ | ||||
|                 "pytest -v -s -x lora/test_chatglm3_tp.py", | ||||
|                 "echo $VLLM_WORKER_MULTIPROC_METHOD", | ||||
|                 "pytest -v -s -x lora/test_llama_tp.py", | ||||
|                 "pytest -v -s -x lora/test_llm_with_multi_loras.py", | ||||
|                 "pytest -v -s -x lora/test_multi_loras_with_tp.py", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_distributed_test_28_failure_test": { | ||||
|             "title": "Distributed Tests (2 GPUs) pytorch 2.8 release failure", | ||||
|             "id": "vllm_distributed_test_28_failure_test", | ||||
|             "env_vars": { | ||||
|                 "VLLM_WORKER_MULTIPROC_METHOD": "spawn", | ||||
|             }, | ||||
|             "num_gpus": 4, | ||||
|             "steps": [ | ||||
|                 "pytest -v -s distributed/test_sequence_parallel.py", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_lora_28_failure_test": { | ||||
|             "title": "LoRA pytorch 2.8 failure test", | ||||
|             "id": "vllm_lora_28_failure_test", | ||||
|         "vllm_lora_280_failure_test": { | ||||
|             "title": "LoRA 280 failure test", | ||||
|             "id": "vllm_lora_280_failure_test", | ||||
|             "steps": ["pytest -v lora/test_quant_model.py"], | ||||
|         }, | ||||
|         "vllm_multi_model_processor_test": { | ||||
| @ -124,15 +115,6 @@ def sample_vllm_test_library(): | ||||
|                 "pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_multi_model_test_28_failure_test": { | ||||
|             "title": "Multi-Model Test (Failed 2.8 release)", | ||||
|             "id": "vllm_multi_model_test_28_failure_test", | ||||
|             "package_install": ["git+https://github.com/TIGER-AI-Lab/Mantis.git"], | ||||
|             "steps": [ | ||||
|                 "pytest -v -s models/multimodal/generation/test_voxtral.py", | ||||
|                 "pytest -v -s models/multimodal/pooling", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_pytorch_compilation_unit_tests": { | ||||
|             "title": "PyTorch Compilation Unit Tests", | ||||
|             "id": "vllm_pytorch_compilation_unit_tests", | ||||
| @ -147,28 +129,6 @@ def sample_vllm_test_library(): | ||||
|                 "pytest -v -s compile/test_decorator.py", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_languagde_model_test_extended_generation_28_failure_test": { | ||||
|             "title": "Language Models Test (Extended Generation) 2.8 release failure", | ||||
|             "id": "vllm_languagde_model_test_extended_generation_28_failure_test", | ||||
|             "package_install": [ | ||||
|                 "--no-build-isolation", | ||||
|                 "git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8", | ||||
|             ], | ||||
|             "steps": [ | ||||
|                 "pytest -v -s models/language/generation/test_mistral.py", | ||||
|             ], | ||||
|         }, | ||||
|         "vllm_distributed_test_2_gpu_28_failure_test": { | ||||
|             "title": "Distributed Tests (2 GPUs) pytorch 2.8 release failure", | ||||
|             "id": "vllm_distributed_test_2_gpu_28_failure_test", | ||||
|             "env_vars": { | ||||
|                 "VLLM_WORKER_MULTIPROC_METHOD": "spawn", | ||||
|             }, | ||||
|             "num_gpus": 4, | ||||
|             "steps": [ | ||||
|                 "pytest -v -s distributed/test_sequence_parallel.py", | ||||
|             ], | ||||
|         }, | ||||
|         # TODO(elainewy):need to add g6 with 4 gpus to run this test | ||||
|         "vllm_lora_test": { | ||||
|             "title": "LoRA Test %N", | ||||
|  | ||||
| @ -66,11 +66,6 @@ class VllmBuildParameters: | ||||
|         "DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile.tmp_vllm" | ||||
|     ) | ||||
|  | ||||
|     # the cleaning script to remove torch dependencies from pip | ||||
|     cleaning_script: Path = env_path_field( | ||||
|         "cleaning_script", ".github/ci_configs/vllm/use_existing_torch.py" | ||||
|     ) | ||||
|  | ||||
|     # OUTPUT_DIR: where docker buildx (local exporter) will write artifacts | ||||
|     output_dir: Path = env_path_field("OUTPUT_DIR", "external/vllm") | ||||
|  | ||||
| @ -165,7 +160,6 @@ class VllmBuildRunner(BaseRunner): | ||||
|         logger.info("Running vllm build with inputs: %s", inputs) | ||||
|         vllm_commit = clone_vllm() | ||||
|  | ||||
|         self.cp_torch_cleaning_script(inputs) | ||||
|         self.cp_dockerfile_if_exist(inputs) | ||||
|         # cp torch wheels from root direct to vllm workspace if exist | ||||
|         self.cp_torch_whls_if_exist(inputs) | ||||
| @ -211,11 +205,6 @@ class VllmBuildRunner(BaseRunner): | ||||
|         copy(inputs.torch_whls_path, tmp_dir) | ||||
|         return tmp_dir | ||||
|  | ||||
|     def cp_torch_cleaning_script(self, inputs: VllmBuildParameters): | ||||
|         script = get_path(inputs.cleaning_script, resolve=True) | ||||
|         vllm_script = Path(f"./{self.work_directory}/use_existing_torch.py") | ||||
|         copy(script, vllm_script) | ||||
|  | ||||
|     def cp_dockerfile_if_exist(self, inputs: VllmBuildParameters): | ||||
|         if not inputs.use_local_dockerfile: | ||||
|             logger.info("using vllm default dockerfile.torch_nightly for build") | ||||
|  | ||||
| @ -11,7 +11,7 @@ from typing import Any | ||||
|  | ||||
| from cli.lib.common.cli_helper import BaseRunner | ||||
| from cli.lib.common.envs_helper import env_path_field, env_str_field, get_env | ||||
| from cli.lib.common.path_helper import copy, get_path, remove_dir | ||||
| from cli.lib.common.path_helper import copy, remove_dir | ||||
| from cli.lib.common.pip_helper import ( | ||||
|     pip_install_first_match, | ||||
|     pip_install_packages, | ||||
| @ -43,10 +43,6 @@ class VllmTestParameters: | ||||
|  | ||||
|     torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9") | ||||
|  | ||||
|     cleaning_script: Path = env_path_field( | ||||
|         "cleaning_script", ".github/ci_configs/vllm/use_existing_torch.py" | ||||
|     ) | ||||
|  | ||||
|     def __post_init__(self): | ||||
|         if not self.torch_whls_path.exists(): | ||||
|             raise ValueError("missing torch_whls_path") | ||||
| @ -96,13 +92,11 @@ class VllmTestRunner(BaseRunner): | ||||
|         self._set_envs(params) | ||||
|  | ||||
|         clone_vllm(dst=self.work_directory) | ||||
|         self.cp_torch_cleaning_script(params) | ||||
|         with working_directory(self.work_directory): | ||||
|             remove_dir(Path("vllm")) | ||||
|             self._install_wheels(params) | ||||
|             self._install_dependencies() | ||||
|         # verify the torches are not overridden by test dependencies | ||||
|  | ||||
|         check_versions() | ||||
|  | ||||
|     def run(self): | ||||
| @ -110,31 +104,20 @@ class VllmTestRunner(BaseRunner): | ||||
|         main function to run vllm test | ||||
|         """ | ||||
|         self.prepare() | ||||
|         try: | ||||
|             with working_directory(self.work_directory): | ||||
|                 if self.test_type == TestInpuType.TEST_PLAN: | ||||
|                     if self.num_shards > 1: | ||||
|                         run_test_plan( | ||||
|                             self.test_plan, | ||||
|                             "vllm", | ||||
|                             sample_vllm_test_library(), | ||||
|                             self.shard_id, | ||||
|                             self.num_shards, | ||||
|                         ) | ||||
|                     else: | ||||
|                         run_test_plan( | ||||
|                             self.test_plan, "vllm", sample_vllm_test_library() | ||||
|                         ) | ||||
|         with working_directory(self.work_directory): | ||||
|             if self.test_type == TestInpuType.TEST_PLAN: | ||||
|                 if self.num_shards > 1: | ||||
|                     run_test_plan( | ||||
|                         self.test_plan, | ||||
|                         "vllm", | ||||
|                         sample_vllm_test_library(), | ||||
|                         self.shard_id, | ||||
|                         self.num_shards, | ||||
|                     ) | ||||
|                 else: | ||||
|                     raise ValueError(f"Unknown test type {self.test_type}") | ||||
|         finally: | ||||
|             # double check the torches are not overridden by other packages | ||||
|             check_versions() | ||||
|  | ||||
|     def cp_torch_cleaning_script(self, params: VllmTestParameters): | ||||
|         script = get_path(params.cleaning_script, resolve=True) | ||||
|         vllm_script = Path(f"./{self.work_directory}/use_existing_torch.py") | ||||
|         copy(script, vllm_script) | ||||
|                     run_test_plan(self.test_plan, "vllm", sample_vllm_test_library()) | ||||
|             else: | ||||
|                 raise ValueError(f"Unknown test type {self.test_type}") | ||||
|  | ||||
|     def _install_wheels(self, params: VllmTestParameters): | ||||
|         logger.info("Running vllm test with inputs: %s", params) | ||||
|  | ||||
| @ -124,7 +124,6 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
|     fi | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling with cudnn and cublas." | ||||
|  | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcudnn_adv.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_cnn.so.9" | ||||
| @ -134,11 +133,16 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_heuristic.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn.so.9" | ||||
|             "/usr/local/cuda/lib64/libcublas.so.12" | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.12" | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0" | ||||
|             "/usr/local/cuda/lib64/libcudart.so.12" | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.12" | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so" | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0" | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1" | ||||
|             "/usr/local/cuda/lib64/libnvshmem_host.so.3" | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12" | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so" | ||||
|         ) | ||||
|         DEPS_SONAME+=( | ||||
| @ -150,56 +154,22 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
|             "libcudnn_engines_precompiled.so.9" | ||||
|             "libcudnn_heuristic.so.9" | ||||
|             "libcudnn.so.9" | ||||
|             "libcublas.so.12" | ||||
|             "libcublasLt.so.12" | ||||
|             "libcusparseLt.so.0" | ||||
|             "libcudart.so.12" | ||||
|             "libnvrtc.so.12" | ||||
|             "libnvrtc-builtins.so" | ||||
|             "libnvshmem_host.so.3" | ||||
|             "libcufile.so.0" | ||||
|             "libcufile_rdma.so.1" | ||||
|             "libcupti.so.12" | ||||
|             "libnvperf_host.so" | ||||
|         ) | ||||
|         # Add libnvToolsExt only if CUDA version is not 12.9 | ||||
|         if [[ $CUDA_VERSION == 13* ]]; then | ||||
|             DEPS_LIST+=( | ||||
|                 "/usr/local/cuda/lib64/libcublas.so.13" | ||||
|                 "/usr/local/cuda/lib64/libcublasLt.so.13" | ||||
|                 "/usr/local/cuda/lib64/libcudart.so.13" | ||||
|                 "/usr/local/cuda/lib64/libnvrtc.so.13" | ||||
|                 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13" | ||||
|                 "/usr/local/cuda/lib64/libibverbs.so.1" | ||||
|                 "/usr/local/cuda/lib64/librdmacm.so.1" | ||||
|                 "/usr/local/cuda/lib64/libmlx5.so.1" | ||||
|                 "/usr/local/cuda/lib64/libnl-3.so.200" | ||||
|                 "/usr/local/cuda/lib64/libnl-route-3.so.200") | ||||
|             DEPS_SONAME+=( | ||||
|                 "libcublas.so.13" | ||||
|                 "libcublasLt.so.13" | ||||
|                 "libcudart.so.13" | ||||
|                 "libnvrtc.so.13" | ||||
|                 "libcupti.so.13" | ||||
|                 "libibverbs.so.1" | ||||
|                 "librdmacm.so.1" | ||||
|                 "libmlx5.so.1" | ||||
|                 "libnl-3.so.200" | ||||
|                 "libnl-route-3.so.200") | ||||
|             export USE_CUPTI_SO=1 | ||||
|             export ATEN_STATIC_CUDA=0 | ||||
|             export USE_CUDA_STATIC_LINK=0 | ||||
|             export USE_CUFILE=0 | ||||
|         else | ||||
|             DEPS_LIST+=( | ||||
|                 "/usr/local/cuda/lib64/libnvToolsExt.so.1" | ||||
|                 "/usr/local/cuda/lib64/libcublas.so.12" | ||||
|                 "/usr/local/cuda/lib64/libcublasLt.so.12" | ||||
|                 "/usr/local/cuda/lib64/libcudart.so.12" | ||||
|                 "/usr/local/cuda/lib64/libnvrtc.so.12" | ||||
|                 "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12") | ||||
|             DEPS_SONAME+=( | ||||
|                 "libnvToolsExt.so.1" | ||||
|                 "libcublas.so.12" | ||||
|                 "libcublasLt.so.12" | ||||
|                 "libcudart.so.12" | ||||
|                 "libnvrtc.so.12" | ||||
|                 "libcupti.so.12") | ||||
|         if [[ $CUDA_VERSION != 12.9* ]]; then | ||||
|             DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1") | ||||
|             DEPS_SONAME+=("libnvToolsExt.so.1") | ||||
|         fi | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|  | ||||
| @ -258,19 +258,11 @@ function install_torchrec_and_fbgemm() { | ||||
|       git clone --recursive https://github.com/pytorch/fbgemm | ||||
|       pushd fbgemm/fbgemm_gpu | ||||
|       git checkout "${fbgemm_commit}" --recurse-submodules | ||||
|       # until the fbgemm_commit includes the tbb patch | ||||
|       patch <<'EOF' | ||||
| --- a/FbgemmGpu.cmake | ||||
| +++ b/FbgemmGpu.cmake | ||||
| @@ -184,5 +184,6 @@ gpu_cpp_library( | ||||
|      fbgemm_gpu_tbe_cache | ||||
|      fbgemm_gpu_tbe_optimizers | ||||
|      fbgemm_gpu_tbe_utils | ||||
| +    tbb | ||||
|    DESTINATION | ||||
|      fbgemm_gpu) | ||||
| EOF | ||||
|       python setup.py bdist_wheel --build-variant=rocm | ||||
|       python setup.py bdist_wheel \ | ||||
|         --build-variant=rocm \ | ||||
|         -DHIP_ROOT_DIR="${ROCM_PATH}" \ | ||||
|         -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ | ||||
|         -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" | ||||
|       popd | ||||
|  | ||||
|       # Save the wheel before cleaning up | ||||
|  | ||||
| @ -199,7 +199,7 @@ torchbench_setup_macos() { | ||||
|   git checkout "$(cat ../.github/ci_commit_pins/vision.txt)" | ||||
|   git submodule update --init --recursive | ||||
|   python setup.py clean | ||||
|   python -m pip install -e . -v --no-build-isolation | ||||
|   python setup.py develop | ||||
|   popd | ||||
|  | ||||
|   pushd torchaudio | ||||
| @ -208,7 +208,7 @@ torchbench_setup_macos() { | ||||
|   git submodule update --init --recursive | ||||
|   python setup.py clean | ||||
|   # TODO: Remove this once we figure out how to make TorchAudio find the brew-installed OpenMP | ||||
|   USE_OPENMP=0 python -m pip install -e . -v --no-build-isolation | ||||
|   USE_OPENMP=0 python setup.py develop | ||||
|   popd | ||||
|  | ||||
|   checkout_install_torchbench | ||||
|  | ||||
| @ -386,8 +386,8 @@ def smoke_test_compile(device: str = "cpu") -> None: | ||||
|  | ||||
|  | ||||
| def smoke_test_nvshmem() -> None: | ||||
|     if not torch.cuda.is_available() or target_os == "windows": | ||||
|         print("Windows platform or CUDA is not available, skipping NVSHMEM test") | ||||
|     if not torch.cuda.is_available(): | ||||
|         print("CUDA is not available, skipping NVSHMEM test") | ||||
|         return | ||||
|  | ||||
|     # Check if NVSHMEM is compiled in current build | ||||
| @ -396,9 +396,7 @@ def smoke_test_nvshmem() -> None: | ||||
|     except ImportError: | ||||
|         # Not built with NVSHMEM support. | ||||
|         # torch is not compiled with NVSHMEM prior to 2.9 | ||||
|         from torch.torch_version import TorchVersion | ||||
|  | ||||
|         if TorchVersion(torch.__version__) < (2, 9): | ||||
|         if torch.__version__ < "2.9": | ||||
|             return | ||||
|         else: | ||||
|             # After 2.9: NVSHMEM is expected to be compiled in current build | ||||
|  | ||||
| @ -1721,6 +1721,11 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then | ||||
| elif [[ "${TEST_CONFIG}" == *inductor* ]]; then | ||||
|   install_torchvision | ||||
|   test_inductor_shard "${SHARD_NUMBER}" | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|     if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then | ||||
|       test_inductor_distributed | ||||
|     fi | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *einops* ]]; then | ||||
|   test_einops | ||||
| elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then | ||||
|  | ||||
| @ -1,20 +1,12 @@ | ||||
|  | ||||
| if %CUDA_VERSION% geq 130 ( | ||||
|     set "dll_path=bin\x64" | ||||
| ) else ( | ||||
|     set "dll_path=bin" | ||||
| ) | ||||
|  | ||||
| copy "%CUDA_PATH%\%dll_path%\cusparse*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\cublas*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\cudart*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\curand*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\cufft*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\cusolver*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\nvrtc*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\%dll_path%\nvJitLink_*.dll*"  pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\cusparse*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\cublas*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\cudart*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\curand*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\cufft*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib | ||||
|  | ||||
| copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib | ||||
| copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib | ||||
|  | ||||
| @ -28,3 +20,8 @@ copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib | ||||
| if exist "C:\Windows\System32\zlibwapi.dll" ( | ||||
|     copy "C:\Windows\System32\zlibwapi.dll"  pytorch\torch\lib | ||||
| ) | ||||
|  | ||||
| :: Copy the nvJitLink DLL, which is required for CUDA 12+ | ||||
| if exist "%CUDA_PATH%\bin\nvJitLink_*.dll*" ( | ||||
|     copy "%CUDA_PATH%\bin\nvJitLink_*.dll*"  pytorch\torch\lib | ||||
| ) | ||||
|  | ||||
| @ -1,9 +1,9 @@ | ||||
| set WIN_DRIVER_VN=580.88 | ||||
| set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" & REM @lint-ignore | ||||
| curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe | ||||
| set WIN_DRIVER_VN=528.89 | ||||
| set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe" & REM @lint-ignore | ||||
| curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe -s -noreboot | ||||
| start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| del %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe || ver > NUL | ||||
| del %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL | ||||
|  | ||||
| @ -85,7 +85,7 @@ mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
| # Create an isolated directory to store this build's pytorch checkout and conda | ||||
| # installation | ||||
| if [[ -z "$MAC_PACKAGE_WORK_DIR" ]]; then | ||||
|     MAC_PACKAGE_WORK_DIR="$(pwd)/tmp_wheel_${DESIRED_PYTHON}_$(date +%H%M%S)" | ||||
|     MAC_PACKAGE_WORK_DIR="$(pwd)/tmp_wheel_conda_${DESIRED_PYTHON}_$(date +%H%M%S)" | ||||
| fi | ||||
| mkdir -p "$MAC_PACKAGE_WORK_DIR" || true | ||||
| if [[ -n ${GITHUB_ACTIONS} ]]; then | ||||
| @ -96,11 +96,11 @@ fi | ||||
| whl_tmp_dir="${MAC_PACKAGE_WORK_DIR}/dist" | ||||
| mkdir -p "$whl_tmp_dir" | ||||
|  | ||||
| mac_version='macosx-11_0-arm64' | ||||
| mac_version='macosx_11_0_arm64' | ||||
| libtorch_arch='arm64' | ||||
|  | ||||
| # Create a consistent wheel package name to rename the wheel to | ||||
| wheel_filename_new="${TORCH_PACKAGE_NAME}-${build_version}${build_number_prefix}-cp${python_nodot}-none-${mac_version//[-,]/_}.whl" | ||||
| wheel_filename_new="${TORCH_PACKAGE_NAME}-${build_version}${build_number_prefix}-cp${python_nodot}-none-${mac_version}.whl" | ||||
|  | ||||
| ########################################################### | ||||
|  | ||||
| @ -124,57 +124,93 @@ popd | ||||
|  | ||||
| export TH_BINARY_BUILD=1 | ||||
| export INSTALL_TEST=0 # dont install test binaries into site-packages | ||||
| export MACOSX_DEPLOYMENT_TARGET=11.0 | ||||
| export MACOSX_DEPLOYMENT_TARGET=10.15 | ||||
| export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} | ||||
|  | ||||
| SETUPTOOLS_PINNED_VERSION="==70.1.0" | ||||
| PYYAML_PINNED_VERSION="==5.3" | ||||
| EXTRA_CONDA_INSTALL_FLAGS="" | ||||
| CONDA_ENV_CREATE_FLAGS="" | ||||
| RENAME_WHEEL=true | ||||
| case $desired_python in | ||||
|     3.14t) | ||||
|         echo "Using 3.14 deps" | ||||
|         mac_version='macosx-11.0-arm64' | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|         desired_python="3.14.0rc1" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.14) | ||||
|         echo "Using 3.14t deps" | ||||
|         mac_version='macosx-11.0-arm64' | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|         desired_python="3.14.0rc1" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13t) | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|         desired_python="3.13" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13) | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         ;; | ||||
|     3.12) | ||||
|         echo "Using 3.12 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         ;; | ||||
|     3.11) | ||||
|         echo "Using 3.11 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         ;; | ||||
|     3.10) | ||||
|         echo "Using 3.10 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         ;; | ||||
|     3.9) | ||||
|         echo "Using 3.9 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Unsupported version $desired_python" | ||||
|         exit 1 | ||||
|         echo "Using default deps" | ||||
|         NUMPY_PINNED_VERSION="==1.11.3" | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| # Install into a fresh env | ||||
| tmp_env_name="wheel_py$python_nodot" | ||||
| conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} | ||||
| source activate "$tmp_env_name" | ||||
|  | ||||
| PINNED_PACKAGES=( | ||||
|     "setuptools${SETUPTOOLS_PINNED_VERSION}" | ||||
|     "pyyaml${PYYAML_PINNED_VERSION}" | ||||
|     "numpy${NUMPY_PINNED_VERSION}" | ||||
| ) | ||||
| python -mvenv ~/${desired_python}-build | ||||
| source ~/${desired_python}-build/bin/activate | ||||
| retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" | ||||
| retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements-build.txt" | ||||
| pip install requests ninja typing-extensions | ||||
| retry pip install -r "${pytorch_rootdir}/requirements.txt" || true | ||||
| retry brew install libomp | ||||
|  | ||||
| # For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which | ||||
| @ -188,7 +224,7 @@ export BUILD_TEST=OFF | ||||
| pushd "$pytorch_rootdir" | ||||
| echo "Calling setup.py bdist_wheel at $(date)" | ||||
|  | ||||
| _PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name "${mac_version//[-.]/_}" | ||||
| python setup.py bdist_wheel -d "$whl_tmp_dir" | ||||
|  | ||||
| echo "Finished setup.py bdist_wheel at $(date)" | ||||
|  | ||||
|  | ||||
							
								
								
									
										2
									
								
								.flake8
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								.flake8
									
									
									
									
									
								
							| @ -73,7 +73,7 @@ exclude = | ||||
|     ./docs/src, | ||||
|     ./functorch/docs, | ||||
|     ./functorch/examples, | ||||
|     ./functorch/docs/source/tutorials, | ||||
|     ./functorch/notebooks, | ||||
|     ./scripts, | ||||
|     ./test/generated_type_hints_smoketest.py, | ||||
|     ./third_party, | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							| @ -12,16 +12,13 @@ self-hosted-runner: | ||||
|     - linux.9xlarge.ephemeral | ||||
|     - am2.linux.9xlarge.ephemeral | ||||
|     - linux.12xlarge | ||||
|     - linux.12xlarge.memory | ||||
|     - linux.24xlarge | ||||
|     - linux.24xlarge.memory | ||||
|     - linux.24xlarge.ephemeral | ||||
|     - linux.24xlarge.amd | ||||
|     - linux.arm64.2xlarge | ||||
|     - linux.arm64.2xlarge.ephemeral | ||||
|     - linux.arm64.m7g.4xlarge | ||||
|     - linux.arm64.m7g.4xlarge.ephemeral | ||||
|     - linux.arm64.r7g.12xlarge.memory | ||||
|     - linux.4xlarge.nvidia.gpu | ||||
|     - linux.8xlarge.nvidia.gpu | ||||
|     - linux.16xlarge.nvidia.gpu | ||||
|  | ||||
| @ -4,11 +4,6 @@ name: Build External packages | ||||
| description: build external packages for PyTorch | ||||
|  | ||||
| inputs: | ||||
|   cuda-version: | ||||
|     description: CUDA version to use | ||||
|     type: string | ||||
|     required: true | ||||
|     default: '12.8.1' | ||||
|   cuda-arch-list: | ||||
|     description: TORCH_CUDA_ARCH_LIST (e.g., "8.0;8.9;9.0") | ||||
|     type: string | ||||
| @ -49,12 +44,11 @@ runs: | ||||
|       env: | ||||
|         SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 | ||||
|         SCCACHE_REGION: us-east-1 | ||||
|         CUDA_VERSION: ${{ inputs.cuda-version }} | ||||
|         TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }} | ||||
|         BASE_IMAGE: ${{ inputs.docker-image }} | ||||
|         BUILD_TARGETS: ${{ inputs.build-targets }} | ||||
|         PARENT_OUTPUT_DIR: ${{ inputs.output-dir }} | ||||
|         TORCH_WHEELS_PATH: ${{ inputs.torch-wheel-dir }} | ||||
|         PARENT_OUTPUT_DIR: ${{ inputs.output-dir}} | ||||
|  | ||||
|       shell: bash | ||||
|       run: | | ||||
|         set -euo pipefail | ||||
| @ -75,6 +69,7 @@ runs: | ||||
|           export OUTPUT_DIR | ||||
|           echo "Building external package: $target in directory $OUTPUT_DIR" | ||||
|           python3 -m cli.run build external "$target" | ||||
|  | ||||
|         done | ||||
|  | ||||
|         END_TIME=$(date +%s) | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 87ff22e49ed0e92576c4935ccb8c143daac4a3cd | ||||
| 0757bbb660855272f7dd8d31cc84e7c631522805 | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/fbgemm_rocm.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/fbgemm_rocm.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 08ae0af1395c8d8471f4025deb6af9aef90b342f | ||||
| 7f1de94a4c2d14f59ad4ca84538c36084ea6b2c8 | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 973c9d01da863cac9c51e8a5c0d390fc84b84fbc | ||||
| 862f2ef893d9751db0a92bd2d4ae0e3d9677872f | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| c77852e117bdf056c8e9a087e51d6f65cf6ba53d | ||||
| 763e5b78d4fcd74a9e812256656c075f99d9a781 | ||||
|  | ||||
							
								
								
									
										211
									
								
								.github/ci_configs/vllm/Dockerfile.tmp_vllm
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										211
									
								
								.github/ci_configs/vllm/Dockerfile.tmp_vllm
									
									
									
									
										vendored
									
									
								
							| @ -12,46 +12,54 @@ ARG BUILD_BASE_IMAGE=torch-nightly-base | ||||
| # by default, it uses devel-ubuntu22.04 official image. | ||||
| ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 | ||||
|  | ||||
| # The logic is copied from https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile | ||||
| ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py" | ||||
|  | ||||
|  | ||||
| #################### TORCH NIGHTLY BASE IMAGE #################### | ||||
| #################### TORCH NIGHTLY  BASE IMAGE #################### | ||||
| # A base image for building vLLM on the devel Ubuntu 22.04 image; this is mainly used to build vLLM in the vLLM Buildkite CI | ||||
| FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base | ||||
| From nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base | ||||
| ARG CUDA_VERSION=12.8.1 | ||||
| ARG PYTHON_VERSION=3.12 | ||||
| ARG TARGETPLATFORM | ||||
| ENV DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| ARG CUDA_VERSION | ||||
| ARG PYTHON_VERSION | ||||
| ARG GET_PIP_URL | ||||
| RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ | ||||
|     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment | ||||
|  | ||||
| # Install Python and other dependencies | ||||
| RUN apt-get update -y \ | ||||
|     && apt-get install -y ccache software-properties-common git curl wget sudo vim \ | ||||
|     && add-apt-repository -y ppa:deadsnakes/ppa \ | ||||
|     && apt-get update -y \ | ||||
|     && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ | ||||
|     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ | ||||
|     && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ | ||||
|     && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ | ||||
|     && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ | ||||
|     && python3 --version && python3 -m pip --version | ||||
| # Install Python and other dependencies if they are not already present | ||||
| RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \ | ||||
|       echo "Installing Python ${PYTHON_VERSION}..." && \ | ||||
|       echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \ | ||||
|       echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y ccache software-properties-common git curl sudo && \ | ||||
|       for i in 1 2 3; do \ | ||||
|         add-apt-repository -y ppa:deadsnakes/ppa && break || \ | ||||
|         { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ | ||||
|       done && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \ | ||||
|       update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \ | ||||
|       update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \ | ||||
|       ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \ | ||||
|       curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \ | ||||
|    else \ | ||||
|       echo "Python ${PYTHON_VERSION} already present, skipping setup."; \ | ||||
|    fi \ | ||||
|    && python3 --version && python3 -m pip --version | ||||
|  | ||||
| # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519 | ||||
| # as it was causing spam when compiling the CUTLASS kernels | ||||
| # Ensure gcc >= 10 to avoid CUTLASS issues (bug 92519) | ||||
| RUN current_gcc_version=$(gcc -dumpversion | cut -f1 -d.) && \ | ||||
|     if command -v apt-get >/dev/null; then \ | ||||
|         if [ "$current_gcc_version" -lt 10 ]; then \ | ||||
|             echo "GCC version is $current_gcc_version, installing gcc-10..."; \ | ||||
|             apt-get update \ | ||||
|             && apt-get install -y gcc-10 g++-10 \ | ||||
|             && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 \ | ||||
|             && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \ | ||||
|         else \ | ||||
|             echo "GCC version is $current_gcc_version, no need to install gcc-10."; \ | ||||
|         fi \ | ||||
|     fi \ | ||||
|     && gcc --version && g++ --version | ||||
|     if [ "$current_gcc_version" -lt 10 ]; then \ | ||||
|       echo "GCC version is $current_gcc_version, installing gcc-10..."; \ | ||||
|       apt-get update && \ | ||||
|       apt-get install -y gcc-10 g++-10 && \ | ||||
|       update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 && \ | ||||
|       update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \ | ||||
|     else \ | ||||
|       echo "GCC version is $current_gcc_version, no need to install gcc-10."; \ | ||||
|     fi && \ | ||||
|     gcc --version && g++ --version | ||||
|  | ||||
| # install uv for faster pip installs | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
| @ -71,20 +79,11 @@ ENV UV_LINK_MODE=copy | ||||
| FROM ${BUILD_BASE_IMAGE} AS base | ||||
| USER root | ||||
|  | ||||
| ARG CUDA_VERSION | ||||
| ARG PYTHON_VERSION | ||||
|  | ||||
| # TODO (huydhn): Only work with PyTorch manylinux builder | ||||
| ENV PATH="/opt/python/cp312-cp312/bin:${PATH}" | ||||
|  | ||||
| # Install some system dependencies and double check python version | ||||
| RUN if command -v apt-get >/dev/null; then \ | ||||
|         apt-get update -y \ | ||||
|         && apt-get install -y ccache software-properties-common git curl wget sudo vim; \ | ||||
|     else \ | ||||
|         dnf install -y git curl wget sudo; \ | ||||
|     fi \ | ||||
|     && python3 --version && python3 -m pip --version | ||||
| # Workaround for https://github.com/openai/triton/issues/2507 and | ||||
| # https://github.com/pytorch/pytorch/issues/107960 -- hopefully | ||||
| # this won't be needed for future versions of this docker image | ||||
| # or future versions of triton. | ||||
| RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ | ||||
|  | ||||
| # Install uv for faster pip installs if it is not already installed | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
| @ -119,15 +118,17 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \ | ||||
|     if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \ | ||||
|         echo "[INFO] Installing torch wheels to build vllm"; \ | ||||
|         torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \ | ||||
|         vision_whl=$(find /dist -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]" "${vision_whl}" "${audio_whl}" /dist/*.whl; \ | ||||
|         vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]"; \ | ||||
|         uv pip install --system "${vision_whl}"; \ | ||||
|         uv pip install --system "${audio_whl}"; \ | ||||
|     elif [ -n "$PINNED_TORCH_VERSION" ]; then \ | ||||
|         echo "[INFO] Installing pinned torch nightly version to build vllm: $PINNED_TORCH_VERSION"; \ | ||||
|         uv pip install --system "$PINNED_TORCH_VERSION" --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ | ||||
|         uv pip install --system "$PINNED_TORCH_VERSION" --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     else \ | ||||
|         echo "[INFO] Installing torch nightly with latest one to build vllm"; \ | ||||
|         uv pip install --system torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ | ||||
|         uv pip install --system torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     fi | ||||
|  | ||||
| # Install numba 0.61.2 for cuda environment | ||||
| @ -136,11 +137,12 @@ RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|  | ||||
| # Install common dependencies from vllm common.txt | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -r requirements/common.txt | ||||
| uv pip install --system -r requirements/common.txt | ||||
|  | ||||
|  | ||||
| # Must be set before installing xformers, so that the correct version of xformers is installed. | ||||
| ARG xformers_cuda_arch_list='7.5;8.0+PTX;9.0a' | ||||
| ENV TORCH_CUDA_ARCH_LIST=${xformers_cuda_arch_list} | ||||
| ARG exformer_cuda_arch_list='7.5;8.0+PTX;9.0a' | ||||
| ENV TORCH_CUDA_ARCH_LIST=${exformer_cuda_arch_list} | ||||
|  | ||||
| ARG max_jobs=16 | ||||
| ENV MAX_JOBS=${max_jobs} | ||||
| @ -151,8 +153,8 @@ RUN pip freeze | grep -E 'ninja' | ||||
|  | ||||
| # Build xformers with cuda and torch nightly/wheel | ||||
| # following official xformers guidance: https://github.com/facebookresearch/xformers#build | ||||
| # sha for https://github.com/facebookresearch/xformers/tree/v0.0.32.post2 | ||||
| ARG XFORMERS_COMMIT=5d4b92a5e5a9c6c6d4878283f47d82e17995b468 | ||||
| # sha for https://github.com/facebookresearch/xformers/tree/v0.0.31 | ||||
| ARG XFORMERS_COMMIT=eb0946a363464da96ea40afd1a7f72a907c25497 | ||||
| ENV CCACHE_DIR=/root/.cache/ccache | ||||
|  | ||||
| RUN --mount=type=cache,target=/root/.cache/ccache \ | ||||
| @ -186,6 +188,11 @@ RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio' | ||||
| FROM base AS build | ||||
| ARG TARGETPLATFORM | ||||
|  | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| COPY . . | ||||
|  | ||||
| RUN python3 use_existing_torch.py | ||||
| @ -214,16 +221,11 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0 | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     --mount=type=bind,source=.git,target=.git \ | ||||
|     if [ "$USE_SCCACHE" = "1" ]; then \ | ||||
|         echo "Installing sccache..."; \ | ||||
|         if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ | ||||
|             SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl"; \ | ||||
|         else \ | ||||
|             SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl"; \ | ||||
|         fi; \ | ||||
|         curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz" \ | ||||
|         echo "Installing sccache..." \ | ||||
|         && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ | ||||
|         && tar -xzf sccache.tar.gz \ | ||||
|         && sudo mv "${SCCACHE_ARCHIVE}"/sccache /usr/bin/sccache \ | ||||
|         && rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}" \ | ||||
|         && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ | ||||
|         && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ | ||||
|         && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ | ||||
|         && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ | ||||
|         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ | ||||
| @ -249,9 +251,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \ | ||||
|         python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38; \ | ||||
|     fi | ||||
|  | ||||
| RUN echo "[INFO] Listing current directory:" && \ | ||||
| RUN echo "[DEBUG] Listing  current directory:" && \ | ||||
|     ls -al && \ | ||||
|     echo "[INFO] Showing torch_build_versions.txt content:" && \ | ||||
|     echo "[DEBUG] Showing torch_build_versions.txt content:" && \ | ||||
|     cat torch_build_versions.txt | ||||
|  | ||||
| #################### WHEEL BUILD IMAGE #################### | ||||
| @ -261,42 +263,51 @@ RUN echo "[INFO] Listing current directory:" && \ | ||||
| # Setup clean environment for vLLM for test and api server using ubuntu22.04 with AOT flashinfer | ||||
| FROM ${FINAL_BASE_IMAGE} AS vllm-base | ||||
| USER root | ||||
|  | ||||
| ARG CUDA_VERSION | ||||
| ARG PYTHON_VERSION | ||||
| ARG GET_PIP_URL | ||||
|  | ||||
| # TODO (huydhn): Only works with PyTorch manylinux builder | ||||
| ENV PATH="/opt/python/cp312-cp312/bin:${PATH}" | ||||
|  | ||||
| # prepare for environment starts | ||||
| WORKDIR /workspace | ||||
|  | ||||
| # Install Python and other dependencies | ||||
| RUN if command -v apt-get >/dev/null; then \ | ||||
|         apt-get update -y \ | ||||
|         && apt-get install -y ccache software-properties-common git curl wget sudo vim \ | ||||
|         && add-apt-repository -y ppa:deadsnakes/ppa \ | ||||
|         && apt-get update -y \ | ||||
|         && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ | ||||
|         && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ | ||||
|         && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ | ||||
|         && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ | ||||
|         && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \ | ||||
|     else \ | ||||
|         dnf install -y git curl wget sudo; \ | ||||
|     fi \ | ||||
|     && python3 --version && python3 -m pip --version | ||||
| RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ | ||||
|     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment | ||||
|  | ||||
| # Install Python and other dependencies if they are not already present | ||||
| RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \ | ||||
|       echo "Installing Python ${PYTHON_VERSION}..." && \ | ||||
|       echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \ | ||||
|       echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y ccache software-properties-common git curl sudo && \ | ||||
|       for i in 1 2 3; do \ | ||||
|         add-apt-repository -y ppa:deadsnakes/ppa && break || \ | ||||
|         { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ | ||||
|       done && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \ | ||||
|       update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \ | ||||
|       update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \ | ||||
|       ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \ | ||||
|       curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \ | ||||
|    else \ | ||||
|       echo "Python ${PYTHON_VERSION} already present, skipping setup."; \ | ||||
|    fi \ | ||||
|    && python3 --version && python3 -m pip --version | ||||
|  | ||||
|  | ||||
| # Get the torch versions and wheels used in previous stages for consistency | ||||
| COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt | ||||
| COPY --from=base /workspace/xformers-dist /wheels/xformers | ||||
| COPY --from=build /workspace/vllm-dist /wheels/vllm | ||||
| RUN echo "[INFO] Listing current directory before torch install step:" && \ | ||||
| RUN echo "[DEBUG] Listing current directory before torch install step:" && \ | ||||
|     ls -al && \ | ||||
|     echo "[INFO] Showing torch_build_versions.txt content:" && \ | ||||
|     echo "[DEBUG] Showing torch_build_versions.txt content:" && \ | ||||
|     cat torch_build_versions.txt | ||||
|  | ||||
| # Workaround for https://github.com/openai/triton/issues/2507 and | ||||
| # https://github.com/pytorch/pytorch/issues/107960 -- hopefully | ||||
| # this won't be needed for future versions of this docker image | ||||
| # or future versions of triton. | ||||
| RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ | ||||
|  | ||||
|  | ||||
| # Install uv for faster pip installs if it is not already installed | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if ! python3 -m uv --version > /dev/null 2>&1; then \ | ||||
| @ -316,13 +327,15 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \ | ||||
|     --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \ | ||||
|         torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \ | ||||
|         vision_whl=$(find /dist -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         echo "[INFO] Use wheels to build : '${torch_whl}' '${audio_whl}' '${vision_whl}'"; \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]" "${vision_whl}" "${audio_whl}" /dist/*.whl; \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]"; \ | ||||
|         uv pip install --system "${vision_whl}"; \ | ||||
|         uv pip install --system "${audio_whl}"; \ | ||||
|     else \ | ||||
|         echo "[INFO] Installing torch versions from torch_build_versions.txt"; \ | ||||
|         uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ | ||||
|         uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     fi | ||||
|  | ||||
| # Install the vllm wheel from previous stage | ||||
| @ -333,8 +346,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system /wheels/xformers/*.whl --verbose | ||||
|  | ||||
|  | ||||
| # Build flashinfer from source. | ||||
| ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0' | ||||
| ARG torch_cuda_arch_list='8.0;8.9;9.0a' | ||||
| # install package for build flashinfer | ||||
| # see issue: https://github.com/flashinfer-ai/flashinfer/issues/738 | ||||
|  | ||||
| @ -402,6 +416,11 @@ RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -r requirements/nightly_torch_test.txt | ||||
|  | ||||
| # Workaround for #17068 | ||||
| # pinned commit for v2.2.4 | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@95d8aba8a8c75aedcaa6143713b11e745e7cd0d9#egg=mamba-ssm" | ||||
|  | ||||
| # Logging to confirm the torch versions | ||||
| RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer' | ||||
|  | ||||
|  | ||||
							
								
								
									
										17
									
								
								.github/ci_configs/vllm/use_existing_torch.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								.github/ci_configs/vllm/use_existing_torch.py
									
									
									
									
										vendored
									
									
								
							| @ -1,17 +0,0 @@ | ||||
| import glob | ||||
|  | ||||
|  | ||||
| requires_files = glob.glob("requirements/*.txt") | ||||
| requires_files += ["pyproject.toml"] | ||||
| for file in requires_files: | ||||
|     print(f">>> cleaning {file}") | ||||
|     with open(file) as f: | ||||
|         lines = f.readlines() | ||||
|     if "torch" in "".join(lines).lower(): | ||||
|         print("removed:") | ||||
|         with open(file, "w") as f: | ||||
|             for line in lines: | ||||
|                 if "torch" not in line.lower(): | ||||
|                     f.write(line) | ||||
|     print(f"<<< done cleaning {file}") | ||||
|     print() | ||||
| @ -15,7 +15,7 @@ optree==0.13.0 | ||||
| packaging==23.1 | ||||
| parameterized==0.8.1 | ||||
| pillow==10.3.0 | ||||
| protobuf==5.29.5 | ||||
| protobuf==5.29.4 | ||||
| psutil==5.9.8 | ||||
| pygments==2.15.0 | ||||
| pytest-cpp==2.3.0 | ||||
| @ -26,7 +26,7 @@ pytest-xdist==3.3.1 | ||||
| pytest==7.3.2 | ||||
| pyyaml==6.0.2 | ||||
| scipy==1.12.0 | ||||
| setuptools==78.1.1 | ||||
| setuptools==72.1.0 | ||||
| sympy==1.13.3 | ||||
| tlparse==0.4.0 | ||||
| tensorboard==2.13.0 | ||||
|  | ||||
							
								
								
									
										1
									
								
								.github/scripts/build_triton_wheel.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.github/scripts/build_triton_wheel.py
									
									
									
									
										vendored
									
									
								
							| @ -84,7 +84,6 @@ def build_triton( | ||||
|                 ["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir | ||||
|             ) | ||||
|         else: | ||||
|             check_call(["git", "fetch", "origin", commit_hash], cwd=triton_basedir) | ||||
|             check_call(["git", "checkout", commit_hash], cwd=triton_basedir) | ||||
|  | ||||
|         # change built wheel name and version | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/scripts/docathon-label-sync.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/scripts/docathon-label-sync.py
									
									
									
									
										vendored
									
									
								
							| @ -39,9 +39,7 @@ def main() -> None: | ||||
|     pull_request_label_names = [label.name for label in pull_request_labels] | ||||
|     issue_label_names = [label.name for label in issue_labels] | ||||
|     labels_to_add = [ | ||||
|         label | ||||
|         for label in issue_label_names | ||||
|         if label not in pull_request_label_names and label != "actionable" | ||||
|         label for label in issue_label_names if label not in pull_request_label_names | ||||
|     ] | ||||
|     if not labels_to_add: | ||||
|         print("The pull request already has the same labels.") | ||||
|  | ||||
							
								
								
									
										118
									
								
								.github/scripts/generate_binary_build_matrix.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										118
									
								
								.github/scripts/generate_binary_build_matrix.py
									
									
									
									
										vendored
									
									
								
							| @ -16,16 +16,18 @@ from typing import Optional | ||||
|  | ||||
|  | ||||
| # NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this | ||||
| CUDA_ARCHES = ["12.6", "12.8", "13.0"] | ||||
| CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"] | ||||
| CUDA_STABLE = "12.8" | ||||
| CUDA_ARCHES_FULL_VERSION = { | ||||
|     "12.6": "12.6.3", | ||||
|     "12.8": "12.8.1", | ||||
|     "12.9": "12.9.1", | ||||
|     "13.0": "13.0.0", | ||||
| } | ||||
| CUDA_ARCHES_CUDNN_VERSION = { | ||||
|     "12.6": "9", | ||||
|     "12.8": "9", | ||||
|     "12.9": "9", | ||||
|     "13.0": "9", | ||||
| } | ||||
|  | ||||
| @ -38,60 +40,77 @@ CPU_AARCH64_ARCH = ["cpu-aarch64"] | ||||
|  | ||||
| CPU_S390X_ARCH = ["cpu-s390x"] | ||||
|  | ||||
| CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "13.0-aarch64"] | ||||
| CUDA_AARCH64_ARCHES = ["12.9-aarch64", "13.0-aarch64"] | ||||
|  | ||||
|  | ||||
| PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|     "12.6": ( | ||||
|         "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | " | ||||
|         "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " | ||||
|         "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | " | ||||
|         "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | " | ||||
|         "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | " | ||||
|         "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | " | ||||
|         "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'" | ||||
|         "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "12.8": ( | ||||
|         "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | " | ||||
|         "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " | ||||
|         "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | " | ||||
|         "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | " | ||||
|         "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | " | ||||
|         "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | " | ||||
|         "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'" | ||||
|         "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "12.9": ( | ||||
|         "nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "13.0": ( | ||||
|         "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | " | ||||
|         "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | " | ||||
|         "nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | " | ||||
|         "nvidia-cublas==13.0.0.19; platform_system == 'Linux' | " | ||||
|         "nvidia-cufft==12.0.0.15; platform_system == 'Linux' | " | ||||
|         "nvidia-curand==10.4.0.35; platform_system == 'Linux' | " | ||||
|         "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx==13.0.39; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | " | ||||
|         "nvidia-cufile==1.15.0.42; platform_system == 'Linux'" | ||||
|         "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "xpu": ( | ||||
|         "intel-cmplr-lib-rt==2025.2.1 | " | ||||
| @ -221,6 +240,8 @@ def generate_libtorch_matrix( | ||||
|         if os == "linux": | ||||
|             arches += CUDA_ARCHES | ||||
|             arches += ROCM_ARCHES | ||||
|             if "13.0" in arches: | ||||
|                 arches.remove("13.0") | ||||
|         elif os == "windows": | ||||
|             arches += CUDA_ARCHES | ||||
|     if libtorch_variants is None: | ||||
| @ -322,7 +343,7 @@ def generate_wheels_matrix( | ||||
|             # cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install | ||||
|  | ||||
|             if ( | ||||
|                 arch_version in ["13.0", "12.8", "12.6"] | ||||
|                 arch_version in ["13.0", "12.9", "12.8", "12.6"] | ||||
|                 and os == "linux" | ||||
|                 or arch_version in CUDA_AARCH64_ARCHES | ||||
|             ): | ||||
| @ -386,5 +407,6 @@ def generate_wheels_matrix( | ||||
|  | ||||
|  | ||||
| validate_nccl_dep_consistency("13.0") | ||||
| validate_nccl_dep_consistency("12.9") | ||||
| validate_nccl_dep_consistency("12.8") | ||||
| validate_nccl_dep_consistency("12.6") | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							| @ -22,7 +22,7 @@ LABEL_CIFLOW_BINARIES = "ciflow/binaries" | ||||
| LABEL_CIFLOW_PERIODIC = "ciflow/periodic" | ||||
| LABEL_CIFLOW_BINARIES_LIBTORCH = "ciflow/binaries_libtorch" | ||||
| LABEL_CIFLOW_BINARIES_WHEEL = "ciflow/binaries_wheel" | ||||
| LABEL_CIFLOW_ROCM = "ciflow/rocm" | ||||
| LABEL_CIFLOW_ROCM = "ciflow/rocm-mi300" | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| @ -139,8 +139,6 @@ ROCM_SMOKE_WORKFLOWS = [ | ||||
|         ), | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             labels={ | ||||
|                 LABEL_CIFLOW_BINARIES, | ||||
|                 LABEL_CIFLOW_BINARIES_WHEEL, | ||||
|                 LABEL_CIFLOW_ROCM, | ||||
|             }, | ||||
|             isolated_workflow=True, | ||||
|  | ||||
							
								
								
									
										94
									
								
								.github/scripts/prepare_vllm_wheels.sh
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										94
									
								
								.github/scripts/prepare_vllm_wheels.sh
									
									
									
									
										vendored
									
									
								
							| @ -1,94 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -eux | ||||
|  | ||||
| torch_version=$(unzip -p torch-* '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) | ||||
| nightly=$(echo ${torch_version} | cut -d'.' -f4) | ||||
|  | ||||
| # Copied from .ci/manywheel/build_common.sh | ||||
| make_wheel_record() { | ||||
|   fpath=$1 | ||||
|   if echo $fpath | grep RECORD >/dev/null 2>&1; then | ||||
|     echo "$fpath,," | ||||
|   else | ||||
|     fhash=$(openssl dgst -sha256 -binary $fpath | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g') | ||||
|     fsize=$(ls -nl $fpath | awk '{print $5}') | ||||
|     echo "$fpath,sha256=$fhash,$fsize" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Unpack a wheel, rename it from one version to another, and pack it again. | ||||
| #   $1  package    distribution name as it appears in the unpacked directory names | ||||
| #   $2  wheel      path of the wheel file to unpack | ||||
| #   $3  f_version  version currently recorded in the wheel | ||||
| #   $4  t_version  version to record instead | ||||
| # Reads the globals PYTHON_EXECUTABLE and torch_version. The repacked wheel is | ||||
| # written to the current directory; the input wheel is left untouched. | ||||
| change_wheel_version() { | ||||
|   local package=$1 | ||||
|   local wheel=$2 | ||||
|   local f_version=$3 | ||||
|   local t_version=$4 | ||||
|  | ||||
|   # Extract the wheel | ||||
|   "${PYTHON_EXECUTABLE}" -mwheel unpack "$wheel" | ||||
|  | ||||
|   mv "${package}-${f_version}" "${package}-${t_version}" | ||||
|   # Change the version from f_version to t_version in the dist-info dir | ||||
|   pushd "${package}-${t_version}" | ||||
|   mv "${package}-${f_version}.dist-info" "${package}-${t_version}.dist-info" | ||||
|  | ||||
|   pushd "${package}-${t_version}.dist-info" | ||||
|   # NOTE(review): the versions are used as sed patterns unescaped, so '.' | ||||
|   # matches any character; harmless for the expected inputs but not exact. | ||||
|   sed -i "s/${package}-${f_version}.dist-info/${package}-${t_version}.dist-info/g" RECORD | ||||
|  | ||||
|   # Update the version in METADATA and its SHA256 hash | ||||
|   sed -i "s/Version: ${f_version}/Version: ${t_version}/g" METADATA | ||||
|   # For vllm and xformers, also add a Requires-Dist pin on the exact torch | ||||
|   # version held in the global torch_version. | ||||
|   # NOTE(review): the line is appended after every License-File line, and not | ||||
|   # at all when METADATA has none - confirm that is acceptable. | ||||
|   if [[ "${package}" == vllm ]] || [[ "${package}" == xformers ]]; then | ||||
|     sed -i "/License-File/a\Requires-Dist: torch==${torch_version}" METADATA | ||||
|   fi | ||||
|   # Drop the stale METADATA entry; a fresh one is appended below | ||||
|   sed -i '/METADATA,sha256/d' RECORD | ||||
|   popd | ||||
|  | ||||
|   make_wheel_record "${package}-${t_version}.dist-info/METADATA" >> "${package}-${t_version}.dist-info/RECORD" | ||||
|   popd | ||||
|  | ||||
|   # Repack the wheel | ||||
|   "${PYTHON_EXECUTABLE}" -mwheel pack "${package}-${t_version}" | ||||
|  | ||||
|   # Clean up | ||||
|   rm -rf "${package}-${t_version}" | ||||
| } | ||||
|  | ||||
| # Re-version the wheel found in directory $1 as a nightly build, run it | ||||
| # through auditwheel, and leave the repaired wheel in the current directory. | ||||
| #   $1  package  directory name; '-' is replaced by '_' to match the wheel file | ||||
| # Reads the globals nightly and PLATFORM. The directory $1 is deleted at the end. | ||||
| repackage_wheel() { | ||||
|   local package=$1 | ||||
|   pushd "$package" | ||||
|  | ||||
|   # Quote the pattern so that find, not the shell, expands it. Declare and | ||||
|   # assign separately so a failing command is not masked by `local`. | ||||
|   local orig_wheel | ||||
|   orig_wheel=$(find . -name "*${package//-/_}*") | ||||
|   if [[ -z "${orig_wheel}" ]]; then | ||||
|     echo "No wheel found for ${package} in $(pwd)" >&2 | ||||
|     return 1 | ||||
|   fi | ||||
|   local orig_version | ||||
|   orig_version=$(unzip -p "$orig_wheel" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) | ||||
|  | ||||
|   local version="" | ||||
|   if [[ "${package}" == vllm ]]; then | ||||
|     # Copied from vllm/.buildkite/scripts/upload-wheels.sh | ||||
|     version=1.0.0 | ||||
|   else | ||||
|     # Turn '+' into '.' and keep only the first three dot-separated fields | ||||
|     version=$(echo "$orig_version" | tr '.+' '.' | cut -d'.' -f1-3) | ||||
|   fi | ||||
|   local nightly_version=$version.$nightly | ||||
|  | ||||
|   # Use nightly version | ||||
|   change_wheel_version "${package//-/_}" "$orig_wheel" "$orig_version" "$nightly_version" | ||||
|   # Clean up | ||||
|   rm "${orig_wheel}" | ||||
|  | ||||
|   # *.whl is meant to be expanded by the shell; the --exclude wildcards are | ||||
|   # quoted so they reach auditwheel unexpanded. | ||||
|   auditwheel repair --plat "$PLATFORM" *.whl \ | ||||
|     --exclude "libc10*" --exclude "libtorch*" --exclude "libcu*" --exclude "libnv*" | ||||
|   local repair_wheel | ||||
|   repair_wheel=$(find wheelhouse -name "*${PLATFORM}*") | ||||
|   repair_wheel=$(basename "${repair_wheel}") | ||||
|   popd | ||||
|  | ||||
|   cp "${package}/wheelhouse/${repair_wheel}" . | ||||
|   rm -rf "$package" | ||||
| } | ||||
|  | ||||
| # Required to unpack and re-pack the wheels | ||||
| "${PYTHON_EXECUTABLE}" -mpip install wheel==0.45.1 | ||||
|  | ||||
| # Each package has its own directory under externals/vllm/wheels; the | ||||
| # repackaged wheels end up directly in that directory. | ||||
| pushd externals/vllm/wheels | ||||
| for package in xformers flashinfer-python vllm; do | ||||
|   repackage_wheel "$package" | ||||
| done | ||||
| popd | ||||
| @ -171,7 +171,7 @@ jobs: | ||||
|       - name: Teardown XPU | ||||
|         uses: ./.github/actions/teardown-xpu | ||||
|     {%- else %} | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: !{{ common.timeout_minutes }} | ||||
|     !{{ upload.binary_env(config) }} | ||||
|     steps: | ||||
|  | ||||
| @ -22,16 +22,6 @@ name: !{{ build_environment }} | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
| {%- endmacro %} | ||||
|  | ||||
| {%- macro setup_python(py_ver) -%} | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "!{{ (py_ver.strip('t') + '.4') if '3.14' not in py_ver else '3.14.0-rc.2' }}" | ||||
|           freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }} | ||||
| {%- endmacro %} | ||||
|  | ||||
| on: | ||||
| # TODO: Migrate to new ciflow trigger, reference https://github.com/pytorch/pytorch/pull/70321 | ||||
|   push: | ||||
| @ -71,13 +61,28 @@ jobs: | ||||
|     {%- endif %} | ||||
|     steps: | ||||
|       !{{ set_runner_specific_vars() }} | ||||
|       !{{ setup_python(config.get("python_version", "3.10")) }} | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       !{{ common.checkout(deep_clone=False, directory="pytorch") }} | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -94,6 +99,8 @@ jobs: | ||||
| {%- if config["package_type"] == "wheel" %} | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -104,9 +111,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/templates/upload.yml.j2
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/templates/upload.yml.j2
									
									
									
									
										vendored
									
									
								
							| @ -33,7 +33,7 @@ | ||||
|   {%- if is_windows %} | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|   {%- endif %} | ||||
|  | ||||
| {%- else %} | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/workflows/build-manywheel-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/build-manywheel-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -47,11 +47,12 @@ jobs: | ||||
|       matrix: | ||||
|         include: [ | ||||
|           { name: "manylinux2_28-builder",          tag: "cuda13.0",         runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cuda12.9",         runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cuda12.8",          runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cuda12.6",          runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda13.0",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.9",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.8",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.6",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.3",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.4",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cpu",               runner: "linux.9xlarge.ephemeral" }, | ||||
|  | ||||
							
								
								
									
										236
									
								
								.github/workflows/build-vllm-wheel.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										236
									
								
								.github/workflows/build-vllm-wheel.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,236 +0,0 @@ | ||||
| name: Build vLLM wheels | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     paths: | ||||
|       - .github/workflows/build-vllm-wheel.yml | ||||
|       - .github/ci_commit_pins/vllm.txt | ||||
|   workflow_dispatch: | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - .github/workflows/build-vllm-wheel.yml | ||||
|       - .github/ci_commit_pins/vllm.txt | ||||
|   schedule: | ||||
|     # every day at 13:30 UTC (9:30AM EDT, 6:30AM PDT) | ||||
|     - cron: 30 13 * * * | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   build-wheel: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         python-version: [ '3.12' ] | ||||
|         # TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved | ||||
|         platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] | ||||
|         device: [ 'cu128', 'cu129' ] | ||||
|         include: | ||||
|           - platform: manylinux_2_28_x86_64 | ||||
|             device: cu128 | ||||
|             manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8' | ||||
|             runner: linux.12xlarge.memory | ||||
|           - platform: manylinux_2_28_x86_64 | ||||
|             device: cu129 | ||||
|             manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9' | ||||
|             runner: linux.12xlarge.memory | ||||
|           - platform: manylinux_2_28_aarch64 | ||||
|             device: cu128 | ||||
|             manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8' | ||||
|             runner: linux.arm64.r7g.12xlarge.memory | ||||
|           - platform: manylinux_2_28_aarch64 | ||||
|             device: cu129 | ||||
|             manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9' | ||||
|             runner: linux.arm64.r7g.12xlarge.memory | ||||
|     name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" | ||||
|     runs-on: ${{ matrix.runner }} | ||||
|     timeout-minutes: 480 | ||||
|     env: | ||||
|       PY_VERS: ${{ matrix.python-version }} | ||||
|       MANYLINUX_IMAGE: ${{ matrix.manylinux-image }} | ||||
|       PLATFORM: ${{ matrix.platform }} | ||||
|       BUILD_DEVICE: ${{ matrix.device }} | ||||
|     steps: | ||||
|       - name: Setup SSH (Click me for login details) | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
|       - name: Checkout PyTorch | ||||
|         uses: pytorch/pytorch/.github/actions/checkout-pytorch@main | ||||
|         with: | ||||
|           submodules: false | ||||
|  | ||||
|       - name: Setup Linux | ||||
|         uses: ./.github/actions/setup-linux | ||||
|  | ||||
|       - name: Get latest PyTorch nightly | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -eux | ||||
|  | ||||
|           # Determine python executable for given version (copied from build-triton-wheel) | ||||
|           case $PY_VERS in | ||||
|           3.10) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python | ||||
|             ;; | ||||
|           3.11) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp311-cp311/bin/python | ||||
|             ;; | ||||
|           3.12) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp312-cp312/bin/python | ||||
|             ;; | ||||
|           3.13) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp313-cp313/bin/python | ||||
|             ;; | ||||
|           3.13t) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp313-cp313t/bin/python | ||||
|             ;; | ||||
|           3.14) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp314-cp314/bin/python | ||||
|             ;; | ||||
|           3.14t) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp314-cp314t/bin/python | ||||
|             ;; | ||||
|           *) | ||||
|             echo "Unsupported python version ${PY_VERS}" | ||||
|             exit 1 | ||||
|             ;; | ||||
|           esac | ||||
|  | ||||
|           # Keep PyTorch nightly wheel here so that we can install it later during | ||||
|           # vLLM build process | ||||
|           mkdir -p "${RUNNER_TEMP}/artifacts/" | ||||
|  | ||||
|           container_name=$(docker run \ | ||||
|             --tty \ | ||||
|             --detach \ | ||||
|             -e PLATFORM \ | ||||
|             -e PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ | ||||
|             -v "${GITHUB_WORKSPACE}:/pytorch" \ | ||||
|             -v "${RUNNER_TEMP}/artifacts:/artifacts" \ | ||||
|             -w /artifacts/ \ | ||||
|             "${MANYLINUX_IMAGE}" | ||||
|           ) | ||||
|  | ||||
|           docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -mpip install \ | ||||
|             --pre torch torchvision torchaudio \ | ||||
|             --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}" | ||||
|  | ||||
|           # I wonder if there is a command to both download and install the wheels | ||||
|           # in one go | ||||
|           docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -mpip download \ | ||||
|             --pre torch torchvision torchaudio \ | ||||
|             --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}" | ||||
|  | ||||
|           # Save this for later | ||||
|           echo "container_name=${container_name}" >> "$GITHUB_ENV" | ||||
|  | ||||
|       - name: Build vLLM wheel | ||||
|         uses: ./.github/actions/build-external-packages | ||||
|         with: | ||||
|           build-targets: vllm | ||||
|           docker-image: ${{ env.MANYLINUX_IMAGE }} | ||||
|           cuda-arch-list: '8.0;8.9;9.0;10.0;12.0' | ||||
|           torch-wheel-dir: ${{ runner.temp }}/artifacts | ||||
|           output-dir: ${{ runner.temp }}/artifacts/externals | ||||
|  | ||||
|       - name: Prepare vLLM wheel | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -eux | ||||
|  | ||||
|           # Get these wheels ready, the vllm renaming logic is copied from its .buildkite/scripts/upload-wheels.sh | ||||
|           docker exec -t "${container_name}" bash -c /pytorch/.github/scripts/prepare_vllm_wheels.sh | ||||
|           docker exec -t "${container_name}" chown -R 1000:1000 /artifacts | ||||
|  | ||||
|       - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 | ||||
|         with: | ||||
|           name: vllm-wheel-${{ matrix.device }}-${{ matrix.platform }}-${{ matrix.python-version }} | ||||
|           if-no-files-found: error | ||||
|           path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl | ||||
|  | ||||
|       - name: Teardown Linux | ||||
|         uses: pytorch/test-infra/.github/actions/teardown-linux@main | ||||
|         if: always() | ||||
|  | ||||
|   # Copied from build-triton-wheel workflow (mostly) | ||||
|   upload-wheel: | ||||
|     name: "Upload ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" | ||||
|     needs: | ||||
|       - build-wheel | ||||
|     runs-on: ubuntu-latest | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] | ||||
|         device: [ 'cu128', 'cu129' ] | ||||
|     env: | ||||
|       PLATFORM: ${{ matrix.platform }} | ||||
|       BUILD_DEVICE: ${{ matrix.device }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     container: | ||||
|       image: continuumio/miniconda3:4.12.0 | ||||
|     environment: ${{ (github.event_name == 'push' && github.event.ref == 'refs/heads/main') && 'nightly-wheel-upload' || '' }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | ||||
|  | ||||
|       - name: Configure AWS credentials(PyTorch account) for main | ||||
|         if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }} | ||||
|         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels | ||||
|           aws-region: us-east-1 | ||||
|  | ||||
|       - name: Configure AWS credentials(PyTorch account) for RC builds | ||||
|         if: ${{ github.event_name == 'push' &&  (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }} | ||||
|         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels | ||||
|           aws-region: us-east-1 | ||||
|  | ||||
|       - name: Download Build Artifacts | ||||
|         uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 | ||||
|         with: | ||||
|           # Download all available artifacts | ||||
|           path: ${{ runner.temp }}/artifacts-all | ||||
|  | ||||
|       - name: Select Wheel Artifacts | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -eux | ||||
|           mkdir -p "${RUNNER_TEMP}/artifacts/" | ||||
|           mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-"${PLATFORM}"-*/* "${RUNNER_TEMP}/artifacts/" | ||||
|  | ||||
|       - name: Set DRY_RUN | ||||
|         if: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "DRY_RUN=disabled" >> "$GITHUB_ENV" | ||||
|  | ||||
|       - name: Set UPLOAD_CHANNEL | ||||
|         if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }} | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -ex | ||||
|  | ||||
|           if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then | ||||
|             echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" | ||||
|           fi | ||||
|  | ||||
|       - name: Upload binaries | ||||
|         env: | ||||
|           PACKAGE_TYPE: wheel | ||||
|           UPLOAD_SUBFOLDER: ${{ env.BUILD_DEVICE }} | ||||
|           PKG_DIR: ${{ runner.temp }}/artifacts | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -ex | ||||
|           bash .circleci/scripts/binary_upload.sh | ||||
							
								
								
									
										504
									
								
								.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										504
									
								
								.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -112,7 +112,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_10-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_10-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -121,85 +121,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_10-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_10-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_10-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_10-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_10-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -224,7 +178,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -315,7 +269,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_11-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_11-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -324,85 +278,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_11-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_11-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_11-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_11-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_11-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -427,7 +335,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -518,7 +426,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_12-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_12-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -527,85 +435,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_12-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_12-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_12-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_12-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_12-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -630,7 +492,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -721,7 +583,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_13-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -730,85 +592,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_13-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_13-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -833,7 +649,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -924,7 +740,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13t-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_13t-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -933,85 +749,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_13t-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13t-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_13t-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13t-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13t-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -1036,7 +806,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1127,7 +897,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_14-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -1136,85 +906,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_14-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_14-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -1239,7 +963,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1330,7 +1054,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14t-cuda-aarch64-12_6-build: | ||||
|   manywheel-py3_14t-cuda-aarch64-12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -1339,85 +1063,39 @@ jobs: | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_6 | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_9 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda-aarch64-12_6-upload:  # Uploading | ||||
|   manywheel-py3_14t-cuda-aarch64-12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14t-cuda-aarch64-12_6-build | ||||
|     needs: manywheel-py3_14t-cuda-aarch64-12_9-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu126 | ||||
|       GPU_ARCH_VERSION: "12.6-aarch64" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_6 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14t-cuda-aarch64-12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda-aarch64-12_8-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14t-cuda-aarch64-12_8-build | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8-aarch64" | ||||
|       GPU_ARCH_TYPE: cuda-aarch64 | ||||
|       DOCKER_IMAGE: manylinuxaarch64-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_8 | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -1442,7 +1120,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										38
									
								
								.github/workflows/generated-linux-binary-libtorch-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										38
									
								
								.github/workflows/generated-linux-binary-libtorch-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -248,7 +248,7 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-cuda13_0-shared-with-deps-release-build: | ||||
|   libtorch-cuda12_9-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
| @ -257,22 +257,22 @@ jobs: | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu130 | ||||
|       GPU_ARCH_VERSION: "13.0" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: libtorch-cuda13_0-shared-with-deps-release | ||||
|       build_name: libtorch-cuda12_9-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-cuda13_0-shared-with-deps-release-test:  # Testing | ||||
|   libtorch-cuda12_9-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cuda13_0-shared-with-deps-release-build | ||||
|       - libtorch-cuda12_9-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
| @ -280,38 +280,38 @@ jobs: | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu130 | ||||
|       GPU_ARCH_VERSION: "13.0" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-cuda13_0-shared-with-deps-release | ||||
|       build_name: libtorch-cuda12_9-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-cuda13_0-shared-with-deps-release-upload:  # Uploading | ||||
|   libtorch-cuda12_9-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-cuda13_0-shared-with-deps-release-test | ||||
|     needs: libtorch-cuda12_9-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu130 | ||||
|       GPU_ARCH_VERSION: "13.0" | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-cuda13_0-shared-with-deps-release | ||||
|       build_name: libtorch-cuda12_9-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
| @ -342,7 +342,7 @@ jobs: | ||||
|     needs: | ||||
|       - libtorch-rocm6_3-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -456,7 +456,7 @@ jobs: | ||||
|     needs: | ||||
|       - libtorch-rocm6_4-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/generated-linux-binary-manywheel-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/generated-linux-binary-manywheel-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -60,7 +60,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_8-test:  # Testing | ||||
|  | ||||
							
								
								
									
										532
									
								
								.github/workflows/generated-linux-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										532
									
								
								.github/workflows/generated-linux-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -127,7 +127,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_10-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda12_6-test:  # Testing | ||||
| @ -193,7 +193,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_10-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda12_8-test:  # Testing | ||||
| @ -241,6 +241,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_10-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_10-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_10-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       build_name: manywheel-py3_10-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_10-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       build_name: manywheel-py3_10-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_10-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -259,7 +325,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_10-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-cuda13_0-test:  # Testing | ||||
| @ -332,7 +398,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_10-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -443,7 +509,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_10-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -719,7 +785,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_11-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda12_6-test:  # Testing | ||||
| @ -785,7 +851,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_11-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda12_8-test:  # Testing | ||||
| @ -833,6 +899,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_11-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_11-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_11-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       build_name: manywheel-py3_11-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_11-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       build_name: manywheel-py3_11-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_11-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -851,7 +983,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_11-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_11-cuda13_0-test:  # Testing | ||||
| @ -924,7 +1056,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_11-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -1035,7 +1167,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_11-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -1311,7 +1443,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_6-test:  # Testing | ||||
| @ -1377,7 +1509,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_8-test:  # Testing | ||||
| @ -1425,6 +1557,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_12-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_12-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_12-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_12-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -1443,7 +1641,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda13_0-test:  # Testing | ||||
| @ -1516,7 +1714,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_12-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -1627,7 +1825,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_12-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -1903,7 +2101,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda12_6-test:  # Testing | ||||
| @ -1969,7 +2167,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda12_8-test:  # Testing | ||||
| @ -2017,6 +2215,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_13-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       build_name: manywheel-py3_13-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       build_name: manywheel-py3_13-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -2035,7 +2299,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13-cuda13_0-test:  # Testing | ||||
| @ -2108,7 +2372,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_13-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -2219,7 +2483,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_13-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -2495,7 +2759,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13t-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda12_6-test:  # Testing | ||||
| @ -2561,7 +2825,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13t-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda12_8-test:  # Testing | ||||
| @ -2609,6 +2873,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13t-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13t-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_13t-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       build_name: manywheel-py3_13t-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_13t-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       build_name: manywheel-py3_13t-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_13t-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -2627,7 +2957,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_13t-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_13t-cuda13_0-test:  # Testing | ||||
| @ -2700,7 +3030,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_13t-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -2811,7 +3141,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_13t-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -3087,7 +3417,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda12_6-test:  # Testing | ||||
| @ -3153,7 +3483,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda12_8-test:  # Testing | ||||
| @ -3201,6 +3531,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_14-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       build_name: manywheel-py3_14-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       build_name: manywheel-py3_14-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -3219,7 +3615,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14-cuda13_0-test:  # Testing | ||||
| @ -3292,7 +3688,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_14-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -3403,7 +3799,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_14-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -3679,7 +4075,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14t-cuda12_6 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda12_6-test:  # Testing | ||||
| @ -3745,7 +4141,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14t-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda12_8-test:  # Testing | ||||
| @ -3793,6 +4189,72 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14t-cuda12_9-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14t-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda12_9-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_14t-cuda12_9-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       build_name: manywheel-py3_14t-cuda12_9 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda12_9-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: manywheel-py3_14t-cuda12_9-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.9 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       build_name: manywheel-py3_14t-cuda12_9 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   manywheel-py3_14t-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -3811,7 +4273,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_14t-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_14t-cuda13_0-test:  # Testing | ||||
| @ -3884,7 +4346,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_14t-rocm6_3-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
| @ -3995,7 +4457,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_14t-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|  | ||||
							
								
								
									
										6
									
								
								.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -10,9 +10,7 @@ on: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/binaries/*' | ||||
|       - 'ciflow/binaries_wheel/*' | ||||
|       - 'ciflow/rocm/*' | ||||
|       - 'ciflow/rocm-mi300/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
| @ -69,7 +67,7 @@ jobs: | ||||
|     needs: | ||||
|       - manywheel-py3_9-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|  | ||||
							
								
								
									
										25
									
								
								.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										25
									
								
								.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -46,7 +46,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -60,13 +60,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.10.4" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -81,9 +86,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
|  | ||||
							
								
								
									
										371
									
								
								.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										371
									
								
								.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -56,13 +56,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.10.4" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -77,9 +82,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -95,6 +104,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -105,9 +116,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -166,13 +201,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.11.4" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -187,9 +227,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -205,6 +249,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -215,9 +261,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -276,13 +346,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.12.4" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -297,9 +372,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -315,6 +394,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -325,9 +406,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -386,13 +491,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.13.4" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -407,9 +517,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -425,6 +539,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -435,9 +551,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -496,13 +636,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Removeme once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.13.4" | ||||
|           freethreaded: true | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -517,9 +662,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -535,6 +684,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -545,9 +696,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -606,13 +781,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Remove me once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.14.0-rc.2" | ||||
|           freethreaded: false | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -627,9 +807,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -645,6 +829,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -655,9 +841,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
| @ -716,13 +926,18 @@ jobs: | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # shellcheck disable=SC2129 | ||||
|           echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" | ||||
|       - name: Setup Python | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           # TODO: Remove me once 3.14 is out | ||||
|           # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 | ||||
|           python-version: "3.14.0-rc.2" | ||||
|           freethreaded: true | ||||
|       - name: Install conda and dependencies | ||||
|         run: | | ||||
|           # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on | ||||
|           curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" | ||||
|           chmod +x "${RUNNER_TEMP}/conda.sh" | ||||
|           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" | ||||
|           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" | ||||
|           if [ -d "/Applications/Xcode_14.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           elif [ -d "/Applications/Xcode_13.3.1.app" ]; then | ||||
|             echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}" | ||||
|           fi | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
| @ -737,9 +952,13 @@ jobs: | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -755,6 +974,8 @@ jobs: | ||||
|           "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" | ||||
|       - name: Test PyTorch wheel | ||||
|         run: | | ||||
|           # shellcheck disable=SC1091 | ||||
|           source "${RUNNER_TEMP}/anaconda/bin/activate" | ||||
|           set -eux -o pipefail | ||||
|           # shellcheck disable=SC1090 | ||||
|           source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" | ||||
| @ -765,9 +986,33 @@ jobs: | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           python -mvenv test_venv | ||||
|           source test_venv/bin/activate | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|  | ||||
							
								
								
									
										6
									
								
								.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -64,7 +64,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Populate binary env | ||||
|         shell: cmd | ||||
| @ -141,7 +141,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Populate binary env | ||||
|         shell: cmd | ||||
| @ -201,7 +201,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cpu-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										6
									
								
								.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -64,7 +64,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Populate binary env | ||||
|         shell: cmd | ||||
| @ -141,7 +141,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Populate binary env | ||||
|         shell: cmd | ||||
| @ -201,7 +201,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -51,7 +51,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -166,7 +166,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|  | ||||
							
								
								
									
										274
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										274
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -58,7 +58,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -173,7 +173,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -283,7 +283,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cpu-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -306,7 +306,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -422,7 +422,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -533,7 +533,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_6-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -556,7 +556,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -672,7 +672,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -783,11 +783,261 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_8-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|   # Build job for the debug, shared-with-deps libtorch package against CUDA 12.9 on Windows. | ||||
|   # It checks out PyTorch, runs the binary populate-env and Windows build scripts, and uploads | ||||
|   # the contents of PYTORCH_FINAL_PACKAGE_DIR as the artifact | ||||
|   # "libtorch-cuda12_9-shared-with-deps-debug". | ||||
|   libtorch-cuda12_9-shared-with-deps-debug-build: | ||||
|     # Only run in repositories owned by the 'pytorch' organization | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     # The runner label is the label-type output of get-label-type followed by "windows.4xlarge" | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       # NOTE(review): BINARY_ENV_FILE and PYTORCH_FINAL_PACKAGE_DIR are appended to GITHUB_ENV, but the | ||||
|       #       WIN_PACKAGE_WORK_DIR line has no redirect, so it is only printed to the step log and is | ||||
|       #       not exported to later steps. Confirm whether `>> "${GITHUB_ENV}"` was intended. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       # Logs the AMI id, instance id, instance type and `uname -a` output. Each metadata query first | ||||
|       # requests a session token (TTL header of 30 seconds) from the instance metadata endpoint. | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       # SSH setup is best-effort: continue-on-error keeps a failure here from failing the job | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       # On pull_request events the PR head SHA is checked out; otherwise github.sha is used. | ||||
|       # The repository (with recursive submodules) is placed under ./pytorch. | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # Second "Populate binary env" step: runs the repository script now that the checkout exists | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       # Runs even when an earlier step failed (always()); the step errors out if no files are found | ||||
|       # at PYTORCH_FINAL_PACKAGE_DIR. The artifact is retained for 14 days. | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cuda12_9-shared-with-deps-debug | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       # Both SSH cleanup steps below run regardless of the job outcome (always()) | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cuda12_9-shared-with-deps-debug-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cuda12_9-shared-with-deps-debug-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cuda12_9-shared-with-deps-debug | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|   libtorch-cuda12_9-shared-with-deps-debug-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-cuda12_9-shared-with-deps-debug-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_9-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|   libtorch-cuda13_0-shared-with-deps-debug-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
| @ -806,7 +1056,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -922,7 +1172,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -1033,7 +1283,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda13_0-shared-with-deps-debug | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/generated-windows-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/generated-windows-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -51,7 +51,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -166,7 +166,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|  | ||||
							
								
								
									
										274
									
								
								.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										274
									
								
								.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -58,7 +58,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -173,7 +173,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -283,7 +283,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -306,7 +306,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -422,7 +422,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -533,7 +533,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_6-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -556,7 +556,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -672,7 +672,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -783,11 +783,261 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_8-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|   libtorch-cuda12_9-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cuda12_9-shared-with-deps-release | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cuda12_9-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cuda12_9-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cuda12_9-shared-with-deps-release | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|   libtorch-cuda12_9-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-cuda12_9-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu129 | ||||
|       GPU_ARCH_VERSION: "12.9" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda12_9-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|   libtorch-cuda13_0-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
| @ -806,7 +1056,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
| @ -922,7 +1172,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
| @ -1033,7 +1283,7 @@ jobs: | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       DESIRED_PYTHON: "3.9" | ||||
|       build_name: libtorch-cuda13_0-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										1666
									
								
								.github/workflows/generated-windows-binary-wheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										1666
									
								
								.github/workflows/generated-windows-binary-wheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										4
									
								
								.github/workflows/inductor-nightly.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/inductor-nightly.yml
									
									
									
									
										vendored
									
									
								
							| @ -37,7 +37,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-default-label-prefix | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" | ||||
|       test-matrix: | | ||||
| @ -56,7 +56,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: nightly-dynamo-benchmarks-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image: ${{ needs.nightly-dynamo-benchmarks-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }} | ||||
|       timeout-minutes: 720 | ||||
|  | ||||
| @ -43,11 +43,6 @@ on: | ||||
|         required: false | ||||
|         type: boolean | ||||
|         default: false | ||||
|       freezing: | ||||
|         description: Run freezing? | ||||
|         required: false | ||||
|         type: boolean | ||||
|         default: true | ||||
|       benchmark_configs: | ||||
|         description: The list of configs used the benchmark | ||||
|         required: false | ||||
| @ -80,7 +75,7 @@ jobs: | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
| @ -106,8 +101,8 @@ jobs: | ||||
|     needs: inductor-build | ||||
|     if: github.event.schedule == '0 7 * * *' | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true | ||||
|       docker-image: ${{ needs.inductor-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} | ||||
|       timeout-minutes: 720 | ||||
| @ -121,9 +116,10 @@ jobs: | ||||
|     name: inductor-test | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: inductor-build | ||||
|     if: github.event_name == 'workflow_dispatch' | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       dashboard-tag: training-${{ inputs.training || 'false' }}-inference-${{ inputs.inference || 'true' }}-default-${{ inputs.default || 'true' }}-dynamic-${{ inputs.dynamic || 'true' }}-cppwrapper-${{ inputs.cppwrapper || 'true' }}-aotinductor-${{ inputs.aotinductor || 'true' }}-freezing-${{ inputs.freezing || 'true' }} | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} | ||||
|       docker-image: ${{ needs.inductor-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} | ||||
|       timeout-minutes: 720 | ||||
|  | ||||
| @ -80,7 +80,7 @@ jobs: | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
| @ -107,7 +107,7 @@ jobs: | ||||
|     needs: inductor-build | ||||
|     if: github.event.schedule == '0 7 * * *' | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true | ||||
|       docker-image: ${{ needs.inductor-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} | ||||
| @ -124,7 +124,7 @@ jobs: | ||||
|     needs: inductor-build | ||||
|     if: github.event_name == 'workflow_dispatch' | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }} | ||||
|       docker-image: ${{ needs.inductor-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} | ||||
|  | ||||
							
								
								
									
										8
									
								
								.github/workflows/inductor-periodic.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/workflows/inductor-periodic.yml
									
									
									
									
										vendored
									
									
								
							| @ -39,7 +39,7 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks | ||||
|       cuda-arch-list: '8.0;8.6' | ||||
|       cuda-arch-list: '8.6' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
| @ -62,7 +62,7 @@ jobs: | ||||
|           { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.aws.a100" }, | ||||
|           { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
|           { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, | ||||
| @ -154,7 +154,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-default-label-prefix | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" | ||||
|       test-matrix: | | ||||
| @ -200,7 +200,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: periodic-dynamo-benchmarks-cpu-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										10
									
								
								.github/workflows/inductor-rocm.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/workflows/inductor-rocm.yml
									
									
									
									
										vendored
									
									
								
							| @ -3,10 +3,18 @@ name: inductor-rocm | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|       #- main | ||||
|       - release/* | ||||
|     tags: | ||||
|       - ciflow/inductor-rocm/* | ||||
|   schedule: | ||||
|     # We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs. | ||||
|     # Also run less frequently on weekends. | ||||
|     - cron: 45 0,8,16 * * 1-5 | ||||
|     - cron: 45 4 * * 0,6 | ||||
|     - cron: 45 4,12,20 * * 1-5 | ||||
|     - cron: 45 12 * * 0,6 | ||||
|     - cron: 29 8 * * *  # about 1:29am PDT, for mem leak check and rerun disabled tests | ||||
|   workflow_dispatch: | ||||
|  | ||||
| concurrency: | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/inductor-unittest.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/inductor-unittest.yml
									
									
									
									
										vendored
									
									
								
							| @ -110,7 +110,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       test-matrix: | | ||||
| @ -127,7 +127,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: inductor-cpu-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/inductor.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/inductor.yml
									
									
									
									
										vendored
									
									
								
							| @ -79,7 +79,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       test-matrix: | | ||||
| @ -101,7 +101,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: inductor-cpu-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/nightly.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/nightly.yml
									
									
									
									
										vendored
									
									
								
							| @ -54,7 +54,7 @@ jobs: | ||||
|       - get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-gcc11 | ||||
|       build-environment: linux-jammy-py3.9-gcc11 | ||||
|       docker-image: ${{ needs.docs-build.outputs.docker-image }} | ||||
|       push: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || startsWith(github.event.ref, 'refs/tags/v') }} | ||||
|       run-doxygen: true | ||||
|  | ||||
							
								
								
									
										10
									
								
								.github/workflows/operator_benchmark.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/workflows/operator_benchmark.yml
									
									
									
									
										vendored
									
									
								
							| @ -14,10 +14,6 @@ on: | ||||
|   schedule: | ||||
|     # Run at 07:00 UTC every Sunday | ||||
|     - cron: 0 7 * * 0 | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - benchmarks/operator_benchmark/** | ||||
|       - .github/workflows/operator_benchmark.yml | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
| @ -33,7 +29,7 @@ jobs: | ||||
|     name: opbenchmark-build | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
| @ -46,7 +42,7 @@ jobs: | ||||
|     name: opbenchmark-on-demand-build | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
| @ -59,7 +55,7 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: opbenchmark-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11-build | ||||
|       build-environment: linux-jammy-py3.9-gcc11-build | ||||
|       docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										10
									
								
								.github/workflows/rocm.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/workflows/rocm.yml
									
									
									
									
										vendored
									
									
								
							| @ -3,13 +3,19 @@ name: rocm | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|   #     - main | ||||
|       - release/* | ||||
|     tags: | ||||
|       - ciflow/rocm/* | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     - cron: 29 8 * * *  # about 1:29am PDT | ||||
|     # We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs. | ||||
|     # Also run less frequently on weekends. | ||||
|     - cron: 45 0,8,16 * * 1-5 | ||||
|     - cron: 45 4 * * 0,6 | ||||
|     - cron: 45 4,12,20 * * 1-5 | ||||
|     - cron: 45 12 * * 0,6 | ||||
|     - cron: 29 8 * * *  # about 1:29am PDT, for mem leak check and rerun disabled tests | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/workflows/trunk.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/trunk.yml
									
									
									
									
										vendored
									
									
								
							| @ -240,7 +240,7 @@ jobs: | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-gcc11 | ||||
|       build-environment: linux-jammy-py3.9-gcc11 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
| @ -255,7 +255,7 @@ jobs: | ||||
|       - verify-cachebench-cpu-build | ||||
|       - target-determination | ||||
|     with: | ||||
|       build-environment: linux-jammy-py3.10-gcc11 | ||||
|       build-environment: linux-jammy-py3.9-gcc11 | ||||
|       docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										11
									
								
								.github/workflows/vllm.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										11
									
								
								.github/workflows/vllm.yml
									
									
									
									
										vendored
									
									
								
							| @ -2,9 +2,6 @@ name: vllm-test | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|       - release/* | ||||
|     tags: | ||||
|       - ciflow/vllm/* | ||||
|   workflow_dispatch: | ||||
| @ -48,18 +45,14 @@ jobs: | ||||
|           { config: "vllm_basic_models_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_entrypoints_test", shard: 1, num_shards: 1,runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_280_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_multi_model_processor_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_pytorch_compilation_unit_tests", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_multi_model_test_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu"}, | ||||
|           { config: "vllm_languagde_model_test_extended_generation_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu"}, | ||||
|           { config: "vllm_distributed_test_2_gpu_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_test", shard: 0, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_test", shard: 1, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_test", shard: 2, num_shards: 4,  runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_test", shard: 3, num_shards: 4,  runner: "linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|           { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.g6.12xlarge.nvidia.gpu"}, | ||||
|           { config: "vllm_distributed_test_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.12xlarge.nvidia.gpu"} | ||||
|           { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.aws.h100.4"}, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|  | ||||
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -389,5 +389,3 @@ android/pytorch_android_torchvision/.cxx | ||||
|  | ||||
| # Claude Code local configuration | ||||
| CLAUDE.local.md | ||||
| /test_*.py | ||||
| /debug_*.py | ||||
|  | ||||
| @ -13,7 +13,7 @@ exclude_patterns = [ | ||||
|     '**/fb/**', | ||||
|     'functorch/docs/**', | ||||
|     'functorch/examples/**', | ||||
|     'functorch/docs/source/tutorials/**', | ||||
|     'functorch/notebooks/**', | ||||
|     'torch/_inductor/fx_passes/serialized_patterns/**', | ||||
|     'torch/_inductor/autoheuristic/artifacts/**', | ||||
|     'scripts/**', | ||||
| @ -1568,6 +1568,7 @@ include_patterns = [ | ||||
| exclude_patterns = [ | ||||
|     'caffe2/**', | ||||
|     'functorch/docs/**', | ||||
|     'functorch/notebooks/**', | ||||
|     'torch/_inductor/fx_passes/serialized_patterns/**', | ||||
|     'torch/_inductor/autoheuristic/artifacts/**', | ||||
|     'test/dynamo/cpython/**', | ||||
|  | ||||
| @ -810,7 +810,7 @@ cc_library( | ||||
|     name = "torch_python", | ||||
|     srcs = libtorch_python_core_sources | ||||
|         + if_cuda(libtorch_python_cuda_sources) | ||||
|         + libtorch_python_distributed_sources | ||||
|         + if_cuda(libtorch_python_distributed_sources) | ||||
|         + GENERATED_AUTOGRAD_PYTHON, | ||||
|     hdrs = glob([ | ||||
|         "torch/csrc/generic/*.cpp", | ||||
|  | ||||
							
								
								
									
										15
									
								
								CLAUDE.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								CLAUDE.md
									
									
									
									
									
								
							| @ -1,15 +0,0 @@ | ||||
| # Testing | ||||
|  | ||||
| Use our test class and test runner: | ||||
|  | ||||
| ``` | ||||
| from torch.testing._internal.common_utils import run_tests, TestCase | ||||
|  | ||||
| class TestFeature(TestCase): | ||||
|     ... | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     run_tests() | ||||
| ``` | ||||
|  | ||||
| To test Tensor equality, use assertEqual. | ||||
| @ -234,7 +234,6 @@ cmake_dependent_option(INSTALL_TEST "Install test binaries if BUILD_TEST is on" | ||||
| option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF) | ||||
| option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON) | ||||
| option(USE_ASAN "Use Address+Undefined Sanitizers" OFF) | ||||
| option(USE_LSAN "Use Leak Sanitizer" OFF) | ||||
| option(USE_TSAN "Use Thread Sanitizer" OFF) | ||||
| option(USE_CUDA "Use CUDA" ON) | ||||
| option(USE_XPU "Use XPU" ON) | ||||
| @ -874,28 +873,17 @@ cmake_dependent_option( | ||||
|   "Whether to build the flash_attention kernel for scaled dot product attention.\ | ||||
|   Will be disabled if not supported by the platform" | ||||
|   ON | ||||
|   "(USE_CUDA AND NOT MSVC) OR USE_ROCM" | ||||
|   "USE_CUDA OR USE_ROCM;NOT MSVC" | ||||
|   OFF) | ||||
|  | ||||
| cmake_dependent_option( | ||||
|   USE_FBGEMM_GENAI | ||||
|   "Whether to build FBGEMM GenAI quantized GEMM kernels.\ | ||||
|   Will be disabled if not supported by the platform" | ||||
|   ON | ||||
|   "USE_ROCM" | ||||
|   OFF | ||||
|   "USE_CUDA OR USE_ROCM" | ||||
|   OFF) | ||||
|  | ||||
| IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF") | ||||
|   set(USE_FBGEMM_GENAI off) | ||||
| endif() | ||||
|  | ||||
| # Set USE_FBGEMM_GENAI to ON for CUDA build on SM100. | ||||
| if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) | ||||
|   message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a") | ||||
|   set(USE_FBGEMM_GENAI ON) | ||||
| endif() | ||||
|  | ||||
| # CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem | ||||
| # Eff Attention won't | ||||
| cmake_dependent_option( | ||||
| @ -909,7 +897,7 @@ cmake_dependent_option( | ||||
| # USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake | ||||
| # | ||||
| if(USE_ROCM) | ||||
|   if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION) | ||||
|   if(UNIX AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)) | ||||
|     include(cmake/External/aotriton.cmake) | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| @ -88,13 +88,13 @@ source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
|  | ||||
| * If you want to have no-op incremental rebuilds (which are fast), see [Make no-op build fast](#make-no-op-build-fast) below. | ||||
|  | ||||
| * When installing with `python -m pip install -e . -v --no-build-isolation` (in contrast to `python -m pip install . -v --no-build-isolation`) Python runtime will use | ||||
| * When installing with `python -m pip install -e .` (in contrast to `python -m pip install .`) Python runtime will use | ||||
|   the current local source-tree when importing `torch` package. (This is done by creating [`.egg-link`](https://wiki.python.org/moin/PythonPackagingTerminology#egg-link) file in `site-packages` folder) | ||||
|   This way you do not need to repeatedly install after modifying Python files (`.py`). | ||||
|   However, you would need to reinstall if you modify Python interface (`.pyi`, `.pyi.in`) or non-Python files (`.cpp`, `.cc`, `.cu`, `.h`, ...). | ||||
|  | ||||
|  | ||||
|   One way to avoid running `python -m pip install -e . -v --no-build-isolation` every time one makes a change to C++/CUDA/ObjectiveC files on Linux/Mac, | ||||
|   One way to avoid running `python -m pip install -e .` every time one makes a change to C++/CUDA/ObjectiveC files on Linux/Mac, | ||||
|   is to create a symbolic link from `build` folder to `torch/lib`, for example, by issuing following: | ||||
|   ```bash | ||||
|   pushd torch/lib; sh -c "ln -sf ../../build/lib/libtorch_cpu.* ."; popd | ||||
| @ -116,7 +116,7 @@ source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
|  | ||||
|   Next run `python setup.py clean`. After that, you can install in editable mode again. | ||||
|  | ||||
| * If you run into errors when running `python -m pip install -e . -v --no-build-isolation`, here are some debugging steps: | ||||
| * If you run into errors when running `python -m pip install -e .`, here are some debugging steps: | ||||
|   1. Run `printf '#include <stdio.h>\nint main() { printf("Hello World");}'|clang -x c -; ./a.out` to make sure | ||||
|   your CMake works and can compile this simple Hello World program without errors. | ||||
|   2. Nuke your `build` directory. The `setup.py` script compiles binaries into the `build` folder and caches many | ||||
| @ -129,10 +129,10 @@ source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
|       git clean -xdf | ||||
|       python setup.py clean | ||||
|       git submodule update --init --recursive | ||||
|       python -m pip install --group dev | ||||
|       python -m pip install -r requirements.txt | ||||
|       python -m pip install --no-build-isolation -v -e . | ||||
|       ``` | ||||
|   4. The main step within `python -m pip install -e . -v --no-build-isolation` is running `make` from the `build` directory. If you want to | ||||
|   4. The main step within `python -m pip install -e .` is running `cmake --build build` from the `build` directory. If you want to | ||||
|     experiment with some environment variables, you can pass them into the command: | ||||
|       ```bash | ||||
|       ENV_KEY1=ENV_VAL1[, ENV_KEY2=ENV_VAL2]* CMAKE_FRESH=1 python -m pip install --no-build-isolation -v -e . | ||||
| @ -259,7 +259,6 @@ dependencies as well as the nightly binaries into the repo directory. | ||||
|       support for PyTorch. | ||||
| * [tools](tools) - Code generation scripts for the PyTorch library. | ||||
|   See [README](tools/README.md) of this directory for more details. | ||||
| * [torchgen](torchgen) - contains the logic and tooling for generating PyTorch's low-level C++ and Python bindings from operator definitions, typically specified in native_functions.yaml | ||||
| * [test](test) - Python unit tests for PyTorch Python frontend. | ||||
|   * [test_torch.py](test/test_torch.py) - Basic tests for PyTorch | ||||
|     functionality. | ||||
| @ -295,7 +294,7 @@ The following packages should be installed with `pip`: | ||||
| - `pytest` - recommended to run tests more selectively | ||||
| Running | ||||
| ``` | ||||
| pip install --group dev | ||||
| pip install -r requirements.txt | ||||
| ``` | ||||
| will install these dependencies for you. | ||||
|  | ||||
| @ -646,9 +645,9 @@ can be selected interactively with your mouse to zoom in on a particular part of | ||||
| the program execution timeline. The `--native` command-line option tells | ||||
| `py-spy` to record stack frame entries for PyTorch C++ code. To get line numbers | ||||
| for C++ code it may be necessary to compile PyTorch in debug mode by prepending | ||||
| your `python -m pip install -e . -v --no-build-isolation` call to compile | ||||
| PyTorch with `DEBUG=1`. Depending on your operating system it may also be | ||||
| necessary to run `py-spy` with root privileges. | ||||
| your `python -m pip install -e .` call to compile PyTorch with `DEBUG=1`. | ||||
| Depending on your operating system it may also be necessary to run `py-spy` with | ||||
| root privileges. | ||||
|  | ||||
| `py-spy` can also work in an `htop`-like "live profiling" mode and can be | ||||
| tweaked to adjust the stack sampling rate, see the `py-spy` readme for more | ||||
| @ -656,10 +655,10 @@ details. | ||||
|  | ||||
| ## Managing multiple build trees | ||||
|  | ||||
| One downside to using `python -m pip install -e . -v --no-build-isolation` is | ||||
| that your development version of PyTorch will be installed globally on your | ||||
| account (e.g., if you run `import torch` anywhere else, the development version | ||||
| will be used). | ||||
| One downside to using `python -m pip install -e .` is that your development | ||||
| version of PyTorch will be installed globally on your account (e.g., if | ||||
| you run `import torch` anywhere else, the development version will be | ||||
| used). | ||||
|  | ||||
| If you want to manage multiple builds of PyTorch, you can make use of | ||||
| [venv environments](https://docs.python.org/3/library/venv.html) to maintain | ||||
| @ -720,7 +719,7 @@ options. | ||||
|  | ||||
| ### Code completion and IDE support | ||||
|  | ||||
| When using `python -m pip install -e . -v --no-build-isolation`, PyTorch will generate | ||||
| When using `python -m pip install -e .`, PyTorch will generate | ||||
| a `compile_commands.json` file that can be used by many editors | ||||
| to provide command completion and error highlighting for PyTorch's | ||||
| C++ code. You need to `pip install ninja` to generate accurate | ||||
|  | ||||
| @ -243,7 +243,7 @@ git submodule update --init --recursive | ||||
|  | ||||
| ```bash | ||||
| # Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section above | ||||
| pip install --group dev | ||||
| pip install -r requirements.txt | ||||
| ``` | ||||
|  | ||||
| **On Linux** | ||||
| @ -394,7 +394,7 @@ On macOS | ||||
|  | ||||
| ```bash | ||||
| export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-'$(dirname $(which conda))/../'}:${CMAKE_PREFIX_PATH}" | ||||
| MACOSX_DEPLOYMENT_TARGET=11.0 CMAKE_ONLY=1 python setup.py build | ||||
| MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ CMAKE_ONLY=1 python setup.py build | ||||
| ccmake build  # or cmake-gui build | ||||
| ``` | ||||
|  | ||||
|  | ||||
| @ -50,7 +50,6 @@ Following is the Release Compatibility Matrix for PyTorch releases: | ||||
|  | ||||
| | PyTorch version | Python | C++ | Stable CUDA | Experimental CUDA | Stable ROCm | | ||||
| | --- | --- | --- | --- | --- | --- | | ||||
| | 2.9 | >=3.10, <=(3.14, 3.14t experimental) | C++17 | CUDA 12.6 (CUDNN 9.10.2.21), CUDA 12.8 (CUDNN 9.10.2.21) | CUDA 13.0 (CUDNN 9.13.0.50) | ROCm 6.4 | | ||||
| | 2.8 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 12.6 (CUDNN 9.10.2.21), CUDA 12.8 (CUDNN 9.10.2.21) | CUDA 12.9 (CUDNN 9.10.2.21) | ROCm 6.4 | | ||||
| | 2.7 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8 (CUDNN 9.1.0.70), CUDA 12.6 (CUDNN 9.5.1.17) | CUDA 12.8 (CUDNN 9.7.1.26) | ROCm 6.3 | | ||||
| | 2.6 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8, CUDA 12.4 (CUDNN 9.1.0.70) | CUDA 12.6 (CUDNN 9.5.1.17) | ROCm 6.2.4 | | ||||
|  | ||||
| @ -16,8 +16,6 @@ However, if you believe you have found a security vulnerability in PyTorch, we e | ||||
|  | ||||
| Please report security issues using https://github.com/pytorch/pytorch/security/advisories/new | ||||
|  | ||||
| All reports submitted thru the security advisories mechanism would **either be made public or dismissed by the team within 90 days of the submission**. If advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create an [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework. | ||||
|  | ||||
| Please refer to the following page for our responsible disclosure policy, reward guidelines, and those things that should not be reported: | ||||
|  | ||||
| https://www.facebook.com/whitehat | ||||
|  | ||||
| @ -252,88 +252,47 @@ if(USE_MEM_EFF_ATTENTION) | ||||
|   list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu}) | ||||
| endif() | ||||
|  | ||||
| IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF") | ||||
|   set(USE_FBGEMM_GENAI off) | ||||
| endif() | ||||
|  | ||||
| # FBGEMM GenAI | ||||
| IF(USE_FBGEMM_GENAI) | ||||
|   set(FBGEMM_THIRD_PARTY ${PROJECT_SOURCE_DIR}/third_party/fbgemm/external/) | ||||
|   set(FBGEMM_GENAI_SRCS ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize) | ||||
|   if(USE_CUDA) | ||||
|     # To avoid increasing the build time/binary size unnecessarily, use an allow-list of kernels to build. | ||||
|     # If you want to integrate a kernel from FBGEMM into torch, you have to add it here. | ||||
|     set(FBGEMM_CUTLASS_KERNELS_REGEX ".*mx8mx8bf16_grouped.*") | ||||
|     file(GLOB_RECURSE fbgemm_genai_native_cuda_cu | ||||
|       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/*.cu" | ||||
|       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/**/*.cu") | ||||
|     list(FILTER fbgemm_genai_native_cuda_cu INCLUDE REGEX ${FBGEMM_CUTLASS_KERNELS_REGEX}) | ||||
|   set(FBGEMM_GENAI_DIR ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize) | ||||
|  | ||||
|     # PyTorch is not built for 10.0a in CI, due to lack of portability, | ||||
|     # so we need to explicitly build these files for 10.0a. | ||||
|     foreach(cu_file ${fbgemm_genai_native_cuda_cu}) | ||||
|       _BUILD_FOR_ADDITIONAL_ARCHS( | ||||
|         "${cu_file}" | ||||
|         "100a") | ||||
|     endforeach() | ||||
|   if(USE_ROCM) | ||||
|     # Only include the kernels we want to build to avoid increasing binary size. | ||||
|     file(GLOB_RECURSE fbgemm_genai_native_rocm_hip | ||||
|       "${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip" | ||||
|       "${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip") | ||||
|     set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | ||||
|  | ||||
|     file(GLOB_RECURSE fbgemm_genai_native_cuda_cpp | ||||
|       "${FBGEMM_GENAI_SRCS}/common/*.cpp" | ||||
|     ) | ||||
|  | ||||
|     # Combine all source files into a single list | ||||
|     list(APPEND fbgemm_genai_all_sources | ||||
|       ${fbgemm_genai_native_cuda_cu} | ||||
|       ${fbgemm_genai_native_cuda_cpp} | ||||
|     ) | ||||
|  | ||||
|     # Now, create the library and provide the sources at the same time | ||||
|     add_library(fbgemm_genai OBJECT ${fbgemm_genai_all_sources}) | ||||
|     # Add additional HIPCC compiler flags for performance | ||||
|     set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS | ||||
|       -mllvm | ||||
|       -amdgpu-coerce-illegal-types=1 | ||||
|       -mllvm | ||||
|       -enable-post-misched=0 | ||||
|       -mllvm | ||||
|       -greedy-reverse-local-assignment=1 | ||||
|       -fhip-new-launch-api) | ||||
|  | ||||
|     hip_add_library( | ||||
|       fbgemm_genai STATIC | ||||
|       ${fbgemm_genai_native_rocm_hip} | ||||
|       HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS}) | ||||
|     set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||||
|  | ||||
|     set(fbgemm_genai_mx8mx8bf16_grouped | ||||
|       "${FBGEMM_GENAI_SRCS}/cutlass_extensions/mx8mx8bf16_grouped/" | ||||
|     ) | ||||
|     target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES) | ||||
|  | ||||
|     target_include_directories(fbgemm_genai PUBLIC | ||||
|       ${FBGEMM_THIRD_PARTY}/cutlass/include | ||||
|       ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include | ||||
|       ${fbgemm_genai_mx8mx8bf16_grouped} | ||||
|       ${FBGEMM_GENAI_SRCS}/common/include/   # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp | ||||
|       ${FBGEMM_GENAI_SRCS}/include/          # includes fbgemm_gpu/torch_ops.h | ||||
|       # FBGEMM version of Composable Kernel is used due to some customizations | ||||
|       ${FBGEMM_THIRD_PARTY}/composable_kernel/include | ||||
|       ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include | ||||
|       ${FBGEMM_GENAI_DIR}/include/ | ||||
|       ${FBGEMM_GENAI_DIR}/common/include/ | ||||
|     ) | ||||
|   else() | ||||
|     if(USE_ROCM) | ||||
|       # Only include the kernels we want to build to avoid increasing binary size. | ||||
|       file(GLOB_RECURSE fbgemm_genai_native_rocm_hip | ||||
|         "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip" | ||||
|         "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip") | ||||
|       set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | ||||
|  | ||||
|       # Add additional HIPCC compiler flags for performance | ||||
|       set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS | ||||
|         -mllvm | ||||
|         -amdgpu-coerce-illegal-types=1 | ||||
|         -mllvm | ||||
|         -enable-post-misched=0 | ||||
|         -mllvm | ||||
|         -greedy-reverse-local-assignment=1 | ||||
|         -fhip-new-launch-api) | ||||
|  | ||||
|       hip_add_library( | ||||
|         fbgemm_genai STATIC | ||||
|         ${fbgemm_genai_native_rocm_hip} | ||||
|         HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS}) | ||||
|       set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||||
|       target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES) | ||||
|  | ||||
|       target_include_directories(fbgemm_genai PUBLIC | ||||
|         # FBGEMM version of Composable Kernel is used due to some customizations | ||||
|         ${FBGEMM_THIRD_PARTY}/composable_kernel/include | ||||
|         ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include | ||||
|         ${FBGEMM_THIRD_PARTY}/cutlass/include | ||||
|         ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include | ||||
|         ${FBGEMM_GENAI_SRCS}/common/include/   # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp | ||||
|         ${FBGEMM_GENAI_SRCS}/include/          # includes fbgemm_gpu/torch_ops.h | ||||
|       ) | ||||
|     endif() | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| @ -676,26 +635,12 @@ if(USE_CUDA AND NOT USE_ROCM) | ||||
|   add_definitions(-DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED) | ||||
|   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include) | ||||
|   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include) | ||||
|  | ||||
|   # Add FBGEMM_GENAI include directories for torch_ops.h | ||||
|   if(USE_FBGEMM_GENAI) | ||||
|     list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/include) | ||||
|     list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include) | ||||
|   endif() | ||||
|  | ||||
|   if($ENV{ATEN_STATIC_CUDA}) | ||||
|     if(CUDA_VERSION VERSION_LESS_EQUAL 12.9) | ||||
|       list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|           ${CUDA_LIBRARIES} | ||||
|           CUDA::cusparse_static | ||||
|           CUDA::cufft_static_nocallback) | ||||
|     else() | ||||
|       list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|           ${CUDA_LIBRARIES} | ||||
|           CUDA::cusparse_static | ||||
|           CUDA::cufft_static) | ||||
|     endif() | ||||
|  | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|       ${CUDA_LIBRARIES} | ||||
|       CUDA::cusparse_static | ||||
|       CUDA::cufft_static_nocallback | ||||
|     ) | ||||
|    if(NOT BUILD_LAZY_CUDA_LINALG) | ||||
|      list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|        CUDA::cusolver_static | ||||
|  | ||||
| @ -308,44 +308,17 @@ void fillVersion<DLManagedTensorVersioned>( | ||||
| // constructed out of ATen tensor | ||||
| template <class T> | ||||
| T* toDLPackImpl(const Tensor& src) { | ||||
|   auto view = src; | ||||
|  | ||||
|   // Detect whether there is need to normalize the strides | ||||
|   // Background: gh-83069 | ||||
|   // | ||||
|   // However, normalizing strides is costly: it can slow down | ||||
|   // toDLPack conversion by 3x, so we | ||||
|   // only normalize if needed. | ||||
|   // | ||||
|   // The following code detects whether the src follows | ||||
|   // a continuous pattern. If the src follows such pattern (common-case) | ||||
|   // then we do not need to normalize the strides. | ||||
|   bool need_normalize_strides = false; | ||||
|   int64_t expected_stride = 1; | ||||
|   for (int i = src.dim() - 1; i >= 0; i--) { | ||||
|     // detect if we do not meet continuous pattern | ||||
|     // and the size is 1, so there is opportunity to normalize | ||||
|     if (src.stride(i) != expected_stride && src.size(i) == 1) { | ||||
|       need_normalize_strides = true; | ||||
|       break; | ||||
|   // create a new tensor with possibly normalized strides | ||||
|   // gh-83069 | ||||
|   auto shape = src.sizes(); | ||||
|   auto strides = src.strides().vec(); | ||||
|   for (int i = 0; i < src.dim(); i++) { | ||||
|     if (shape[i] < 2) { | ||||
|       strides[i] = 1; | ||||
|     } | ||||
|     expected_stride *= src.size(i); | ||||
|   } | ||||
|  | ||||
|   // less common case, try normalizing the strides | ||||
|   if (need_normalize_strides) { | ||||
|     // create a new tensor with possibly normalized strides | ||||
|     // gh-83069 | ||||
|     auto shape = src.sizes(); | ||||
|     auto strides = src.strides().vec(); | ||||
|     for (int i = 0; i < src.dim(); i++) { | ||||
|       if (shape[i] < 2) { | ||||
|         strides[i] = 1; | ||||
|       } | ||||
|     } | ||||
|     view = src.as_strided(shape, strides, src.storage_offset()); | ||||
|   } | ||||
|  | ||||
|   auto view = src.as_strided(shape, strides, src.storage_offset()); | ||||
|   ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>); | ||||
|   atDLMTensor->handle = view; | ||||
|   atDLMTensor->tensor.manager_ctx = atDLMTensor; | ||||
|  | ||||
| @ -1,17 +0,0 @@ | ||||
| #include <ATen/DTensorState.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| namespace { | ||||
| thread_local bool kDTensorAllowImplicitReplication = false; | ||||
| } | ||||
|  | ||||
| bool get_dtensor_allow_implicit_replication() { | ||||
|   return kDTensorAllowImplicitReplication; | ||||
| } | ||||
|  | ||||
| void set_dtensor_allow_implicit_replication(bool enabled) { | ||||
|   kDTensorAllowImplicitReplication = enabled; | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
| @ -1,34 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <c10/macros/Macros.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| TORCH_API bool get_dtensor_allow_implicit_replication(); | ||||
| TORCH_API void set_dtensor_allow_implicit_replication(bool enabled); | ||||
|  | ||||
| struct DTensorAllowImplicitReplication { | ||||
|   DTensorAllowImplicitReplication() | ||||
|       : prev_dtensor_allow_implicit_replication_( | ||||
|             get_dtensor_allow_implicit_replication()) { | ||||
|     set_dtensor_allow_implicit_replication(true); | ||||
|   } | ||||
|  | ||||
|   DTensorAllowImplicitReplication(const DTensorAllowImplicitReplication&) = | ||||
|       delete; | ||||
|   DTensorAllowImplicitReplication& operator=( | ||||
|       const DTensorAllowImplicitReplication&) = delete; | ||||
|   DTensorAllowImplicitReplication(DTensorAllowImplicitReplication&&) = delete; | ||||
|   DTensorAllowImplicitReplication& operator=( | ||||
|       DTensorAllowImplicitReplication&&) = delete; | ||||
|  | ||||
|   ~DTensorAllowImplicitReplication() { | ||||
|     set_dtensor_allow_implicit_replication( | ||||
|         prev_dtensor_allow_implicit_replication_); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   bool prev_dtensor_allow_implicit_replication_; | ||||
| }; | ||||
|  | ||||
| } // namespace at | ||||
| @ -133,12 +133,12 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { | ||||
|         "resize_ called on tensor with symbolic shape") | ||||
|     TORCH_CHECK( | ||||
|         sparse_dim + dense_dim == static_cast<int64_t>(size.size()), | ||||
|         "'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = ", | ||||
|         size.size(), | ||||
|         ", sparse_dim = ", | ||||
|         "number of dimensions must be sparse_dim (", | ||||
|         sparse_dim, | ||||
|         ", dense_dim = ", | ||||
|         dense_dim); | ||||
|         ") + dense_dim (", | ||||
|         dense_dim, | ||||
|         "), but got ", | ||||
|         size.size()); | ||||
|     if (nnz() > 0) { | ||||
|       [[maybe_unused]] auto constexpr alt_options_msg = | ||||
|           "You could try the following options:\n\ | ||||
| @ -254,12 +254,12 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { | ||||
|         "resize_and_clear_ called on tensor with symbolic shape") | ||||
|     TORCH_CHECK( | ||||
|         sparse_dim + dense_dim == static_cast<int64_t>(size.size()), | ||||
|         "'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = ", | ||||
|         size.size(), | ||||
|         ", sparse_dim = ", | ||||
|         "number of dimensions must be sparse_dim (", | ||||
|         sparse_dim, | ||||
|         ", dense_dim = ", | ||||
|         dense_dim); | ||||
|         ") + dense_dim (", | ||||
|         dense_dim, | ||||
|         "), but got ", | ||||
|         size.size()); | ||||
|  | ||||
|     set_sizes_and_strides(size, std::vector<int64_t>(size.size())); | ||||
|     sparse_dim_ = sparse_dim; | ||||
|  | ||||
| @ -8,7 +8,6 @@ | ||||
| #include <ATen/record_function.h> | ||||
| #include <ATen/SavedTensorHooks.h> | ||||
| #include <ATen/FunctionalTensorWrapper.h> | ||||
| #include <ATen/DTensorState.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| @ -20,7 +19,6 @@ ThreadLocalState::ThreadLocalState() | ||||
|       torch_dispatch_mode_state_(c10::impl::TorchDispatchModeTLS::get_state()), python_dispatcher_state_(c10::impl::PythonDispatcherTLS::get_state()), | ||||
|       python_torch_function_state_(at::impl::PythonTorchFunctionTLS::get_state()), | ||||
|       saved_tensors_default_hooks_state_(at::SavedTensorDefaultHooks::get_tls_state()), functionalization_reapply_views_state_(at::functionalization::impl::getFunctionalizationReapplyViewsTLS()), | ||||
|       dtensor_allow_implicit_replication_(at::get_dtensor_allow_implicit_replication()), | ||||
|       saved_objects_(at::impl::ThreadLocalPythonObjects::get_state()) { | ||||
| #if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE) && !defined(BUILD_LITE_INTERPRETER) | ||||
|   for(size_t i=0; i<autocast_dtypes_.size(); i++) { | ||||
| @ -54,8 +52,6 @@ void ThreadLocalState::setThreadLocalState( | ||||
|  | ||||
|   c10::impl::PythonDispatcherTLS::set_state(state.python_dispatcher_state_); | ||||
|  | ||||
|   at::set_dtensor_allow_implicit_replication(state.dtensor_allow_implicit_replication_); | ||||
|  | ||||
|   c10::ThreadLocalDebugInfo::_forceCurrentDebugInfo(state.debug_info_); | ||||
|  | ||||
|   c10::impl::_force_tls_local_dispatch_key_set(state.dispatch_key_); | ||||
|  | ||||
| @ -75,8 +75,6 @@ class TORCH_API ThreadLocalState { | ||||
|  | ||||
|   bool functionalization_reapply_views_state_; | ||||
|  | ||||
|   bool dtensor_allow_implicit_replication_; | ||||
|  | ||||
|   // TLS for arbitrary python objects that is registered via hooks | ||||
|   at::impl::ThreadLocalPythonObjects saved_objects_; | ||||
|  | ||||
|  | ||||
| @ -64,7 +64,6 @@ constexpr DynamicTypeBits kDynamicClassTypeBit = DYNAMIC_TYPE_BIT(10); | ||||
|   _(ScalarType, kDynamicIntTypeBit, 1)                                \ | ||||
|   _(Layout, kDynamicIntTypeBit, 1)                                        \ | ||||
|   _(SymInt, kDynamicIntTypeBit, 1)                                        \ | ||||
|   _(SymBool, kDynamicIntTypeBit, 1)                                        \ | ||||
|   _(MemoryFormat, kDynamicIntTypeBit, 1) | ||||
|  | ||||
| #define FORWARD_DECL_TYPE(NAME, _, __) struct NAME ## Type; | ||||
|  | ||||
| @ -644,8 +644,6 @@ inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYP | ||||
|   void * beta_ptr = &fbeta; | ||||
| #ifdef USE_ROCM | ||||
|   int flag = 0; | ||||
|   rocblas_datatype c_type = std::is_same<C_Dtype, float>::value ? rocblas_datatype_f32_r : rocblas_datatype_f16_r; | ||||
|   rocblas_datatype d_type = c_type; | ||||
| #if USE_GEMM_FLAGS_FP16_ALT_IMPL | ||||
|   flag = at::ROCmBackwardPassGuard::is_backward_pass() ? rocblas_gemm_flags_fp16_alt_impl : 0; | ||||
| #endif | ||||
| @ -654,8 +652,8 @@ inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYP | ||||
|                                    hipOperationToRocOperation(opb), (int)m, (int)n, (int)k, | ||||
|                                    (void*)alpha_ptr, a, rocblas_datatype_f16_r, (int)lda, stridea, | ||||
|                                    b, rocblas_datatype_f16_r, (int)ldb, strideb, | ||||
|                                    (void*)beta_ptr, c, c_type, (int)ldc, stridec, | ||||
|                                    c, d_type, (int)ldc, stridec, | ||||
|                                    (void*)beta_ptr, c, rocblas_datatype_f16_r, (int)ldc, stridec, | ||||
|                                    c, rocblas_datatype_f16_r, (int)ldc, stridec, | ||||
|                                    (int) num_batches, rocblas_datatype_f32_r, rocblas_gemm_algo_standard, | ||||
|                                    0, flag))); | ||||
| #else | ||||
| @ -1098,8 +1096,6 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE( | ||||
|   GEMM_CHECK_ARGVALUES(at::Half); | ||||
| #ifdef USE_ROCM | ||||
|   int flag = 0; | ||||
|   rocblas_datatype c_type = std::is_same<C_Dtype, float>::value ? rocblas_datatype_f32_r : rocblas_datatype_f16_r; | ||||
|   rocblas_datatype d_type = c_type; | ||||
| #if USE_GEMM_FLAGS_FP16_ALT_IMPL | ||||
|   flag = at::ROCmBackwardPassGuard::is_backward_pass() ? rocblas_gemm_flags_fp16_alt_impl : 0; | ||||
| #endif | ||||
| @ -1119,10 +1115,10 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE( | ||||
|       ldb, | ||||
|       beta_ptr, | ||||
|       c, | ||||
|       c_type, | ||||
|       rocblas_datatype_f16_r, | ||||
|       ldc, | ||||
|       c, | ||||
|       d_type, | ||||
|       rocblas_datatype_f16_r, | ||||
|       ldc, | ||||
|       rocblas_datatype_f32_r, | ||||
|       rocblas_gemm_algo_standard, | ||||
| @ -1941,11 +1937,11 @@ void scaled_gemm( | ||||
|   computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_TRANSB, _cublasOpFromChar(transb)); | ||||
|   cublasLtMatmulDescAttributes_t matmulDescA = CUBLASLT_MATMUL_DESC_A_SCALE_POINTER; | ||||
|   cublasLtMatmulDescAttributes_t matmulDescB = CUBLASLT_MATMUL_DESC_B_SCALE_POINTER; | ||||
| #if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT) | ||||
|   // hipblaslt supported row-wise before cublas, and did so their own way (via | ||||
|   // the SCALE_POINTERs), but then migrated to match how cublas does it (via | ||||
|   // the SCALE_MODEs). Here we check for this early custom mode. | ||||
|   bool use_rowwise = (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise); | ||||
| #if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT) | ||||
|   if (use_rowwise) { | ||||
|     matmulDescA = HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT; | ||||
|     matmulDescB = HIPBLASLT_MATMUL_DESC_B_SCALE_POINTER_VEC_EXT; | ||||
| @ -1960,12 +1956,8 @@ void scaled_gemm( | ||||
|             } | ||||
|   #endif | ||||
|   } | ||||
| #elif (CUDA_VERSION < 12090) && !defined(USE_ROCM) | ||||
|   // hipblaslt supported row-wise before cublas, and did so their own way (via | ||||
|   // the SCALE_POINTERs), but then migrated to match how cublas does it (via | ||||
|   // the SCALE_MODEs). Here we check for this early custom mode. | ||||
|   bool use_rowwise = (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise); | ||||
|   // rowwise isn't supported using older cublaslt or older hipblaslt | ||||
| #else | ||||
|   // rowwise isn't supported using cublaslt or older hipblaslt | ||||
|   TORCH_INTERNAL_ASSERT(use_rowwise == false, "rowwise scaled_gemm not supported with blaslt"); | ||||
| #endif  // if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT) | ||||
|   computeDesc.setAttribute(matmulDescA, mat1_scale_ptr); | ||||
|  | ||||
| @ -45,24 +45,6 @@ struct OffsetCalculator { | ||||
|  | ||||
|   C10_HOST_DEVICE offset_type get(index_t linear_idx) const { | ||||
|     offset_type offsets; | ||||
|  | ||||
| #if defined(USE_ROCM) | ||||
|     if ((dims > 0) && (dims <= 2)) { | ||||
|       auto divmod = sizes_[0].divmod(linear_idx); | ||||
| #pragma unroll | ||||
|       for (int arg = 0; arg < NARGS; arg++) | ||||
|         offsets[arg] = divmod.mod * strides_[0][arg]; | ||||
|       if (dims >= 2) { | ||||
|         divmod = sizes_[1].divmod(divmod.div); | ||||
| #pragma unroll | ||||
|         for (int arg = 0; arg < NARGS; arg++) | ||||
|           offsets[arg] += divmod.mod * strides_[1][arg]; | ||||
|       } | ||||
|       // [...] | ||||
|       return offsets; | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     #pragma unroll | ||||
|     for (int arg = 0; arg < NARGS; arg++) { | ||||
|       offsets[arg] = 0; | ||||
|  | ||||
| @ -117,8 +117,6 @@ namespace at::cuda { | ||||
|   _(nvrtcGetPTXSize)                              \ | ||||
|   _(nvrtcGetPTX)                                  \ | ||||
|   _(cuModuleLoadData)                             \ | ||||
|   _(cuModuleLoad)                                 \ | ||||
|   _(cuGetErrorString)                             \ | ||||
|   _(cuModuleGetFunction)                          \ | ||||
|   _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR) \ | ||||
|   _(nvrtcGetErrorString)                          \ | ||||
|  | ||||
| @ -7,7 +7,6 @@ | ||||
| #include <ATen/functorch/BatchRulesHelper.h> | ||||
| #include <ATen/functorch/PlumbingHelper.h> | ||||
| #include <ATen/core/dispatch/Dispatcher.h> | ||||
| #include <ATen/DTensorState.h> | ||||
|  | ||||
| #include <utility> | ||||
|  | ||||
| @ -45,13 +44,8 @@ static std::tuple<Tensor, std::optional<int64_t>> embedding_batch_rule( | ||||
|   const auto weight_ = reshape_dim_into(*weight_bdim, 0, weight); | ||||
|   auto indices_ = moveBatchDimToFront(indices, indices_bdim); | ||||
|  | ||||
|   { | ||||
|     // getStepTensor returns a regular Tensor. If indices_ is a DTensor | ||||
|     // we want to allow this mixed DTensor-Tensor operation. | ||||
|     at::DTensorAllowImplicitReplication guard; | ||||
|     const auto range = getStepTensor(indices, batch_size, num_embeddings); | ||||
|     indices_ = indices_ + range; | ||||
|   } | ||||
|   const auto range = getStepTensor(indices, batch_size, num_embeddings); | ||||
|   indices_ = indices_ + range; | ||||
|   auto result = at::embedding_symint(weight_, indices_, std::move(padding_idx), scale_grad_by_freq, sparse); | ||||
|   return std::make_tuple(std::move(result), 0); | ||||
| } | ||||
|  | ||||
| @ -9,7 +9,6 @@ | ||||
| #include <ATen/native/mkldnn/Matmul.h> | ||||
| #include <ATen/native/mkldnn/Linear.h> | ||||
| #include <ATen/native/Resize.h> | ||||
| #include <ATen/native/GroupedMMUtils.h> | ||||
| #if !defined(__s390x__) && !defined(__powerpc__) | ||||
| #include <cpuinfo.h> | ||||
| #endif | ||||
| @ -333,23 +332,4 @@ _scaled_mm_cpu(const Tensor& mat_a, const Tensor& mat_b, | ||||
|   return _scaled_mm_out_cpu(mat_a, mat_b, scale_a, scale_b, bias, scale_result, out_dtype, use_fast_accum, out); | ||||
| } | ||||
|  | ||||
| // TODO(vasiliy, future PR): figure out why we need to declare this function, when | ||||
| // other functions that live in ATen/native/*.cpp without declarations | ||||
| // or headers work just fine. | ||||
| Tensor _grouped_mm(const Tensor& mat_a, const Tensor& mat_b, | ||||
| const std::optional<at::Tensor>& offs, | ||||
| const std::optional<at::Tensor>& bias, | ||||
| std::optional<c10::ScalarType> out_dtype); | ||||
|  | ||||
| Tensor _grouped_mm(const Tensor& mat_a, const Tensor& mat_b, | ||||
| const std::optional<at::Tensor>& offs, | ||||
| const std::optional<at::Tensor>& bias, | ||||
| std::optional<c10::ScalarType> out_dtype) { | ||||
|   _grouped_mm_validate_inputs(mat_a, mat_b, offs, bias, out_dtype); | ||||
|   const auto out_dtype_ = _resolve_grouped_mm_out_dtype(mat_a, mat_b, out_dtype); | ||||
|   Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_); | ||||
|   _grouped_mm_fallback(mat_a, mat_b, offs, bias, out_dtype, out); | ||||
|   return out; | ||||
| } | ||||
|  | ||||
| }  // namespace at::native | ||||
|  | ||||
| @ -14,7 +14,6 @@ | ||||
| #include <c10/util/accumulate.h> | ||||
| #include <c10/util/irange.h> | ||||
| #include <c10/macros/Macros.h> | ||||
| #include <algorithm> | ||||
| #include <limits> | ||||
| #include <utility> | ||||
|  | ||||
| @ -301,50 +300,67 @@ struct ConvParams { | ||||
|   bool allow_tf32{}; | ||||
|  | ||||
|   bool is_strided() const { | ||||
|     return std::any_of( | ||||
|       stride.cbegin(), stride.cend(), [](const T& s) { return s != 1; }); | ||||
|     bool is_strided = false; | ||||
|     for (const auto& s : stride) { | ||||
|       is_strided |= (s != 1); | ||||
|     } | ||||
|     return is_strided; | ||||
|   } | ||||
|  | ||||
|   bool is_dilated() const { | ||||
|     return std::any_of( | ||||
|       dilation.cbegin(), dilation.cend(), [](const T& d) { return d != 1; }); | ||||
|     bool is_dilated = false; | ||||
|     for (const auto& d : dilation) { | ||||
|       is_dilated |= (d != 1); | ||||
|     } | ||||
|     return is_dilated; | ||||
|   } | ||||
|  | ||||
|   bool is_padded() const { | ||||
|     return std::any_of( | ||||
|       padding.cbegin(), padding.cend(), [](const T& p) { return p != 0; }); | ||||
|     bool is_padded = false; | ||||
|     for (auto p : padding) { | ||||
|       is_padded |= (p != 0); | ||||
|     } | ||||
|     return is_padded; | ||||
|   } | ||||
|  | ||||
|   bool is_output_padding_neg() const { | ||||
|     return std::any_of( | ||||
|       output_padding.cbegin(), | ||||
|       output_padding.cend(), | ||||
|       [](const T& p) { return p < 0; }); | ||||
|     bool is_non_neg = false; | ||||
|     for (const auto& p : output_padding) { | ||||
|       is_non_neg |= (p < 0); | ||||
|     } | ||||
|     return is_non_neg; | ||||
|   } | ||||
|  | ||||
|   bool is_output_padding_big() const { | ||||
|     // Revisit this with std::views::zip at C++20. | ||||
|     bool is_big = false; | ||||
|     for (auto i: c10::irange(output_padding.size())) { | ||||
|       if (output_padding[i] >= stride[i]) { | ||||
|         return true; | ||||
|       } | ||||
|       is_big |= (output_padding[i] >= stride[i]); | ||||
|     } | ||||
|     return false; | ||||
|     return is_big; | ||||
|   } | ||||
|  | ||||
|   bool is_padding_neg() const { | ||||
|     return std::any_of( | ||||
|       padding.cbegin(), padding.cend(), [](const T& p) { return p < 0; }); | ||||
|     bool is_non_neg = false; | ||||
|     for (const auto& p : padding) { | ||||
|       is_non_neg |= (p < 0); | ||||
|     } | ||||
|     return is_non_neg; | ||||
|   } | ||||
|  | ||||
|   bool is_dilation_neg() const { | ||||
|     return std::any_of( | ||||
|       dilation.cbegin(), dilation.cend(), [](const T& d) { return d < 0; }); | ||||
|     bool is_non_neg = false; | ||||
|     for (const auto& p : dilation) { | ||||
|       is_non_neg |= (p < 0); | ||||
|     } | ||||
|     return is_non_neg; | ||||
|   } | ||||
|  | ||||
|   bool is_stride_nonpos() const { | ||||
|     return std::any_of( | ||||
|       stride.cbegin(), stride.cend(), [](const T& s) { return s <= 0; }); | ||||
|     bool is_nonpos = false; | ||||
|     for (const auto& s : stride) { | ||||
|       is_nonpos |= (s <= 0); | ||||
|     } | ||||
|     return is_nonpos; | ||||
|   } | ||||
|  | ||||
|   void view1d_as_2d() { | ||||
|  | ||||
| @ -1,167 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/core/Tensor.h> | ||||
| #include <ATen/TensorUtils.h> | ||||
|  | ||||
| #ifndef AT_PER_OPERATOR_HEADERS | ||||
| #include <ATen/CPUFunctions.h> | ||||
| #include <ATen/Functions.h> | ||||
| #include <ATen/NativeFunctions.h> | ||||
| #else | ||||
| #include <ATen/ops/bmm.h> | ||||
| #include <ATen/ops/empty.h> | ||||
| #include <ATen/ops/empty_strided.h> | ||||
| #include <ATen/ops/mm.h> | ||||
| #endif | ||||
|  | ||||
| namespace at::native { | ||||
|  | ||||
| inline bool check_valid_strides_and_return_transposed(const Tensor& mat) { | ||||
|   IntArrayRef tensor_strides = mat.strides(); | ||||
|   IntArrayRef tensor_sizes = mat.sizes(); | ||||
|   int end_dim = mat.dim() - 1; | ||||
|   int alignment = 16 / mat.element_size(); | ||||
|   TORCH_CHECK(uint64_t(mat.data_ptr()) % 16 ==0, "expected data_ptr to be aligned to 16 bytes\n"); | ||||
|   if ((tensor_strides[end_dim - 1] == 1) && (tensor_strides[end_dim] >= std::max<int64_t>(1, tensor_sizes[end_dim - 1]))) { | ||||
|     TORCH_CHECK(tensor_strides[end_dim] % alignment == 0, "strides should be multiple of 16 bytes"); | ||||
|     return true; | ||||
|   } else if ((tensor_strides[end_dim] == 1) && (tensor_strides[end_dim - 1] >= std::max<int64_t>(1, tensor_sizes[end_dim]))) { | ||||
|     TORCH_CHECK(tensor_strides[end_dim - 1] % alignment == 0, "strides should be multiple of 16 bytes"); | ||||
|     return false; | ||||
|   } else { | ||||
|     TORCH_CHECK(false, "Invalid strides/sizes, got ", mat.strides(), " for strides and ", mat.sizes(), " for sizes"); | ||||
|   } | ||||
| } | ||||
|  | ||||
| inline at::Tensor create_grouped_gemm_output_tensor(const Tensor& mat_a, | ||||
| const Tensor& mat_b, | ||||
| const std::optional<at::Tensor>& offs, | ||||
| c10::ScalarType out_dtype | ||||
| ) { | ||||
|   c10::SmallVector<int64_t, 3> out_size; | ||||
|   const bool a_is_2d = mat_a.dim() == 2; | ||||
|   const bool b_is_2d = mat_b.dim() == 2; | ||||
|   if (a_is_2d) { | ||||
|     if (b_is_2d) { | ||||
|       out_size = {offs->size(0), mat_a.size(0), mat_b.size(1)}; | ||||
|     } else { | ||||
|       TORCH_CHECK(offs->size(0) == mat_b.size(0), "matrix batch sizes have to match"); | ||||
|       out_size = {mat_a.size(0), mat_b.size(-1)}; | ||||
|     } | ||||
|   } else { | ||||
|     if (b_is_2d) { | ||||
|       // this case is not actually encountered for MoE gemms | ||||
|       TORCH_CHECK(offs->size(0) == mat_a.size(0), "matrix batch sizes have to match"); | ||||
|       out_size = {mat_a.size(1), mat_b.size(1)}; | ||||
|     } else { // regular bmm | ||||
|       TORCH_CHECK(mat_a.size(0) == mat_b.size(0), "batched dimension has to match"); | ||||
|       out_size = {mat_a.size(0), mat_a.size(1), mat_b.size(-1)}; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   #ifndef USE_ROCM | ||||
|   // For TMA transfers, strides of output tensor have to be either | ||||
|   // 1, or aligned to 16 bytes. | ||||
|   const auto last_dim = out_size.size() - 1; | ||||
|   const auto alignment = 16 / c10::elementSize(out_dtype); | ||||
|   const int64_t size_padded = (out_size[last_dim] + alignment - 1) / alignment * alignment; | ||||
|   std::vector<int64_t> out_stride; | ||||
|   if (a_is_2d != b_is_2d) { | ||||
|     out_stride = {size_padded, 1}; | ||||
|   } else { | ||||
|     out_stride = {out_size[1] * size_padded, size_padded, 1}; | ||||
|   } | ||||
|   return at::empty_strided(out_size, out_stride, mat_a.options().dtype(out_dtype)); | ||||
|   #else | ||||
|   return at::empty(out_size, mat_a.options().dtype(out_dtype)); | ||||
|   #endif | ||||
| } | ||||
|  | ||||
| // Validates the arguments of a grouped matrix multiply. Every failed | ||||
| // condition is reported through TORCH_CHECK. Checks run in this order: | ||||
| //   1. mat_a, then mat_b, must be Float32, BFloat16 or Float16. | ||||
| //   2. mat_a, then mat_b, must have 2 or 3 dimensions. | ||||
| //   3. unless both are 2d, mat_a.size(-1) must equal mat_b.size(-2). | ||||
| //   4. both operands go through check_valid_strides_and_return_transposed. | ||||
| //   5. `offs` must be present iff at least one operand is 2d; when present | ||||
| //      it must be a 1D int32 tensor. | ||||
| //   6. `bias` must be absent. | ||||
| // `out_dtype` is accepted but not inspected here. | ||||
| inline void _grouped_mm_validate_inputs(const Tensor& mat_a, const Tensor& mat_b, | ||||
| const std::optional<at::Tensor>& offs, | ||||
| const std::optional<at::Tensor>& bias, | ||||
| std::optional<c10::ScalarType> out_dtype) { | ||||
|   // Shared dtype predicate for both operands. | ||||
|   const auto has_supported_dtype = [](const Tensor& t) { | ||||
|     return (t.dtype() == at::kBFloat16) || (t.dtype() == at::kFloat) || (t.dtype() == at::kHalf); | ||||
|   }; | ||||
|   TORCH_CHECK(has_supported_dtype(mat_a), "Expected mat_a to be Float32, BFloat16 or Float16 matrix, got ", mat_a.scalar_type()); | ||||
|   TORCH_CHECK(has_supported_dtype(mat_b), "Expected mat_b to be Float32, BFloat16 or Float16 matrix, got ", mat_b.scalar_type()); | ||||
|  | ||||
|   const auto a_rank = mat_a.dim(); | ||||
|   const auto b_rank = mat_b.dim(); | ||||
|   TORCH_CHECK(a_rank == 2 || a_rank == 3, "mat_a has to be 2 or 3d"); | ||||
|   TORCH_CHECK(b_rank == 2 || b_rank == 3, "mat_b has to be 2 or 3d"); | ||||
|  | ||||
|   const bool any_2d = (a_rank == 2) || (b_rank == 2); | ||||
|   const bool both_2d = (a_rank == 2) && (b_rank == 2); | ||||
|   // The contraction-dimension comparison is skipped only for the 2d x 2d case. | ||||
|   if (!both_2d) { | ||||
|     TORCH_CHECK(mat_a.size(-1) == mat_b.size(-2), "contraction dimension of mat_a and mat_b must match"); | ||||
|   } | ||||
|  | ||||
|   // Called only for its validation side effect; the returned value is | ||||
|   // discarded. The helper is expected to raise on unsupported strides. | ||||
|   check_valid_strides_and_return_transposed(mat_a); | ||||
|   check_valid_strides_and_return_transposed(mat_b); | ||||
|  | ||||
|   // Offsets are mandatory when any operand is 2d and forbidden otherwise. | ||||
|   TORCH_CHECK(offs.has_value() == any_2d, "Have to provide offsets if there is a 2d matrix, or no offset if both matrices are 3d"); | ||||
|   if (offs) { | ||||
|     TORCH_CHECK(offs->dim() == 1, "offs has to be 1D"); | ||||
|     TORCH_CHECK(offs->dtype() == at::kInt, "Offsets have to be int32"); | ||||
|   } | ||||
|  | ||||
|   TORCH_CHECK(!bias.has_value(), "Bias not supported yet"); | ||||
| } | ||||
|  | ||||
| // Picks the output dtype of a grouped matrix multiply: the requested | ||||
| // `out_dtype` when one is given, otherwise the scalar type of `mat_a`. | ||||
| // Raises through TORCH_CHECK if the result differs from `mat_a`'s dtype. | ||||
| // `mat_b` is not consulted. | ||||
| inline c10::ScalarType _resolve_grouped_mm_out_dtype(const Tensor& mat_a, const Tensor& mat_b, | ||||
| std::optional<c10::ScalarType> out_dtype) { | ||||
|   const c10::ScalarType resolved = out_dtype.has_value() ? *out_dtype : mat_a.scalar_type(); | ||||
|   // TODO(future PR): enable float32 output dtype for bfloat16 and float16 inputs | ||||
|   TORCH_CHECK(resolved == mat_a.dtype(), "Grouped gemm output dtype must match `mat_a` dtype"); | ||||
|   return resolved; | ||||
| } | ||||
|  | ||||
|  | ||||
| // Fallback grouped matrix multiply built from at::mm_out / at::bmm_out, | ||||
| // writing into `out`. | ||||
| // | ||||
| // When neither operand is 2d, a single at::bmm_out call is issued and `offs` | ||||
| // is not read. Otherwise `offs` is copied to the CPU and each entry is used | ||||
| // as the exclusive end index of one group (the group starts where the | ||||
| // previous one ended, the first at 0): | ||||
| //   * 2d x 3d: rows of mat_a and of out are sliced, mat_b is indexed by group. | ||||
| //   * 3d x 2d: dim 1 of mat_b and of out is sliced, mat_a is indexed by group. | ||||
| //   * 2d x 2d: dim 1 of mat_a and dim 0 of mat_b are sliced, out is indexed | ||||
| //     by group. | ||||
| // `bias` and `out_dtype` are not used by this function. | ||||
| inline void _grouped_mm_fallback(const Tensor& mat_a, const Tensor& mat_b, | ||||
| const std::optional<at::Tensor>& offs, | ||||
| const std::optional<at::Tensor>& bias, | ||||
| std::optional<c10::ScalarType> out_dtype, | ||||
| Tensor out) { | ||||
|   LOG(INFO) << "fallback path for `torch._grouped_mm`, performance may not be optimal"; | ||||
|   const bool a_has_batch = !(mat_a.dim() == 2); | ||||
|   const bool b_has_batch = !(mat_b.dim() == 2); | ||||
|  | ||||
|   if (a_has_batch && b_has_batch) { | ||||
|     // 3d x 3d without offsets - regular bmm | ||||
|     at::bmm_out(out, mat_a, mat_b); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // At least one operand is 2d: walk the groups described by the offsets. | ||||
|   const auto offsets_host = offs.value().cpu(); | ||||
|   const auto num_groups = offsets_host.size(0); | ||||
|   int begin = 0; | ||||
|   for (int g = 0; g < num_groups; ++g) { | ||||
|     const int end = offsets_host[g].item<int>(); | ||||
|     if (b_has_batch) { | ||||
|       // 2d x 3d with offsets | ||||
|       auto lhs_rows = mat_a.slice(0, begin, end); | ||||
|       auto dst = out.slice(0, begin, end); | ||||
|       at::mm_out(dst, lhs_rows, mat_b[g]); | ||||
|     } else if (a_has_batch) { | ||||
|       // 3d x 2d with offsets | ||||
|       auto rhs_cols = mat_b.slice(1, begin, end); | ||||
|       auto dst = out.slice(1, begin, end); | ||||
|       at::mm_out(dst, mat_a[g], rhs_cols); | ||||
|     } else { | ||||
|       // 2d x 2d with offsets | ||||
|       auto lhs_cols = mat_a.slice(1, begin, end); | ||||
|       auto rhs_rows = mat_b.slice(0, begin, end); | ||||
|       auto dst = out[g]; | ||||
|       at::mm_out(dst, lhs_cols, rhs_rows); | ||||
|     } | ||||
|     begin = end; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| } // namespace at::native | ||||
| @ -1360,8 +1360,7 @@ Tensor outer(const Tensor& self, const Tensor& vec2) { | ||||
| #endif | ||||
|  | ||||
|  | ||||
| #if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED() | ||||
| // Used by default on x86 platforms and on AArch64+ACL | ||||
| #if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED() | ||||
| static inline int64_t get_mkldnn_matmul_min_dim() { | ||||
|   static auto value = [&] { | ||||
|     const int64_t default_min_dim = [&] { | ||||
| @ -1396,6 +1395,8 @@ static inline bool apply_mkldnn_matmul_heur(int64_t m, int64_t k, int64_t n) { | ||||
|   return at::globalContext().userEnabledMkldnn() && m > min_dim && k > min_dim && n > min_dim && m * k * n > min_size; | ||||
| } | ||||
| #endif | ||||
|  | ||||
|  | ||||
| static void addmm_impl_cpu_( | ||||
|     Tensor &result, const Tensor &self, Tensor m1, Tensor m2, const Scalar& beta, const Scalar& alpha) { | ||||
|   TORCH_INTERNAL_ASSERT(self.dim() == 2 && m1.dim() == 2 && m2.dim() == 2); | ||||
| @ -1771,8 +1772,8 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens | ||||
|     return (strides[2] == 1 && (sizes[1] == 1 || strides[1] >= sizes[2])) || | ||||
|         (strides[1] == 1 && (sizes[2] == 1 || strides[2] >= sizes[1])); | ||||
|   }; | ||||
| #if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED() | ||||
|   // Always apply mkldnn heuristic on x86 platform, but on ARM only if compiled with ACL | ||||
|  | ||||
| #if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED() | ||||
|   bool apply_heur = apply_mkldnn_matmul_heur(batch1.sizes()[1], batch1.sizes()[2], batch2.sizes()[2]); | ||||
|   if (apply_heur && use_mkldnn_matmul(batch1, batch2, self_or_result)) { | ||||
|     try { | ||||
| @ -1784,6 +1785,7 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   if (contraction_size * res_rows * res_cols < 400) { | ||||
|     if (is_bmm_out) { | ||||
|       AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kBFloat16, kHalf, batch1.scalar_type(), "bmm", [&] { | ||||
|  | ||||
| @ -47,14 +47,10 @@ TORCH_META_FUNC(nll_loss_forward) | ||||
|   TORCH_CHECK( | ||||
|       target.dim() <= 1, | ||||
|       "0D or 1D target tensor expected, multi-target not supported"); | ||||
|   if (self.dim() == 1 && target.dim() == 1) { | ||||
|       TORCH_CHECK_VALUE( | ||||
|           target.size(0) == 1, | ||||
|           "For 1D input, 1D target must have size 1, but got target size: ", | ||||
|           target.size(0)); | ||||
|   } | ||||
|  | ||||
|   auto no_batch_dim = self.dim() == 1  && target.dim() == 0; | ||||
|   TORCH_CHECK( | ||||
|       self.dim() == 1 || (self.size(0) == target.size(0)), | ||||
|       no_batch_dim || (self.size(0) == target.size(0)), | ||||
|       "size mismatch (got input: ", | ||||
|       self.sizes(), | ||||
|       ", target: ", | ||||
|  | ||||
| @ -624,9 +624,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, int64_t> _batch_norm_impl_index( | ||||
|   if (backend == BatchNormBackend::Miopen) { | ||||
|     return std::tuple_cat( | ||||
|              at::miopen_batch_norm( | ||||
|                input.contiguous(input.suggest_memory_format()), | ||||
|                weight.contiguous(), | ||||
|                bias.contiguous(), | ||||
|                input.contiguous(), weight.contiguous(), bias.contiguous(), | ||||
|                running_mean.defined() ? running_mean.contiguous() : running_mean, | ||||
|                running_var.defined() ? running_var.contiguous() : running_var, | ||||
|                training, momentum, eps), | ||||
|  | ||||
| @ -1,6 +1,5 @@ | ||||
| #define TORCH_ASSERT_ONLY_METHOD_OPERATORS | ||||
| #include <ATen/core/Tensor.h> | ||||
| #include <ATen/DTensorState.h> | ||||
|  | ||||
| #ifndef AT_PER_OPERATOR_HEADERS | ||||
| #include <ATen/Functions.h> | ||||
| @ -25,13 +24,8 @@ Tensor one_hot(const Tensor &self, int64_t num_classes) { | ||||
|         if (num_classes == -1) { | ||||
|           num_classes = self.max().item().toLong() + 1; | ||||
|         } | ||||
|         { | ||||
|           // If `self` is a DTensor, then allow implicit replication | ||||
|           // of the `index` Tensor. | ||||
|           at::DTensorAllowImplicitReplication guard; | ||||
|           at::Tensor index = at::arange(num_classes, self.options()); | ||||
|           return at::eq(self.unsqueeze(-1), index).to(kLong); | ||||
|         } | ||||
|         at::Tensor index = at::arange(num_classes, self.options()); | ||||
|         return at::eq(self.unsqueeze(-1), index).to(kLong); | ||||
|     } | ||||
|  | ||||
|     auto shape = self.sizes().vec(); | ||||
|  | ||||
| @ -2174,7 +2174,7 @@ static void _scatter_via_index_put( | ||||
|   if (self.dim() == 1 || broadcast_index) { | ||||
|     Tensor squeezed = index; | ||||
|     if (broadcast_index && index.dim() > 1) { | ||||
|       for (int64_t d = index.dim() - 1; d >= 0; --d) { | ||||
|       for (const auto d : c10::irange(index.dim())) { | ||||
|         if (d == dim) { | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	