Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-31 20:34:54 +08:00)

Compare commits

14 Commits

module-sta...fix_nvrtc_

| SHA1 |
|---|
| e59a086a12 |
| c4d29f9ef5 |
| 8b0fca4dc6 |
| 225d61fc69 |
| 37ccb79993 |
| fac59e4aab |
| 60ec1a1f7a |
| 5ed4624298 |
| 3478f8d7e4 |
| 69b9a7f2e6 |
| eaa4392429 |
| 037f43ba45 |
| f39083423b |
| 5e7e562cda |

| @ -15,8 +15,6 @@ fi | ||||
| # Compress the fatbin with -compress-mode=size for CUDA 13 | ||||
| if [[ "$DESIRED_CUDA" == *"13"* ]]; then | ||||
|     export TORCH_NVCC_FLAGS="-compress-mode=size" | ||||
|     # Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801 | ||||
|     export BUILD_BUNDLE_PTXAS=1 | ||||
| fi | ||||
|  | ||||
| SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" | ||||
|  | ||||
| @ -372,7 +372,7 @@ if __name__ == "__main__": | ||||
|     else: | ||||
|         print("build pytorch without mkldnn backend") | ||||
|  | ||||
|     os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") | ||||
|     if enable_cuda: | ||||
|         print("Updating Cuda Dependency") | ||||
|         filename = os.listdir("/pytorch/dist/") | ||||
|  | ||||
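Most hunks in this compare toggle between the PEP 517 build front-end and the legacy setuptools entry point. For reference only, the two command forms that recur throughout the diff are shown side by side below; both produce a wheel under `dist/`, and `--no-isolation` tells `python -m build` to reuse the packages already installed in the environment, much as `setup.py bdist_wheel` does:

```bash
# PEP 517 front-end form seen on one side of these hunks
python -m build --wheel --no-isolation

# Legacy setuptools form seen on the other side
python setup.py bdist_wheel
```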
| @ -241,7 +241,7 @@ def wait_for_connection(addr, port, timeout=15, attempt_cnt=5): | ||||
|         try: | ||||
|             with socket.create_connection((addr, port), timeout=timeout): | ||||
|                 return | ||||
|         except (ConnectionRefusedError, TimeoutError):  # noqa: PERF203 | ||||
|         except (ConnectionRefusedError, socket.timeout):  # noqa: PERF203 | ||||
|             if i == attempt_cnt - 1: | ||||
|                 raise | ||||
|             time.sleep(timeout) | ||||
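The retry hunk above toggles the exception tuple between `TimeoutError` and `socket.timeout`. On Python 3.10 and newer the two names refer to the same class, so either spelling catches the same connection timeout; a minimal check (assuming a 3.10+ interpreter is on the path) is:

```bash
# Since Python 3.10, socket.timeout is an alias of the built-in TimeoutError,
# so the two except clauses in the hunk above catch the same exception there.
python3 -c "import socket; print(socket.timeout is TimeoutError)"   # True on 3.10+
```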
| @ -442,7 +442,7 @@ def build_torchvision( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd vision && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel") | ||||
|     vision_wheel_name = host.list_dir("vision/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("vision", "dist", vision_wheel_name)) | ||||
|  | ||||
| @ -497,7 +497,7 @@ def build_torchdata( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd data && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel") | ||||
|     wheel_name = host.list_dir("data/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("data", "dist", wheel_name)) | ||||
|  | ||||
| @ -553,7 +553,7 @@ def build_torchtext( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd text && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel") | ||||
|     wheel_name = host.list_dir("text/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("text", "dist", wheel_name)) | ||||
|  | ||||
| @ -614,7 +614,7 @@ def build_torchaudio( | ||||
|     host.run_cmd( | ||||
|         f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \ | ||||
|         && ./packaging/ffmpeg/build.sh \ | ||||
|         && {build_vars} python3 -m build --wheel --no-isolation" | ||||
|         && {build_vars} python3 setup.py bdist_wheel" | ||||
|     ) | ||||
|  | ||||
|     wheel_name = host.list_dir("audio/dist")[0] | ||||
| @ -726,7 +726,7 @@ def start_build( | ||||
|     print("Building PyTorch wheel") | ||||
|     build_opts = "" | ||||
|     if pytorch_build_number is not None: | ||||
|         build_opts += f" -C--build-option=--build-number={pytorch_build_number}" | ||||
|         build_opts += f" --build-number {pytorch_build_number}" | ||||
|     # Breakpad build fails on aarch64 | ||||
|     build_vars = "USE_BREAKPAD=0 " | ||||
|     if branch == "nightly": | ||||
| @ -747,8 +747,7 @@ def start_build( | ||||
|         print("build pytorch with mkldnn+acl backend") | ||||
|         build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" | ||||
|         host.run_cmd( | ||||
|             f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && " | ||||
|             f"{build_vars} python3 -m build --wheel --no-isolation{build_opts}" | ||||
|             f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}" | ||||
|         ) | ||||
|         print("Repair the wheel") | ||||
|         pytorch_wheel_name = host.list_dir("pytorch/dist")[0] | ||||
| @ -764,7 +763,7 @@ def start_build( | ||||
|     else: | ||||
|         print("build pytorch without mkldnn backend") | ||||
|         host.run_cmd( | ||||
|             f"cd pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}" | ||||
|             f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}" | ||||
|         ) | ||||
|  | ||||
|     print("Deleting build folder") | ||||
| @ -1005,7 +1004,7 @@ if __name__ == "__main__": | ||||
|         install_condaforge_python(host, args.python_version) | ||||
|         sys.exit(0) | ||||
|  | ||||
|     python_version = args.python_version if args.python_version is not None else "3.10" | ||||
|     python_version = args.python_version if args.python_version is not None else "3.9" | ||||
|  | ||||
|     if args.use_torch_from_pypi: | ||||
|         configure_system(host, compiler=args.compiler, python_version=python_version) | ||||
|  | ||||
| @ -69,8 +69,7 @@ RUN bash ./install_cuda.sh 13.0 | ||||
| ENV DESIRED_CUDA=13.0 | ||||
|  | ||||
| FROM ${ROCM_IMAGE} as rocm | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
| ENV MKLROOT /opt/intel | ||||
|  | ||||
| @ -36,12 +36,6 @@ case ${DOCKER_TAG_PREFIX} in | ||||
|     ;; | ||||
|   rocm*) | ||||
|     BASE_TARGET=rocm | ||||
|     PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|     # add gfx950 conditionally starting in ROCm 7.0 | ||||
|     if [[ "$ROCM_VERSION" == *"7.0"* ]]; then | ||||
|         PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|     fi | ||||
|     EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" | ||||
|     ;; | ||||
|   *) | ||||
|     echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}" | ||||
|  | ||||
| @ -84,8 +84,8 @@ fi | ||||
| _UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152 | ||||
| _UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96 | ||||
| if [[ "$image" == *rocm* ]]; then | ||||
|   _UCX_COMMIT=29831d319e6be55cb8c768ca61de335c934ca39e | ||||
|   _UCC_COMMIT=9f4b242cbbd8b1462cbc732eb29316cdfa124b77 | ||||
|   _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6 | ||||
|   _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d | ||||
| fi | ||||
|  | ||||
| tag=$(echo $image | awk -F':' '{print $2}') | ||||
| @ -175,6 +175,20 @@ case "$tag" in | ||||
|     fi | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.4 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     if [[ $tag =~ "benchmarks" ]]; then | ||||
|       INDUCTOR_BENCHMARKS=yes | ||||
|     fi | ||||
|     ;; | ||||
|   pytorch-linux-noble-rocm-alpha-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=7.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
| @ -182,9 +196,6 @@ case "$tag" in | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950" | ||||
|     if [[ $tag =~ "benchmarks" ]]; then | ||||
|       INDUCTOR_BENCHMARKS=yes | ||||
|     fi | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
| @ -441,3 +452,12 @@ elif [ "$HAS_TRITON" = "yes" ]; then | ||||
|   echo "expecting triton to not be installed, but it is" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # Sanity check cmake version.  Executorch reinstalls cmake and I'm not sure if | ||||
| # they support 4.0.0 yet, so exclude them from this check. | ||||
| CMAKE_VERSION=$(drun cmake --version) | ||||
| if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then | ||||
|   echo "CMake version is not 4.0.0:" | ||||
|   drun cmake --version | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| v2.28.3-1 | ||||
| v2.27.5-1 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| v2.28.3-1 | ||||
| v2.27.7-1 | ||||
|  | ||||
| @ -42,6 +42,12 @@ EOF | ||||
|     rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}" | ||||
|     amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu" | ||||
|  | ||||
|     # Special case for ROCM_VERSION == 7.0 | ||||
|     if [[ $(ver "$ROCM_VERSION") -eq $(ver 7.0) ]]; then | ||||
|         rocm_baseurl="https://repo.radeon.com/rocm/apt/7.0_alpha2" | ||||
|         amdgpu_baseurl="https://repo.radeon.com/amdgpu/30.10_alpha2/ubuntu" | ||||
|     fi | ||||
|  | ||||
|     # Add amdgpu repository | ||||
|     UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'` | ||||
|     echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list | ||||
|  | ||||
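The ROCm special case above, like the later Docker build hunks, compares versions through a small `ver` helper rather than with substring matches. The helper's definition is not part of this compare; a minimal sketch of the usual zero-padded form such a helper takes (an assumption, not necessarily the repository's exact code) is:

```bash
# Hypothetical ver() helper: pads each dotted component into a fixed-width
# integer (e.g. "7.0" -> "  7000000000") so versions compare numerically.
ver() {
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ')
}

# Usage matching the hunk above: exact numeric match against 7.0.
ROCM_VERSION="7.0"
if [[ $(ver "$ROCM_VERSION") -eq $(ver 7.0) ]]; then
    echo "ROCm 7.0 detected"
fi
```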
| @ -12,8 +12,8 @@ function do_install() { | ||||
|  | ||||
|     rocm_version_nodot=${rocm_version//./} | ||||
|  | ||||
|     # https://github.com/icl-utk-edu/magma/pull/65 | ||||
|     MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec | ||||
|     # Version 2.7.2 + ROCm related updates | ||||
|     MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|     magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
|  | ||||
|     rocm_dir="/opt/rocm" | ||||
|  | ||||
| @ -66,15 +66,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" | ||||
|   # Triton needs at least gcc-9 to build | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python -m build --wheel --no-isolation | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
| elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then | ||||
|   # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain | ||||
|   add-apt-repository -y ppa:ubuntu-toolchain-r/test | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python -m build --wheel --no-isolation | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
| else | ||||
|   conda_run python -m build --wheel --no-isolation | ||||
|   conda_run python setup.py bdist_wheel | ||||
| fi | ||||
|  | ||||
| # Copy the wheel to /opt for multi stage docker builds | ||||
|  | ||||
| @ -40,16 +40,12 @@ case ${DOCKER_TAG_PREFIX} in | ||||
|         ;; | ||||
|     rocm*) | ||||
|         # we want the patch version of 6.4 instead | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then | ||||
|         if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then | ||||
|             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2" | ||||
|         fi | ||||
|         BASE_TARGET=rocm | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         # add gfx950 conditionally starting in ROCm 7.0 | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then | ||||
|             PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}" | ||||
|         ;; | ||||
|     *) | ||||
|  | ||||
							
								
								
									
.ci/docker/manywheel/Dockerfile_cxx11-abi (new file, 71 lines)
							| @ -0,0 +1,71 @@ | ||||
| FROM centos:8 as base | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| ENV PATH /opt/rh/gcc-toolset-11/root/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin | ||||
|  | ||||
| # change to a valid repo | ||||
| RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*.repo | ||||
| # enable to install ninja-build | ||||
| RUN sed -i 's|enabled=0|enabled=1|g' /etc/yum.repos.d/CentOS-Linux-PowerTools.repo | ||||
|  | ||||
| RUN yum -y update | ||||
| RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel sudo | ||||
| RUN yum install -y autoconf automake make cmake gdb gcc-toolset-11-gcc-c++ | ||||
|  | ||||
|  | ||||
| FROM base as openssl | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
|  | ||||
| # Install python | ||||
| FROM base as python | ||||
| RUN yum install -y openssl-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel | ||||
| ADD common/install_cpython.sh install_cpython.sh | ||||
| RUN bash ./install_cpython.sh && rm install_cpython.sh | ||||
|  | ||||
| FROM base as conda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| RUN /opt/conda/bin/conda install -y cmake | ||||
|  | ||||
| FROM base as intel | ||||
| # Install MKL | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=conda              /opt/conda                            /opt/conda | ||||
| ENV PATH=/opt/conda/bin:$PATH | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
|  | ||||
| FROM base as patchelf | ||||
| ADD ./common/install_patchelf.sh install_patchelf.sh | ||||
| RUN bash ./install_patchelf.sh && rm install_patchelf.sh | ||||
| RUN cp $(which patchelf) /patchelf | ||||
|  | ||||
| FROM base as jni | ||||
| ADD ./common/install_jni.sh install_jni.sh | ||||
| ADD ./java/jni.h jni.h | ||||
| RUN bash ./install_jni.sh && rm install_jni.sh | ||||
|  | ||||
| FROM base as libpng | ||||
| ADD ./common/install_libpng.sh install_libpng.sh | ||||
| RUN bash ./install_libpng.sh && rm install_libpng.sh | ||||
|  | ||||
| FROM base as final | ||||
| COPY --from=openssl            /opt/openssl                          /opt/openssl | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=intel              /opt/intel                            /opt/intel | ||||
| COPY --from=conda              /opt/conda                            /opt/conda | ||||
| COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf | ||||
| COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h | ||||
| COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/include/png*               /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/ | ||||
| COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig | ||||
|  | ||||
| RUN yum install -y ninja-build | ||||
| @ -43,6 +43,12 @@ case ${image} in | ||||
|         MANY_LINUX_VERSION="2_28_aarch64" | ||||
|         OPENBLAS_VERSION="v0.3.30" | ||||
|         ;; | ||||
|     manylinuxcxx11-abi-builder:cpu-cxx11-abi) | ||||
|         TARGET=final | ||||
|         GPU_IMAGE="" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" | ||||
|         MANY_LINUX_VERSION="cxx11-abi" | ||||
|         ;; | ||||
|     manylinuxs390x-builder:cpu-s390x) | ||||
|         TARGET=final | ||||
|         GPU_IMAGE=s390x/almalinux:8 | ||||
| @ -76,7 +82,7 @@ case ${image} in | ||||
|         ;; | ||||
|     manylinux2_28-builder:rocm*) | ||||
|         # we want the patch version of 6.4 instead | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then | ||||
|         if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then | ||||
|             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2" | ||||
|         fi | ||||
|         TARGET=rocm_final | ||||
| @ -84,10 +90,6 @@ case ${image} in | ||||
|         DEVTOOLSET_VERSION="11" | ||||
|         GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         # add gfx950 conditionally starting in ROCm 7.0 | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then | ||||
|             PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" | ||||
|         ;; | ||||
|     manylinux2_28-builder:xpu) | ||||
|  | ||||
| @ -10,11 +10,6 @@ boto3==1.35.42 | ||||
| #Pinned versions: 1.19.12, 1.16.34 | ||||
| #test that import: | ||||
|  | ||||
| build==1.3.0 | ||||
| #Description: A simple, correct Python build frontend. | ||||
| #Pinned versions: 1.3.0 | ||||
| #test that import: | ||||
|  | ||||
| click | ||||
| #Description: Command Line Interface Creation Kit | ||||
| #Pinned versions: | ||||
| @ -111,12 +106,14 @@ networkx==2.8.8 | ||||
| #Pinned versions: 2.8.8 | ||||
| #test that import: functorch | ||||
|  | ||||
| ninja==1.11.1.4 | ||||
| ninja==1.11.1.3 | ||||
| #Description: build system. Used in some tests. Used in build to generate build | ||||
| #time tracing information | ||||
| #Pinned versions: 1.11.1.4 | ||||
| #Pinned versions: 1.11.1.3 | ||||
| #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py | ||||
|  | ||||
| numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x" | ||||
| numba==0.55.2 ; python_version == "3.9" and platform_machine != "s390x" | ||||
| numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x" | ||||
| numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x" | ||||
| #Description: Just-In-Time Compiler for Numerical Functions | ||||
| @ -137,7 +134,7 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x" | ||||
| #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py, | ||||
| #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py, | ||||
| #test_binary_ufuncs.py | ||||
| numpy==1.22.4; python_version == "3.10" | ||||
| numpy==1.22.4; python_version == "3.9" or python_version == "3.10" | ||||
| numpy==1.26.2; python_version == "3.11" or python_version == "3.12" | ||||
| numpy==2.1.2; python_version >= "3.13" | ||||
|  | ||||
| @ -172,9 +169,9 @@ pillow==11.0.0 | ||||
| #Pinned versions: 10.3.0 | ||||
| #test that import: | ||||
|  | ||||
| protobuf==5.29.5 | ||||
| protobuf==5.29.4 | ||||
| #Description:  Google's data interchange format | ||||
| #Pinned versions: 5.29.5 | ||||
| #Pinned versions: 5.29.4 | ||||
| #test that import: test_tensorboard.py, test/onnx/* | ||||
|  | ||||
| psutil | ||||
| @ -329,6 +326,8 @@ pywavelets==1.7.0 ; python_version >= "3.12" | ||||
| lxml==5.3.0 | ||||
| #Description: This is a requirement of unittest-xml-reporting | ||||
|  | ||||
| # Python-3.9 binaries | ||||
|  | ||||
| PyGithub==2.3.0 | ||||
|  | ||||
| sympy==1.13.3 | ||||
| @ -378,7 +377,7 @@ dataclasses_json==0.6.7 | ||||
| #Pinned versions: 0.6.7 | ||||
| #test that import: | ||||
|  | ||||
| cmake==3.31.6 | ||||
| cmake==4.0.0 | ||||
| #Description: required for building | ||||
|  | ||||
| tlparse==0.4.0 | ||||
|  | ||||
| @ -1,15 +1,8 @@ | ||||
| sphinx==5.3.0 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 5.3.0 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@d53b0ffb9b1cda68260693ea98f3483823c88d8e#egg=pytorch_sphinx_theme2 | ||||
|  | ||||
| standard-imghdr==3.13.0; python_version >= "3.13" | ||||
| #Description: This is needed by Sphinx, so it needs to be added here. | ||||
| # The reasons are as follows: | ||||
| # 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr); | ||||
| # 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13. | ||||
| # Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency. | ||||
|  | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2 | ||||
| # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering | ||||
| # but it doesn't seem to work and hangs around idly. The initial thought that it is probably | ||||
| # something related to Docker setup. We can investigate this later. | ||||
|  | ||||
| @ -1,11 +1,11 @@ | ||||
| SHELL=/usr/bin/env bash | ||||
|  | ||||
| DOCKER_CMD ?= docker | ||||
| DESIRED_ROCM ?= 7.0 | ||||
| DESIRED_ROCM ?= 6.4 | ||||
| DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM)) | ||||
| PACKAGE_NAME = magma-rocm | ||||
| # inherit this from underlying docker image, do not pass this env var to docker | ||||
| #PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201 | ||||
| #PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201 | ||||
|  | ||||
| DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \ | ||||
| @ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	magma-rocm/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-rocm70 | ||||
| all: magma-rocm64 | ||||
| all: magma-rocm63 | ||||
|  | ||||
| @ -25,11 +24,6 @@ clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-rocm70 | ||||
| magma-rocm70: DESIRED_ROCM := 7.0 | ||||
| magma-rocm70: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-rocm64 | ||||
| magma-rocm64: DESIRED_ROCM := 6.4 | ||||
| magma-rocm64: | ||||
|  | ||||
| @ -6,8 +6,8 @@ set -eou pipefail | ||||
| # The script expects DESIRED_CUDA and PACKAGE_NAME to be set | ||||
| ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" | ||||
|  | ||||
| # https://github.com/icl-utk-edu/magma/pull/65 | ||||
| MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec | ||||
| # Version 2.7.2 + ROCm related updates | ||||
| MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|  | ||||
| # Folders for the build | ||||
| PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata | ||||
| @ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE | ||||
|  | ||||
| # Fetch magma sources and verify checksum | ||||
| pushd ${PACKAGE_DIR} | ||||
| git clone https://github.com/jeffdaily/magma | ||||
| git clone https://bitbucket.org/icl/magma.git | ||||
| pushd magma | ||||
| git checkout ${MAGMA_VERSION} | ||||
| popd | ||||
|  | ||||
| @ -142,7 +142,7 @@ time CMAKE_ARGS=${CMAKE_ARGS[@]} \ | ||||
|     EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ | ||||
|     BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ | ||||
|     USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ | ||||
|     python -m build --wheel --no-isolation --outdir /tmp/$WHEELHOUSE_DIR | ||||
|     python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR | ||||
| echo "Finished setup.py bdist at $(date)" | ||||
|  | ||||
| # Build libtorch packages | ||||
|  | ||||
| @ -104,7 +104,7 @@ if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|     export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr | ||||
| fi | ||||
|  | ||||
| echo "Calling -m pip install . -v --no-build-isolation at $(date)" | ||||
| echo "Calling 'python -m pip install .' at $(date)" | ||||
|  | ||||
| if [[ $LIBTORCH_VARIANT = *"static"* ]]; then | ||||
|     STATIC_CMAKE_FLAG="-DTORCH_STATIC=1" | ||||
|  | ||||
| @ -107,10 +107,6 @@ if [[ $ROCM_INT -ge 60200 ]]; then | ||||
|     ROCM_SO_FILES+=("librocm-core.so") | ||||
| fi | ||||
|  | ||||
| if [[ $ROCM_INT -ge 70000 ]]; then | ||||
|     ROCM_SO_FILES+=("librocroller.so") | ||||
| fi | ||||
|  | ||||
| OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|  | ||||
| @ -290,13 +290,13 @@ else | ||||
|  | ||||
|       WERROR=1 python setup.py clean | ||||
|  | ||||
|       WERROR=1 python -m build --wheel --no-isolation | ||||
|       WERROR=1 python setup.py bdist_wheel | ||||
|     else | ||||
|       python setup.py clean | ||||
|       if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then | ||||
|         source .ci/pytorch/install_cache_xla.sh | ||||
|       fi | ||||
|       python -m build --wheel --no-isolation | ||||
|       python setup.py bdist_wheel | ||||
|     fi | ||||
|     pip_install_whl "$(echo dist/*.whl)" | ||||
|  | ||||
|  | ||||
| @ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \ | ||||
|  | ||||
| # Build the docs | ||||
| pushd docs/cpp | ||||
| time make VERBOSE=1 html | ||||
| time make VERBOSE=1 html -j | ||||
|  | ||||
| popd | ||||
| popd | ||||
|  | ||||
| @ -35,12 +35,11 @@ fi | ||||
|  | ||||
| print_cmake_info | ||||
| if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then | ||||
|   # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls | ||||
|   USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python -m build --wheel --no-isolation | ||||
|   USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel | ||||
| else | ||||
|   # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests | ||||
|   # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python -m build --wheel --no-isolation -C--build-option=--plat-name=macosx_11_0_arm64 | ||||
|   # NB: we always build with distributed; USE_DISTRIBUTED turns off all | ||||
|   # backends (specifically the gloo backend), so test that this case works too | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 | ||||
| fi | ||||
| if which sccache > /dev/null; then | ||||
|   print_sccache_stats | ||||
|  | ||||
| @ -13,9 +13,13 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( | ||||
| fi | ||||
| popd | ||||
|  | ||||
| python -mpip install -r requirements.txt | ||||
|  | ||||
| # enable debug asserts in serialization | ||||
| export TORCH_SERIALIZATION_DEBUG=1 | ||||
|  | ||||
| python -mpip install --no-input -r requirements.txt | ||||
|  | ||||
| setup_test_python() { | ||||
|   # The CircleCI worker hostname doesn't resolve to an address. | ||||
|   # This environment variable makes ProcessGroupGloo default to | ||||
| @ -55,7 +59,7 @@ test_python_shard() { | ||||
|  | ||||
|   setup_test_python | ||||
|  | ||||
|   time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests --shard "$1" "$NUM_TEST_SHARDS" | ||||
|   time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS" | ||||
|  | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -26,7 +26,6 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then | ||||
|     time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo | ||||
|     time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl | ||||
|     time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering | ||||
|     time python test/run_test.py --verbose -i distributed/test_aten_comm_compute_reordering | ||||
|     time python test/run_test.py --verbose -i distributed/test_store | ||||
|     time python test/run_test.py --verbose -i distributed/test_symmetric_memory | ||||
|     time python test/run_test.py --verbose -i distributed/test_pg_wrapper | ||||
|  | ||||
| @ -322,29 +322,23 @@ test_python_shard() { | ||||
|  | ||||
|   # modify LD_LIBRARY_PATH to ensure it has the conda env. | ||||
|   # This set of tests has been shown to be buggy without it for the split-build | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|  | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_python() { | ||||
|   # shellcheck disable=SC2086 | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_python_smoke() { | ||||
|   # Smoke tests for H100/B200 | ||||
|   # Smoke tests for H100 | ||||
|   time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_python_smoke_b200() { | ||||
|   # Targeted smoke tests for B200 - staged approach to avoid too many failures | ||||
|   time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_h100_distributed() { | ||||
|   # Distributed tests at H100 | ||||
|   time python test/run_test.py --include distributed/_composable/test_composability/test_pp_composability.py  $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
| @ -390,7 +384,6 @@ test_dynamo_wrapped_shard() { | ||||
|     --exclude-distributed-tests \ | ||||
|     --exclude-torch-export-tests \ | ||||
|     --exclude-aot-dispatch-tests \ | ||||
|     --exclude-quantization-tests \ | ||||
|     --shard "$1" "$NUM_TEST_SHARDS" \ | ||||
|     --verbose \ | ||||
|     --upload-artifacts-while-running | ||||
| @ -435,7 +428,7 @@ test_inductor_distributed() { | ||||
|  | ||||
|   # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported | ||||
|   # with if required # gpus aren't available | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_aten_comm_compute_reordering distributed/test_compute_comm_reordering --verbose | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -1163,12 +1156,6 @@ test_distributed() { | ||||
|   fi | ||||
| } | ||||
|  | ||||
| test_quantization() { | ||||
|   echo "Testing quantization" | ||||
|  | ||||
|   python test/test_quantization.py | ||||
| } | ||||
|  | ||||
| test_rpc() { | ||||
|   echo "Testing RPC C++ tests" | ||||
|   # NB: the ending test_rpc must match the current function name for the current | ||||
| @ -1415,7 +1402,7 @@ EOF | ||||
|   pip3 install -r requirements.txt | ||||
|   # shellcheck source=./common-build.sh | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" | ||||
|   python -m build --wheel --no-isolation -C--build-option=--bdist-dir="base_bdist_tmp" --outdir "base_dist" | ||||
|   python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist" | ||||
|   python -mpip install base_dist/*.whl | ||||
|   echo "::endgroup::" | ||||
|  | ||||
| @ -1586,7 +1573,7 @@ test_executorch() { | ||||
| test_linux_aarch64() { | ||||
|   python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \ | ||||
|         test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \ | ||||
|         test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops profiler/test_memory_profiler \ | ||||
|         test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \ | ||||
|         distributed/elastic/timer/api_test distributed/elastic/timer/local_timer_example distributed/elastic/timer/local_timer_test \ | ||||
|         --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
|  | ||||
| @ -1617,7 +1604,7 @@ test_operator_benchmark() { | ||||
|   test_inductor_set_cpu_affinity | ||||
|  | ||||
|   cd benchmarks/operator_benchmark/pt_extension | ||||
|   python -m pip install . -v --no-build-isolation | ||||
|   python -m pip install . | ||||
|  | ||||
|   cd "${TEST_DIR}"/benchmarks/operator_benchmark | ||||
|   $TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \ | ||||
| @ -1630,25 +1617,6 @@ test_operator_benchmark() { | ||||
|       --expected "expected_ci_operator_benchmark_eager_float32_cpu.csv" | ||||
| } | ||||
|  | ||||
| test_operator_microbenchmark() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   TEST_DIR=$(pwd) | ||||
|  | ||||
|   cd benchmarks/operator_benchmark/pt_extension | ||||
|   python -m pip install . | ||||
|  | ||||
|   cd "${TEST_DIR}"/benchmarks/operator_benchmark | ||||
|  | ||||
|   for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do | ||||
|     $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \ | ||||
|       --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \ | ||||
|       --benchmark-name "PyTorch operator microbenchmark" --use-compile | ||||
|     $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \ | ||||
|       --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \ | ||||
|       --benchmark-name "PyTorch operator microbenchmark" | ||||
|   done | ||||
| } | ||||
|  | ||||
| if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then | ||||
|   (cd test && python -c "import torch; print(torch.__config__.show())") | ||||
| @ -1681,8 +1649,6 @@ elif [[ "${TEST_CONFIG}" == *executorch* ]]; then | ||||
|   test_executorch | ||||
| elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then | ||||
|   test_python_legacy_jit | ||||
| elif [[ "$TEST_CONFIG" == 'quantization' ]]; then | ||||
|   test_quantization | ||||
| elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then | ||||
|   # TODO: run some C++ tests | ||||
|   echo "no-op at the moment" | ||||
| @ -1705,8 +1671,6 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then | ||||
|     test_operator_benchmark cpu ${TEST_MODE} | ||||
|  | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then | ||||
|   test_operator_microbenchmark | ||||
| elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then | ||||
|   test_inductor_distributed | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then | ||||
| @ -1809,14 +1773,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then | ||||
|   test_xpu_bin | ||||
| elif [[ "${TEST_CONFIG}" == smoke ]]; then | ||||
|   test_python_smoke | ||||
| elif [[ "${TEST_CONFIG}" == smoke_b200 ]]; then | ||||
|   test_python_smoke_b200 | ||||
| elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then | ||||
|   test_h100_distributed | ||||
| elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then | ||||
|   test_h100_symm_mem | ||||
| elif [[ "${TEST_CONFIG}" == "b200-symm-mem" ]]; then | ||||
|   test_h100_symm_mem | ||||
| elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then | ||||
|   test_h100_cutlass_backend | ||||
| else | ||||
|  | ||||
| @ -70,7 +70,7 @@ sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| # Build the wheel | ||||
| python -m build --wheel --no-build-isolation | ||||
| python setup.py bdist_wheel | ||||
| if ($LASTEXITCODE -ne 0) { exit 1 } | ||||
|  | ||||
| # Install the wheel locally | ||||
|  | ||||
| @ -130,7 +130,7 @@ if "%USE_CUDA%"=="1" ( | ||||
| :: Print all existing environment variable for debugging | ||||
| set | ||||
|  | ||||
| python -m build --wheel --no-isolation | ||||
| python setup.py bdist_wheel | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
| sccache --show-stats | ||||
|  | ||||
| @ -25,7 +25,7 @@ echo Copying over test times file | ||||
| robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files" | ||||
|  | ||||
| echo Run nn tests | ||||
| python run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose | ||||
| python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose | ||||
| if ERRORLEVEL 1 goto fail | ||||
|  | ||||
| popd | ||||
|  | ||||
| @ -48,7 +48,7 @@ sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| :: Call PyTorch build script | ||||
| python -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
| python setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
|  | ||||
| :: show sccache stats | ||||
| sccache --show-stats | ||||
|  | ||||
| @ -28,5 +28,5 @@ start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_t | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| set "PATH=%CD%\Python\Scripts;%CD%\Python;%PATH%" | ||||
| %PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel build | ||||
| %PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| @ -86,7 +86,7 @@ copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_ | ||||
| goto build_end | ||||
|  | ||||
| :pytorch | ||||
| %PYTHON_EXEC% -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
| %PYTHON_EXEC% setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
|  | ||||
| :build_end | ||||
| IF ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| @ -63,7 +63,7 @@ if errorlevel 1 exit /b 1 | ||||
| call %CONDA_HOME%\condabin\activate.bat testenv | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| call conda install  -y -q -c conda-forge libuv=1.51 | ||||
| call conda install  -y -q -c conda-forge libuv=1.39 | ||||
| call conda install -y -q intel-openmp | ||||
|  | ||||
| echo "install and test libtorch" | ||||
|  | ||||
| @ -18,7 +18,7 @@ if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake | ||||
|  | ||||
| %PYTHON_EXEC% -m pip install pyyaml | ||||
| %PYTHON_EXEC% -m pip install mkl-include mkl-static | ||||
| %PYTHON_EXEC% -m pip install boto3 requests ninja typing_extensions setuptools==72.1.0 | ||||
| %PYTHON_EXEC% -m pip install boto3 ninja typing_extensions setuptools==72.1.0 | ||||
|  | ||||
| where cmake.exe | ||||
|  | ||||
|  | ||||
| @ -143,8 +143,7 @@ case $desired_python in | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13t) | ||||
|         echo "Using 3.13t deps" | ||||
|         mac_version='macosx-11.0-arm64' | ||||
|         echo "Using 3.13 deps" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
| @ -178,7 +177,8 @@ source ~/${desired_python}-build/bin/activate | ||||
| retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" | ||||
| retry brew install libomp | ||||
|  | ||||
| # For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule | ||||
| # For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which | ||||
| # is build as part of tensorpipe submodule | ||||
| export USE_DISTRIBUTED=1 | ||||
|  | ||||
| export USE_MKLDNN=OFF | ||||
| @ -186,11 +186,11 @@ export USE_QNNPACK=OFF | ||||
| export BUILD_TEST=OFF | ||||
|  | ||||
| pushd "$pytorch_rootdir" | ||||
| echo "Calling -m build --wheel --no-isolation at $(date)" | ||||
| echo "Calling setup.py bdist_wheel at $(date)" | ||||
|  | ||||
| _PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python -m build --wheel --no-isolation --outdir "$whl_tmp_dir" -C--plat-name="${mac_version//[-.]/_}" | ||||
| _PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name "${mac_version//[-.]/_}" | ||||
|  | ||||
| echo "Finished -m build --wheel --no-isolation at $(date)" | ||||
| echo "Finished setup.py bdist_wheel at $(date)" | ||||
|  | ||||
| if [[ $package_type != 'libtorch' ]]; then | ||||
|     echo "delocating wheel dependencies" | ||||
|  | ||||
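In the macOS wheel hunk above, the platform tag handed to the build comes from `mac_version` via a bash pattern substitution: `${mac_version//[-.]/_}` replaces every `-` and `.` with `_`. A one-line illustration:

```bash
mac_version='macosx-11.0-arm64'
echo "${mac_version//[-.]/_}"   # prints: macosx_11_0_arm64
```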
							
								
								
									
.circleci/scripts/functorch_doc_push_script.sh (new executable file, 47 lines)
							| @ -0,0 +1,47 @@ | ||||
| #!/bin/bash | ||||
| # =================== The following code **should** be executed inside Docker container =================== | ||||
|  | ||||
| # Install dependencies | ||||
| sudo apt-get -y update | ||||
| sudo apt-get -y install expect-dev | ||||
|  | ||||
| # This is where the local pytorch install in the docker image is located | ||||
| pt_checkout="/var/lib/jenkins/workspace" | ||||
| source "$pt_checkout/.ci/pytorch/common_utils.sh" | ||||
| echo "functorch_doc_push_script.sh: Invoked with $*" | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| version=${DOCS_VERSION:-nightly} | ||||
| echo "version: $version" | ||||
|  | ||||
| # Build functorch docs | ||||
| pushd $pt_checkout/functorch/docs | ||||
| pip -q install -r requirements.txt | ||||
| make html | ||||
| popd | ||||
|  | ||||
| git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages | ||||
| pushd functorch_ghpages | ||||
|  | ||||
| if [ $version == "main" ]; then | ||||
|   version=nightly | ||||
| fi | ||||
|  | ||||
| git rm -rf "$version" || true | ||||
| mv "$pt_checkout/functorch/docs/build/html" "$version" | ||||
|  | ||||
| git add "$version" || true | ||||
| git status | ||||
| git config user.email "soumith+bot@pytorch.org" | ||||
| git config user.name "pytorchbot" | ||||
| # If there aren't changes, don't make a commit; push is no-op | ||||
| git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true | ||||
| git status | ||||
|  | ||||
| if [[ "${WITH_PUSH:-}" == true ]]; then | ||||
|   git push -u origin gh-pages | ||||
| fi | ||||
|  | ||||
| popd | ||||
| # =================== The above code **should** be executed inside Docker container =================== | ||||
| @ -69,8 +69,6 @@ readability-string-compare, | ||||
| ' | ||||
| HeaderFilterRegex: '^(aten/|c10/|torch/).*$' | ||||
| WarningsAsErrors: '*' | ||||
| LineFilter: | ||||
|   - name: '/usr/include/.*' | ||||
| CheckOptions: | ||||
|   cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true | ||||
|   cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true | ||||
|  | ||||
							
								
								
									
.github/ISSUE_TEMPLATE/ci-sev.md (vendored, 4 lines changed)
							| @ -1,10 +1,6 @@ | ||||
| --- | ||||
| name: "⚠️ CI SEV" | ||||
| about: Tracking incidents for PyTorch's CI infra. | ||||
| title: '' | ||||
| labels: '' | ||||
| assignees: '' | ||||
|  | ||||
| --- | ||||
|  | ||||
| > NOTE: Remember to label this issue with "`ci: sev`" | ||||
|  | ||||
							
								
								
									
.github/ISSUE_TEMPLATE/disable-autorevert.md (vendored, 18 lines, deleted file)
							| @ -1,18 +0,0 @@ | ||||
| --- | ||||
| name: DISABLE AUTOREVERT | ||||
| about: Disables autorevert when open | ||||
| title: "❌\U0001F519 [DISABLE AUTOREVERT]" | ||||
| labels: 'ci: disable-autorevert' | ||||
| assignees: '' | ||||
|  | ||||
| --- | ||||
|  | ||||
| This issue, while open, disables the autorevert functionality. | ||||
|  | ||||
| More details can be found [here](https://github.com/pytorch/test-infra/blob/main/aws/lambda/pytorch-auto-revert/README.md) | ||||
|  | ||||
|  | ||||
| ## Why are you disabling autorevert? | ||||
|  | ||||
|  | ||||
| ## Links to any issues/commits/errors that shows the source of problem | ||||
							
								
								
									
.github/ISSUE_TEMPLATE/disable-ci-jobs.md (vendored, 6 lines changed)
							| @ -1,10 +1,8 @@ | ||||
| --- | ||||
| name: Disable CI jobs (PyTorch Dev Infra only) | ||||
| about: Use this template to disable CI jobs | ||||
| title: DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME] | ||||
| labels: 'module: ci' | ||||
| assignees: '' | ||||
|  | ||||
| title: "DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]" | ||||
| labels: "module: ci" | ||||
| --- | ||||
|  | ||||
| > For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once | ||||
|  | ||||
							
								
								
									
.github/actionlint.yaml (vendored, 3 lines changed)
							| @ -22,9 +22,6 @@ self-hosted-runner: | ||||
|     - linux.arm64.m7g.4xlarge | ||||
|     - linux.arm64.m7g.4xlarge.ephemeral | ||||
|     - linux.arm64.r7g.12xlarge.memory | ||||
|     - linux.aws.h100 | ||||
|     - linux.aws.h100.4 | ||||
|     - linux.aws.h100.8 | ||||
|     - linux.4xlarge.nvidia.gpu | ||||
|     - linux.8xlarge.nvidia.gpu | ||||
|     - linux.16xlarge.nvidia.gpu | ||||
|  | ||||
							
								
								
									
.github/actions/setup-win/action.yml (vendored, 2 lines changed)
							| @ -59,7 +59,7 @@ runs: | ||||
|         set -x | ||||
|  | ||||
|         # Create new py_tmp env with python-version | ||||
|         ${CONDA} create -y -n py_tmp python=${PYTHON_VERSION} intel-openmp libuv | ||||
|         ${CONDA} create -y -n py_tmp python=${PYTHON_VERSION} intel-openmp | ||||
|  | ||||
|         PYTHON3=$(${CONDA_RUN} -n py_tmp which python3) | ||||
|         EXIT_CODE=$? | ||||
|  | ||||
							
								
								
									
.github/ci_commit_pins/vllm.txt (vendored, 2 lines changed)
							| @ -1 +1 @@ | ||||
| 78a47f87ce259a48f0391fa9ae15add05ea7432b | ||||
| 5aeb9254521023f97aca292b3478aa7ff485ffb2 | ||||
|  | ||||
							
								
								
									
.github/ci_commit_pins/xla.txt (vendored, 2 lines changed)
							| @ -1 +1 @@ | ||||
| 0fc62aa26a30ed7ca419d285f285cb5ba02c4394 | ||||
| c77852e117bdf056c8e9a087e51d6f65cf6ba53d | ||||
|  | ||||
							
								
								
									
.github/merge_rules.yaml (vendored, 15 lines changed)
							| @ -525,21 +525,6 @@ | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: typechecking | ||||
|   patterns: | ||||
|   - 'pyrefly.toml' | ||||
|   - 'mypy.ini' | ||||
|   - 'mypy-strict.ini' | ||||
|   approved_by: | ||||
|   - lolpack | ||||
|   - maggiemoss | ||||
|   - ndmitchell | ||||
|   - kinto0 | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: superuser | ||||
|   patterns: | ||||
|   - '*' | ||||
|  | ||||
							
								
								
									
.github/pytorch-probot.yml (vendored, 39 lines changed)
							| @ -1,44 +1,41 @@ | ||||
| tracking_issue: 24422 | ||||
| ciflow_tracking_issue: 64124 | ||||
| ciflow_push_tags: | ||||
| - ciflow/b200 | ||||
| - ciflow/b200-symm-mem | ||||
| - ciflow/binaries | ||||
| - ciflow/binaries_libtorch | ||||
| - ciflow/binaries_wheel | ||||
| - ciflow/h100 | ||||
| - ciflow/h100-cutlass-backend | ||||
| - ciflow/h100-distributed | ||||
| - ciflow/h100-symm-mem | ||||
| - ciflow/triton_binaries | ||||
| - ciflow/inductor | ||||
| - ciflow/inductor-cu126 | ||||
| - ciflow/inductor-micro-benchmark | ||||
| - ciflow/inductor-micro-benchmark-cpu-x86 | ||||
| - ciflow/inductor-perf-compare | ||||
| - ciflow/inductor-perf-test-nightly-rocm | ||||
| - ciflow/inductor-perf-test-nightly-x86-zen | ||||
| - ciflow/inductor-periodic | ||||
| - ciflow/inductor-rocm | ||||
| - ciflow/inductor-perf-test-nightly-rocm | ||||
| - ciflow/inductor-perf-compare | ||||
| - ciflow/inductor-micro-benchmark | ||||
| - ciflow/inductor-micro-benchmark-cpu-x86 | ||||
| - ciflow/inductor-perf-test-nightly-x86-zen | ||||
| - ciflow/inductor-cu126 | ||||
| - ciflow/linux-aarch64 | ||||
| - ciflow/mps | ||||
| - ciflow/nightly | ||||
| - ciflow/op-benchmark | ||||
| - ciflow/periodic | ||||
| - ciflow/periodic-rocm-mi300 | ||||
| - ciflow/pull | ||||
| - ciflow/quantization-periodic | ||||
| - ciflow/riscv64 | ||||
| - ciflow/rocm | ||||
| - ciflow/rocm-mi300 | ||||
| - ciflow/s390 | ||||
| - ciflow/riscv64 | ||||
| - ciflow/slow | ||||
| - ciflow/torchbench | ||||
| - ciflow/triton_binaries | ||||
| - ciflow/trunk | ||||
| - ciflow/unstable | ||||
| - ciflow/vllm | ||||
| - ciflow/win-arm64 | ||||
| - ciflow/xpu | ||||
| - ciflow/vllm | ||||
| - ciflow/torchbench | ||||
| - ciflow/op-benchmark | ||||
| - ciflow/pull | ||||
| - ciflow/h100 | ||||
| - ciflow/h100-distributed | ||||
| - ciflow/win-arm64 | ||||
| - ciflow/h100-symm-mem | ||||
| - ciflow/h100-cutlass-backend | ||||
| retryable_workflows: | ||||
| - pull | ||||
| - trunk | ||||
| @ -47,4 +44,4 @@ retryable_workflows: | ||||
| - inductor-A100-perf-nightly | ||||
| labeler_config: labeler.yml | ||||
| label_to_label_config: label_to_label.yml | ||||
| mergebot: true | ||||
| mergebot: True | ||||
|  | ||||
| @ -1,5 +1,4 @@ | ||||
| boto3==1.35.42 | ||||
| build==1.2.2.post1 | ||||
| cmake==3.27.* | ||||
| expecttest==0.3.0 | ||||
| fbscribelogger==0.1.7 | ||||
|  | ||||
| @ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = { | ||||
| } | ||||
|  | ||||
| # NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this | ||||
| ROCM_ARCHES = ["6.4", "7.0"] | ||||
| ROCM_ARCHES = ["6.3", "6.4"] | ||||
|  | ||||
| XPU_ARCHES = ["xpu"] | ||||
|  | ||||
| @ -53,7 +53,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | " | ||||
| @ -70,7 +70,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | " | ||||
| @ -87,7 +87,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx==13.0.39; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | " | ||||
|  | ||||
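The PYTORCH_EXTRA_INSTALL_REQUIREMENTS values above are single strings in which individual PEP 508 requirements are joined with " | ". A minimal sketch of how such a string can be split back into per-package specs and filtered by their environment markers, assuming the consumer splits on the literal " | " separator and that the `packaging` library is available:

    from packaging.requirements import Requirement

    extra = (
        "nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | "
        "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux'"
    )

    def split_extra_requirements(value: str) -> list:
        # Each dependency is separated by " | "; empty fragments are dropped.
        return [Requirement(p.strip()) for p in value.split(" | ") if p.strip()]

    for req in split_extra_requirements(extra):
        # Keep only dependencies whose marker matches the current platform.
        if req.marker is None or req.marker.evaluate():
            print(f"{req.name}{req.specifier}")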
							
								
								
									
.github/scripts/generate_ci_workflows.py (93 changes, vendored)
							| @ -127,6 +127,53 @@ LINUX_BINARY_BUILD_WORFKLOWS = [ | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| ROCM_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="manywheel", | ||||
|         build_variant="rocm", | ||||
|         build_configs=generate_binary_build_matrix.generate_wheels_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             arches=["6.4"], | ||||
|             python_versions=["3.10"], | ||||
|         ), | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             labels={ | ||||
|                 LABEL_CIFLOW_BINARIES, | ||||
|                 LABEL_CIFLOW_BINARIES_WHEEL, | ||||
|                 LABEL_CIFLOW_ROCM, | ||||
|             }, | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| LINUX_BINARY_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="manywheel", | ||||
|         build_configs=generate_binary_build_matrix.generate_wheels_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             arches=["12.8"], | ||||
|             python_versions=["3.12"], | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.RELEASE, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             generate_binary_build_matrix.RELEASE, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| WINDOWS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
| @ -212,6 +259,39 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| WINDOWS_BINARY_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.RELEASE, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.WINDOWS, | ||||
|             generate_binary_build_matrix.RELEASE, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|     ), | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.DEBUG, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.WINDOWS, | ||||
|             generate_binary_build_matrix.DEBUG, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| MACOS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.MACOS_ARM64, | ||||
| @ -292,10 +372,23 @@ def main() -> None: | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             S390X_BINARY_BUILD_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             # Give rocm its own workflow file | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             ROCM_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             LINUX_BINARY_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("windows_binary_build_workflow.yml.j2"), | ||||
|             WINDOWS_BINARY_BUILD_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("windows_binary_build_workflow.yml.j2"), | ||||
|             WINDOWS_BINARY_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("macos_binary_build_workflow.yml.j2"), | ||||
|             MACOS_BINARY_BUILD_WORKFLOWS, | ||||
|  | ||||
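main() pairs each Jinja template with a list of workflow configurations and emits one generated YAML file per entry. A rough sketch of that loop, under the assumption that each workflow object exposes a name and its template context (the real BinaryBuildWorkflow class renders itself and differs in detail):

    from pathlib import Path
    import jinja2

    def render_workflows(pairs, out_dir: Path) -> None:
        """pairs: iterable of (jinja2.Template, [workflow, ...]) tuples, as in main()."""
        for template, workflows in pairs:
            for wf in workflows:
                # Hypothetical attributes: `build_environment` names the output file,
                # `asdict()` supplies the template variables.
                target = out_dir / f"generated-{wf.build_environment}.yml"
                target.write_text(template.render(**wf.asdict()))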
| @ -71,15 +71,12 @@ jobs: | ||||
|     with:!{{ upload.binary_env_as_input(config) }} | ||||
|       {%- if "aarch64" in build_environment %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       {%- elif "s390x" in build_environment %} | ||||
|       runs_on: linux.s390x | ||||
|       ALPINE_IMAGE: "docker.io/s390x/alpine" | ||||
|       timeout-minutes: 420 | ||||
|       {%- elif config["gpu_arch_type"] == "rocm" %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       {%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.24xlarge.ephemeral | ||||
|  | ||||
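The Jinja branch above chooses the build runner and base image from the build environment string. Restated as plain Python for readability (a sketch; the values are taken from the two sides of the diff, and the aarch64 runner differs between them as shown above):

    def select_runner(build_environment: str, gpu_arch_type: str) -> dict:
        # Mirrors the template's if/elif chain; keys map onto the reusable
        # workflow inputs (runs_on, ALPINE_IMAGE, timeout-minutes).
        opts: dict = {}
        if "aarch64" in build_environment:
            opts["runs_on"] = "linux.arm64.r7g.12xlarge.memory"  # other side: linux.arm64.m7g.4xlarge.ephemeral
            opts["ALPINE_IMAGE"] = "arm64v8/alpine"
        elif "s390x" in build_environment:
            opts["runs_on"] = "linux.s390x"
            opts["ALPINE_IMAGE"] = "docker.io/s390x/alpine"
            opts["timeout-minutes"] = 420
        elif gpu_arch_type == "rocm":
            # This branch is present on only one side of the diff.
            opts["timeout-minutes"] = 300
        elif "conda" in build_environment and gpu_arch_type == "cuda":
            opts["runs_on"] = "linux.24xlarge.ephemeral"
        return opts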
							
								
								
									
.github/workflows/_docs.yml (2 changes, vendored)
							| @ -67,7 +67,7 @@ jobs: | ||||
|             # an OOM issue when running the job, so this upgrades the runner from 4xlarge | ||||
|             # to the next available tier of 12xlarge. So much memory just to generate cpp | ||||
|             # doc | ||||
|             runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory | ||||
|             runner: ${{ inputs.runner_prefix }}linux.12xlarge | ||||
|             # TODO: Nightly cpp docs take longer and longer to finish (more than 3h now) | ||||
|             # Let's try to figure out how this can be improved | ||||
|             timeout-minutes: 360 | ||||
|  | ||||
							
								
								
									
.github/workflows/_linux-test.yml (2 changes, vendored)
							| @ -273,8 +273,6 @@ jobs: | ||||
|           TEST_CONFIG: ${{ matrix.config }} | ||||
|           SHARD_NUMBER: ${{ matrix.shard }} | ||||
|           NUM_TEST_SHARDS: ${{ matrix.num_shards }} | ||||
|           EXTRA_FLAGS: ${{ matrix.extra_flags || '' }} | ||||
|           OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }} | ||||
|           REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} | ||||
|           CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} | ||||
|           VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} | ||||
|  | ||||
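SHARD_NUMBER and NUM_TEST_SHARDS, passed to the test step from the job matrix above, drive test sharding. A minimal sketch (assumption) of how a test list is typically partitioned across shards; the real run_test.py logic is more involved:

    import os

    def tests_for_this_shard(all_tests: list) -> list:
        shard = int(os.environ.get("SHARD_NUMBER", "1"))        # 1-based shard index
        num_shards = int(os.environ.get("NUM_TEST_SHARDS", "1"))
        # Round-robin assignment keeps shards roughly the same size.
        return [t for i, t in enumerate(sorted(all_tests)) if i % num_shards == shard - 1]

    if __name__ == "__main__":
        print(tests_for_this_shard(["test_torch", "test_nn", "test_ops", "test_autograd"]))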
							
								
								
									
.github/workflows/b200-symm-mem.yml (60 changes, vendored)
							| @ -1,60 +0,0 @@ | ||||
| name: Limited CI for symmetric memory tests on B200 | ||||
|  | ||||
| on: | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - .github/workflows/b200-symm-mem.yml | ||||
|   workflow_dispatch: | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/b200-symm-mem/* | ||||
|   schedule: | ||||
|     - cron: 22 8 * * *  # about 1:22am PDT | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|  | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runner: linux.12xlarge.memory | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '10.0' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "b200-symm-mem", shard: 1, num_shards: 1, runner: "linux.dgx.b200.8" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-test: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: | ||||
|       - linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.test-matrix }} | ||||
|       aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|     secrets: inherit | ||||
							
								
								
									
.github/workflows/build-almalinux-images.yml (2 changes, vendored)
							| @ -36,7 +36,7 @@ jobs: | ||||
|     runs-on: linux.9xlarge.ephemeral | ||||
|     strategy: | ||||
|       matrix: | ||||
|         tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "cpu"] | ||||
|         tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"] | ||||
|     steps: | ||||
|       - name: Build docker image | ||||
|         uses: pytorch/pytorch/.github/actions/binary-docker-build@main | ||||
|  | ||||
							
								
								
									
.github/workflows/build-libtorch-images.yml (2 changes, vendored)
							| @ -52,8 +52,8 @@ jobs: | ||||
|           { tag: "cuda12.9" }, | ||||
|           { tag: "cuda12.8" }, | ||||
|           { tag: "cuda12.6" }, | ||||
|           { tag: "rocm6.3"  }, | ||||
|           { tag: "rocm6.4"  }, | ||||
|           { tag: "rocm7.0"  }, | ||||
|           { tag: "cpu"      }, | ||||
|         ] | ||||
|     steps: | ||||
|  | ||||
							
								
								
									
.github/workflows/build-magma-rocm-linux.yml (2 changes, vendored)
							| @ -34,7 +34,7 @@ jobs: | ||||
|       id-token: write | ||||
|     strategy: | ||||
|       matrix: | ||||
|         rocm_version: ["70", "64"] | ||||
|         rocm_version: ["64", "63"] | ||||
|     steps: | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | ||||
|  | ||||
							
								
								
									
.github/workflows/build-manywheel-images.yml (3 changes, vendored)
							| @ -52,10 +52,11 @@ jobs: | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda13.0",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.8",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.6",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.3",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.4",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm7.0",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cpu",               runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28_aarch64-builder",  tag: "cpu-aarch64",       runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxcxx11-abi-builder",     tag: "cpu-cxx11-abi",     runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "xpu",               runner: "linux.9xlarge.ephemeral" }, | ||||
|         ] | ||||
|     runs-on: ${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }} | ||||
|  | ||||
							
								
								
									
.github/workflows/build-triton-wheel.yml (9 changes, vendored)
							| @ -50,12 +50,12 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         py_vers: [ "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ] | ||||
|         py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ] | ||||
|         device: ["cuda", "rocm", "xpu", "aarch64"] | ||||
|         docker-image: ["pytorch/manylinux2_28-builder:cpu"] | ||||
|         include: | ||||
|           - device: "rocm" | ||||
|             rocm_version: "7.0" | ||||
|             rocm_version: "6.4" | ||||
|             runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" | ||||
|           - device: "cuda" | ||||
|             rocm_version: "" | ||||
| @ -108,6 +108,9 @@ jobs: | ||||
|  | ||||
|           # Determine python executable for given version | ||||
|           case $PY_VERS in | ||||
|           3.9) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python | ||||
|             ;; | ||||
|           3.10) | ||||
|             PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python | ||||
|             ;; | ||||
| @ -191,7 +194,7 @@ jobs: | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         py_vers: [ "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ] | ||||
|         py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ] | ||||
|         device: ["xpu"] | ||||
|     timeout-minutes: 40 | ||||
|     env: | ||||
|  | ||||
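The shell `case` above maps a requested Python version to the matching interpreter inside the manylinux image. The same mapping expressed in Python (a sketch; the cp313t-style directory name for free-threaded builds is an assumption, since only the 3.9/3.10 arms are visible in this hunk):

    def python_executable(py_vers: str) -> str:
        nogil = py_vers.endswith("t")
        digits = py_vers.rstrip("t").replace(".", "")        # "3.13t" -> "313"
        tag = f"cp{digits}-cp{digits}t" if nogil else f"cp{digits}-cp{digits}"
        return f"/opt/python/{tag}/bin/python"

    assert python_executable("3.9") == "/opt/python/cp39-cp39/bin/python"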
							
								
								
									
.github/workflows/create_release.yml (59 changes, vendored)
							| @ -35,7 +35,6 @@ jobs: | ||||
|       contents: write | ||||
|     outputs: | ||||
|       pt_release_name: ${{ steps.release_name.outputs.pt_release_name }} | ||||
|       pt_pep517_release_name: ${{ steps.release_name.outputs.pt_pep517_release_name }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | ||||
|         with: | ||||
| @ -54,12 +53,8 @@ jobs: | ||||
|           tag_or_branch="${tag_or_branch#refs/heads/}" | ||||
|           # replace directory separators with _ in branch name | ||||
|           tag_or_branch="${tag_or_branch//\//_}" | ||||
|           torch_version="$(python -c 'from tools.generate_torch_version import get_torch_version; print(get_torch_version())')" | ||||
|           { | ||||
|             echo "PT_RELEASE_NAME=pytorch-$tag_or_branch"; | ||||
|             echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz"; | ||||
|             echo "PT_PEP517_RELEASE_FILE=torch-${torch_version}.tar.gz"; | ||||
|           } >> "$GITHUB_ENV" | ||||
|           echo "PT_RELEASE_NAME=pytorch-$tag_or_branch" >> "$GITHUB_ENV" | ||||
|           echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz" >> "$GITHUB_ENV" | ||||
|       - name: Checkout optional submodules | ||||
|         run: python3 tools/optional_submodules.py | ||||
|       - name: Copy docs requirements for inclusion | ||||
| @ -69,47 +64,30 @@ jobs: | ||||
|           cp .ci/docker/requirements-docs.txt docs/requirements.txt | ||||
|       - name: Create source distribution | ||||
|         run: | | ||||
|           # Create new folder with specified name so extracting the archive yields that | ||||
|           rm -rf "/tmp/$PT_RELEASE_NAME" | ||||
|           cp -r "$PWD" "/tmp/$PT_RELEASE_NAME" | ||||
|           mv "/tmp/$PT_RELEASE_NAME" . | ||||
|           # Cleanup | ||||
|           rm -rf "$PT_RELEASE_NAME"/{.circleci,.ci} | ||||
|           find "$PT_RELEASE_NAME" -name '.git*' -exec rm -rv {} \; || true | ||||
|           # Create archive | ||||
|           tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME" | ||||
|           echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")" | ||||
|       - name: Create PEP 517 compatible source distribution | ||||
|         run: | | ||||
|           pip install build==1.2.2.post1 || exit 1 | ||||
|           python -m build --sdist || exit 1 | ||||
|           cd dist || exit 1 | ||||
|             # Create new folder with specified name so extracting the archive yields that | ||||
|             rm -rf "/tmp/$PT_RELEASE_NAME" | ||||
|             cp -r "$PWD" "/tmp/$PT_RELEASE_NAME" | ||||
|             mv "/tmp/$PT_RELEASE_NAME" . | ||||
|             # Cleanup | ||||
|             rm -rf "$PT_RELEASE_NAME"/{.circleci,.ci} | ||||
|             find "$PT_RELEASE_NAME" -name '.git*' -exec rm -rv {} \; || true | ||||
|             # Create archive | ||||
|             tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME" | ||||
|             echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")" | ||||
|       - name: Upload source distribution for release | ||||
|         if: ${{ github.event_name == 'release' }} | ||||
|         uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # v2.2.2 | ||||
|         with: | ||||
|           files: | | ||||
|             ${{ env.PT_RELEASE_FILE }} | ||||
|             ${{ env.PT_PEP517_RELEASE_FILE }} | ||||
|       - name: Upload source distribution to GHA artifacts  # for release tags | ||||
|           files: ${{env.PT_RELEASE_FILE}} | ||||
|       - name: Upload source distribution to GHA artifacts for release tags | ||||
|         if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }} | ||||
|         uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 | ||||
|         with: | ||||
|           name: ${{ env.PT_RELEASE_FILE }} | ||||
|           path: ${{ env.PT_RELEASE_FILE }} | ||||
|       - name: Upload PEP 517 source distribution to GHA artifacts  # for release tags | ||||
|         if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }} | ||||
|         uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 | ||||
|         with: | ||||
|           name: ${{ env.PT_PEP517_RELEASE_FILE }} | ||||
|           path: dist/${{ env.PT_PEP517_RELEASE_FILE }} | ||||
|       - name: Set output | ||||
|         id: release_name | ||||
|         run: | | ||||
|           { | ||||
|             echo "pt_release_name=${{ env.PT_RELEASE_FILE }}"; | ||||
|             echo "pt_pep517_release_name=${{ env.PT_PEP517_RELEASE_FILE }}"; | ||||
|           } >> "${GITHUB_OUTPUT}" | ||||
|         run: echo "pt_release_name=${{ env.PT_RELEASE_NAME }}.tar.gz" >> "${GITHUB_OUTPUT}" | ||||
|  | ||||
|   upload_source_code_to_s3: | ||||
|     if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }} | ||||
| @ -125,9 +103,6 @@ jobs: | ||||
|       - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 | ||||
|         with: | ||||
|           name: ${{ needs.release.outputs.pt_release_name }} | ||||
|       - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 | ||||
|         with: | ||||
|           name: ${{ needs.release.outputs.pt_pep517_release_name }} | ||||
|       - name: Configure AWS credentials(PyTorch account) | ||||
|         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 | ||||
|         with: | ||||
| @ -138,9 +113,7 @@ jobs: | ||||
|           s3-bucket: pytorch | ||||
|           s3-prefix: source_code/test | ||||
|           if-no-files-found: warn | ||||
|           path: | | ||||
|             ${{ needs.release.outputs.pt_release_name }} | ||||
|             ${{ needs.release.outputs.pt_pep517_release_name }} | ||||
|           path: ${{ needs.release.outputs.pt_release_name }} | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }} | ||||
|  | ||||
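Both sides of the create_release.yml hunk derive the archive name from the triggering ref: strip the refs/tags/ or refs/heads/ prefix, replace "/" with "_", and prepend "pytorch-". Equivalent logic as a small Python sketch (the refs/tags/ strip happens earlier in the script and is assumed here):

    def release_file_for_ref(ref: str) -> str:
        tag_or_branch = ref
        for prefix in ("refs/tags/", "refs/heads/"):
            if tag_or_branch.startswith(prefix):
                tag_or_branch = tag_or_branch[len(prefix):]
        # Directory separators in branch names become underscores.
        tag_or_branch = tag_or_branch.replace("/", "_")
        return f"pytorch-{tag_or_branch}.tar.gz"

    assert release_file_for_ref("refs/tags/v2.6.0-rc1") == "pytorch-v2.6.0-rc1.tar.gz"
    assert release_file_for_ref("refs/heads/release/2.6") == "pytorch-release_2.6.tar.gz"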
							
								
								
									
.github/workflows/docker-builds.yml (1 change, vendored)
							| @ -59,6 +59,7 @@ jobs: | ||||
|           pytorch-linux-jammy-py3.13-clang12, | ||||
|           pytorch-linux-jammy-rocm-n-py3, | ||||
|           pytorch-linux-noble-rocm-n-py3, | ||||
|           pytorch-linux-noble-rocm-alpha-py3, | ||||
|           pytorch-linux-jammy-rocm-n-py3-benchmarks, | ||||
|           pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12, | ||||
|           pytorch-linux-jammy-py3.10-gcc11, | ||||
|  | ||||
							
								
								
									
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml (98 changes, generated, vendored)
							| @ -62,7 +62,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -128,11 +128,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -174,11 +174,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -220,11 +220,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -265,7 +265,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -331,11 +331,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -377,11 +377,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -423,11 +423,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.11" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -468,7 +468,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -534,11 +534,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -580,11 +580,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -626,11 +626,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -671,7 +671,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -737,11 +737,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -783,11 +783,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -829,11 +829,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.13" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -874,7 +874,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -940,11 +940,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -986,11 +986,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1032,11 +1032,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.13t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1077,7 +1077,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -1143,11 +1143,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1189,11 +1189,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1235,11 +1235,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.14" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1280,7 +1280,7 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cpu-aarch64 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
| @ -1346,11 +1346,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.6 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_6 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1392,11 +1392,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-12_8 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1438,11 +1438,11 @@ jobs: | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.14t" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.arm64.r7g.12xlarge.memory | ||||
|       runs_on: linux.arm64.m7g.4xlarge.ephemeral | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
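Editor's note: each PYTORCH_EXTRA_INSTALL_REQUIREMENTS value above is a single string of pip (PEP 508) requirement specifiers joined with " | ", each carrying a platform_system == 'Linux' environment marker so the pinned NVIDIA wheels are only pulled in on Linux installs. Below is a minimal sketch, assuming the packaging library, of how such a string can be split and its markers evaluated; the helper name and the splitting on " | " are illustrative and not taken from the PyTorch CI scripts.

# Minimal sketch (assumed helper, not part of the PyTorch CI scripts):
# split a PYTORCH_EXTRA_INSTALL_REQUIREMENTS-style string and keep only the
# requirements whose environment markers match the running platform.
from packaging.requirements import Requirement

def active_requirements(extra_install_requirements: str) -> list[str]:
    selected = []
    for spec in extra_install_requirements.split(" | "):  # entries are joined with " | "
        req = Requirement(spec.strip())
        # Requirement.marker is None when no marker is present; Marker.evaluate()
        # checks the marker against the current interpreter's environment.
        if req.marker is None or req.marker.evaluate():
            selected.append(str(req))
    return selected

# Example: on a Linux host this keeps the pin; elsewhere it is dropped.
print(active_requirements("nvidia-nccl-cu12==2.28.3; platform_system == 'Linux'"))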
							
								
								
									
230  .github/workflows/generated-linux-binary-libtorch-nightly.yml  (generated, vendored)
| @ -316,6 +316,120 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm6_3-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: libtorch-rocm6_3-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-rocm6_3-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-rocm6_3-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-rocm6_3-shared-with-deps-release | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: libtorch-cxx11-builder | ||||
|           custom-tag-prefix: rocm6.3 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
|   libtorch-rocm6_3-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-rocm6_3-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-rocm6_3-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm6_4-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -333,7 +447,6 @@ jobs: | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       build_name: libtorch-rocm6_4-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
| @ -430,118 +543,3 @@ jobs: | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm7_0-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       build_name: libtorch-rocm7_0-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-rocm7_0-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-rocm7_0-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-rocm7_0-shared-with-deps-release | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: libtorch-cxx11-builder | ||||
|           custom-tag-prefix: rocm7.0 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
|   libtorch-rocm7_0-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-rocm7_0-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-rocm7_0-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
							
								
								
									
87  .github/workflows/generated-linux-binary-libtorch-release-main.yml  (generated, vendored, new file)
| @ -0,0 +1,87 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-libtorch-release | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/trunk/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-libtorch-release | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-cpu-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch-release | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.4xlarge | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
88  .github/workflows/generated-linux-binary-manywheel-main.yml  (generated, vendored, new file)
| @ -0,0 +1,88 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-manywheel | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/trunk/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-manywheel | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-manywheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   manywheel-py3_12-cuda12_8-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda12_8-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_12-cuda12_8-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu128 | ||||
|       GPU_ARCH_VERSION: "12.8" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda12.8 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda12_8 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
1610  .github/workflows/generated-linux-binary-manywheel-nightly.yml  (generated, vendored; file diff suppressed because it is too large)

135  .github/workflows/generated-linux-binary-manywheel-rocm-main.yml  (generated, vendored, new file)
| @ -0,0 +1,135 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-manywheel-rocm | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/binaries/*' | ||||
|       - 'ciflow/binaries_wheel/*' | ||||
|       - 'ciflow/rocm/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-manywheel-rocm | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-manywheel-rocm-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   manywheel-py3_10-rocm6_4-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.4 | ||||
|       GPU_ARCH_VERSION: "6.4" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.4 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_10-rocm6_4 | ||||
|       build_environment: linux-binary-manywheel-rocm | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-rocm6_4-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_10-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.4 | ||||
|       GPU_ARCH_VERSION: "6.4" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.4 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: manywheel-py3_10-rocm6_4 | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: manylinux2_28-builder | ||||
|           custom-tag-prefix: rocm6.4 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
							
								
								
									
261  .github/workflows/generated-windows-binary-libtorch-debug-main.yml  (generated, vendored, new file)
| @ -0,0 +1,261 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/windows_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: windows-binary-libtorch-debug | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|   workflow_dispatch: | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BUILD_ENVIRONMENT: windows-binary-libtorch-debug | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 1 | ||||
|   OS: windows | ||||
| concurrency: | ||||
|   group: windows-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-debug-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-debug | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cpu-shared-with-deps-debug-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-debug-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-debug | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
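
Note: in the workflow above, the build and test jobs are connected only through the uploaded artifact; the test job downloads the artifact whose name matches the one the build job uploaded, into the same PYTORCH_FINAL_PACKAGE_DIR, before running the test script. A minimal sketch of that handoff pattern, with shortened job, artifact, and script names used purely for illustration:

    jobs:
      build:
        runs-on: windows-latest              # placeholder runner for the sketch
        steps:
          - name: Build libtorch
            run: ./build.sh                  # stand-in for binary_windows_build.sh
          - uses: actions/upload-artifact@v4
            with:
              name: libtorch-debug           # artifact name must match the download below
              path: ${{ runner.temp }}/artifacts
      test:
        needs: build                         # guarantees the artifact exists before download
        runs-on: windows-latest
        steps:
          - uses: actions/download-artifact@v4
            with:
              name: libtorch-debug           # same name as the upload
              path: ${{ runner.temp }}/artifacts
          - name: Test libtorch
            run: ./test.sh                   # stand-in for binary_windows_test.sh
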
							
								
								
									
.github/workflows/generated-windows-binary-libtorch-release-main.yml (generated, vendored, new file; 261 lines)
							| @ -0,0 +1,261 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/windows_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: windows-binary-libtorch-release | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|   workflow_dispatch: | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BUILD_ENVIRONMENT: windows-binary-libtorch-release | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 1 | ||||
|   OS: windows | ||||
| concurrency: | ||||
|   group: windows-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-release | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cpu-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-release | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
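
Note: both generated workflows above repeat the "Populate binary env" step inside each job because, as the NOTE in the steps explains, a workflow-level `env:` block cannot reference `runner.temp`; the paths are therefore derived from RUNNER_TEMP and exported per job via GITHUB_ENV. A small sketch of that pattern (step names are illustrative):

    steps:
      - name: Populate binary env
        shell: bash
        run: |
          # runner.temp is only resolvable inside a job, so export derived paths here
          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
          echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
      - name: Use the exported values
        shell: bash
        run: |
          # later steps in the same job see the variables through the environment
          echo "packages will land in ${PYTORCH_FINAL_PACKAGE_DIR}"
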
							
								
								
									
.github/workflows/operator_microbenchmark.yml (vendored; 46 lines changed)
							| @ -1,46 +0,0 @@ | ||||
| name: operator_microbenchmark | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/op-benchmark/* | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     # Run at 06:00 UTC everyday | ||||
|     - cron: 0 6 * * * | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|   opmicrobenchmark-build: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: opmicrobenchmark-build | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     with: | ||||
|       runner: linux.12xlarge.memory | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '8.0 9.0' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" }, | ||||
|           { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.a100" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   opmicrobenchmark-test: | ||||
|     name: opmicrobenchmark-test | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: opmicrobenchmark-build | ||||
|     with: | ||||
|       timeout-minutes: 500 | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 | ||||
|       docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
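
Note: the removed operator_microbenchmark.yml above follows the standard build-then-test layout used across these workflows: one job calls the reusable _linux-build.yml, and a dependent job forwards the build job's docker-image and test-matrix outputs into _linux-test.yml. A reduced sketch of just that wiring, with hypothetical job ids and the inputs trimmed to the ones shown above:

    jobs:
      my-build:                                # hypothetical job id
        uses: ./.github/workflows/_linux-build.yml
        with:
          build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
          docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
          test-matrix: |
            { include: [
              { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" },
            ]}
        secrets: inherit
      my-test:
        needs: my-build
        uses: ./.github/workflows/_linux-test.yml
        with:
          build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
          # outputs published by the reusable build workflow are consumed here
          docker-image: ${{ needs.my-build.outputs.docker-image }}
          test-matrix: ${{ needs.my-build.outputs.test-matrix }}
        secrets: inherit
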
							
								
								
									
.github/workflows/periodic.yml (vendored; 29 lines changed)
							| @ -59,14 +59,13 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.4-py3.10-gcc11 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: 7.5 | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
| @ -113,13 +112,13 @@ jobs: | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-build: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|   linux-jammy-cuda12_8-py3_9-gcc9-build: | ||||
|     name: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|       build-environment: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9 | ||||
|       cuda-arch-list: 8.6 | ||||
|       test-matrix: | | ||||
| @ -129,14 +128,14 @@ jobs: | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-test: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|   linux-jammy-cuda12_8-py3_9-gcc9-test: | ||||
|     name: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: linux-jammy-cuda12_8-py3_10-gcc9-build | ||||
|     needs: linux-jammy-cuda12_8-py3_9-gcc9-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.test-matrix }} | ||||
|       build-environment: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_9-gcc9-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_9-gcc9-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-debug-build: | ||||
|  | ||||
							
								
								
									
.github/workflows/pull.yml (vendored; 8 lines changed)
							| @ -127,8 +127,6 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       # More memory is needed to build with asan | ||||
|       runner: linux.2xlarge.memory | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-clang18-asan | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan | ||||
| @ -343,14 +341,14 @@ jobs: | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-xpu-n-py3_10-build: | ||||
|     name: linux-jammy-xpu-n-py3.10 | ||||
|   linux-jammy-xpu-n-py3_9-build: | ||||
|     name: linux-jammy-xpu-n-py3.9 | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       sync-tag: linux-xpu-n-build | ||||
|       runner_prefix: ${{ needs.get-label-type.outputs.label-type }} | ||||
|       build-environment: linux-jammy-xpu-n-py3.10 | ||||
|       build-environment: linux-jammy-xpu-n-py3.9 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3 | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|  | ||||
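
Note: the asan hunk above drops the explicit `runner: linux.2xlarge.memory` override and keeps only `runner_prefix`, so the job presumably falls back to whatever default runner _linux-build.yml defines, with the label-type prefix applied. How the reusable workflow combines the two inputs is an assumption here; the sketch below only illustrates the two knobs as they appear in the callers in this diff:

    jobs:
      example-asan-build:                        # hypothetical job id
        uses: ./.github/workflows/_linux-build.yml
        with:
          # prefix the reusable workflow's default runner with the label type
          runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
          # optionally pin a specific (e.g. high-memory) runner instead of the default
          # runner: linux.2xlarge.memory
          build-environment: linux-jammy-py3.10-clang18-asan
          docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
        secrets: inherit
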
							
								
								
									
.github/workflows/quantization-periodic.yml (vendored; 54 lines changed)
							| @ -1,54 +0,0 @@ | ||||
| name: quantization-periodic | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/quantization-periodic/* | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     # run weekly | ||||
|     - cron: "45 0 * * 0" | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|   get-default-label-prefix: | ||||
|     name: get-default-label-prefix | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|       opt_out_experiments: lf | ||||
|  | ||||
|   periodic-quantization-build: | ||||
|     name: periodic-quantization-build | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-default-label-prefix | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '8.9' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "quantization", shard: 1, num_shards: 1, runner: "${{ needs.get-default-label-prefix.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|   periodic-test-quantization: | ||||
|     name: periodic-test-quantization | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: periodic-quantization-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       docker-image: ${{ needs.periodic-quantization-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.periodic-quantization-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
							
								
								
									
.github/workflows/rocm-mi355.yml (vendored; 2 lines changed)
							| @ -38,7 +38,7 @@ jobs: | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-noble-rocm-py3.12-mi355 | ||||
|       docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3 | ||||
|       docker-image-name: ci-image:pytorch-linux-noble-rocm-alpha-py3 | ||||
|       sync-tag: rocm-build | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|  | ||||
							
								
								
									
.github/workflows/slow.yml (vendored; 2 lines changed)
							| @ -140,8 +140,6 @@ jobs: | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       # More memory is needed to build with asan | ||||
|       runner: linux.2xlarge.memory | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-py3.10-clang18-asan | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan | ||||
|  | ||||
							
								
								
									
.github/workflows/test-b200.yml (vendored; 76 lines changed)
							| @ -1,76 +0,0 @@ | ||||
| # B200 Smoke Tests CI Workflow | ||||
| # | ||||
| # This workflow runs smoke tests on B200 hardware | ||||
| # | ||||
| # Flow: | ||||
| # 1. Builds PyTorch with CUDA 12.8+ and sm100 architecture for B200 | ||||
| # 2. Runs smoke tests on linux.dgx.b200 runner | ||||
| # 3. Tests executed are defined in .ci/pytorch/test.sh -> test_python_smoke() function | ||||
| # | ||||
| # Triggered by: | ||||
| # - Pull requests modifying this workflow file | ||||
| # - Manual dispatch | ||||
| # - Schedule (every 6 hours) | ||||
| # - Adding ciflow/b200 label to a PR (creates ciflow/b200/* tag) | ||||
|  | ||||
| name: B200 Smoke Tests | ||||
|  | ||||
| on: | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - .github/workflows/test-b200.yml | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     - cron: 0 4,10,16,22 * * *  # every 6 hours | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/b200/* | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|  | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-build: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100 | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runner: linux.12xlarge.memory | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '10.0' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "smoke_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" }, | ||||
|         ]} | ||||
|       # config: "smoke_b200" maps to test_python_smoke_b200() in .ci/pytorch/test.sh | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-test: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100 | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: | ||||
|       - linux-jammy-cuda12_8-py3_10-gcc11-sm100-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build.outputs.test-matrix }} | ||||
|       aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|     secrets: inherit | ||||
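
Note: the header comments of the removed test-b200.yml list four triggers (a pull_request paths filter, workflow_dispatch, a 6-hour cron, and a ciflow/b200/* tag pushed when the label is added), and its concurrency key folds the event name in so that, for instance, a scheduled run never cancels a tag- or manually-triggered one. A trimmed restatement of that trigger/concurrency pairing, with values copied from the file above and comments added:

    on:
      pull_request:
        paths:
          - .github/workflows/test-b200.yml     # only when this workflow itself changes
      workflow_dispatch:                        # manual runs
      schedule:
        - cron: 0 4,10,16,22 * * *              # every 6 hours
      push:
        tags:
          - ciflow/b200/*                       # created when the ciflow/b200 label is applied
    concurrency:
      # separate groups per PR/sha and per event kind, so scheduled, manual,
      # and tag-triggered runs do not cancel each other
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
      cancel-in-progress: true
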
							
								
								
									
.gitignore (vendored; 1 line changed)
							| @ -82,7 +82,6 @@ torch/return_types.pyi | ||||
| torch/nn/functional.pyi | ||||
| torch/utils/data/datapipes/datapipe.pyi | ||||
| torch/csrc/autograd/generated/* | ||||
| torch/csrc/functionalization/generated/* | ||||
| torch/csrc/lazy/generated/*.[!m]* | ||||
| torch_compile_debug/ | ||||
| # Listed manually because some files in this directory are not generated | ||||
|  | ||||
| @ -49,7 +49,7 @@ init_command = [ | ||||
|     'mccabe==0.7.0', | ||||
|     'pycodestyle==2.14.0', | ||||
|     'pyflakes==3.4.0', | ||||
|     'torchfix==0.4.0 ; python_version >= "3.10" and python_version < "3.13"', | ||||
|     'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"', | ||||
| ] | ||||
|  | ||||
|  | ||||
| @ -153,7 +153,7 @@ init_command = [ | ||||
|     'python3', | ||||
|     'tools/linter/adapters/pip_init.py', | ||||
|     '--dry-run={{DRYRUN}}', | ||||
|     'numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11"', | ||||
|     'numpy==1.26.4 ; python_version >= "3.9" and python_version <= "3.11"', | ||||
|     'numpy==2.1.0 ; python_version >= "3.12"', | ||||
|     'expecttest==0.3.0', | ||||
|     'mypy==1.16.0', | ||||
| @ -196,7 +196,6 @@ exclude_patterns = [ | ||||
|     'tools/test/gen_operators_yaml_test.py', | ||||
|     'tools/test/gen_oplist_test.py', | ||||
|     'tools/test/test_selective_build.py', | ||||
|     'tools/experimental/dynamic_shapes/torchfuzz/**', | ||||
| ] | ||||
| command = [ | ||||
|     'python3', | ||||
| @ -1453,7 +1452,7 @@ init_command = [ | ||||
|     '--dry-run={{DRYRUN}}', | ||||
|     'usort==1.0.8.post1', | ||||
|     'isort==6.0.1', | ||||
|     'ruff==0.13.1',  # sync with RUFF | ||||
|     'ruff==0.12.9',  # sync with RUFF | ||||
| ] | ||||
| is_formatter = true | ||||
|  | ||||
| @ -1587,7 +1586,7 @@ init_command = [ | ||||
|     'python3', | ||||
|     'tools/linter/adapters/pip_init.py', | ||||
|     '--dry-run={{DRYRUN}}', | ||||
|     'ruff==0.13.1',  # sync with PYFMT | ||||
|     'ruff==0.12.9',  # sync with PYFMT | ||||
| ] | ||||
| is_formatter = true | ||||
|  | ||||
|  | ||||
							
								
								
									
BUILD.bazel (37 lines changed)
							| @ -22,7 +22,6 @@ COMMON_COPTS = [ | ||||
|     "-DHAVE_SHM_UNLINK=1", | ||||
|     "-D_FILE_OFFSET_BITS=64", | ||||
|     "-DUSE_FBGEMM", | ||||
|     "-DUSE_DISTRIBUTED", | ||||
|     "-DAT_PER_OPERATOR_HEADERS", | ||||
|     "-DATEN_THREADING=NATIVE", | ||||
|     "-DNO_CUDNN_DESTROY_HANDLE", | ||||
| @ -91,8 +90,6 @@ generated_cpu_cpp = [ | ||||
|     "aten/src/ATen/NativeMetaFunctions.h", | ||||
|     "aten/src/ATen/RegistrationDeclarations.h", | ||||
|     "aten/src/ATen/VmapGeneratedPlumbing.h", | ||||
|     "aten/src/ATen/ViewMetaClasses.h", | ||||
|     "aten/src/ATen/ViewMetaClasses.cpp", | ||||
|     "aten/src/ATen/core/aten_interned_strings.h", | ||||
|     "aten/src/ATen/core/enum_tag.h", | ||||
|     "aten/src/ATen/core/TensorBody.h", | ||||
| @ -813,7 +810,7 @@ cc_library( | ||||
|     name = "torch_python", | ||||
|     srcs = libtorch_python_core_sources | ||||
|         + if_cuda(libtorch_python_cuda_sources) | ||||
|         + if_cuda(libtorch_python_distributed_sources) | ||||
|         + libtorch_python_distributed_sources | ||||
|         + GENERATED_AUTOGRAD_PYTHON, | ||||
|     hdrs = glob([ | ||||
|         "torch/csrc/generic/*.cpp", | ||||
| @ -835,6 +832,36 @@ pybind_extension( | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| cc_library( | ||||
|     name = "functorch", | ||||
|     hdrs = glob([ | ||||
|         "functorch/csrc/dim/*.h", | ||||
|     ]), | ||||
|     srcs = glob([ | ||||
|         "functorch/csrc/dim/*.cpp", | ||||
|     ]), | ||||
|     deps = [ | ||||
|         ":aten_nvrtc", | ||||
|         ":torch_python", | ||||
|         "@pybind11", | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| pybind_extension( | ||||
|     name = "functorch/_C", | ||||
|     copts=[ | ||||
|         "-DTORCH_EXTENSION_NAME=_C" | ||||
|     ], | ||||
|     srcs = [ | ||||
|         "functorch/csrc/init_dim_only.cpp", | ||||
|     ], | ||||
|     deps = [ | ||||
|         ":functorch", | ||||
|         ":torch_python", | ||||
|         ":aten_nvrtc", | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| cc_binary( | ||||
|     name = "torch/bin/torch_shm_manager", | ||||
|     srcs = [ | ||||
| @ -875,6 +902,7 @@ py_library( | ||||
|     ], | ||||
|     data = [ | ||||
|         ":torch/_C.so", | ||||
|         ":functorch/_C.so", | ||||
|         ":torch/bin/torch_shm_manager", | ||||
|     ], | ||||
| ) | ||||
| @ -1077,7 +1105,6 @@ test_suite( | ||||
|         "aten/src/ATen/templates/LazyNonNativeIr.h", | ||||
|         "aten/src/ATen/templates/RegisterDispatchKey.cpp", | ||||
|         "aten/src/ATen/templates/RegisterDispatchDefinitions.ini", | ||||
|         "aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp", | ||||
|         "aten/src/ATen/native/native_functions.yaml", | ||||
|         "aten/src/ATen/native/tags.yaml", | ||||
|         "aten/src/ATen/native/ts_native_functions.yaml", | ||||
|  | ||||
| @ -180,8 +180,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)") | ||||
|   set(CPU_POWER ON) | ||||
| endif() | ||||
|  | ||||
| # For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not | ||||
| # tested and likely won't work without additional changes. | ||||
| # For non-supported platforms, turn USE_DISTRIBUTED off by default. | ||||
| # NB: USE_DISTRIBUTED simply disables the backend; distributed code | ||||
| # still gets built | ||||
| if(NOT LINUX AND NOT WIN32) | ||||
|   set(USE_DISTRIBUTED | ||||
|       OFF | ||||
| @ -261,11 +262,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) | ||||
| option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) | ||||
| option(USE_NATIVE_ARCH "Use -march=native" OFF) | ||||
| cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) | ||||
| option(USE_DISTRIBUTED "Use distributed" ON) | ||||
| option(USE_DISTRIBUTED "Enable default distributed backends" ON) | ||||
| cmake_dependent_option(USE_NCCL "Use NCCL" ON | ||||
|                        "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) | ||||
| cmake_dependent_option(USE_XCCL "Use XCCL" ON | ||||
|                        "USE_XPU;UNIX;NOT APPLE" OFF) | ||||
|                        "USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF) | ||||
| cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) | ||||
| cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF) | ||||
| cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) | ||||
| @ -437,12 +438,11 @@ if(WIN32) | ||||
|       PATH_SUFFIXES lib | ||||
|       NO_DEFAULT_PATH) | ||||
|     if(NOT libuv_tmp_LIBRARY) | ||||
|       set(USE_DISTRIBUTED OFF) | ||||
|       set(USE_GLOO OFF) | ||||
|       message( | ||||
|         WARNING | ||||
|           "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " | ||||
|           "Please run command 'conda install -c conda-forge libuv=1.51' to install libuv." | ||||
|           "Libuv is not installed in current conda env. Set USE_GLOO to OFF. " | ||||
|           "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." | ||||
|       ) | ||||
|     else() | ||||
|       set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../) | ||||
| @ -888,28 +888,23 @@ cmake_dependent_option( | ||||
|   "(USE_CUDA AND NOT MSVC) OR USE_ROCM" | ||||
|   OFF) | ||||
|  | ||||
|  | ||||
| IF(USE_ROCM AND "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Setting USE_FBGEMM_GENAI for gfx942 to ON by default, doing ROCM build") | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT ON) | ||||
| elseif(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8 AND NOT WIN32) | ||||
|   message(STATUS "Setting USE_FBGEMM_GENAI to ON by default , doing CUDA build for SM100a") | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT ON) | ||||
| else() | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| cmake_dependent_option( | ||||
|   USE_FBGEMM_GENAI | ||||
|   "Whether to build FBGEMM GenAI quantized GEMM kernels.\ | ||||
|   Will be disabled if not supported by the platform" | ||||
|   ${USE_FBGEMM_GENAI_DEFAULT} | ||||
|   "(USE_CUDA AND NOT MSVC) OR USE_ROCM" | ||||
|   ON | ||||
|   "USE_ROCM" | ||||
|   OFF) | ||||
|  | ||||
| IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF") | ||||
|   set(USE_FBGEMM_GENAI off) | ||||
| endif() | ||||
|  | ||||
| # Set USE_FBGEMM_GENAI to ON for CUDA build on SM100. | ||||
| if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8 AND NOT WIN32) | ||||
|   message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a") | ||||
|   set(USE_FBGEMM_GENAI ON) | ||||
| endif() | ||||
|  | ||||
| # CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem | ||||
| @ -1395,6 +1390,10 @@ endif() | ||||
| include(cmake/Summary.cmake) | ||||
| caffe2_print_configuration_summary() | ||||
|  | ||||
| if(BUILD_FUNCTORCH) | ||||
|   add_subdirectory(functorch) | ||||
| endif() | ||||
|  | ||||
| # Parse custom debug info | ||||
| if(DEFINED USE_CUSTOM_DEBINFO) | ||||
|   string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}") | ||||
|  | ||||
							
								
								
									
MANIFEST.in (105 lines changed)
							| @ -1,61 +1,20 @@ | ||||
| # Reference: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html | ||||
|  | ||||
| # Include individual top-level files | ||||
| include CITATION.cff | ||||
| include CODEOWNERS | ||||
| include Dockerfile | ||||
| include LICENSE | ||||
| include MANIFEST.in | ||||
| include Makefile | ||||
| include NOTICE | ||||
| include .bc-linter.yml | ||||
| include .clang-format .clang-tidy | ||||
| include .cmakelintrc | ||||
| include .coveragerc | ||||
| include .dockerignore | ||||
| include .editorconfig | ||||
| include .flake8 | ||||
| include .gdbinit | ||||
| include .lintrunner.toml | ||||
| include .lldbinit | ||||
| include codex_setup.sh | ||||
| include docker.Makefile | ||||
| include pyrefly.toml | ||||
| include ubsan.supp | ||||
|  | ||||
| # Include bazel and BUCK related files | ||||
| include BUILD.bazel BUCK.oss | ||||
| include WORKSPACE | ||||
| include *.bzl | ||||
| include .bazelignore .bazelrc .bazelversion | ||||
|  | ||||
| # Include general configuration files | ||||
| include *.ini | ||||
| # Include important top-level information | ||||
| include *.md | ||||
| # Include technical text files at the moment, comprises | ||||
| # version.txt, CMakeLists.txt, requirements.txt | ||||
| include *.txt | ||||
|  | ||||
| # Include ctags configuration | ||||
| include .ctags.d/*.ctags | ||||
|  | ||||
| # Include subfolders completely | ||||
| graft .devcontainer | ||||
| graft .vscode | ||||
| # Include source files in SDist | ||||
| include CMakeLists.txt | ||||
| include *.bzl *.bazel .bazel* BUILD *.BUILD BUILD.* WORKSPACE | ||||
| include BUCK BUCK.* | ||||
| include requirements*.txt | ||||
| include version.txt | ||||
| include [Mm]akefile *.[Mm]akefile [Mm]akefile.* | ||||
| include [Dd]ockerfile *.[Dd]ockerfile [Dd]ockerfile.* .dockerignore | ||||
| graft android | ||||
| graft aten | ||||
| graft benchmarks | ||||
| graft binaries | ||||
| graft c10 | ||||
| graft caffe2 | ||||
| graft cmake | ||||
| graft docs | ||||
| graft functorch | ||||
| graft ios | ||||
| graft mypy_plugins | ||||
| graft scripts | ||||
| graft test | ||||
| graft third_party | ||||
| graft tools | ||||
| graft torch | ||||
| @ -63,37 +22,29 @@ graft torchgen | ||||
| # FIXME: torch-xla build during codegen will fail if include this file in wheel | ||||
| exclude torchgen/BUILD.bazel | ||||
|  | ||||
| # The following exclusions omit parts from third-party dependencies that | ||||
| # contain invalid symlinks[1] and that are not needed for pytorch, such as | ||||
| # bindings for unused languages | ||||
| prune third_party/flatbuffers/java | ||||
| prune third_party/flatbuffers/kotlin | ||||
| prune third_party/ittapi/rust | ||||
| prune third_party/nccl/pkg/debian | ||||
| prune third_party/opentelemetry-cpp/third_party/prometheus-cpp/cmake/project-import-* | ||||
|  | ||||
| # The following document is also an invalid symlink[1] and superfluous | ||||
| exclude third_party/flatbuffers/docs/source/CONTRIBUTING.md | ||||
|  | ||||
| # Omit autogenerated code | ||||
| prune torchgen/packaged | ||||
|  | ||||
| # Omit caches, compiled, and scm related content | ||||
| prune */__pycache__ | ||||
| prune **/.github | ||||
| prune **/.gitlab | ||||
| global-exclude *.o *.obj *.so *.dylib *.a *.pxd *.dll *.lib | ||||
| global-exclude *.py[cod] *.swp *~ | ||||
| global-exclude .git .git-blame-ignore-revs .gitattributes .gitignore .gitmodules | ||||
| global-exclude .gitlab-ci.yml | ||||
| # Misc files and directories in SDist | ||||
| include *.md | ||||
| include CITATION.cff | ||||
| include LICENSE NOTICE | ||||
| include mypy*.ini | ||||
| graft benchmarks | ||||
| graft docs | ||||
| graft mypy_plugins | ||||
| graft scripts | ||||
|  | ||||
| # Misc files needed for custom setuptools command | ||||
| include .gitignore | ||||
| include .gitmodules | ||||
|  | ||||
| # [1] Invalid symlinks for the purposes of Python source distributions are, | ||||
| # according to the source distribution format[2] links pointing outside the | ||||
| # destination directory or links with a `..` component, which is those of | ||||
| # concern here. | ||||
| # Include test suites in SDist | ||||
| graft test | ||||
| include pytest.ini | ||||
| include .coveragerc | ||||
|  | ||||
| # [2] https://packaging.python.org/en/latest/specifications/source-distribution-format/#source-distribution-archive-features | ||||
| # Prune generated/compiled files | ||||
| prune torchgen/packaged | ||||
| prune */__pycache__ | ||||
| global-exclude *.o *.obj *.so *.a *.dylib *.pxd *.dll *.lib *.py[cod] | ||||
|  | ||||
| prune */.git | ||||
| global-exclude .git *~ *.swp | ||||
|  | ||||
| @ -161,7 +161,7 @@ They require JetPack 4.2 and above, and [@dusty-nv](https://github.com/dusty-nv) | ||||
|  | ||||
| #### Prerequisites | ||||
| If you are installing from source, you will need: | ||||
| - Python 3.10 or later | ||||
| - Python 3.9 or later | ||||
| - A compiler that fully supports C++17, such as clang or gcc (gcc 9.4.0 or newer is required, on Linux) | ||||
| - Visual Studio or Visual Studio Build Tool (Windows only) | ||||
|  | ||||
| @ -275,7 +275,7 @@ conda install pkg-config libuv | ||||
| pip install mkl-static mkl-include | ||||
| # Add these packages if torch.distributed is needed. | ||||
| # Distributed package support on Windows is a prototype feature and is subject to changes. | ||||
| conda install -c conda-forge libuv=1.51 | ||||
| conda install -c conda-forge libuv | ||||
| ``` | ||||
|  | ||||
| #### Install PyTorch | ||||
|  | ||||
| @ -317,20 +317,10 @@ IF(USE_FBGEMM_GENAI) | ||||
|         -greedy-reverse-local-assignment=1 | ||||
|         -fhip-new-launch-api) | ||||
|  | ||||
|       # Only compile for gfx942 for now. | ||||
|       # This is rather hacky, I could not figure out a clean solution :( | ||||
|       set(HIP_CLANG_FLAGS_ORIGINAL ${HIP_CLANG_FLAGS}) | ||||
|       string(REGEX REPLACE "--offload-arch=[^ ]*" "" FILTERED_HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS}") | ||||
|       if("gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|         list(APPEND FILTERED_HIP_CLANG_FLAGS --offload-arch=gfx942;) | ||||
|       endif() | ||||
|       set(HIP_CLANG_FLAGS ${FILTERED_HIP_CLANG_FLAGS}) | ||||
|  | ||||
|       hip_add_library( | ||||
|         fbgemm_genai STATIC | ||||
|         ${fbgemm_genai_native_rocm_hip} | ||||
|         HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS}) | ||||
|       set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS_ORIGINAL}) | ||||
|       set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||||
|       target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES) | ||||
|  | ||||
|  | ||||
| @ -468,7 +468,7 @@ inline Tensor _sum_to( | ||||
|       // if we assume no reduction due to unbacked we ensure that at runtime. | ||||
|       TORCH_MAYBE_SYM_CHECK( | ||||
|           sym_eq(shape[i - leading_dims], sizes[i]), | ||||
|           "non-reduction path was assumed due to unbacked symbols expected those two sizes to be the same:", | ||||
|           "non-reduction path was assumed due to unabcked symbols expected those two sizes to be the same:", | ||||
|           shape[i - leading_dims], | ||||
|           ", ", | ||||
|           sizes[i]) | ||||
|  | ||||
| @ -9,6 +9,11 @@ | ||||
|  | ||||
| namespace at::functionalization { | ||||
|  | ||||
| ViewMeta ViewMeta::to_out_idx(int64_t out_idx) { | ||||
|   if (out_idx == this->out_index) return *this; | ||||
|   return ViewMeta(forward_fn, reverse_fn, has_symbolic_inputs, is_multi_output, is_as_strided, out_idx); | ||||
| } | ||||
|  | ||||
| // Note [Functionalization: Alias Removal Part 2] | ||||
| // See Note [Functionalization: Alias Removal] for more details. | ||||
| // This function applies a single update from one of the views to the StorageImpl. | ||||
| @ -37,12 +42,12 @@ namespace at::functionalization { | ||||
| static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) { | ||||
|   at::Tensor t = update.new_val; | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t)); | ||||
|   if (update.view_metas.empty()) { return t; } | ||||
|   if (update.view_metas.empty()) return t; | ||||
|  | ||||
|   std::vector<at::Tensor> tmp_values({base}); | ||||
|   tmp_values.reserve(update.view_metas.size()); | ||||
|   for (size_t i = 0; i < update.view_metas.size() - 1; ++i) { | ||||
|     at::Tensor next_view = update.view_metas[i]->forward(tmp_values.back()); | ||||
|     at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index); | ||||
|     // NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided | ||||
|     // All of these ops require additional information to recover the sizes of the original tensor. | ||||
|     // If need to, we could probably apply this optimization and only bother computing tmp_values | ||||
| @ -50,8 +55,9 @@ static const Tensor apply_update(const FunctionalStorageImpl::Update& update, co | ||||
|     tmp_values.push_back(std::move(next_view)); | ||||
|   } | ||||
|   for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) { | ||||
|     int64_t out_idx = update.view_metas[i].out_index; | ||||
|     // Each view inverse is implemented in ViewInverses.cpp. | ||||
|     t = update.view_metas[i]->reverse(tmp_values[i], t); | ||||
|     t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx); | ||||
|   } | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t)); | ||||
|   return t; | ||||
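For readers unfamiliar with this replay pattern, the sketch below is a standalone illustration (plain std::vector as a stand-in tensor and a hypothetical Step struct, not the ATen types above): intermediate views are materialized front-to-back, and the view inverses are then applied back-to-front so the mutation lands in the base.

```cpp
// Standalone sketch of the update-replay pattern: replay the view chain
// forward, then apply each view inverse in reverse order.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for at::Tensor

struct Step {
  std::function<Tensor(const Tensor&)> forward;                 // replay the view
  std::function<Tensor(const Tensor&, const Tensor&)> reverse;  // scatter back into the base
};

int main() {
  // Chain: base -> first half -> first element of that half.
  std::vector<Step> chain = {
      {[](const Tensor& b) { return Tensor(b.begin(), b.begin() + b.size() / 2); },
       [](const Tensor& b, const Tensor& v) {
         Tensor r = b;
         std::copy(v.begin(), v.end(), r.begin());
         return r;
       }},
      {[](const Tensor& b) { return Tensor{b.front()}; },
       [](const Tensor& b, const Tensor& v) {
         Tensor r = b;
         r.front() = v.front();
         return r;
       }}};

  Tensor base{1, 2, 3, 4};
  std::vector<Tensor> tmp{base};
  for (size_t i = 0; i + 1 < chain.size(); ++i) {
    tmp.push_back(chain[i].forward(tmp.back()));  // intermediate views
  }
  Tensor updated{42};  // the mutated innermost view
  for (int64_t i = static_cast<int64_t>(chain.size()) - 1; i >= 0; --i) {
    updated = chain[i].reverse(tmp[i], updated);  // apply inverses back to front
  }
  std::cout << updated[0] << '\n';  // 42 written through to the base
}
```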
| @ -105,13 +111,13 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base) | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_)); | ||||
| } | ||||
|  | ||||
| void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<std::shared_ptr<ViewMeta>>& metas) { | ||||
| void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& metas) { | ||||
|   TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage"); | ||||
|  | ||||
|   if (metas.size() > 1) { | ||||
|     for (size_t i = 1; i < metas.size(); ++i) { | ||||
|       // Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI | ||||
|       TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i]->is_as_strided, | ||||
|       TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i].is_as_strided, | ||||
| "During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i, | ||||
| " was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today," | ||||
| "so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you " | ||||
|  | ||||
| @ -8,89 +8,44 @@ namespace at::functionalization { | ||||
|  | ||||
| // See Note [Functionalization Pass In Core] | ||||
|  | ||||
| enum class InverseReturnMode { | ||||
|   /// Specifies that functional inverses should always return a view. | ||||
|   AlwaysView, | ||||
|   /// Specifies that functional inverses should always return a non-view / copy. | ||||
|   NeverView, | ||||
|   /// Specifies that functional inverses should return a view unless a (copying) | ||||
|   /// scatter | ||||
|   /// inverse exists, in which case that will be used instead. | ||||
|   /// This avoids as_strided() calls that can be difficult for subclasses to | ||||
|   /// handle. | ||||
|   ViewOrScatterInverse, | ||||
| }; | ||||
|  | ||||
| #define FUNCTIONALIZATION_VIEWMETA_NAME(TYPE) \ | ||||
|   static const char* name() {                 \ | ||||
|     return #TYPE;                             \ | ||||
|   } | ||||
|  | ||||
| #define FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(...) \ | ||||
|   using SerializableTuple = std::tuple<__VA_ARGS__> | ||||
|  | ||||
| // ViewMeta is a class used by the functionalization pass to navigate between | ||||
| // a base tensor and a view tensor. | ||||
| // For example, if I call `b = a.view1(...)` | ||||
| // the functionalization pass will generate and store a ViewMeta specialization | ||||
| // for `view1` operation on b that looks like: | ||||
| // the functionalization pass will generate and store a ViewMeta on b that looks | ||||
| // like: | ||||
| // | ||||
| // struct TORCH_API view1_ViewMeta : public ViewMeta { | ||||
| //   FUNCTIONALIZATION_VIEWMETA_NAME(view1_ViewMeta); | ||||
| //   FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE( | ||||
| //       bool /* reapply_views */, | ||||
| //       const std::vector<int64_t>&); | ||||
| // | ||||
| //   view1_ViewMeta(const SerializableTuple& tpl) | ||||
| //       : view1_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {} | ||||
| // | ||||
| //   view1_ViewMeta(bool reapply_views, const std::vector<int64_t>& size) | ||||
| //       : ViewMeta(/*has_symbolic_inputs=*/false), | ||||
| //         reapply_views(reapply_views), | ||||
| //         size(size) {} | ||||
| // | ||||
| //   Tensor forward(const Tensor& base) override { | ||||
| //       return base.view1(...); | ||||
| // ViewMeta( | ||||
| //   [<captures>](const Tensor& base, int64_t mutated_view_idx) { | ||||
| //     return base.view1(...); | ||||
| //   }, | ||||
| //   [<captures>](const at::Tensor& base, const at::Tensor& mutated_view, | ||||
| //   int64_t mutated_view_idx) -> at::Tensor { | ||||
| //     return at::functionalization::impl::view1_inverse(base, mutated_view, | ||||
| //     ...); | ||||
| //   } | ||||
| // | ||||
| //   Tensor reverse(const Tensor& base, const Tensor& mutated_view) override { | ||||
| //       return at::functionalization::impl::view1_inverse(base, mutated_view, | ||||
| //       ...); | ||||
| //   } | ||||
| // The forward_fn lambda describes how to replay view1 on a tensor. | ||||
| // | ||||
| //   SerializableTuple to_serializable_tuple() { | ||||
| //     return std::make_tuple(reapply_views, size); | ||||
| //   } | ||||
| // | ||||
| //   bool reapply_views; | ||||
| //   std::vector<int64_t> size; | ||||
| // }; | ||||
| // | ||||
| // The forward function describes how to replay view1 on a tensor. | ||||
| // | ||||
| // The reverse function describes how, given a tensor that is already a view, | ||||
| // The reverse_fn lambda describes how, given a tensor that is already a view, | ||||
| // how to get the corresponding base tensor. See Note [Functionalization Pass: | ||||
| // View Inverses] for details. | ||||
| // | ||||
| // `SerializedTuple` is a typedef that defines an `std::tuple<...>` type | ||||
| // representing the `ViewMeta` instance state. Methods that take in/return such | ||||
| // a type are used for supporting pickle serialization. | ||||
| struct ViewMeta { | ||||
|   ViewMeta( | ||||
|       std::function<Tensor(const Tensor&, int64_t)> forward, | ||||
|       std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse, | ||||
|       bool has_symbolic_inputs, | ||||
|       bool is_multi_output = false, | ||||
|       bool is_as_strided = false, | ||||
|       int64_t out_idx = 0) | ||||
|       : out_index(out_idx), | ||||
|       : forward_fn(std::move(forward)), | ||||
|         reverse_fn(std::move(reverse)), | ||||
|         out_index(out_idx), | ||||
|         is_multi_output(is_multi_output), | ||||
|         is_as_strided(is_as_strided), | ||||
|         has_symbolic_inputs(has_symbolic_inputs) {} | ||||
|  | ||||
|   virtual ~ViewMeta() = default; | ||||
|  | ||||
|   virtual Tensor forward(const Tensor& base) = 0; | ||||
|   virtual Tensor reverse(const Tensor& base, const Tensor& mutated_view) = 0; | ||||
|  | ||||
|   std::function<Tensor(const Tensor&, int64_t)> forward_fn; | ||||
|   std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn; | ||||
|   // See Note [out_idx in ViewMeta] | ||||
|   int64_t out_index; | ||||
|  | ||||
| @ -102,17 +57,10 @@ struct ViewMeta { | ||||
|   // Tells us if this view operation has any symbolic inputs | ||||
|   bool has_symbolic_inputs; | ||||
|  | ||||
|   // Returns a new ViewMeta with the same forward/reverse | ||||
|   // Returns a copy of the current ViewMeta, if out_idx matches the current | ||||
|   // out_index. Otherwise, returns a new ViewMeta with the same forward/reverse | ||||
|   // functions, but a new out index. | ||||
|   // | ||||
|   // This method should be implemented by those `ViewMeta` that have more than | ||||
|   // one output. | ||||
|   virtual std::shared_ptr<ViewMeta> to_out_index(int64_t out_index) { | ||||
|     TORCH_CHECK_NOT_IMPLEMENTED( | ||||
|         false, | ||||
|         "ViewMeta::to_out_index not implemented. ", | ||||
|         "Likely because there's only one output."); | ||||
|   } | ||||
|   ViewMeta to_out_idx(int64_t out_idx); | ||||
| }; | ||||
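As a concrete, hedged illustration of the forward_fn/reverse_fn contract documented above, here is a minimal standalone sketch; MiniViewMeta and the std::vector stand-in tensor are hypothetical names for this example only, not part of the codebase:

```cpp
// Minimal sketch of pairing a view-replay lambda with its inverse:
// forward_fn recreates the view from a base, reverse_fn scatters a
// mutated view back into the base.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for at::Tensor

struct MiniViewMeta {
  std::function<Tensor(const Tensor&, int64_t)> forward_fn;
  std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
  int64_t out_index = 0;
};

int main() {
  // "View": take the first half of the base; "inverse": write it back.
  MiniViewMeta slice_meta{
      [](const Tensor& base, int64_t) {
        return Tensor(base.begin(), base.begin() + base.size() / 2);
      },
      [](const Tensor& base, const Tensor& mutated_view, int64_t) {
        Tensor result = base;
        std::copy(mutated_view.begin(), mutated_view.end(), result.begin());
        return result;
      }};

  Tensor base{1, 2, 3, 4};
  Tensor view = slice_meta.forward_fn(base, slice_meta.out_index);  // {1, 2}
  view[0] = 42;                                                     // mutate the view
  Tensor new_base = slice_meta.reverse_fn(base, view, slice_meta.out_index);
  std::cout << new_base[0] << '\n';  // 42: the mutation propagated to the base
}
```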
|  | ||||
| // FunctionalStorageImpl is a subclass of StorageImpl used by the | ||||
| @ -145,14 +93,14 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl { | ||||
|     // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) | ||||
|     const at::Tensor new_val; | ||||
|     // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) | ||||
|     const std::vector<std::shared_ptr<ViewMeta>> view_metas; | ||||
|     const std::vector<ViewMeta> view_metas; | ||||
|   }; | ||||
|  | ||||
|   explicit FunctionalStorageImpl(const Tensor& value); | ||||
|  | ||||
|   void add_update( | ||||
|       const Tensor& updated_val, | ||||
|       const std::vector<std::shared_ptr<ViewMeta>>& view_metas); | ||||
|       const std::vector<ViewMeta>& view_metas); | ||||
|   bool apply_updates(); | ||||
|   const Tensor& base() { | ||||
|     return base_; | ||||
|  | ||||
| @ -129,19 +129,17 @@ void FunctionalTensorWrapper::freeze_storage() const { | ||||
| // - view_value: The output tensor that we need to wrap. | ||||
| // - base: The "base" of the view that `view_value` was generated from. | ||||
| // See Note [Functionalization: Alias Removal Part 2] for more details on the mutation replay logic. | ||||
| FunctionalTensorWrapper::FunctionalTensorWrapper( | ||||
|     const Tensor& view_value, | ||||
|     const FunctionalTensorWrapper* base, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta) | ||||
|     : c10::TensorImpl( | ||||
|           c10::DispatchKeySet(DispatchKey::Functionalize), | ||||
|           view_value.dtype(), | ||||
|           base->storage().data_ptr().device()), | ||||
|       value_(view_value), | ||||
|       is_multi_output_view_( | ||||
|           base->is_multi_output_view_ || meta->is_multi_output), | ||||
|       was_storage_changed_(base->was_storage_changed_), | ||||
|       is_symbolic_(base->is_symbolic_) { | ||||
| FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const FunctionalTensorWrapper* base, const functionalization::ViewMeta& meta) | ||||
|   : c10::TensorImpl( | ||||
|       c10::DispatchKeySet(DispatchKey::Functionalize), | ||||
|       view_value.dtype(), | ||||
|       base->storage().data_ptr().device() | ||||
|     ), | ||||
|     value_(view_value), | ||||
|     is_multi_output_view_(base->is_multi_output_view_ || meta.is_multi_output), | ||||
|     was_storage_changed_(base->was_storage_changed_), | ||||
|     is_symbolic_(base->is_symbolic_) | ||||
| { | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(value_)); | ||||
|   TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize)); | ||||
|   set_constructor_metadata(); | ||||
| @ -150,10 +148,11 @@ FunctionalTensorWrapper::FunctionalTensorWrapper( | ||||
|       view_metas_ = base->view_metas_;  // copy | ||||
|   } | ||||
|   view_metas_.push_back(meta); | ||||
|   maybe_mark_symbolic(meta.get()); | ||||
|   maybe_mark_symbolic(meta); | ||||
|   storage_ = base->storage_; // alias this tensor's storage with the base tensor's | ||||
| } | ||||
|  | ||||
|  | ||||
| functionalization::FunctionalStorageImpl* FunctionalTensorWrapper::functional_storage_impl() const { | ||||
|   return static_cast<functionalization::FunctionalStorageImpl*>(storage_.unsafeGetStorageImpl()); | ||||
| } | ||||
| @ -177,18 +176,18 @@ bool FunctionalTensorWrapper::is_up_to_date() const { | ||||
| } | ||||
|  | ||||
| // See Note [Functionalization Pass - Inplace View Ops] | ||||
| void FunctionalTensorWrapper::mutate_view_meta(const std::shared_ptr<at::functionalization::ViewMeta>& meta) { | ||||
| void FunctionalTensorWrapper::mutate_view_meta(const at::functionalization::ViewMeta& meta) { | ||||
|   view_metas_.push_back(meta); | ||||
|   // Manually track the fact that this tensor received a metadata mutation! | ||||
|   has_metadata_mutation_ = true; | ||||
|   // Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation. | ||||
|   maybe_mark_symbolic(meta.get()); | ||||
|   maybe_mark_symbolic(meta); | ||||
|   // Note [Functionalization Pass - Inplace View Ops] | ||||
|   // So, these ops are special - they're mutation AND view ops. They get special codegen. | ||||
|   // An example is transpose_, e.g. `a.transpose_()` | ||||
|   // Calling transpose_() should ensure that a gets an alias, and append the new ViewMeta to a's current list of ViewMetas. | ||||
|   at::AutoDispatchSkipFunctionalize guard; | ||||
|   value_ = meta->forward(value_); | ||||
|   value_ = meta.forward_fn(value_, meta.out_index); | ||||
|   TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize)); | ||||
| } | ||||
|  | ||||
| @ -369,8 +368,15 @@ void FunctionalTensorWrapper::sync_() { | ||||
|   regenerate_from_base(); | ||||
| } | ||||
|  | ||||
| const std::vector<std::shared_ptr<functionalization::ViewMeta>>& FunctionalTensorWrapper::view_metas() const { | ||||
|   return view_metas_; | ||||
| Tensor FunctionalTensorWrapper::apply_view_metas(const Tensor& base) { | ||||
|   auto t = base; | ||||
|  | ||||
|   // Reapply views to get the viewed tensor from the base in alias_ | ||||
|   for (auto& view_meta: view_metas_) { | ||||
|     t = view_meta.forward_fn(t, view_meta.out_index); | ||||
|   } | ||||
|  | ||||
|   return t; | ||||
| } | ||||
|  | ||||
| void FunctionalTensorWrapper::regenerate_from_base() { | ||||
| @ -379,7 +385,7 @@ void FunctionalTensorWrapper::regenerate_from_base() { | ||||
|   auto t = storage_impl->base(); | ||||
|  | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t)); | ||||
|   t = at::functionalization::impl::apply_view_meta_sequence(t, view_metas_); | ||||
|   t = apply_view_metas(t); | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t)); | ||||
|  | ||||
|   replace_(t, /*from_lazy_regenerate=*/true); | ||||
| @ -721,11 +727,11 @@ bool isFunctionalTensor(const std::optional<Tensor>& t) { | ||||
| } | ||||
|  | ||||
| bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) { | ||||
|   if (t_list.empty()) { return false; } | ||||
|   if (t_list.empty()) return false; | ||||
|   auto functional_count = 0; | ||||
|   for (const auto i : c10::irange(t_list.size())) { | ||||
|     auto const & e= t_list[i]; | ||||
|     if (!e.has_value() || !e->defined()) { continue; } | ||||
|     if (!e.has_value() || !e->defined()) continue; | ||||
|     if (isFunctionalTensor(e)) { | ||||
|       ++functional_count; | ||||
|     } | ||||
| @ -735,10 +741,10 @@ bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) { | ||||
|  | ||||
| template <typename T> | ||||
| static bool isFunctionalTensorIListRef(c10::IListRef<T> list) { | ||||
|   if (list.size() == 0) { return false; } | ||||
|   if (list.size() == 0) return false; | ||||
|   auto functional_count = 0; | ||||
|   for (const auto& tensor : list) { | ||||
|     if (!tensor.defined()) { continue; } | ||||
|     if (!tensor.defined()) continue; | ||||
|     if (isFunctionalTensor(tensor)) { | ||||
|       ++functional_count; | ||||
|     } | ||||
| @ -756,28 +762,20 @@ void freeze_functional_tensor(const Tensor& tensor) { | ||||
|   functional_base_impl->freeze_storage(); | ||||
| } | ||||
|  | ||||
| Tensor create_functional_tensor_with_view_meta( | ||||
|     const at::Tensor& view_to_wrap, | ||||
|     const at::Tensor& base, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta, | ||||
|     int64_t out_idx) { | ||||
| Tensor create_functional_tensor_with_view_meta(const at::Tensor& view_to_wrap, const at::Tensor& base, functionalization::ViewMeta meta, int64_t out_idx) { | ||||
|   TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(view_to_wrap)); | ||||
|   TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(base)); | ||||
|   auto functional_base_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(base); | ||||
|   auto meta_ = meta; | ||||
|   if (out_idx != 0) { | ||||
|     // Note [out_idx in ViewMeta] | ||||
|     // When a view op outputs multiple tensors, each output needs its own separate ViewMeta. | ||||
|     // Each ViewMeta also tracks the index of the particular output tensor, which is needed in the reverse function. | ||||
|     meta_ = meta->to_out_index(out_idx); | ||||
|     meta = meta.to_out_idx(out_idx); | ||||
|   } | ||||
|   return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta_); | ||||
|   return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta); | ||||
| } | ||||
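To see why the out index matters, here is a hypothetical, self-contained sketch (std::vector stand-ins and illustrative chunk_forward/chunk_reverse helpers, not the real codegen): the reverse function needs the index to know which slot of the base the mutated output occupies.

```cpp
// Sketch of a two-output "chunk" view: each output carries its own index,
// and the reverse uses that index to scatter the mutation into the right slot.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

using Tensor = std::vector<float>;

// Forward: chunk `base` in two and return the chunk at `idx`.
Tensor chunk_forward(const Tensor& base, int64_t idx) {
  auto half = static_cast<std::ptrdiff_t>(base.size() / 2);
  return idx == 0 ? Tensor(base.begin(), base.begin() + half)
                  : Tensor(base.begin() + half, base.end());
}

// Reverse: scatter the mutated chunk back into its slot of `base`.
Tensor chunk_reverse(const Tensor& base, const Tensor& mutated, int64_t idx) {
  Tensor out = base;
  auto offset = idx == 0 ? std::ptrdiff_t{0}
                         : static_cast<std::ptrdiff_t>(base.size() / 2);
  std::copy(mutated.begin(), mutated.end(), out.begin() + offset);
  return out;
}

int main() {
  Tensor base{1, 2, 3, 4};
  Tensor second = chunk_forward(base, /*idx=*/1);  // {3, 4}
  second[1] = 9;                                   // mutate the second output
  Tensor new_base = chunk_reverse(base, second, /*idx=*/1);
  std::cout << new_base[3] << '\n';  // 9
}
```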
|  | ||||
| std::vector<Tensor> create_functional_tensor_with_view_meta( | ||||
|     ITensorListRef view_to_wrap, | ||||
|     const at::Tensor& base, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta) { | ||||
| std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_to_wrap, const at::Tensor& base, const functionalization::ViewMeta& meta) { | ||||
|   std::vector<Tensor> outputs(view_to_wrap.size()); | ||||
|   int64_t i = 0; | ||||
|   for (const auto& tensor : view_to_wrap) { | ||||
| @ -787,22 +785,12 @@ std::vector<Tensor> create_functional_tensor_with_view_meta( | ||||
|   return outputs; | ||||
| } | ||||
|  | ||||
| void mutate_view_meta(const at::Tensor& self, const std::shared_ptr<functionalization::ViewMeta>& meta) { | ||||
| void mutate_view_meta(const at::Tensor& self, const functionalization::ViewMeta& meta) { | ||||
|   TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self)); | ||||
|   auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self); | ||||
|   self_impl->mutate_view_meta(meta); | ||||
| } | ||||
|  | ||||
| Tensor apply_view_meta_sequence( | ||||
|     const Tensor& base, | ||||
|     const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence) { | ||||
|   Tensor r = base; | ||||
|   for (auto& vm : sequence) { | ||||
|     r = vm->forward(r); | ||||
|   } | ||||
|   return r; | ||||
| } | ||||
|  | ||||
| // Note [Propagating strides in the functionalization pass] | ||||
| // In order to properly compute stride information, the functionalization pass | ||||
| // calls each {view} reference implementations with meta tensors. | ||||
| @ -896,7 +884,7 @@ void functionalize_op_helper(const c10::OperatorHandle& op, torch::jit::Stack* s | ||||
|     const auto& ivalue = returns[idx]; | ||||
|     if (ivalue.isTensor()) { | ||||
|       const auto& t = ivalue.toTensor(); | ||||
|       if (!t.defined()) { continue; } | ||||
|       if (!t.defined()) continue; | ||||
|       at::functionalization::impl::sync(t); | ||||
|       auto t_new = c10::IValue(at::functionalization::impl::from_functional_tensor(t)); | ||||
|       (*stack)[returns_begin + idx] = t_new; | ||||
|  | ||||
| @ -56,7 +56,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { | ||||
|   explicit FunctionalTensorWrapper( | ||||
|       const Tensor& view_value, | ||||
|       const FunctionalTensorWrapper* base, | ||||
|       const std::shared_ptr<functionalization::ViewMeta>& meta); | ||||
|       const functionalization::ViewMeta& meta); | ||||
|  | ||||
|   // Get the underlying, actual tensor, that doesn't know anything about | ||||
|   // functionalization. | ||||
| @ -99,17 +99,17 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { | ||||
|         ->are_all_mutations_under_no_grad_or_inference_mode(); | ||||
|   } | ||||
|  | ||||
|   void maybe_mark_symbolic(functionalization::ViewMeta* meta) { | ||||
|     is_symbolic_ = is_symbolic_ | meta->has_symbolic_inputs; | ||||
|   void maybe_mark_symbolic(const functionalization::ViewMeta& meta) { | ||||
|     is_symbolic_ = is_symbolic_ | meta.has_symbolic_inputs; | ||||
|   } | ||||
|  | ||||
|   bool is_symbolic() const { | ||||
|     return is_symbolic_; | ||||
|   } | ||||
|  | ||||
|   // Retrieves the ViewMeta sequence of this tensor. | ||||
|   const std::vector<std::shared_ptr<functionalization::ViewMeta>>& view_metas() | ||||
|       const; | ||||
|   // Runs the forward_fn of every ViewMeta collected in the current instance | ||||
|   // to some other base. | ||||
|   Tensor apply_view_metas(const Tensor& base); | ||||
|  | ||||
|   // Sync's the underlying tensor with its alias, if it's out of date. This | ||||
|   // involves two steps: 1) Apply any pending updates/mutations to the alias 2) | ||||
| @ -146,8 +146,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { | ||||
|   // from the base tensor. This method is used by inplace-view ops like | ||||
|   // transpose_. It appends a ViewMeta to the existing stack, and refreshes the | ||||
|   // tensor by replaying the views off of the alias. | ||||
|   void mutate_view_meta( | ||||
|       const std::shared_ptr<at::functionalization::ViewMeta>& meta); | ||||
|   void mutate_view_meta(const at::functionalization::ViewMeta& meta); | ||||
|  | ||||
|   // Custom implementation of self.set_(src) | ||||
|   void set__impl(const FunctionalTensorWrapper* other); | ||||
| @ -286,7 +285,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { | ||||
|   bool is_symbolic_ = false; | ||||
|  | ||||
|   size_t generation_ = 0; | ||||
|   std::vector<std::shared_ptr<at::functionalization::ViewMeta>> view_metas_; | ||||
|   std::vector<at::functionalization::ViewMeta> view_metas_; | ||||
|  | ||||
|  protected: | ||||
|   static void copy_tensor_metadata( | ||||
| @ -378,20 +377,16 @@ TORCH_API void propagate_xla_data_direct( | ||||
| Tensor create_functional_tensor_with_view_meta( | ||||
|     const Tensor& view_to_wrap, | ||||
|     const Tensor& base, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta, | ||||
|     functionalization::ViewMeta meta, | ||||
|     int64_t out_idx = 0); | ||||
| std::vector<Tensor> create_functional_tensor_with_view_meta( | ||||
|     ITensorListRef view_to_wrap, | ||||
|     const Tensor& base, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta); | ||||
|     const functionalization::ViewMeta& meta); | ||||
|  | ||||
| void mutate_view_meta( | ||||
|     const Tensor& self, | ||||
|     const std::shared_ptr<functionalization::ViewMeta>& meta); | ||||
|  | ||||
| TORCH_API Tensor apply_view_meta_sequence( | ||||
|     const Tensor& base, | ||||
|     const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence); | ||||
|     const functionalization::ViewMeta& meta); | ||||
|  | ||||
| void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out); | ||||
| void set_sizes_strides_offset( | ||||
|  | ||||
| @ -1,5 +1,3 @@ | ||||
| #include <ATen/FunctionalizeFallbackKernel.h> | ||||
|  | ||||
| #include <ATen/core/dispatch/Dispatcher.h> | ||||
| #include <ATen/core/LegacyTypeDispatch.h> | ||||
| #include <ATen/EmptyTensor.h> | ||||
| @ -9,6 +7,7 @@ | ||||
| #include <torch/library.h> | ||||
| #include <c10/util/irange.h> | ||||
| #include <c10/util/strides.h> | ||||
| #include <ATen/EmptyTensor.h> | ||||
|  | ||||
| #ifndef AT_PER_OPERATOR_HEADERS | ||||
| #include <ATen/ATen.h> | ||||
| @ -29,31 +28,6 @@ | ||||
| #include <utility> | ||||
| #endif | ||||
|  | ||||
| namespace at::functionalization { | ||||
|  | ||||
| Tensor resize__ViewMeta::forward(const Tensor& base) { | ||||
|   if (reapply_views) { | ||||
|     return base.as_strided(size, c10::contiguous_strides(size)); | ||||
|   } else { | ||||
|     return at::as_strided_copy(base, size, c10::contiguous_strides(size)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| Tensor resize__ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) { | ||||
|   return base.as_strided_scatter( | ||||
|       mutated_view, size, c10::contiguous_strides(size)); | ||||
| } | ||||
|  | ||||
| Tensor _unsafe_view_ViewMeta::forward(const Tensor& base) { | ||||
|   return at::_unsafe_view_symint(base, size); | ||||
| } | ||||
|  | ||||
| Tensor _unsafe_view_ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) { | ||||
|   return at::_unsafe_view_symint(mutated_view, base.sym_sizes()); | ||||
| } | ||||
|  | ||||
| } // namespace at::functionalization | ||||
|  | ||||
| namespace { | ||||
|   void functionalizeFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatchKeySet [[maybe_unused]], torch::jit::Stack* stack) { | ||||
|     const auto& schema = op.schema(); | ||||
| @ -132,9 +106,7 @@ namespace { | ||||
|       const auto& ivalue = returns[idx]; | ||||
|       if (ivalue.isTensor() && should_wrap_outputs) { | ||||
|         const auto& t = ivalue.toTensor(); | ||||
|         if (!t.defined()) { | ||||
|           continue; | ||||
|         } | ||||
|         if (!t.defined()) continue; | ||||
|         auto t_new = c10::IValue(at::functionalization::impl::to_functional_tensor(t)); | ||||
|         (*stack)[returns_begin + idx] = t_new; | ||||
|       } else if (ivalue.isTensorList() && should_wrap_outputs) { | ||||
| @ -197,8 +169,19 @@ static const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatch | ||||
|   // The output of resizing is equivalent to taking a slice of a larger tensor. | ||||
|   // We have to emulate this "slicing" with an as_strided call. | ||||
|   auto reapply_views = at::functionalization::impl::getFunctionalizationReapplyViewsTLS(); | ||||
|   auto view_meta = std::make_shared<at::functionalization::resize__ViewMeta>( | ||||
|       reapply_views, size.vec()); | ||||
|   at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta( | ||||
|     [reapply_views = reapply_views, size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor { | ||||
|       if (reapply_views) { | ||||
|         return base.as_strided(size, c10::contiguous_strides(size)); | ||||
|       } else { | ||||
|         return at::as_strided_copy(base, size, c10::contiguous_strides(size)); | ||||
|       } | ||||
|     }, | ||||
|     [size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor { | ||||
|       return base.as_strided_scatter(mutated_view, size, c10::contiguous_strides(size)); | ||||
|     }, | ||||
|     /*has_symbolic_inputs=*/false | ||||
|   ); | ||||
|   at::functionalization::impl::mutate_view_meta(self, view_meta); | ||||
|   return self; | ||||
| } | ||||
| @ -317,11 +300,17 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt | ||||
|     tmp_output = at::_unsafe_view_symint(self_, size); | ||||
|   } | ||||
|  | ||||
|   bool has_symbolic_inputs = std::any_of( | ||||
|       size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); }); | ||||
|   auto view_meta = | ||||
|       std::make_shared<at::functionalization::_unsafe_view_ViewMeta>( | ||||
|           has_symbolic_inputs, size.vec()); | ||||
|   bool has_symbolic_inputs = std::any_of(size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); }); | ||||
|  | ||||
|   at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta( | ||||
|     [size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor { | ||||
|       return at::_unsafe_view_symint(base, size); | ||||
|     }, | ||||
|     [size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor { | ||||
|       return at::_unsafe_view_symint(mutated_view, base.sym_sizes()); | ||||
|     }, | ||||
|     /*has_symbolic_inputs=*/has_symbolic_inputs | ||||
|   ); | ||||
|  | ||||
|   auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta)); | ||||
|   // See  Note [Propagating strides in the functionalization pass] | ||||
|  | ||||
| @ -1,58 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/FunctionalStorageImpl.h> | ||||
|  | ||||
| namespace at::functionalization { | ||||
|  | ||||
| // `ViewMeta` implementation for `resize_` operation. | ||||
| struct TORCH_API resize__ViewMeta : public ViewMeta { | ||||
|   FUNCTIONALIZATION_VIEWMETA_NAME(resize__ViewMeta) | ||||
|   FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE( | ||||
|       bool /* reapply_views */, | ||||
|       const std::vector<int64_t>&); | ||||
|  | ||||
|   resize__ViewMeta(const SerializableTuple& tpl) | ||||
|       : resize__ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {} | ||||
|  | ||||
|   resize__ViewMeta(bool reapply_views, const std::vector<int64_t>& size) | ||||
|       : ViewMeta(/*has_symbolic_inputs=*/false), | ||||
|         reapply_views(reapply_views), | ||||
|         size(size) {} | ||||
|  | ||||
|   Tensor forward(const Tensor& base) override; | ||||
|   Tensor reverse(const Tensor& base, const Tensor& mutated_view) override; | ||||
|  | ||||
|   SerializableTuple to_serializable_tuple() { | ||||
|     return std::make_tuple(reapply_views, size); | ||||
|   } | ||||
|  | ||||
|   bool reapply_views; | ||||
|   std::vector<int64_t> size; | ||||
| }; | ||||
|  | ||||
| // `ViewMeta` implementation for `_unsafe_view` operation. | ||||
| struct TORCH_API _unsafe_view_ViewMeta : public ViewMeta { | ||||
|   FUNCTIONALIZATION_VIEWMETA_NAME(_unsafe_view_ViewMeta) | ||||
|   FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE( | ||||
|       bool /* has_symbolic_inputs */, | ||||
|       const std::vector<c10::SymInt>&); | ||||
|  | ||||
|   _unsafe_view_ViewMeta(const SerializableTuple& tpl) | ||||
|       : _unsafe_view_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {} | ||||
|  | ||||
|   _unsafe_view_ViewMeta( | ||||
|       bool has_symbolic_inputs, | ||||
|       const std::vector<c10::SymInt>& size) | ||||
|       : ViewMeta(has_symbolic_inputs), size(size) {} | ||||
|  | ||||
|   Tensor forward(const Tensor& base) override; | ||||
|   Tensor reverse(const Tensor& base, const Tensor& mutated_view) override; | ||||
|  | ||||
|   SerializableTuple to_serializable_tuple() { | ||||
|     return std::make_tuple(has_symbolic_inputs, size); | ||||
|   } | ||||
|  | ||||
|   std::vector<c10::SymInt> size; | ||||
| }; | ||||
|  | ||||
| } // namespace at::functionalization | ||||
| @ -45,39 +45,7 @@ inline void infer_size_impl( | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (infer_dim) { | ||||
|     // numel is the product of known sizes, it has to be divisible by newsize. | ||||
|     // and newsize should be positive unless newsize == numel (we throw | ||||
|     // different) error message in that case. | ||||
|     if constexpr (std::is_same_v<NumelType, c10::SymInt>) { | ||||
|       auto v = newsize.maybe_as_int(); | ||||
|       if (v and *v == 0) { | ||||
|         // Avoid div by 0 when sym_eq(numel % newsize, 0) is constructed! | ||||
|         // which may happen when newsize is not a symbol! if its a symbol | ||||
|         // division won't happen anyway during compile. | ||||
|         TORCH_MAYBE_SYM_CHECK( | ||||
|             numel == newsize, | ||||
|             "shape '", | ||||
|             shape, | ||||
|             "' is invalid for input of size ", | ||||
|             numel); | ||||
|       } else { | ||||
|         auto cond = sym_gt(newsize, 0) | ||||
|                         .sym_and(sym_eq(numel % newsize, 0)) | ||||
|                         .sym_or(sym_eq(numel, newsize)); | ||||
|         TORCH_MAYBE_SYM_CHECK( | ||||
|             cond, "shape '", shape, "' is invalid for input of size ", numel); | ||||
|       } | ||||
|  | ||||
|     } else { | ||||
|       TORCH_CHECK( | ||||
|           (newsize > 0 && (numel % newsize == 0)) || numel == newsize, | ||||
|           "shape '", | ||||
|           shape, | ||||
|           "' is invalid for input of size ", | ||||
|           numel); | ||||
|     } | ||||
|  | ||||
|   auto set_infer_dim = [&]() { | ||||
|     // We have a degree of freedom here to select the dimension size; follow | ||||
|     // NumPy semantics and just bail.  However, a nice error message is needed | ||||
|     // because users often use `view` as a way to flatten & unflatten | ||||
| @ -86,15 +54,19 @@ inline void infer_size_impl( | ||||
|     // works yet | ||||
|     //   empty_tensor.view(-1, 0) | ||||
|     // doesn't. | ||||
|     TORCH_MAYBE_SYM_CHECK( | ||||
|     TORCH_CHECK( | ||||
|         newsize != 0, | ||||
|         "cannot reshape tensor of 0 elements into shape ", | ||||
|         shape, | ||||
|         " because the unspecified dimension size -1 can be any " | ||||
|         "value and is ambiguous"); | ||||
|  | ||||
|     res[*infer_dim] = numel / newsize; | ||||
|     return; | ||||
|   }; | ||||
|  | ||||
|   if (infer_dim && newsize > 0 && numel % newsize == 0) { | ||||
|     set_infer_dim(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   TORCH_MAYBE_SYM_CHECK( | ||||
| @ -103,6 +75,9 @@ inline void infer_size_impl( | ||||
|       shape, | ||||
|       "' is invalid for input of size ", | ||||
|       numel); | ||||
|   if (infer_dim) { | ||||
|     set_infer_dim(); | ||||
|   } | ||||
| } | ||||
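A simplified, standalone sketch of the inference rule above (plain int64_t sizes only, no SymInt or symbolic guards; infer_size_sketch is an illustrative name): the single -1 entry becomes numel / newsize when newsize is positive and divides numel evenly, and the zero-element case is rejected as ambiguous.

```cpp
// Sketch: infer the -1 dimension of a view/reshape target shape.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <vector>

std::vector<int64_t> infer_size_sketch(std::vector<int64_t> shape, int64_t numel) {
  int64_t newsize = 1;
  std::optional<std::size_t> infer_dim;
  for (std::size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      infer_dim = i;        // at most one -1 is allowed
    } else {
      newsize *= shape[i];  // product of the known dimensions
    }
  }
  if (infer_dim) {
    if (newsize == 0) {
      throw std::runtime_error("cannot reshape tensor of 0 elements: -1 is ambiguous");
    }
    if (numel % newsize != 0) {
      throw std::runtime_error("shape is invalid for input of this size");
    }
    shape[*infer_dim] = numel / newsize;  // the inferred dimension
  } else if (newsize != numel) {
    throw std::runtime_error("shape is invalid for input of this size");
  }
  return shape;
}

int main() {
  auto s = infer_size_sketch({-1, 4}, 12);
  std::cout << s[0] << "x" << s[1] << '\n';  // 3x4
}
```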
|  | ||||
| inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) { | ||||
|  | ||||
| @ -103,9 +103,7 @@ std::string get_cpu_capability() { | ||||
| #elif defined(HAVE_ZVECTOR_CPU_DEFINITION) | ||||
|     case native::CPUCapability::ZVECTOR: | ||||
|       return "Z VECTOR"; | ||||
| #elif defined(HAVE_SVE_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION) | ||||
|     case native::CPUCapability::SVE128: | ||||
|       return "SVE128"; | ||||
| #elif defined(HAVE_SVE256_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION) | ||||
|     case native::CPUCapability::SVE256: | ||||
|       return "SVE256"; | ||||
| #else | ||||
|  | ||||
| @ -1234,7 +1234,7 @@ struct TORCH_API TupleType : public NamedType { | ||||
|   std::shared_ptr<FunctionSchema> schema_; | ||||
| }; | ||||
|  | ||||
| // the common supertype of all Enums, only used in operator registration. | ||||
| // the common supertype of all Enums, only used in operator registraion. | ||||
| // EnumType <: AnyEnumType for all Enums | ||||
| struct AnyEnumType; | ||||
| using AnyEnumTypePtr = SingletonTypePtr<AnyEnumType>; | ||||
|  | ||||
| @ -102,31 +102,8 @@ struct VecReduceAllSIMD<float, Op> { | ||||
| #endif // defined(__GNUC__) && (__GNUC__ > 5) && !defined(_MSC_VER) && | ||||
|        // !defined(C10_MOBILE) | ||||
|  | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
| #if defined(CPU_CAPABILITY_SVE256) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
|       const Op& vec_fun, | ||||
|       const Vectorized<float>& acc_vec) { | ||||
|     using Vec = Vectorized<float>; | ||||
|     Vec v = acc_vec; | ||||
|     // 128-bit shuffle | ||||
|     svuint32_t ind = svdupq_n_u32(4, 5, 6, 7); | ||||
|     Vec v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 64-bit shuffle | ||||
|     ind = svdupq_n_u32(2, 3, 0, 1); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 32-bit shuffle | ||||
|     ind = svdupq_n_u32(1, 0, 2, 3); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     return svlasta(svpfalse(), v); | ||||
|   } | ||||
| }; | ||||
| #else | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \ | ||||
|     !defined(CPU_CAPABILITY_SVE) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
| @ -163,8 +140,35 @@ struct VecReduceAllSIMD<float, std::plus<Vectorized<float>>> { | ||||
|     return vaddvq_f32(acc_vec); | ||||
|   } | ||||
| }; | ||||
| #endif // defined(CPU_CAPABILITY_SVE256) | ||||
| #endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
|        // && !defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \ | ||||
|     defined(CPU_CAPABILITY_SVE256) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
|       const Op& vec_fun, | ||||
|       const Vectorized<float>& acc_vec) { | ||||
|     using Vec = Vectorized<float>; | ||||
|     Vec v = acc_vec; | ||||
|     // 128-bit shuffle | ||||
|     svuint32_t ind = svdupq_n_u32(4, 5, 6, 7); | ||||
|     Vec v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 64-bit shuffle | ||||
|     ind = svdupq_n_u32(2, 3, 0, 1); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 32-bit shuffle | ||||
|     ind = svdupq_n_u32(1, 0, 2, 3); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     return svlasta(svpfalse(), v); | ||||
|   } | ||||
| }; | ||||
| #endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
|        // && defined(CPU_CAPABILITY_SVE256) | ||||
|  | ||||
| template <typename scalar_t, typename Op> | ||||
| inline scalar_t vec_reduce_all( | ||||
|  | ||||
| @ -1,21 +1,9 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/cpu/vec/intrinsics.h> | ||||
| #include <c10/macros/Macros.h> | ||||
| #include <cstdint> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
|  | ||||
| #if defined(__aarch64__) &&                     \ | ||||
|     (defined(AT_BUILD_ARM_VEC256_WITH_SLEEF) || \ | ||||
|      defined(AT_BUILD_ARM_VECSVE_WITH_SLEEF)) | ||||
| #define SLEEF_STATIC_LIBS | ||||
| #include <sleef.h> | ||||
| #define USE_SLEEF(sleef_code, non_sleef_code) sleef_code | ||||
| #else | ||||
| #define USE_SLEEF(sleef_code, non_sleef_code) non_sleef_code | ||||
| #endif | ||||
|  | ||||
| #if defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| // Define the data type of VLS(vector-length specific). | ||||
|  | ||||
| @ -2,6 +2,7 @@ | ||||
|  | ||||
| #include <ATen/cpu/vec/intrinsics.h> | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/sve/vec_common_sve.h> | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
| #include <c10/util/bit_cast.h> | ||||
|  | ||||
| @ -8,48 +8,13 @@ | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
|  | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_bfloat16_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_half_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_convert.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/sve/vec_qint.h> | ||||
|  | ||||
| #elif defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
|  | ||||
| #if defined(CPU_CAPABILITY_SVE) | ||||
| #include <ATen/cpu/vec/sve/vec_bfloat16.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/sve/vec_double.h> | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
| #include <ATen/cpu/vec/sve/vec_int.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/sve/vec_qint.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec256/vec256_half.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec256/vec256_convert.h> | ||||
|  | ||||
| #else // NEON | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_half_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_bfloat16_neon.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec128/vec128_convert.h> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec256/vec256_qint.h> | ||||
|  | ||||
| #endif // defined(CPU_CAPABILITY_SVE128) | ||||
|  | ||||
| #include <ATen/cpu/vec/functional.h> | ||||
| #endif | ||||
|  | ||||
| namespace at::vec { | ||||
| // Note [CPU_CAPABILITY namespace] | ||||
| @ -83,6 +48,12 @@ DEFINE_SVE_CAST(int32_t, s32, float, f32) | ||||
| DEFINE_SVE_CAST(int16_t, s16, float, f32) | ||||
| DEFINE_SVE_CAST(float, f32, double, f64) | ||||
|  | ||||
| #ifdef __ARM_FEATURE_BF16 | ||||
| DEFINE_SVE_CAST(int64_t, s64, c10::BFloat16, bf16) | ||||
| DEFINE_SVE_CAST(int32_t, s32, c10::BFloat16, bf16) | ||||
| DEFINE_SVE_CAST(int16_t, s16, c10::BFloat16, bf16) | ||||
| #endif // __ARM_FEATURE_BF16 | ||||
|  | ||||
| // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GATHER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| template <int64_t scale = 1> | ||||
| @ -202,11 +173,9 @@ std::pair< | ||||
|   // group cols crossing lanes: | ||||
|   //   return {a0, b0, a1, b1, a2, b2, a3, b3} | ||||
|   //          {a4, b4, a5, b5, a6, b6, a7, b7} | ||||
|   svbfloat16_t aReg = a; | ||||
|   svbfloat16_t bReg = b; | ||||
|   Vectorized<c10::BFloat16> c = svzip1_bf16(aReg, bReg); | ||||
|   Vectorized<c10::BFloat16> d = svzip2_bf16(aReg, bReg); | ||||
|   return std::make_pair(c, d); | ||||
|   return std::make_pair( | ||||
|       Vectorized<c10::BFloat16>(svzip1_bf16(a, b)), | ||||
|       Vectorized<c10::BFloat16>(svzip2_bf16(a, b))); | ||||
| } | ||||
| #endif // __ARM_FEATURE_BF16 | ||||
|  | ||||
| @ -255,27 +224,12 @@ std::pair< | ||||
|   // swap lanes: | ||||
|   //   return {a0, a1, a2, a3, a4, a5, a6, a7} | ||||
|   //          {b0, b1, b2, b3, b4, b5, b6, b7} | ||||
|   svbfloat16_t aReg = a; | ||||
|   svbfloat16_t bReg = b; | ||||
|   Vectorized<c10::BFloat16> c = svuzp1_bf16(aReg, bReg); | ||||
|   Vectorized<c10::BFloat16> d = svuzp2_bf16(aReg, bReg); | ||||
|   return std::make_pair(c, d); | ||||
|   return std::make_pair( | ||||
|       Vectorized<c10::BFloat16>(svuzp1_bf16((svbfloat16_t)a, (svbfloat16_t)b)), | ||||
|       Vectorized<c10::BFloat16>(svuzp2_bf16((svbfloat16_t)a, (svbfloat16_t)b))); | ||||
| } | ||||
| #endif // __ARM_FEATURE_BF16 | ||||
|  | ||||
| // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FLIP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| #define DEFINE_FLIP_FUNC(type, sve_func)                    \ | ||||
|   inline Vectorized<type> flip(const Vectorized<type>& v) { \ | ||||
|     return Vectorized<type>(sve_func(v));                   \ | ||||
|   } | ||||
| // Use the macro to define the flip functions | ||||
| DEFINE_FLIP_FUNC(float, svrev_f32) | ||||
| DEFINE_FLIP_FUNC(double, svrev_f64) | ||||
| DEFINE_FLIP_FUNC(int64_t, svrev_s64) | ||||
| DEFINE_FLIP_FUNC(int32_t, svrev_s32) | ||||
| DEFINE_FLIP_FUNC(int16_t, svrev_s16) | ||||
| DEFINE_FLIP_FUNC(int8_t, svrev_s8) | ||||
|  | ||||
| #endif // defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| } // namespace CPU_CAPABILITY | ||||
| @ -1,8 +1,6 @@ | ||||
| #pragma once | ||||
|  | ||||
| #if defined(__aarch64__) | ||||
| #include <ATen/cpu/vec/vec_common_aarch64.h> | ||||
| #elif defined(CPU_CAPABILITY_AVX512) | ||||
| #if defined(CPU_CAPABILITY_AVX512) | ||||
| #include <ATen/cpu/vec/vec512/vec512.h> | ||||
| #else | ||||
| #include <ATen/cpu/vec/vec128/vec128.h> | ||||
| @ -13,34 +11,6 @@ namespace at::vec { | ||||
| // See Note [CPU_CAPABILITY namespace] | ||||
| inline namespace CPU_CAPABILITY { | ||||
|  | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::qint32& val) { | ||||
|   stream << val.val_; | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::qint8& val) { | ||||
|   stream << static_cast<int>(val.val_); | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::quint8& val) { | ||||
|   stream << static_cast<unsigned int>(val.val_); | ||||
|   return stream; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| std::ostream& operator<<(std::ostream& stream, const Vectorized<T>& vec) { | ||||
|   T buf[Vectorized<T>::size()]; | ||||
|   vec.store(buf); | ||||
|   stream << "vec["; | ||||
|   for (int i = 0; i != Vectorized<T>::size(); i++) { | ||||
|     if (i != 0) { | ||||
|       stream << ", "; | ||||
|     } | ||||
|     stream << buf[i]; | ||||
|   } | ||||
|   stream << "]"; | ||||
|   return stream; | ||||
| } | ||||
|  | ||||
| inline Vectorized<bool> convert_to_bool(Vectorized<int8_t> x) { | ||||
|   __at_align__ bool buffer[x.size()]; | ||||
|   x.ne(Vectorized<int8_t>(0)).store(buffer); | ||||
|  | ||||
| @ -2,7 +2,6 @@ | ||||
|  | ||||
| // DO NOT DEFINE STATIC DATA IN THIS HEADER! | ||||
| // See Note [Do not compile initializers with AVX] | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
| #include <ATen/cpu/vec/vec128/vec128_reduced_precision_common_neon.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
| @ -263,13 +262,6 @@ class Vectorized<c10::BFloat16> : public Vectorized16< | ||||
|             c10::bit_cast<at_bfloat16_t>(val6.x), | ||||
|             c10::bit_cast<at_bfloat16_t>(val7.x)}) {} | ||||
|  | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
|   Vectorized(svbfloat16_t v) : Vectorized16(svget_neonq(v)) {} | ||||
|   operator svbfloat16_t() const { | ||||
|     return svset_neonq(svundef_bf16(), values); | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   static Vectorized<c10::BFloat16> blendv( | ||||
|       const Vectorized<c10::BFloat16>& a, | ||||
|       const Vectorized<c10::BFloat16>& b, | ||||
| @ -382,23 +374,6 @@ class Vectorized<c10::BFloat16> : public Vectorized16< | ||||
|   Vectorized ge(const Vectorized& other) const; | ||||
|   Vectorized lt(const Vectorized& other) const; | ||||
|   Vectorized le(const Vectorized& other) const; | ||||
|  | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
|  | ||||
|   template <typename step_t> | ||||
|   static Vectorized<BFloat16> arange( | ||||
|       BFloat16 base = 0.f, | ||||
|       step_t step = static_cast<step_t>(1)) { | ||||
|     __at_align__ BFloat16 buffer[size()]; | ||||
|     for (int64_t i = 0; i < size(); i++) { | ||||
|       buffer[i] = base + i * step; | ||||
|     } | ||||
|     return svget_neonq( | ||||
|         svld1_bf16(ptrue, reinterpret_cast<bfloat16_t*>(buffer))); | ||||
|   } | ||||
|  | ||||
| #endif // CPU_CAPABILITY_SVE128 | ||||
|  | ||||
| }; // Vectorized<c10::BFloat16> | ||||
|  | ||||
| inline std::tuple<Vectorized<float>, Vectorized<float>> convert_bfloat16_float( | ||||
| @ -422,24 +397,6 @@ inline Vectorized<c10::BFloat16> convert_float_bfloat16( | ||||
|   return Vectorized<c10::BFloat16>(at_vcombine_bf16(x1, x2)); | ||||
| } | ||||
|  | ||||
| inline void load_fp32_from_bf16(const BFloat16* data, Vectorized<float>& out) { | ||||
|   __at_align__ float values[Vectorized<float>::size()]; | ||||
|   for (const auto k : c10::irange(Vectorized<float>::size())) { | ||||
|     values[k] = data[k]; | ||||
|   } | ||||
|   out = Vectorized<float>::loadu(values); | ||||
| } | ||||
|  | ||||
| inline void load_fp32_from_bf16( | ||||
|     const BFloat16* data, | ||||
|     Vectorized<float>& out1, | ||||
|     Vectorized<float>& out2) { | ||||
|   Vectorized<BFloat16> bf16_vec = Vectorized<BFloat16>::loadu(data); | ||||
|   auto floats = convert_bfloat16_float(bf16_vec); | ||||
|   out1 = std::get<0>(floats); | ||||
|   out2 = std::get<1>(floats); | ||||
| } | ||||
|  | ||||
| template <typename Op> | ||||
| Vectorized<c10::BFloat16> binary_operator_via_float( | ||||
|     Op op, | ||||
| @ -622,12 +579,6 @@ Vectorized<c10::BFloat16> inline fnmsub( | ||||
|   return -a * b - c; | ||||
| } | ||||
|  | ||||
| #else // | ||||
|  | ||||
| CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16) | ||||
|  | ||||
| LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16) | ||||
|  | ||||
| #endif // !defined(C10_MOBILE) && defined(__aarch64__) | ||||
|  | ||||
| } // namespace CPU_CAPABILITY | ||||
|  | ||||
| @ -4,7 +4,7 @@ | ||||
|  | ||||
| namespace at::vec { | ||||
| inline namespace CPU_CAPABILITY { | ||||
| #if defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256) | ||||
| #if (defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256)) | ||||
| template <typename src_t> | ||||
| struct VecConvert< | ||||
|     float, | ||||
| @ -60,7 +60,6 @@ struct VecConvert<float, 1, BFloat16, 1> { | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #endif // defined(__aarch64__) && (!defined(CPU_CAPABILITY_SVE) || | ||||
|        // defined(CPU_CAPABILITY_SVE128)) | ||||
| #endif // defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256) | ||||
| } // namespace CPU_CAPABILITY | ||||
| } // namespace at::vec | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff.