mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-23 14:59:34 +08:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			mingw_cons
			...
			annotate_1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| f60c409a0a | 
| @ -15,8 +15,6 @@ fi | ||||
| # Compress the fatbin with -compress-mode=size for CUDA 13 | ||||
| if [[ "$DESIRED_CUDA" == *"13"* ]]; then | ||||
|     export TORCH_NVCC_FLAGS="-compress-mode=size" | ||||
|     # Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801 | ||||
|     export BUILD_BUNDLE_PTXAS=1 | ||||
| fi | ||||
|  | ||||
| SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" | ||||
|  | ||||
| @ -13,6 +13,49 @@ def list_dir(path: str) -> list[str]: | ||||
|     return check_output(["ls", "-1", path]).decode().split("\n") | ||||
|  | ||||
|  | ||||
| def build_ArmComputeLibrary() -> None: | ||||
|     """ | ||||
|     Using ArmComputeLibrary for aarch64 PyTorch | ||||
|     """ | ||||
|     print("Building Arm Compute Library") | ||||
|     acl_build_flags = [ | ||||
|         "debug=0", | ||||
|         "neon=1", | ||||
|         "opencl=0", | ||||
|         "os=linux", | ||||
|         "openmp=1", | ||||
|         "cppthreads=0", | ||||
|         "arch=armv8a", | ||||
|         "multi_isa=1", | ||||
|         "fixed_format_kernels=1", | ||||
|         "build=native", | ||||
|     ] | ||||
|     acl_install_dir = "/acl" | ||||
|     acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary") | ||||
|     if os.path.isdir(acl_install_dir): | ||||
|         shutil.rmtree(acl_install_dir) | ||||
|     if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)): | ||||
|         check_call( | ||||
|             [ | ||||
|                 "git", | ||||
|                 "clone", | ||||
|                 "https://github.com/ARM-software/ComputeLibrary.git", | ||||
|                 "-b", | ||||
|                 "v25.02", | ||||
|                 "--depth", | ||||
|                 "1", | ||||
|                 "--shallow-submodules", | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|     check_call( | ||||
|         ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags, | ||||
|         cwd=acl_checkout_dir, | ||||
|     ) | ||||
|     for d in ["arm_compute", "include", "utils", "support", "src", "build"]: | ||||
|         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") | ||||
|  | ||||
|  | ||||
| def replace_tag(filename) -> None: | ||||
|     with open(filename) as f: | ||||
|         lines = f.readlines() | ||||
| @ -313,17 +356,23 @@ if __name__ == "__main__": | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " | ||||
|  | ||||
|     if enable_mkldnn: | ||||
|         build_ArmComputeLibrary() | ||||
|         print("build pytorch with mkldnn+acl backend") | ||||
|         build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " | ||||
|         build_vars += "ACL_ROOT_DIR=/acl " | ||||
|         build_vars += ( | ||||
|             "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " | ||||
|             "ACL_ROOT_DIR=/acl " | ||||
|             "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " | ||||
|             "ACL_INCLUDE_DIR=/acl/build " | ||||
|             "ACL_LIBRARY=/acl/build " | ||||
|         ) | ||||
|         if enable_cuda: | ||||
|             build_vars += "BLAS=NVPL " | ||||
|         else: | ||||
|             build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS " | ||||
|             build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " | ||||
|     else: | ||||
|         print("build pytorch without mkldnn backend") | ||||
|  | ||||
|     os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") | ||||
|     if enable_cuda: | ||||
|         print("Updating Cuda Dependency") | ||||
|         filename = os.listdir("/pytorch/dist/") | ||||
|  | ||||
| @ -299,6 +299,40 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: | ||||
|     print("Building OpenBLAS") | ||||
|     host.run_cmd( | ||||
|         f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}" | ||||
|     ) | ||||
|     make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" | ||||
|     host.run_cmd( | ||||
|         f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS" | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: | ||||
|     print("Building Arm Compute Library") | ||||
|     acl_build_flags = " ".join( | ||||
|         [ | ||||
|             "debug=0", | ||||
|             "neon=1", | ||||
|             "opencl=0", | ||||
|             "os=linux", | ||||
|             "openmp=1", | ||||
|             "cppthreads=0", | ||||
|             "arch=armv8a", | ||||
|             "multi_isa=1", | ||||
|             "fixed_format_kernels=1", | ||||
|             "build=native", | ||||
|         ] | ||||
|     ) | ||||
|     host.run_cmd( | ||||
|         f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}" | ||||
|     ) | ||||
|  | ||||
|     host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") | ||||
|  | ||||
|  | ||||
| def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: | ||||
|     host.run_cmd("pip3 install auditwheel") | ||||
|     host.run_cmd( | ||||
| @ -408,7 +442,7 @@ def build_torchvision( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd vision && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel") | ||||
|     vision_wheel_name = host.list_dir("vision/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("vision", "dist", vision_wheel_name)) | ||||
|  | ||||
| @ -463,7 +497,7 @@ def build_torchdata( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd data && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel") | ||||
|     wheel_name = host.list_dir("data/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("data", "dist", wheel_name)) | ||||
|  | ||||
| @ -519,7 +553,7 @@ def build_torchtext( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|  | ||||
|     host.run_cmd(f"cd text && {build_vars} python3 -m build --wheel --no-isolation") | ||||
|     host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel") | ||||
|     wheel_name = host.list_dir("text/dist")[0] | ||||
|     embed_libgomp(host, use_conda, os.path.join("text", "dist", wheel_name)) | ||||
|  | ||||
| @ -580,7 +614,7 @@ def build_torchaudio( | ||||
|     host.run_cmd( | ||||
|         f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \ | ||||
|         && ./packaging/ffmpeg/build.sh \ | ||||
|         && {build_vars} python3 -m build --wheel --no-isolation" | ||||
|         && {build_vars} python3 setup.py bdist_wheel" | ||||
|     ) | ||||
|  | ||||
|     wheel_name = host.list_dir("audio/dist")[0] | ||||
| @ -666,6 +700,7 @@ def start_build( | ||||
|     configure_system( | ||||
|         host, compiler=compiler, use_conda=use_conda, python_version=python_version | ||||
|     ) | ||||
|     build_OpenBLAS(host, git_clone_flags) | ||||
|  | ||||
|     if host.using_docker(): | ||||
|         print("Move libgfortant.a into a standard location") | ||||
| @ -688,12 +723,10 @@ def start_build( | ||||
|         f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}" | ||||
|     ) | ||||
|  | ||||
|     host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh") | ||||
|  | ||||
|     print("Building PyTorch wheel") | ||||
|     build_opts = "" | ||||
|     if pytorch_build_number is not None: | ||||
|         build_opts += f" -C--build-option=--build-number={pytorch_build_number}" | ||||
|         build_opts += f" --build-number {pytorch_build_number}" | ||||
|     # Breakpad build fails on aarch64 | ||||
|     build_vars = "USE_BREAKPAD=0 " | ||||
|     if branch == "nightly": | ||||
| @ -710,18 +743,15 @@ def start_build( | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|     if enable_mkldnn: | ||||
|         host.run_cmd("pytorch/.ci/docker/common/install_acl.sh") | ||||
|         build_ArmComputeLibrary(host, git_clone_flags) | ||||
|         print("build pytorch with mkldnn+acl backend") | ||||
|         build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" | ||||
|         build_vars += " BLAS=OpenBLAS" | ||||
|         build_vars += " OpenBLAS_HOME=/opt/OpenBLAS" | ||||
|         build_vars += " ACL_ROOT_DIR=/acl" | ||||
|         host.run_cmd( | ||||
|             f"cd $HOME/pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}" | ||||
|             f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}" | ||||
|         ) | ||||
|         print("Repair the wheel") | ||||
|         pytorch_wheel_name = host.list_dir("pytorch/dist")[0] | ||||
|         ld_library_path = "/acl/build:$HOME/pytorch/build/lib" | ||||
|         ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib" | ||||
|         host.run_cmd( | ||||
|             f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}" | ||||
|         ) | ||||
| @ -733,7 +763,7 @@ def start_build( | ||||
|     else: | ||||
|         print("build pytorch without mkldnn backend") | ||||
|         host.run_cmd( | ||||
|             f"cd pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}" | ||||
|             f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}" | ||||
|         ) | ||||
|  | ||||
|     print("Deleting build folder") | ||||
| @ -877,7 +907,7 @@ def terminate_instances(instance_type: str) -> None: | ||||
| def parse_arguments(): | ||||
|     from argparse import ArgumentParser | ||||
|  | ||||
|     parser = ArgumentParser("Build and test AARCH64 wheels using EC2") | ||||
|     parser = ArgumentParser("Builid and test AARCH64 wheels using EC2") | ||||
|     parser.add_argument("--key-name", type=str) | ||||
|     parser.add_argument("--debug", action="store_true") | ||||
|     parser.add_argument("--build-only", action="store_true") | ||||
|  | ||||
| @ -69,8 +69,7 @@ RUN bash ./install_cuda.sh 13.0 | ||||
| ENV DESIRED_CUDA=13.0 | ||||
|  | ||||
| FROM ${ROCM_IMAGE} as rocm | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
| ENV MKLROOT /opt/intel | ||||
|  | ||||
| @ -36,12 +36,6 @@ case ${DOCKER_TAG_PREFIX} in | ||||
|     ;; | ||||
|   rocm*) | ||||
|     BASE_TARGET=rocm | ||||
|     PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|     # add gfx950 conditionally starting in ROCm 7.0 | ||||
|     if [[ "$ROCM_VERSION" == *"7.0"* ]]; then | ||||
|         PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|     fi | ||||
|     EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" | ||||
|     ;; | ||||
|   *) | ||||
|     echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}" | ||||
|  | ||||
| @ -84,8 +84,8 @@ fi | ||||
| _UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152 | ||||
| _UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96 | ||||
| if [[ "$image" == *rocm* ]]; then | ||||
|   _UCX_COMMIT=29831d319e6be55cb8c768ca61de335c934ca39e | ||||
|   _UCC_COMMIT=9f4b242cbbd8b1462cbc732eb29316cdfa124b77 | ||||
|   _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6 | ||||
|   _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d | ||||
| fi | ||||
|  | ||||
| tag=$(echo $image | awk -F':' '{print $2}') | ||||
| @ -175,6 +175,20 @@ case "$tag" in | ||||
|     fi | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.4 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     if [[ $tag =~ "benchmarks" ]]; then | ||||
|       INDUCTOR_BENCHMARKS=yes | ||||
|     fi | ||||
|     ;; | ||||
|   pytorch-linux-noble-rocm-alpha-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=7.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
| @ -182,9 +196,6 @@ case "$tag" in | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950" | ||||
|     if [[ $tag =~ "benchmarks" ]]; then | ||||
|       INDUCTOR_BENCHMARKS=yes | ||||
|     fi | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
| @ -441,3 +452,12 @@ elif [ "$HAS_TRITON" = "yes" ]; then | ||||
|   echo "expecting triton to not be installed, but it is" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # Sanity check cmake version.  Executorch reinstalls cmake and I'm not sure if | ||||
| # they support 4.0.0 yet, so exclude them from this check. | ||||
| CMAKE_VERSION=$(drun cmake --version) | ||||
| if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then | ||||
|   echo "CMake version is not 4.0.0:" | ||||
|   drun cmake --version | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| v2.27.5-1 | ||||
| v2.27.5-1 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| v2.27.5-1 | ||||
| v2.27.7-1 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 27664085f804afc83df26f740bb46c365854f2c4 | ||||
| bbb06c0334a6772b92d24bde54956e675c8c6604 | ||||
|  | ||||
							
								
								
									
										27
									
								
								.ci/docker/common/install_acl.sh
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										27
									
								
								.ci/docker/common/install_acl.sh
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @ -1,27 +1,16 @@ | ||||
| #!/bin/bash | ||||
| # Script used only in CD pipeline | ||||
| set -euo pipefail | ||||
|  | ||||
| set -eux | ||||
|  | ||||
| ACL_VERSION=${ACL_VERSION:-"v25.02"} | ||||
| ACL_INSTALL_DIR="/acl" | ||||
| readonly version=v25.02 | ||||
| readonly src_host=https://github.com/ARM-software | ||||
| readonly src_repo=ComputeLibrary | ||||
|  | ||||
| # Clone ACL | ||||
| git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules | ||||
| [[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git | ||||
| cd ${src_repo} | ||||
|  | ||||
| git checkout $version | ||||
|  | ||||
| ACL_CHECKOUT_DIR="ComputeLibrary" | ||||
| # Build with scons | ||||
| pushd $ACL_CHECKOUT_DIR | ||||
| scons -j8  Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \ | ||||
|   os=linux arch=armv8a build=native multi_isa=1 \ | ||||
|   fixed_format_kernels=1 openmp=1 cppthreads=0 | ||||
| popd | ||||
|  | ||||
| # Install ACL | ||||
| sudo mkdir -p ${ACL_INSTALL_DIR} | ||||
| for d in arm_compute include utils support src build | ||||
| do | ||||
|   sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d} | ||||
| done | ||||
|  | ||||
| rm -rf $ACL_CHECKOUT_DIR | ||||
							
								
								
									
										12
									
								
								.ci/docker/common/install_openblas.sh
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										12
									
								
								.ci/docker/common/install_openblas.sh
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @ -3,10 +3,8 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"} | ||||
|  | ||||
| # Clone OpenBLAS | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules | ||||
| cd / | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules | ||||
|  | ||||
| OPENBLAS_CHECKOUT_DIR="OpenBLAS" | ||||
| OPENBLAS_BUILD_FLAGS=" | ||||
| @ -19,7 +17,5 @@ CFLAGS=-O3 | ||||
| BUILD_BFLOAT16=1 | ||||
| " | ||||
|  | ||||
| make -j8 ${OPENBLAS_BUILD_FLAGS} -C $OPENBLAS_CHECKOUT_DIR | ||||
| sudo make install -C $OPENBLAS_CHECKOUT_DIR | ||||
|  | ||||
| rm -rf $OPENBLAS_CHECKOUT_DIR | ||||
| make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR} | ||||
| make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR} | ||||
|  | ||||
| @ -42,6 +42,12 @@ EOF | ||||
|     rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}" | ||||
|     amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu" | ||||
|  | ||||
|     # Special case for ROCM_VERSION == 7.0 | ||||
|     if [[ $(ver "$ROCM_VERSION") -eq $(ver 7.0) ]]; then | ||||
|         rocm_baseurl="https://repo.radeon.com/rocm/apt/7.0_alpha2" | ||||
|         amdgpu_baseurl="https://repo.radeon.com/amdgpu/30.10_alpha2/ubuntu" | ||||
|     fi | ||||
|  | ||||
|     # Add amdgpu repository | ||||
|     UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'` | ||||
|     echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list | ||||
|  | ||||
| @ -12,8 +12,8 @@ function do_install() { | ||||
|  | ||||
|     rocm_version_nodot=${rocm_version//./} | ||||
|  | ||||
|     # https://github.com/icl-utk-edu/magma/pull/65 | ||||
|     MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec | ||||
|     # Version 2.7.2 + ROCm related updates | ||||
|     MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|     magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
|  | ||||
|     rocm_dir="/opt/rocm" | ||||
|  | ||||
| @ -66,15 +66,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" | ||||
|   # Triton needs at least gcc-9 to build | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python -m build --wheel --no-isolation | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
| elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then | ||||
|   # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain | ||||
|   add-apt-repository -y ppa:ubuntu-toolchain-r/test | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python -m build --wheel --no-isolation | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
| else | ||||
|   conda_run python -m build --wheel --no-isolation | ||||
|   conda_run python setup.py bdist_wheel | ||||
| fi | ||||
|  | ||||
| # Copy the wheel to /opt for multi stage docker builds | ||||
|  | ||||
| @ -40,16 +40,12 @@ case ${DOCKER_TAG_PREFIX} in | ||||
|         ;; | ||||
|     rocm*) | ||||
|         # we want the patch version of 6.4 instead | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then | ||||
|         if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then | ||||
|             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2" | ||||
|         fi | ||||
|         BASE_TARGET=rocm | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         # add gfx950 conditionally starting in ROCm 7.0 | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then | ||||
|             PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}" | ||||
|         ;; | ||||
|     *) | ||||
|  | ||||
| @ -62,13 +62,6 @@ ARG OPENBLAS_VERSION | ||||
| ADD ./common/install_openblas.sh install_openblas.sh | ||||
| RUN bash ./install_openblas.sh && rm install_openblas.sh | ||||
|  | ||||
| # Install Arm Compute Library | ||||
| FROM base as arm_compute | ||||
| # use python3.9 to install scons | ||||
| RUN python3.9 -m pip install scons==4.7.0 | ||||
| RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin | ||||
| COPY ./common/install_acl.sh install_acl.sh | ||||
| RUN bash ./install_acl.sh && rm install_acl.sh | ||||
| FROM base as final | ||||
|  | ||||
| # remove unnecessary python versions | ||||
| @ -77,5 +70,4 @@ RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 | ||||
| RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 | ||||
| RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
| COPY --from=openblas     /opt/OpenBLAS/  /opt/OpenBLAS/ | ||||
| COPY --from=arm_compute /acl /acl | ||||
| ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| @ -86,15 +86,6 @@ FROM base as nvpl | ||||
| ADD ./common/install_nvpl.sh install_nvpl.sh | ||||
| RUN bash ./install_nvpl.sh && rm install_nvpl.sh | ||||
|  | ||||
| # Install Arm Compute Library | ||||
| FROM base as arm_compute | ||||
| # use python3.9 to install scons | ||||
| RUN python3.9 -m pip install scons==4.7.0 | ||||
| RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin | ||||
| COPY ./common/install_acl.sh install_acl.sh | ||||
| RUN bash ./install_acl.sh && rm install_acl.sh | ||||
| FROM base as final | ||||
|  | ||||
| FROM final as cuda_final | ||||
| ARG BASE_CUDA_VERSION | ||||
| RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| @ -102,7 +93,5 @@ COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BAS | ||||
| COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=nvpl /opt/nvpl/lib/  /usr/local/lib/ | ||||
| COPY --from=nvpl /opt/nvpl/include/  /usr/local/include/ | ||||
| COPY --from=arm_compute /acl /acl | ||||
| RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda | ||||
| ENV PATH=/usr/local/cuda/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/acl/build/:$LD_LIBRARY_PATH | ||||
|  | ||||
							
								
								
									
										71
									
								
								.ci/docker/manywheel/Dockerfile_cxx11-abi
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								.ci/docker/manywheel/Dockerfile_cxx11-abi
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,71 @@ | ||||
| FROM centos:8 as base | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| ENV PATH /opt/rh/gcc-toolset-11/root/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin | ||||
|  | ||||
| # change to a valid repo | ||||
| RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*.repo | ||||
| # enable to install ninja-build | ||||
| RUN sed -i 's|enabled=0|enabled=1|g' /etc/yum.repos.d/CentOS-Linux-PowerTools.repo | ||||
|  | ||||
| RUN yum -y update | ||||
| RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel sudo | ||||
| RUN yum install -y autoconf automake make cmake gdb gcc-toolset-11-gcc-c++ | ||||
|  | ||||
|  | ||||
| FROM base as openssl | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
|  | ||||
| # Install python | ||||
| FROM base as python | ||||
| RUN yum install -y openssl-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel | ||||
| ADD common/install_cpython.sh install_cpython.sh | ||||
| RUN bash ./install_cpython.sh && rm install_cpython.sh | ||||
|  | ||||
| FROM base as conda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| RUN /opt/conda/bin/conda install -y cmake | ||||
|  | ||||
| FROM base as intel | ||||
| # Install MKL | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=conda              /opt/conda                            /opt/conda | ||||
| ENV PATH=/opt/conda/bin:$PATH | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
|  | ||||
| FROM base as patchelf | ||||
| ADD ./common/install_patchelf.sh install_patchelf.sh | ||||
| RUN bash ./install_patchelf.sh && rm install_patchelf.sh | ||||
| RUN cp $(which patchelf) /patchelf | ||||
|  | ||||
| FROM base as jni | ||||
| ADD ./common/install_jni.sh install_jni.sh | ||||
| ADD ./java/jni.h jni.h | ||||
| RUN bash ./install_jni.sh && rm install_jni.sh | ||||
|  | ||||
| FROM base as libpng | ||||
| ADD ./common/install_libpng.sh install_libpng.sh | ||||
| RUN bash ./install_libpng.sh && rm install_libpng.sh | ||||
|  | ||||
| FROM base as final | ||||
| COPY --from=openssl            /opt/openssl                          /opt/openssl | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=intel              /opt/intel                            /opt/intel | ||||
| COPY --from=conda              /opt/conda                            /opt/conda | ||||
| COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf | ||||
| COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h | ||||
| COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/include/png*               /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/ | ||||
| COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig | ||||
|  | ||||
| RUN yum install -y ninja-build | ||||
| @ -28,7 +28,6 @@ fi | ||||
| MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-} | ||||
| DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-} | ||||
| OPENBLAS_VERSION=${OPENBLAS_VERSION:-} | ||||
| ACL_VERSION=${ACL_VERSION:-} | ||||
|  | ||||
| case ${image} in | ||||
|     manylinux2_28-builder:cpu) | ||||
| @ -42,6 +41,13 @@ case ${image} in | ||||
|         GPU_IMAGE=arm64v8/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1" | ||||
|         MANY_LINUX_VERSION="2_28_aarch64" | ||||
|         OPENBLAS_VERSION="v0.3.30" | ||||
|         ;; | ||||
|     manylinuxcxx11-abi-builder:cpu-cxx11-abi) | ||||
|         TARGET=final | ||||
|         GPU_IMAGE="" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" | ||||
|         MANY_LINUX_VERSION="cxx11-abi" | ||||
|         ;; | ||||
|     manylinuxs390x-builder:cpu-s390x) | ||||
|         TARGET=final | ||||
| @ -76,7 +82,7 @@ case ${image} in | ||||
|         ;; | ||||
|     manylinux2_28-builder:rocm*) | ||||
|         # we want the patch version of 6.4 instead | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then | ||||
|         if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then | ||||
|             GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2" | ||||
|         fi | ||||
|         TARGET=rocm_final | ||||
| @ -84,10 +90,6 @@ case ${image} in | ||||
|         DEVTOOLSET_VERSION="11" | ||||
|         GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         # add gfx950 conditionally starting in ROCm 7.0 | ||||
|         if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then | ||||
|             PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" | ||||
|         ;; | ||||
|     manylinux2_28-builder:xpu) | ||||
| @ -119,8 +121,7 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') | ||||
| DOCKER_BUILDKIT=1 docker build  \ | ||||
|     ${DOCKER_GPU_BUILD_ARG} \ | ||||
|     --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|     --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \ | ||||
|     --build-arg "ACL_VERSION=${ACL_VERSION:-}" \ | ||||
|     --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \ | ||||
|     --target "${TARGET}" \ | ||||
|     -t "${tmp_tag}" \ | ||||
|     $@ \ | ||||
|  | ||||
| @ -10,11 +10,6 @@ boto3==1.35.42 | ||||
| #Pinned versions: 1.19.12, 1.16.34 | ||||
| #test that import: | ||||
|  | ||||
| build==1.3.0 | ||||
| #Description: A simple, correct Python build frontend. | ||||
| #Pinned versions: 1.3.0 | ||||
| #test that import: | ||||
|  | ||||
| click | ||||
| #Description: Command Line Interface Creation Kit | ||||
| #Pinned versions: | ||||
| @ -52,10 +47,10 @@ flatbuffers==24.12.23 | ||||
| #Pinned versions: 24.12.23 | ||||
| #test that import: | ||||
|  | ||||
| hypothesis==6.56.4 | ||||
| hypothesis==5.35.1 | ||||
| # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136 | ||||
| #Description: advanced library for generating parametrized tests | ||||
| #Pinned versions: 6.56.4 | ||||
| #Pinned versions: 5.35.1 | ||||
| #test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py | ||||
|  | ||||
| junitparser==2.1.1 | ||||
| @ -98,7 +93,7 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x" | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| mypy==1.16.0 ; platform_system == "Linux" | ||||
| mypy==1.16.0 ; platform_system != "Windows" | ||||
| # Pin MyPy version because new errors are likely to appear with each release | ||||
| # Skip on Windows as lots of type annotations are POSIX specific | ||||
| #Description: linter | ||||
| @ -111,10 +106,10 @@ networkx==2.8.8 | ||||
| #Pinned versions: 2.8.8 | ||||
| #test that import: functorch | ||||
|  | ||||
| ninja==1.11.1.4 | ||||
| ninja==1.11.1.3 | ||||
| #Description: build system. Used in some tests. Used in build to generate build | ||||
| #time tracing information | ||||
| #Pinned versions: 1.11.1.4 | ||||
| #Pinned versions: 1.11.1.3 | ||||
| #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py | ||||
|  | ||||
| numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x" | ||||
| @ -169,12 +164,12 @@ optree==0.13.0 | ||||
|  | ||||
| pillow==11.0.0 | ||||
| #Description:  Python Imaging Library fork | ||||
| #Pinned versions: 11.0.0 | ||||
| #Pinned versions: 10.3.0 | ||||
| #test that import: | ||||
|  | ||||
| protobuf==5.29.5 | ||||
| protobuf==5.29.4 | ||||
| #Description:  Google's data interchange format | ||||
| #Pinned versions: 5.29.5 | ||||
| #Pinned versions: 5.29.4 | ||||
| #test that import: test_tensorboard.py, test/onnx/* | ||||
|  | ||||
| psutil | ||||
| @ -217,7 +212,7 @@ pytest-subtests==0.13.1 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| xdoctest==1.3.0 | ||||
| xdoctest==1.1.0 | ||||
| #Description: runs doctests in pytest | ||||
| #Pinned versions: 1.1.0 | ||||
| #test that import: | ||||
| @ -268,7 +263,7 @@ scipy==1.14.1 ; python_version >= "3.12" | ||||
| #test that import: | ||||
|  | ||||
| # needed by torchgen utils | ||||
| typing-extensions==4.12.2 | ||||
| typing-extensions>=4.10.0 | ||||
| #Description: type hints for python | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -361,10 +356,9 @@ pwlf==2.2.1 | ||||
| #test that import: test_sac_estimator.py | ||||
|  | ||||
| # To build PyTorch itself | ||||
| pyyaml==6.0.2 | ||||
| pyyaml | ||||
| pyzstd | ||||
| setuptools==78.1.1 | ||||
| packaging==23.1 | ||||
| setuptools>=70.1.0 | ||||
| six | ||||
|  | ||||
| scons==4.5.2 ; platform_machine == "aarch64" | ||||
| @ -379,16 +373,13 @@ dataclasses_json==0.6.7 | ||||
| #Pinned versions: 0.6.7 | ||||
| #test that import: | ||||
|  | ||||
| cmake==3.31.6 | ||||
| cmake==4.0.0 | ||||
| #Description: required for building | ||||
|  | ||||
| tlparse==0.4.0 | ||||
| #Description: required for log parsing | ||||
|  | ||||
| filelock==3.18.0 | ||||
| #Description: required for inductor testing | ||||
|  | ||||
| cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x" and platform_system != "Darwin" | ||||
| cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x" | ||||
| #Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits. | ||||
| #test that import: test_cuda.py | ||||
|  | ||||
|  | ||||
| @ -1,15 +1,8 @@ | ||||
| sphinx==5.3.0 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 5.3.0 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@d53b0ffb9b1cda68260693ea98f3483823c88d8e#egg=pytorch_sphinx_theme2 | ||||
|  | ||||
| standard-imghdr==3.13.0; python_version >= "3.13" | ||||
| #Description: This is needed by Sphinx, so it needs to be added here. | ||||
| # The reasons are as follows: | ||||
| # 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr); | ||||
| # 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13. | ||||
| # Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency. | ||||
|  | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2 | ||||
| # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering | ||||
| # but it doesn't seem to work and hangs around idly. The initial thought is that it is probably | ||||
| # something related to the Docker setup. We can investigate this later. | ||||
|  | ||||
| @ -1,11 +1,11 @@ | ||||
| SHELL=/usr/bin/env bash | ||||
|  | ||||
| DOCKER_CMD ?= docker | ||||
| DESIRED_ROCM ?= 7.0 | ||||
| DESIRED_ROCM ?= 6.4 | ||||
| DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM)) | ||||
| PACKAGE_NAME = magma-rocm | ||||
| # inherit this from underlying docker image, do not pass this env var to docker | ||||
| #PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201 | ||||
| #PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201 | ||||
|  | ||||
| DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \ | ||||
| @ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	magma-rocm/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-rocm70 | ||||
| all: magma-rocm64 | ||||
| all: magma-rocm63 | ||||
|  | ||||
| @ -25,11 +24,6 @@ clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-rocm70 | ||||
| magma-rocm70: DESIRED_ROCM := 7.0 | ||||
| magma-rocm70: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-rocm64 | ||||
| magma-rocm64: DESIRED_ROCM := 6.4 | ||||
| magma-rocm64: | ||||
|  | ||||
| @ -6,8 +6,8 @@ set -eou pipefail | ||||
| # The script expects DESIRED_CUDA and PACKAGE_NAME to be set | ||||
| ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" | ||||
|  | ||||
| # https://github.com/icl-utk-edu/magma/pull/65 | ||||
| MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec | ||||
| # Version 2.7.2 + ROCm related updates | ||||
| MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|  | ||||
| # Folders for the build | ||||
| PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata | ||||
| @ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE | ||||
|  | ||||
| # Fetch magma sources and verify checksum | ||||
| pushd ${PACKAGE_DIR} | ||||
| git clone https://github.com/jeffdaily/magma | ||||
| git clone https://bitbucket.org/icl/magma.git | ||||
| pushd magma | ||||
| git checkout ${MAGMA_VERSION} | ||||
| popd | ||||
|  | ||||
| @ -142,7 +142,7 @@ time CMAKE_ARGS=${CMAKE_ARGS[@]} \ | ||||
|     EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ | ||||
|     BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ | ||||
|     USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ | ||||
|     python -m build --wheel --no-isolation --outdir /tmp/$WHEELHOUSE_DIR | ||||
|     python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR | ||||
| echo "Finished setup.py bdist at $(date)" | ||||
|  | ||||
| # Build libtorch packages | ||||
|  | ||||
| @ -104,7 +104,7 @@ if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|     export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr | ||||
| fi | ||||
|  | ||||
| echo "Calling -m pip install . -v --no-build-isolation at $(date)" | ||||
| echo "Calling 'python -m pip install .' at $(date)" | ||||
|  | ||||
| if [[ $LIBTORCH_VARIANT = *"static"* ]]; then | ||||
|     STATIC_CMAKE_FLAG="-DTORCH_STATIC=1" | ||||
|  | ||||
| @ -107,10 +107,6 @@ if [[ $ROCM_INT -ge 60200 ]]; then | ||||
|     ROCM_SO_FILES+=("librocm-core.so") | ||||
| fi | ||||
|  | ||||
| if [[ $ROCM_INT -ge 70000 ]]; then | ||||
|     ROCM_SO_FILES+=("librocroller.so") | ||||
| fi | ||||
|  | ||||
| OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|  | ||||
| @ -89,7 +89,7 @@ fi | ||||
| if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then | ||||
|   export USE_MKLDNN=1 | ||||
|   export USE_MKLDNN_ACL=1 | ||||
|   export ACL_ROOT_DIR=/acl | ||||
|   export ACL_ROOT_DIR=/ComputeLibrary | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then | ||||
| @ -290,13 +290,13 @@ else | ||||
|  | ||||
|       WERROR=1 python setup.py clean | ||||
|  | ||||
|       WERROR=1 python -m build --wheel --no-isolation | ||||
|       WERROR=1 python setup.py bdist_wheel | ||||
|     else | ||||
|       python setup.py clean | ||||
|       if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then | ||||
|         source .ci/pytorch/install_cache_xla.sh | ||||
|       fi | ||||
|       python -m build --wheel --no-isolation | ||||
|       python setup.py bdist_wheel | ||||
|     fi | ||||
|     pip_install_whl "$(echo dist/*.whl)" | ||||
|  | ||||
|  | ||||
| @ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \ | ||||
|  | ||||
| # Build the docs | ||||
| pushd docs/cpp | ||||
| time make VERBOSE=1 html | ||||
| time make VERBOSE=1 html -j | ||||
|  | ||||
| popd | ||||
| popd | ||||
|  | ||||
| @ -36,11 +36,11 @@ fi | ||||
| print_cmake_info | ||||
| if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then | ||||
|   # Needed for inductor benchmarks, as lots of HF networks make `torch.distributed` calls | ||||
|   USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python -m build --wheel --no-isolation | ||||
|   USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel | ||||
| else | ||||
|   # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests | ||||
|   # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python -m build --wheel --no-isolation -C--build-option=--plat-name=macosx_11_0_arm64 | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 | ||||
| fi | ||||
| if which sccache > /dev/null; then | ||||
|   print_sccache_stats | ||||
|  | ||||
| @ -26,7 +26,6 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then | ||||
|     time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo | ||||
|     time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl | ||||
|     time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering | ||||
|     time python test/run_test.py --verbose -i distributed/test_aten_comm_compute_reordering | ||||
|     time python test/run_test.py --verbose -i distributed/test_store | ||||
|     time python test/run_test.py --verbose -i distributed/test_symmetric_memory | ||||
|     time python test/run_test.py --verbose -i distributed/test_pg_wrapper | ||||
|  | ||||
| @ -435,7 +435,7 @@ test_inductor_distributed() { | ||||
|  | ||||
|   # This runs on both single-GPU and multi-GPU instances. It should be smart about skipping tests that aren't supported | ||||
|   # when the required number of GPUs isn't available. | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_aten_comm_compute_reordering distributed/test_compute_comm_reordering --verbose | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -1415,7 +1415,7 @@ EOF | ||||
|   pip3 install -r requirements.txt | ||||
|   # shellcheck source=./common-build.sh | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" | ||||
|   python -m build --wheel --no-isolation -C--build-option=--bdist-dir="base_bdist_tmp" --outdir "base_dist" | ||||
|   python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist" | ||||
|   python -mpip install base_dist/*.whl | ||||
|   echo "::endgroup::" | ||||
|  | ||||
| @ -1617,7 +1617,7 @@ test_operator_benchmark() { | ||||
|   test_inductor_set_cpu_affinity | ||||
|  | ||||
|   cd benchmarks/operator_benchmark/pt_extension | ||||
|   python -m pip install . -v --no-build-isolation | ||||
|   python -m pip install . | ||||
|  | ||||
|   cd "${TEST_DIR}"/benchmarks/operator_benchmark | ||||
|   $TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \ | ||||
| @ -1630,25 +1630,6 @@ test_operator_benchmark() { | ||||
|       --expected "expected_ci_operator_benchmark_eager_float32_cpu.csv" | ||||
| } | ||||
|  | ||||
| test_operator_microbenchmark() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   TEST_DIR=$(pwd) | ||||
|  | ||||
|   cd benchmarks/operator_benchmark/pt_extension | ||||
|   python -m pip install . | ||||
|  | ||||
|   cd "${TEST_DIR}"/benchmarks/operator_benchmark | ||||
|  | ||||
|   for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do | ||||
|     $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \ | ||||
|       --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \ | ||||
|       --benchmark-name "PyTorch operator microbenchmark" --use-compile | ||||
|     $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \ | ||||
|       --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \ | ||||
|       --benchmark-name "PyTorch operator microbenchmark" | ||||
|   done | ||||
| } | ||||
|  | ||||
| if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then | ||||
|   (cd test && python -c "import torch; print(torch.__config__.show())") | ||||
| @ -1705,8 +1686,6 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then | ||||
|     test_operator_benchmark cpu ${TEST_MODE} | ||||
|  | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then | ||||
|   test_operator_microbenchmark | ||||
| elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then | ||||
|   test_inductor_distributed | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then | ||||
| @ -1815,8 +1794,6 @@ elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then | ||||
|   test_h100_distributed | ||||
| elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then | ||||
|   test_h100_symm_mem | ||||
| elif [[ "${TEST_CONFIG}" == "b200-symm-mem" ]]; then | ||||
|   test_h100_symm_mem | ||||
| elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then | ||||
|   test_h100_cutlass_backend | ||||
| else | ||||
|  | ||||
| @ -1,32 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex -o pipefail | ||||
|  | ||||
| # Suppress ANSI color escape sequences | ||||
| export TERM=vt100 | ||||
|  | ||||
| # shellcheck source=./common.sh | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
| # shellcheck source=./common-build.sh | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" | ||||
|  | ||||
| echo "Environment variables" | ||||
| env | ||||
|  | ||||
| echo "Testing FA3 stable wheel still works with currently built torch" | ||||
|  | ||||
| echo "Installing ABI Stable FA3 wheel" | ||||
| # The wheel was built on https://github.com/Dao-AILab/flash-attention/commit/b3846b059bf6b143d1cd56879933be30a9f78c81 | ||||
| # on torch nightly torch==2.9.0.dev20250830+cu129 | ||||
| $MAYBE_SUDO pip -q install https://s3.amazonaws.com/ossci-linux/wheels/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl | ||||
|  | ||||
| pushd flash-attention/hopper | ||||
| export PYTHONPATH=$PWD | ||||
| pytest -v -s \ | ||||
|   "test_flash_attn.py::test_flash_attn_output[1-1-192-False-False-False-0.0-False-False-mha-dtype0]" \ | ||||
|   "test_flash_attn.py::test_flash_attn_varlen_output[511-1-64-True-False-False-0.0-False-False-gqa-dtype2]" \ | ||||
|   "test_flash_attn.py::test_flash_attn_kvcache[1-128-128-False-False-True-None-0.0-False-False-True-False-True-False-gqa-dtype0]" \ | ||||
|   "test_flash_attn.py::test_flash_attn_race_condition[97-97-192-True-dtype0]" \ | ||||
|   "test_flash_attn.py::test_flash_attn_combine[2-3-64-dtype1]" \ | ||||
|   "test_flash_attn.py::test_flash3_bw_compatibility" | ||||
| popd | ||||
| @ -70,7 +70,7 @@ sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| # Build the wheel | ||||
| python -m build --wheel --no-build-isolation | ||||
| python setup.py bdist_wheel | ||||
| if ($LASTEXITCODE -ne 0) { exit 1 } | ||||
|  | ||||
| # Install the wheel locally | ||||
|  | ||||
| @ -38,12 +38,10 @@ if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
|  | ||||
| :: Update CMake | ||||
| :: TODO: Investigate why this helps MKL detection, even when CMake from choco is not used | ||||
| call choco upgrade -y cmake --no-progress --installargs 'ADD_CMAKE_TO_PATH=System' --apply-install-arguments-to-dependencies --version=3.27.9 | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
|  | ||||
| :: TODO: Move to .ci/docker/requirements-ci.txt | ||||
| call pip install mkl==2024.2.0 mkl-static==2024.2.0 mkl-include==2024.2.0 | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
| @ -132,7 +130,7 @@ if "%USE_CUDA%"=="1" ( | ||||
| :: Print all existing environment variable for debugging | ||||
| set | ||||
|  | ||||
| python -m build --wheel --no-isolation | ||||
| python setup.py bdist_wheel | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
| sccache --show-stats | ||||
|  | ||||
| @ -37,8 +37,27 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|   export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda" | ||||
| fi | ||||
|  | ||||
| # TODO: Move this to .ci/docker/requirements-ci.txt | ||||
| python -m pip install "psutil==5.9.1" "pynvml==11.4.1" "pytest-shard==0.1.2" | ||||
| # TODO: Move both of them to Windows AMI | ||||
| python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1 | ||||
|  | ||||
| # Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments | ||||
| # pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node' | ||||
| # scipy from 1.6.3 to 1.10 | ||||
| # expecttest from 0.1.3 to 0.3.0 | ||||
| # xdoctest from 1.0.2 to 1.3.0 | ||||
| python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42" | ||||
|  | ||||
| # Install Z3 optional dependency for Windows builds. | ||||
| python -m pip install z3-solver==4.15.1.0 | ||||
|  | ||||
| # Install tlparse for test\dynamo\test_structured_trace.py UTs. | ||||
| python -m pip install tlparse==0.4.0 | ||||
|  | ||||
| # Install parameterized | ||||
| python -m pip install parameterized==0.8.1 | ||||
|  | ||||
| # Install pulp for testing ilps under torch\distributed\_tools | ||||
| python -m pip install pulp==2.9.0 | ||||
|  | ||||
| run_tests() { | ||||
|     # Run nvidia-smi if available | ||||
|  | ||||
| @ -48,7 +48,7 @@ sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| :: Call PyTorch build script | ||||
| python -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
| python setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
|  | ||||
| :: show sccache stats | ||||
| sccache --show-stats | ||||
|  | ||||
| @ -28,5 +28,5 @@ start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_t | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| set "PATH=%CD%\Python\Scripts;%CD%\Python;%PATH%" | ||||
| %PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel build | ||||
| %PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| @ -86,7 +86,7 @@ copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_ | ||||
| goto build_end | ||||
|  | ||||
| :pytorch | ||||
| %PYTHON_EXEC% -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
| %PYTHON_EXEC% setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
|  | ||||
| :build_end | ||||
| IF ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| @ -63,7 +63,7 @@ if errorlevel 1 exit /b 1 | ||||
| call %CONDA_HOME%\condabin\activate.bat testenv | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| call conda install  -y -q -c conda-forge libuv=1.51 | ||||
| call conda install  -y -q -c conda-forge libuv=1.39 | ||||
| call conda install -y -q intel-openmp | ||||
|  | ||||
| echo "install and test libtorch" | ||||
|  | ||||
| @ -18,7 +18,7 @@ if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake | ||||
|  | ||||
| %PYTHON_EXEC% -m pip install pyyaml | ||||
| %PYTHON_EXEC% -m pip install mkl-include mkl-static | ||||
| %PYTHON_EXEC% -m pip install boto3 requests ninja typing_extensions setuptools==72.1.0 | ||||
| %PYTHON_EXEC% -m pip install boto3 ninja typing_extensions setuptools==72.1.0 | ||||
|  | ||||
| where cmake.exe | ||||
|  | ||||
|  | ||||
| @ -143,8 +143,7 @@ case $desired_python in | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13t) | ||||
|         echo "Using 3.13t deps" | ||||
|         mac_version='macosx-11.0-arm64' | ||||
|         echo "Using 3.13 deps" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
| @ -186,11 +185,11 @@ export USE_QNNPACK=OFF | ||||
| export BUILD_TEST=OFF | ||||
|  | ||||
| pushd "$pytorch_rootdir" | ||||
| echo "Calling -m build --wheel --no-isolation at $(date)" | ||||
| echo "Calling setup.py bdist_wheel at $(date)" | ||||
|  | ||||
| _PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python -m build --wheel --no-isolation --outdir "$whl_tmp_dir" -C--plat-name="${mac_version//[-.]/_}" | ||||
| _PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name "${mac_version//[-.]/_}" | ||||
|  | ||||
| echo "Finished -m build --wheel --no-isolation at $(date)" | ||||
| echo "Finished setup.py bdist_wheel at $(date)" | ||||
|  | ||||
| if [[ $package_type != 'libtorch' ]]; then | ||||
|     echo "delocating wheel dependencies" | ||||
|  | ||||
							
								
								
									
										47
									
								
								.circleci/scripts/functorch_doc_push_script.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										47
									
								
								.circleci/scripts/functorch_doc_push_script.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,47 @@ | ||||
| #!/bin/bash | ||||
| # =================== The following code **should** be executed inside Docker container =================== | ||||
|  | ||||
| # Install dependencies | ||||
| sudo apt-get -y update | ||||
| sudo apt-get -y install expect-dev | ||||
|  | ||||
| # This is where the local pytorch install in the docker image is located | ||||
| pt_checkout="/var/lib/jenkins/workspace" | ||||
| source "$pt_checkout/.ci/pytorch/common_utils.sh" | ||||
| echo "functorch_doc_push_script.sh: Invoked with $*" | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| version=${DOCS_VERSION:-nightly} | ||||
| echo "version: $version" | ||||
|  | ||||
| # Build functorch docs | ||||
| pushd $pt_checkout/functorch/docs | ||||
| pip -q install -r requirements.txt | ||||
| make html | ||||
| popd | ||||
|  | ||||
| git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages | ||||
| pushd functorch_ghpages | ||||
|  | ||||
| if [ $version == "main" ]; then | ||||
|   version=nightly | ||||
| fi | ||||
|  | ||||
| git rm -rf "$version" || true | ||||
| mv "$pt_checkout/functorch/docs/build/html" "$version" | ||||
|  | ||||
| git add "$version" || true | ||||
| git status | ||||
| git config user.email "soumith+bot@pytorch.org" | ||||
| git config user.name "pytorchbot" | ||||
| # If there aren't changes, don't make a commit; push is no-op | ||||
| git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true | ||||
| git status | ||||
|  | ||||
| if [[ "${WITH_PUSH:-}" == true ]]; then | ||||
|   git push -u origin gh-pages | ||||
| fi | ||||
|  | ||||
| popd | ||||
| # =================== The above code **should** be executed inside Docker container =================== | ||||
| @ -69,8 +69,6 @@ readability-string-compare, | ||||
| ' | ||||
| HeaderFilterRegex: '^(aten/|c10/|torch/).*$' | ||||
| WarningsAsErrors: '*' | ||||
| LineFilter: | ||||
|   - name: '/usr/include/.*' | ||||
| CheckOptions: | ||||
|   cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true | ||||
|   cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/ISSUE_TEMPLATE/ci-sev.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/ISSUE_TEMPLATE/ci-sev.md
									
									
									
									
										vendored
									
									
								
							| @ -1,10 +1,6 @@ | ||||
| --- | ||||
| name: "⚠️ CI SEV" | ||||
| about: Tracking incidents for PyTorch's CI infra. | ||||
| title: '' | ||||
| labels: '' | ||||
| assignees: '' | ||||
|  | ||||
| --- | ||||
|  | ||||
| > NOTE: Remember to label this issue with "`ci: sev`" | ||||
|  | ||||
							
								
								
									
										18
									
								
								.github/ISSUE_TEMPLATE/disable-autorevert.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										18
									
								
								.github/ISSUE_TEMPLATE/disable-autorevert.md
									
									
									
									
										vendored
									
									
								
							| @ -1,18 +0,0 @@ | ||||
| --- | ||||
| name: DISABLE AUTOREVERT | ||||
| about: Disables autorevert when open | ||||
| title: "❌\U0001F519 [DISABLE AUTOREVERT]" | ||||
| labels: 'ci: disable-autorevert' | ||||
| assignees: '' | ||||
|  | ||||
| --- | ||||
|  | ||||
| This issue, while open, disables the autorevert functionality. | ||||
|  | ||||
| More details can be found [here](https://github.com/pytorch/test-infra/blob/main/aws/lambda/pytorch-auto-revert/README.md) | ||||
|  | ||||
|  | ||||
| ## Why are you disabling autorevert? | ||||
|  | ||||
|  | ||||
| ## Links to any issues/commits/errors that show the source of the problem
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE/disable-ci-jobs.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE/disable-ci-jobs.md
									
									
									
									
										vendored
									
									
								
							| @ -1,10 +1,8 @@ | ||||
| --- | ||||
| name: Disable CI jobs (PyTorch Dev Infra only) | ||||
| about: Use this template to disable CI jobs | ||||
| title: DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME] | ||||
| labels: 'module: ci' | ||||
| assignees: '' | ||||
|  | ||||
| title: "DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]" | ||||
| labels: "module: ci" | ||||
| --- | ||||
|  | ||||
| > For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							| @ -22,9 +22,6 @@ self-hosted-runner: | ||||
|     - linux.arm64.m7g.4xlarge | ||||
|     - linux.arm64.m7g.4xlarge.ephemeral | ||||
|     - linux.arm64.r7g.12xlarge.memory | ||||
|     - linux.aws.h100 | ||||
|     - linux.aws.h100.4 | ||||
|     - linux.aws.h100.8 | ||||
|     - linux.4xlarge.nvidia.gpu | ||||
|     - linux.8xlarge.nvidia.gpu | ||||
|     - linux.16xlarge.nvidia.gpu | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/actions/teardown-win/action.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/actions/teardown-win/action.yml
									
									
									
									
										vendored
									
									
								
							| @ -23,6 +23,9 @@ runs: | ||||
|       run: | | ||||
|         .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|     - name: Clean up leftover processes on non-ephemeral Windows runner | ||||
|       uses: pytorch/test-infra/.github/actions/cleanup-runner@main | ||||
|  | ||||
|     # Cleaning up Windows workspace sometimes fails flakily with device or resource busy | ||||
|     # error, meaning one or more processes haven't stopped completely yet. So we | ||||
|     # retry this step several times, similar to how the checkout-pytorch GHA does | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 78a47f87ce259a48f0391fa9ae15add05ea7432b | ||||
| 1983609239caaab24ab1ed2bfa2aa92e8c76c1b1 | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 0fc62aa26a30ed7ca419d285f285cb5ba02c4394 | ||||
| c77852e117bdf056c8e9a087e51d6f65cf6ba53d | ||||
|  | ||||
							
								
								
									
										39
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										39
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,44 +1,43 @@ | ||||
| tracking_issue: 24422 | ||||
| ciflow_tracking_issue: 64124 | ||||
| ciflow_push_tags: | ||||
| - ciflow/b200 | ||||
| - ciflow/b200-symm-mem | ||||
| - ciflow/binaries | ||||
| - ciflow/binaries_libtorch | ||||
| - ciflow/binaries_wheel | ||||
| - ciflow/h100 | ||||
| - ciflow/h100-cutlass-backend | ||||
| - ciflow/h100-distributed | ||||
| - ciflow/h100-symm-mem | ||||
| - ciflow/triton_binaries | ||||
| - ciflow/inductor | ||||
| - ciflow/inductor-cu126 | ||||
| - ciflow/inductor-micro-benchmark | ||||
| - ciflow/inductor-micro-benchmark-cpu-x86 | ||||
| - ciflow/inductor-perf-compare | ||||
| - ciflow/inductor-perf-test-nightly-rocm | ||||
| - ciflow/inductor-perf-test-nightly-x86-zen | ||||
| - ciflow/inductor-periodic | ||||
| - ciflow/inductor-rocm | ||||
| - ciflow/inductor-perf-test-nightly-rocm | ||||
| - ciflow/inductor-perf-compare | ||||
| - ciflow/inductor-micro-benchmark | ||||
| - ciflow/inductor-micro-benchmark-cpu-x86 | ||||
| - ciflow/inductor-perf-test-nightly-x86-zen | ||||
| - ciflow/inductor-cu126 | ||||
| - ciflow/linux-aarch64 | ||||
| - ciflow/mps | ||||
| - ciflow/nightly | ||||
| - ciflow/op-benchmark | ||||
| - ciflow/periodic | ||||
| - ciflow/periodic-rocm-mi300 | ||||
| - ciflow/pull | ||||
| - ciflow/quantization-periodic | ||||
| - ciflow/riscv64 | ||||
| - ciflow/rocm | ||||
| - ciflow/rocm-mi300 | ||||
| - ciflow/s390 | ||||
| - ciflow/riscv64 | ||||
| - ciflow/slow | ||||
| - ciflow/torchbench | ||||
| - ciflow/triton_binaries | ||||
| - ciflow/trunk | ||||
| - ciflow/unstable | ||||
| - ciflow/vllm | ||||
| - ciflow/win-arm64 | ||||
| - ciflow/xpu | ||||
| - ciflow/vllm | ||||
| - ciflow/torchbench | ||||
| - ciflow/op-benchmark | ||||
| - ciflow/pull | ||||
| - ciflow/h100 | ||||
| - ciflow/h100-distributed | ||||
| - ciflow/win-arm64 | ||||
| - ciflow/h100-symm-mem | ||||
| - ciflow/h100-cutlass-backend | ||||
| - ciflow/b200 | ||||
| retryable_workflows: | ||||
| - pull | ||||
| - trunk | ||||
| @ -47,4 +46,4 @@ retryable_workflows: | ||||
| - inductor-A100-perf-nightly | ||||
| labeler_config: labeler.yml | ||||
| label_to_label_config: label_to_label.yml | ||||
| mergebot: true | ||||
| mergebot: True | ||||
|  | ||||
							
								
								
									
										36
									
								
								.github/requirements/pip-requirements-macOS.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								.github/requirements/pip-requirements-macOS.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | ||||
| boto3==1.35.42 | ||||
| cmake==3.27.* | ||||
| expecttest==0.3.0 | ||||
| fbscribelogger==0.1.7 | ||||
| filelock==3.18.0 | ||||
| hypothesis==6.56.4 | ||||
| librosa>=0.6.2 | ||||
| mpmath==1.3.0 | ||||
| networkx==2.8.7 | ||||
| ninja==1.10.2.4 | ||||
| numba==0.59.0 | ||||
| numpy==1.26.4 | ||||
| opt-einsum>=3.3 | ||||
| optree==0.13.0 | ||||
| packaging==23.1 | ||||
| parameterized==0.8.1 | ||||
| pillow==10.3.0 | ||||
| protobuf==5.29.5 | ||||
| psutil==5.9.8 | ||||
| pygments==2.15.0 | ||||
| pytest-cpp==2.3.0 | ||||
| pytest-flakefinder==1.1.0 | ||||
| pytest-rerunfailures==10.3 | ||||
| pytest-subtests==0.13.1 | ||||
| pytest-xdist==3.3.1 | ||||
| pytest==7.3.2 | ||||
| pyyaml==6.0.2 | ||||
| scipy==1.12.0 | ||||
| setuptools==78.1.1 | ||||
| sympy==1.13.3 | ||||
| tlparse==0.4.0 | ||||
| tensorboard==2.13.0 | ||||
| typing-extensions==4.12.2 | ||||
| unittest-xml-reporting<=3.2.0,>=2.0.0 | ||||
| xdoctest==1.1.0 | ||||
| z3-solver==4.15.1.0 | ||||
							
								
								
									
										6
									
								
								.github/scripts/filter_test_configs.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/scripts/filter_test_configs.py
									
									
									
									
										vendored
									
									
								
							| @ -502,7 +502,6 @@ def perform_misc_tasks( | ||||
|     job_name: str, | ||||
|     pr_body: str, | ||||
|     branch: Optional[str] = None, | ||||
|     tag: Optional[str] = None, | ||||
| ) -> None: | ||||
|     """ | ||||
|     In addition to applying the filter logic, the script also does the following | ||||
| @ -510,9 +509,7 @@ def perform_misc_tasks( | ||||
|     """ | ||||
|     set_output( | ||||
|         "keep-going", | ||||
|         branch == MAIN_BRANCH | ||||
|         or bool(tag and re.match(r"^trunk/[a-f0-9]{40}$", tag)) | ||||
|         or check_for_setting(labels, pr_body, "keep-going"), | ||||
|         branch == MAIN_BRANCH or check_for_setting(labels, pr_body, "keep-going"), | ||||
|     ) | ||||
|     set_output( | ||||
|         "ci-verbose-test-logs", | ||||
| @ -637,7 +634,6 @@ def main() -> None: | ||||
|         job_name=args.job_name, | ||||
|         pr_body=pr_body if pr_body else "", | ||||
|         branch=args.branch, | ||||
|         tag=tag, | ||||
|     ) | ||||
|  | ||||
|     # Set the filtered test matrix as the output | ||||
|  | ||||
| @ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = { | ||||
| } | ||||
|  | ||||
| # NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this | ||||
| ROCM_ARCHES = ["6.4", "7.0"] | ||||
| ROCM_ARCHES = ["6.3", "6.4"] | ||||
|  | ||||
| XPU_ARCHES = ["xpu"] | ||||
|  | ||||
| @ -87,7 +87,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | " | ||||
|         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | " | ||||
|         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | " | ||||
|         "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | " | ||||
|         "nvidia-nvtx==13.0.39; platform_system == 'Linux' | " | ||||
|         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | " | ||||
|  | ||||
							
								
								
									
										93
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										93
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							| @ -127,6 +127,53 @@ LINUX_BINARY_BUILD_WORFKLOWS = [ | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| ROCM_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="manywheel", | ||||
|         build_variant="rocm", | ||||
|         build_configs=generate_binary_build_matrix.generate_wheels_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             arches=["6.4"], | ||||
|             python_versions=["3.10"], | ||||
|         ), | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             labels={ | ||||
|                 LABEL_CIFLOW_BINARIES, | ||||
|                 LABEL_CIFLOW_BINARIES_WHEEL, | ||||
|                 LABEL_CIFLOW_ROCM, | ||||
|             }, | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| LINUX_BINARY_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="manywheel", | ||||
|         build_configs=generate_binary_build_matrix.generate_wheels_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             arches=["13.0"], | ||||
|             python_versions=["3.12"], | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.LINUX, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.RELEASE, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.LINUX, | ||||
|             generate_binary_build_matrix.RELEASE, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| WINDOWS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
| @ -212,6 +259,39 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| WINDOWS_BINARY_SMOKE_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.RELEASE, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.WINDOWS, | ||||
|             generate_binary_build_matrix.RELEASE, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|     ), | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.WINDOWS, | ||||
|         package_type="libtorch", | ||||
|         build_variant=generate_binary_build_matrix.DEBUG, | ||||
|         build_configs=generate_binary_build_matrix.generate_libtorch_matrix( | ||||
|             OperatingSystem.WINDOWS, | ||||
|             generate_binary_build_matrix.DEBUG, | ||||
|             arches=["cpu"], | ||||
|             libtorch_variants=["shared-with-deps"], | ||||
|         ), | ||||
|         branches="main", | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             isolated_workflow=True, | ||||
|         ), | ||||
|     ), | ||||
| ] | ||||
|  | ||||
| MACOS_BINARY_BUILD_WORKFLOWS = [ | ||||
|     BinaryBuildWorkflow( | ||||
|         os=OperatingSystem.MACOS_ARM64, | ||||
| @ -292,10 +372,23 @@ def main() -> None: | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             S390X_BINARY_BUILD_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             # Give rocm its own workflow file | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             ROCM_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("linux_binary_build_workflow.yml.j2"), | ||||
|             LINUX_BINARY_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("windows_binary_build_workflow.yml.j2"), | ||||
|             WINDOWS_BINARY_BUILD_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("windows_binary_build_workflow.yml.j2"), | ||||
|             WINDOWS_BINARY_SMOKE_WORKFLOWS, | ||||
|         ), | ||||
|         ( | ||||
|             jinja_env.get_template("macos_binary_build_workflow.yml.j2"), | ||||
|             MACOS_BINARY_BUILD_WORKFLOWS, | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/_docs.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/_docs.yml
									
									
									
									
										vendored
									
									
								
							| @ -67,7 +67,7 @@ jobs: | ||||
|             # an OOM issue when running the job, so this upgrades the runner from 4xlarge | ||||
|             # to the next available tier of 12xlarge. So much memory just to generate cpp | ||||
|             # doc | ||||
|             runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory | ||||
|             runner: ${{ inputs.runner_prefix }}linux.12xlarge | ||||
|             # TODO: Nightly cpp docs take longer and longer to finish (more than 3h now) | ||||
|             # Let's try to figure out how this can be improved | ||||
|             timeout-minutes: 360 | ||||
|  | ||||
							
								
								
									
										255
									
								
								.github/workflows/_linux-test-stable-fa3.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										255
									
								
								.github/workflows/_linux-test-stable-fa3.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,255 +0,0 @@ | ||||
| # The point of this workflow is to test that a FA3 wheel that was built based off the | ||||
| # stable ABI as of torch nightly 20250830 can still run on the newer torch. | ||||
| # | ||||
| # This workflow is very similar to the _linux-test.yml workflow, with the following | ||||
| # differences: | ||||
| #   1. It is simpler (there is no test matrix) | ||||
| #   2. It pulls flash-attention as a secondary repository in order to access the tests. | ||||
| #      Note that it does not BUILD anything from flash-attention, as we have a prebuilt | ||||
| #      wheel. We pull flash-attention only to run a few tests. | ||||
| #   3. It runs only FA3 tests. No PyTorch tests are run. | ||||
| name: linux-test-stable-fa3 | ||||
|  | ||||
| on: | ||||
|   workflow_call: | ||||
|     inputs: | ||||
|       build-environment: | ||||
|         required: true | ||||
|         type: string | ||||
|         description: Top-level label for what's being built/tested. | ||||
|       docker-image: | ||||
|         required: true | ||||
|         type: string | ||||
|         description: Docker image to run in. | ||||
|       timeout-minutes: | ||||
|         required: false | ||||
|         type: number | ||||
|         default: 30 | ||||
|         description: | | ||||
|           Set the maximum (in minutes) how long the workflow should take to finish | ||||
|       s3-bucket: | ||||
|         description: S3 bucket to download artifact | ||||
|         required: false | ||||
|         type: string | ||||
|         default: "gha-artifacts" | ||||
|     secrets: | ||||
|       HUGGING_FACE_HUB_TOKEN: | ||||
|         required: false | ||||
|         description: | | ||||
|           HF Auth token to avoid rate limits when downloading models or datasets from hub | ||||
|       VLLM_TEST_HUGGING_FACE_TOKEN: | ||||
|         required: false | ||||
|         description: | | ||||
|           HF Auth token to test vllm | ||||
|       SCRIBE_GRAPHQL_ACCESS_TOKEN: | ||||
|         required: false | ||||
|         description: | | ||||
|           FB app token to write to scribe endpoint | ||||
|  | ||||
| env: | ||||
|   GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} | ||||
|  | ||||
| jobs: | ||||
|   test: | ||||
|     # Don't run on forked repos | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     runs-on: linux.aws.h100 | ||||
|     timeout-minutes: ${{ inputs.timeout-minutes || 30 }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     steps: | ||||
|       - name: Checkout PyTorch | ||||
|         uses: pytorch/pytorch/.github/actions/checkout-pytorch@main | ||||
|         with: | ||||
|           no-sudo: true | ||||
|  | ||||
|       - name: Checkout flash-attention as a secondary repository | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           repository: Dao-AILab/flash-attention | ||||
|           path: flash-attention | ||||
|  | ||||
|       - name: Setup Linux | ||||
|         uses: ./.github/actions/setup-linux | ||||
|  | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-image-name: ${{ inputs.docker-image }} | ||||
|  | ||||
|       - name: Use following to pull public copy of the image | ||||
|         id: print-ghcr-mirror | ||||
|         env: | ||||
|           ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|         shell: bash | ||||
|         run: | | ||||
|           tag=${ECR_DOCKER_IMAGE##*:} | ||||
|           echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}" | ||||
|  | ||||
|       - name: Pull docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|  | ||||
|       - name: Check if in a container runner | ||||
|         shell: bash | ||||
|         id: check_container_runner | ||||
|         run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT" | ||||
|  | ||||
|       - name: Setup GPU_FLAG for docker run | ||||
|         id: setup-gpu-flag | ||||
|         run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}" | ||||
|  | ||||
|       - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container | ||||
|         id: setup-sscache-port-flag | ||||
|         run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}" | ||||
|         if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }} | ||||
|  | ||||
|       - name: Get workflow job id | ||||
|         id: get-job-id | ||||
|         uses: ./.github/actions/get-workflow-job-id | ||||
|         if: always() | ||||
|         with: | ||||
|           github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
|       - name: Download build artifacts | ||||
|         uses: ./.github/actions/download-build-artifacts | ||||
|         with: | ||||
|           name: ${{ inputs.build-environment }} | ||||
|           s3-bucket: ${{ inputs.s3-bucket }} | ||||
|  | ||||
|       - name: Parse ref | ||||
|         id: parse-ref | ||||
|         run: .github/scripts/parse_ref.py | ||||
|  | ||||
|       - name: Set Test step time | ||||
|         id: test-timeout | ||||
|         shell: bash | ||||
|         env: | ||||
|           JOB_TIMEOUT: ${{ inputs.timeout-minutes }} | ||||
|         run: | | ||||
|           echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}" | ||||
|  | ||||
|       - name: Preserve github env variables for use in docker | ||||
|         shell: bash | ||||
|         run: | | ||||
|           env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}" | ||||
|           env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}" | ||||
|  | ||||
|       - name: Test | ||||
|         id: test | ||||
|         timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} | ||||
|         env: | ||||
|           BUILD_ENVIRONMENT: ${{ inputs.build-environment }} | ||||
|           PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|           GITHUB_REPOSITORY: ${{ github.repository }} | ||||
|           GITHUB_WORKFLOW: ${{ github.workflow }} | ||||
|           GITHUB_JOB: ${{ github.job }} | ||||
|           GITHUB_RUN_ID: ${{ github.run_id }} | ||||
|           GITHUB_RUN_NUMBER: ${{ github.run_number }} | ||||
|           GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }} | ||||
|           JOB_ID: ${{ steps.get-job-id.outputs.job-id }} | ||||
|           JOB_NAME: ${{ steps.get-job-id.outputs.job-name }} | ||||
|           BRANCH: ${{ steps.parse-ref.outputs.branch }} | ||||
|           SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|           BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }} | ||||
|           SHM_SIZE: '2g' | ||||
|           DOCKER_IMAGE: ${{ inputs.docker-image }} | ||||
|           VLLM_TEST_HUGGING_FACE_TOKEN: ${{ secrets.VLLM_TEST_HUGGING_FACE_TOKEN }} | ||||
|           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} | ||||
|           SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} | ||||
|           ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ steps.get-job-id.outputs.job-id }} | ||||
|         run: | | ||||
|           set -x | ||||
|  | ||||
|           TEST_COMMAND=.ci/pytorch/test_fa3_abi_stable.sh | ||||
|  | ||||
|           # Leaving 1GB for the runner and other things | ||||
|           TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo) | ||||
|           # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap | ||||
|           # comes from https://github.com/pytorch/test-infra/pull/6058 | ||||
|           TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3)) | ||||
|  | ||||
|  | ||||
|           SHM_OPTS="--shm-size=${SHM_SIZE}" | ||||
|           JENKINS_USER="--user jenkins" | ||||
|           DOCKER_SHELL_CMD= | ||||
|  | ||||
|           # detached container should get cleaned up by teardown_ec2_linux | ||||
|           # TODO: Stop building test binaries as part of the build phase | ||||
|           # The shellcheck disable below covers GPU_FLAG, SHM_OPTS, JENKINS_USER and DOCKER_SHELL_CMD, which are deliberately expanded unquoted | ||||
|           # shellcheck disable=SC2086,SC2090 | ||||
|           container_name=$(docker run \ | ||||
|             ${GPU_FLAG:-} \ | ||||
|             ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \ | ||||
|             -e BUILD_ENVIRONMENT \ | ||||
|             -e PR_NUMBER \ | ||||
|             -e GITHUB_ACTIONS \ | ||||
|             -e GITHUB_REPOSITORY \ | ||||
|             -e GITHUB_WORKFLOW \ | ||||
|             -e GITHUB_JOB \ | ||||
|             -e GITHUB_RUN_ID \ | ||||
|             -e GITHUB_RUN_NUMBER \ | ||||
|             -e GITHUB_RUN_ATTEMPT \ | ||||
|             -e JOB_ID \ | ||||
|             -e JOB_NAME \ | ||||
|             -e BASE_SHA \ | ||||
|             -e BRANCH \ | ||||
|             -e SHA1 \ | ||||
|             -e MAX_JOBS="$(nproc --ignore=2)" \ | ||||
|             -e HUGGING_FACE_HUB_TOKEN \ | ||||
|             -e VLLM_TEST_HUGGING_FACE_TOKEN \ | ||||
|             -e SCRIBE_GRAPHQL_ACCESS_TOKEN \ | ||||
|             -e ARTIFACTS_FILE_SUFFIX \ | ||||
|             --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \ | ||||
|             --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \ | ||||
|             --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ | ||||
|             --security-opt seccomp=unconfined \ | ||||
|             --cap-add=SYS_PTRACE \ | ||||
|             --ipc=host \ | ||||
|             ${SHM_OPTS} \ | ||||
|             --tty \ | ||||
|             --detach \ | ||||
|             --name="${container_name}" \ | ||||
|             ${JENKINS_USER} \ | ||||
|             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ | ||||
|             -w /var/lib/jenkins/workspace \ | ||||
|             "${DOCKER_IMAGE}" \ | ||||
|             ${DOCKER_SHELL_CMD} | ||||
|           ) | ||||
|  | ||||
|           echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}" | ||||
|  | ||||
|           docker exec -t "${container_name}" sh -c "python3 -m pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}" | ||||
|  | ||||
|       - name: Collect backtraces from coredumps (if any) | ||||
|         if: always() | ||||
|         run: | | ||||
|           # shellcheck disable=SC2156 | ||||
|           find . -iname "core.[1-9]*" -exec docker exec "${DOCKER_CONTAINER_ID}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \; | ||||
|  | ||||
|       - name: Store Core dumps on S3 | ||||
|         uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0 | ||||
|         if: failure() | ||||
|         with: | ||||
|           name: coredumps-fa3-stable-abi-smoke-tests | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: ignore | ||||
|           path: ./**/core.[1-9]* | ||||
|  | ||||
|       - name: Upload utilization stats | ||||
|         if: ${{ always() && steps.test.conclusion && steps.test.conclusion != 'skipped' }} | ||||
|         continue-on-error: true | ||||
|         uses: ./.github/actions/upload-utilization-stats | ||||
|         with: | ||||
|           job_id: ${{ steps.get-job-id.outputs.job-id }} | ||||
|           job_name: ${{ steps.get-job-id.outputs.job-name }} | ||||
|           workflow_name: ${{ github.workflow }} | ||||
|           workflow_run_id: ${{github.run_id}} | ||||
|           workflow_attempt: ${{github.run_attempt}} | ||||
|  | ||||
|       - name: Teardown Linux | ||||
|         uses: pytorch/test-infra/.github/actions/teardown-linux@main | ||||
|         if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' | ||||
							
								
								
									
										2
									
								
								.github/workflows/_linux-test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/_linux-test.yml
									
									
									
									
										vendored
									
									
								
							| @ -273,8 +273,6 @@ jobs: | ||||
|           TEST_CONFIG: ${{ matrix.config }} | ||||
|           SHARD_NUMBER: ${{ matrix.shard }} | ||||
|           NUM_TEST_SHARDS: ${{ matrix.num_shards }} | ||||
|           EXTRA_FLAGS: ${{ matrix.extra_flags || '' }} | ||||
|           OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }} | ||||
|           REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} | ||||
|           CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} | ||||
|           VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/_mac-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/_mac-build.yml
									
									
									
									
										vendored
									
									
								
							| @ -85,7 +85,7 @@ jobs: | ||||
|         uses: pytorch/test-infra/.github/actions/setup-python@main | ||||
|         with: | ||||
|           python-version: ${{ inputs.python-version }} | ||||
|           pip-requirements-file: .ci/docker/requirements-ci.txt | ||||
|           pip-requirements-file: .github/requirements/pip-requirements-macOS.txt | ||||
|  | ||||
|       - name: Install sccache (only for non-forked PRs, and pushes to trunk) | ||||
|         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0 | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/_mac-test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/_mac-test.yml
									
									
									
									
										vendored
									
									
								
							| @ -122,7 +122,7 @@ jobs: | ||||
|         uses: pytorch/test-infra/.github/actions/setup-python@main | ||||
|         with: | ||||
|           python-version: ${{ inputs.python-version }} | ||||
|           pip-requirements-file: .ci/docker/requirements-ci.txt | ||||
|           pip-requirements-file: .github/requirements/pip-requirements-macOS.txt | ||||
|  | ||||
|       - name: Start monitoring script | ||||
|         id: monitor-script | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/workflows/_win-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/_win-build.yml
									
									
									
									
										vendored
									
									
								
							| @ -84,6 +84,9 @@ jobs: | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|  | ||||
|       - name: Clean up leftover processes on non-ephemeral Windows runner | ||||
|         uses: pytorch/test-infra/.github/actions/cleanup-runner@main | ||||
|  | ||||
|       - name: Setup SSH (Click me for login details) | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         with: | ||||
|  | ||||
							
								
								
									
										24
									
								
								.github/workflows/_win-test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								.github/workflows/_win-test.yml
									
									
									
									
										vendored
									
									
								
							| @ -77,6 +77,9 @@ jobs: | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|  | ||||
|       - name: Clean up leftover processes on non-ephemeral Windows runner | ||||
|         uses: pytorch/test-infra/.github/actions/cleanup-runner@main | ||||
|  | ||||
|       - name: Setup SSH (Click me for login details) | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         with: | ||||
| @ -103,6 +106,18 @@ jobs: | ||||
|         with: | ||||
|           cuda-version: ${{ inputs.cuda-version }} | ||||
|  | ||||
|       # TODO: Move to a requirements.txt file for windows | ||||
|       - name: Install pip dependencies | ||||
|         uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0 | ||||
|         with: | ||||
|           shell: bash | ||||
|           timeout_minutes: 5 | ||||
|           max_attempts: 5 | ||||
|           retry_wait_seconds: 30 | ||||
|           command: | | ||||
|             set -eu | ||||
|             python3 -m pip install 'xdoctest>=1.1.0' | ||||
|  | ||||
|       - name: Get workflow job id | ||||
|         id: get-job-id | ||||
|         uses: ./.github/actions/get-workflow-job-id | ||||
| @ -257,6 +272,15 @@ jobs: | ||||
|         shell: bash | ||||
|         run: python3 .github/scripts/parse_ref.py | ||||
|  | ||||
|       - name: Uninstall PyTorch | ||||
|         if: always() | ||||
|         continue-on-error: true | ||||
|         shell: bash | ||||
|         run: | | ||||
|           # This step removes PyTorch installed by the test to give a clean slate | ||||
|           # to the next job | ||||
|           python3 -mpip uninstall -y torch | ||||
|  | ||||
|       - name: Teardown Windows | ||||
|         uses: ./.github/actions/teardown-win | ||||
|         if: always() | ||||
|  | ||||
							
								
								
									
										60
									
								
								.github/workflows/b200-symm-mem.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										60
									
								
								.github/workflows/b200-symm-mem.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,60 +0,0 @@ | ||||
| name: Limited CI for symmetric memory tests on B200 | ||||
|  | ||||
| on: | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - .github/workflows/b200-symm-mem.yml | ||||
|   workflow_dispatch: | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/b200-symm-mem/* | ||||
|   schedule: | ||||
|     - cron: 22 8 * * *  # about 1:22am PDT | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|  | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runner: linux.12xlarge.memory | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '10.0' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "b200-symm-mem", shard: 1, num_shards: 1, runner: "linux.dgx.b200.8" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm100-test: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: | ||||
|       - linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.test-matrix }} | ||||
|       aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|     secrets: inherit | ||||
							
								
								
									
										2
									
								
								.github/workflows/build-almalinux-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/build-almalinux-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -36,7 +36,7 @@ jobs: | ||||
|     runs-on: linux.9xlarge.ephemeral | ||||
|     strategy: | ||||
|       matrix: | ||||
|         tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "cpu"] | ||||
|         tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"] | ||||
|     steps: | ||||
|       - name: Build docker image | ||||
|         uses: pytorch/pytorch/.github/actions/binary-docker-build@main | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/build-libtorch-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/build-libtorch-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -52,8 +52,8 @@ jobs: | ||||
|           { tag: "cuda12.9" }, | ||||
|           { tag: "cuda12.8" }, | ||||
|           { tag: "cuda12.6" }, | ||||
|           { tag: "rocm6.3"  }, | ||||
|           { tag: "rocm6.4"  }, | ||||
|           { tag: "rocm7.0"  }, | ||||
|           { tag: "cpu"      }, | ||||
|         ] | ||||
|     steps: | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/build-magma-rocm-linux.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/build-magma-rocm-linux.yml
									
									
									
									
										vendored
									
									
								
							| @ -34,7 +34,7 @@ jobs: | ||||
|       id-token: write | ||||
|     strategy: | ||||
|       matrix: | ||||
|         rocm_version: ["70", "64"] | ||||
|         rocm_version: ["64", "63"] | ||||
|     steps: | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/workflows/build-manywheel-images.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/build-manywheel-images.yml
									
									
									
									
										vendored
									
									
								
							| @ -52,10 +52,11 @@ jobs: | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda13.0",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.8",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxaarch64-builder",       tag: "cuda12.6",          runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.3",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm6.4",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "rocm7.0",           runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "cpu",               runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28_aarch64-builder",  tag: "cpu-aarch64",       runner: "linux.arm64.2xlarge.ephemeral" }, | ||||
|           { name: "manylinuxcxx11-abi-builder",     tag: "cpu-cxx11-abi",     runner: "linux.9xlarge.ephemeral" }, | ||||
|           { name: "manylinux2_28-builder",          tag: "xpu",               runner: "linux.9xlarge.ephemeral" }, | ||||
|         ] | ||||
|     runs-on: ${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }} | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/build-triton-wheel.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/build-triton-wheel.yml
									
									
									
									
										vendored
									
									
								
							| @ -55,7 +55,7 @@ jobs: | ||||
|         docker-image: ["pytorch/manylinux2_28-builder:cpu"] | ||||
|         include: | ||||
|           - device: "rocm" | ||||
|             rocm_version: "7.0" | ||||
|             rocm_version: "6.4" | ||||
|             runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" | ||||
|           - device: "cuda" | ||||
|             rocm_version: "" | ||||
|  | ||||
							
								
								
									
										1
									
								
								.github/workflows/docker-builds.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.github/workflows/docker-builds.yml
									
									
									
									
										vendored
									
									
								
							| @ -59,6 +59,7 @@ jobs: | ||||
|           pytorch-linux-jammy-py3.13-clang12, | ||||
|           pytorch-linux-jammy-rocm-n-py3, | ||||
|           pytorch-linux-noble-rocm-n-py3, | ||||
|           pytorch-linux-noble-rocm-alpha-py3, | ||||
|           pytorch-linux-jammy-rocm-n-py3-benchmarks, | ||||
|           pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12, | ||||
|           pytorch-linux-jammy-py3.10-gcc11, | ||||
|  | ||||
							
								
								
									
										14
									
								
								.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -224,7 +224,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_10-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -427,7 +427,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_11-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -630,7 +630,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_12-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -833,7 +833,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1036,7 +1036,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_13t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1239,7 +1239,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
| @ -1442,7 +1442,7 @@ jobs: | ||||
|       ALPINE_IMAGE: "arm64v8/alpine" | ||||
|       build_name: manywheel-py3_14t-cuda-aarch64-13_0 | ||||
|       build_environment: linux-aarch64-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|       timeout-minutes: 420 | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|  | ||||
							
								
								
									
										230
									
								
								.github/workflows/generated-linux-binary-libtorch-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										230
									
								
								.github/workflows/generated-linux-binary-libtorch-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -316,6 +316,121 @@ jobs: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm6_3-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       build_name: libtorch-rocm6_3-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-rocm6_3-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-rocm6_3-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-rocm6_3-shared-with-deps-release | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: libtorch-cxx11-builder | ||||
|           custom-tag-prefix: rocm6.3 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
|   libtorch-rocm6_3-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-rocm6_3-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.3 | ||||
|       GPU_ARCH_VERSION: "6.3" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.3 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-rocm6_3-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm6_4-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
| @ -430,118 +545,3 @@ jobs: | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
|   libtorch-rocm7_0-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       build_name: libtorch-rocm7_0-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-rocm7_0-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-rocm7_0-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-rocm7_0-shared-with-deps-release | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: libtorch-cxx11-builder | ||||
|           custom-tag-prefix: rocm7.0 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
|   libtorch-rocm7_0-shared-with-deps-release-upload:  # Uploading | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     permissions: | ||||
|       id-token: write | ||||
|       contents: read | ||||
|     needs: libtorch-rocm7_0-shared-with-deps-release-test | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm7.0 | ||||
|       GPU_ARCH_VERSION: "7.0" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm7.0 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-rocm7_0-shared-with-deps-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|     uses: ./.github/workflows/_binary-upload.yml | ||||
|  | ||||
							
								
								
									
										87
									
								
								.github/workflows/generated-linux-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								.github/workflows/generated-linux-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,87 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-libtorch-release | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/trunk/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-libtorch-release | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch-release | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   libtorch-cpu-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       DOCKER_IMAGE: libtorch-cxx11-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cpu | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       build_name: libtorch-cpu-shared-with-deps-release | ||||
|       build_environment: linux-binary-libtorch-release | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.4xlarge | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
										88
									
								
								.github/workflows/generated-linux-binary-manywheel-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								.github/workflows/generated-linux-binary-manywheel-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,88 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-manywheel | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/trunk/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-manywheel | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-manywheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   manywheel-py3_12-cuda13_0-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu130 | ||||
|       GPU_ARCH_VERSION: "13.0" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build_name: manywheel-py3_12-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_12-cuda13_0-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_12-cuda13_0-build | ||||
|       - get-label-type | ||||
|     uses: ./.github/workflows/_binary-test-linux.yml | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cu130 | ||||
|       GPU_ARCH_VERSION: "13.0" | ||||
|       GPU_ARCH_TYPE: cuda | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: cuda13.0 | ||||
|       DESIRED_PYTHON: "3.12" | ||||
|       build_name: manywheel-py3_12-cuda13_0 | ||||
|       build_environment: linux-binary-manywheel | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
							
								
								
									
										1582
									
								
								.github/workflows/generated-linux-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										1582
									
								
								.github/workflows/generated-linux-binary-manywheel-nightly.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										136
									
								
								.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,136 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/linux_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: linux-binary-manywheel-rocm | ||||
|  | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|     tags: | ||||
|       - 'ciflow/binaries/*' | ||||
|       - 'ciflow/binaries_wheel/*' | ||||
|       - 'ciflow/rocm/*' | ||||
|   workflow_dispatch: | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BINARY_ENV_FILE: /tmp/env | ||||
|   BUILD_ENVIRONMENT: linux-binary-manywheel-rocm | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   PYTORCH_FINAL_PACKAGE_DIR: /artifacts | ||||
|   PYTORCH_ROOT: /pytorch | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 0 | ||||
| concurrency: | ||||
|   group: linux-binary-manywheel-rocm-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   manywheel-py3_10-rocm6_4-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     uses: ./.github/workflows/_binary-build-linux.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.4 | ||||
|       GPU_ARCH_VERSION: "6.4" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.4 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       timeout-minutes: 300 | ||||
|       build_name: manywheel-py3_10-rocm6_4 | ||||
|       build_environment: linux-binary-manywheel-rocm | ||||
|     secrets: | ||||
|       github-token: ${{ secrets.GITHUB_TOKEN }} | ||||
|   manywheel-py3_10-rocm6_4-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - manywheel-py3_10-rocm6_4-build | ||||
|       - get-label-type | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: 240 | ||||
|     env: | ||||
|       PYTORCH_ROOT: /pytorch | ||||
|       PACKAGE_TYPE: manywheel | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: rocm6.4 | ||||
|       GPU_ARCH_VERSION: "6.4" | ||||
|       GPU_ARCH_TYPE: rocm | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       DOCKER_IMAGE: manylinux2_28-builder | ||||
|       DOCKER_IMAGE_TAG_PREFIX: rocm6.4 | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Setup ROCm | ||||
|         uses: ./.github/actions/setup-rocm | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: manywheel-py3_10-rocm6_4 | ||||
|           path: "${{ runner.temp }}/artifacts/" | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: ROCm set GPU_FLAG | ||||
|         run: | | ||||
|           echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
|         with: | ||||
|           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only | ||||
|           aws-region: us-east-1 | ||||
|           role-duration-seconds: 18000 | ||||
|       - name: Calculate docker image | ||||
|         id: calculate-docker-image | ||||
|         uses: pytorch/test-infra/.github/actions/calculate-docker-image@main | ||||
|         with: | ||||
|           docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} | ||||
|           docker-image-name: manylinux2_28-builder | ||||
|           custom-tag-prefix: rocm6.4 | ||||
|           docker-build-dir: .ci/docker | ||||
|           working-directory: pytorch | ||||
|       - name: Pull Docker image | ||||
|         uses: pytorch/test-infra/.github/actions/pull-docker-image@main | ||||
|         with: | ||||
|           docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Test Pytorch binary | ||||
|         uses: ./pytorch/.github/actions/test-pytorch-binary | ||||
|         env: | ||||
|           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} | ||||
|       - name: Teardown ROCm | ||||
|         uses: ./.github/actions/teardown-rocm | ||||
							
								
								
									
										261
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								.github/workflows/generated-windows-binary-libtorch-debug-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,261 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/windows_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: windows-binary-libtorch-debug | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|   workflow_dispatch: | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BUILD_ENVIRONMENT: windows-binary-libtorch-debug | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 1 | ||||
|   OS: windows | ||||
| concurrency: | ||||
|   group: windows-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-debug-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value so that libtorch works correctly with our batch scripts; | ||||
|       # without this value, pip does not get installed for some reason. | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           # Append BINARY_ENV_FILE and PYTORCH_FINAL_PACKAGE_DIR (both under RUNNER_TEMP) to GITHUB_ENV. | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           # NOTE(review): this line is only echoed, not appended to GITHUB_ENV like the two above; | ||||
|           # confirm whether WIN_PACKAGE_WORK_DIR is meant to be exported to later steps. | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Print one EC2 instance metadata value to stdout. | ||||
|             #   $1: metadata category, appended to the /latest/meta-data/ path (e.g. ami-id) | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             # The inner curl PUTs to /latest/api/token for a session token (TTL header: 30 seconds); | ||||
|             # the outer curl sends that token as a header on the metadata request. | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI. | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-debug | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cpu-shared-with-deps-debug-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-debug-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: debug | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value so that libtorch works correctly with our batch scripts; | ||||
|       # without this value, pip does not get installed for some reason. | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Print one EC2 instance metadata value to stdout. | ||||
|             #   $1: metadata category, appended to the /latest/meta-data/ path (e.g. ami-id) | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             # The inner curl PUTs to /latest/api/token for a session token (TTL header: 30 seconds); | ||||
|             # the outer curl sends that token as a header on the metadata request. | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-debug | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
							
								
								
									
										261
									
								
								.github/workflows/generated-windows-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								.github/workflows/generated-windows-binary-libtorch-release-main.yml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,261 @@ | ||||
| # @generated DO NOT EDIT MANUALLY | ||||
|  | ||||
| # Template is at:    .github/templates/windows_binary_build_workflow.yml.j2 | ||||
| # Generation script: .github/scripts/generate_ci_workflows.py | ||||
| name: windows-binary-libtorch-release | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     branches: | ||||
|       - main | ||||
|   workflow_dispatch: | ||||
|  | ||||
| env: | ||||
|   # Needed for conda builds | ||||
|   ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" | ||||
|   AWS_DEFAULT_REGION: us-east-1 | ||||
|   BUILD_ENVIRONMENT: windows-binary-libtorch-release | ||||
|   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|   PR_NUMBER: ${{ github.event.pull_request.number }} | ||||
|   SHA1: ${{ github.event.pull_request.head.sha || github.sha }} | ||||
|   SKIP_ALL_TESTS: 1 | ||||
|   OS: windows | ||||
| concurrency: | ||||
|   group: windows-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| jobs: | ||||
|   get-label-type: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: get-label-type | ||||
|     uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | ||||
|     with: | ||||
|       triggering_actor: ${{ github.triggering_actor }} | ||||
|       issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | ||||
|       curr_branch: ${{ github.head_ref || github.ref_name }} | ||||
|       curr_ref_type: ${{ github.ref_type }} | ||||
|   libtorch-cpu-shared-with-deps-release-build: | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Build PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" | ||||
|       - uses: actions/upload-artifact@v4.4.0 | ||||
|         if: always() | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-release | ||||
|           retention-days: 14 | ||||
|           if-no-files-found: error | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
|  | ||||
|   libtorch-cpu-shared-with-deps-release-test:  # Testing | ||||
|     if: ${{ github.repository_owner == 'pytorch' }} | ||||
|     needs: | ||||
|       - libtorch-cpu-shared-with-deps-release-build | ||||
|       - get-label-type | ||||
|     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" | ||||
|     timeout-minutes: 360 | ||||
|     env: | ||||
|       PYTORCH_ROOT: ${{ github.workspace }}/pytorch | ||||
|       PACKAGE_TYPE: libtorch | ||||
|       # TODO: This is a legacy variable that we eventually want to get rid of in | ||||
|       #       favor of GPU_ARCH_VERSION | ||||
|       DESIRED_CUDA: cpu | ||||
|       GPU_ARCH_TYPE: cpu | ||||
|       SKIP_ALL_TESTS: 1 | ||||
|       LIBTORCH_CONFIG: release | ||||
|       LIBTORCH_VARIANT: shared-with-deps | ||||
|       # This is a dummy value for libtorch to work correctly with our batch scripts | ||||
|       # without this value pip does not get installed for some reason | ||||
|       DESIRED_PYTHON: "3.10" | ||||
|     steps: | ||||
|       - name: Display EC2 information | ||||
|         shell: bash | ||||
|         run: | | ||||
|           set -euo pipefail | ||||
|           function get_ec2_metadata() { | ||||
|             # Pulled from instance metadata endpoint for EC2 | ||||
|             # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html | ||||
|             category=$1 | ||||
|             curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" | ||||
|           } | ||||
|           echo "ami-id: $(get_ec2_metadata ami-id)" | ||||
|           echo "instance-id: $(get_ec2_metadata instance-id)" | ||||
|           echo "instance-type: $(get_ec2_metadata instance-type)" | ||||
|           echo "system info $(uname -a)" | ||||
|       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" | ||||
|         uses: pytorch/test-infra/.github/actions/setup-ssh@main | ||||
|         continue-on-error: true | ||||
|         with: | ||||
|           github-secret: ${{ secrets.GITHUB_TOKEN }} | ||||
|       - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon | ||||
|         shell: bash | ||||
|         run: | | ||||
|           git config --global core.longpaths true | ||||
|           git config --global core.symlinks true | ||||
|  | ||||
|           # https://git-scm.com/docs/git-fsmonitor--daemon.  The daemon could lock | ||||
|           # the directory on Windows and prevent GHA from checking out as reported | ||||
|           # in https://github.com/actions/checkout/issues/1018 | ||||
|           git config --global core.fsmonitor false | ||||
|       # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 | ||||
|       - name: Enable long paths on Windows | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 | ||||
|       # Since it's just a defensive command, the workflow should continue even if the command fails. This step can be | ||||
|       # removed once Windows Defender is removed from the AMI | ||||
|       - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch | ||||
|         continue-on-error: true | ||||
|         shell: powershell | ||||
|         run: | | ||||
|           Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore | ||||
|           # Let's both exclude the path and disable Windows Defender completely just to be sure | ||||
|           # that it doesn't interfere | ||||
|           Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@v4 | ||||
|         with: | ||||
|           ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | ||||
|           submodules: recursive | ||||
|           path: pytorch | ||||
|           show-progress: false | ||||
|       - name: Clean PyTorch checkout | ||||
|         run: | | ||||
|           # Remove any artifacts from the previous checkouts | ||||
|           git clean -fxd | ||||
|         working-directory: pytorch | ||||
|       # NOTE: These environment variables are put here so that they can be applied on every job equally | ||||
|       #       They are also here because setting them at a workflow level doesn't give us access to the | ||||
|       #       runner.temp variable, which we need. | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" | ||||
|           echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" | ||||
|           echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" | ||||
|       - uses: actions/download-artifact@v4.1.7 | ||||
|         name: Download Build Artifacts | ||||
|         with: | ||||
|           name: libtorch-cpu-shared-with-deps-release | ||||
|           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" | ||||
|       - name: Populate binary env | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" | ||||
|       - name: Test PyTorch binary | ||||
|         shell: bash | ||||
|         run: | | ||||
|           "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" | ||||
|       - name: Wait until all sessions have drained | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         timeout-minutes: 120 | ||||
|         run: | | ||||
|           .github\scripts\wait_for_ssh_to_drain.ps1 | ||||
|       - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) | ||||
|         shell: powershell | ||||
|         working-directory: pytorch | ||||
|         if: always() | ||||
|         run: | | ||||
|           .github\scripts\kill_active_ssh_sessions.ps1 | ||||
							
								
								
									
										46
									
								
								.github/workflows/operator_microbenchmark.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										46
									
								
								.github/workflows/operator_microbenchmark.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,46 +0,0 @@ | ||||
| name: operator_microbenchmark | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     tags: | ||||
|       - ciflow/op-benchmark/* | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     # Run at 06:00 UTC every day | ||||
|     - cron: 0 6 * * * | ||||
|  | ||||
| concurrency: | ||||
|   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | ||||
|   cancel-in-progress: true | ||||
|  | ||||
| permissions: | ||||
|   id-token: write | ||||
|   contents: read | ||||
|  | ||||
| jobs: | ||||
|   opmicrobenchmark-build: | ||||
|     if: github.repository_owner == 'pytorch' | ||||
|     name: opmicrobenchmark-build | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     with: | ||||
|       runner: linux.12xlarge.memory | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: '8.0 9.0' | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" }, | ||||
|           { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.a100" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   opmicrobenchmark-test: | ||||
|     name: opmicrobenchmark-test | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: opmicrobenchmark-build | ||||
|     with: | ||||
|       timeout-minutes: 500 | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 | ||||
|       docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
							
								
								
									
										29
									
								
								.github/workflows/periodic.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								.github/workflows/periodic.yml
									
									
									
									
										vendored
									
									
								
							| @ -59,14 +59,13 @@ jobs: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.4-py3.10-gcc11 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11 | ||||
|       cuda-arch-list: 7.5 | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|           { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|           { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
| @ -113,13 +112,13 @@ jobs: | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-build: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|   linux-jammy-cuda12_8-py3_9-gcc9-build: | ||||
|     name: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|       build-environment: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9 | ||||
|       cuda-arch-list: 8.6 | ||||
|       test-matrix: | | ||||
| @ -129,14 +128,14 @@ jobs: | ||||
|         ]} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-test: | ||||
|     name: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|   linux-jammy-cuda12_8-py3_9-gcc9-test: | ||||
|     name: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|     uses: ./.github/workflows/_linux-test.yml | ||||
|     needs: linux-jammy-cuda12_8-py3_10-gcc9-build | ||||
|     needs: linux-jammy-cuda12_8-py3_9-gcc9-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc9 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.test-matrix }} | ||||
|       build-environment: linux-jammy-cuda12.8-py3.9-gcc9 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_9-gcc9-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_9-gcc9-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc9-debug-build: | ||||
|  | ||||
							
								
								
									
										6
									
								
								.github/workflows/pull.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/pull.yml
									
									
									
									
										vendored
									
									
								
							| @ -343,14 +343,14 @@ jobs: | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-xpu-n-py3_10-build: | ||||
|     name: linux-jammy-xpu-n-py3.10 | ||||
|   linux-jammy-xpu-n-py3_9-build: | ||||
|     name: linux-jammy-xpu-n-py3.9 | ||||
|     uses: ./.github/workflows/_linux-build.yml | ||||
|     needs: get-label-type | ||||
|     with: | ||||
|       sync-tag: linux-xpu-n-build | ||||
|       runner_prefix: ${{ needs.get-label-type.outputs.label-type }} | ||||
|       build-environment: linux-jammy-xpu-n-py3.10 | ||||
|       build-environment: linux-jammy-xpu-n-py3.9 | ||||
|       docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3 | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/workflows/rocm-mi355.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/rocm-mi355.yml
									
									
									
									
										vendored
									
									
								
							| @ -38,7 +38,7 @@ jobs: | ||||
|     with: | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       build-environment: linux-noble-rocm-py3.12-mi355 | ||||
|       docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3 | ||||
|       docker-image-name: ci-image:pytorch-linux-noble-rocm-alpha-py3 | ||||
|       sync-tag: rocm-build | ||||
|       test-matrix: | | ||||
|         { include: [ | ||||
|  | ||||
							
								
								
									
										12
									
								
								.github/workflows/test-h100.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.github/workflows/test-h100.yml
									
									
									
									
										vendored
									
									
								
							| @ -61,15 +61,3 @@ jobs: | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build.outputs.docker-image }} | ||||
|       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build.outputs.test-matrix }} | ||||
|     secrets: inherit | ||||
|  | ||||
|   linux-jammy-cuda12_8-py3_10-gcc11-sm90-FA3-ABI-stable-test: | ||||
|     name: linux-jammy-cuda12_8-py3_10-gcc11-sm90-FA3-ABI-stable-test | ||||
|     uses: ./.github/workflows/_linux-test-stable-fa3.yml | ||||
|     needs: | ||||
|       - linux-jammy-cuda12_8-py3_10-gcc11-sm90-build | ||||
|     with: | ||||
|       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90 | ||||
|       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build.outputs.docker-image }} | ||||
|       timeout-minutes: 30 | ||||
|       s3-bucket: gha-artifacts | ||||
|     secrets: inherit | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/workflows/update-viablestrict.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/update-viablestrict.yml
									
									
									
									
										vendored
									
									
								
							| @ -48,7 +48,4 @@ jobs: | ||||
|             echo "{\"sha\": \"${LATEST_SHA}\", \"repository\":\"pytorch/pytorch\", \"timestamp\": ${TIME}}" > "/tmp/${LATEST_SHA}.json" | ||||
|             pip install awscli==1.29.40 | ||||
|             aws s3 cp "/tmp/${LATEST_SHA}.json" "s3://ossci-raw-job-status/stable_pushes/pytorch/pytorch/${LATEST_SHA}.json" | ||||
|             # Push new viable/strict tag | ||||
|             cd pytorch/pytorch | ||||
|             git push origin "${LATEST_SHA}:refs/tags/viable/strict/${TIME}" | ||||
|           fi | ||||
|  | ||||
| @ -1260,7 +1260,6 @@ exclude_patterns = [ | ||||
|     'test/test_masked.py', | ||||
|     'test/test_maskedtensor.py', | ||||
|     'test/test_matmul_cuda.py', | ||||
|     'test/test_scaled_matmul_cuda.py', | ||||
|     'test/test_meta.py', | ||||
|     'test/test_metal.py', | ||||
|     'test/test_mkl_verbose.py', | ||||
| @ -1454,7 +1453,7 @@ init_command = [ | ||||
|     '--dry-run={{DRYRUN}}', | ||||
|     'usort==1.0.8.post1', | ||||
|     'isort==6.0.1', | ||||
|     'ruff==0.13.1',  # sync with RUFF | ||||
|     'ruff==0.12.9',  # sync with RUFF | ||||
| ] | ||||
| is_formatter = true | ||||
|  | ||||
| @ -1588,7 +1587,7 @@ init_command = [ | ||||
|     'python3', | ||||
|     'tools/linter/adapters/pip_init.py', | ||||
|     '--dry-run={{DRYRUN}}', | ||||
|     'ruff==0.13.1',  # sync with PYFMT | ||||
|     'ruff==0.12.9',  # sync with PYFMT | ||||
| ] | ||||
| is_formatter = true | ||||
|  | ||||
|  | ||||
| @ -442,7 +442,7 @@ if(WIN32) | ||||
|       message( | ||||
|         WARNING | ||||
|           "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " | ||||
|           "Please run command 'conda install -c conda-forge libuv=1.51' to install libuv." | ||||
|           "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." | ||||
|       ) | ||||
|     else() | ||||
|       set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../) | ||||
| @ -888,28 +888,23 @@ cmake_dependent_option( | ||||
|   "(USE_CUDA AND NOT MSVC) OR USE_ROCM" | ||||
|   OFF) | ||||
|  | ||||
|  | ||||
| IF(USE_ROCM AND "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Setting USE_FBGEMM_GENAI for gfx942 to ON by default, doing ROCM build") | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT ON) | ||||
| elseif(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8 AND NOT WIN32) | ||||
|   message(STATUS "Setting USE_FBGEMM_GENAI to ON by default , doing CUDA build for SM100a") | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT ON) | ||||
| else() | ||||
|   set(USE_FBGEMM_GENAI_DEFAULT OFF) | ||||
| endif() | ||||
|  | ||||
| cmake_dependent_option( | ||||
|   USE_FBGEMM_GENAI | ||||
|   "Whether to build FBGEMM GenAI quantized GEMM kernels.\ | ||||
|   Will be disabled if not supported by the platform" | ||||
|   ${USE_FBGEMM_GENAI_DEFAULT} | ||||
|   "(USE_CUDA AND NOT MSVC) OR USE_ROCM" | ||||
|   ON | ||||
|   "USE_ROCM" | ||||
|   OFF) | ||||
|  | ||||
| IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH) | ||||
|   message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF") | ||||
|   set(USE_FBGEMM_GENAI off) | ||||
| endif() | ||||
|  | ||||
| # Set USE_FBGEMM_GENAI to ON for CUDA build on SM100. | ||||
| if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8 AND NOT WIN32) | ||||
|   message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a") | ||||
|   set(USE_FBGEMM_GENAI ON) | ||||
| endif() | ||||
|  | ||||
| # CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem | ||||
|  | ||||
| @ -81,7 +81,7 @@ git remote add upstream git@github.com:pytorch/pytorch.git | ||||
| make setup-env | ||||
| # Or run `make setup-env-cuda` for pre-built CUDA binaries | ||||
| # Or run `make setup-env-rocm` for pre-built ROCm binaries | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
| ``` | ||||
|  | ||||
| ### Tips and Debugging | ||||
| @ -182,36 +182,28 @@ You can use this script to check out a new nightly branch with the following: | ||||
|  | ||||
| ```bash | ||||
| ./tools/nightly.py checkout -b my-nightly-branch | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
| ``` | ||||
|  | ||||
| To install the nightly binaries built with CUDA, you can pass in the flag `--cuda`: | ||||
|  | ||||
| ```bash | ||||
| ./tools/nightly.py checkout -b my-nightly-branch --cuda | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
| ``` | ||||
|  | ||||
| To install the nightly binaries built with ROCm, you can pass in the flag `--rocm`: | ||||
|  | ||||
| ```bash | ||||
| ./tools/nightly.py checkout -b my-nightly-branch --rocm | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
| ``` | ||||
|  | ||||
| You can also use this tool to pull the nightly commits into the current branch: | ||||
|  | ||||
| ```bash | ||||
| ./tools/nightly.py pull | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| ``` | ||||
|  | ||||
| To create the virtual environment with a specific Python interpreter, you can | ||||
| pass in the `--python` argument: | ||||
|  | ||||
| ```bash | ||||
| ./tools/nightly.py --python /path/to/python3.12 | ||||
| source venv/bin/activate  # or `. .\venv\Scripts\activate` on Windows | ||||
| ./tools/nightly.py pull -p my-env | ||||
| source my-env/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows | ||||
| ``` | ||||
|  | ||||
| Pulling will recreate a fresh virtual environment and reinstall the development | ||||
|  | ||||
| @ -275,7 +275,7 @@ conda install pkg-config libuv | ||||
| pip install mkl-static mkl-include | ||||
| # Add these packages if torch.distributed is needed. | ||||
| # Distributed package support on Windows is a prototype feature and is subject to changes. | ||||
| conda install -c conda-forge libuv=1.51 | ||||
| conda install -c conda-forge libuv | ||||
| ``` | ||||
|  | ||||
| #### Install PyTorch | ||||
|  | ||||
| @ -468,7 +468,7 @@ inline Tensor _sum_to( | ||||
|       // if we assume no reduction due to unbacked we ensure that at runtime. | ||||
|       TORCH_MAYBE_SYM_CHECK( | ||||
|           sym_eq(shape[i - leading_dims], sizes[i]), | ||||
|           "non-reduction path was assumed due to unbacked symbols expected those two sizes to be the same:", | ||||
|           "non-reduction path was assumed due to unabcked symbols expected those two sizes to be the same:", | ||||
|           shape[i - leading_dims], | ||||
|           ", ", | ||||
|           sizes[i]) | ||||
|  | ||||
| @ -45,39 +45,7 @@ inline void infer_size_impl( | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (infer_dim) { | ||||
|     // numel is the product of known sizes, it has to be divisible by newsize. | ||||
|     // and newsize should be positive unless newsize == numel (we throw | ||||
|     // different) error message in that case. | ||||
|     if constexpr (std::is_same_v<NumelType, c10::SymInt>) { | ||||
|       auto v = newsize.maybe_as_int(); | ||||
|       if (v and *v == 0) { | ||||
|         // Avoid div by 0 when sym_eq(numel % newsize, 0) is constructed! | ||||
|         // which may happen when newsize is not a symbol! if its a symbol | ||||
|         // division won't happen anyway during compile. | ||||
|         TORCH_MAYBE_SYM_CHECK( | ||||
|             numel == newsize, | ||||
|             "shape '", | ||||
|             shape, | ||||
|             "' is invalid for input of size ", | ||||
|             numel); | ||||
|       } else { | ||||
|         auto cond = sym_gt(newsize, 0) | ||||
|                         .sym_and(sym_eq(numel % newsize, 0)) | ||||
|                         .sym_or(sym_eq(numel, newsize)); | ||||
|         TORCH_MAYBE_SYM_CHECK( | ||||
|             cond, "shape '", shape, "' is invalid for input of size ", numel); | ||||
|       } | ||||
|  | ||||
|     } else { | ||||
|       TORCH_CHECK( | ||||
|           (newsize > 0 && (numel % newsize == 0)) || numel == newsize, | ||||
|           "shape '", | ||||
|           shape, | ||||
|           "' is invalid for input of size ", | ||||
|           numel); | ||||
|     } | ||||
|  | ||||
|   auto set_infer_dim = [&]() { | ||||
|     // We have a degree of freedom here to select the dimension size; follow | ||||
|     // NumPy semantics and just bail.  However, a nice error message is needed | ||||
|     // because users often use `view` as a way to flatten & unflatten | ||||
| @ -86,15 +54,19 @@ inline void infer_size_impl( | ||||
|     // works yet | ||||
|     //   empty_tensor.view(-1, 0) | ||||
|     // doesn't. | ||||
|     TORCH_MAYBE_SYM_CHECK( | ||||
|     TORCH_CHECK( | ||||
|         newsize != 0, | ||||
|         "cannot reshape tensor of 0 elements into shape ", | ||||
|         shape, | ||||
|         " because the unspecified dimension size -1 can be any " | ||||
|         "value and is ambiguous"); | ||||
|  | ||||
|     res[*infer_dim] = numel / newsize; | ||||
|     return; | ||||
|   }; | ||||
|  | ||||
|   if (infer_dim && newsize > 0 && numel % newsize == 0) { | ||||
|     set_infer_dim(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   TORCH_MAYBE_SYM_CHECK( | ||||
| @ -103,6 +75,9 @@ inline void infer_size_impl( | ||||
|       shape, | ||||
|       "' is invalid for input of size ", | ||||
|       numel); | ||||
|   if (infer_dim) { | ||||
|     set_infer_dim(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) { | ||||
|  | ||||
| @ -103,9 +103,7 @@ std::string get_cpu_capability() { | ||||
| #elif defined(HAVE_ZVECTOR_CPU_DEFINITION) | ||||
|     case native::CPUCapability::ZVECTOR: | ||||
|       return "Z VECTOR"; | ||||
| #elif defined(HAVE_SVE_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION) | ||||
|     case native::CPUCapability::SVE128: | ||||
|       return "SVE128"; | ||||
| #elif defined(HAVE_SVE256_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION) | ||||
|     case native::CPUCapability::SVE256: | ||||
|       return "SVE256"; | ||||
| #else | ||||
|  | ||||
| @ -6,7 +6,6 @@ | ||||
| #include <c10/core/thread_pool.h> | ||||
| #include <c10/util/flat_hash_map.h> | ||||
| #include <c10/util/llvmMathExtras.h> | ||||
| #include <iostream> | ||||
| #include <optional> | ||||
|  | ||||
| #include <deque> | ||||
| @ -76,9 +75,6 @@ struct TORCH_API HostStats { | ||||
|  | ||||
|   // COUNT: number of times cudaHostFree/cudaHostUnregister was called. | ||||
|   int64_t num_host_free = 0; // This is derived from segment or timing | ||||
|  | ||||
|   // Count of cudaHostFree/cudaHostUnregister per bucket | ||||
|   std::vector<int64_t> bucket_allocation = std::vector<int64_t>(MAX_SIZE_INDEX); | ||||
| }; | ||||
|  | ||||
| // Struct containing memory allocator summary statistics for host, as they | ||||
| @ -200,17 +196,6 @@ struct CachingHostAllocatorImpl { | ||||
|     // background. | ||||
|     if (!pinned_use_background_threads()) { | ||||
|       process_events(); | ||||
|     } else { | ||||
|       // Launch the background thread and process events in a loop. | ||||
|       static bool background_thread_flag [[maybe_unused]] = [this] { | ||||
|         getBackgroundThreadPool()->run([&]() { | ||||
|           while (active_) { | ||||
|             process_events(); | ||||
|             std::this_thread::sleep_for(std::chrono::microseconds(100)); | ||||
|           } | ||||
|         }); | ||||
|         return true; | ||||
|       }(); | ||||
|     } | ||||
|  | ||||
|     // Round up the allocation to the nearest power of two to improve reuse. | ||||
| @ -223,6 +208,27 @@ struct CachingHostAllocatorImpl { | ||||
|       return {block->ptr_, reinterpret_cast<void*>(block)}; | ||||
|     } | ||||
|  | ||||
|     // Check in the recently freed blocks with pending events to see if we | ||||
|     // can reuse them. Call get_free_block again after processing events | ||||
|     if (pinned_use_background_threads()) { | ||||
|       process_events_for_specific_size(roundSize); | ||||
|       block = get_free_block(roundSize); | ||||
|       if (block) { | ||||
|         return {block->ptr_, reinterpret_cast<void*>(block)}; | ||||
|       } | ||||
|  | ||||
|       // Launch the background thread and process events in a loop. | ||||
|       static bool background_thread_flag [[maybe_unused]] = [this] { | ||||
|         getBackgroundThreadPool()->run([&]() { | ||||
|           while (active_) { | ||||
|             process_events(); | ||||
|             std::this_thread::sleep_for(std::chrono::microseconds(100)); | ||||
|           } | ||||
|         }); | ||||
|         return true; | ||||
|       }(); | ||||
|     } | ||||
|  | ||||
|     // Slow path: if we can't allocate from the cached free list, we need | ||||
|     // to create a new block. | ||||
|     void* ptr = nullptr; | ||||
| @ -272,6 +278,8 @@ struct CachingHostAllocatorImpl { | ||||
|       auto index = size_index(block->size_); | ||||
|       std::lock_guard<std::mutex> g(free_list_[index].mutex_); | ||||
|       free_list_[index].list_.push_back(block); | ||||
|       stats_.allocation_bucket_stats[index].decrease(1); | ||||
|       stats_.allocated_bytes_bucket_stats[index].decrease(block->size_); | ||||
|     } else { | ||||
|       // restore these events that record by used streams. | ||||
|       std::lock_guard<std::mutex> g(events_mutex_); | ||||
| @ -331,12 +339,9 @@ struct CachingHostAllocatorImpl { | ||||
|       for (auto* block : blocks_to_remove) { | ||||
|         blocks_.erase(block); | ||||
|         ptr_to_block_.erase(block->ptr_); | ||||
|         auto index = size_index(block->size_); | ||||
|         free_block(block); | ||||
|         stats_.allocation.decrease(1); | ||||
|         stats_.allocated_bytes.decrease(block->size_); | ||||
|         stats_.allocation_bucket_stats[index].decrease(1); | ||||
|         stats_.allocated_bytes_bucket_stats[index].decrease(block->size_); | ||||
|         free_block(block); | ||||
|         delete block; | ||||
|       } | ||||
|     } | ||||
| @ -393,7 +398,6 @@ struct CachingHostAllocatorImpl { | ||||
|       // a best effort manner, since we can't really replay the cached events per bucket. | ||||
|       add_bucket_stats(stats.allocation, stats_.allocation_bucket_stats[i]); | ||||
|       add_bucket_stats(stats.allocated_bytes, stats_.allocated_bytes_bucket_stats[i]); | ||||
|       stats.bucket_allocation[i] = stats_.allocation_bucket_stats[i].allocated; | ||||
|     } | ||||
|  | ||||
|     // Get the timing stats | ||||
| @ -484,6 +488,8 @@ struct CachingHostAllocatorImpl { | ||||
|       B* block = free_list_[index].list_.back(); | ||||
|       free_list_[index].list_.pop_back(); | ||||
|       block->allocated_ = true; | ||||
|       stats_.allocation_bucket_stats[index].increase(1); | ||||
|       stats_.allocated_bytes_bucket_stats[index].increase(size); | ||||
|       return block; | ||||
|     } | ||||
|     return nullptr; | ||||
| @ -577,6 +583,8 @@ struct CachingHostAllocatorImpl { | ||||
|         auto index = size_index(block->size_); | ||||
|         std::lock_guard<std::mutex> g(free_list_[index].mutex_); | ||||
|         free_list_[index].list_.push_back(block); | ||||
|         stats_.allocation_bucket_stats[index].decrease(1); | ||||
|         stats_.allocated_bytes_bucket_stats[index].decrease(size); | ||||
|         if (size != -1) { | ||||
|           return; | ||||
|         } | ||||
|  | ||||
| @ -2,7 +2,6 @@ | ||||
| #include <c10/core/impl/PythonDispatcherTLS.h> | ||||
| #include <ATen/core/PythonFallbackKernel.h> | ||||
| #include <c10/core/SafePyObject.h> | ||||
| #include <ATen/record_function.h> | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| @ -54,24 +53,20 @@ void pythonFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatch_ | ||||
|   TORCH_INTERNAL_ASSERT(tls_on_entry.has_value()); | ||||
|   // c10::impl::ForceDispatchKeyGuard dispatcher_guard(tls_on_entry.value()); | ||||
|   // StashTLSOnEntryGuard stash_guard; | ||||
|   c10::impl::ExcludeDispatchKeyGuard exclude_guard(after_Python_keyset); | ||||
|   c10::impl::ExcludeDispatchKeyGuard guard(after_Python_keyset); | ||||
|  | ||||
|   const auto& schema = op.schema(); | ||||
|   const auto num_arguments = schema.arguments().size(); | ||||
|  | ||||
|   // If Torch Dispatch Mode is active, use its PyInterpreter for dispatch | ||||
|   const auto mode_stack_len = c10::impl::TorchDispatchModeTLS::stack_len(); | ||||
|   if (mode_stack_len > 0) { | ||||
|     RECORD_FUNCTION("PythonDispatchMode", torch::jit::last(*stack, num_arguments)); | ||||
|     const auto& cur_torch_dispatch_mode_state = c10::impl::TorchDispatchModeTLS::get_stack_at(mode_stack_len - 1); | ||||
|     cur_torch_dispatch_mode_state->pyinterpreter()->dispatch(op, stack); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   RECORD_FUNCTION("PythonSubclass", torch::jit::last(*stack, num_arguments)); | ||||
|  | ||||
|   // Otherwise, find a PyInterpreter on a Tensor | ||||
|  | ||||
|   const auto& schema = op.schema(); | ||||
|   const auto num_arguments = schema.arguments().size(); | ||||
|   // It is safe to dispatch on the very first Tensor with a pyobj_interpreter | ||||
|   // without checking the interpreters of any of the arguments, because when | ||||
|   // we actually run dispatch(), we will take out PyObjects in the context | ||||
|  | ||||
| @ -1234,7 +1234,7 @@ struct TORCH_API TupleType : public NamedType { | ||||
|   std::shared_ptr<FunctionSchema> schema_; | ||||
| }; | ||||
|  | ||||
| // the common supertype of all Enums, only used in operator registration. | ||||
| // the common supertype of all Enums, only used in operator registraion. | ||||
| // EnumType <: AnyEnumType for all Enums | ||||
| struct AnyEnumType; | ||||
| using AnyEnumTypePtr = SingletonTypePtr<AnyEnumType>; | ||||
|  | ||||
| @ -102,31 +102,8 @@ struct VecReduceAllSIMD<float, Op> { | ||||
| #endif // defined(__GNUC__) && (__GNUC__ > 5) && !defined(_MSC_VER) && | ||||
|        // !defined(C10_MOBILE) | ||||
|  | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
| #if defined(CPU_CAPABILITY_SVE256) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
|       const Op& vec_fun, | ||||
|       const Vectorized<float>& acc_vec) { | ||||
|     using Vec = Vectorized<float>; | ||||
|     Vec v = acc_vec; | ||||
|     // 128-bit shuffle | ||||
|     svuint32_t ind = svdupq_n_u32(4, 5, 6, 7); | ||||
|     Vec v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 64-bit shuffle | ||||
|     ind = svdupq_n_u32(2, 3, 0, 1); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 32-bit shuffle | ||||
|     ind = svdupq_n_u32(1, 0, 2, 3); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     return svlasta(svpfalse(), v); | ||||
|   } | ||||
| }; | ||||
| #else | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \ | ||||
|     !defined(CPU_CAPABILITY_SVE) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
| @ -163,8 +140,35 @@ struct VecReduceAllSIMD<float, std::plus<Vectorized<float>>> { | ||||
|     return vaddvq_f32(acc_vec); | ||||
|   } | ||||
| }; | ||||
| #endif // defined(CPU_CAPABILITY_SVE256) | ||||
| #endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
|        // && !defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| #if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \ | ||||
|     defined(CPU_CAPABILITY_SVE256) | ||||
| template <typename Op> | ||||
| struct VecReduceAllSIMD<float, Op> { | ||||
|   static inline float apply( | ||||
|       const Op& vec_fun, | ||||
|       const Vectorized<float>& acc_vec) { | ||||
|     using Vec = Vectorized<float>; | ||||
|     Vec v = acc_vec; | ||||
|     // 128-bit shuffle | ||||
|     svuint32_t ind = svdupq_n_u32(4, 5, 6, 7); | ||||
|     Vec v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 64-bit shuffle | ||||
|     ind = svdupq_n_u32(2, 3, 0, 1); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     // 32-bit shuffle | ||||
|     ind = svdupq_n_u32(1, 0, 2, 3); | ||||
|     v1 = svtbl_f32(v, ind); | ||||
|     v = vec_fun(v, v1); | ||||
|     return svlasta(svpfalse(), v); | ||||
|   } | ||||
| }; | ||||
| #endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) | ||||
|        // && defined(CPU_CAPABILITY_SVE256) | ||||
|  | ||||
| template <typename scalar_t, typename Op> | ||||
| inline scalar_t vec_reduce_all( | ||||
|  | ||||
| @ -1,21 +1,9 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/cpu/vec/intrinsics.h> | ||||
| #include <c10/macros/Macros.h> | ||||
| #include <cstdint> | ||||
|  | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
|  | ||||
| #if defined(__aarch64__) &&                     \ | ||||
|     (defined(AT_BUILD_ARM_VEC256_WITH_SLEEF) || \ | ||||
|      defined(AT_BUILD_ARM_VECSVE_WITH_SLEEF)) | ||||
| #define SLEEF_STATIC_LIBS | ||||
| #include <sleef.h> | ||||
| #define USE_SLEEF(sleef_code, non_sleef_code) sleef_code | ||||
| #else | ||||
| #define USE_SLEEF(sleef_code, non_sleef_code) non_sleef_code | ||||
| #endif | ||||
|  | ||||
| #if defined(CPU_CAPABILITY_SVE) | ||||
|  | ||||
| // Define the data type of VLS(vector-length specific). | ||||
|  | ||||
| @ -2,6 +2,7 @@ | ||||
|  | ||||
| #include <ATen/cpu/vec/intrinsics.h> | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/sve/vec_common_sve.h> | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
| #include <c10/util/bit_cast.h> | ||||
|  | ||||
| @ -8,48 +8,13 @@ | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
| 
 | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_bfloat16_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_half_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_convert.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/sve/vec_qint.h> | ||||
| 
 | ||||
| #elif defined(CPU_CAPABILITY_SVE) | ||||
| 
 | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
| 
 | ||||
| #if defined(CPU_CAPABILITY_SVE) | ||||
| #include <ATen/cpu/vec/sve/vec_bfloat16.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/sve/vec_double.h> | ||||
| #include <ATen/cpu/vec/sve/vec_float.h> | ||||
| #include <ATen/cpu/vec/sve/vec_int.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/sve/vec_qint.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec256/vec256_half.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec256/vec256_convert.h> | ||||
| 
 | ||||
| #else // NEON
 | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_half_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_bfloat16_neon.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec128/vec128_convert.h> | ||||
| 
 | ||||
| #include <ATen/cpu/vec/vec256/vec256_qint.h> | ||||
| 
 | ||||
| #endif // defined(CPU_CAPABILITY_SVE128)
 | ||||
| 
 | ||||
| #include <ATen/cpu/vec/functional.h> | ||||
| #endif | ||||
| 
 | ||||
| namespace at::vec { | ||||
| // Note [CPU_CAPABILITY namespace]
 | ||||
| @ -83,6 +48,12 @@ DEFINE_SVE_CAST(int32_t, s32, float, f32) | ||||
| DEFINE_SVE_CAST(int16_t, s16, float, f32) | ||||
| DEFINE_SVE_CAST(float, f32, double, f64) | ||||
| 
 | ||||
| #ifdef __ARM_FEATURE_BF16 | ||||
| DEFINE_SVE_CAST(int64_t, s64, c10::BFloat16, bf16) | ||||
| DEFINE_SVE_CAST(int32_t, s32, c10::BFloat16, bf16) | ||||
| DEFINE_SVE_CAST(int16_t, s16, c10::BFloat16, bf16) | ||||
| #endif // __ARM_FEATURE_BF16
 | ||||
| 
 | ||||
| // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GATHER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||||
| 
 | ||||
| template <int64_t scale = 1> | ||||
| @ -202,11 +173,9 @@ std::pair< | ||||
|   // group cols crossing lanes:
 | ||||
|   //   return {a0, b0, a1, b1, a2, b2, a3, b3}
 | ||||
|   //          {a4, b4, a5, b5, a6, b6, a7, b7}
 | ||||
|   svbfloat16_t aReg = a; | ||||
|   svbfloat16_t bReg = b; | ||||
|   Vectorized<c10::BFloat16> c = svzip1_bf16(aReg, bReg); | ||||
|   Vectorized<c10::BFloat16> d = svzip2_bf16(aReg, bReg); | ||||
|   return std::make_pair(c, d); | ||||
|   return std::make_pair( | ||||
|       Vectorized<c10::BFloat16>(svzip1_bf16(a, b)), | ||||
|       Vectorized<c10::BFloat16>(svzip2_bf16(a, b))); | ||||
| } | ||||
| #endif // __ARM_FEATURE_BF16
 | ||||
| 
 | ||||
| @ -255,27 +224,12 @@ std::pair< | ||||
|   // swap lanes:
 | ||||
|   //   return {a0, a1, a2, a3, a4, a5, a6, a7}
 | ||||
|   //          {b0, b1, b2, b3, b4, b5, b6, b7}
 | ||||
|   svbfloat16_t aReg = a; | ||||
|   svbfloat16_t bReg = b; | ||||
|   Vectorized<c10::BFloat16> c = svuzp1_bf16(aReg, bReg); | ||||
|   Vectorized<c10::BFloat16> d = svuzp2_bf16(aReg, bReg); | ||||
|   return std::make_pair(c, d); | ||||
|   return std::make_pair( | ||||
|       Vectorized<c10::BFloat16>(svuzp1_bf16((svbfloat16_t)a, (svbfloat16_t)b)), | ||||
|       Vectorized<c10::BFloat16>(svuzp2_bf16((svbfloat16_t)a, (svbfloat16_t)b))); | ||||
| } | ||||
| #endif // __ARM_FEATURE_BF16
 | ||||
| 
 | ||||
| // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FLIP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | ||||
| #define DEFINE_FLIP_FUNC(type, sve_func)                    \ | ||||
|   inline Vectorized<type> flip(const Vectorized<type>& v) { \ | ||||
|     return Vectorized<type>(sve_func(v));                   \ | ||||
|   } | ||||
| // Use the macro to define the flip functions
 | ||||
| DEFINE_FLIP_FUNC(float, svrev_f32) | ||||
| DEFINE_FLIP_FUNC(double, svrev_f64) | ||||
| DEFINE_FLIP_FUNC(int64_t, svrev_s64) | ||||
| DEFINE_FLIP_FUNC(int32_t, svrev_s32) | ||||
| DEFINE_FLIP_FUNC(int16_t, svrev_s16) | ||||
| DEFINE_FLIP_FUNC(int8_t, svrev_s8) | ||||
| 
 | ||||
| #endif // defined(CPU_CAPABILITY_SVE)
 | ||||
| 
 | ||||
| } // namespace CPU_CAPABILITY
 | ||||
| @ -1,8 +1,6 @@ | ||||
| #pragma once | ||||
|  | ||||
| #if defined(__aarch64__) | ||||
| #include <ATen/cpu/vec/vec_common_aarch64.h> | ||||
| #elif defined(CPU_CAPABILITY_AVX512) | ||||
| #if defined(CPU_CAPABILITY_AVX512) | ||||
| #include <ATen/cpu/vec/vec512/vec512.h> | ||||
| #else | ||||
| #include <ATen/cpu/vec/vec128/vec128.h> | ||||
| @ -13,34 +11,6 @@ namespace at::vec { | ||||
| // See Note [CPU_CAPABILITY namespace] | ||||
| inline namespace CPU_CAPABILITY { | ||||
|  | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::qint32& val) { | ||||
|   stream << val.val_; | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::qint8& val) { | ||||
|   stream << static_cast<int>(val.val_); | ||||
|   return stream; | ||||
| } | ||||
| inline std::ostream& operator<<(std::ostream& stream, const c10::quint8& val) { | ||||
|   stream << static_cast<unsigned int>(val.val_); | ||||
|   return stream; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| std::ostream& operator<<(std::ostream& stream, const Vectorized<T>& vec) { | ||||
|   T buf[Vectorized<T>::size()]; | ||||
|   vec.store(buf); | ||||
|   stream << "vec["; | ||||
|   for (int i = 0; i != Vectorized<T>::size(); i++) { | ||||
|     if (i != 0) { | ||||
|       stream << ", "; | ||||
|     } | ||||
|     stream << buf[i]; | ||||
|   } | ||||
|   stream << "]"; | ||||
|   return stream; | ||||
| } | ||||
|  | ||||
| inline Vectorized<bool> convert_to_bool(Vectorized<int8_t> x) { | ||||
|   __at_align__ bool buffer[x.size()]; | ||||
|   x.ne(Vectorized<int8_t>(0)).store(buffer); | ||||
|  | ||||
| @ -2,7 +2,6 @@ | ||||
|  | ||||
| // DO NOT DEFINE STATIC DATA IN THIS HEADER! | ||||
| // See Note [Do not compile initializers with AVX] | ||||
| #include <ATen/cpu/vec/sve/sve_helper.h> | ||||
| #include <ATen/cpu/vec/vec128/vec128_float_neon.h> | ||||
| #include <ATen/cpu/vec/vec128/vec128_reduced_precision_common_neon.h> | ||||
| #include <ATen/cpu/vec/vec_base.h> | ||||
| @ -263,13 +262,6 @@ class Vectorized<c10::BFloat16> : public Vectorized16< | ||||
|             c10::bit_cast<at_bfloat16_t>(val6.x), | ||||
|             c10::bit_cast<at_bfloat16_t>(val7.x)}) {} | ||||
|  | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
|   Vectorized(svbfloat16_t v) : Vectorized16(svget_neonq(v)) {} | ||||
|   operator svbfloat16_t() const { | ||||
|     return svset_neonq(svundef_bf16(), values); | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   static Vectorized<c10::BFloat16> blendv( | ||||
|       const Vectorized<c10::BFloat16>& a, | ||||
|       const Vectorized<c10::BFloat16>& b, | ||||
| @ -382,23 +374,6 @@ class Vectorized<c10::BFloat16> : public Vectorized16< | ||||
|   Vectorized ge(const Vectorized& other) const; | ||||
|   Vectorized lt(const Vectorized& other) const; | ||||
|   Vectorized le(const Vectorized& other) const; | ||||
|  | ||||
| #ifdef CPU_CAPABILITY_SVE128 | ||||
|  | ||||
|   template <typename step_t> | ||||
|   static Vectorized<BFloat16> arange( | ||||
|       BFloat16 base = 0.f, | ||||
|       step_t step = static_cast<step_t>(1)) { | ||||
|     __at_align__ BFloat16 buffer[size()]; | ||||
|     for (int64_t i = 0; i < size(); i++) { | ||||
|       buffer[i] = base + i * step; | ||||
|     } | ||||
|     return svget_neonq( | ||||
|         svld1_bf16(ptrue, reinterpret_cast<bfloat16_t*>(buffer))); | ||||
|   } | ||||
|  | ||||
| #endif // CPU_CAPABILITY_SVE128 | ||||
|  | ||||
| }; // Vectorized<c10::BFloat16> | ||||
|  | ||||
| inline std::tuple<Vectorized<float>, Vectorized<float>> convert_bfloat16_float( | ||||
| @ -422,24 +397,6 @@ inline Vectorized<c10::BFloat16> convert_float_bfloat16( | ||||
|   return Vectorized<c10::BFloat16>(at_vcombine_bf16(x1, x2)); | ||||
| } | ||||
|  | ||||
| inline void load_fp32_from_bf16(const BFloat16* data, Vectorized<float>& out) { | ||||
|   __at_align__ float values[Vectorized<float>::size()]; | ||||
|   for (const auto k : c10::irange(Vectorized<float>::size())) { | ||||
|     values[k] = data[k]; | ||||
|   } | ||||
|   out = Vectorized<float>::loadu(values); | ||||
| } | ||||
|  | ||||
| inline void load_fp32_from_bf16( | ||||
|     const BFloat16* data, | ||||
|     Vectorized<float>& out1, | ||||
|     Vectorized<float>& out2) { | ||||
|   Vectorized<BFloat16> bf16_vec = Vectorized<BFloat16>::loadu(data); | ||||
|   auto floats = convert_bfloat16_float(bf16_vec); | ||||
|   out1 = std::get<0>(floats); | ||||
|   out2 = std::get<1>(floats); | ||||
| } | ||||
|  | ||||
| template <typename Op> | ||||
| Vectorized<c10::BFloat16> binary_operator_via_float( | ||||
|     Op op, | ||||
| @ -622,12 +579,6 @@ Vectorized<c10::BFloat16> inline fnmsub( | ||||
|   return -a * b - c; | ||||
| } | ||||
|  | ||||
| #else // | ||||
|  | ||||
| CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16) | ||||
|  | ||||
| LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16) | ||||
|  | ||||
| #endif // !defined(C10_MOBILE) && defined(__aarch64__) | ||||
|  | ||||
| } // namespace CPU_CAPABILITY | ||||
|  | ||||
| @ -4,7 +4,7 @@ | ||||
|  | ||||
| namespace at::vec { | ||||
| inline namespace CPU_CAPABILITY { | ||||
| #if defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256) | ||||
| #if (defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256)) | ||||
| template <typename src_t> | ||||
| struct VecConvert< | ||||
|     float, | ||||
| @ -60,7 +60,6 @@ struct VecConvert<float, 1, BFloat16, 1> { | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #endif // defined(__aarch64__) && (!defined(CPU_CAPABILITY_SVE) || | ||||
|        // defined(CPU_CAPABILITY_SVE128)) | ||||
| #endif // defined(__aarch64__) && !defined(CPU_CAPABILITY_SVE256) | ||||
| } // namespace CPU_CAPABILITY | ||||
| } // namespace at::vec | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	