mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-11-01 04:54:55 +08:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			v2.7.1-rc2
			...
			Update-Fla
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a5e8b0ad38 | 
| @ -3,12 +3,6 @@ set -eux -o pipefail | ||||
|  | ||||
| GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} | ||||
|  | ||||
| if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="9.0" | ||||
| elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="9.0;10.0;12.0" | ||||
| fi | ||||
|  | ||||
| SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" | ||||
| source $SCRIPTPATH/aarch64_ci_setup.sh | ||||
|  | ||||
| @ -20,7 +14,7 @@ cd / | ||||
| # on the mounted pytorch repo | ||||
| git config --global --add safe.directory /pytorch | ||||
| pip install -r /pytorch/requirements.txt | ||||
| pip install auditwheel==6.2.0 | ||||
| pip install auditwheel | ||||
| if [ "$DESIRED_CUDA" = "cpu" ]; then | ||||
|     echo "BASE_CUDA_VERSION is not set. Building cpu wheel." | ||||
|     # USE_PRIORITIZED_TEXT_FOR_LD enables linker script optimization, see https://github.com/pytorch/pytorch/pull/121975/files | ||||
|  | ||||
| @ -5,14 +5,16 @@ set -eux -o pipefail | ||||
| # By creating symlinks from desired /opt/python to /usr/local/bin/ | ||||
|  | ||||
| NUMPY_VERSION=2.0.2 | ||||
| if [[ "$DESIRED_PYTHON"  == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then | ||||
| PYGIT2_VERSION=1.15.1 | ||||
| if [[ "$DESIRED_PYTHON"  == "3.13" ]]; then | ||||
|     NUMPY_VERSION=2.1.2 | ||||
|     PYGIT2_VERSION=1.16.0 | ||||
| fi | ||||
|  | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
| source $SCRIPTPATH/../manywheel/set_desired_python.sh | ||||
|  | ||||
| pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 | ||||
| pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION} | ||||
|  | ||||
| for tool in python python3 pip pip3 ninja scons patchelf; do | ||||
|     ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin; | ||||
|  | ||||
| @ -4,9 +4,12 @@ | ||||
| import os | ||||
| import shutil | ||||
| from subprocess import check_call, check_output | ||||
| from typing import List | ||||
|  | ||||
| from pygit2 import Repository | ||||
|  | ||||
|  | ||||
| def list_dir(path: str) -> list[str]: | ||||
| def list_dir(path: str) -> List[str]: | ||||
|     """' | ||||
|     Helper for getting paths for Python | ||||
|     """ | ||||
| @ -39,7 +42,7 @@ def build_ArmComputeLibrary() -> None: | ||||
|             "clone", | ||||
|             "https://github.com/ARM-software/ComputeLibrary.git", | ||||
|             "-b", | ||||
|             "v25.02", | ||||
|             "v24.09", | ||||
|             "--depth", | ||||
|             "1", | ||||
|             "--shallow-submodules", | ||||
| @ -55,7 +58,7 @@ def build_ArmComputeLibrary() -> None: | ||||
|         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") | ||||
|  | ||||
|  | ||||
| def update_wheel(wheel_path, desired_cuda) -> None: | ||||
| def update_wheel(wheel_path) -> None: | ||||
|     """ | ||||
|     Update the cuda wheel libraries | ||||
|     """ | ||||
| @ -77,6 +80,7 @@ def update_wheel(wheel_path, desired_cuda) -> None: | ||||
|         "/usr/local/cuda/lib64/libnvToolsExt.so.1", | ||||
|         "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|         "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|         "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6", | ||||
|         "/usr/local/cuda/lib64/libcudnn_adv.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_cnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_graph.so.9", | ||||
| @ -96,18 +100,6 @@ def update_wheel(wheel_path, desired_cuda) -> None: | ||||
|             "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|         ] | ||||
|         if "126" in desired_cuda: | ||||
|             libs_to_copy += [ | ||||
|                 "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6", | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             ] | ||||
|         elif "128" in desired_cuda: | ||||
|             libs_to_copy += [ | ||||
|                 "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8", | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             ] | ||||
|     else: | ||||
|         libs_to_copy += [ | ||||
|             "/opt/OpenBLAS/lib/libopenblas.so.0", | ||||
| @ -136,9 +128,6 @@ def complete_wheel(folder: str) -> str: | ||||
|     """ | ||||
|     wheel_name = list_dir(f"/{folder}/dist")[0] | ||||
|  | ||||
|     # Please note for cuda we don't run auditwheel since we use custom script to package | ||||
|     # the cuda dependencies to the wheel file using update_wheel() method. | ||||
|     # However we need to make sure filename reflects the correct Manylinux platform. | ||||
|     if "pytorch" in folder and not enable_cuda: | ||||
|         print("Repairing Wheel with AuditWheel") | ||||
|         check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) | ||||
| @ -150,14 +139,7 @@ def complete_wheel(folder: str) -> str: | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|     else: | ||||
|         repaired_wheel_name = wheel_name.replace( | ||||
|             "linux_aarch64", "manylinux_2_28_aarch64" | ||||
|         ) | ||||
|         print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}") | ||||
|         os.rename( | ||||
|             f"/{folder}/dist/{wheel_name}", | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|         repaired_wheel_name = wheel_name | ||||
|  | ||||
|     print(f"Copying {repaired_wheel_name} to artifacts") | ||||
|     shutil.copy2( | ||||
| @ -189,22 +171,22 @@ if __name__ == "__main__": | ||||
|     args = parse_arguments() | ||||
|     enable_mkldnn = args.enable_mkldnn | ||||
|     enable_cuda = args.enable_cuda | ||||
|     branch = check_output( | ||||
|         ["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch" | ||||
|     ).decode() | ||||
|     repo = Repository("/pytorch") | ||||
|     branch = repo.head.name | ||||
|     if branch == "HEAD": | ||||
|         branch = "master" | ||||
|  | ||||
|     print("Building PyTorch wheel") | ||||
|     build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " | ||||
|     os.system("cd /pytorch; python setup.py clean") | ||||
|  | ||||
|     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||||
|     desired_cuda = os.getenv("DESIRED_CUDA") | ||||
|     if override_package_version is not None: | ||||
|         version = override_package_version | ||||
|         build_vars += ( | ||||
|             f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " | ||||
|         ) | ||||
|     elif branch in ["nightly", "main"]: | ||||
|     elif branch in ["nightly", "master"]: | ||||
|         build_date = ( | ||||
|             check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") | ||||
|             .decode() | ||||
| @ -214,11 +196,12 @@ if __name__ == "__main__": | ||||
|             check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] | ||||
|         ) | ||||
|         if enable_cuda: | ||||
|             desired_cuda = os.getenv("DESIRED_CUDA") | ||||
|             build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 " | ||||
|         else: | ||||
|             build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " | ||||
|     elif branch.startswith(("v1.", "v2.")): | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " | ||||
|  | ||||
|     if enable_mkldnn: | ||||
|         build_ArmComputeLibrary() | ||||
| @ -242,6 +225,6 @@ if __name__ == "__main__": | ||||
|         print("Updating Cuda Dependency") | ||||
|         filename = os.listdir("/pytorch/dist/") | ||||
|         wheel_path = f"/pytorch/dist/{filename[0]}" | ||||
|         update_wheel(wheel_path, desired_cuda) | ||||
|         update_wheel(wheel_path) | ||||
|     pytorch_wheel_name = complete_wheel("/pytorch/") | ||||
|     print(f"Build Complete. Created {pytorch_wheel_name}..") | ||||
|  | ||||
| @ -12,7 +12,7 @@ import os | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
| from typing import Optional, Union | ||||
| from typing import Dict, List, Optional, Tuple, Union | ||||
|  | ||||
| import boto3 | ||||
|  | ||||
| @ -24,12 +24,10 @@ os_amis = { | ||||
|     "ubuntu22_04": "ami-0c6c29c5125214c77",  # login_name: ubuntu | ||||
|     "redhat8": "ami-0698b90665a2ddcf1",  # login_name: ec2-user | ||||
| } | ||||
|  | ||||
| ubuntu18_04_ami = os_amis["ubuntu18_04"] | ||||
| ubuntu20_04_ami = os_amis["ubuntu20_04"] | ||||
|  | ||||
|  | ||||
| def compute_keyfile_path(key_name: Optional[str] = None) -> tuple[str, str]: | ||||
| def compute_keyfile_path(key_name: Optional[str] = None) -> Tuple[str, str]: | ||||
|     if key_name is None: | ||||
|         key_name = os.getenv("AWS_KEY_NAME") | ||||
|         if key_name is None: | ||||
| @ -59,7 +57,7 @@ def ec2_instances_by_id(instance_id): | ||||
|  | ||||
|  | ||||
| def start_instance( | ||||
|     key_name, ami=ubuntu20_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50 | ||||
|     key_name, ami=ubuntu18_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50 | ||||
| ): | ||||
|     inst = ec2.create_instances( | ||||
|         ImageId=ami, | ||||
| @ -98,7 +96,7 @@ class RemoteHost: | ||||
|         self.keyfile_path = keyfile_path | ||||
|         self.login_name = login_name | ||||
|  | ||||
|     def _gen_ssh_prefix(self) -> list[str]: | ||||
|     def _gen_ssh_prefix(self) -> List[str]: | ||||
|         return [ | ||||
|             "ssh", | ||||
|             "-o", | ||||
| @ -110,13 +108,13 @@ class RemoteHost: | ||||
|         ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _split_cmd(args: Union[str, list[str]]) -> list[str]: | ||||
|     def _split_cmd(args: Union[str, List[str]]) -> List[str]: | ||||
|         return args.split() if isinstance(args, str) else args | ||||
|  | ||||
|     def run_ssh_cmd(self, args: Union[str, list[str]]) -> None: | ||||
|     def run_ssh_cmd(self, args: Union[str, List[str]]) -> None: | ||||
|         subprocess.check_call(self._gen_ssh_prefix() + self._split_cmd(args)) | ||||
|  | ||||
|     def check_ssh_output(self, args: Union[str, list[str]]) -> str: | ||||
|     def check_ssh_output(self, args: Union[str, List[str]]) -> str: | ||||
|         return subprocess.check_output( | ||||
|             self._gen_ssh_prefix() + self._split_cmd(args) | ||||
|         ).decode("utf-8") | ||||
| @ -159,7 +157,7 @@ class RemoteHost: | ||||
|     def using_docker(self) -> bool: | ||||
|         return self.container_id is not None | ||||
|  | ||||
|     def run_cmd(self, args: Union[str, list[str]]) -> None: | ||||
|     def run_cmd(self, args: Union[str, List[str]]) -> None: | ||||
|         if not self.using_docker(): | ||||
|             return self.run_ssh_cmd(args) | ||||
|         assert self.container_id is not None | ||||
| @ -180,7 +178,7 @@ class RemoteHost: | ||||
|         if rc != 0: | ||||
|             raise subprocess.CalledProcessError(rc, docker_cmd) | ||||
|  | ||||
|     def check_output(self, args: Union[str, list[str]]) -> str: | ||||
|     def check_output(self, args: Union[str, List[str]]) -> str: | ||||
|         if not self.using_docker(): | ||||
|             return self.check_ssh_output(args) | ||||
|         assert self.container_id is not None | ||||
| @ -232,7 +230,7 @@ class RemoteHost: | ||||
|             ) | ||||
|         self.download_file(remote_file, local_file) | ||||
|  | ||||
|     def list_dir(self, path: str) -> list[str]: | ||||
|     def list_dir(self, path: str) -> List[str]: | ||||
|         return self.check_output(["ls", "-1", path]).split("\n") | ||||
|  | ||||
|  | ||||
| @ -329,7 +327,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None | ||||
|         ] | ||||
|     ) | ||||
|     host.run_cmd( | ||||
|         f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}" | ||||
|         f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}" | ||||
|     ) | ||||
|  | ||||
|     host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") | ||||
| @ -360,7 +358,7 @@ def checkout_repo( | ||||
|     branch: str = "main", | ||||
|     url: str, | ||||
|     git_clone_flags: str, | ||||
|     mapping: dict[str, tuple[str, str]], | ||||
|     mapping: Dict[str, Tuple[str, str]], | ||||
| ) -> Optional[str]: | ||||
|     for prefix in mapping: | ||||
|         if not branch.startswith(prefix): | ||||
| @ -683,7 +681,7 @@ def build_domains( | ||||
|     branch: str = "main", | ||||
|     use_conda: bool = True, | ||||
|     git_clone_flags: str = "", | ||||
| ) -> tuple[str, str, str, str]: | ||||
| ) -> Tuple[str, str, str, str]: | ||||
|     vision_wheel_name = build_torchvision( | ||||
|         host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags | ||||
|     ) | ||||
| @ -710,7 +708,7 @@ def start_build( | ||||
|     pytorch_build_number: Optional[str] = None, | ||||
|     shallow_clone: bool = True, | ||||
|     enable_mkldnn: bool = False, | ||||
| ) -> tuple[str, str, str, str, str]: | ||||
| ) -> Tuple[str, str, str, str, str]: | ||||
|     git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else "" | ||||
|     if host.using_docker() and not use_conda: | ||||
|         print("Auto-selecting conda option for docker images") | ||||
| @ -761,7 +759,7 @@ def start_build( | ||||
|         version = host.check_output("cat pytorch/version.txt").strip()[:-2] | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1" | ||||
|     if branch.startswith(("v1.", "v2.")): | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1" | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" | ||||
|     if host.using_docker(): | ||||
|         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" | ||||
|     if enable_mkldnn: | ||||
| @ -934,9 +932,9 @@ def parse_arguments(): | ||||
|     parser.add_argument("--debug", action="store_true") | ||||
|     parser.add_argument("--build-only", action="store_true") | ||||
|     parser.add_argument("--test-only", type=str) | ||||
|     group = parser.add_mutually_exclusive_group() | ||||
|     group.add_argument("--os", type=str, choices=list(os_amis.keys())) | ||||
|     group.add_argument("--ami", type=str) | ||||
|     parser.add_argument( | ||||
|         "--os", type=str, choices=list(os_amis.keys()), default="ubuntu20_04" | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "--python-version", | ||||
|         type=str, | ||||
| @ -966,13 +964,7 @@ def parse_arguments(): | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     args = parse_arguments() | ||||
|     ami = ( | ||||
|         args.ami | ||||
|         if args.ami is not None | ||||
|         else os_amis[args.os] | ||||
|         if args.os is not None | ||||
|         else ubuntu20_04_ami | ||||
|     ) | ||||
|     ami = os_amis[args.os] | ||||
|     keyfile_path, key_name = compute_keyfile_path(args.key_name) | ||||
|  | ||||
|     if args.list_instances: | ||||
|  | ||||
							
								
								
									
										5
									
								
								.ci/docker/aotriton_version.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								.ci/docker/aotriton_version.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,5 @@ | ||||
| 0.8b | ||||
| manylinux_2_28 | ||||
| rocm6.2 | ||||
| 6f8cbcac8a92775291bb1ba8f514d4beb350baf4 | ||||
| e938def5d32869fe2e00aec0300f354c9f157867bebdf2e104d732b94cb238d8 | ||||
| @ -1,8 +1,4 @@ | ||||
| #!/bin/bash | ||||
| # The purpose of this script is to: | ||||
| # 1. Extract the set of parameters to be used for a docker build based on the provided image name. | ||||
| # 2. Run docker build with the parameters found in step 1. | ||||
| # 3. Run the built image and print out the expected and actual versions of packages installed. | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| @ -90,20 +86,30 @@ CMAKE_VERSION=3.18.5 | ||||
|  | ||||
| _UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb | ||||
| _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b | ||||
| if [[ "$image" == *rocm* ]]; then | ||||
|   _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6 | ||||
|   _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d | ||||
| fi | ||||
|  | ||||
| # It's annoying to rename jobs every time you want to rewrite a | ||||
| # configuration, so we hardcode everything here rather than do it | ||||
| # from scratch | ||||
| case "$image" in | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=12.1.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
| @ -128,6 +134,36 @@ case "$image" in | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.1.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.1.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
| @ -158,65 +194,6 @@ case "$image" in | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.13 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=11.8.0 | ||||
|     CUDNN_VERSION=9 | ||||
| @ -231,6 +208,20 @@ case "$image" in | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=12.1.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3-clang10-onnx) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=10 | ||||
| @ -273,7 +264,18 @@ case "$image" in | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.1 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
| @ -281,25 +283,6 @@ case "$image" in | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.3 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-2024.0-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
| @ -385,7 +368,7 @@ case "$image" in | ||||
|     EXECUTORCH=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.12-halide) | ||||
|     CUDA_VERSION=12.6 | ||||
|     CUDA_VERSION=12.4 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
| @ -393,7 +376,7 @@ case "$image" in | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.12-triton-cpu) | ||||
|     CUDA_VERSION=12.6 | ||||
|     CUDA_VERSION=12.4 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
| @ -514,7 +497,7 @@ docker build \ | ||||
|        --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \ | ||||
|        --build-arg "KATEX=${KATEX:-}" \ | ||||
|        --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \ | ||||
|        --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \ | ||||
|        --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \ | ||||
|        --build-arg "IMAGE_NAME=${IMAGE_NAME}" \ | ||||
|        --build-arg "UCX_COMMIT=${UCX_COMMIT}" \ | ||||
|        --build-arg "UCC_COMMIT=${UCC_COMMIT}" \ | ||||
|  | ||||
| @ -113,6 +113,13 @@ COPY triton_version.txt triton_version.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt | ||||
|  | ||||
| # Install AOTriton (Early fail) | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_aotriton.sh install_aotriton.sh | ||||
| RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"] | ||||
| ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton | ||||
|  | ||||
| # Install ccache/sccache (do this last, so we get priority in PATH) | ||||
| COPY ./common/install_cache.sh install_cache.sh | ||||
| ENV PATH /opt/cache/bin:$PATH | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 01a22b6f16d117454b7d21ebdc691b0785b84a7f | ||||
| a29b208a06ab378bb29ab1aa68932e412f8e09f1 | ||||
|  | ||||
| @ -1 +0,0 @@ | ||||
| v2.21.5-1 | ||||
| @ -1 +0,0 @@ | ||||
| v2.26.2-1 | ||||
| @ -1 +1 @@ | ||||
| 5d535d7a2d4b435b1b5c1177fd8f04a12b942b9a | ||||
| ac3470188b914c5d7a5058a7e28b9eb685a62427 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 0bcc8265e677e5321606a3311bf71470f14456a8 | ||||
| e98b6fcb8df5b44eb0d0addb6767c573d37ba024 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 96316ce50fade7e209553aba4898cd9b82aab83b | ||||
| 0d4682f073ded4d1a8260dd4208a43d735ae3a2b | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| set -euo pipefail | ||||
|  | ||||
| readonly version=v25.02 | ||||
| readonly src_host=https://github.com/ARM-software | ||||
| readonly version=v24.04 | ||||
| readonly src_host=https://review.mlplatform.org/ml | ||||
| readonly src_repo=ComputeLibrary | ||||
|  | ||||
| # Clone ACL | ||||
|  | ||||
							
								
								
									
										23
									
								
								.ci/docker/common/install_aotriton.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										23
									
								
								.ci/docker/common/install_aotriton.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,23 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| TARBALL='aotriton.tar.gz' | ||||
| # This read command always returns with exit code 1 | ||||
| read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true | ||||
| ARCH=$(uname -m) | ||||
| AOTRITON_INSTALL_PREFIX="$1" | ||||
| AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz" | ||||
|  | ||||
| cd "${AOTRITON_INSTALL_PREFIX}" | ||||
| # Must use -L to follow redirects | ||||
| curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}" | ||||
| ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1) | ||||
| if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then | ||||
|   echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256}," | ||||
|   echo " which does not match the expected value ${SHA256}." | ||||
|   exit | ||||
| fi | ||||
| tar xf "${TARBALL}" && rm -rf "${TARBALL}" | ||||
| @ -32,12 +32,8 @@ install_ubuntu() { | ||||
|  | ||||
|   # HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes | ||||
|   # See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729 | ||||
|   # TODO: Eliminate this hack, we should not rely on apt-get installation | ||||
|   # See https://github.com/pytorch/pytorch/issues/144768 | ||||
|   if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then | ||||
|     maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages" | ||||
|   elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then | ||||
|     maybe_libnccl_dev="libnccl2=2.26.2-1+cuda12.4 libnccl-dev=2.26.2-1+cuda12.4 --allow-downgrades --allow-change-held-packages" | ||||
|   else | ||||
|     maybe_libnccl_dev="" | ||||
|   fi | ||||
|  | ||||
| @ -9,7 +9,7 @@ install_ubuntu() { | ||||
|   # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh` | ||||
|   apt-get install -y cargo | ||||
|   echo "Checking out sccache repo" | ||||
|   git clone https://github.com/mozilla/sccache -b v0.9.1 | ||||
|   git clone https://github.com/mozilla/sccache -b v0.9.0 | ||||
|   cd sccache | ||||
|   echo "Building sccache" | ||||
|   cargo build --release | ||||
|  | ||||
| @ -66,7 +66,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|  | ||||
|   # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README | ||||
|   if [[ $(uname -m) == "aarch64" ]]; then | ||||
|     conda_install "openblas==0.3.29=*openmp*" | ||||
|     conda_install "openblas==0.3.28=*openmp*" | ||||
|   else | ||||
|     conda_install "mkl=2021.4.0 mkl-include=2021.4.0" | ||||
|   fi | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION=v2.26.2-1 | ||||
| NCCL_VERSION=v2.21.5-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
|  | ||||
| function install_cusparselt_040 { | ||||
| @ -16,6 +16,17 @@ function install_cusparselt_040 { | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_052 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_062 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
| @ -40,7 +51,6 @@ function install_cusparselt_063 { | ||||
|  | ||||
| function install_118 { | ||||
|     CUDNN_VERSION=9.1.0.70 | ||||
|     NCCL_VERSION=v2.21.5-1 | ||||
|     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0" | ||||
|     rm -rf /usr/local/cuda-11.8 /usr/local/cuda | ||||
|     # install CUDA 11.8.0 in the same container | ||||
| @ -73,6 +83,39 @@ function install_118 { | ||||
|     ldconfig | ||||
| } | ||||
|  | ||||
| function install_121 { | ||||
|     echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2" | ||||
|     rm -rf /usr/local/cuda-12.1 /usr/local/cuda | ||||
|     # install CUDA 12.1.1 in the same container | ||||
|     wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run | ||||
|     chmod +x cuda_12.1.1_530.30.02_linux.run | ||||
|     ./cuda_12.1.1_530.30.02_linux.run --toolkit --silent | ||||
|     rm -f cuda_12.1.1_530.30.02_linux.run | ||||
|     rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda | ||||
|  | ||||
|     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|     mkdir tmp_cudnn && cd tmp_cudnn | ||||
|     wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|     tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|     cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     cd .. | ||||
|     rm -rf tmp_cudnn | ||||
|  | ||||
|     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|     git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|     cd nccl && make -j src.build | ||||
|     cp -a build/include/* /usr/local/cuda/include/ | ||||
|     cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|     cd .. | ||||
|     rm -rf nccl | ||||
|  | ||||
|     install_cusparselt_052 | ||||
|  | ||||
|     ldconfig | ||||
| } | ||||
|  | ||||
| function install_124 { | ||||
|   CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
| @ -171,6 +214,37 @@ function prune_118 { | ||||
|     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/ | ||||
| } | ||||
|  | ||||
| function prune_121 { | ||||
|   echo "Pruning CUDA 12.1" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.1 prune static libs | ||||
|   ##################################################################################### | ||||
|     export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune" | ||||
|     export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64" | ||||
|  | ||||
|     export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|     export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|     if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|         export GENCODE=$OVERRIDE_GENCODE | ||||
|     fi | ||||
|  | ||||
|     # all CUDA libs except CuDNN and CuBLAS | ||||
|     ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|     # prune CuBLAS and CuBLASLt (using the GENCODE_CUDNN arch list) | ||||
|     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|     ##################################################################################### | ||||
|     # CUDA 12.1 prune visual tools | ||||
|     ##################################################################################### | ||||
|     export CUDA_BASE="/usr/local/cuda-12.1/" | ||||
|     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/ | ||||
| } | ||||
|  | ||||
| function prune_124 { | ||||
|   echo "Pruning CUDA 12.4" | ||||
|   ##################################################################################### | ||||
| @ -239,52 +313,18 @@ function prune_126 { | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| function install_128 { | ||||
|   CUDNN_VERSION=9.7.1.26 | ||||
|   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|   # install CUDA 12.8.0 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run | ||||
|   chmod +x cuda_12.8.0_570.86.10_linux.run | ||||
|   ./cuda_12.8.0_570.86.10_linux.run --toolkit --silent | ||||
|   rm -f cuda_12.8.0_570.86.10_linux.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| # idiomatic parameter and option handling in sh | ||||
| while test $# -gt 0 | ||||
| do | ||||
|     case "$1" in | ||||
|     11.8) install_118; prune_118 | ||||
|         ;; | ||||
|     12.1) install_121; prune_121 | ||||
|         ;; | ||||
|     12.4) install_124; prune_124 | ||||
|         ;; | ||||
|     12.6) install_126; prune_126 | ||||
|         ;; | ||||
|     12.8) install_128; | ||||
|         ;; | ||||
|     *) echo "bad argument $1"; exit 1 | ||||
|         ;; | ||||
|     esac | ||||
|  | ||||
| @ -3,8 +3,19 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION=v2.26.2-1 | ||||
| CUDNN_VERSION=9.8.0.87 | ||||
| NCCL_VERSION=v2.21.5-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
|  | ||||
| function install_cusparselt_062 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_063 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
| @ -17,15 +28,80 @@ function install_cusparselt_063 { | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_128 { | ||||
|   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|   # install CUDA 12.8.0 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   chmod +x cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   ./cuda_12.8.0_570.86.10_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda | ||||
| function install_124 { | ||||
|   CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
|   rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|   # install CUDA 12.4.1 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   ./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_062 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_124 { | ||||
|   echo "Pruning CUDA 12.4" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuBLAS and CuBLASLt (using the GENCODE_CUDNN arch list) | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.4/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||||
| } | ||||
|  | ||||
| function install_126 { | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|   # install CUDA 12.6.3 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   ./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
| @ -50,11 +126,47 @@ function install_128 { | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_126 { | ||||
|   echo "Pruning CUDA 12.6" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuBLAS and CuBLASLt (using the GENCODE_CUDNN arch list) | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.6/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| # idiomatic parameter and option handling in sh | ||||
| while test $# -gt 0 | ||||
| do | ||||
|     case "$1" in | ||||
|     12.8) install_128; | ||||
|     12.4) install_124; prune_124 | ||||
|         ;; | ||||
|     12.6) install_126; prune_126 | ||||
|         ;; | ||||
|     *) echo "bad argument $1"; exit 1 | ||||
|         ;; | ||||
|  | ||||
| @ -4,9 +4,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then | ||||
|     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|     mkdir tmp_cudnn | ||||
|     pushd tmp_cudnn | ||||
|     if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then | ||||
|     if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive" | ||||
|  | ||||
| @ -5,15 +5,7 @@ set -ex | ||||
| # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
| mkdir tmp_cusparselt && cd tmp_cusparselt | ||||
|  | ||||
| if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
|         arch_path='x86_64' | ||||
|     fi | ||||
|     CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive" | ||||
|     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz | ||||
| elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then | ||||
| if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
| @ -21,11 +13,17 @@ elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then | ||||
|     fi | ||||
|     CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive" | ||||
|     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz | ||||
| elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
|         arch_path='x86_64' | ||||
|     fi | ||||
|     CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive" | ||||
|     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz | ||||
| elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then | ||||
|     CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive" | ||||
|     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz | ||||
| else | ||||
|     echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}" | ||||
| fi | ||||
|  | ||||
| tar xf ${CUSPARSELT_NAME}.tar.xz | ||||
|  | ||||
| @ -37,12 +37,7 @@ install_conda_dependencies() { | ||||
|  | ||||
| install_pip_dependencies() { | ||||
|   pushd executorch | ||||
|   as_jenkins bash install_executorch.sh | ||||
|  | ||||
|   # Workaround: ExecuTorch has moved to numpy 2.0, which is not compatible with the current | ||||
|   # numba and scipy versions used in PyTorch CI | ||||
|   conda_run pip uninstall -y numba scipy | ||||
|  | ||||
|   as_jenkins bash install_requirements.sh --pybind xnnpack | ||||
|   popd | ||||
| } | ||||
|  | ||||
| @ -53,7 +48,7 @@ setup_executorch() { | ||||
|   export EXECUTORCH_BUILD_PYBIND=ON | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|  | ||||
|   as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true | ||||
|   as_jenkins .ci/scripts/setup-linux.sh cmake || true | ||||
|   popd | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -4,15 +4,10 @@ set -ex | ||||
|  | ||||
| [ -n "$NINJA_VERSION" ] | ||||
|  | ||||
| arch=$(uname -m) | ||||
| if [ "$arch" == "aarch64" ]; then | ||||
|     url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux-aarch64.zip" | ||||
| else | ||||
|     url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip" | ||||
| fi | ||||
| url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip" | ||||
|  | ||||
| pushd /tmp | ||||
| wget --no-verbose --output-document=ninja-linux.zip "$url" | ||||
| unzip ninja-linux.zip -d /usr/local/bin | ||||
| rm -f ninja-linux.zip | ||||
| popd | ||||
| popd | ||||
|  | ||||
| @ -31,15 +31,15 @@ pip_install \ | ||||
| pip_install coloredlogs packaging | ||||
|  | ||||
| pip_install onnxruntime==1.18.1 | ||||
| pip_install onnx==1.17.0 | ||||
| pip_install onnxscript==0.2.2 --no-deps | ||||
| pip_install onnx==1.16.2 | ||||
| pip_install onnxscript==0.1.0.dev20241124 --no-deps | ||||
| # required by onnxscript | ||||
| pip_install ml_dtypes | ||||
|  | ||||
| # Cache the transformers model to be used later by ONNX tests. We need to run the transformers | ||||
| # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/ | ||||
| IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py" | ||||
| as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}" | ||||
| as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}" | ||||
|  | ||||
| # Need a PyTorch version for transformers to work | ||||
| pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu | ||||
|  | ||||
| @ -4,7 +4,7 @@ | ||||
| set -ex | ||||
|  | ||||
| cd / | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 --depth 1 --shallow-submodules | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.28 --depth 1 --shallow-submodules | ||||
|  | ||||
|  | ||||
| OPENBLAS_BUILD_FLAGS=" | ||||
|  | ||||
| @ -62,22 +62,6 @@ install_ubuntu() { | ||||
|         sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;" | ||||
|     done | ||||
|  | ||||
|     # ROCm 6.3 had a regression where initializing static code objects had significant overhead | ||||
|     if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then | ||||
|         # clr build needs CppHeaderParser but can only find it using conda's python | ||||
|         /opt/conda/bin/python -m pip install CppHeaderParser | ||||
|         git clone https://github.com/ROCm/HIP -b rocm-6.3.x | ||||
|         HIP_COMMON_DIR=$(readlink -f HIP) | ||||
|         git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix | ||||
|         mkdir -p clr/build | ||||
|         pushd clr/build | ||||
|         cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR | ||||
|         make -j | ||||
|         cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.* | ||||
|         popd | ||||
|         rm -rf HIP clr | ||||
|     fi | ||||
|  | ||||
|     # Cleanup | ||||
|     apt-get autoclean && apt-get clean | ||||
|     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||||
|  | ||||
| @ -115,7 +115,7 @@ index a5007ffc..13fa07fc 100644 | ||||
|  	if (!fp) { | ||||
| -		fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE, | ||||
| -			strerror(errno)); | ||||
| +		//fprintf(stderr, "amdgpu.ids: No such file or directory\n"); | ||||
| +		fprintf(stderr, "amdgpu.ids: No such file or directory\n"); | ||||
|  		return; | ||||
|  	} | ||||
|  | ||||
|  | ||||
| @ -60,15 +60,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" | ||||
|   # Triton needs at least gcc-9 to build | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 pip_install . | ||||
|   CXX=g++-9 pip_install -e . | ||||
| elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then | ||||
|   # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain | ||||
|   add-apt-repository -y ppa:ubuntu-toolchain-r/test | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 pip_install . | ||||
|   CXX=g++-9 pip_install -e . | ||||
| else | ||||
|   pip_install . | ||||
|   pip_install -e . | ||||
| fi | ||||
|  | ||||
| if [ -n "${CONDA_CMAKE}" ]; then | ||||
|  | ||||
| @ -8,12 +8,6 @@ else | ||||
|   with_cuda=no | ||||
| fi | ||||
|  | ||||
| if [[ -d "/opt/rocm" ]]; then | ||||
|   with_rocm=/opt/rocm | ||||
| else | ||||
|   with_rocm=no | ||||
| fi | ||||
|  | ||||
| function install_ucx() { | ||||
|   set -ex | ||||
|   git clone --recursive https://github.com/openucx/ucx.git | ||||
| @ -25,7 +19,6 @@ function install_ucx() { | ||||
|   ./configure --prefix=$UCX_HOME      \ | ||||
|       --enable-mt                     \ | ||||
|       --with-cuda=$with_cuda          \ | ||||
|       --with-rocm=$with_rocm          \ | ||||
|       --enable-profiling              \ | ||||
|       --enable-stats | ||||
|   time make -j | ||||
| @ -43,29 +36,12 @@ function install_ucc() { | ||||
|   git submodule update --init --recursive | ||||
|  | ||||
|   ./autogen.sh | ||||
|  | ||||
|   # We only run distributed tests on Tesla M60 and A10G | ||||
|   NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86" | ||||
|  | ||||
|   if [[ -n "$ROCM_VERSION" ]]; then | ||||
|     if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then | ||||
|       amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'` | ||||
|     else | ||||
|       amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs` | ||||
|     fi | ||||
|     for arch in $amdgpu_targets; do | ||||
|       HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch" | ||||
|     done | ||||
|   else | ||||
|     HIP_OFFLOAD="all-arch-no-native" | ||||
|   fi | ||||
|  | ||||
|   ./configure --prefix=$UCC_HOME          \ | ||||
|     --with-ucx=$UCX_HOME                  \ | ||||
|     --with-cuda=$with_cuda                \ | ||||
|     --with-nvcc-gencode="${NVCC_GENCODE}" \ | ||||
|     --with-rocm=$with_rocm                \ | ||||
|     --with-rocm-arch="${HIP_OFFLOAD}" | ||||
|     --with-nvcc-gencode="${NVCC_GENCODE}" | ||||
|   time make -j | ||||
|   sudo make install | ||||
|  | ||||
|  | ||||
| @ -47,9 +47,6 @@ function install_ubuntu() { | ||||
|     # Development Packages | ||||
|     apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev | ||||
|     # Install Intel Support Packages | ||||
|     if [[ "$XPU_VERSION" == "2025.0" ]]; then | ||||
|         XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl=2025.0.1-6" | ||||
|     fi | ||||
|     apt-get install -y ${XPU_PACKAGES} | ||||
|  | ||||
|     # Cleanup | ||||
| @ -85,9 +82,6 @@ gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS. | ||||
| EOF | ||||
|  | ||||
|     # Install Intel Support Packages | ||||
|     if [[ "$XPU_VERSION" == "2025.0" ]]; then | ||||
|         XPU_PACKAGES="${XPU_PACKAGES} intel-oneapi-dnnl-2025.0.1-6" | ||||
|     fi | ||||
|     yum install -y ${XPU_PACKAGES} | ||||
|     # The xpu-smi packages | ||||
|     dnf install -y xpu-smi | ||||
|  | ||||
| @ -56,6 +56,11 @@ RUN bash ./install_cuda.sh 11.8 | ||||
| RUN bash ./install_magma.sh 11.8 | ||||
| RUN ln -sf /usr/local/cuda-11.8 /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda12.1 | ||||
| RUN bash ./install_cuda.sh 12.1 | ||||
| RUN bash ./install_magma.sh 12.1 | ||||
| RUN ln -sf /usr/local/cuda-12.1 /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda12.4 | ||||
| RUN bash ./install_cuda.sh 12.4 | ||||
| RUN bash ./install_magma.sh 12.4 | ||||
| @ -66,11 +71,6 @@ RUN bash ./install_cuda.sh 12.6 | ||||
| RUN bash ./install_magma.sh 12.6 | ||||
| RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda12.8 | ||||
| RUN bash ./install_cuda.sh 12.8 | ||||
| RUN bash ./install_magma.sh 12.8 | ||||
| RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|  | ||||
| FROM cpu as rocm | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| @ -92,6 +92,13 @@ RUN apt-get update -y && \ | ||||
| RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
|  | ||||
| # Install AOTriton | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
| COPY ./common/install_aotriton.sh install_aotriton.sh | ||||
| RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt | ||||
| ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton | ||||
|  | ||||
| FROM ${BASE_TARGET} as final | ||||
| COPY --from=openssl            /opt/openssl           /opt/openssl | ||||
| # Install patchelf | ||||
|  | ||||
| @ -39,7 +39,7 @@ case ${GPU_ARCH_TYPE} in | ||||
|         BASE_TARGET=rocm | ||||
|         DOCKER_TAG=rocm${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" | ||||
|         ;; | ||||
|     *) | ||||
|  | ||||
| @ -198,3 +198,10 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
|  | ||||
| # Install AOTriton | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
| COPY ./common/install_aotriton.sh install_aotriton.sh | ||||
| RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt | ||||
| ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton | ||||
|  | ||||
							
								
								
									
										153
									
								
								.ci/docker/manywheel/Dockerfile_2014
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								.ci/docker/manywheel/Dockerfile_2014
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,153 @@ | ||||
| # syntax = docker/dockerfile:experimental | ||||
| ARG ROCM_VERSION=3.7 | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| ARG GPU_IMAGE=nvidia/cuda:${BASE_CUDA_VERSION}-devel-centos7 | ||||
| FROM quay.io/pypa/manylinux2014_x86_64 as base | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel | ||||
| RUN yum install -y yum-utils centos-release-scl sudo | ||||
| RUN yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils | ||||
| ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| # cmake | ||||
| RUN yum install -y cmake3 && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
| FROM base as openssl | ||||
| # Install openssl (this must precede `build python` step) | ||||
| # (In order to have a proper SSL module, Python is compiled | ||||
| # against a recent openssl [see env vars above], which is linked | ||||
| # statically. We delete openssl afterwards.) | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
|  | ||||
|  | ||||
|  | ||||
| # remove unnecessary python versions | ||||
| RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 | ||||
| RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 | ||||
| RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 | ||||
| RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
|  | ||||
| FROM base as cuda | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| # Install CUDA | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh | ||||
|  | ||||
| FROM base as intel | ||||
| # MKL | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
|  | ||||
| FROM base as magma | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| # Install magma | ||||
| ADD ./common/install_magma.sh install_magma.sh | ||||
| RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh | ||||
|  | ||||
| FROM base as jni | ||||
| # Install java jni header | ||||
| ADD ./common/install_jni.sh install_jni.sh | ||||
| ADD ./java/jni.h jni.h | ||||
| RUN bash ./install_jni.sh && rm install_jni.sh | ||||
|  | ||||
| FROM base as libpng | ||||
| # Install libpng | ||||
| ADD ./common/install_libpng.sh install_libpng.sh | ||||
| RUN bash ./install_libpng.sh && rm install_libpng.sh | ||||
|  | ||||
| FROM ${GPU_IMAGE} as common | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| RUN yum install -y \ | ||||
|         aclocal \ | ||||
|         autoconf \ | ||||
|         automake \ | ||||
|         bison \ | ||||
|         bzip2 \ | ||||
|         curl \ | ||||
|         diffutils \ | ||||
|         file \ | ||||
|         git \ | ||||
|         make \ | ||||
|         patch \ | ||||
|         perl \ | ||||
|         unzip \ | ||||
|         util-linux \ | ||||
|         wget \ | ||||
|         which \ | ||||
|         xz \ | ||||
|         yasm | ||||
| RUN yum install -y \ | ||||
|     https://repo.ius.io/ius-release-el7.rpm \ | ||||
|     https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm | ||||
|  | ||||
| RUN yum swap -y git git236-core | ||||
| # git236+ would refuse to run git commands in repos owned by other users | ||||
| # Which causes version check to fail, as pytorch repo is bind-mounted into the image | ||||
| # Override this behaviour by treating every folder as safe | ||||
| # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 | ||||
| RUN git config --global --add safe.directory "*" | ||||
|  | ||||
| ENV SSL_CERT_FILE=/opt/_internal/certs.pem | ||||
| # Install LLVM version | ||||
| COPY --from=openssl            /opt/openssl                          /opt/openssl | ||||
| COPY --from=base               /opt/python                           /opt/python | ||||
| COPY --from=base               /opt/_internal                        /opt/_internal | ||||
| COPY --from=base               /usr/local/bin/auditwheel             /usr/local/bin/auditwheel | ||||
| COPY --from=intel              /opt/intel                            /opt/intel | ||||
| COPY --from=base               /usr/local/bin/patchelf               /usr/local/bin/patchelf | ||||
| COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/include/png*               /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/ | ||||
| COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig | ||||
| COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h | ||||
|  | ||||
| FROM common as cpu_final | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| RUN yum install -y yum-utils centos-release-scl | ||||
| RUN yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils | ||||
| ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| # cmake | ||||
| RUN yum install -y cmake3 && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
|  | ||||
| # ninja | ||||
| RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm | ||||
| RUN yum install -y ninja-build | ||||
|  | ||||
| FROM cpu_final as cuda_final | ||||
| RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
|  | ||||
| FROM common as rocm_final | ||||
| ARG ROCM_VERSION=3.7 | ||||
| # Install ROCm | ||||
| ADD ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh | ||||
| # cmake is already installed inside the rocm base image, but both 2 and 3 exist | ||||
| # cmake3 is needed for the later MIOpen custom build, so that step is last. | ||||
| RUN yum install -y cmake3 && \ | ||||
|     rm -f /usr/bin/cmake && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
| @ -38,12 +38,6 @@ RUN yum install -y \ | ||||
|   sudo \ | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-toolchain | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
| COPY ./common/install_ninja.sh install_ninja.sh | ||||
| RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi | ||||
| RUN rm install_ninja.sh | ||||
|  | ||||
| # Ensure the expected devtoolset is used | ||||
| ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| @ -48,7 +48,7 @@ case ${GPU_ARCH_TYPE} in | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-aarch64 | ||||
|         GPU_IMAGE=arm64v8/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11" | ||||
|         MANY_LINUX_VERSION="2_28_aarch64" | ||||
|         ;; | ||||
|     cpu-cxx11-abi) | ||||
| @ -97,7 +97,7 @@ case ${GPU_ARCH_TYPE} in | ||||
|             DEVTOOLSET_VERSION="11" | ||||
|             GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         fi | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" | ||||
|         ;; | ||||
|     xpu) | ||||
| @ -121,8 +121,7 @@ fi | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|     # Only activate this if in CI | ||||
|     if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then | ||||
|     if [ "$(uname -m)" != "s390x" ]; then | ||||
|         # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|         # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|         sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
| @ -140,7 +139,7 @@ fi | ||||
|         "${TOPDIR}/.ci/docker/" | ||||
| ) | ||||
|  | ||||
| GITHUB_REF=${GITHUB_REF:-"dev")} | ||||
| GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)} | ||||
| GIT_BRANCH_NAME=${GITHUB_REF##*/} | ||||
| GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)} | ||||
| DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME} | ||||
|  | ||||
| @ -3,7 +3,7 @@ | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/ | ||||
| CURL_DOWNLOAD_URL=https://curl.se/download | ||||
| CURL_DOWNLOAD_URL=https://curl.askapache.com/download | ||||
|  | ||||
| AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf | ||||
|  | ||||
|  | ||||
| @ -90,10 +90,10 @@ librosa>=0.6.2 ; python_version < "3.11" | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| mypy==1.14.0 | ||||
| mypy==1.13.0 | ||||
| # Pin MyPy version because new errors are likely to appear with each release | ||||
| #Description: linter | ||||
| #Pinned versions: 1.14.0 | ||||
| #Pinned versions: 1.10.0 | ||||
| #test that import: test_typing.py, test_type_hints.py | ||||
|  | ||||
| networkx==2.8.8 | ||||
| @ -294,7 +294,7 @@ ghstack==0.8.0 | ||||
| #Pinned versions: 0.8.0 | ||||
| #test that import: | ||||
|  | ||||
| jinja2==3.1.6 | ||||
| jinja2==3.1.5 | ||||
| #Description: jinja2 template engine | ||||
| #Pinned versions: 3.1.4 | ||||
| #test that import: | ||||
| @ -304,7 +304,7 @@ pytest-cpp==2.3.0 | ||||
| #Pinned versions: 2.3.0 | ||||
| #test that import: | ||||
|  | ||||
| z3-solver==4.12.6.0 | ||||
| z3-solver==4.12.2.0 | ||||
| #Description: The Z3 Theorem Prover Project | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -329,7 +329,7 @@ lxml==5.3.0 | ||||
|  | ||||
| PyGithub==2.3.0 | ||||
|  | ||||
| sympy==1.13.3 | ||||
| sympy==1.13.1 ; python_version >= "3.9" | ||||
| #Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -339,7 +339,7 @@ onnx==1.17.0 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| onnxscript==0.2.2 | ||||
| onnxscript==0.1.0.dev20240817 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -362,7 +362,6 @@ pwlf==2.2.1 ; python_version >= "3.8" | ||||
| # To build PyTorch itself | ||||
| astunparse | ||||
| PyYAML | ||||
| pyzstd | ||||
| setuptools | ||||
|  | ||||
| ninja==1.11.1 ; platform_machine == "aarch64" | ||||
| @ -372,8 +371,3 @@ pulp==2.9.0 ; python_version >= "3.8" | ||||
| #Description: required for testing ilp formulation under torch/distributed/_tools | ||||
| #Pinned versions: 2.9.0 | ||||
| #test that import: test_sac_ilp.py | ||||
|  | ||||
| dataclasses_json==0.6.7 | ||||
| #Description: required for data pipeline and scripts under tools/stats | ||||
| #Pinned versions: 0.6.7 | ||||
| #test that import: | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 3.3.1 | ||||
| 3.2.0 | ||||
|  | ||||
| @ -14,20 +14,21 @@ ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| COPY ./common/install_base.sh install_base.sh | ||||
| RUN bash ./install_base.sh && rm install_base.sh | ||||
|  | ||||
| # Install clang | ||||
| ARG LLVMDEV | ||||
| ARG CLANG_VERSION | ||||
| COPY ./common/install_clang.sh install_clang.sh | ||||
| RUN bash ./install_clang.sh && rm install_clang.sh | ||||
|  | ||||
| # Install user | ||||
| COPY ./common/install_user.sh install_user.sh | ||||
| RUN bash ./install_user.sh && rm install_user.sh | ||||
|  | ||||
| # Install katex | ||||
| ARG KATEX | ||||
| COPY ./common/install_docs_reqs.sh install_docs_reqs.sh | ||||
| RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| ARG CONDA_CMAKE | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| @ -38,11 +39,6 @@ ARG GCC_VERSION | ||||
| COPY ./common/install_gcc.sh install_gcc.sh | ||||
| RUN bash ./install_gcc.sh && rm install_gcc.sh | ||||
|  | ||||
| # Install clang | ||||
| ARG CLANG_VERSION | ||||
| COPY ./common/install_clang.sh install_clang.sh | ||||
| RUN bash ./install_clang.sh && rm install_clang.sh | ||||
|  | ||||
| # (optional) Install protobuf for ONNX | ||||
| ARG PROTOBUF | ||||
| COPY ./common/install_protobuf.sh install_protobuf.sh | ||||
| @ -89,32 +85,6 @@ COPY ./common/install_amdsmi.sh install_amdsmi.sh | ||||
| RUN bash ./install_amdsmi.sh | ||||
| RUN rm install_amdsmi.sh | ||||
|  | ||||
| # (optional) Install UCC | ||||
| ARG UCX_COMMIT | ||||
| ARG UCC_COMMIT | ||||
| ENV UCX_COMMIT $UCX_COMMIT | ||||
| ENV UCC_COMMIT $UCC_COMMIT | ||||
| ENV UCX_HOME /usr | ||||
| ENV UCC_HOME /usr | ||||
| ADD ./common/install_ucc.sh install_ucc.sh | ||||
| RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi | ||||
| RUN rm install_ucc.sh | ||||
|  | ||||
| COPY ./common/install_openssl.sh install_openssl.sh | ||||
| ENV OPENSSL_ROOT_DIR /opt/openssl | ||||
| RUN bash ./install_openssl.sh | ||||
| ENV OPENSSL_DIR /opt/openssl | ||||
|  | ||||
| ARG INDUCTOR_BENCHMARKS | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/huggingface.txt huggingface.txt | ||||
| COPY ci_commit_pins/timm.txt timm.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt | ||||
|  | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| @ -137,17 +107,24 @@ COPY triton_version.txt triton_version.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt | ||||
|  | ||||
| # Install AOTriton | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_aotriton.sh install_aotriton.sh | ||||
| RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"] | ||||
| ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton | ||||
|  | ||||
| # This is needed by sccache | ||||
| COPY ./common/install_openssl.sh install_openssl.sh | ||||
| ENV OPENSSL_ROOT_DIR /opt/openssl | ||||
| RUN bash ./install_openssl.sh | ||||
| ENV OPENSSL_DIR /opt/openssl | ||||
|  | ||||
| # Install ccache/sccache (do this last, so we get priority in PATH) | ||||
| COPY ./common/install_cache.sh install_cache.sh | ||||
| ENV PATH /opt/cache/bin:$PATH | ||||
| RUN bash ./install_cache.sh && rm install_cache.sh | ||||
|  | ||||
| # Install Open MPI for ROCm | ||||
| COPY ./common/install_openmpi.sh install_openmpi.sh | ||||
| RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi | ||||
| RUN rm install_openmpi.sh | ||||
|  | ||||
| # Include BUILD_ENVIRONMENT environment variable in image | ||||
| ARG BUILD_ENVIRONMENT | ||||
| ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} | ||||
|  | ||||
| @ -12,13 +12,13 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \ | ||||
| 	-e DESIRED_CUDA=${DESIRED_CUDA} \ | ||||
| 	-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \ | ||||
| 	"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \ | ||||
| 	"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \ | ||||
| 	magma/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-cuda128 | ||||
| all: magma-cuda126 | ||||
| all: magma-cuda124 | ||||
| all: magma-cuda121 | ||||
| all: magma-cuda118 | ||||
|  | ||||
| .PHONY: | ||||
| @ -26,12 +26,6 @@ clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-cuda128 | ||||
| magma-cuda128: DESIRED_CUDA := 12.8 | ||||
| magma-cuda128: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120 | ||||
| magma-cuda128: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda126 | ||||
| magma-cuda126: DESIRED_CUDA := 12.6 | ||||
| magma-cuda126: | ||||
| @ -42,6 +36,11 @@ magma-cuda124: DESIRED_CUDA := 12.4 | ||||
| magma-cuda124: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda121 | ||||
| magma-cuda121: DESIRED_CUDA := 12.1 | ||||
| magma-cuda121: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda118 | ||||
| magma-cuda118: DESIRED_CUDA := 11.8 | ||||
| magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37 | ||||
|  | ||||
| @ -14,7 +14,6 @@ export USE_CUDA_STATIC_LINK=1 | ||||
| export INSTALL_TEST=0 # dont install test binaries into site-packages | ||||
| export USE_CUPTI_SO=0 | ||||
| export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build | ||||
| export USE_CUFILE=${USE_CUFILE:-1} | ||||
|  | ||||
| # Keep an array of cmake variables to add to | ||||
| if [[ -z "$CMAKE_ARGS" ]]; then | ||||
| @ -53,15 +52,23 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') | ||||
|  | ||||
| TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6" | ||||
| case ${CUDA_VERSION} in | ||||
|     12.8) | ||||
|         TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8 and will be removed in future releases | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.6) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then | ||||
|             TORCH_CUDA_ARCH_LIST="9.0" | ||||
|         else | ||||
|             TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX" | ||||
|         fi | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.4) | ||||
|         if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then | ||||
|             TORCH_CUDA_ARCH_LIST="9.0" | ||||
|         else | ||||
|             TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         fi | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.1) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
| @ -119,16 +126,7 @@ if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then | ||||
|         ) | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are | ||||
| # not available in PYPI | ||||
| if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then | ||||
|     export USE_CUFILE=0 | ||||
| fi | ||||
|  | ||||
|  | ||||
| # CUDA_VERSION 12.4, 12.6, 12.8 | ||||
| if [[ $CUDA_VERSION == 12* ]]; then | ||||
| if [[ $CUDA_VERSION == "12.4" || $CUDA_VERSION == "12.6" ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Try parallelizing nvcc as well | ||||
|     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
| @ -169,16 +167,6 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|             "libnvrtc.so.12" | ||||
|             "libnvrtc-builtins.so" | ||||
|         ) | ||||
|         if [[ $USE_CUFILE == 1 ]]; then | ||||
|             DEPS_LIST+=( | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0" | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1" | ||||
|             ) | ||||
|             DEPS_SONAME+=( | ||||
|                 "libcufile.so.0" | ||||
|                 "libcufile_rdma.so.1" | ||||
|             ) | ||||
|         fi | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|         CUDA_RPATHS=( | ||||
| @ -195,11 +183,6 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|         ) | ||||
|         if [[ $USE_CUFILE == 1 ]]; then | ||||
|             CUDA_RPATHS+=( | ||||
|                 '$ORIGIN/../../nvidia/cufile/lib' | ||||
|             ) | ||||
|         fi | ||||
|         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") | ||||
|         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' | ||||
|  | ||||
| @ -186,6 +186,15 @@ do | ||||
|     OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array | ||||
| done | ||||
|  | ||||
| # FIXME: Temporary until https://github.com/pytorch/pytorch/pull/137443 lands | ||||
| # Install AOTriton | ||||
| if [ -e ${PYTORCH_ROOT}/.ci/docker/aotriton_version.txt ]; then | ||||
|     cp -a ${PYTORCH_ROOT}/.ci/docker/aotriton_version.txt aotriton_version.txt | ||||
|     bash ${PYTORCH_ROOT}/.ci/docker/common/install_aotriton.sh ${ROCM_HOME} && rm aotriton_version.txt | ||||
|     export AOTRITON_INSTALLED_PREFIX=${ROCM_HOME}/aotriton | ||||
|     ROCM_SO_FILES+=("libaotriton_v2.so") | ||||
| fi | ||||
|  | ||||
| # rocBLAS library files | ||||
| ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library | ||||
| ROCBLAS_LIB_DST=lib/rocblas/library | ||||
| @ -257,6 +266,20 @@ RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC)) | ||||
| DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/}) | ||||
| DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/}) | ||||
|  | ||||
| # PyTorch 2.6+ (AOTriton 0.8b+) | ||||
| # AKS = "AOTriton Kernel Storage", a file format to store GPU kernels compactly | ||||
| if (( $(echo "${PYTORCH_VERSION} 2.6" | awk '{print ($1 >= $2)}') )); then | ||||
|     LIBAOTRITON_DIR=$(find "$ROCM_HOME/lib/" -name "libaotriton_v2.so" -printf '%h\n') | ||||
|     if [[ -z ${LIBAOTRITON_DIR} ]]; then | ||||
|         LIBAOTRITON_DIR=$(find "$ROCM_HOME/" -name "libaotriton_v2.so" -printf '%h\n') | ||||
|     fi | ||||
|     AKS_FILES=($(find "${LIBAOTRITON_DIR}/aotriton.images" -type f -name '*.aks?' -printf '%P\n')) | ||||
|     AKS_SRC="${LIBAOTRITON_DIR}/aotriton.images" | ||||
|     AKS_DST="lib/aotriton.images" | ||||
|     DEPS_AUX_SRCLIST+=(${AKS_FILES[@]/#/${AKS_SRC}/}) | ||||
|     DEPS_AUX_DSTLIST+=(${AKS_FILES[@]/#/${AKS_DST}/}) | ||||
| fi | ||||
|  | ||||
| echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}" | ||||
|  | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
|  | ||||
| @ -173,7 +173,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
|   # XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA | ||||
|   export USE_KINETO=0 | ||||
|   export TORCH_XPU_ARCH_LIST=pvc | ||||
| fi | ||||
|  | ||||
| # sccache will fail for CUDA builds if all cores are used for compiling | ||||
| @ -192,7 +191,7 @@ fi | ||||
|  | ||||
| # We only build FlashAttention files for CUDA 8.0+, and they require large amounts of | ||||
| # memory to build and will OOM | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]] && [ -z "$MAX_JOBS_OVERRIDE" ]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then | ||||
|   echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM" | ||||
|   echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage" | ||||
|   export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))" | ||||
| @ -229,7 +228,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then | ||||
|   export CMAKE_BUILD_TYPE=RelWithAssert | ||||
| fi | ||||
|  | ||||
| # Do not change workspace permissions for ROCm and s390x CI jobs | ||||
| # Do not change workspace permissions for ROCm CI jobs | ||||
| # as it can leave workspace with bad permissions for cancelled jobs | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then | ||||
|   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96) | ||||
| @ -378,10 +377,8 @@ else | ||||
|     # This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization | ||||
|     # is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has | ||||
|     # 16 CPUs | ||||
|     if [ -z "$MAX_JOBS_OVERRIDE" ]; then | ||||
|       MAX_JOBS=$(nproc --ignore=4) | ||||
|       export MAX_JOBS | ||||
|     fi | ||||
|     MAX_JOBS=$(nproc --ignore=4) | ||||
|     export MAX_JOBS | ||||
|  | ||||
|     # NB: Install outside of source directory (at the same level as the root | ||||
|     # pytorch folder) so that it doesn't get cleaned away prior to docker push. | ||||
|  | ||||
| @ -387,7 +387,7 @@ fi | ||||
| ############################################################################### | ||||
| # Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries | ||||
| ############################################################################### | ||||
| if [[ "$(uname)" == 'Linux' &&  "$PACKAGE_TYPE" == 'manywheel' ]]; then | ||||
| if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel')]]; then | ||||
|   pushd /tmp | ||||
|   python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))" | ||||
|   popd | ||||
|  | ||||
| @ -169,40 +169,30 @@ function install_torchrec_and_fbgemm() { | ||||
|   torchrec_commit=$(get_pinned_commit torchrec) | ||||
|   local fbgemm_commit | ||||
|   fbgemm_commit=$(get_pinned_commit fbgemm) | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then | ||||
|     fbgemm_commit=$(get_pinned_commit fbgemm_rocm) | ||||
|   fi | ||||
|   pip_uninstall torchrec-nightly | ||||
|   pip_uninstall fbgemm-gpu-nightly | ||||
|   pip_install setuptools-git-versioning scikit-build pyre-extensions | ||||
|  | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then | ||||
|     # install torchrec first because it installs fbgemm nightly on top of rocm fbgemm | ||||
|     pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" | ||||
|     pip_uninstall fbgemm-gpu-nightly | ||||
|   # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it | ||||
|   # seems to be an sccache-related issue | ||||
|   if [[ "$IS_A100_RUNNER" == "1" ]]; then | ||||
|     unset CMAKE_CUDA_COMPILER_LAUNCHER | ||||
|     sudo mv /opt/cache/bin /opt/cache/bin-backup | ||||
|   fi | ||||
|  | ||||
|     pip_install tabulate  # needed for newer fbgemm | ||||
|     pip_install patchelf  # needed for rocm fbgemm | ||||
|     git clone --recursive https://github.com/pytorch/fbgemm | ||||
|     pushd fbgemm/fbgemm_gpu | ||||
|     git checkout "${fbgemm_commit}" | ||||
|     python setup.py install \ | ||||
|       --package_variant=rocm \ | ||||
|       -DHIP_ROOT_DIR="${ROCM_PATH}" \ | ||||
|       -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ | ||||
|       -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" | ||||
|     popd | ||||
|     rm -rf fbgemm | ||||
|   else | ||||
|     # See https://github.com/pytorch/pytorch/issues/106971 | ||||
|     CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu" | ||||
|     pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" | ||||
|   # See https://github.com/pytorch/pytorch/issues/106971 | ||||
|   CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu" | ||||
|   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" | ||||
|  | ||||
|   if [[ "$IS_A100_RUNNER" == "1" ]]; then | ||||
|     export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache | ||||
|     sudo mv /opt/cache/bin-backup /opt/cache/bin | ||||
|   fi | ||||
| } | ||||
|  | ||||
| function clone_pytorch_xla() { | ||||
|   if [[ ! -d ./xla ]]; then | ||||
|     git clone --recursive -b r2.7 https://github.com/pytorch/xla.git | ||||
|     git clone --recursive --quiet https://github.com/pytorch/xla.git | ||||
|     pushd xla | ||||
|     # pin the xla hash so that we don't get broken by changes to xla | ||||
|     git checkout "$(cat ../.github/ci_commit_pins/xla.txt)" | ||||
| @ -226,11 +216,6 @@ function checkout_install_torchbench() { | ||||
|     # to install and test other models | ||||
|     python install.py --continue_on_fail | ||||
|   fi | ||||
|  | ||||
|   # TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488 | ||||
|   # is regressing speedup metric. This needs to be investigated further | ||||
|   pip install transformers==4.38.1 | ||||
|  | ||||
|   echo "Print all dependencies after TorchBench is installed" | ||||
|   python -mpip freeze | ||||
|   popd | ||||
|  | ||||
| @ -18,9 +18,6 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( | ||||
| fi | ||||
| popd | ||||
|  | ||||
| # enable debug asserts in serialization | ||||
| export TORCH_SERIALIZATION_DEBUG=1 | ||||
|  | ||||
| setup_test_python() { | ||||
|   # The CircleCI worker hostname doesn't resolve to an address. | ||||
|   # This environment variable makes ProcessGroupGloo default to | ||||
|  | ||||
| @ -40,7 +40,7 @@ retry () { | ||||
| if [[ "$#" != 3 ]]; then | ||||
|   if [[ -z "${DESIRED_PYTHON:-}" || -z "${DESIRED_CUDA:-}" || -z "${PACKAGE_TYPE:-}" ]]; then | ||||
|     echo "USAGE: run_tests.sh  PACKAGE_TYPE  DESIRED_PYTHON  DESIRED_CUDA" | ||||
|     echo "The env variable PACKAGE_TYPE must be set to 'manywheel' or 'libtorch'" | ||||
|     echo "The env variable PACKAGE_TYPE must be set to 'conda' or 'manywheel' or 'libtorch'" | ||||
|     echo "The env variable DESIRED_PYTHON must be set like '2.7mu' or '3.6m' etc" | ||||
|     echo "The env variable DESIRED_CUDA must be set like 'cpu' or 'cu80' etc" | ||||
|     exit 1 | ||||
|  | ||||
| @ -6,7 +6,7 @@ import itertools | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from typing import Any | ||||
| from typing import Any, List, Tuple | ||||
|  | ||||
|  | ||||
| # We also check that there are [not] cxx11 symbols in libtorch | ||||
| @ -46,17 +46,17 @@ LIBTORCH_PRE_CXX11_PATTERNS = _apply_libtorch_symbols(PRE_CXX11_SYMBOLS) | ||||
|  | ||||
|  | ||||
| @functools.lru_cache(100) | ||||
| def get_symbols(lib: str) -> list[tuple[str, str, str]]: | ||||
| def get_symbols(lib: str) -> List[Tuple[str, str, str]]: | ||||
|     from subprocess import check_output | ||||
|  | ||||
|     lines = check_output(f'nm "{lib}"|c++filt', shell=True) | ||||
|     return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]] | ||||
|  | ||||
|  | ||||
| def grep_symbols(lib: str, patterns: list[Any]) -> list[str]: | ||||
| def grep_symbols(lib: str, patterns: List[Any]) -> List[str]: | ||||
|     def _grep_symbols( | ||||
|         symbols: list[tuple[str, str, str]], patterns: list[Any] | ||||
|     ) -> list[str]: | ||||
|         symbols: List[Tuple[str, str, str]], patterns: List[Any] | ||||
|     ) -> List[str]: | ||||
|         rc = [] | ||||
|         for _s_addr, _s_type, s_name in symbols: | ||||
|             for pattern in patterns: | ||||
|  | ||||
| @ -46,9 +46,7 @@ def train(args, model, device, train_loader, optimizer, epoch): | ||||
|         optimizer.step() | ||||
|         if batch_idx % args.log_interval == 0: | ||||
|             print( | ||||
|                 f"Train Epoch: {epoch} " | ||||
|                 f"[{batch_idx * len(data)}/{len(train_loader.dataset)} " | ||||
|                 f"({100.0 * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}" | ||||
|                 f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"  # noqa: B950 | ||||
|             ) | ||||
|             if args.dry_run: | ||||
|                 break | ||||
| @ -73,9 +71,7 @@ def test(model, device, test_loader): | ||||
|     test_loss /= len(test_loader.dataset) | ||||
|  | ||||
|     print( | ||||
|         f"\nTest set: Average loss: {test_loss:.4f}, " | ||||
|         f"Accuracy: {correct}/{len(test_loader.dataset)} " | ||||
|         f"({100.0 * correct / len(test_loader.dataset):.0f}%)\n" | ||||
|         f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n"  # noqa: B950 | ||||
|     ) | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -6,7 +6,6 @@ import re | ||||
| import subprocess | ||||
| import sys | ||||
| from pathlib import Path | ||||
| from tempfile import NamedTemporaryFile | ||||
|  | ||||
| import torch | ||||
| import torch._dynamo | ||||
| @ -76,13 +75,10 @@ def read_release_matrix(): | ||||
|  | ||||
|  | ||||
| def test_numpy(): | ||||
|     try: | ||||
|         import numpy as np | ||||
|     import numpy as np | ||||
|  | ||||
|         x = np.arange(5) | ||||
|         torch.tensor(x) | ||||
|     except ImportError: | ||||
|         print("Numpy check skipped. Numpy is not installed.") | ||||
|     x = np.arange(5) | ||||
|     torch.tensor(x) | ||||
|  | ||||
|  | ||||
| def check_version(package: str) -> None: | ||||
| @ -165,36 +161,6 @@ def test_cuda_runtime_errors_captured() -> None: | ||||
|         raise RuntimeError("Expected CUDA RuntimeError but have not received!") | ||||
|  | ||||
|  | ||||
| def test_cuda_gds_errors_captured() -> None: | ||||
|     major_version = int(torch.version.cuda.split(".")[0]) | ||||
|     minor_version = int(torch.version.cuda.split(".")[1]) | ||||
|  | ||||
|     if target_os == "windows": | ||||
|         print(f"{target_os} is not supported for GDS smoke test") | ||||
|         return | ||||
|  | ||||
|     if major_version < 12 or (major_version == 12 and minor_version < 6): | ||||
|         print("CUDA version is not supported for GDS smoke test") | ||||
|         return | ||||
|  | ||||
|     cuda_exception_missed = True | ||||
|     try: | ||||
|         print("Testing test_cuda_gds_errors_captured") | ||||
|         with NamedTemporaryFile() as f: | ||||
|             torch.cuda.gds.GdsFile(f.name, os.O_CREAT | os.O_RDWR) | ||||
|     except RuntimeError as e: | ||||
|         expected_error = "cuFileHandleRegister failed" | ||||
|         if re.search(expected_error, f"{e}"): | ||||
|             print(f"Caught CUDA exception with success: {e}") | ||||
|             cuda_exception_missed = False | ||||
|         else: | ||||
|             raise e | ||||
|     if cuda_exception_missed: | ||||
|         raise RuntimeError( | ||||
|             "Expected cuFileHandleRegister failed RuntimeError but have not received!" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def smoke_test_cuda( | ||||
|     package: str, runtime_error_check: str, torch_compile_check: str | ||||
| ) -> None: | ||||
| @ -413,10 +379,8 @@ def main() -> None: | ||||
|     smoke_test_conv2d() | ||||
|     test_linalg() | ||||
|     test_numpy() | ||||
|  | ||||
|     if is_cuda_system: | ||||
|         test_linalg("cuda") | ||||
|         test_cuda_gds_errors_captured() | ||||
|  | ||||
|     if options.package == "all": | ||||
|         smoke_test_modules() | ||||
|  | ||||
| @ -12,9 +12,9 @@ export TERM=vt100 | ||||
| # shellcheck source=./common.sh | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| # Do not change workspace permissions for ROCm and s390x CI jobs | ||||
| # Do not change workspace permissions for ROCm CI jobs | ||||
| # as it can leave workspace with bad permissions for cancelled jobs | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && -d /var/lib/jenkins/workspace ]]; then | ||||
|   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96) | ||||
|   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace") | ||||
|   cleanup_workspace() { | ||||
| @ -46,9 +46,6 @@ BUILD_BIN_DIR="$BUILD_DIR"/bin | ||||
| SHARD_NUMBER="${SHARD_NUMBER:=1}" | ||||
| NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}" | ||||
|  | ||||
| # enable debug asserts in serialization | ||||
| export TORCH_SERIALIZATION_DEBUG=1 | ||||
|  | ||||
| export VALGRIND=ON | ||||
| # export TORCH_INDUCTOR_INSTALL_GXX=ON | ||||
| if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
| @ -89,13 +86,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   export VALGRIND=OFF | ||||
| fi | ||||
|  | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then | ||||
|   # There are additional warnings on s390x, maybe due to newer gcc. | ||||
|   # Skip this check for now | ||||
|   export VALGRIND=OFF | ||||
| fi | ||||
|  | ||||
| if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then | ||||
|   # When rerunning disable tests, do not generate core dumps as it could consume | ||||
|   # the runner disk space when crashed tests are run multiple times. Running out | ||||
| @ -177,9 +167,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then | ||||
|   # Print GPU info | ||||
|   rocminfo | ||||
|   rocminfo | grep -E 'Name:.*\sgfx|Marketing' | ||||
|  | ||||
|   # for benchmarks/dynamo/check_accuracy.py, we need to put results in a rocm specific directory to avoid clashes with cuda | ||||
|   MAYBE_ROCM="rocm/" | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
| @ -314,13 +301,6 @@ test_python() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_lazy_tensor_meta_reference_disabled() { | ||||
|   export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1 | ||||
|   echo "Testing lazy tensor operations without meta reference" | ||||
|   time python test/run_test.py --include lazy/test_ts_opinfo.py --verbose | ||||
|   export -n TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE | ||||
| } | ||||
|  | ||||
|  | ||||
| test_dynamo_wrapped_shard() { | ||||
|   if [[ -z "$NUM_TEST_SHARDS" ]]; then | ||||
| @ -424,10 +404,7 @@ test_inductor_cpp_wrapper_shard() { | ||||
|  | ||||
|   # Run certain inductor unit tests with cpp wrapper. In the end state, we | ||||
|   # should be able to run all the inductor unit tests with cpp_wrapper. | ||||
|   python test/run_test.py \ | ||||
|     --include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \ | ||||
|     --verbose | ||||
|   python test/run_test.py --inductor --include test_torch -k 'take' --verbose | ||||
|   python test/run_test.py --include inductor/test_torchinductor --verbose | ||||
|  | ||||
|   # Run inductor benchmark tests with cpp wrapper. | ||||
|   # Skip benchmark tests if it's in rerun-disabled-mode. | ||||
| @ -440,7 +417,7 @@ test_inductor_cpp_wrapper_shard() { | ||||
|     --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_timm_training.csv" | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv" | ||||
|  | ||||
|     python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|       --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
| @ -450,7 +427,7 @@ test_inductor_cpp_wrapper_shard() { | ||||
|       --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_torchbench_inference.csv" | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| @ -483,8 +460,6 @@ elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then | ||||
|   DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager) | ||||
| elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then | ||||
|   DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor) | ||||
| elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then | ||||
|   DYNAMO_BENCHMARK_FLAGS+=(--inductor --inductor-compile-mode max-autotune) | ||||
| elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then | ||||
|   DYNAMO_BENCHMARK_FLAGS+=(--inductor) | ||||
| fi | ||||
| @ -499,59 +474,6 @@ else | ||||
|   DYNAMO_BENCHMARK_FLAGS+=(--device cuda) | ||||
| fi | ||||
|  | ||||
| test_cachebench() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   local BENCHMARK | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|     local BENCHMARK=torchbench | ||||
|   elif [[ "${SHARD_NUMBER}" == 2 ]]; then | ||||
|     local BENCHMARK=huggingface | ||||
|   else | ||||
|     echo "invalid SHARD_NUMBER: ${SHARD_NUMBER}" | ||||
|     exit 1 | ||||
|   fi | ||||
|  | ||||
|   local mode_options=("training" "inference") | ||||
|  | ||||
|   for mode in "${mode_options[@]}"; do | ||||
|     $TASKSET python "benchmarks/dynamo/cachebench.py" \ | ||||
|         --mode "$mode" \ | ||||
|         --device cuda \ | ||||
|         --benchmark "$BENCHMARK" \ | ||||
|         --repeat 3 \ | ||||
|         --output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}.json" | ||||
|  | ||||
|     $TASKSET python "benchmarks/dynamo/cachebench.py" \ | ||||
|         --mode "$mode" \ | ||||
|         --dynamic \ | ||||
|         --device cuda \ | ||||
|         --benchmark "$BENCHMARK" \ | ||||
|         --repeat 3 \ | ||||
|         --output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}_dynamic.json" | ||||
|   done | ||||
| } | ||||
|  | ||||
| test_verify_cachebench() { | ||||
|   TMP_TEST_REPORTS_DIR=$(mktemp -d) | ||||
|   TEST_OUTPUT="$TMP_TEST_REPORTS_DIR/test.json" | ||||
|  | ||||
|   $TASKSET python "benchmarks/dynamo/cachebench.py" \ | ||||
|       --mode training \ | ||||
|       --device cpu \ | ||||
|       --model nanogpt \ | ||||
|       --benchmark torchbench \ | ||||
|       --output "$TEST_OUTPUT" | ||||
|  | ||||
|   # -s checks file exists and is non empty | ||||
|   if [[ ! -s "$TEST_OUTPUT" ]]; then | ||||
|     echo "Cachebench failed to produce an output." | ||||
|     echo "Run 'python benchmarks/dynamo/cachebench.py' to make sure it works" | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| test_perf_for_dashboard() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
| @ -580,10 +502,6 @@ test_perf_for_dashboard() { | ||||
|     test_inductor_set_cpu_affinity | ||||
|   elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then | ||||
|     device=cuda_a10g | ||||
|   elif [[ "${TEST_CONFIG}" == *h100* ]]; then | ||||
|     device=cuda_h100 | ||||
|   elif [[ "${TEST_CONFIG}" == *rocm* ]]; then | ||||
|     device=rocm | ||||
|   fi | ||||
|  | ||||
|   for mode in "${modes[@]}"; do | ||||
| @ -616,7 +534,7 @@ test_perf_for_dashboard() { | ||||
|             --dynamic-batch-only "$@" \ | ||||
|             --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv" | ||||
|       fi | ||||
|       if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]]; then | ||||
|       if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then | ||||
|         TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ | ||||
|             "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \ | ||||
|             --output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv" | ||||
| @ -700,16 +618,16 @@ test_single_dynamo_benchmark() { | ||||
|       TEST_CONFIG=${TEST_CONFIG//_avx512/} | ||||
|     fi | ||||
|     python "benchmarks/dynamo/$suite.py" \ | ||||
|       --ci --accuracy --timing --explain --print-compilation-time \ | ||||
|       --ci --accuracy --timing --explain \ | ||||
|       "${DYNAMO_BENCHMARK_FLAGS[@]}" \ | ||||
|       "$@" "${partition_flags[@]}" \ | ||||
|       --output "$TEST_REPORTS_DIR/${name}_${suite}.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv" | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv" | ||||
|     python benchmarks/dynamo/check_graph_breaks.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv" | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| @ -732,7 +650,7 @@ test_inductor_halide() { | ||||
| } | ||||
|  | ||||
| test_inductor_triton_cpu() { | ||||
|   python test/run_test.py --include inductor/test_triton_cpu_backend.py inductor/test_torchinductor_strided_blocks.py --verbose | ||||
|   python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -762,8 +680,6 @@ test_dynamo_benchmark() { | ||||
|       fi | ||||
|     elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then | ||||
|       test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@" | ||||
|     elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then | ||||
|       test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@" | ||||
|     else | ||||
|       test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@" | ||||
|       test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@" | ||||
| @ -798,7 +714,7 @@ test_inductor_torchbench_smoketest_perf() { | ||||
|       --only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_huggingface_training.csv" | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv" | ||||
|   done | ||||
| } | ||||
|  | ||||
| @ -994,20 +910,10 @@ test_libtorch_api() { | ||||
|   else | ||||
|     # Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy | ||||
|     OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest" | ||||
|  | ||||
|     # On s390x, pytorch is built without llvm. | ||||
|     # Even if it would be built with llvm, llvm currently doesn't support used features on s390x and | ||||
|     # test fails with errors like: | ||||
|     # JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer | ||||
|     # unknown file: Failure | ||||
|     # C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) } | ||||
|     if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then | ||||
|       python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr | ||||
|     fi | ||||
|     python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr | ||||
|   fi | ||||
|  | ||||
|   # quantization is not fully supported on s390x yet | ||||
|   if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* && "${BUILD_ENVIRONMENT}" != *s390x* ]]; then | ||||
|   if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then | ||||
|     # NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR | ||||
|     export CPP_TESTS_DIR="${BUILD_BIN_DIR}" | ||||
|     python test/run_test.py --cpp --verbose -i cpp/static_runtime_test | ||||
| @ -1173,9 +1079,8 @@ build_xla() { | ||||
|   apply_patches | ||||
|   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|   # These functions are defined in .circleci/common.sh in pytorch/xla repo | ||||
|   retry install_pre_deps_pytorch_xla $XLA_DIR $USE_CACHE | ||||
|   retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE | ||||
|   CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR | ||||
|   retry install_post_deps_pytorch_xla | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -1482,7 +1387,7 @@ test_executorch() { | ||||
|   bash examples/models/llama3_2_vision/install_requirements.sh | ||||
|   # NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch | ||||
|   # from the PR | ||||
|   bash .ci/scripts/setup-linux.sh --build-tool cmake | ||||
|   bash .ci/scripts/setup-linux.sh cmake | ||||
|  | ||||
|   echo "Run ExecuTorch unit tests" | ||||
|   pytest -v -n auto | ||||
| @ -1506,7 +1411,7 @@ test_executorch() { | ||||
| test_linux_aarch64() { | ||||
|   python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \ | ||||
|         test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \ | ||||
|         test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \ | ||||
|         test_foreach test_reductions test_unary_ufuncs \ | ||||
|         --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
|  | ||||
|   # Dynamo tests | ||||
| @ -1574,16 +1479,6 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then | ||||
|   install_torchvision | ||||
|   id=$((SHARD_NUMBER-1)) | ||||
|   test_dynamo_benchmark timm_models "$id" | ||||
| elif [[ "${TEST_CONFIG}" == cachebench ]]; then | ||||
|   install_torchaudio cuda | ||||
|   install_torchvision | ||||
|   checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco | ||||
|   PYTHONPATH=$(pwd)/torchbench test_cachebench | ||||
| elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then | ||||
|   install_torchaudio cpu | ||||
|   install_torchvision | ||||
|   checkout_install_torchbench nanogpt | ||||
|   PYTHONPATH=$(pwd)/torchbench test_verify_cachebench | ||||
| elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then | ||||
|   if [[ "${TEST_CONFIG}" == *cpu* ]]; then | ||||
|     install_torchaudio cpu | ||||
| @ -1639,7 +1534,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then | ||||
|   test_python_shard "$SHARD_NUMBER" | ||||
|   test_aten | ||||
| elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then | ||||
|   test_lazy_tensor_meta_reference_disabled | ||||
|   test_without_numpy | ||||
|   install_torchvision | ||||
|   test_python_shard 1 | ||||
|  | ||||
| @ -1,41 +0,0 @@ | ||||
| r""" | ||||
| It's used to check basic rnn features with cpu-only. | ||||
| For example, it would throw exception if some components are missing | ||||
| """ | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
| import torch.optim as optim | ||||
|  | ||||
|  | ||||
| class SimpleCNN(nn.Module): | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self.conv = nn.Conv2d(1, 1, 3) | ||||
|         self.pool = nn.MaxPool2d(2, 2) | ||||
|  | ||||
|     def forward(self, inputs): | ||||
|         output = self.pool(F.relu(self.conv(inputs))) | ||||
|         output = output.view(1) | ||||
|         return output | ||||
|  | ||||
|  | ||||
| try: | ||||
|     # Mock one infer | ||||
|     net = SimpleCNN() | ||||
|     net_inputs = torch.rand((1, 1, 5, 5)) | ||||
|     outputs = net(net_inputs) | ||||
|     print(outputs) | ||||
|  | ||||
|     criterion = nn.MSELoss() | ||||
|     optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.1) | ||||
|  | ||||
|     # Mock one step training | ||||
|     label = torch.full((1,), 1.0, dtype=torch.float) | ||||
|     loss = criterion(outputs, label) | ||||
|     loss.backward() | ||||
|     optimizer.step() | ||||
|  | ||||
| except Exception as e: | ||||
|     print(f"An error occurred: {e}") | ||||
| @ -1,13 +0,0 @@ | ||||
| r""" | ||||
| It's used to check basic rnn features with cpu-only. | ||||
| For example, it would throw exception if missing some components are missing | ||||
| """ | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
|  | ||||
| rnn = nn.RNN(10, 20, 2) | ||||
| inputs = torch.randn(5, 3, 10) | ||||
| h0 = torch.randn(2, 3, 20) | ||||
| output, hn = rnn(inputs, h0) | ||||
| @ -18,9 +18,6 @@ export PYTORCH_FINAL_PACKAGE_DIR="${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-result | ||||
| PYTORCH_FINAL_PACKAGE_DIR_WIN=$(cygpath -w "${PYTORCH_FINAL_PACKAGE_DIR}") | ||||
| export PYTORCH_FINAL_PACKAGE_DIR_WIN | ||||
|  | ||||
| # enable debug asserts in serialization | ||||
| export TORCH_SERIALIZATION_DEBUG=1 | ||||
|  | ||||
| mkdir -p "$TMP_DIR"/build/torch | ||||
|  | ||||
| export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers | ||||
|  | ||||
| @ -1,31 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| echo Dependency ARM Performance Libraries (APL) installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR% | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR% | ||||
|  | ||||
| :: Set download URL for the ARM Performance Libraries (APL) | ||||
| set DOWNLOAD_URL="https://developer.arm.com/-/cdn-downloads/permalink/Arm-Performance-Libraries/Version_24.10/arm-performance-libraries_24.10_Windows.msi" | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\arm-performance-libraries.msi | ||||
|  | ||||
| :: Download installer | ||||
| echo Downloading ARM Performance Libraries (APL)... | ||||
| curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL% | ||||
|  | ||||
| :: Install ARM Performance Libraries (APL) | ||||
| echo Installing ARM Performance Libraries (APL)... | ||||
| msiexec /i "%INSTALLER_FILE%" /qn /norestart ACCEPT_EULA=1 INSTALLFOLDER="%DEPENDENCIES_DIR%" | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install ARM Performance Libraries (APL) components. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Add to environment | ||||
| echo ARMPL_DIR=%DEPENDENCIES_DIR%\armpl_24.10\>> %GITHUB_ENV% | ||||
| echo %DEPENDENCIES_DIR%\armpl_24.10\bin\>> %GITHUB_PATH% | ||||
|  | ||||
| echo Dependency ARM Performance Libraries (APL) installation finished. | ||||
| @ -1,41 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| echo Dependency MSVC Build Tools with C++ with ARM64/ARM64EC components installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir "%DOWNLOADS_DIR%" | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir "%DEPENDENCIES_DIR%" | ||||
|  | ||||
| :: Set download URL for the Visual Studio Installer | ||||
| set DOWNLOAD_URL=https://aka.ms/vs/17/release/vs_BuildTools.exe | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\vs_BuildTools.exe | ||||
|  | ||||
| :: Download installer | ||||
| echo Downloading Visual Studio Build Tools with C++ installer... | ||||
| curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL% | ||||
|  | ||||
| :: Install the Visual Studio Build Tools with C++ components | ||||
| echo Installing Visual Studio Build Tools with C++ components... | ||||
| echo Installing MSVC %MSVC_VERSION% | ||||
| "%INSTALLER_FILE%" --norestart --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^ | ||||
|     --add Microsoft.VisualStudio.Workload.VCTools ^ | ||||
|     --add Microsoft.VisualStudio.Component.Windows10SDK ^ | ||||
|     --add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.ASAN ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.CMake.Project ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.CoreBuildTools ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.CoreIde ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.Tools.ARM64EC ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^ | ||||
|     --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 | ||||
|  | ||||
| echo exitcode = %errorlevel% | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo Failed to install Visual Studio Build Tools with C++ components. | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| echo Dependency Visual Studio Build Tools with C++ installation finished. | ||||
| @ -1,37 +0,0 @@ | ||||
| :: we need to install newer version of Git manually as "-submodules" function is not supported in the default version of runner. | ||||
|  | ||||
| @echo off | ||||
|  | ||||
| echo Dependency Git installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR% | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR% | ||||
|  | ||||
| :: Set download URL for the Git | ||||
| set DOWNLOAD_URL="https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/Git-2.46.0-64-bit.exe" | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\Git-2.46.0-64-bit.exe | ||||
|  | ||||
| :: Download installer | ||||
| echo Downloading Git... | ||||
| curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL% | ||||
|  | ||||
| :: Install Git | ||||
| echo Installing Git... | ||||
| "%INSTALLER_FILE%" /VERYSILENT /DIR="%DEPENDENCIES_DIR%\git" | ||||
|  | ||||
| dir %DEPENDENCIES_DIR%\git | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install Git. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Enable long paths | ||||
| call "%DEPENDENCIES_DIR%\git\cmd\git.exe" config --system core.longpaths true | ||||
|  | ||||
| :: Add to PATH | ||||
| echo %DEPENDENCIES_DIR%\git\cmd\;%DEPENDENCIES_DIR%\git\bin\>> %GITHUB_PATH% | ||||
|  | ||||
| echo Dependency Git installation finished. | ||||
| @ -1,33 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| echo Dependency libuv installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR% | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR% | ||||
|  | ||||
| :: activate visual studio | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
| where cl.exe | ||||
|  | ||||
| cd %DEPENDENCIES_DIR% | ||||
| git clone https://github.com/libuv/libuv.git -b v1.39.0 | ||||
|  | ||||
| echo Configuring libuv... | ||||
| mkdir libuv\build | ||||
| cd libuv\build | ||||
| cmake .. -DBUILD_TESTING=OFF | ||||
|  | ||||
| echo Building libuv... | ||||
| cmake --build . --config Release | ||||
|  | ||||
| echo Installing libuv... | ||||
| cmake --install . --prefix ../install | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install libuv. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| echo Dependency libuv installation finished. | ||||
| @ -1,46 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| echo Dependency OpenBLAS installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR% | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR% | ||||
|  | ||||
| :: activate visual studio | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
| where cl.exe | ||||
|  | ||||
| :: Clone OpenBLAS | ||||
| cd %DEPENDENCIES_DIR% | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 | ||||
|  | ||||
| echo Configuring OpenBLAS... | ||||
| mkdir OpenBLAS\build | ||||
| cd OpenBLAS\build | ||||
| cmake .. -G Ninja ^ | ||||
|   -DBUILD_TESTING=0 ^ | ||||
|   -DBUILD_BENCHMARKS=0 ^ | ||||
|   -DC_LAPACK=1 ^ | ||||
|   -DNOFORTRAN=1 ^ | ||||
|   -DDYNAMIC_ARCH=0 ^ | ||||
|   -DARCH=arm64 ^ | ||||
|   -DBINARY=64 ^ | ||||
|   -DTARGET=GENERIC ^ | ||||
|   -DUSE_OPENMP=1 ^ | ||||
|   -DCMAKE_SYSTEM_PROCESSOR=ARM64 ^ | ||||
|   -DCMAKE_SYSTEM_NAME=Windows ^ | ||||
|   -DCMAKE_BUILD_TYPE=Release | ||||
|  | ||||
| echo Building OpenBLAS... | ||||
| cmake --build . --config Release | ||||
|  | ||||
| echo Installing OpenBLAS... | ||||
| cmake --install . --prefix ../install | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install OpenBLAS. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| echo Dependency OpenBLAS installation finished. | ||||
| @ -1,44 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| :: Downloads the Arm64 CPython installer selected by DESIRED_PYTHON (3.11, 3.12 or | ||||
| :: 3.13; any other value falls back to 3.12.7), installs it silently into | ||||
| :: %DEPENDENCIES_DIR%\Python and appends its directories to the file named by GITHUB_PATH. | ||||
| :: Inputs (environment): DOWNLOADS_DIR, DEPENDENCIES_DIR, DESIRED_PYTHON, GITHUB_PATH. | ||||
| :: Exits with code 1 when the download or the installer fails. | ||||
|  | ||||
| echo Dependency Python installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir "%DOWNLOADS_DIR%" | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir "%DEPENDENCIES_DIR%" | ||||
|  | ||||
| if "%DESIRED_PYTHON%" == "3.13" ( | ||||
|     echo Python version is set to 3.13 | ||||
|     set DOWNLOAD_URL=https://www.python.org/ftp/python/3.13.2/python-3.13.2-arm64.exe | ||||
| ) else if "%DESIRED_PYTHON%" == "3.12" ( | ||||
|     echo Python version is set to 3.12 | ||||
|     set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe | ||||
| ) else if "%DESIRED_PYTHON%" == "3.11" ( | ||||
|     echo Python version is set to 3.11 | ||||
|     set DOWNLOAD_URL=https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe | ||||
| ) else ( | ||||
|     echo DESIRED_PYTHON not defined, Python version is set to 3.12 | ||||
|     set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe | ||||
| ) | ||||
|  | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\python-installer.exe | ||||
|  | ||||
| :: Download installer | ||||
| :: --fail makes curl return an error on HTTP 4xx/5xx instead of saving the error page as the .exe | ||||
| echo Downloading Python... | ||||
| curl -L --fail -o "%INSTALLER_FILE%" "%DOWNLOAD_URL%" | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to download Python. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Install Python | ||||
| echo Installing Python... | ||||
| "%INSTALLER_FILE%" /quiet Include_debug=1 TargetDir="%DEPENDENCIES_DIR%\Python" | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install Python. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Add to PATH | ||||
| echo %DEPENDENCIES_DIR%\Python\>> "%GITHUB_PATH%" | ||||
| echo %DEPENDENCIES_DIR%\Python\scripts\>> "%GITHUB_PATH%" | ||||
| echo %DEPENDENCIES_DIR%\Python\libs\>> "%GITHUB_PATH%" | ||||
|  | ||||
| echo Dependency Python installation finished. | ||||
| @ -1,33 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| :: Downloads rustup-init.exe (x86_64 build) and installs the stable Rust toolchain | ||||
| :: with aarch64-pc-windows-msvc as the default host. | ||||
| :: RUSTUP_HOME and CARGO_HOME are pointed at %DEPENDENCIES_DIR%\rust and | ||||
| :: %DEPENDENCIES_DIR%\cargo, and the same values are appended to the files named by | ||||
| :: GITHUB_ENV / GITHUB_PATH. | ||||
| :: Inputs (environment): DOWNLOADS_DIR, DEPENDENCIES_DIR, GITHUB_PATH, GITHUB_ENV. | ||||
| :: Exits with code 1 when the download or the installer fails. | ||||
|  | ||||
| echo Dependency Rust installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir "%DOWNLOADS_DIR%" | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir "%DEPENDENCIES_DIR%" | ||||
|  | ||||
| :: The quotes are part of DOWNLOAD_URL, so it is passed to curl without extra quoting | ||||
| set DOWNLOAD_URL="https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe" | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\rustup-init.exe | ||||
| set RUSTUP_HOME=%DEPENDENCIES_DIR%\rust | ||||
| set CARGO_HOME=%DEPENDENCIES_DIR%\cargo | ||||
|  | ||||
| :: Download installer | ||||
| :: --fail makes curl return an error on HTTP 4xx/5xx instead of saving the error page as the .exe | ||||
| echo Downloading Rust... | ||||
| curl -L --fail -o "%INSTALLER_FILE%" %DOWNLOAD_URL% | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to download Rust. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Install Rust | ||||
| echo Installing Rust... | ||||
| "%INSTALLER_FILE%" -q -y --default-host aarch64-pc-windows-msvc --default-toolchain stable --profile default | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install Rust. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Add to PATH | ||||
| echo %DEPENDENCIES_DIR%\cargo\bin\>> "%GITHUB_PATH%" | ||||
| echo RUSTUP_HOME=%DEPENDENCIES_DIR%\rust>> "%GITHUB_ENV%" | ||||
| echo CARGO_HOME=%DEPENDENCIES_DIR%\cargo>> "%GITHUB_ENV%" | ||||
|  | ||||
| echo Dependency Rust installation finished. | ||||
| @ -1,33 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| :: Downloads the sccache v0.8.1 release archive (x86_64 Windows build), extracts it | ||||
| :: into %DEPENDENCIES_DIR%, renames the extracted folder to "sccache" and appends | ||||
| :: that folder to the file named by GITHUB_PATH. | ||||
| :: Inputs (environment): DOWNLOADS_DIR, DEPENDENCIES_DIR, GITHUB_PATH. | ||||
| :: Exits with code 1 when the download, extraction or rename fails. | ||||
| :: Side effect: the working directory ends up in the parent of %DEPENDENCIES_DIR%. | ||||
|  | ||||
| echo Dependency sccache installation started. | ||||
|  | ||||
| :: Pre-check for downloads and dependencies folders | ||||
| if not exist "%DOWNLOADS_DIR%" mkdir "%DOWNLOADS_DIR%" | ||||
| if not exist "%DEPENDENCIES_DIR%" mkdir "%DEPENDENCIES_DIR%" | ||||
|  | ||||
| :: Set download URL for the sccache | ||||
| :: The quotes are part of DOWNLOAD_URL, so it is passed to curl without extra quoting | ||||
| set DOWNLOAD_URL="https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-pc-windows-msvc.zip" | ||||
| set INSTALLER_FILE=%DOWNLOADS_DIR%\sccache.zip | ||||
|  | ||||
| :: Download installer | ||||
| :: --fail makes curl return an error on HTTP 4xx/5xx instead of saving the error page as the .zip | ||||
| echo Downloading sccache.zip... | ||||
| curl -L --fail -o "%INSTALLER_FILE%" %DOWNLOAD_URL% | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to download sccache. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: Install sccache | ||||
| :: Each step is checked directly rather than relying on whatever exit code is left after "cd .." | ||||
| echo Extracting sccache.zip... | ||||
| tar -xf "%INSTALLER_FILE%" -C "%DEPENDENCIES_DIR%" | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install sccache. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
| cd /d "%DEPENDENCIES_DIR%" | ||||
| ren sccache-v0.8.1-x86_64-pc-windows-msvc sccache | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed to install sccache. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
| cd .. | ||||
|  | ||||
| :: Add to PATH | ||||
| echo %DEPENDENCIES_DIR%\sccache\>> "%GITHUB_PATH%" | ||||
|  | ||||
| echo Dependency sccache installation finished. | ||||
| @ -1,22 +0,0 @@ | ||||
| :: Prepares a Python virtual environment in %PYTORCH_ROOT%\.venv for running tests | ||||
| :: and installs the torch wheel found in %PYTORCH_FINAL_PACKAGE_DIR% into it. | ||||
| :: Inputs (environment): PYTORCH_ROOT, DEPENDENCIES_DIR, PYTORCH_FINAL_PACKAGE_DIR. | ||||
| :: Exits with code 1 when no wheel is found; otherwise the exit code is that of the | ||||
| :: final "pip install". The activated venv stays active for the caller. | ||||
|  | ||||
| :: change to source directory | ||||
| cd /d "%PYTORCH_ROOT%" | ||||
|  | ||||
| :: activate visual studio | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
| where cl.exe | ||||
|  | ||||
| :: create virtual environment | ||||
| python -m venv .venv | ||||
| :: keep the venv out of git status by ignoring everything inside it | ||||
| echo * > .venv\.gitignore | ||||
| call .\.venv\Scripts\activate | ||||
| where python | ||||
|  | ||||
| :: install dependencies | ||||
| python -m pip install --upgrade pip | ||||
| pip install -r requirements.txt | ||||
| pip install pytest numpy protobuf expecttest hypothesis | ||||
|  | ||||
| :: find file name for pytorch wheel | ||||
| :: cleared first so a stale value from the environment is never installed; | ||||
| :: when several files match, the last one listed wins | ||||
| set "TORCH_WHEEL_FILENAME=" | ||||
| for /f "delims=" %%f in ('dir /b "%PYTORCH_FINAL_PACKAGE_DIR%" ^| findstr "torch-"') do set "TORCH_WHEEL_FILENAME=%PYTORCH_FINAL_PACKAGE_DIR%\%%f" | ||||
|  | ||||
| if not defined TORCH_WHEEL_FILENAME ( | ||||
|     echo "No torch wheel found in PYTORCH_FINAL_PACKAGE_DIR" | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| :: quoted so a package directory containing spaces still works | ||||
| pip install "%TORCH_WHEEL_FILENAME%" | ||||
| @ -1,101 +0,0 @@ | ||||
| @echo on | ||||
|  | ||||
| :: Builds LibTorch for Windows Arm64 and packages it as a zip archive. | ||||
| :: Inputs (environment): BUILD_TYPE, DEPENDENCIES_DIR, PYTORCH_ROOT, PYTORCH_BUILD_VERSION, | ||||
| :: PYTORCH_FINAL_PACKAGE_DIR, ENABLE_APL, ENABLE_OPENBLAS, DEBUG. | ||||
| :: Output: %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip plus a %LIBTORCH_PREFIX%-latest.zip | ||||
| :: copy, both placed in %PYTORCH_FINAL_PACKAGE_DIR%. | ||||
| :: Exits with code 1 when the build, the archive step or the final steps fail. | ||||
|  | ||||
| :: environment variables | ||||
| set CMAKE_BUILD_TYPE=%BUILD_TYPE% | ||||
| set CMAKE_C_COMPILER_LAUNCHER=sccache | ||||
| set CMAKE_CXX_COMPILER_LAUNCHER=sccache | ||||
| set libuv_ROOT=%DEPENDENCIES_DIR%\libuv\install | ||||
| set MSSdk=1 | ||||
| if defined PYTORCH_BUILD_VERSION ( | ||||
|   set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION% | ||||
|   set PYTORCH_BUILD_NUMBER=1 | ||||
| ) | ||||
|  | ||||
| :: Set BLAS type | ||||
| :: Both sides are quoted so that an unset ENABLE_* variable is not a syntax error | ||||
| if "%ENABLE_APL%" == "1" ( | ||||
|     set BLAS=APL | ||||
|     set USE_LAPACK=1 | ||||
| ) else if "%ENABLE_OPENBLAS%" == "1" ( | ||||
|     set BLAS=OpenBLAS | ||||
|     set OpenBLAS_HOME=%DEPENDENCIES_DIR%\OpenBLAS\install | ||||
| ) | ||||
|  | ||||
| :: activate visual studio | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
| where cl.exe | ||||
|  | ||||
| :: change to source directory | ||||
| cd %PYTORCH_ROOT% | ||||
|  | ||||
| :: copy libuv.dll | ||||
| copy "%libuv_ROOT%\lib\Release\uv.dll" torch\lib\uv.dll | ||||
|  | ||||
| :: create virtual environment | ||||
| python -m venv .venv | ||||
| echo * > .venv\.gitignore | ||||
| call .\.venv\Scripts\activate | ||||
| where python | ||||
|  | ||||
| :: python install dependencies | ||||
| python -m pip install --upgrade pip | ||||
| pip install -r requirements.txt | ||||
| :: DISTUTILS_USE_SDK should be set after psutil dependency | ||||
| set DISTUTILS_USE_SDK=1 | ||||
|  | ||||
| :: start sccache server and reset sccache stats | ||||
| sccache --start-server | ||||
| sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| :: Prepare the environment | ||||
| mkdir libtorch | ||||
| mkdir libtorch\bin | ||||
| mkdir libtorch\cmake | ||||
| mkdir libtorch\include | ||||
| mkdir libtorch\lib | ||||
| mkdir libtorch\share | ||||
| mkdir libtorch\test | ||||
|  | ||||
| :: Call LibTorch build script | ||||
| python ./tools/build_libtorch.py | ||||
|  | ||||
| :: Check if there is an error (positive and negative exit codes) | ||||
| IF ERRORLEVEL 1 exit /b 1 | ||||
| IF NOT ERRORLEVEL 0 exit /b 1 | ||||
|  | ||||
| :: Move the files to the correct location | ||||
| :: robocopy uses non-zero exit codes below 8 to report success, so its exit code is not checked here | ||||
| move /Y torch\bin\*.* libtorch\bin\ | ||||
| move /Y torch\cmake\*.* libtorch\cmake\ | ||||
| robocopy /move /e torch\include\ libtorch\include\ | ||||
| move /Y torch\lib\*.* libtorch\lib\ | ||||
| robocopy /move /e torch\share\ libtorch\share\ | ||||
| move /Y torch\test\*.* libtorch\test\ | ||||
| move /Y libtorch\bin\*.dll libtorch\lib\ | ||||
|  | ||||
| :: Set version | ||||
| echo %PYTORCH_BUILD_VERSION% > libtorch\build-version | ||||
| git rev-parse HEAD > libtorch\build-hash | ||||
|  | ||||
| :: Set LIBTORCH_PREFIX | ||||
| IF "%DEBUG%" == "" ( | ||||
|     set LIBTORCH_PREFIX=libtorch-win-arm64-shared-with-deps | ||||
| ) ELSE ( | ||||
|     set LIBTORCH_PREFIX=libtorch-win-arm64-shared-with-deps-debug | ||||
| ) | ||||
|  | ||||
| :: Create output | ||||
| C:\Windows\System32\tar.exe -cvaf %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip -C libtorch * | ||||
| :: Fail right away if the archive could not be created | ||||
| IF ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| :: Copy output to target directory | ||||
| if not exist ..\output mkdir ..\output | ||||
| copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_DIR%\" | ||||
| copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_DIR%\%LIBTORCH_PREFIX%-latest.zip" | ||||
|  | ||||
| :: Cleanup raw data to save space | ||||
| rmdir /s /q libtorch | ||||
|  | ||||
| :: Check if installation was successful | ||||
| if %errorlevel% neq 0 ( | ||||
|     echo "Failed on build_libtorch. (exitcode = %errorlevel%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
| @ -1,60 +0,0 @@ | ||||
| @echo on | ||||
|  | ||||
| :: Builds the PyTorch wheel for Windows Arm64 with "setup.py bdist_wheel" and writes | ||||
| :: it to %PYTORCH_FINAL_PACKAGE_DIR%. | ||||
| :: Inputs (environment): BUILD_TYPE, DEPENDENCIES_DIR, PYTORCH_ROOT, PYTORCH_BUILD_VERSION, | ||||
| :: PYTORCH_FINAL_PACKAGE_DIR, ENABLE_APL, ENABLE_OPENBLAS. | ||||
| :: Exits with code 1 when the wheel build fails. | ||||
|  | ||||
| :: environment variables | ||||
| set CMAKE_BUILD_TYPE=%BUILD_TYPE% | ||||
| set CMAKE_C_COMPILER_LAUNCHER=sccache | ||||
| set CMAKE_CXX_COMPILER_LAUNCHER=sccache | ||||
| set libuv_ROOT=%DEPENDENCIES_DIR%\libuv\install | ||||
| set MSSdk=1 | ||||
| if defined PYTORCH_BUILD_VERSION ( | ||||
|   set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION% | ||||
|   set PYTORCH_BUILD_NUMBER=1 | ||||
| ) | ||||
|  | ||||
| :: Set BLAS type | ||||
| :: Both sides are quoted so that an unset ENABLE_* variable is not a syntax error | ||||
| if "%ENABLE_APL%" == "1" ( | ||||
|     set BLAS=APL | ||||
|     set USE_LAPACK=1 | ||||
| ) else if "%ENABLE_OPENBLAS%" == "1" ( | ||||
|     set BLAS=OpenBLAS | ||||
|     set OpenBLAS_HOME=%DEPENDENCIES_DIR%\OpenBLAS\install | ||||
| ) | ||||
|  | ||||
| :: activate visual studio | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
| where cl.exe | ||||
|  | ||||
| :: change to source directory | ||||
| cd %PYTORCH_ROOT% | ||||
|  | ||||
| :: copy libuv.dll | ||||
| copy "%libuv_ROOT%\lib\Release\uv.dll" torch\lib\uv.dll | ||||
|  | ||||
| :: create virtual environment | ||||
| python -m venv .venv | ||||
| echo * > .venv\.gitignore | ||||
| call .\.venv\Scripts\activate | ||||
| where python | ||||
|  | ||||
| :: python install dependencies | ||||
| python -m pip install --upgrade pip | ||||
| pip install -r requirements.txt | ||||
| :: DISTUTILS_USE_SDK should be set after psutil dependency | ||||
| set DISTUTILS_USE_SDK=1 | ||||
|  | ||||
| :: start sccache server and reset sccache stats | ||||
| sccache --start-server | ||||
| sccache --zero-stats | ||||
| sccache --show-stats | ||||
|  | ||||
| :: Call PyTorch build script | ||||
| python setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%" | ||||
| :: Remember the build result now; the sccache call below overwrites errorlevel | ||||
| set BUILD_EXIT_CODE=%errorlevel% | ||||
|  | ||||
| :: show sccache stats | ||||
| sccache --show-stats | ||||
|  | ||||
| :: Check if the wheel build was successful | ||||
| if %BUILD_EXIT_CODE% neq 0 ( | ||||
|     echo "Failed on build_pytorch. (exitcode = %BUILD_EXIT_CODE%)" | ||||
|     exit /b 1 | ||||
| ) | ||||
| @ -1,49 +0,0 @@ | ||||
| @echo off | ||||
| setlocal | ||||
|  | ||||
| :: Smoke test for the Windows Arm64 binaries. | ||||
| :: PACKAGE_TYPE=wheel    : bootstraps a test environment, then runs the RNN and CNN smoke scripts. | ||||
| :: PACKAGE_TYPE=libtorch : extracts the *-latest.zip archive into .\tmp, compiles | ||||
| ::                         simple-torch-test.cpp against it with cl and runs the result. | ||||
| :: Inputs (environment): PACKAGE_TYPE, PYTORCH_ROOT, PYTORCH_FINAL_PACKAGE_DIR, DEPENDENCIES_DIR. | ||||
| :: Exits with code 1 on an unknown package type or when any step fails. | ||||
|  | ||||
| if "%PACKAGE_TYPE%" == "wheel" goto wheel | ||||
| if "%PACKAGE_TYPE%" == "libtorch" goto libtorch | ||||
|  | ||||
| echo "unknown package type" | ||||
| exit /b 1 | ||||
|  | ||||
| :wheel | ||||
| call %PYTORCH_ROOT%\.ci\pytorch\windows\arm64\bootstrap_tests.bat | ||||
| :: Stop if the environment bootstrap (venv creation / wheel install) failed | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| echo Running python rnn_smoke.py... | ||||
| python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke_win_arm64.py | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| echo Checking that basic CNN works... | ||||
| python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\cnn_smoke_win_arm64.py | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| goto end | ||||
|  | ||||
| :libtorch | ||||
| echo "install and test libtorch" | ||||
|  | ||||
| if not exist tmp mkdir tmp | ||||
|  | ||||
| :: Forward slashes in PYTORCH_FINAL_PACKAGE_DIR are converted to backslashes for "where" | ||||
| for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do C:\Windows\System32\tar.exe -xf "%%i" -C tmp | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| pushd tmp | ||||
|  | ||||
| set VC_VERSION_LOWER=14 | ||||
| set VC_VERSION_UPPER=36 | ||||
|  | ||||
| call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64 | ||||
|  | ||||
| :: Make the extracted headers, import libraries and DLLs visible to cl and to the test binary | ||||
| set install_root=%CD% | ||||
| set INCLUDE=%INCLUDE%;%install_root%\include;%install_root%\include\torch\csrc\api\include | ||||
| set LIB=%LIB%;%install_root%\lib | ||||
| set PATH=%PATH%;%install_root%\lib | ||||
|  | ||||
| cl %PYTORCH_ROOT%\.ci\pytorch\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc /std:c++17 | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| .\simple-torch-test.exe | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| :end | ||||
| @ -9,13 +9,12 @@ FOR %%v IN (%DESIRED_PYTHON%) DO ( | ||||
|     set PYTHON_VERSION_STR=%%v | ||||
|     set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! | ||||
|     conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s | ||||
|     if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y numpy=2.0.1 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1  boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1  boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1  boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.13" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.1.2  boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.13t" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.1.2 boto3 cmake ninja typing_extensions setuptools=72.1.0 python-freethreading python=3.13 | ||||
|     call conda run -n py!PYTHON_VERSION_STR! pip install pyyaml | ||||
|     if "%%v" == "3.8" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=2.0.1 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=2.0.1 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=2.0.1 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=2.0.1 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     if "%%v" == "3.13" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=2.1.2 pyyaml boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v | ||||
|     call conda run -n py!PYTHON_VERSION_STR! pip install mkl-include | ||||
|     call conda run -n py!PYTHON_VERSION_STR! pip install mkl-static | ||||
| ) | ||||
|  | ||||
| @ -1,59 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| REM Prepares a CUDA 12.8 build environment (NVTX location, CUDA path, arch list / | ||||
| REM nvcc flags) and then runs the internal check_opts, copy and setup scripts. | ||||
| REM Every step that reports ERRORLEVEL 1 or higher ends the script via "goto :eof". | ||||
|  | ||||
| set MODULE_NAME=pytorch | ||||
|  | ||||
| REM NOTE(review): the ELSE appears to pair with the inner IF, so clean.bat would run | ||||
| REM only when setup.py is missing but %MODULE_NAME% exists, and nothing runs when | ||||
| REM setup.py is present - confirm this is intended. | ||||
| IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( | ||||
|     call internal\clone.bat | ||||
|     cd %~dp0 | ||||
| ) ELSE ( | ||||
|     call internal\clean.bat | ||||
| ) | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| call internal\check_deps.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| REM Check for optional components | ||||
|  | ||||
| REM USE_CUDA is cleared here; CMAKE_GENERATOR is fixed to the VS 2017 Win64 generator | ||||
| set USE_CUDA= | ||||
| set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 | ||||
|  | ||||
| REM Use the default NvToolsExt install location when NVTOOLSEXT_PATH is not preset; fail if it is absent | ||||
| IF "%NVTOOLSEXT_PATH%"=="" ( | ||||
|     IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib"  ( | ||||
|         set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt | ||||
|     ) ELSE ( | ||||
|         echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing | ||||
|         exit /b 1 | ||||
|     ) | ||||
| ) | ||||
|  | ||||
| REM Use the default CUDA 12.8 install location when CUDA_PATH_V128 is not preset; fail if nvcc.exe is absent | ||||
| IF "%CUDA_PATH_V128%"=="" ( | ||||
|     IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\nvcc.exe" ( | ||||
|         set "CUDA_PATH_V128=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8" | ||||
|     ) ELSE ( | ||||
|         echo CUDA 12.8 not found, failing | ||||
|         exit /b 1 | ||||
|     ) | ||||
| ) | ||||
|  | ||||
| REM With BUILD_VISION empty, set the torch arch list and nvcc flags; otherwise set NVCC_FLAGS with explicit -gencode entries | ||||
| IF "%BUILD_VISION%" == "" ( | ||||
|     set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0 | ||||
|     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all | ||||
| ) ELSE ( | ||||
|     set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120 | ||||
| ) | ||||
|  | ||||
| REM Point CUDA_PATH at the 12.8 toolkit and put its bin directory first on PATH | ||||
| set "CUDA_PATH=%CUDA_PATH_V128%" | ||||
| set "PATH=%CUDA_PATH_V128%\bin;%PATH%" | ||||
|  | ||||
| REM No goto in this script targets the optcheck label; execution simply falls through it | ||||
| :optcheck | ||||
|  | ||||
| call internal\check_opts.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| REM When NIGHTLIES_PYTORCH_ROOT exists, continue from its parent directory | ||||
| if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\.. | ||||
| call  %~dp0\internal\copy.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| call  %~dp0\internal\setup.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
| @ -9,8 +9,7 @@ if "%CUDA_VERSION%" == "xpu" ( | ||||
|     exit /b 0 | ||||
| ) | ||||
|  | ||||
| set SRC_DIR=%~dp0\.. | ||||
|  | ||||
| set SRC_DIR=%NIGHTLIES_PYTORCH_ROOT% | ||||
| if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" | ||||
|  | ||||
| set /a CUDA_VER=%CUDA_VERSION% | ||||
| @ -24,9 +23,9 @@ set CUDNN_LIB_FOLDER="lib\x64" | ||||
| if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" goto set_cuda_env_vars | ||||
|  | ||||
| if %CUDA_VER% EQU 118 goto cuda118 | ||||
| if %CUDA_VER% EQU 121 goto cuda121 | ||||
| if %CUDA_VER% EQU 124 goto cuda124 | ||||
| if %CUDA_VER% EQU 126 goto cuda126 | ||||
| if %CUDA_VER% EQU 128 goto cuda128 | ||||
|  | ||||
| echo CUDA %CUDA_VERSION_STR% is not supported | ||||
| exit /b 1 | ||||
| @ -112,33 +111,6 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" | ||||
|  | ||||
| goto cuda_common | ||||
|  | ||||
| :cuda128 | ||||
|  | ||||
| set CUDA_INSTALL_EXE=cuda_12.8.0_571.96_windows.exe | ||||
| if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( | ||||
|     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" | ||||
|     if errorlevel 1 exit /b 1 | ||||
|     set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" | ||||
|     set "ARGS=cuda_profiler_api_12.8 thrust_12.8 nvcc_12.8 cuobjdump_12.8 nvprune_12.8 nvprof_12.8 cupti_12.8 cublas_12.8 cublas_dev_12.8 cudart_12.8 cufft_12.8 cufft_dev_12.8 curand_12.8 curand_dev_12.8 cusolver_12.8 cusolver_dev_12.8 cusparse_12.8 cusparse_dev_12.8 npp_12.8 npp_dev_12.8 nvrtc_12.8 nvrtc_dev_12.8 nvml_dev_12.8 nvjitlink_12.8 nvtx_12.8" | ||||
| ) | ||||
|  | ||||
| set CUDNN_FOLDER=cudnn-windows-x86_64-9.7.0.66_cuda12-archive | ||||
| set CUDNN_LIB_FOLDER="lib" | ||||
| set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" | ||||
| if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( | ||||
|     curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" | ||||
|     if errorlevel 1 exit /b 1 | ||||
|     set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" | ||||
| ) | ||||
|  | ||||
| @REM cuDNN 8.3+ required zlib to be installed on the path | ||||
| echo Installing ZLIB dlls | ||||
| curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip" | ||||
| 7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib" | ||||
| xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" | ||||
|  | ||||
| goto cuda_common | ||||
|  | ||||
| :cuda_common | ||||
| :: NOTE: We only install CUDA if we don't have it installed already. | ||||
| :: With GHA runners these should be pre-installed as part of our AMI process | ||||
|  | ||||
| @ -27,6 +27,7 @@ for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( | ||||
| endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% | ||||
|  | ||||
| if "%PACKAGE_TYPE%" == "wheel" goto wheel | ||||
| if "%PACKAGE_TYPE%" == "conda" goto conda | ||||
| if "%PACKAGE_TYPE%" == "libtorch" goto libtorch | ||||
|  | ||||
| echo "unknown package type" | ||||
| @ -36,7 +37,6 @@ exit /b 1 | ||||
| echo "install wheel package" | ||||
|  | ||||
| set PYTHON_INSTALLER_URL= | ||||
| if "%DESIRED_PYTHON%" == "3.13t" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe" | ||||
| if "%DESIRED_PYTHON%" == "3.13" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe" | ||||
| if "%DESIRED_PYTHON%" == "3.12" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.12.0/python-3.12.0-amd64.exe" | ||||
| if "%DESIRED_PYTHON%" == "3.11" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.11.0/python-3.11.0-amd64.exe" | ||||
| @ -47,13 +47,6 @@ if "%PYTHON_INSTALLER_URL%" == "" ( | ||||
|     echo Python %DESIRED_PYTHON% not supported yet | ||||
| ) | ||||
|  | ||||
| set ADDITIONAL_OPTIONS="" | ||||
| set PYTHON_EXEC="python" | ||||
| if "%DESIRED_PYTHON%" == "3.13t" ( | ||||
|     set ADDITIONAL_OPTIONS="Include_freethreaded=1" | ||||
|     set PYTHON_EXEC="python3.13t" | ||||
| ) | ||||
|  | ||||
| del python-amd64.exe | ||||
| curl --retry 3 -kL "%PYTHON_INSTALLER_URL%" --output python-amd64.exe | ||||
| if errorlevel 1 exit /b 1 | ||||
| @ -62,39 +55,85 @@ if errorlevel 1 exit /b 1 | ||||
| :: the installed Python to PATH system-wide. Even calling set PATH=%ORIG_PATH% later on won't make | ||||
| :: a change. As the builder directory will be removed after the smoke test, all subsequent non-binary | ||||
| :: jobs will fail to find any Python executable there | ||||
| start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_test=0 %ADDITIONAL_OPTIONS% TargetDir=%CD%\Python | ||||
| start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_test=0 TargetDir=%CD%\Python | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" | ||||
| if "%DESIRED_PYTHON%" == "3.13t" %PYTHON_EXEC% -m pip install --pre numpy==2.2.1 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install --pre numpy==2.1.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.11" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.10" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf networkx | ||||
|  | ||||
| if "%DESIRED_PYTHON%" == "3.13" pip install -q --pre numpy==2.1.0 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.12" pip install -q --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.11" pip install -q --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.10" pip install -q --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.9" pip install -q --pre numpy==2.0.2 protobuf | ||||
| if "%DESIRED_PYTHON%" == "3.8" pip install -q numpy protobuf | ||||
|  | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| if "%PYTORCH_BUILD_VERSION:dev=%" NEQ "%PYTORCH_BUILD_VERSION%" ( | ||||
|     set "CHANNEL=nightly" | ||||
| ) else ( | ||||
|     set "CHANNEL=test" | ||||
| ) | ||||
|  | ||||
| set "EXTRA_INDEX= " | ||||
| if "%CUDA_VERSION%" == "xpu" set "EXTRA_INDEX=--index-url https://download.pytorch.org/whl/%CHANNEL%/xpu" | ||||
|  | ||||
| for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do %PYTHON_EXEC% -m pip install "%%i" %EXTRA_INDEX% | ||||
| for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" | ||||
| if errorlevel 1 exit /b 1 | ||||
|  | ||||
| goto smoke_test | ||||
|  | ||||
| :conda | ||||
| echo "install conda package" | ||||
|  | ||||
| :: Install Miniconda3 | ||||
| set "CONDA_HOME=%CD%\conda" | ||||
| set "tmp_conda=%CONDA_HOME%" | ||||
| set "miniconda_exe=%CD%\miniconda.exe" | ||||
| set "CONDA_EXTRA_ARGS=cpuonly -c pytorch-nightly" | ||||
| if "%CUDA_VERSION%" == "118" ( | ||||
|     set "CONDA_EXTRA_ARGS=pytorch-cuda=11.8 -c nvidia -c pytorch-nightly" | ||||
| ) | ||||
| if "%CUDA_VERSION%" == "121" ( | ||||
|     set "CONDA_EXTRA_ARGS=pytorch-cuda=12.1 -c nvidia -c pytorch-nightly" | ||||
| ) | ||||
| if "%CUDA_VERSION%" == "124" ( | ||||
|     set "CONDA_EXTRA_ARGS=pytorch-cuda=12.4 -c nvidia -c pytorch-nightly" | ||||
| ) | ||||
| if "%CUDA_VERSION%" == "126" ( | ||||
|     set "CONDA_EXTRA_ARGS=pytorch-cuda=12.6 -c nvidia -c pytorch-nightly" | ||||
| ) | ||||
|  | ||||
| rmdir /s /q conda | ||||
| del miniconda.exe | ||||
| curl -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" | ||||
| start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" | ||||
|  | ||||
| conda create -qyn testenv python=%DESIRED_PYTHON% | ||||
| if errorlevel 1 exit /b 1 | ||||
| call conda install -yq conda-build | ||||
| if errorlevel 1 exit /b 1 | ||||
| call %CONDA_HOME%\condabin\activate.bat testenv | ||||
| if errorlevel 1 exit /b 1 | ||||
| set "NO_ARCH_PATH=%PYTORCH_FINAL_PACKAGE_DIR:/=\%\noarch" | ||||
| mkdir %NO_ARCH_PATH% | ||||
| for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *') do xcopy "%%i" %NO_ARCH_PATH% /Y | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
| call conda index %PYTORCH_FINAL_PACKAGE_DIR% | ||||
| if errorlevel 1 exit /b 1 | ||||
| call conda install -yq -c "file:///%PYTORCH_FINAL_PACKAGE_DIR%" pytorch==%PYTORCH_BUILD_VERSION% -c pytorch -c numba/label/dev -c nvidia | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
| call conda install -yq numpy | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| set /a CUDA_VER=%CUDA_VERSION% | ||||
| set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1% | ||||
| set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% | ||||
| set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% | ||||
|  | ||||
| :: Install package we just built | ||||
|  | ||||
|  | ||||
| :smoke_test | ||||
| %PYTHON_EXEC% -c "import torch" | ||||
| python -c "import torch" | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| echo Checking that MKL is available | ||||
| %PYTHON_EXEC% -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" | ||||
| python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| if "%NVIDIA_GPU_EXISTS%" == "0" ( | ||||
| @ -103,24 +142,24 @@ if "%NVIDIA_GPU_EXISTS%" == "0" ( | ||||
| ) | ||||
|  | ||||
| echo Checking that CUDA archs are setup correctly | ||||
| %PYTHON_EXEC% -c "import torch; torch.randn([3,5]).cuda()" | ||||
| python -c "import torch; torch.randn([3,5]).cuda()" | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| echo Checking that magma is available | ||||
| %PYTHON_EXEC% -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" | ||||
| python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| echo Checking that CuDNN is available | ||||
| %PYTHON_EXEC% -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" | ||||
| python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| echo Checking that basic RNN works | ||||
| %PYTHON_EXEC% %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke.py | ||||
| python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke.py | ||||
|  | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| echo Checking that basic CNN works | ||||
| %PYTHON_EXEC% %PYTORCH_ROOT%\.ci\pytorch\test_example_code\cnn_smoke.py | ||||
| python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\cnn_smoke.py | ||||
| if ERRORLEVEL 1 exit /b 1 | ||||
|  | ||||
| goto end | ||||
|  | ||||
| @ -47,9 +47,9 @@ set XPU_EXTRA_INSTALLED=0 | ||||
| set XPU_EXTRA_UNINSTALL=0 | ||||
|  | ||||
| if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] ( | ||||
|     set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe | ||||
|     set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe | ||||
|     set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product | ||||
|     set XPU_BUNDLE_VERSION=2025.0.1+20 | ||||
|     set XPU_BUNDLE_VERSION=2025.0.0+335 | ||||
|     set XPU_BUNDLE_INSTALLED=0 | ||||
|     set XPU_BUNDLE_UNINSTALL=0 | ||||
|     set XPU_EXTRA_URL=NULL | ||||
| @ -104,6 +104,14 @@ goto xpu_install_end | ||||
|  | ||||
| :xpu_bundle_install | ||||
|  | ||||
| :: Install Level Zero SDK | ||||
| set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip | ||||
| curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip" | ||||
| echo "Installing level zero SDK..." | ||||
| 7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero" | ||||
| set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%" | ||||
|  | ||||
| :: Install Bundle | ||||
| curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL% | ||||
| echo "XPU Bundle installing..." | ||||
| start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle | ||||
| @ -120,14 +128,3 @@ if errorlevel 1 exit /b 1 | ||||
| del xpu_extra.exe | ||||
|  | ||||
| :xpu_install_end | ||||
|  | ||||
| if not "%XPU_ENABLE_KINETO%"=="1" goto install_end | ||||
| :: Install Level Zero SDK | ||||
| set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip | ||||
| curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip" | ||||
| echo "Installing level zero SDK..." | ||||
| 7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero" | ||||
| set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%" | ||||
| del "%SRC_DIR%\temp_build\level_zero_sdk.zip" | ||||
|  | ||||
| :install_end | ||||
|  | ||||
| @ -28,6 +28,11 @@ call "%XPU_BUNDLE_ROOT%\compiler\latest\env\vars.bat" | ||||
| call "%XPU_BUNDLE_ROOT%\ocloc\latest\env\vars.bat" | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| :: Workaround for https://github.com/pytorch/pytorch/issues/134989 | ||||
| set CMAKE_SHARED_LINKER_FLAGS=/FORCE:MULTIPLE | ||||
| set CMAKE_MODULE_LINKER_FLAGS=/FORCE:MULTIPLE | ||||
| set CMAKE_EXE_LINKER_FLAGS=/FORCE:MULTIPLE | ||||
|  | ||||
| if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\.. | ||||
| call %~dp0\internal\copy_cpu.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| @ -130,19 +130,7 @@ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} | ||||
| SETUPTOOLS_PINNED_VERSION="=46.0.0" | ||||
| PYYAML_PINNED_VERSION="=5.3" | ||||
| EXTRA_CONDA_INSTALL_FLAGS="" | ||||
| CONDA_ENV_CREATE_FLAGS="" | ||||
| RENAME_WHEEL=true | ||||
| case $desired_python in | ||||
|     3.13t) | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=68.0.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="=2.1.0" | ||||
|         CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|         desired_python="3.13" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13) | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=68.0.0" | ||||
| @ -181,15 +169,18 @@ esac | ||||
|  | ||||
| # Install into a fresh env | ||||
| tmp_env_name="wheel_py$python_nodot" | ||||
| conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} | ||||
| conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" | ||||
| source activate "$tmp_env_name" | ||||
|  | ||||
| pip install "numpy=${NUMPY_PINNED_VERSION}"  "pyyaml${PYYAML_PINNED_VERSION}" requests ninja "setuptools${SETUPTOOLS_PINNED_VERSION}" typing_extensions | ||||
| retry pip install -r "${pytorch_rootdir}/requirements.txt" || true | ||||
| retry brew install libomp | ||||
| pip install -q "numpy=${NUMPY_PINNED_VERSION}"  "pyyaml${PYYAML_PINNED_VERSION}" requests | ||||
| retry pip install -qr "${pytorch_rootdir}/requirements.txt" || true | ||||
| # TODO : Remove me later (but in the interim, use Anaconda cmake, to find Anaconda installed OpenMP) | ||||
| retry pip uninstall -y cmake | ||||
| retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq  llvm-openmp=14.0.6 cmake ninja "setuptools${SETUPTOOLS_PINNED_VERSION}" typing_extensions | ||||
|  | ||||
| # For USE_DISTRIBUTED=1 on macOS, need libuv, which is built as part of the tensorpipe submodule | ||||
| # For USE_DISTRIBUTED=1 on macOS, need libuv and pkg-config to find libuv. | ||||
| export USE_DISTRIBUTED=1 | ||||
| retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq libuv pkg-config | ||||
|  | ||||
| if [[ -n "$CROSS_COMPILE_ARM64" ]]; then | ||||
|     export CMAKE_OSX_ARCHITECTURES=arm64 | ||||
| @ -231,13 +222,10 @@ echo "The wheel is in $(find $whl_tmp_dir -name '*.whl')" | ||||
| wheel_filename_gen=$(find $whl_tmp_dir -name '*.whl' | head -n1 | xargs -I {} basename {}) | ||||
| popd | ||||
|  | ||||
| if [[ -z "$BUILD_PYTHONLESS" && $RENAME_WHEEL == true  ]]; then | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|     # Copy the whl to a final destination before tests are run | ||||
|     echo "Renaming Wheel file: $wheel_filename_gen to $wheel_filename_new" | ||||
|     cp "$whl_tmp_dir/$wheel_filename_gen" "$PYTORCH_FINAL_PACKAGE_DIR/$wheel_filename_new" | ||||
| elif [[ $RENAME_WHEEL == false ]]; then | ||||
|     echo "Copying Wheel file: $wheel_filename_gen to $PYTORCH_FINAL_PACKAGE_DIR" | ||||
|     cp "$whl_tmp_dir/$wheel_filename_gen" "$PYTORCH_FINAL_PACKAGE_DIR/$wheel_filename_gen" | ||||
| else | ||||
|     pushd "$pytorch_rootdir" | ||||
|  | ||||
|  | ||||
| @ -30,10 +30,12 @@ fi | ||||
| # Pick docker image | ||||
| export DOCKER_IMAGE=${DOCKER_IMAGE:-} | ||||
| if [[ -z "$DOCKER_IMAGE" ]]; then | ||||
|   if [[ "$DESIRED_CUDA" == cpu ]]; then | ||||
|     export DOCKER_IMAGE="pytorch/manylinux2_28:cpu" | ||||
|   if [[ "$PACKAGE_TYPE" == conda ]]; then | ||||
|     export DOCKER_IMAGE="pytorch/conda-cuda" | ||||
|   elif [[ "$DESIRED_CUDA" == cpu ]]; then | ||||
|     export DOCKER_IMAGE="pytorch/manylinux:cpu" | ||||
|   else | ||||
|     export DOCKER_IMAGE="pytorch/manylinux2_28-builder:${DESIRED_CUDA:2}" | ||||
|     export DOCKER_IMAGE="pytorch/manylinux-builder:${DESIRED_CUDA:2}" | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| @ -61,7 +63,7 @@ if tagged_version >/dev/null; then | ||||
|   # Turns tag v1.6.0-rc1 -> v1.6.0 | ||||
|   BASE_BUILD_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')" | ||||
| fi | ||||
| if [[ "$(uname)" == 'Darwin' ]]; then | ||||
| if [[ "$(uname)" == 'Darwin' ]] || [[ "$PACKAGE_TYPE" == conda ]]; then | ||||
|   export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}" | ||||
| else | ||||
|   export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}+$DESIRED_CUDA" | ||||
| @ -74,12 +76,6 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) | ||||
|  | ||||
| # Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for all the wheel builds, hence append TRITON_CONSTRAINT | ||||
| TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|  | ||||
| # CUDA 12.8 builds have triton for Linux and Linux aarch64 binaries. | ||||
| if [[ "$DESIRED_CUDA" == cu128 ]]; then | ||||
|   TRITON_CONSTRAINT="platform_system == 'Linux'" | ||||
| fi | ||||
|  | ||||
| if [[ "$PACKAGE_TYPE" =~ .*wheel.* &&  -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then | ||||
|   TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" | ||||
|   if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then | ||||
| @ -104,11 +100,11 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B | ||||
| fi | ||||
|  | ||||
| # Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package | ||||
| if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then | ||||
|     TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}" | ||||
| if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then | ||||
|     TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" | ||||
|     if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then | ||||
|         TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt) | ||||
|         TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}" | ||||
|         TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" | ||||
|     fi | ||||
|     if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then | ||||
|         export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" | ||||
| @ -153,6 +149,8 @@ export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS: | ||||
|  | ||||
| # TODO: We don't need this anymore IIUC | ||||
| export TORCH_PACKAGE_NAME='torch' | ||||
| export TORCH_CONDA_BUILD_FOLDER='pytorch-nightly' | ||||
| export ANACONDA_USER='pytorch' | ||||
|  | ||||
| export USE_FBGEMM=1 | ||||
| export PIP_UPLOAD_FOLDER="$PIP_UPLOAD_FOLDER" | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
|  | ||||
| set -euo pipefail | ||||
|  | ||||
| PACKAGE_TYPE=${PACKAGE_TYPE:-wheel} | ||||
| PACKAGE_TYPE=${PACKAGE_TYPE:-conda} | ||||
|  | ||||
| PKG_DIR=${PKG_DIR:-/tmp/workspace/final_pkgs} | ||||
|  | ||||
| @ -18,8 +18,10 @@ BUILD_NAME=${BUILD_NAME:-} | ||||
|  | ||||
| DRY_RUN=${DRY_RUN:-enabled} | ||||
| # Don't actually do work unless explicit | ||||
| ANACONDA="true anaconda" | ||||
| AWS_S3_CP="aws s3 cp --dryrun" | ||||
| if [[ "${DRY_RUN}" = "disabled" ]]; then | ||||
|   ANACONDA="anaconda" | ||||
|   AWS_S3_CP="aws s3 cp" | ||||
| fi | ||||
|  | ||||
| @ -32,6 +34,10 @@ if [[ ${BUILD_NAME} == *-full* ]]; then | ||||
|   UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full" | ||||
| fi | ||||
|  | ||||
| # Sleep 5 minutes between retries for conda upload | ||||
| retry () { | ||||
|   "$@"  || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") | ||||
| } | ||||
|  | ||||
| do_backup() { | ||||
|   local backup_dir | ||||
| @ -43,33 +49,72 @@ do_backup() { | ||||
|   ) | ||||
| } | ||||
|  | ||||
| conda_upload() { | ||||
|   ( | ||||
|     set -x | ||||
|     retry \ | ||||
|     ${ANACONDA} \ | ||||
|     upload  \ | ||||
|     ${PKG_DIR}/*.tar.bz2 \ | ||||
|     -u "pytorch-${UPLOAD_CHANNEL}" \ | ||||
|     --label main \ | ||||
|     --no-progress \ | ||||
|     --force | ||||
|   ) | ||||
| } | ||||
|  | ||||
| s3_upload() { | ||||
|   local extension | ||||
|   local pkg_type | ||||
|   extension="$1" | ||||
|   pkg_type="$2" | ||||
|   s3_root_dir="${UPLOAD_BUCKET}/${pkg_type}/${UPLOAD_CHANNEL}" | ||||
|   s3_key_prefix="${pkg_type}/${UPLOAD_CHANNEL}" | ||||
|   if [[ -z ${UPLOAD_SUBFOLDER:-} ]]; then | ||||
|     s3_upload_dir="${s3_root_dir}/" | ||||
|     s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/" | ||||
|   else | ||||
|     s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/" | ||||
|     s3_key_prefix="${s3_key_prefix}/${UPLOAD_SUBFOLDER}" | ||||
|     s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/" | ||||
|   fi | ||||
|   ( | ||||
|     for pkg in ${PKG_DIR}/*.${extension}; do | ||||
|       ( | ||||
|         set -x | ||||
|         shm_id=$(sha256sum "${pkg}" | awk '{print $1}') | ||||
|         ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \ | ||||
|           --metadata "checksum-sha256=${shm_id}" | ||||
|         ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" | ||||
|         if [[ ${pkg_type} == "whl" ]]; then | ||||
|           dry_run_arg="--dry-run" | ||||
|           if [[ "${DRY_RUN}" = "disabled" ]]; then | ||||
|             dry_run_arg="" | ||||
|           fi | ||||
|           uv run scripts/release/upload_metadata_file.py \ | ||||
|             --package "${pkg}" \ | ||||
|             --bucket "${UPLOAD_BUCKET}" \ | ||||
|             --key-prefix "${s3_key_prefix}" \ | ||||
|             ${dry_run_arg} | ||||
|         fi | ||||
|       ) | ||||
|     done | ||||
|   ) | ||||
| } | ||||
|  | ||||
| # Install dependencies (should be a no-op if previously installed) | ||||
| conda install -yq anaconda-client | ||||
| pip install -q awscli uv | ||||
|  | ||||
| case "${PACKAGE_TYPE}" in | ||||
|   conda) | ||||
|     conda_upload | ||||
|     for conda_archive in ${PKG_DIR}/*.tar.bz2; do | ||||
|       # Fetch the platform (e.g. win-64, linux-64, etc.) from the index file because | ||||
|       # there's no actual conda command to read this | ||||
|       subdir=$(\ | ||||
|         tar -xOf "${conda_archive}" info/index.json \ | ||||
|           | grep subdir  \ | ||||
|           | cut -d ':' -f2 \ | ||||
|           | sed -e 's/[[:space:]]//' -e 's/"//g' -e 's/,//' \ | ||||
|       ) | ||||
|       BACKUP_DIR="conda/${subdir}" | ||||
|     done | ||||
|     ;; | ||||
|   libtorch) | ||||
|     s3_upload "zip" "libtorch" | ||||
|     BACKUP_DIR="libtorch/${UPLOAD_CHANNEL}/${UPLOAD_SUBFOLDER}" | ||||
|  | ||||
| @ -1,22 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -eux -o pipefail | ||||
|  | ||||
| source "${BINARY_ENV_FILE:-/c/w/env}" | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" | ||||
|  | ||||
| export USE_SCCACHE=1 | ||||
| export SCCACHE_IGNORE_SERVER_IO_ERROR=1 | ||||
|  | ||||
| echo "Free space on filesystem before build:" | ||||
| df -h | ||||
|  | ||||
| export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT" | ||||
|  | ||||
| if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|     pytorch/.ci/pytorch/windows/arm64/build_libtorch.bat | ||||
| elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then | ||||
|     pytorch/.ci/pytorch/windows/arm64/build_pytorch.bat | ||||
| fi | ||||
|  | ||||
| echo "Free space on filesystem after build:" | ||||
| df -h | ||||
| @ -1,6 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -eux -o pipefail | ||||
|  | ||||
| source "${BINARY_ENV_FILE:-/c/w/env}" | ||||
|  | ||||
| pytorch/.ci/pytorch/windows/arm64/smoke_test.bat | ||||
| @ -14,7 +14,6 @@ if [[ "$DESIRED_CUDA" == 'xpu' ]]; then | ||||
|     export VC_YEAR=2022 | ||||
|     export USE_SCCACHE=0 | ||||
|     export XPU_VERSION=2025.0 | ||||
|     export XPU_ENABLE_KINETO=1 | ||||
| fi | ||||
|  | ||||
| echo "Free space on filesystem before build:" | ||||
|  | ||||
							
								
								
									
										10
									
								
								.clang-tidy
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								.clang-tidy
									
									
									
									
									
								
							| @ -1,9 +1,8 @@ | ||||
| --- | ||||
| # NOTE there must be no spaces before the '-', so put the comma last. | ||||
| # The check bugprone-unchecked-optional-access is also turned on. | ||||
| # Note that it can cause clang-tidy to hang randomly. The tracking issue | ||||
| # The check bugprone-unchecked-optional-access is also turned off atm | ||||
| # because it causes clang-tidy to hang randomly. The tracking issue | ||||
| # can be found at https://github.com/llvm/llvm-project/issues/69369. | ||||
| # When that happens, we can disable it on the problematic code by NOLINT. | ||||
| InheritParentConfig: true | ||||
| Checks: ' | ||||
| bugprone-*, | ||||
| @ -12,8 +11,8 @@ bugprone-*, | ||||
| -bugprone-macro-parentheses, | ||||
| -bugprone-lambda-function-name, | ||||
| -bugprone-reserved-identifier, | ||||
| -bugprone-return-const-ref-from-parameter, | ||||
| -bugprone-swapped-arguments, | ||||
| -bugprone-unchecked-optional-access, | ||||
| clang-analyzer-core.*, | ||||
| clang-analyzer-cplusplus.*, | ||||
| clang-analyzer-nullability.*, | ||||
| @ -25,7 +24,6 @@ cppcoreguidelines-*, | ||||
| -cppcoreguidelines-avoid-non-const-global-variables, | ||||
| -cppcoreguidelines-interfaces-global-init, | ||||
| -cppcoreguidelines-macro-usage, | ||||
| -cppcoreguidelines-macro-to-enum, | ||||
| -cppcoreguidelines-owning-memory, | ||||
| -cppcoreguidelines-pro-bounds-array-to-pointer-decay, | ||||
| -cppcoreguidelines-pro-bounds-constant-array-index, | ||||
| @ -48,7 +46,6 @@ misc-*, | ||||
| -misc-no-recursion, | ||||
| -misc-non-private-member-variables-in-classes, | ||||
| -misc-unused-using-decls, | ||||
| -misc-use-internal-linkage, | ||||
| modernize-*, | ||||
| -modernize-macro-to-enum, | ||||
| -modernize-return-braced-init-list, | ||||
| @ -58,7 +55,6 @@ modernize-*, | ||||
| -modernize-use-trailing-return-type, | ||||
| -modernize-use-nodiscard, | ||||
| performance-*, | ||||
| -performance-enum-size, | ||||
| readability-container-size-empty, | ||||
| readability-delete-null-pointer, | ||||
| readability-duplicate-include | ||||
|  | ||||
							
								
								
									
										1
									
								
								.flake8
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								.flake8
									
									
									
									
									
								
							| @ -38,7 +38,6 @@ per-file-ignores = | ||||
|     torchgen/api/types/__init__.py: F401,F403 | ||||
|     torchgen/executorch/api/types/__init__.py: F401,F403 | ||||
|     test/dynamo/test_higher_order_ops.py: B950 | ||||
|     test/dynamo/test_error_messages.py: B950 | ||||
|     torch/testing/_internal/dynamo_test_failures.py: B950 | ||||
|     # TOR901 is only for test, we want to ignore it for everything else. | ||||
|     # It's not easy to configure this without affecting other per-file-ignores, | ||||
|  | ||||
| @ -24,10 +24,6 @@ e3900d2ba5c9f91a24a9ce34520794c8366d5c54 | ||||
| 2e26976ad3b06ce95dd6afccfdbe124802edf28f | ||||
| # 2021-06-07 Strictly typed everything in `.github` and `tools` | ||||
| 737d920b21db9b4292d056ee1329945990656304 | ||||
| # 2021-08-12 [codemod][lint][fbcode/c*] Enable BLACK by default | ||||
| b0043072529b81276a69df29e00555333117646c | ||||
| # 2021-08-25 Reformat run_test.py | ||||
| 67d8e7b659b19e1ee68208b28bfa7dba73375dbc | ||||
| # 2022-06-09 Apply clang-format to ATen headers | ||||
| 95b15c266baaf989ef7b6bbd7c23a2d90bacf687 | ||||
| # 2022-06-11 [lint] autoformat test/cpp and torch/csrc | ||||
| @ -48,57 +44,3 @@ a53cda1ddc15336dc1ff0ce1eff2a49cdc5f882e | ||||
| d80939e5e9337e8078f11489afefec59fd42f93b | ||||
| # 2024-06-28 enable UFMT in `torch.utils.data` | ||||
| 7cf0b90e49689d45be91aa539fdf54cf2ea8a9a3 | ||||
| # 2024-07-03 Enable UFMT on test/test_public_bindings.py (#128389) | ||||
| fe5424d0f8604f6e66d827ae9f94b05cb7119d55 | ||||
| # 2024-07-03 Enable UFMT on test/test_public_bindings.py (#128389) | ||||
| c686304277f7cd72331f685605325498cff94a0b | ||||
| # 2024-07-15 Enable UFMT on all of torch/sparse (#130545) | ||||
| 535016967ae65a6027f83d6b935a985996223d49 | ||||
| # 2024-07-15 [BE][Easy][1/19] enforce style for empty lines in import segments (#129752) | ||||
| a3abfa5cb57203b6a8ba7dff763f4057db8282a8 | ||||
| # 2024-07-15 [BE][Easy][2/19] enforce style for empty lines in import segments in `.ci/` and `.github/` (#129753) | ||||
| ba48cf653541e9160dfdefa7bfea885c22e48608 | ||||
| # 2024-07-16 [BE][Easy][5/19] enforce style for empty lines in import segments in `tools/` and `torchgen/` (#129756) | ||||
| f6838d521a243dbedc50ae96575720bf2cc8a8ad | ||||
| # 2024-07-17 [BE][Easy][9/19] enforce style for empty lines in import segments in `test/[e-h]*/` (#129760) | ||||
| 76169cf69184bd462b9add40f893f57675f8a057 | ||||
| # 2024-07-16 [BE][Easy][3/19] enforce style for empty lines in import segments in `benchmarks/` (#129754) | ||||
| c0ed38e644aed812d76b0ec85fae2f6019bf462b | ||||
| # 2024-07-16 [BE][Easy][4/19] enforce style for empty lines in import segments in `functorch/` (#129755) | ||||
| 740fb229660f388feddc288c127ab12c82e67d36 | ||||
| # 2024-07-17 [BE][Easy][12/19] enforce style for empty lines in import segments in `test/i*/` (#129763) | ||||
| aecc746fccc4495313167e3a7f94210daf457e1d | ||||
| # 2024-07-18 Revert "[BE][Easy][12/19] enforce style for empty lines in import segments in `test/i*/` (#129763)" | ||||
| b732b52f1e4378f8486ceb5e7026be3321c2651c | ||||
| # 2024-07-18 [BE][Easy][12/19] enforce style for empty lines in import segments in `test/i*/` (#129763) | ||||
| 134bc4fc34bb02795aa694e66b132dcea5dde1e1 | ||||
| # 2024-07-26 [BE][Easy][8/19] enforce style for empty lines in import segments in `test/[k-p]*/` (#129759) | ||||
| fbe6f42dcf1834213e0baa87b87529161df3c4d7 | ||||
| # 2024-07-31 [BE][Easy][14/19] enforce style for empty lines in import segments in `torch/_[a-c]*/` and `torch/_[e-h]*/` and `torch/_[j-z]*/` (#129765) | ||||
| e7eeee473c6cb45942e87de5a616b0eb635513d6 | ||||
| # 2024-07-31 Fix lint after PR #130572 (#132316) | ||||
| d72e863b3ecd3de4c8ea00518e110da964583f4f | ||||
| # 2024-07-31 [BE][Easy][15/19] enforce style for empty lines in import segments in `torch/_d*/` (#129767) | ||||
| e74ba1b34a476b46e76b4e32afe2d481f97e9a47 | ||||
| # 2024-07-31 [BE][Easy][18/19] enforce style for empty lines in import segments in `torch/d*/` (#129770) | ||||
| b25ef91bf158ce459d8654e33c50c8e6ed8db716 | ||||
| # 2024-07-20 [BE][Easy][13/19] enforce style for empty lines in import segments in `test/j*/` (#129764) | ||||
| 6ff1e43a416c43cd82b210e22ac47384494c172e | ||||
| # 2024-11-01 [Lint] Clang-format all metal kernels (#139530) | ||||
| b3ad45733bd908b7358959ca1e1f8d026f4507eb | ||||
| # 2024-11-17 [BE][MPS] Apply clang-format to mps headers (#140906) | ||||
| 99014a297c179862af38ee86bac2051434d3db41 | ||||
| # 2024-11-27 Apply clang-format for ATen/core/boxing headers (#141105) | ||||
| 19d01a1ef0c0d65768eb0a5c97a25328eec57fbd | ||||
| # 2024-12-05 fix the lint from D66795414 (#142122) | ||||
| 65c2086d452ae6966ce9d7fb3cb2eef2fd0d2add | ||||
| # 2024-12-20 Apply clang-format for ATen/core/dispatch headers (#143620) | ||||
| cee06e74eeb54994b97000a02b715a4e63a97951 | ||||
| # 2024-12-22 Better fix for f-strings in set_linter for py3.12 (#143725) | ||||
| eebc93d41eeffb936cbf20c9052e1e813d0cc052 | ||||
| # 2025-01-04 [mps/BE] Fix linter warning/advice. (#144199) | ||||
| 0dc1e6be192b260f1c072d70e1b06a3ac8e109fa | ||||
| # 2025-01-07 Fix lint in `test_provenance_tracing.py` (#144296) | ||||
| 61c0a3d1cbaf6420e40ab0f9c9019daa21145e69 | ||||
| # 2025-01-09 [BE] fix ruff rule E226: add missing whitespace around operator in f-strings (#144415) | ||||
| dcc3cf7066b4d8cab63ecb73daf1e36b01220a4e | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
								
							| @ -5,7 +5,7 @@ body: | ||||
| - type: markdown | ||||
|   attributes: | ||||
|     value: > | ||||
|       #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+). Note: Please write your bug report in English to ensure it can be understood and addressed by the development team. If you are filing a bug for torch.compile, please use the [torch.compile issue template](https://github.com/pytorch/pytorch/issues/new?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen&template=pt2-bug-report.yml). | ||||
|       #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+). | ||||
| - type: textarea | ||||
|   attributes: | ||||
|     label: 🐛 Describe the bug | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/ISSUE_TEMPLATE/documentation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/ISSUE_TEMPLATE/documentation.yml
									
									
									
									
										vendored
									
									
								
							| @ -2,10 +2,6 @@ name: 📚 Documentation | ||||
| description: Report an issue related to https://pytorch.org/docs/stable/index.html | ||||
|  | ||||
| body: | ||||
| - type: markdown | ||||
|   attributes: | ||||
|     value: > | ||||
|       #### Note: Please report your documentation issue in English to ensure it can be understood and addressed by the development team. | ||||
| - type: textarea | ||||
|   attributes: | ||||
|     label: 📚 The doc issue | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/ISSUE_TEMPLATE/feature-request.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/ISSUE_TEMPLATE/feature-request.yml
									
									
									
									
										vendored
									
									
								
							| @ -2,10 +2,6 @@ name: 🚀 Feature request | ||||
| description: Submit a proposal/request for a new PyTorch feature | ||||
|  | ||||
| body: | ||||
| - type: markdown | ||||
|   attributes: | ||||
|     value: > | ||||
|       #### Note: Please write your feature request in English to ensure it can be understood and addressed by the development team. | ||||
| - type: textarea | ||||
|   attributes: | ||||
|     label: 🚀 The feature, motivation and pitch | ||||
|  | ||||
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE/pt2-bug-report.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE/pt2-bug-report.yml
									
									
									
									
										vendored
									
									
								
							| @ -3,10 +3,6 @@ description: Create a report to help us reproduce and fix the bug | ||||
| labels: ["oncall: pt2"] | ||||
|  | ||||
| body: | ||||
|   - type: markdown | ||||
|     attributes: | ||||
|       value: > | ||||
|         #### Note: Please write your bug report in English to ensure it can be understood and addressed by the development team. | ||||
|   - type: markdown | ||||
|     attributes: | ||||
|       value: > | ||||
| @ -22,8 +18,6 @@ body: | ||||
|  | ||||
|         - If comparing eager and torch.compile at fp16/bf16, you should use fp32 as baseline | ||||
|  | ||||
|         - Ensure rng state used to compare results is equivalent. Use `torch._inductor.config.fallback_random=True` and reset the torch rng seed between comparisons | ||||
|  | ||||
|         If the above requirements are met, add the label "topic: fuzzer" to your issue. | ||||
|  | ||||
|   - type: textarea | ||||
|  | ||||
							
								
								
									
										9
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.github/actionlint.yaml
									
									
									
									
										vendored
									
									
								
							| @ -1,10 +1,8 @@ | ||||
| self-hosted-runner: | ||||
|   labels: | ||||
|     # GitHub hosted runner that actionlint doesn't recognize because actionlint version (1.6.21) is too old | ||||
|     - ubuntu-24.04 | ||||
|     # GitHub hosted x86 Linux runners | ||||
|     - linux.24_04.4x | ||||
|     - linux.24_04.16x | ||||
|     - linux.20_04.4x | ||||
|     - linux.20_04.16x | ||||
|     # Organization-wide AWS Linux Runners | ||||
|     - linux.large | ||||
|     - linux.2xlarge | ||||
| @ -12,6 +10,7 @@ self-hosted-runner: | ||||
|     - linux.9xlarge.ephemeral | ||||
|     - am2.linux.9xlarge.ephemeral | ||||
|     - linux.12xlarge | ||||
|     - linux.12xlarge.ephemeral | ||||
|     - linux.24xlarge | ||||
|     - linux.24xlarge.ephemeral | ||||
|     - linux.arm64.2xlarge | ||||
| @ -43,8 +42,6 @@ self-hosted-runner: | ||||
|     - windows.8xlarge.nvidia.gpu | ||||
|     - windows.8xlarge.nvidia.gpu.nonephemeral | ||||
|     - windows.g5.4xlarge.nvidia.gpu | ||||
|     # Windows ARM64 runners | ||||
|     - windows-11-arm64 | ||||
|     # Organization-wide AMD hosted runners | ||||
|     - linux.rocm.gpu | ||||
|     - linux.rocm.gpu.2 | ||||
|  | ||||
							
								
								
									
										5
									
								
								.github/actions/checkout-pytorch/action.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								.github/actions/checkout-pytorch/action.yml
									
									
									
									
										vendored
									
									
								
							| @ -40,11 +40,6 @@ runs: | ||||
|         fi | ||||
|         mkdir "${GITHUB_WORKSPACE}" | ||||
|  | ||||
|         # Use all available CPUs for fetching | ||||
|         cd "${GITHUB_WORKSPACE}" | ||||
|         git config --global fetch.parallel 0 | ||||
|         git config --global submodule.fetchJobs 0 | ||||
|  | ||||
|     - name: Checkout PyTorch | ||||
|       uses: actions/checkout@v4 | ||||
|       with: | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/actions/diskspace-cleanup/action.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/actions/diskspace-cleanup/action.yml
									
									
									
									
										vendored
									
									
								
							| @ -17,10 +17,6 @@ runs: | ||||
|         set -ex | ||||
|         diskspace_cutoff=${{ inputs.diskspace-cutoff }} | ||||
|         docker_root_dir=$(docker info -f '{{.DockerRootDir}}') | ||||
|         if [ ! -d "$docker_root_dir" ]; then | ||||
|             echo "Docker root directory ($docker_root_dir) does not exist. Skipping disk space check." | ||||
|             exit 0 | ||||
|         fi | ||||
|         diskspace=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //') | ||||
|         msg="Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified" | ||||
|         if [[ "$diskspace" -ge "$diskspace_cutoff" ]] ; then | ||||
|  | ||||
							
								
								
									
										24
									
								
								.github/actions/setup-rocm/action.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								.github/actions/setup-rocm/action.yml
									
									
									
									
										vendored
									
									
								
							| @ -5,6 +5,20 @@ description: Set up ROCm host for CI | ||||
| runs: | ||||
|   using: composite | ||||
|   steps: | ||||
|     - name: Set DOCKER_HOST | ||||
|       shell: bash | ||||
|       run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" | ||||
|  | ||||
|     - name: Remove leftover Docker config file | ||||
|       shell: bash | ||||
|       continue-on-error: true | ||||
|       run: | | ||||
|         set -ex | ||||
|  | ||||
|         cat ~/.docker/config.json || true | ||||
|         # https://stackoverflow.com/questions/64455468/error-when-logging-into-ecr-with-docker-login-error-saving-credentials-not | ||||
|         rm -f ~/.docker/config.json | ||||
|  | ||||
|     - name: Stop all running docker containers | ||||
|       if: always() | ||||
|       shell: bash | ||||
| @ -97,16 +111,8 @@ runs: | ||||
|       shell: bash | ||||
|       run: | | ||||
|         # All GPUs are visible to the runner; visibility, if needed, will be set by run_test.py. | ||||
|         # Add render group for container creation. | ||||
|         render_gid=`cat /etc/group | grep render | cut -d: -f3` | ||||
|         # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG. | ||||
|         if [ -f "/etc/podinfo/gha-render-devices" ]; then | ||||
|           DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) | ||||
|         else | ||||
|           DEVICE_FLAG="--device /dev/dri" | ||||
|         fi | ||||
|         # The --group-add daemon and --group-add bin are needed in the Ubuntu 24.04 and Almalinux OSs respectively. | ||||
|         # This is due to the device files (/dev/kfd & /dev/dri) being owned by video group on bare metal. | ||||
|         # This video group ID maps to subgid 1 inside the docker image due to the /etc/subgid entries. | ||||
|         # The group name corresponding to group ID 1 can change depending on the OS, so both are necessary. | ||||
|         echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd $DEVICE_FLAG --group-add video --group-add $render_gid --group-add daemon --group-add bin --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --network=host" >> "${GITHUB_ENV}" | ||||
|         echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon --group-add bin" >> "${GITHUB_ENV}" | ||||
|  | ||||
| @ -1,56 +0,0 @@ | ||||
| name: upload-utilization-stats | ||||
|  | ||||
| description: Upload utilization stats to artifacts | ||||
|  | ||||
| inputs: | ||||
|     workflow_run_id: | ||||
|       type: string | ||||
|       description: 'workflow (run) id of the workflow the test is running' | ||||
|       required: True | ||||
|     workflow_attempt: | ||||
|       type: string | ||||
|       description: 'the workflow (run) attempt' | ||||
|       required: True | ||||
|     workflow_name: | ||||
|       description: 'name of the workflow' | ||||
|       type: string | ||||
|       required: True | ||||
|     job_id: | ||||
|       type: string | ||||
|       description: 'the job (run) id for the test' | ||||
|       required: True | ||||
|     job_name: | ||||
|       type: string | ||||
|       description: 'the job name of the test' | ||||
|       required: True | ||||
|  | ||||
| runs: | ||||
|   using: composite | ||||
|   steps: | ||||
|     - name: Print Inputs | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "workflow_id: ${{inputs.workflow_run_id}}" | ||||
|         echo "workflow_attempt: ${{inputs.workflow_attempt}}" | ||||
|         echo "workflow_Name: ${{inputs.workflow_name}}" | ||||
|         echo "job_id: ${{inputs.job_id}}" | ||||
|         echo "job_name:  ${{inputs.job_name}}" | ||||
|     - uses: nick-fields/retry@v3.0.0 | ||||
|       name: Setup dependencies | ||||
|       with: | ||||
|         shell: bash | ||||
|         timeout_minutes: 5 | ||||
|         max_attempts: 5 | ||||
|         retry_wait_seconds: 30 | ||||
|         command: | | ||||
|           set -eu | ||||
|           python3 -m pip install python-dateutil==2.8.2 boto3==1.35.42 pandas==2.1.3 | ||||
|     - name: Upload utilization stats to s3 | ||||
|       shell: bash | ||||
|       run: | | ||||
|         python3 -m tools.stats.upload_utilization_stats.upload_utilization_stats \ | ||||
|           --workflow-run-id "${{inputs.workflow_run_id}}" \ | ||||
|           --workflow-name "${{inputs.workflow_name}}" \ | ||||
|           --workflow-run-attempt "${{inputs.workflow_attempt}}" \ | ||||
|           --job-id "${{inputs.job_id}}" \ | ||||
|           --job-name "${{inputs.job_name}}" | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| c670ad81fda266b6598aeeef434583eb98197ae8 | ||||
| b6d4675c7aedc53ba04f3f55786aac1de32be6b4 | ||||
|  | ||||
							
								
								
									
										1
									
								
								.github/ci_commit_pins/fbgemm_rocm.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.github/ci_commit_pins/fbgemm_rocm.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +0,0 @@ | ||||
| 5fb5024118e9bb9decf96c2b0b1a8f0010bf56be | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/torchbench.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/torchbench.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 373ffb19dc470f4423a3176a4133f8f4b3cdb5bd | ||||
| 766a5e3a189384659fd35a68c3b17b88c761aaac | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| r2.7 | ||||
| b2b890e962f5fb6f481e5da2eb4a43bb990d0f1b | ||||
|  | ||||
							
								
								
									
										7
									
								
								.github/labeler.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								.github/labeler.yml
									
									
									
									
										vendored
									
									
								
							| @ -98,7 +98,7 @@ | ||||
| - test/distributed/** | ||||
| - torch/testing/_internal/distributed/** | ||||
|  | ||||
| "release notes: distributed (checkpoint)": | ||||
| "module: distributed_checkpoint": | ||||
| - torch/distributed/checkpoint/** | ||||
| - test/distributed/checkpoint/** | ||||
|  | ||||
| @ -107,8 +107,3 @@ | ||||
| - torch/csrc/dynamo/compiled_autograd.h | ||||
| - torch/_dynamo/compiled_autograd.py | ||||
| - torch/inductor/test_compiled_autograd.py | ||||
|  | ||||
| "ciflow/xpu": | ||||
| - torch/csrc/inductor/aoti_include/xpu.h | ||||
| - torch/csrc/inductor/cpp_wrapper/device_internal/xpu.h | ||||
| - torch/csrc/inductor/cpp_wrapper/xpu.h | ||||
|  | ||||
							
								
								
									
										44
									
								
								.github/merge_rules.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										44
									
								
								.github/merge_rules.yaml
									
									
									
									
										vendored
									
									
								
							| @ -79,6 +79,7 @@ | ||||
|   - .ci/docker/ci_commit_pins/triton.txt | ||||
|   approved_by: | ||||
|   - pytorchbot | ||||
|   ignore_flaky_failures: false | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
| @ -90,6 +91,7 @@ | ||||
|   - test/slow_tests.json | ||||
|   approved_by: | ||||
|   - pytorchbot | ||||
|   ignore_flaky_failures: false | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
| @ -101,10 +103,12 @@ | ||||
|   - .ci/docker/ci_commit_pins/executorch.txt | ||||
|   approved_by: | ||||
|   - pytorchbot | ||||
|   ignore_flaky_failures: false | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
|   - pull | ||||
|   - pull / linux-jammy-py3-clang12-executorch / build | ||||
|   - pull / linux-jammy-py3-clang12-executorch / test (executorch, 1, 1, linux.2xlarge) | ||||
|  | ||||
| - name: OSS CI / pytorchbot / XLA | ||||
|   patterns: | ||||
| @ -115,7 +119,8 @@ | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
|   - pull | ||||
|   - pull / linux-focal-py3_9-clang9-xla / build | ||||
|   - pull / linux-focal-py3_9-clang9-xla / test (xla, 1, 1, linux.12xlarge) | ||||
|  | ||||
| - name: Documentation | ||||
|   patterns: | ||||
| @ -242,6 +247,25 @@ | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: XPU ATen | ||||
|   patterns: | ||||
|   - aten/src/ATen/xpu/** | ||||
|   - c10/xpu/** | ||||
|   - torch/csrc/xpu/** | ||||
|   - torch/xpu/** | ||||
|   - test/xpu/** | ||||
|   - test/test_xpu.py | ||||
|   - third_party/xpu.txt | ||||
|   - .ci/docker/ci_commit_pins/triton-xpu.txt | ||||
|   approved_by: | ||||
|   - EikanWang | ||||
|   - jgong5 | ||||
|   - gujinghui | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: Distributions | ||||
|   patterns: | ||||
|   - torch/distributions/** | ||||
| @ -334,7 +358,6 @@ | ||||
|   - XiaobingSuper | ||||
|   - jgong5 | ||||
|   - mingfeima | ||||
|   - EikanWang | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
| @ -367,7 +390,6 @@ | ||||
|   - jgong5 | ||||
|   - vfdev-5 | ||||
|   - leslie-fang-intel | ||||
|   - EikanWang | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
| @ -381,7 +403,6 @@ | ||||
|   approved_by: | ||||
|   - leslie-fang-intel | ||||
|   - jgong5 | ||||
|   - EikanWang | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
| @ -498,19 +519,6 @@ | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: XPU | ||||
|   patterns: | ||||
|   - '**xpu**' | ||||
|   - '**sycl**' | ||||
|   approved_by: | ||||
|   - EikanWang | ||||
|   - jgong5 | ||||
|   - gujinghui | ||||
|   mandatory_checks_name: | ||||
|   - EasyCLA | ||||
|   - Lint | ||||
|   - pull | ||||
|  | ||||
| - name: superuser | ||||
|   patterns: | ||||
|   - '*' | ||||
|  | ||||
							
								
								
									
										7
									
								
								.github/nitpicks.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								.github/nitpicks.yml
									
									
									
									
										vendored
									
									
								
							| @ -3,10 +3,3 @@ | ||||
|     If you are adding a new function or defaulted argument to native_functions.yaml, you cannot use it from pre-existing Python frontend code until our FC window passes (two weeks).  Split your PR into two PRs, one which adds the new C++ functionality, and one that makes use of it from Python, and land them two weeks apart.  See https://github.com/pytorch/pytorch/wiki/PyTorch's-Python-Frontend-Backward-and-Forward-Compatibility-Policy#forwards-compatibility-fc for more info. | ||||
|   pathFilter: | ||||
|     - 'aten/src/ATen/native/native_functions.yaml' | ||||
|  | ||||
| - markdown: | | ||||
|     ## Attention! One of PyTorch's C-stable API files was changed | ||||
|     You MUST NOT change existing function declarations in this file, as this header defines a stable C ABI.  If you need to change the signature for a function, introduce a new v2 version of the function and modify code generation to target the new version of the function. | ||||
|   pathFilter: | ||||
|     - 'torch/csrc/inductor/aoti_torch/c/*' | ||||
|     - 'torch/csrc/inductor/aoti_torch/generated/*' | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							| @ -7,7 +7,6 @@ ciflow_push_tags: | ||||
| - ciflow/inductor | ||||
| - ciflow/inductor-periodic | ||||
| - ciflow/inductor-rocm | ||||
| - ciflow/inductor-perf-test-nightly-rocm | ||||
| - ciflow/inductor-perf-compare | ||||
| - ciflow/inductor-micro-benchmark | ||||
| - ciflow/inductor-micro-benchmark-cpu-x86 | ||||
| @ -17,7 +16,6 @@ ciflow_push_tags: | ||||
| - ciflow/nightly | ||||
| - ciflow/periodic | ||||
| - ciflow/rocm | ||||
| - ciflow/rocm-mi300 | ||||
| - ciflow/s390 | ||||
| - ciflow/slow | ||||
| - ciflow/trunk | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	